kandimat-user-app/bin/xlsx-data/index.js

169 lines
4.9 KiB
JavaScript

const XLSX = require('xlsx')
const ora = require('ora')
const { writeFile } = require('../helper')
/**
 * Serialises a value as pretty-printed JSON.
 *
 * @param {*} [data={}] Value to serialise; an omitted argument is treated as `{}`.
 * @returns {string} JSON text indented with two spaces.
 */
function toJSON (data = {}) {
  const replacer = null
  const indentWidth = 2
  return JSON.stringify(data, replacer, indentWidth)
}
/**
 * Turns a party name into a comparable token: upper-cased, trimmed, and with
 * every remaining whitespace character replaced by a hyphen.
 *
 * Consecutive whitespace characters are replaced one by one, so 'a  b'
 * becomes 'A--B'.
 *
 * @param {string} name Party name or sheet name.
 * @returns {string} The normalised token.
 */
function normalisePartyToken (name) {
  const upperCased = name.toUpperCase()
  const trimmed = upperCased.trim()
  return trimmed.split(/\s/).join('-')
}
/**
 * Collects the localised variants of one logical column from a sheet row.
 *
 * Every key of `data` that contains `section` is treated as a localised
 * column. The last whitespace-separated word of the key, lower-cased, becomes
 * the locale (e.g. 'Thesis EN' -> 'en'). If two keys resolve to the same
 * locale, the later key wins.
 *
 * @param {string} section Text a column header must contain to be picked up.
 * @param {Object} [data={}] Row object keyed by column header.
 * @returns {Object<string, string>} Map of locale to trimmed cell text.
 */
function createLocaleMap (section, data = {}) {
  return Object.keys(data)
    .filter(key => key.includes(section))
    .reduce((acc, keyLabel) => {
      // Trim the header and split on any whitespace run, so a header with
      // stray trailing whitespace ('Thesis EN ') still yields 'en', not ''.
      const words = keyLabel.trim().split(/\s+/)
      const locale = words[words.length - 1].toLowerCase()
      const value = data[keyLabel]
      // A cell value is not guaranteed to be a string (e.g. a numeric cell);
      // calling .trim() on it directly would throw, so stringify first.
      acc[locale] = value == null ? '' : String(value).trim()
      return acc
    }, {})
}
/**
 * Converts the rows of one party's position sheet into position entries.
 *
 * For every row, `Thesis` is parsed as a base-10 integer, `Position` is passed
 * through unchanged, and the columns containing 'Statement' are folded into a
 * locale map.
 *
 * @param {string} [sheetName] Name of a sheet in the module-level `workbook`.
 * @returns {Array<Object>} One `{ thesis, position, statement }` entry per
 *   row; an empty array when no sheet name is given.
 */
function createPartyPositionMap (sheetName) {
  if (!sheetName) {
    return []
  }

  const rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName])
  const positions = []
  for (const row of rows) {
    positions.push({
      thesis: parseInt(row.Thesis, 10),
      position: row.Position,
      statement: createLocaleMap('Statement', row)
    })
  }
  return positions
}
/**
 * Builds a lookup of national parties affiliated with one European party,
 * keyed by lower-cased country code.
 *
 * A row belongs to the European party when its normalised 'European Party'
 * cell equals `token`. Only the first matching row per country is kept.
 *
 * @param {string} token Normalised token of the European party.
 * @param {Array<Object>} nationalParties Rows of the national parties sheet.
 * @returns {Object<string, Object>} Map of country code to
 *   `{ token, name, program }`.
 */
function createNationalPartyMap (token, nationalParties) {
  const affiliated = nationalParties.filter(
    row => normalisePartyToken(row['European Party']) === token
  )

  const byCountry = {}
  for (const row of affiliated) {
    const countryCode = row['Country Code'].toLowerCase()
    // A country that already has an entry keeps it; later rows are ignored.
    if (Object.prototype.hasOwnProperty.call(byCountry, countryCode)) {
      continue
    }
    byCountry[countryCode] = {
      token: normalisePartyToken(row.Token),
      name: row.Name,
      program: row.Program
    }
  }
  return byCountry
}
/**
 * Serialises `data` to JSON and writes it to `fileName` inside
 * `OUTPUT_DIRECTORY`, reporting the outcome through the spinner.
 *
 * A failure is reported with `spinner.fail` and is NOT re-thrown, so the
 * returned promise still resolves.
 *
 * @param {string} fileName File name relative to `OUTPUT_DIRECTORY`.
 * @param {*} [data={}] Value to serialise.
 * @returns {Promise<void>}
 */
async function writeDataset (fileName, data = {}) {
  const destination = `${OUTPUT_DIRECTORY}/${fileName}`

  try {
    const serialised = toJSON(data)
    await writeFile(destination, serialised)
    spinner.succeed(`Success! It's located at ${destination}`)
  } catch (error) {
    const { message } = error
    spinner.fail(`Failure! Couldn't write '${fileName}'. Error: ${message}`)
  }
}
/**
 * Writes the rows of the options sheet, unmodified, to 'options.json'.
 *
 * @param {string} sheetName Name of the options sheet in `workbook`.
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when `sheetName` is falsy.
 */
async function createOptionsDataset (sheetName) {
  if (!sheetName) {
    throw new Error(`createOptionsDataset() requires 'sheetName', got "${sheetName}"`)
  }

  const sheet = workbook.Sheets[sheetName]
  const rows = XLSX.utils.sheet_to_json(sheet)
  await writeDataset('options.json', rows)
}
/**
 * Converts the theses sheet into 'theses.json'.
 *
 * Each row becomes `{ id, category, thesis, terminology }`: `ID` is parsed as
 * a base-10 integer, the 'Category' and 'Thesis' columns are folded into
 * locale maps, and `Terminology` is passed through as the raw cell value,
 * falling back to an empty array when the cell is missing or falsy.
 *
 * @param {string} sheetName Name of the theses sheet in `workbook`.
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when `sheetName` is falsy.
 */
async function createThesesDataset (sheetName) {
  if (!sheetName) {
    throw new Error(`createThesesDataset() requires 'sheetName', got "${sheetName}"`)
  }

  const rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName])
  const theses = []
  for (const row of rows) {
    theses.push({
      id: parseInt(row.ID, 10),
      category: createLocaleMap('Category', row),
      thesis: createLocaleMap('Thesis', row),
      terminology: row.Terminology || []
    })
  }

  await writeDataset('theses.json', theses)
}
/**
 * Converts the terminology sheet into 'terminology.json'.
 *
 * Each row becomes `{ id, explanation, reference }`: `ID` is parsed as a
 * base-10 integer and the 'Explanation' and 'Reference' columns are folded
 * into locale maps.
 *
 * @param {string} sheetName Name of the terminology sheet in `workbook`.
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when `sheetName` is falsy.
 */
async function createTerminologyDataset (sheetName) {
  if (!sheetName) {
    throw new Error(`createTerminologyDataset() requires 'sheetName', got "${sheetName}"`)
  }

  const toEntry = row => ({
    id: parseInt(row.ID, 10),
    explanation: createLocaleMap('Explanation', row),
    reference: createLocaleMap('Reference', row)
  })
  const sheet = workbook.Sheets[sheetName]
  const entries = XLSX.utils.sheet_to_json(sheet).map(row => toEntry(row))

  await writeDataset('terminology.json', entries)
}
/**
 * Combines the European parties sheet, the national parties sheet and the
 * per-party position sheets into 'parties.json'.
 *
 * Each row of the European parties sheet becomes one entry holding its
 * normalised token, localised name and program, the national parties
 * affiliated with it, and the positions read from the party sheet whose
 * normalised name equals the token (an empty list when there is none).
 *
 * @param {string} sheetName Must be truthy; it is only checked, never used to
 *   look up a sheet.
 * @param {Object} [sheets={}] Names of the sheets to read.
 * @param {string} sheets.europeanParties Name of the European parties sheet.
 * @param {string} sheets.nationalParties Name of the national parties sheet.
 * @param {string[]} [sheets.parties=[]] Names of the per-party position sheets.
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when `sheetName`, `sheets.europeanParties`
 *   or `sheets.nationalParties` is falsy.
 */
async function createPartiesDataset (sheetName, sheets = {}) {
  if (!sheetName) {
    throw new Error(`createPartiesDataset() requires 'sheetName', got "${sheetName}"`)
  }

  // `parties` defaults to an empty list so a caller that has no position
  // sheets gets empty `positions` instead of a TypeError on `parties.find`.
  const { europeanParties, nationalParties, parties = [] } = sheets

  // These are the sheet names that are actually read. Without the checks a
  // missing name would silently produce an empty 'parties.json'.
  if (!europeanParties) {
    throw new Error(`createPartiesDataset() requires 'sheets.europeanParties', got "${europeanParties}"`)
  }
  if (!nationalParties) {
    throw new Error(`createPartiesDataset() requires 'sheets.nationalParties', got "${nationalParties}"`)
  }

  const rawDataEU = XLSX.utils.sheet_to_json(workbook.Sheets[europeanParties])
  const rawDataNational = XLSX.utils.sheet_to_json(workbook.Sheets[nationalParties])

  const data = rawDataEU.map(block => {
    const token = normalisePartyToken(block.Token)
    return {
      id: parseInt(block.ID, 10),
      token,
      name: createLocaleMap('European Party', block),
      european_profile: {
        // Built separately so `name` and `party` do not share one object.
        party: createLocaleMap('European Party', block)
      },
      national_parties: createNationalPartyMap(token, rawDataNational),
      program: createLocaleMap('Program', block),
      // Sheet names are normalised the same way as tokens before comparing;
      // `find` yields undefined when no sheet matches, which results in [].
      positions: createPartyPositionMap(
        parties.find(sName => normalisePartyToken(sName) === token)
      )
    }
  })

  await writeDataset('parties.json', data)
}
// Directory the generated JSON files are written into.
// Alternative location, left disabled:
// const OUTPUT_DIRECTORY = './bin/xlsx-data/test'
const OUTPUT_DIRECTORY = './src/data'

// Spreadsheet to convert; it is read from the './resources' directory.
const RESOURCE_FILE = 'euromat-dataset.xlsx'

// Shared progress reporter used by every step of the conversion.
const spinner = ora()

// The workbook is parsed once, at load time, and shared by all dataset builders.
const workbook = XLSX.readFile(`./resources/${RESOURCE_FILE}`)

// Sheets are identified purely by their position in the workbook.
const [
  options, // 1st sheet
  theses, // 2nd sheet
  terminology, // 3rd sheet
  europeanParties, // 4th sheet
  nationalParties, // 5th sheet
  ...morePartySheets // every remaining sheet is treated as a party position sheet
] = workbook.SheetNames
// Entry point: converts the workbook into the JSON datasets, one after the
// other, reporting progress through the spinner.
//
// The whole run is wrapped in try/catch so that a rejected step (for example
// a missing sheet name) is reported and reflected in the process exit code
// instead of surfacing as an unhandled promise rejection.
;(async () => {
  try {
    spinner.start()
    spinner.info(`Parsing '${RESOURCE_FILE}' to JSON files`)
    spinner.info(`XLSX SheetNames: ${workbook.SheetNames}`)

    // The messages name the file that is actually written; the sheet it is
    // read from is mentioned separately because the two may differ.
    spinner.info(`Writing 'options.json' file (sheet '${options}')`)
    await createOptionsDataset(options)

    spinner.info(`Writing 'theses.json' file (sheet '${theses}')`)
    await createThesesDataset(theses)

    spinner.info(`Writing 'terminology.json' file (sheet '${terminology}')`)
    await createTerminologyDataset(terminology)

    spinner.info(`Writing 'parties.json' file`)
    // The first argument only has to be truthy; the sheets that are read are
    // the ones named in the second argument.
    await createPartiesDataset('parties', {
      europeanParties,
      nationalParties,
      parties: morePartySheets
    })

    spinner.stopAndPersist()
  } catch (error) {
    spinner.fail(`Failure! Couldn't convert '${RESOURCE_FILE}'. Error: ${error.message}`)
    // Signal the failure to the caller without cutting off pending output.
    process.exitCode = 1
  }
})()