2019-04-17 09:55:08 +02:00
|
|
|
const XLSX = require('xlsx')
|
|
|
|
const ora = require('ora')
|
|
|
|
const { writeFile } = require('../helper')
|
|
|
|
|
2019-04-17 10:40:21 +02:00
|
|
|
/**
 * Serialises a value as pretty-printed JSON.
 *
 * @param {*} [data={}] - Value to serialise. An empty object is used when the
 *   argument is omitted or `undefined`.
 * @returns {string} The `JSON.stringify` result, indented with two spaces.
 */
function toJSON (data = {}) {
  const replacer = null
  const indentWidth = 2
  return JSON.stringify(data, replacer, indentWidth)
}
|
2019-04-17 09:55:08 +02:00
|
|
|
|
2019-04-24 23:28:55 +02:00
|
|
|
/**
 * Normalises a party name or token so that values coming from different
 * sheets (or sheet names) can be compared with `===`.
 *
 * The value is upper-cased, surrounding whitespace is removed and every
 * remaining whitespace character is replaced by a hyphen (one hyphen per
 * character, so runs of spaces yield runs of hyphens).
 *
 * @param {*} name - Raw token. Non-string values are converted with
 *   `String()`; `null`/`undefined` are treated as an empty string.
 * @returns {string} The normalised token, `''` when no value was given.
 */
function normalisePartyToken (name) {
  // Row values may be absent or non-string (e.g. numeric cells); coerce first
  // so the string methods below cannot throw a TypeError.
  const text = name == null ? '' : String(name)

  return text
    .toUpperCase()
    .trim()
    .replace(/\s/g, '-')
}
|
|
|
|
|
2019-04-17 09:55:08 +02:00
|
|
|
/**
 * Collects all columns of a row that belong to one section into an object
 * keyed by locale.
 *
 * A column belongs to the section when its header contains `section` as a
 * substring. The locale is the last space-separated word of the header,
 * lower-cased (e.g. header `"Thesis EN"` -> key `"en"`).
 *
 * @param {string} section - Text a column header must contain to be included.
 * @param {Object} [data={}] - Row object mapping column headers to cell
 *   values. `null` is treated like an empty row.
 * @returns {Object<string, string>} Map of locale -> trimmed cell text.
 *   Non-string cell values are converted with `String()`; `null`/`undefined`
 *   values become `''`.
 */
function createLocaleMap (section, data = {}) {
  // The default parameter only covers `undefined`; guard against `null` too.
  const row = data || {}

  return Object.keys(row)
    .filter(key => key.includes(section))
    .reduce((acc, keyLabel) => {
      const splitKeyLabel = keyLabel.split(' ')
      const locale = splitKeyLabel[splitKeyLabel.length - 1].toLowerCase()
      const value = row[keyLabel]
      // Cell values are not guaranteed to be strings (numeric cells, for
      // instance), so coerce before trimming instead of calling .trim() blindly.
      acc[locale] = value == null ? '' : String(value).trim()
      return acc
    }, {})
}
|
|
|
|
|
|
|
|
/**
 * Builds the list of thesis positions stored in a single party sheet.
 *
 * @param {string} [sheetName] - Name of the sheet in the module-level
 *   `workbook`. When falsy, no sheet is read.
 * @returns {Array<{thesis: number, position: *, statement: Object}>} One entry
 *   per sheet row, or an empty array when `sheetName` is falsy.
 */
function createPartyPositionMap (sheetName) {
  if (!sheetName) {
    return []
  }

  // Converts one row: the `Thesis` column is parsed as a base-10 integer,
  // `Position` is passed through untouched and every "Statement ..." column
  // is gathered per locale.
  const toPosition = row => {
    const thesis = parseInt(row.Thesis, 10)
    const position = row.Position
    const statement = createLocaleMap('Statement', row)
    return { thesis, position, statement }
  }

  const sheet = workbook.Sheets[sheetName]
  const rows = XLSX.utils.sheet_to_json(sheet)
  return rows.map(row => toPosition(row))
}
|
|
|
|
|
2019-04-24 23:28:55 +02:00
|
|
|
/**
 * Groups the national parties that belong to one European party by country.
 *
 * A row belongs to the European party when its normalised `European Party`
 * column equals `token`. Only the first matching row per country code is
 * kept; later rows for the same country are ignored.
 *
 * @param {string} token - Normalised token of the European party.
 * @param {Array<Object>} nationalParties - Rows of the national parties sheet.
 * @returns {Object<string, {token: string, name: *, program: *}>} Map of
 *   lower-cased `Country Code` -> national party summary.
 */
function createNationalPartyMap (token, nationalParties) {
  const members = nationalParties.filter(row => {
    const europeanToken = normalisePartyToken(row['European Party'])
    return europeanToken === token
  })

  const byCountry = {}
  for (const member of members) {
    const countryCode = member['Country Code'].toLowerCase()

    // First entry per country wins.
    if (Object.prototype.hasOwnProperty.call(byCountry, countryCode)) {
      continue
    }

    byCountry[countryCode] = {
      token: normalisePartyToken(member.Token),
      name: member.Name,
      program: member.Program
    }
  }

  return byCountry
}
|
|
|
|
|
2019-04-17 09:55:08 +02:00
|
|
|
/**
 * Writes a dataset as pretty-printed JSON into `OUTPUT_DIRECTORY` and reports
 * the outcome through the shared spinner.
 *
 * Errors raised while serialising or writing are not re-thrown; they are
 * reported with `spinner.fail` and the returned promise still resolves.
 *
 * @param {string} fileName - File name (relative to `OUTPUT_DIRECTORY`).
 * @param {*} [data={}] - Value to serialise.
 * @returns {Promise<void>}
 */
async function writeDataset (fileName, data = {}) {
  const destination = `${OUTPUT_DIRECTORY}/${fileName}`

  try {
    const serialised = toJSON(data)
    await writeFile(destination, serialised)
    spinner.succeed(`Success! It's located at ${destination}`)
  } catch (error) {
    const reason = error.message
    spinner.fail(`Failure! Couldn't write '${fileName}'. Error: ${reason}`)
  }
}
|
|
|
|
|
|
|
|
/**
 * Converts the options sheet to JSON rows and writes them to `options.json`.
 *
 * @param {string} sheetName - Name of the sheet in the module-level `workbook`.
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when `sheetName` is falsy.
 */
async function createOptionsDataset (sheetName) {
  const isMissingSheetName = !sheetName
  if (isMissingSheetName) {
    throw new Error(`createOptionsDataset() requires 'sheetName', got "${sheetName}"`)
  }

  // The rows are written as produced by the sheet conversion, without reshaping.
  const sheet = workbook.Sheets[sheetName]
  const rows = XLSX.utils.sheet_to_json(sheet)
  await writeDataset('options.json', rows)
}
|
|
|
|
|
|
|
|
/**
 * Converts the theses sheet into the theses dataset and writes it to
 * `theses.json`.
 *
 * Each row becomes `{ id, category, thesis, terminology }`, where `category`
 * and `thesis` are locale maps and `terminology` is the raw `Terminology`
 * cell value, or an empty array when that value is falsy.
 *
 * @param {string} sheetName - Name of the sheet in the module-level `workbook`.
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when `sheetName` is falsy.
 */
async function createThesesDataset (sheetName) {
  const isMissingSheetName = !sheetName
  if (isMissingSheetName) {
    throw new Error(`createThesesDataset() requires 'sheetName', got "${sheetName}"`)
  }

  const toThesis = row => {
    const id = parseInt(row.ID, 10)
    const category = createLocaleMap('Category', row)
    const thesis = createLocaleMap('Thesis', row)
    const terminology = row.Terminology || []
    return { id, category, thesis, terminology }
  }

  const rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName])
  const theses = rows.map(row => toThesis(row))

  await writeDataset('theses.json', theses)
}
|
|
|
|
|
|
|
|
/**
 * Converts the terminology sheet into the terminology dataset and writes it
 * to `terminology.json`.
 *
 * Each row becomes `{ id, explanation, reference }`, where `explanation` and
 * `reference` are locale maps built from the matching columns.
 *
 * @param {string} sheetName - Name of the sheet in the module-level `workbook`.
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when `sheetName` is falsy.
 */
async function createTerminologyDataset (sheetName) {
  const isMissingSheetName = !sheetName
  if (isMissingSheetName) {
    throw new Error(`createTerminologyDataset() requires 'sheetName', got "${sheetName}"`)
  }

  const sheet = workbook.Sheets[sheetName]
  const entries = XLSX.utils.sheet_to_json(sheet).map(row => ({
    id: parseInt(row.ID, 10),
    explanation: createLocaleMap('Explanation', row),
    reference: createLocaleMap('Reference', row)
  }))

  await writeDataset('terminology.json', entries)
}
|
|
|
|
|
2019-04-24 23:28:55 +02:00
|
|
|
/**
 * Combines the European parties sheet, the national parties sheet and the
 * per-party position sheets into the parties dataset and writes it to
 * `parties.json`.
 *
 * @param {string} sheetName - Must be truthy; it is only validated and not
 *   otherwise used by this function.
 * @param {Object} [sheets={}] - Sheet names to read from the module-level
 *   `workbook`.
 * @param {string} sheets.europeanParties - Sheet with one row per European party.
 * @param {string} sheets.nationalParties - Sheet with the national parties.
 * @param {Array<string>} sheets.parties - Names of the per-party position
 *   sheets; a sheet is matched to a party by comparing normalised tokens.
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) when `sheetName` is falsy.
 */
async function createPartiesDataset (sheetName, sheets = {}) {
  const isMissingSheetName = !sheetName
  if (isMissingSheetName) {
    throw new Error(`createPartiesDataset() requires 'sheetName', got "${sheetName}"`)
  }

  const {
    europeanParties: europeanSheetName,
    nationalParties: nationalSheetName,
    parties: positionSheetNames
  } = sheets

  const europeanRows = XLSX.utils.sheet_to_json(workbook.Sheets[europeanSheetName])
  const nationalRows = XLSX.utils.sheet_to_json(workbook.Sheets[nationalSheetName])

  const toParty = row => {
    const token = normalisePartyToken(row.Token)
    const id = parseInt(row.ID, 10)
    const name = createLocaleMap('European Party', row)
    const europeanProfile = { party: createLocaleMap('European Party', row) }
    const nationalPartyMap = createNationalPartyMap(token, nationalRows)
    const program = createLocaleMap('Program', row)
    // `find` yields undefined when no sheet matches this party's token.
    const positionSheet = positionSheetNames.find(
      candidate => normalisePartyToken(candidate) === token
    )
    const positions = createPartyPositionMap(positionSheet)

    // Property order is kept stable because it determines the key order of
    // the serialised JSON.
    return {
      id,
      token,
      name,
      european_profile: europeanProfile,
      national_parties: nationalPartyMap,
      program,
      positions
    }
  }

  const parties = europeanRows.map(row => toParty(row))

  await writeDataset('parties.json', parties)
}
|
|
|
|
|
2019-04-17 10:40:21 +02:00
|
|
|
// Directory the generated JSON files are written to.
// Alternative target kept from the original source: './bin/xlsx-data/test'
const OUTPUT_DIRECTORY = './src/data'

// Spreadsheet file name, resolved below relative to './resources'.
const RESOURCE_FILE = 'euromat-dataset.xlsx'

// Shared CLI spinner used for all progress and result messages.
const spinner = ora()

// The workbook is read once, when this module is loaded.
const workbook = XLSX.readFile(`./resources/${RESOURCE_FILE}`)

// Sheets are identified by position: the first five sheet names are assigned
// in order, every remaining sheet name is collected in `morePartySheets`.
const [
  options,
  theses,
  terminology,
  europeanParties,
  nationalParties,
  ...morePartySheets
] = workbook.SheetNames
|
2019-04-17 09:55:08 +02:00
|
|
|
|
|
|
|
// Entry point: generates every dataset in sequence.
//
// Failures that escape the individual steps (for example a missing sheet
// name, which makes a create*Dataset() call reject) are caught here so that
// they are reported through the spinner and reflected in the process exit
// code, instead of surfacing as an unhandled promise rejection.
;(async () => {
  spinner.start()

  try {
    spinner.info(`Parsing '${RESOURCE_FILE}' to JSON files`)
    spinner.info(`XLSX SheetNames: ${workbook.SheetNames}`)

    spinner.info(`Writing '${options}.json' file`)
    await createOptionsDataset(options)

    spinner.info(`Writing '${theses}.json' file`)
    await createThesesDataset(theses)

    spinner.info(`Writing '${terminology}.json' file`)
    await createTerminologyDataset(terminology)

    spinner.info(`Writing 'parties.json' file`)
    await createPartiesDataset('parties', {
      europeanParties,
      nationalParties,
      parties: morePartySheets
    })

    spinner.stopAndPersist()
  } catch (error) {
    // Thrown values are not guaranteed to be Error instances.
    const reason = error && error.message ? error.message : String(error)
    spinner.fail(`Failure! Dataset generation aborted. Error: ${reason}`)
    // Signal the failure to the caller without cutting off pending output.
    process.exitCode = 1
  }
})()
|