green-spider/export/__init__.py

49 lines
1.4 KiB
Python
Raw Normal View History

2018-08-23 09:37:02 +02:00
"""
Exports data from the database to JSON files for use in a static webapp
"""
2019-05-04 23:00:00 +02:00
import datetime
2018-08-27 23:39:00 +02:00
import logging
2018-04-17 20:45:51 +02:00
import sys
2018-08-15 22:02:20 +02:00
import os
2019-05-04 23:00:00 +02:00
from hashlib import md5
2018-04-17 20:45:51 +02:00
2019-05-04 23:00:00 +02:00
import json
2018-08-27 23:39:00 +02:00
import requests
2019-05-04 23:00:00 +02:00
class DateTimeEncoder(json.JSONEncoder):
    """
    JSON encoder that additionally understands date/time values.

    - ``datetime.datetime`` and ``datetime.date`` are written as their
      ISO 8601 string (``isoformat()``).
    - ``datetime.timedelta`` is written as a time-of-day string, obtained by
      adding the delta to ``datetime.datetime.min`` and keeping only the time
      part. Whole days contained in the delta are therefore not represented
      in the output.
    - Everything else is delegated to ``json.JSONEncoder.default``, which
      raises ``TypeError`` for unsupported types.
    """

    def default(self, obj):
        # datetime.datetime is a subclass of datetime.date; both serialize
        # through their own isoformat().
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()

        if isinstance(obj, datetime.timedelta):
            # Anchor the duration at the earliest datetime and keep the clock part.
            anchored = datetime.datetime.min + obj
            return anchored.time().isoformat()

        return super().default(obj)
def export_results(client, entity_kind, output_filename="/json-export/spider_result.json"):
    """
    Export of the main results data

    Fetches every entity of ``entity_kind`` through ``client`` and writes one
    JSON array with a record per entity to ``output_filename``.

    Parameters:
        client: database client; must offer ``query(kind=...)`` whose result
            has a ``fetch()`` method yielding entities.
        entity_kind: name of the entity kind to export.
        output_filename: path of the JSON file to write. Defaults to the
            location used so far, so existing callers are unaffected.

    Entities lacking a ``checks`` or ``created`` property are exported with
    ``None`` in the derived fields (``resulting_urls``, ``created``) instead
    of aborting the whole export with an ``AttributeError``.
    """
    out = []

    # Load data from database
    query = client.query(kind=entity_kind)

    for entity in query.fetch():
        logging.debug(entity.key.name)

        checks = entity.get('checks')
        created = entity.get('created')

        out.append({
            'input_url': entity.key.name,
            # Guard against entities without check results.
            'resulting_urls': checks.get('url_canonicalization') if checks else None,
            # Guard against entities without a creation timestamp.
            'created': created.isoformat() if created is not None else None,
            'meta': entity.get('meta'),
            'checks': checks,
            'rating': entity.get('rating'),
            'score': entity.get('score'),
        })

    with open(output_filename, 'w', encoding="utf8") as jsonfile:
        # DateTimeEncoder handles date/time values nested inside the records.
        json.dump(out, jsonfile, indent=2, sort_keys=True, ensure_ascii=False, cls=DateTimeEncoder)