green-spider/export/__init__.py
Marian Steinbach c59db691a0
Reparatur und Aufräumen an der job execution (#340)
* Update jq URL

* Improve docker compose setup

* Script makeover: only one spider job, debian 11, add git clone

* Update image name

* Add some docs

* Pin click to v7 due to problems with rq

* Newline

* Improve manager code

* Add make target venv

* Remove obsolete 'spider' command from cli

* Remove git clone from manager code

* Remove worker functions from spider code

* Let 'make jobs' execute git clone and use docker compose

* Add 'spider' make target

* Update .dockerignore

* Add dryrun target to spider a URL without storing results

* Remove unused config entry
2024-03-04 17:18:37 +01:00

39 lines
1.1 KiB
Python

"""
Exports data from the database to JSON files for use in a static webapp
"""
import datetime
import logging
import sys
import os
from hashlib import md5
import json
import requests
from export import datetimeencoder
def export_results(client, entity_kind):
    """
    Export of the main results data

    Queries all entities of kind ``entity_kind`` through ``client`` and
    writes them as a single JSON array to
    ``/json-export/spider_result.json``.

    Entities that lack a ``checks`` or ``created`` property are still
    exported; the fields derived from the missing property are written as
    ``null`` instead of aborting the whole export.

    Args:
        client: Object providing ``query(kind=...)`` whose result offers a
            ``fetch()`` iterator over entities (presumably a
            google-cloud-datastore client -- confirm against the caller).
        entity_kind: Kind name handed through to ``client.query``.

    Returns:
        None. The only result is the JSON file written to disk.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    out = []

    # Load data from database
    query = client.query(kind=entity_kind)
    for entity in query.fetch():
        logging.debug(entity.key.name)

        # Look the optional properties up once. Calling a method directly on
        # the result of .get() raises AttributeError when the property is
        # absent, which would make one incomplete record kill the export.
        checks = entity.get('checks')
        created = entity.get('created')

        out.append({
            'input_url': entity.key.name,
            'resulting_urls': (
                checks.get('url_canonicalization') if checks is not None else None
            ),
            'created': created.isoformat() if created is not None else None,
            'meta': entity.get('meta'),
            'checks': checks,
            'rating': entity.get('rating'),
            'score': entity.get('score'),
        })

    output_filename = "/json-export/spider_result.json"
    with open(output_filename, 'w', encoding="utf8") as jsonfile:
        # The custom encoder handles datetime values nested inside the
        # 'meta' / 'checks' payloads, which plain json cannot serialize.
        json.dump(
            out,
            jsonfile,
            indent=2,
            sort_keys=True,
            ensure_ascii=False,
            cls=datetimeencoder.DateTimeEncoder,
        )