mirror of
https://github.com/netzbegruenung/green-spider.git
synced 2024-04-27 23:04:52 +02:00
c59db691a0
* Update jq URL * Improve docker compose setup * Script makeover: only one spider job, debian 11, add git clone * Update image name * Add some docs * Pin click to v7 due to problems with rq * Newline * Improve manager code * Add make tarket venv * Remove obsolete 'spider' command from cli * Remove git clone from manager code * Remove worker functions from spider code * Let 'make jobs' execute git clone and use docker compose * Add 'spider' make target * Update .dockerignore * Add dryrun target to spider a URL without storing results * Remove unused config entry
39 lines
1.1 KiB
Python
39 lines
1.1 KiB
Python
"""
|
|
Exports data from the database to JSON files for use in a static webapp
|
|
"""
|
|
|
|
import datetime
|
|
import logging
|
|
import sys
|
|
import os
|
|
from hashlib import md5
|
|
|
|
import json
|
|
import requests
|
|
|
|
from export import datetimeencoder
|
|
|
|
def export_results(client, entity_kind):
    """
    Export of the main results data

    Fetches every entity of kind ``entity_kind`` through ``client.query``
    and writes the collected records as one JSON array to
    ``/json-export/spider_result.json``.

    Args:
        client: Object exposing ``query(kind=...)`` whose result offers
            ``fetch()`` yielding dict-like entities with a ``key.name``
            attribute (presumably a Google Cloud Datastore client --
            confirm against the caller).
        entity_kind: Kind name handed to ``client.query``.

    Returns:
        None. The export is written to disk as a side effect.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    out = []

    # Load data from database
    query = client.query(kind=entity_kind)
    for entity in query.fetch():
        input_url = entity.key.name
        logging.debug(input_url)

        # Look these up once. Either may be missing on an entity; in that
        # case export None for the derived field rather than letting a
        # chained call on None abort the export of all other entities.
        checks = entity.get('checks')
        created = entity.get('created')

        out.append({
            'input_url': input_url,
            'resulting_urls': (
                checks.get('url_canonicalization') if checks is not None else None
            ),
            'created': created.isoformat() if created is not None else None,
            'meta': entity.get('meta'),
            'checks': checks,
            'rating': entity.get('rating'),
            'score': entity.get('score'),
        })

    output_filename = "/json-export/spider_result.json"
    with open(output_filename, 'w', encoding="utf8") as jsonfile:
        # The custom encoder handles values nested inside 'meta'/'checks'
        # that the default JSON encoder cannot serialize.
        json.dump(
            out,
            jsonfile,
            indent=2,
            sort_keys=True,
            ensure_ascii=False,
            cls=datetimeencoder.DateTimeEncoder,
        )
|