From ae40915185fee5b0689aaa64063ef68cdc7d98e6 Mon Sep 17 00:00:00 2001
From: Marian Steinbach
Date: Mon, 26 Nov 2018 09:35:52 +0100
Subject: [PATCH] Add big result export

---
Review notes (text in this section is ignored by `git am`):

* Line structure of this patch was reconstructed from a whitespace-collapsed
  copy. Hunk line counts were verified against the hunk headers, but the
  exact indentation of removed/re-indented lines is a best guess; apply with
  `git apply --ignore-whitespace` if needed.
* flatten(): use collections.abc.MutableMapping. The collections.MutableMapping
  alias was removed in Python 3.10.
* flatten(): treat a missing (None) mapping as empty so entities without
  'meta' or 'rating' do not crash get_full_results().
* convert_datastore_datetime(): use isinstance() so datetime subclasses are
  recognised instead of falling through to ''.
* BigResults: the Cache-Control directive is spelled "max-age", not "max_age".
* All fixes are line-count neutral, so hunk headers and diffstat are unchanged.

 main.py      | 96 +++++++++++++++++++++++++++++++++++++++++++---------
 main_test.py | 31 +++++++++++++++++
 2 files changed, 111 insertions(+), 16 deletions(-)
 create mode 100644 main_test.py

diff --git a/main.py b/main.py
index 4a2b8a7..38470bd 100644
--- a/main.py
+++ b/main.py
@@ -1,3 +1,4 @@
+import collections.abc
 from datetime import datetime
 from os import getenv
 from wsgiref import simple_server
@@ -16,34 +17,68 @@ spider_results_kind = 'spider-results'
 webscreenshots_kind = 'webscreenshot'
 
 
+def convert_datastore_datetime(field):
+    """
+    return datetime in different ways, depending on whether the lib returns
+    a str, int, or datetime.datetime
+    """
+    dt = ''
+    if isinstance(field, datetime):
+        dt = field
+    elif isinstance(field, int):
+        dt = datetime.utcfromtimestamp(field / 1000000)
+    elif isinstance(field, str):
+        dt = datetime.utcfromtimestamp(int(field) / 1000000)
+    return dt
+
+
+def flatten(d, parent_key='', sep='.'):
+    items = []
+    for k, v in (d or {}).items():
+        new_key = parent_key + sep + k if parent_key else k
+        if isinstance(v, collections.abc.MutableMapping):
+            items.extend(flatten(v, new_key, sep=sep).items())
+        else:
+            items.append((new_key, v))
+    return dict(items)
+
+
 def get_compact_results(client):
     query = client.query(kind=spider_results_kind,
-                                   order=['-created'],
-                                   #projection=['created', 'meta', 'score'],
-                                   )
+                         order=['-created'],
+                         #projection=['created', 'meta', 'score'],
+                         )
 
     out = []
     for entity in query.fetch(eventual=True):
+        created = convert_datastore_datetime(entity.get('created'))
 
-        # handle creation date in different ways, depending on whether the lib returns
-        # a str, int, or datetime.datetime
-        created = entity.get('created')
-        dt = ''
-        if type(created) == datetime:
-            dt = created
-        elif type(created) == int:
-            dt = datetime.utcfromtimestamp(created / 1000000)
-        elif type(created) == str:
-            dt = datetime.utcfromtimestamp(int(created) / 1000000)
-
         out.append({
             'input_url': entity.key.name,
-            'created': dt.isoformat(),
+            'created': created.isoformat(),
             'meta': entity.get('meta'),
             'score': entity.get('score'),
         })
     return out
-    
+
+
+def get_full_results(client):
+    query = client.query(kind=spider_results_kind)
+
+    out = []
+    for entity in query.fetch(eventual=True):
+        created = convert_datastore_datetime(entity.get('created'))
+
+        record = {
+            'input_url': entity.key.name,
+            'created': created.isoformat(),
+            'score': entity.get('score'),
+        }
+        record.update(flatten(entity.get('meta'), parent_key='meta'))
+        record.update(flatten(entity.get('rating'), parent_key='rating'))
+        out.append(record)
+    return out
+
 
 class LastUpdated(object):
 
@@ -78,6 +113,19 @@ class CompactResults(object):
         resp.media = out
 
 
+class BigResults(object):
+
+    def on_get(self, req, resp):
+        """
+        Returns big sites results
+        """
+        out = get_full_results(datastore_client)
+
+        maxage = 48 * 60 * 60 # two days
+        resp.cache_control = ["max-age=%d" % maxage]
+        resp.media = out
+
+
 class SiteDetails(object):
 
     def on_get(self, req, resp):
@@ -128,6 +176,20 @@ class SiteScreenshots(object):
         resp.media = entities
 
 
+class Index(object):
+    def on_get(self, req, resp):
+        resp.media = {
+            "message": "This is green-spider-api",
+            "url": "https://github.com/netzbegruenung/green-spider-api",
+            "endpoints": [
+                "/api/v1/spider-results/last-updated/",
+                "/api/v1/spider-results/big/",
+                "/api/v1/spider-results/compact/",
+                "/api/v1/spider-results/site",
+                "/api/v1/screenshots/site",
+            ]
+        }
+
 handlers = media.Handlers({
     'application/json': jsonhandler.JSONHandler(),
 })
@@ -139,8 +201,10 @@ app.resp_options.media_handlers = handlers
 
 app.add_route('/api/v1/spider-results/last-updated/', LastUpdated())
 app.add_route('/api/v1/spider-results/compact/', CompactResults())
+app.add_route('/api/v1/spider-results/big/', BigResults())
 app.add_route('/api/v1/spider-results/site', SiteDetails())
 app.add_route('/api/v1/screenshots/site', SiteScreenshots())
+app.add_route('/', Index())
 
 
 if __name__ == '__main__':
diff --git a/main_test.py b/main_test.py
new file mode 100644
index 0000000..a143e79
--- /dev/null
+++ b/main_test.py
@@ -0,0 +1,31 @@
+import unittest
+from main import flatten
+
+class TestFlattenDict(unittest.TestCase):
+
+    def test_flatten(self):
+        input = {
+            "foo": {
+                "bar": {
+                    "one": 1,
+                    "two": 2,
+                }
+            },
+            "bar": {
+                "one": 1,
+                "two": 2,
+            }
+        }
+        expected = {
+            "foo.bar.one": 1,
+            "foo.bar.two": 2,
+            "bar.one": 1,
+            "bar.two": 2,
+        }
+        out = flatten(input)
+        self.assertEqual(out, expected)
+
+
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file