green-spider-api/main.py

95 lines
2.7 KiB
Python

from datetime import datetime
from os import getenv
from wsgiref import simple_server
import falcon
from falcon import media
import jsonhandler
from google.cloud import datastore
credentials_path = getenv('GCLOUD_DATASTORE_CREDENTIALS_PATH')
datastore_client = datastore.Client.from_service_account_json(credentials_path)
entity_kind = 'spider-results'
def get_compact_results(client):
query = client.query(kind=entity_kind,
order=['-created'],
#projection=['created', 'meta', 'score'],
)
out = []
for entity in query.fetch(eventual=True):
# handle creation date in different ways, depending on whether the lib returns
# a str, int, or datetime.datetime
created = entity.get('created')
dt = ''
if type(created) == datetime:
dt = created
elif type(created) == int:
dt = datetime.utcfromtimestamp(created / 1000000)
elif type(created) == str:
dt = datetime.utcfromtimestamp(int(created) / 1000000)
out.append({
'input_url': entity.key.name,
'created': dt.isoformat(),
'meta': entity.get('meta'),
'score': entity.get('score'),
})
return out
class LastUpdated(object):
def on_get(self, req, resp):
"""
Informs about the most recent update to the spider results data
"""
query = datastore_client.query(kind=entity_kind,
order=['-created'],
projection=['created'])
items = list(query.fetch(limit=1, eventual=True))
ts = int(items[0].get('created')) / 1000000
dt = datetime.utcfromtimestamp(ts).isoformat()
maxage = 60 * 60 # one hour in seconds
resp.cache_control = ["max_age=%d" % maxage]
resp.media = {
"last_updated": dt
}
class CompactResults(object):
def on_get(self, req, resp):
"""
Returns compact sites overview and score
"""
out = get_compact_results(datastore_client)
maxage = 6 * 60 * 60 # six hours in seconds
resp.cache_control = ["max_age=%d" % maxage]
resp.media = out
handlers = media.Handlers({
'application/json': jsonhandler.JSONHandler(),
})
app = falcon.API()
app.req_options.media_handlers = handlers
app.resp_options.media_handlers = handlers
app.add_route('/api/v1/spider-results/last-updated/', LastUpdated())
app.add_route('/api/v1/spider-results/compact/', CompactResults())
if __name__ == '__main__':
httpd = simple_server.make_server('127.0.0.1', 5000, app)
httpd.serve_forever()