First working version
This commit is contained in:
parent
f09d4e8b46
commit
e6e1f618c5
|
@ -0,0 +1,3 @@
|
|||
/__pycache__
|
||||
/venv
|
||||
/secrets
|
|
@ -102,3 +102,6 @@ venv.bak/
|
|||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
|
||||
|
||||
/secrets
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
FROM python:3.6.7-slim-jessie
|
||||
|
||||
ADD requirements.txt /
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
ADD jsonhandler.py /
|
||||
ADD main.py /
|
||||
|
||||
ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:5000", "main:app"]
|
||||
|
||||
EXPOSE 5000
|
|
@ -0,0 +1,9 @@
|
|||
docker-build:
|
||||
docker build -t quay.io/netzbegruenung/green-spider-api .
|
||||
|
||||
docker-run:
|
||||
docker run --rm \
|
||||
-p 5000:5000 \
|
||||
-v $(shell pwd)/secrets:/secrets \
|
||||
-e GCLOUD_DATASTORE_CREDENTIALS_PATH=/secrets/green-spider-api.json \
|
||||
quay.io/netzbegruenung/green-spider-api
|
41
README.md
41
README.md
|
@ -1,2 +1,43 @@
|
|||
# green-spider-api
|
||||
|
||||
Web service API für Green Spider
|
||||
|
||||
## API Dokumentation
|
||||
|
||||
### `GET /api/v1/spider-results/last-updated/`
|
||||
|
||||
Gibt den Zeitpunkt der letzten Aktualisierung der Spider-Ergebnisse zurück.
|
||||
|
||||
```json
|
||||
{
|
||||
"last_updated": "2018-10-25T15:23:30.589683"
|
||||
}
|
||||
```
|
||||
|
||||
### `GET /api/v1/spider-results/compact/`
|
||||
|
||||
Gibt die kompakte Liste aller Sites aus. Diese enthält nur die Details, die für eine Übersicht benötigt werden.
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"input_url": "https://www.gruenekoeln.de/bezirke/bezirk7.html",
|
||||
"created": "2018-10-31T01:21:03.361931+00:00",
|
||||
"meta": {
|
||||
"level": "DE:ORTSVERBAND",
|
||||
"state": "Nordrhein-Westfalen",
|
||||
"type": "REGIONAL_CHAPTER",
|
||||
"city": "Köln-Porz/Poll",
|
||||
"district": "Köln"
|
||||
},
|
||||
"score": 11.5
|
||||
},
|
||||
...
|
||||
]
|
||||
```
|
||||
|
||||
## Konfiguration
|
||||
|
||||
Umgebungsvariablen:
|
||||
|
||||
- `GCLOUD_DATASTORE_CREDENTIALS_PATH`: Pfad der JSON-Datei mit Google Cloud Service-Account-Credentials. Benötigt lesenden Zugriff auf `spider-results` Datastore-Entitäten.
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
import six
|
||||
|
||||
from datetime import date, datetime
|
||||
|
||||
from falcon import errors
|
||||
from falcon.media import BaseHandler
|
||||
from falcon.util import json
|
||||
|
||||
class ComplexEncoder(json.JSONEncoder):
    """JSONEncoder that also serializes date and datetime objects.

    Dates and datetimes are rendered as ISO 8601 strings (e.g.
    "2018-10-25T15:23:30"), matching the format shown in the README.
    """

    def default(self, obj):
        """Return a JSON-serializable representation of *obj*.

        Falls through to the base class for unsupported types, which
        raises TypeError as the json module expects.
        """
        # Tuple isinstance covers both types in one idiomatic check
        # (datetime is itself a subclass of date).
        if isinstance(obj, (date, datetime)):
            return obj.isoformat()
        # Let the base class default method raise the TypeError
        return json.JSONEncoder.default(self, obj)
|
||||
|
||||
class JSONHandler(BaseHandler):
    """Falcon media handler backed by Python's :py:mod:`json` module."""

    def deserialize(self, raw):
        """Parse a UTF-8 encoded JSON request body into Python objects."""
        try:
            decoded = raw.decode('utf-8')
            return json.loads(decoded)
        except ValueError as err:
            # Surface malformed bodies to the client as a 400 instead
            # of letting the error bubble up as a 500.
            raise errors.HTTPBadRequest(
                'Invalid JSON',
                'Could not parse JSON body - {0}'.format(err)
            )

    def serialize(self, media):
        """Render *media* as UTF-8 encoded JSON bytes."""
        text = json.dumps(media,
                          ensure_ascii=False,
                          cls=ComplexEncoder)
        # Python 2 json.dumps may already return bytes; pass those
        # through unchanged. Otherwise encode the str result.
        if not six.PY3 and isinstance(text, bytes):
            return text
        return text.encode('utf-8')
|
|
@ -0,0 +1,94 @@
|
|||
from datetime import datetime
|
||||
from os import getenv
|
||||
from wsgiref import simple_server
|
||||
|
||||
import falcon
|
||||
from falcon import media
|
||||
import jsonhandler
|
||||
|
||||
from google.cloud import datastore
|
||||
|
||||
|
||||
# Path of the Google Cloud service account JSON key file. Needs read
# access to 'spider-results' Datastore entities (see README).
credentials_path = getenv('GCLOUD_DATASTORE_CREDENTIALS_PATH')

# Module-level Datastore client shared by all request handlers.
# NOTE(review): getenv returns None when the variable is unset, so
# from_service_account_json will fail at import time — confirm this
# fail-fast behavior is intended.
datastore_client = datastore.Client.from_service_account_json(credentials_path)

# Datastore entity kind holding one spider result per site.
entity_kind = 'spider-results'
|
||||
|
||||
|
||||
def get_compact_results(client):
    """Fetch all spider results and return a compact list of dicts.

    Each entry carries only the fields needed for an overview listing:
    the site URL (entity key name), creation time as an ISO 8601 string
    (None when no usable timestamp is stored), metadata, and score.

    client: google.cloud.datastore.Client used to run the query.
    """
    query = client.query(kind=entity_kind,
                         order=['-created'],
                         #projection=['created', 'meta', 'score'],
                         )

    out = []
    # eventual=True allows eventually-consistent reads — acceptable
    # staleness for an overview listing.
    for entity in query.fetch(eventual=True):

        # handle creation date in different ways, depending on whether the lib returns
        # a str, int, or datetime.datetime (int/str are microsecond epoch values)
        created = entity.get('created')
        dt = None
        if isinstance(created, datetime):
            dt = created
        elif isinstance(created, int):
            dt = datetime.utcfromtimestamp(created / 1000000)
        elif isinstance(created, str):
            dt = datetime.utcfromtimestamp(int(created) / 1000000)

        out.append({
            'input_url': entity.key.name,
            # Guard against a missing/unexpected 'created' value instead
            # of crashing on ''.isoformat() (the previous behavior).
            'created': dt.isoformat() if dt is not None else None,
            'meta': entity.get('meta'),
            'score': entity.get('score'),
        })
    return out
|
||||
|
||||
|
||||
class LastUpdated(object):
    """Resource reporting the most recent spider-results update time."""

    def on_get(self, req, resp):
        """
        Informs about the most recent update to the spider results data
        """
        # Only the newest entity is needed; project down to 'created'
        # to keep the query payload small.
        query = datastore_client.query(kind=entity_kind,
                                       order=['-created'],
                                       projection=['created'])
        items = list(query.fetch(limit=1, eventual=True))

        dt = None
        if items:  # guard against an empty datastore (was an IndexError -> 500)
            # 'created' is stored as a microsecond epoch timestamp.
            ts = int(items[0].get('created')) / 1000000
            dt = datetime.utcfromtimestamp(ts).isoformat()

        maxage = 60 * 60  # one hour in seconds
        # Correct Cache-Control directive is 'max-age' (hyphen);
        # 'max_age' is an unknown directive that clients ignore.
        resp.cache_control = ["max-age=%d" % maxage]
        resp.media = {
            "last_updated": dt
        }
|
||||
|
||||
|
||||
class CompactResults(object):
    """Resource serving the compact overview of all sites and scores."""

    def on_get(self, req, resp):
        """
        Returns compact sites overview and score
        """
        out = get_compact_results(datastore_client)

        maxage = 6 * 60 * 60  # six hours in seconds
        # Correct Cache-Control directive is 'max-age' (hyphen);
        # 'max_age' is an unknown directive that caches ignore.
        resp.cache_control = ["max-age=%d" % maxage]
        resp.media = out
|
||||
|
||||
|
||||
# Install the date/datetime-aware JSON handler for both request
# parsing and response serialization.
handlers = media.Handlers({
    'application/json': jsonhandler.JSONHandler(),
})

app = falcon.API()

app.req_options.media_handlers = handlers
app.resp_options.media_handlers = handlers

app.add_route('/api/v1/spider-results/last-updated/', LastUpdated())
app.add_route('/api/v1/spider-results/compact/', CompactResults())

# Development entry point only; in production gunicorn serves `app`
# on 0.0.0.0:5000 (see the Dockerfile ENTRYPOINT).
if __name__ == '__main__':
    httpd = simple_server.make_server('127.0.0.1', 5000, app)
    httpd.serve_forever()
|
|
@ -0,0 +1,28 @@
|
|||
astroid==2.0.4
|
||||
cachetools==2.1.0
|
||||
certifi==2018.10.15
|
||||
chardet==3.0.4
|
||||
falcon==1.4.1
|
||||
google-api-core==1.5.1
|
||||
google-auth==1.5.1
|
||||
google-cloud-core==0.28.1
|
||||
google-cloud-datastore==1.7.1
|
||||
googleapis-common-protos==1.5.3
|
||||
grpcio==1.16.0
|
||||
gunicorn==19.9.0
|
||||
idna==2.7
|
||||
isort==4.3.4
|
||||
lazy-object-proxy==1.3.1
|
||||
mccabe==0.6.1
|
||||
protobuf==3.6.1
|
||||
pyasn1==0.4.4
|
||||
pyasn1-modules==0.2.2
|
||||
pylint==2.1.1
|
||||
python-mimeparse==1.6.0
|
||||
pytz==2018.7
|
||||
requests==2.20.0
|
||||
rsa==4.0
|
||||
six==1.11.0
|
||||
typed-ast==1.1.0
|
||||
urllib3==1.24
|
||||
wrapt==1.10.11
|
Loading…
Reference in New Issue