diff --git a/Dockerfile b/Dockerfile index 00503fd..67ed079 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,6 @@ RUN pip install --no-cache-dir -r requirements.txt ADD jsonhandler.py / ADD main.py / -ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:5000", "main:app"] +ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:5000", "--access-logfile=-", "main:app"] EXPOSE 5000 diff --git a/README.md b/README.md index e9f9666..a411499 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # green-spider-api -Web service API für Green Spider +Web service API für die [Green Spider Webapp](https://github.com/netzbegruenung/green-spider-webapp) ## API Dokumentation @@ -46,6 +46,29 @@ Gibt sämtliche Inhalte zu einer Site aus. Ein Beispiel würde hier den Rahmen sprengen. +### `GET /api/v1/screenshots/site?url={site_url}` + +Gibt Daten zu allen vorhandenen Screenshots zu einer Site aus. + +```json +[ + { + "url": "http://wordpress.gruene-hameln-pyrmont.de/category/hessisch-oldendorf-gesamt/", + "screenshot_url": "http://green-spider-screenshots.sendung.de/1500x1500/4fc61b4918dc9eaaef645c694c84224e.png", + "user_agent": "phantomjs-2.1.1", + "size": [1500, 1500], + "created": "2018-10-08T08:33:21.284933+00:00" + }, + { + "url": "http://wordpress.gruene-hameln-pyrmont.de/category/hessisch-oldendorf-gesamt/", + "screenshot_url": "http://green-spider-screenshots.sendung.de/360x640/4fc61b4918dc9eaaef645c694c84224e.png", + "user_agent": "phantomjs-2.1.1", + "size": [360, 640], + "created": "2018-10-08T08:33:19.353841+00:00" + } +] +``` + ## Konfiguration Umgebungsvariablen: diff --git a/main.py b/main.py index 6dfb014..4a2b8a7 100644 --- a/main.py +++ b/main.py @@ -13,7 +13,7 @@ credentials_path = getenv('GCLOUD_DATASTORE_CREDENTIALS_PATH') datastore_client = datastore.Client.from_service_account_json(credentials_path) spider_results_kind = 'spider-results' -webscreenshots_kind = 'webscreenshots' +webscreenshots_kind = 'webscreenshot' def get_compact_results(client): @@ -103,6 +103,31 @@ class 
class SiteScreenshots(object):
    """Falcon resource that lists the stored screenshots for one site URL.

    The screenshot entities are read from the datastore kind named by the
    module-level ``webscreenshots_kind`` and returned unchanged as the
    response media.
    """

    # Cache lifetimes in seconds. An empty result is cached for a shorter
    # time than a non-empty one (presumably so that screenshots created
    # later become visible sooner -- confirm with the crawler schedule).
    MAX_AGE_WITH_RESULTS = 24 * 60 * 60  # 24 hours
    MAX_AGE_WITHOUT_RESULTS = 3 * 60 * 60  # 3 hours

    @staticmethod
    def _cache_directives(entities):
        """Return the Cache-Control directive list for a query result.

        Args:
            entities: sequence of screenshot records found for the URL.

        Returns:
            A one-element list holding a ``max-age=<seconds>`` directive.
        """
        if entities:
            max_age = SiteScreenshots.MAX_AGE_WITH_RESULTS
        else:
            max_age = SiteScreenshots.MAX_AGE_WITHOUT_RESULTS
        # The directive is spelled "max-age" (hyphen); "max_age" is not a
        # recognized Cache-Control directive and is ignored by caches.
        return ["max-age=%d" % max_age]

    def on_get(self, req, resp):
        """
        Returns screenshots for one URL

        Args:
            req: incoming request; the query parameter ``url`` is required.
            resp: response whose ``cache_control`` and ``media`` are set.

        Raises:
            falcon.HTTPError: HTTP 400 if ``url`` is missing or empty.
        """
        url = req.get_param('url')
        if not url:
            raise falcon.HTTPError(falcon.HTTP_400,
                                   'Bad request',
                                   'The parameter url must not be empty')

        query = datastore_client.query(kind=webscreenshots_kind)
        # Reuse the validated value instead of reading the parameter again.
        query.add_filter('url', '=', url)
        entities = list(query.fetch())

        resp.cache_control = self._cache_directives(entities)
        resp.media = entities