diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..76744ab --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +/__pycache__ +/venv +/secrets diff --git a/.gitignore b/.gitignore index 894a44c..dd0073a 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,6 @@ venv.bak/ # mypy .mypy_cache/ + + +/secrets diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..00503fd --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.6.7-slim-jessie + +ADD requirements.txt / +RUN pip install --no-cache-dir -r requirements.txt + +ADD jsonhandler.py / +ADD main.py / + +ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:5000", "main:app"] + +EXPOSE 5000 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..bf6666d --- /dev/null +++ b/Makefile @@ -0,0 +1,9 @@ +docker-build: + docker build -t quay.io/netzbegruenung/green-spider-api . + +docker-run: + docker run --rm \ + -p 5000:5000 \ + -v $(shell pwd)/secrets:/secrets \ + -e GCLOUD_DATASTORE_CREDENTIALS_PATH=/secrets/green-spider-api.json \ + quay.io/netzbegruenung/green-spider-api diff --git a/README.md b/README.md index 21ae930..de5a16a 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,43 @@ # green-spider-api + Web service API für Green Spider + +## API Dokumentation + +### `GET /api/v1/spider-results/last-updated/` + +Gibt den Zeitpunkt der letzten Aktualisierung der Spider-Ergebnisse zurück. + +```json +{ + "last_updated": "2018-10-25T15:23:30.589683" +} +``` + +### `GET /api/v1/spider-results/compact/` + +Gibt die kompakte Liste aller Sites aus. Diese enthält nur die Details, die für eine Übersicht benötigt werden. + +```json +[ + { + "input_url": "https://www.gruenekoeln.de/bezirke/bezirk7.html", + "created": "2018-10-31T01:21:03.361931+00:00", + "meta": { + "level": "DE:ORTSVERBAND", + "state": "Nordrhein-Westfalen", + "type": "REGIONAL_CHAPTER", + "city": "Köln-Porz/Poll", + "district": "Köln" + }, + "score": 11.5 + }, + ... 
+] +``` + +## Konfiguration + +Umgebungsvariablen: + +- `GCLOUD_DATASTORE_CREDENTIALS_PATH`: Pfad der JSON-Datei mit Google Cloud Service-Account-Credentials. Benötigt lesenden Zugriff auf `spider-results` Datastore-Entitäten. diff --git a/jsonhandler.py b/jsonhandler.py new file mode 100644 index 0000000..141be06 --- /dev/null +++ b/jsonhandler.py @@ -0,0 +1,38 @@ +import six + +from datetime import date, datetime + +from falcon import errors +from falcon.media import BaseHandler +from falcon.util import json + +class ComplexEncoder(json.JSONEncoder): + + """JSONENcoder that handles date and datetime""" + + def default(self, obj): + if isinstance(obj, date) or isinstance(obj, datetime): + return obj.isoformat() + # Let the base class default method raise the TypeError + return json.JSONEncoder.default(self, obj) + +class JSONHandler(BaseHandler): + """Handler built using Python's :py:mod:`json` module.""" + + def deserialize(self, raw): + try: + return json.loads(raw.decode('utf-8')) + except ValueError as err: + raise errors.HTTPBadRequest( + 'Invalid JSON', + 'Could not parse JSON body - {0}'.format(err) + ) + + def serialize(self, media): + result = json.dumps(media, + ensure_ascii=False, + cls=ComplexEncoder) + if six.PY3 or not isinstance(result, bytes): + return result.encode('utf-8') + + return result diff --git a/main.py b/main.py new file mode 100644 index 0000000..26ebe89 --- /dev/null +++ b/main.py @@ -0,0 +1,94 @@ +from datetime import datetime +from os import getenv +from wsgiref import simple_server + +import falcon +from falcon import media +import jsonhandler + +from google.cloud import datastore + + +credentials_path = getenv('GCLOUD_DATASTORE_CREDENTIALS_PATH') +datastore_client = datastore.Client.from_service_account_json(credentials_path) + +entity_kind = 'spider-results' + + +def get_compact_results(client): + query = client.query(kind=entity_kind, + order=['-created'], + #projection=['created', 'meta', 'score'], + ) + + out = [] + for entity 
in query.fetch(eventual=True): + + # handle creation date in different ways, depending on whether the lib returns + # a str, int, or datetime.datetime + created = entity.get('created') + dt = '' + if isinstance(created, datetime): + dt = created + elif isinstance(created, int): + dt = datetime.utcfromtimestamp(created / 1000000) + elif isinstance(created, str): + dt = datetime.utcfromtimestamp(int(created) / 1000000) + + out.append({ + 'input_url': entity.key.name, + 'created': dt.isoformat(), + 'meta': entity.get('meta'), + 'score': entity.get('score'), + }) + return out + + +class LastUpdated(object): + + def on_get(self, req, resp): + """ + Informs about the most recent update to the spider results data + """ + query = datastore_client.query(kind=entity_kind, + order=['-created'], + projection=['created']) + items = list(query.fetch(limit=1, eventual=True)) + ts = int(items[0].get('created')) / 1000000 + dt = datetime.utcfromtimestamp(ts).isoformat() + + maxage = 60 * 60 # one hour in seconds + resp.cache_control = ["max_age=%d" % maxage] + resp.media = { + "last_updated": dt + } + + +class CompactResults(object): + + def on_get(self, req, resp): + """ + Returns compact sites overview and score + """ + out = get_compact_results(datastore_client) + + maxage = 6 * 60 * 60 # six hours in seconds + resp.cache_control = ["max_age=%d" % maxage] + resp.media = out + + +handlers = media.Handlers({ + 'application/json': jsonhandler.JSONHandler(), +}) + +app = falcon.API() + +app.req_options.media_handlers = handlers +app.resp_options.media_handlers = handlers + +app.add_route('/api/v1/spider-results/last-updated/', LastUpdated()) +app.add_route('/api/v1/spider-results/compact/', CompactResults()) + +if __name__ == '__main__': + httpd = simple_server.make_server('127.0.0.1', 5000, app) + httpd.serve_forever() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ead59c1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,28 @@ +astroid==2.0.4
+cachetools==2.1.0 +certifi==2018.10.15 +chardet==3.0.4 +falcon==1.4.1 +google-api-core==1.5.1 +google-auth==1.5.1 +google-cloud-core==0.28.1 +google-cloud-datastore==1.7.1 +googleapis-common-protos==1.5.3 +grpcio==1.16.0 +gunicorn==19.9.0 +idna==2.7 +isort==4.3.4 +lazy-object-proxy==1.3.1 +mccabe==0.6.1 +protobuf==3.6.1 +pyasn1==0.4.4 +pyasn1-modules==0.2.2 +pylint==2.1.1 +python-mimeparse==1.6.0 +pytz==2018.7 +requests==2.20.0 +rsa==4.0 +six==1.11.0 +typed-ast==1.1.0 +urllib3==1.24 +wrapt==1.10.11