mirror of https://github.com/netzbegruenung/green-spider-api.git
synced 2024-04-26 22:04:52 +02:00
First working version
This commit is contained in:
parent f09d4e8b46
commit e6e1f618c5
.dockerignore  Normal file  (+3)
@@ -0,0 +1,3 @@
/__pycache__
/venv
/secrets
.gitignore  vendored  (+3)
@@ -102,3 +102,6 @@ venv.bak/

# mypy
.mypy_cache/

/secrets
Dockerfile  Normal file  (+11)
@@ -0,0 +1,11 @@
FROM python:3.6.7-slim-jessie

ADD requirements.txt /
RUN pip install --no-cache-dir -r requirements.txt

ADD jsonhandler.py /
ADD main.py /

ENTRYPOINT ["gunicorn", "--bind=0.0.0.0:5000", "main:app"]

EXPOSE 5000
Makefile  Normal file  (+9)
@@ -0,0 +1,9 @@
docker-build:
	docker build -t quay.io/netzbegruenung/green-spider-api .

docker-run:
	docker run --rm \
		-p 5000:5000 \
		-v $(shell pwd)/secrets:/secrets \
		-e GCLOUD_DATASTORE_CREDENTIALS_PATH=/secrets/green-spider-api.json \
		quay.io/netzbegruenung/green-spider-api
README.md  (+41)
@@ -1,2 +1,43 @@
# green-spider-api

Web service API for Green Spider

## API documentation

### `GET /api/v1/spider-results/last-updated/`

Returns the time of the most recent update to the spider results.

```json
{
    "last_updated": "2018-10-25T15:23:30.589683"
}
```
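For a quick check, the endpoint can be queried with `requests` (already pinned in `requirements.txt`). A minimal sketch, assuming a locally running instance on port 5000, e.g. started via `make docker-run`:

```python
import requests

# Base URL is an assumption for a local instance; adjust as needed.
resp = requests.get("http://localhost:5000/api/v1/spider-results/last-updated/")
resp.raise_for_status()
print(resp.json()["last_updated"])  # ISO 8601 timestamp, see example above
```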
### `GET /api/v1/spider-results/compact/`

Returns the compact list of all sites. It contains only the details needed for an overview.

```json
[
    {
        "input_url": "https://www.gruenekoeln.de/bezirke/bezirk7.html",
        "created": "2018-10-31T01:21:03.361931+00:00",
        "meta": {
            "level": "DE:ORTSVERBAND",
            "state": "Nordrhein-Westfalen",
            "type": "REGIONAL_CHAPTER",
            "city": "Köln-Porz/Poll",
            "district": "Köln"
        },
        "score": 11.5
    },
    ...
]
```
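A minimal consumer sketch under the same local-instance assumption as above:

```python
import requests

sites = requests.get("http://localhost:5000/api/v1/spider-results/compact/").json()

# List the five highest-scoring sites; treat a missing score as 0.
for site in sorted(sites, key=lambda s: s.get("score") or 0, reverse=True)[:5]:
    print(site.get("score"), site["input_url"])
```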
## Configuration

Environment variables:

- `GCLOUD_DATASTORE_CREDENTIALS_PATH`: path to the JSON file containing Google Cloud service account credentials. Requires read access to `spider-results` Datastore entities. A local-run sketch follows below.
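For local development without Docker, one option is to set the variable before importing `main`, since `main.py` creates the Datastore client at import time. A minimal sketch; the credentials path is an example:

```python
import os

# main.py reads this variable at import time, so set it first.
os.environ["GCLOUD_DATASTORE_CREDENTIALS_PATH"] = "secrets/green-spider-api.json"

import main  # instantiates the Datastore client on import
from wsgiref import simple_server

# Serve the Falcon app the same way main.py's __main__ block does.
simple_server.make_server("127.0.0.1", 5000, main.app).serve_forever()
```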
jsonhandler.py  Normal file  (+38)
@@ -0,0 +1,38 @@
import six

from datetime import date, datetime

from falcon import errors
from falcon.media import BaseHandler
from falcon.util import json


class ComplexEncoder(json.JSONEncoder):

    """JSONEncoder that handles date and datetime"""

    def default(self, obj):
        if isinstance(obj, date) or isinstance(obj, datetime):
            return obj.isoformat()
        # Let the base class default method raise the TypeError
        return json.JSONEncoder.default(self, obj)


class JSONHandler(BaseHandler):

    """Handler built using Python's :py:mod:`json` module."""

    def deserialize(self, raw):
        try:
            return json.loads(raw.decode('utf-8'))
        except ValueError as err:
            raise errors.HTTPBadRequest(
                'Invalid JSON',
                'Could not parse JSON body - {0}'.format(err)
            )

    def serialize(self, media):
        result = json.dumps(media,
                            ensure_ascii=False,
                            cls=ComplexEncoder)
        if six.PY3 or not isinstance(result, bytes):
            return result.encode('utf-8')

        return result
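As a side note on the encoder above, a minimal sanity check (assuming `falcon.util.json` is the standard-library `json` module, as in Falcon 1.x, so stdlib `json.dumps` accepts the same encoder class):

```python
import json
from datetime import datetime

from jsonhandler import ComplexEncoder

# datetime values become ISO 8601 strings instead of raising TypeError.
print(json.dumps({"created": datetime(2018, 10, 25, 15, 23, 30)},
                 cls=ComplexEncoder))
# -> {"created": "2018-10-25T15:23:30"}
```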
main.py  Normal file  (+94)
@@ -0,0 +1,94 @@
from datetime import datetime
from os import getenv
from wsgiref import simple_server

import falcon
from falcon import media
import jsonhandler

from google.cloud import datastore


credentials_path = getenv('GCLOUD_DATASTORE_CREDENTIALS_PATH')
datastore_client = datastore.Client.from_service_account_json(credentials_path)

entity_kind = 'spider-results'


def get_compact_results(client):
    query = client.query(kind=entity_kind,
                         order=['-created'],
                         #projection=['created', 'meta', 'score'],
                         )

    out = []
    for entity in query.fetch(eventual=True):

        # handle creation date in different ways, depending on whether the lib returns
        # a str, int, or datetime.datetime
        created = entity.get('created')
        dt = ''
        if type(created) == datetime:
            dt = created
        elif type(created) == int:
            dt = datetime.utcfromtimestamp(created / 1000000)
        elif type(created) == str:
            dt = datetime.utcfromtimestamp(int(created) / 1000000)

        out.append({
            'input_url': entity.key.name,
            'created': dt.isoformat(),
            'meta': entity.get('meta'),
            'score': entity.get('score'),
        })
    return out


class LastUpdated(object):

    def on_get(self, req, resp):
        """
        Informs about the most recent update to the spider results data
        """
        query = datastore_client.query(kind=entity_kind,
                                       order=['-created'],
                                       projection=['created'])
        items = list(query.fetch(limit=1, eventual=True))
        ts = int(items[0].get('created')) / 1000000
        dt = datetime.utcfromtimestamp(ts).isoformat()

        maxage = 60 * 60  # one hour in seconds
        resp.cache_control = ["max_age=%d" % maxage]
        resp.media = {
            "last_updated": dt
        }


class CompactResults(object):

    def on_get(self, req, resp):
        """
        Returns compact sites overview and score
        """
        out = get_compact_results(datastore_client)

        maxage = 6 * 60 * 60  # six hours in seconds
        resp.cache_control = ["max_age=%d" % maxage]
        resp.media = out


handlers = media.Handlers({
    'application/json': jsonhandler.JSONHandler(),
})

app = falcon.API()

app.req_options.media_handlers = handlers
app.resp_options.media_handlers = handlers

app.add_route('/api/v1/spider-results/last-updated/', LastUpdated())
app.add_route('/api/v1/spider-results/compact/', CompactResults())

if __name__ == '__main__':
    httpd = simple_server.make_server('127.0.0.1', 5000, app)
    httpd.serve_forever()
requirements.txt  Normal file  (+28)
@@ -0,0 +1,28 @@
astroid==2.0.4
cachetools==2.1.0
certifi==2018.10.15
chardet==3.0.4
falcon==1.4.1
google-api-core==1.5.1
google-auth==1.5.1
google-cloud-core==0.28.1
google-cloud-datastore==1.7.1
googleapis-common-protos==1.5.3
grpcio==1.16.0
gunicorn==19.9.0
idna==2.7
isort==4.3.4
lazy-object-proxy==1.3.1
mccabe==0.6.1
protobuf==3.6.1
pyasn1==0.4.4
pyasn1-modules==0.2.2
pylint==2.1.1
python-mimeparse==1.6.0
pytz==2018.7
requests==2.20.0
rsa==4.0
six==1.11.0
typed-ast==1.1.0
urllib3==1.24
wrapt==1.10.11