Add big result export
This commit is contained in:
parent
61c555f45e
commit
ae40915185
96
main.py
96
main.py
|
@ -1,3 +1,4 @@
|
||||||
|
import collections
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from os import getenv
|
from os import getenv
|
||||||
from wsgiref import simple_server
|
from wsgiref import simple_server
|
||||||
|
@ -16,34 +17,68 @@ spider_results_kind = 'spider-results'
|
||||||
webscreenshots_kind = 'webscreenshot'
|
webscreenshots_kind = 'webscreenshot'
|
||||||
|
|
||||||
|
|
||||||
|
def convert_datastore_datetime(field):
    """
    Normalize a stored timestamp value to a datetime.

    The client library may hand the value back as a datetime.datetime,
    an int, or a str. Numeric values (int, or str containing an integer)
    are divided by 1,000,000 and interpreted as a UTC epoch timestamp,
    i.e. they are treated as microseconds since the epoch.

    :param field: the raw value read from the entity
    :return: the datetime itself if one was passed in (including
        subclasses of datetime), a naive UTC datetime for int/str input,
        or the empty string '' for any other type (e.g. None)
    :raises ValueError: if a str is passed that is not an integer literal
    """
    # isinstance (rather than an exact type comparison) so that datetime
    # subclasses returned by a client library are accepted as well.
    if isinstance(field, datetime):
        return field

    if isinstance(field, (int, str)):
        return datetime.utcfromtimestamp(int(field) / 1000000)

    # Unknown or missing value: keep the historical '' fallback.
    return ''
|
||||||
|
|
||||||
|
|
||||||
|
def flatten(d, parent_key='', sep='.'):
    """
    Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parent key with `sep`,
    so {"a": {"b": 1}} becomes {"a.b": 1}. Values that are not mutable
    mappings are copied as they are. A nested mapping that is empty
    contributes no keys at all.

    :param d: the mapping to flatten; keys are expected to be strings
    :param parent_key: prefix prepended (with `sep`) to every key
    :param sep: separator placed between key path segments
    :return: a new flat dict
    """
    # Imported here so the function does not depend on the removed
    # `collections.MutableMapping` alias (gone since Python 3.10).
    from collections.abc import MutableMapping

    flat = {}
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            flat.update(flatten(v, new_key, sep=sep))
        else:
            flat[new_key] = v
    return flat
|
||||||
|
|
||||||
|
|
||||||
def get_compact_results(client):
    """
    Return a compact list of all spider results, newest first.

    Each list item is a dict with the keys 'input_url' (the entity's key
    name), 'created' (ISO 8601 string), 'meta' and 'score'.

    :param client: datastore client used to run the query
    :return: list of dicts, ordered by descending creation date
    """
    query = client.query(
        kind=spider_results_kind,
        order=['-created'],
    )

    def to_compact(entity):
        # The stored creation date may come back as str, int or datetime.
        created = convert_datastore_datetime(entity.get('created'))
        return {
            'input_url': entity.key.name,
            'created': created.isoformat(),
            'meta': entity.get('meta'),
            'score': entity.get('score'),
        }

    return [to_compact(entity) for entity in query.fetch(eventual=True)]
|
||||||
|
|
||||||
|
|
||||||
|
def get_full_results(client):
    """
    Return all spider results as flat records.

    Each record contains 'input_url' (the entity's key name), 'created'
    (ISO 8601 string) and 'score', plus the entity's 'meta' and 'rating'
    properties flattened into dotted keys prefixed with 'meta' and
    'rating' respectively.

    :param client: datastore client used to run the query
    :return: list of flat dicts, one per entity
    """
    query = client.query(kind=spider_results_kind)

    out = []
    for entity in query.fetch(eventual=True):
        created = convert_datastore_datetime(entity.get('created'))

        record = {
            'input_url': entity.key.name,
            'created': created.isoformat(),
            'score': entity.get('score'),
        }
        # entity.get() yields None for a missing property; flatten()
        # needs a mapping, so substitute an empty one instead of crashing.
        record.update(flatten(entity.get('meta') or {}, parent_key='meta'))
        record.update(flatten(entity.get('rating') or {}, parent_key='rating'))
        out.append(record)
    return out
|
||||||
|
|
||||||
|
|
||||||
class LastUpdated(object):
|
class LastUpdated(object):
|
||||||
|
|
||||||
|
@ -78,6 +113,19 @@ class CompactResults(object):
|
||||||
resp.media = out
|
resp.media = out
|
||||||
|
|
||||||
|
|
||||||
|
class BigResults(object):
    """Resource serving the full, flattened result export."""

    def on_get(self, req, resp):
        """
        Returns big sites results

        The response body is the list produced by get_full_results() and
        is marked as cacheable for two days.
        """
        out = get_full_results(datastore_client)

        maxage = 48 * 60 * 60  # two days
        # The Cache-Control directive is spelled "max-age" (hyphen);
        # "max_age" is not a known directive and is ignored by caches.
        resp.cache_control = ["max-age=%d" % maxage]
        resp.media = out
|
||||||
|
|
||||||
|
|
||||||
class SiteDetails(object):
|
class SiteDetails(object):
|
||||||
|
|
||||||
def on_get(self, req, resp):
|
def on_get(self, req, resp):
|
||||||
|
@ -128,6 +176,20 @@ class SiteScreenshots(object):
|
||||||
resp.media = entities
|
resp.media = entities
|
||||||
|
|
||||||
|
|
||||||
|
class Index(object):
    """Root resource describing the API and listing its endpoints."""

    def on_get(self, req, resp):
        """
        Responds with a short service description, the project URL and
        the list of available endpoint paths.
        """
        endpoints = [
            "/api/v1/spider-results/last-updated/",
            "/api/v1/spider-results/big/",
            "/api/v1/spider-results/compact/",
            "/api/v1/spider-results/site",
            "/api/v1/screenshots/site",
        ]
        description = {
            "message": "This is green-spider-api",
            "url": "https://github.com/netzbegruenung/green-spider-api",
            "endpoints": endpoints,
        }
        resp.media = description
|
||||||
|
|
||||||
handlers = media.Handlers({
|
handlers = media.Handlers({
|
||||||
'application/json': jsonhandler.JSONHandler(),
|
'application/json': jsonhandler.JSONHandler(),
|
||||||
})
|
})
|
||||||
|
@ -139,8 +201,10 @@ app.resp_options.media_handlers = handlers
|
||||||
|
|
||||||
app.add_route('/api/v1/spider-results/last-updated/', LastUpdated())
|
app.add_route('/api/v1/spider-results/last-updated/', LastUpdated())
|
||||||
app.add_route('/api/v1/spider-results/compact/', CompactResults())
|
app.add_route('/api/v1/spider-results/compact/', CompactResults())
|
||||||
|
app.add_route('/api/v1/spider-results/big/', BigResults())
|
||||||
app.add_route('/api/v1/spider-results/site', SiteDetails())
|
app.add_route('/api/v1/spider-results/site', SiteDetails())
|
||||||
app.add_route('/api/v1/screenshots/site', SiteScreenshots())
|
app.add_route('/api/v1/screenshots/site', SiteScreenshots())
|
||||||
|
app.add_route('/', Index())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
import unittest
|
||||||
|
from main import flatten
|
||||||
|
|
||||||
|
class TestFlattenDict(unittest.TestCase):
    """Checks for the flatten() helper."""

    def test_flatten(self):
        """Nested dicts collapse into dotted keys at every depth."""
        leaf = {"one": 1, "two": 2}
        nested = {
            "foo": {"bar": dict(leaf)},
            "bar": dict(leaf),
        }
        expected = {
            "foo.bar.one": 1,
            "foo.bar.two": 2,
            "bar.one": 1,
            "bar.two": 2,
        }
        self.assertEqual(flatten(nested), expected)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
Loading…
Reference in New Issue