mirror of
https://github.com/netzbegruenung/green-spider.git
synced 2024-04-29 07:44:50 +02:00
Add spider result export capabilities
This commit is contained in:
parent
88ec1f63f7
commit
404365897d
|
@ -1,17 +1,35 @@
|
|||
"""
|
||||
Exports data from the database to JSON files for use in a static webapp
|
||||
"""
|
||||
|
||||
from google.cloud import datastore
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) == 1:
|
||||
print("Error: please provide path to Google Storage API system account JSON file as argument")
|
||||
sys.exit(1)
|
||||
client = None
|
||||
|
||||
key_path = sys.argv[1]
|
||||
client = datastore.Client.from_service_account_json(key_path)
|
||||
def export_results():
    """
    Export of the main results data

    Queries all entities of kind 'spider-results' through the ``client``
    object defined outside this function, collects the "results" value of
    each entity and writes the collected list as JSON to
    /out/spider_result.json.
    """
    results_query = client.query(kind='spider-results')

    collected = []
    for record in results_query.fetch():
        # Print the key name of each entity as it is processed.
        print(record.key.name)
        collected.append(dict(record)["results"])

    target_path = "/out/spider_result.json"
    with open(target_path, 'w', encoding="utf8") as handle:
        json.dump(collected, handle, indent=2, sort_keys=True, ensure_ascii=False)
|
||||
|
||||
|
||||
def export_screenshots():
|
||||
"""
|
||||
Export of screenshot meta data
|
||||
"""
|
||||
out = {}
|
||||
|
||||
query = client.query(kind='webscreenshot')
|
||||
|
@ -19,10 +37,18 @@ def main():
|
|||
print(item['url'], os.path.basename(item['screenshot_url']))
|
||||
out[item['url']] = os.path.basename(item['screenshot_url'])
|
||||
|
||||
output_filename = "./webapp/dist/data/screenshots.json"
|
||||
output_filename = "/out/screenshots.json"
|
||||
with open(output_filename, 'w', encoding="utf8") as jsonfile:
|
||||
json.dump(out, jsonfile, indent=2, sort_keys=True, ensure_ascii=False)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
if len(sys.argv) == 1:
|
||||
print("Error: please provide path to Google Storage API system account JSON file as argument")
|
||||
sys.exit(1)
|
||||
|
||||
key_path = sys.argv[1]
|
||||
client = datastore.Client.from_service_account_json(key_path)
|
||||
|
||||
export_screenshots()
|
||||
export_results()
|
Loading…
Reference in a new issue