Add spider result export capabilities

This commit is contained in:
Marian Steinbach 2018-08-23 09:37:02 +02:00
parent 88ec1f63f7
commit 404365897d

View file

@@ -1,17 +1,35 @@
"""
Exports data from the database to JSON files for use in a static webapp
"""
from google.cloud import datastore from google.cloud import datastore
import json import json
import sys import sys
import os import os
def main(): client = None
if len(sys.argv) == 1:
print("Error: please provide path to Google Storage API system account JSON file as argument")
sys.exit(1)
key_path = sys.argv[1] def export_results():
client = datastore.Client.from_service_account_json(key_path) """
Export of the main results data
"""
out = []
query = client.query(kind='spider-results')
for entity in query.fetch():
print(entity.key.name)
out.append(dict(entity)["results"])
output_filename = "/out/spider_result.json"
with open(output_filename, 'w', encoding="utf8") as jsonfile:
json.dump(out, jsonfile, indent=2, sort_keys=True, ensure_ascii=False)
def export_screenshots():
"""
Export of screenshot meta data
"""
out = {} out = {}
query = client.query(kind='webscreenshot') query = client.query(kind='webscreenshot')
@@ -19,10 +37,18 @@ def main():
print(item['url'], os.path.basename(item['screenshot_url'])) print(item['url'], os.path.basename(item['screenshot_url']))
out[item['url']] = os.path.basename(item['screenshot_url']) out[item['url']] = os.path.basename(item['screenshot_url'])
output_filename = "./webapp/dist/data/screenshots.json" output_filename = "/out/screenshots.json"
with open(output_filename, 'w', encoding="utf8") as jsonfile: with open(output_filename, 'w', encoding="utf8") as jsonfile:
json.dump(out, jsonfile, indent=2, sort_keys=True, ensure_ascii=False) json.dump(out, jsonfile, indent=2, sort_keys=True, ensure_ascii=False)
if __name__ == "__main__": if __name__ == "__main__":
main() if len(sys.argv) == 1:
print("Error: please provide path to Google Storage API system account JSON file as argument")
sys.exit(1)
key_path = sys.argv[1]
client = datastore.Client.from_service_account_json(key_path)
export_screenshots()
export_results()