mirror of
https://github.com/netzbegruenung/green-spider.git
synced 2024-05-14 14:36:08 +02:00
Remove worker functions from spider code
This commit is contained in:
parent
3f4ea41e86
commit
75964011c2
|
@@ -88,51 +88,6 @@ def test_url(url):
|
|||
pprint(result)
|
||||
|
||||
|
||||
def execute_single_job(datastore_client, job, entity_kind):
    """
    Executes spider for one single job

    The job is validated, handed to ``check_and_rate_site`` and the
    outcome is stored as an entity of kind ``entity_kind`` whose key name
    is the job's URL. A failure while writing the entity is logged and
    not re-raised, so the caller can carry on with the next job.

    Args:
        datastore_client: client used to build the key and to ``put`` the
            entity.
        job: mapping describing the job; must contain a ``"url"`` entry.
        entity_kind: kind under which the result entity is stored.

    Raises:
        Exception: from ``validate_job`` when the job has no ``"url"``.
    """
    validate_job(job)

    logging.info("Starting job %s", job["url"])
    result = check_and_rate_site(entry=job)

    # Serialising the complete result can be costly; only do it when the
    # root logger (the one logging.debug() writes to) will emit the record.
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        logging.debug(
            "Full JSON representation of returned result: %s",
            json.dumps(result, default=str),
        )

    logging.info("Job %s finished checks", job["url"])
    logging.info("Job %s writing to DB", job["url"])

    key = datastore_client.key(entity_kind, job["url"])
    entity = datastore.Entity(key=key)
    # NOTE(review): result is presumably a dict carrying these four keys;
    # a missing key raises KeyError here, before anything is written.
    entity.update({
        'created': datetime.utcnow(),
        'meta': result['meta'],
        'checks': result['checks'],
        'rating': result['rating'],
        'score': result['score'],
    })

    try:
        datastore_client.put(entity)
    except Exception as ex:
        # Best effort: one handler replaces the former separate (and
        # identical) InvalidArgument / Exception handlers.
        # NOTE(review): assumes InvalidArgument derives from Exception.
        logging.error("Could not write result: %s", ex)
    else:
        logging.debug("Successfully wrote record to database")
|
||||
|
||||
def work_of_queue(datastore_client, entity_kind):
    """
    Process queued jobs one after another until the queue yields None
    """
    pending = manager.get_job_from_queue(datastore_client)
    while pending is not None:
        execute_single_job(datastore_client, pending, entity_kind)
        # Fetch the next job only after the current one is done.
        pending = manager.get_job_from_queue(datastore_client)

    logging.info("No more jobs. Exiting.")
|
||||
|
||||
def validate_job(jobdict):
    """
    Check that the job has a 'url' entry; raise an Exception otherwise
    """
    has_url = "url" in jobdict
    if has_url:
        return

    raise Exception("Job does not have required 'url' attribute")
|
||||
|
|
Loading…
Reference in a new issue