mirror of
https://github.com/netzbegruenung/green-spider.git
synced 2024-05-05 10:33:39 +02:00
Marian Steinbach
618e29d763
* CLI: remove 'jobs' command, add 'manager' * Add job definition * Move jobs to manage folder * Rename jobs to manager * Add rq and redis dependencies * Add docker-compose YAML * Downgrade to alpine 3.8 * Adjust paths in Dockerfile, remove entrypoint * Rename 'make spiderjobs' to 'make jobs' * Fix docker exectution * Adapt 'make jobs' * Fix metadata scheme * Add docker dependency * Rendomize queue (a bit) * Use latest image, remove debug output * Make docker-compose file downwards-compatible * Use latest instead of dev image tag * Update docker-compose.yaml * Adapt job start script * Fix redis connection in manager * Add support for increasing timeout via environment variable * Adapt load_in_browser to cookies table schema change * Fix execution * Mitigate yaml warning * Bump some dependency versions * Report resource usage stats for each job * checks/load_in_browser: Return DOM size, prevent multiple page loads * Update .dockerignore * Code update * Script update * Update README.md * WIP * WIP commit * Update Dockerfile to alpine:edge and chromium v90 * Update TestCertificateChecker * Set defaults for __init__ function * Detect sunflower theme * Update unit test for new datetime (zero-basing) * Set logging prefs from Chromium in a new way * Move datastore client instantiation As it is not needed for all commands * Change green-directory repository URL * Add git settings for cloning green-directory * Pin alpine version 3.14, fix py3-cryptography * Use plain docker build progress output * Add volumes to 'make test' docker run command * Fix bug * Update example command in README * Update dependencies * Add creation of Kubernetes jobs
27 lines
686 B
Python
"""Configuration constants for the green-spider crawler and its job manager."""

# Connection timeout for website checks (seconds).
CONNECT_TIMEOUT = 5

# Response (read) timeout for website checks (seconds).
READ_TIMEOUT = 10

# Git repo for our data.
GREEN_DIRECTORY_REPO = 'https://git.verdigado.com/NB-Public/green-directory.git'

# Folder in that repo that holds the data.
GREEN_DIRECTORY_DATA_PATH = 'data/countries/de'

# Folder we use locally to clone the repo into.
GREEN_DIRECTORY_LOCAL_PATH = './cache/green-directory'

# IP address of the verdigado GCMS server.
GCMS_IP = "194.29.234.123"

# Kind name of the spider job key datastore entities.
JOB_DATASTORE_KIND = 'spider-jobs'

# Kubernetes job creation: where generated manifests are written,
# the template they are rendered from, and how many jobs go in one batch.
K8S_JOBS_PATH = './k8s-jobs'
K8S_JOB_TEMPLATE = './manager/job_template.yaml'
K8S_JOB_BATCH_SIZE = 10