mirror of
https://github.com/netzbegruenung/green-spider.git
synced 2024-05-11 13:06:08 +02:00
Marian Steinbach
618e29d763
* CLI: remove 'jobs' command, add 'manager' * Add job definition * Move jobs to manage folder * Rename jobs to manager * Add rq and redis dependencies * Add docker-compose YAML * Downgrade to alpine 3.8 * Adjust paths in Dockerfile, remove entrypoint * Rename 'make spiderjobs' to 'make jobs' * Fix docker exectution * Adapt 'make jobs' * Fix metadata scheme * Add docker dependency * Rendomize queue (a bit) * Use latest image, remove debug output * Make docker-compose file downwards-compatible * Use latest instead of dev image tag * Update docker-compose.yaml * Adapt job start script * Fix redis connection in manager * Add support for increasing timeout via environment variable * Adapt load_in_browser to cookies table schema change * Fix execution * Mitigate yaml warning * Bump some dependency versions * Report resource usage stats for each job * checks/load_in_browser: Return DOM size, prevent multiple page loads * Update .dockerignore * Code update * Script update * Update README.md * WIP * WIP commit * Update Dockerfile to alpine:edge and chromium v90 * Update TestCertificateChecker * Set defaults for __init__ function * Detect sunflower theme * Update unit test for new datetime (zero-basing) * Set logging prefs from Chromium in a new way * Move datastore client instantiation As it is not needed for all commands * Change green-directory repository URL * Add git settings for cloning green-directory * Pin alpine version 3.14, fix py3-cryptography * Use plain docker build progress output * Add volumes to 'make test' docker run command * Fix bug * Update example command in README * Update dependencies * Add creation of Kubernetes jobs
53 lines
1.3 KiB
Makefile
53 lines
1.3 KiB
Makefile
# Container image used by every target below. Override on the command line
# to test a locally built tag: make spider IMAGE=green-spider:dev
IMAGE := quay.io/netzbegruenung/green-spider:latest

# Google Cloud Datastore entity kind that spider results are written to / read from.
DB_ENTITY := spider-results

# All targets are commands, not files. 'jobs' and 'test' must be listed too,
# otherwise a file with that name in the repo would make the target a no-op.
.PHONY: dockerimage jobs spider export test
# Build the docker image and tag it as $(IMAGE).
# --progress plain gives line-by-line build output (useful in CI logs).
dockerimage:
	docker build --progress plain -t $(IMAGE) .
# Fill the queue with spider jobs, one for each site.
# Overrides the image entrypoint to run the 'manager' CLI command, which
# needs write access to Datastore via the mounted service account key.
jobs:
	docker run --rm -ti \
		-v $(PWD)/secrets:/secrets \
		$(IMAGE) \
		python cli.py \
		--credentials-path /secrets/datastore-writer.json \
		--loglevel debug \
		manager
# Run the spider in the docker image.
# Mounts:
#   volumes/dev-shm       -> /dev/shm            (Chromium shared memory; with --shm-size=2g)
#   secrets               -> /secrets            (Datastore writer credentials)
#   volumes/chrome-userdir-> /opt/chrome-userdir (persistent Chromium profile)
# Pass extra CLI arguments via ARGS, e.g.: make spider ARGS="--url https://example.org"
spider:
	docker run --rm -ti \
		-v $(PWD)/volumes/dev-shm:/dev/shm \
		-v $(PWD)/secrets:/secrets \
		-v $(PWD)/volumes/chrome-userdir:/opt/chrome-userdir \
		--shm-size=2g \
		$(IMAGE) \
		--credentials-path /secrets/datastore-writer.json \
		--loglevel debug \
		spider --kind $(DB_ENTITY) $(ARGS)
# Export spider results from Datastore to JSON files in volumes/json-export.
# Uses the read-only credentials, since this target never writes to Datastore.
export:
	docker run --rm -ti \
		-v $(PWD)/secrets:/secrets \
		-v $(PWD)/volumes/json-export:/json-export \
		$(IMAGE) \
		--credentials-path /secrets/datastore-reader.json \
		--loglevel debug \
		export --kind $(DB_ENTITY)
# Run the spider unit tests inside the docker image.
# Overrides the image entrypoint with python3 and lets unittest discover
# all '*_test.py' modules. Mounts mirror the 'spider' target so tests that
# drive Chromium find the same shared memory and profile directories.
test:
	docker run --rm -ti \
		-v $(PWD)/volumes/dev-shm:/dev/shm \
		-v $(PWD)/secrets:/secrets \
		-v $(PWD)/screenshots:/screenshots \
		-v $(PWD)/volumes/chrome-userdir:/opt/chrome-userdir \
		--entrypoint "python3" \
		$(IMAGE) \
		-m unittest discover -p '*_test.py' -v