green-spider/Makefile

67 lines
1.8 KiB
Makefile

# Docker image that the dockerimage target builds and that the dryrun,
# export and test targets run.
IMAGE := ghcr.io/netzbegruenung/green-spider:latest
# Entity kind handed to `cli.py export --kind`.
DB_ENTITY := spider-results
# Exact git tag of HEAD if there is one, otherwise the commit hash of HEAD.
# Deliberately recursive (`=`): git is only invoked when the recipe of the
# VERSION target expands this, not on every make invocation.
VERSION = $(shell git describe --exact-match --tags 2> /dev/null || git rev-parse HEAD)
# Command targets that do not produce a file of their own name. `jobs` is
# listed so that a file or directory called `jobs` cannot mask the target.
.PHONY: dockerimage jobs spider export dryrun test
# Build the Docker image ${IMAGE} from the current directory, with plain
# (non-interactive) build progress output.
# Depends on the VERSION target, so the VERSION file exists before the build.
dockerimage: VERSION
	docker build --progress=plain --tag ${IMAGE} .
# Fill the queue with spider jobs, one for each site.
# Steps: make sure a shallow clone of green-directory exists under cache/,
# update it, run the `manager` compose service, then print `rq info`.
# `venv` is an order-only prerequisite because the last step calls
# venv/bin/rq; without it a missing virtual environment would only be
# noticed after all the other steps had already run.
jobs: | venv
	mkdir -p cache
	test -d cache/green-directory || git clone --depth 1 https://git.verdigado.com/NB-Public/green-directory.git cache/green-directory
	git -C cache/green-directory fetch && git -C cache/green-directory pull
	docker compose up manager
	venv/bin/rq info
# Spider a single URL and inspect the result.
# ARGS is passed through unchanged to the `dryrun` subcommand of cli.py,
# e.g. `make dryrun ARGS=<url>`.
# The bind mounts use $(CURDIR) rather than $(PWD): make sets CURDIR to its
# real working directory, whereas PWD is only inherited from the environment
# and is stale under `make -C <dir>`. The mount arguments are quoted so that a
# checkout path containing spaces still works.
dryrun:
	docker run --rm -ti \
	  -v "$(CURDIR)/volumes/dev-shm:/dev/shm" \
	  -v "$(CURDIR)/secrets:/secrets" \
	  -v "$(CURDIR)/volumes/chrome-userdir:/opt/chrome-userdir" \
	  --shm-size=2g \
	  $(IMAGE) \
	  python3 cli.py \
	    --credentials-path /secrets/datastore-writer.json \
	    --loglevel debug \
	    dryrun ${ARGS}
# Run the spider: start an rq worker with --burst on the queues
# high, default and low.
# OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES is a workaround for macOS.
# `venv` is an order-only prerequisite because the recipe calls venv/bin/rq.
spider: | venv
	OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES venv/bin/rq worker --burst high default low
# Run `cli.py export` for the $(DB_ENTITY) kind inside the Docker image, using
# the datastore-reader credentials; ./volumes/json-export is mounted at
# /json-export in the container.
# The bind mounts use $(CURDIR) rather than $(PWD): make sets CURDIR to its
# real working directory, whereas PWD is only inherited from the environment
# and is stale under `make -C <dir>`. The mount arguments are quoted so that a
# checkout path containing spaces still works.
export:
	docker run --rm -ti \
	  -v "$(CURDIR)/secrets:/secrets" \
	  -v "$(CURDIR)/volumes/json-export:/json-export" \
	  $(IMAGE) \
	  python3 cli.py \
	    --credentials-path /secrets/datastore-reader.json \
	    --loglevel debug \
	    export --kind $(DB_ENTITY)
# Run the spider tests: unittest discovery of all '*_test.py' files inside
# the Docker image.
# The bind mounts use $(CURDIR) rather than $(PWD): make sets CURDIR to its
# real working directory, whereas PWD is only inherited from the environment
# and is stale under `make -C <dir>`. The mount arguments are quoted so that a
# checkout path containing spaces still works.
test:
	docker run --rm \
	  -v "$(CURDIR)/volumes/dev-shm:/dev/shm" \
	  -v "$(CURDIR)/secrets:/secrets" \
	  -v "$(CURDIR)/screenshots:/screenshots" \
	  -v "$(CURDIR)/volumes/chrome-userdir:/opt/chrome-userdir" \
	  $(IMAGE) \
	  python3 -m unittest discover -p '*_test.py' -v
# Set up the Python virtual environment in ./venv: create it, upgrade pip,
# then install the packages listed in requirements.txt.
# `$@` expands to the target name, i.e. the venv directory itself.
venv:
	python3 -m venv $@
	$@/bin/pip install --upgrade pip
	$@/bin/pip install -r requirements.txt
# Write the current version (the value of the VERSION variable: git tag or
# commit hash) into the file named VERSION.
# Declared phony although it names a real file: the rule has no
# prerequisites, so otherwise an existing VERSION file would never be
# rewritten and could keep describing an old commit.
.PHONY: VERSION
VERSION:
	@echo $(VERSION) > $@