diff --git a/Makefile b/Makefile
index e87b36b..eb97bd4 100644
--- a/Makefile
+++ b/Makefile
@@ -2,23 +2,23 @@ IMAGE := quay.io/netzbegruenung/green-spider:latest
 
 DB_ENTITY := spider-results
 
-.PHONY: dockerimage
+.PHONY: dockerimage spiderjobs spider export test
 
 # Build docker image
 dockerimage:
 	docker build -t $(IMAGE) .
 
 # Create spider job queue
-spiderjobs: dockerimage
+spiderjobs:
 	docker run --rm -ti \
 		-v $(PWD)/secrets:/secrets \
 		$(IMAGE) \
 		--credentials-path /secrets/datastore-writer.json \
-		--loglevel info \
+		--loglevel debug \
 		jobs
 
 # Run spider in docker image
-spider: dockerimage
+spider:
 	docker run --rm -ti \
 		-v $(PWD)/dev-shm:/dev/shm \
 		-v $(PWD)/secrets:/secrets \
@@ -27,7 +27,7 @@ spider: dockerimage
 		--loglevel debug \
 		spider --kind $(DB_ENTITY)
 
-export: dockerimage
+export:
 	docker run --rm -ti \
 		-v $(PWD)/export-json:/out \
 		-v $(PWD)/secrets:/secrets \
@@ -39,7 +39,7 @@ export: dockerimage
 
 # run spider tests
 # FIXME
-test: dockerimage
+test:
 	docker run --rm -ti \
 		--entrypoint "python3" \
 		$(IMAGE) \
diff --git a/README.md b/README.md
index f07575e..53d2430 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,6 @@ make spider
 
 Siehe Verzeichnis [devops](https://github.com/netzbegruenung/green-spider/tree/master/devops).
 
-### Webapp aktualisieren
+### Webapp deployen
 
-Siehe [netzbegruenung/green-spider-webapp](https://github.com/netzbegruenung/green-spider-webapp)
+Siehe Verzeichnis [devops](https://github.com/netzbegruenung/green-spider/tree/master/devops).
diff --git a/devops/README.md b/devops/README.md
index eb5b826..e95c92a 100644
--- a/devops/README.md
+++ b/devops/README.md
@@ -1,6 +1,6 @@
 # DevOps
 
-Die Scripte in diesem Verzeichnis erlauben das weitgehend automatisierte
+Die Skripte in diesem Verzeichnis erlauben das weitgehend automatisierte
 Provisionieren eines Servers, Ausführen von Jobs wie Spider und Screenshotter
 und Entfernen des Servers.
 
@@ -17,17 +17,25 @@ Server unbedingt manuell entfernt werden, um unnötige Kosten zu vermeiden.
 - jq (https://stedolan.github.io/jq/)
 - ssh
 
-## Ausführung
+## Generelles
 
-Die Scripte werden aus dem Root-Verzeichnis des Repositories ausgeführt.
+- Die Skripte müssen aus dem Root-Verzeichnis des Git-Repositories ausgeführt werden.
+- Das Terminal muss bis zum Ende der Ausführung geöffnet bleiben.
 
-```
-# Spidern
+## Spider starten
+
+```nohighlight
 devops/run-job.sh spider
+```
 
+## Screenshots erstellen
 
-# Screenshots erzeugen
+```nohighlight
 devops/run-job.sh screenshotter
 ```
 
-Der Terminal muss bis zum Ende der Ausführung geöffnet bleiben.
+## Webapp deployen
+
+```nohighlight
+devops/deploy-webapp.sh
+```
diff --git a/devops/deploy-webapp.sh b/devops/deploy-webapp.sh
new file mode 100755
index 0000000..2e432bc
--- /dev/null
+++ b/devops/deploy-webapp.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+
+# This is a Work-In-Progress deployment script for the webapp on Hetzner Cloud.
+# it is not yet expected to work.
+#
+# The general mechanics should be:
+# - Detect which server is running the webapp
+# - Create a new server
+# - Deploy the webapp on the new server
+# - Test the new server
+# - If okay, bind the public IP to the new server
+#
+# Requirements:
+#
+# - curl
+# - jq (https://stedolan.github.io/jq/)
+# - ssh
+# - SSH key referenced in the server details ("ssh_keys")
+# - Service account with write permission for Storage and Datastore in
+#   secrets/datastore-writer.json
+
+
+API_TOKEN_SECRET="secrets/hetzner-api-token.sh"
+test -f $API_TOKEN_SECRET || { echo >&2 "File $API_TOKEN_SECRET does not exist."; exit 1; }
+source $API_TOKEN_SECRET
+
+# possible values: cx11 (1 core 2 GB), cx21 (2 cores, 4 GB), cx31 (2 cores, 8 GB)
+SERVERTYPE="cx11"
+
+# Gets the IP address with description "webapp"
+function get_ip()
+{
+  echo "Getting IP address"
+
+  RESPONSE=$(curl -s https://api.hetzner.cloud/v1/floating_ips \
+    -H "Content-Type: application/json" \
+    -H "Authorization: Bearer $API_TOKEN")
+
+  # jq -r prints raw values, so IP_IP carries no JSON quotes when passed to 'ip addr add'.
+  IP_ID=$(echo $RESPONSE | jq -r '.floating_ips[] | select(.description == "webapp") | .id')
+  IP_IP=$(echo $RESPONSE | jq -r '.floating_ips[] | select(.description == "webapp") | .ip')
+}
+
+# find_webapp_server checks which server is currently running the webapp,
+# using the "purpose=webapp" label.
+function find_webapp_server()
+{
+  RESPONSE=$(curl -s "https://api.hetzner.cloud/v1/servers?label_selector=purpose=webapp" \
+    -H "Content-Type: application/json" \
+    -H "Authorization: Bearer $API_TOKEN")
+
+  # The IPv4 address is read from public_net.ipv4.ip, the same path create_server uses.
+  CURRENT_SERVER_ID=$(echo $RESPONSE | jq -r '.servers[0].id')
+  CURRENT_SERVER_IP=$(echo $RESPONSE | jq -r '.servers[0].public_net.ipv4.ip')
+
+  if [ "$CURRENT_SERVER_ID" = "null" ]; then
+    echo "Currently there is no server"
+  else
+    echo "Current server has ID $CURRENT_SERVER_ID and IP $CURRENT_SERVER_IP"
+  fi
+
+}
+
+# create_server creates a new server to deploy the webapp.
+function create_server()
+{
+  SERVERNAME=webapp-$(date -u '+%FT%H-%M')
+  echo "Creating server named $SERVERNAME"
+
+  # server_type 'cx11' is the smallest, cheapest category.
+  # location 'nbg1' is Nürnberg/Nuremberg, Germany.
+  # image 'debian-9' is a plain Debian stretch.
+  # ssh_keys ['Marian'] adds Marian's public key to the server and can be extended.
+  # user_data: Ensures that we can detect when the cloud-init setup is done.
+  #
+  CREATE_RESPONSE=$(curl -s -X POST https://api.hetzner.cloud/v1/servers \
+    -H "Content-Type: application/json" \
+    -H "Authorization: Bearer $API_TOKEN" \
+    -d "{
+      \"name\": \"$SERVERNAME\",
+      \"server_type\": \"$SERVERTYPE\",
+      \"location\": \"nbg1\",
+      \"start_after_create\": true,
+      \"image\": \"debian-9\",
+      \"ssh_keys\": [
+        \"Marian\"
+      ],
+      \"labels\": {\"purpose\": \"webapp\"},
+      \"user_data\": \"#cloud-config\nruncmd:\n  - touch /cloud-init-done\n\"
+    }")
+
+  # Get ID:
+  SERVER_ID=$(echo $CREATE_RESPONSE | jq -r .server.id)
+
+  # Get IP:
+  SERVER_IP=$(echo $CREATE_RESPONSE | jq -r .server.public_net.ipv4.ip)
+
+  # Abort when no server came back; otherwise wait_for_server would loop forever.
+  if [ "$SERVER_ID" = "null" ]; then
+    echo "No server created."
+    echo "$CREATE_RESPONSE" | jq .
+    exit 1
+  fi
+
+  echo "Created server with ID $SERVER_ID and IP $SERVER_IP"
+}
+
+# assign_ip assigns the public IP address for 'green-spider.netzbegruenung.de'
+# to the server.
+function assign_ip()
+{
+  curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $API_TOKEN" \
+    https://api.hetzner.cloud/v1/floating_ips/${IP_ID}/actions/assign \
+    -d "{\"server\": ${SERVER_ID}}"
+}
+
+# wait_for_server waits until the new server is reachable via SSH
+function wait_for_server()
+{
+  echo -n "Waiting for the server to be reachable via SSH "
+
+  sleep 30
+
+  STATUS="255"
+  while [ "$STATUS" != "0" ]; do
+    echo -n "."
+    sleep 5
+    ssh -o StrictHostKeyChecking=no -q root@$SERVER_IP ls /cloud-init-done &> /dev/null
+    STATUS=$?
+  done
+
+  echo ""
+}
+
+get_ip
+echo "webapp IP address has ID ${IP_ID}"
+
+find_webapp_server
+
+create_server
+wait_for_server
+
+
+echo "Executing remote commands..."
+
+ssh -o StrictHostKeyChecking=no -q root@$SERVER_IP << EOF
+  # Exported so that apt-get, a child process, actually sees the variable.
+  export DEBIAN_FRONTEND=noninteractive
+
+  echo ""
+  echo "Update package sources"
+  apt-get update -q
+
+  echo ""
+  echo "Install dependencies"
+  apt-get install -y curl apt-transport-https gnupg2 software-properties-common
+
+  echo ""
+  echo "Add docker repo key"
+  curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add -
+
+  echo ""
+  echo "Add repo"
+  add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/debian stretch stable"
+
+  echo ""
+  echo "Update package sources again"
+  apt-get update -q
+
+  echo ""
+  echo "Install docker"
+  apt-get install -y docker-ce python-pip
+
+  pip install setuptools
+  pip install docker-compose
+  docker-compose version
+
+  mkdir /root/etc-letsencrypt
+
+  curl -s https://raw.githubusercontent.com/netzbegruenung/green-spider-webapp/proxy-api-requests/docker-compose-prod.yaml > docker-compose.yaml
+  docker-compose pull
+
+  curl -s https://raw.githubusercontent.com/netzbegruenung/green-spider-webapp/proxy-api-requests/config/nginx/nginx_prod.conf > nginx.conf
+EOF
+
+echo "Done with remote setup."
+
+# Copy TLS certificate files from old to new server (skipped when there is no old server)
+if [ "$CURRENT_SERVER_ID" != "null" ]; then
+  scp -3 -o StrictHostKeyChecking=no -r root@$CURRENT_SERVER_IP:/letsencrypt root@$SERVER_IP:/letsencrypt
+fi
+
+# Upload secret for database access
+scp -o StrictHostKeyChecking=no secrets/green-spider-api.json root@$SERVER_IP:/root/
+
+exit
+
+# TODO:
+# - docker-compose up
+
+echo "Launching server"
+
+
+ssh -o StrictHostKeyChecking=no root@$SERVER_IP \
+  docker run --name webapp -d \
+  -p 443:443 -p 80:8000 \
+  -v /root/etc-letsencrypt:/etc/letsencrypt \
+  $DOCKERIMAGE
+
+# Assign the IP to the new server
+assign_ip
+ssh -o StrictHostKeyChecking=no root@$SERVER_IP sudo ip addr add $IP_IP dev eth0
+
+# remove old server
+if [ "$CURRENT_SERVER_ID" != "null" ]; then
+  echo "Deleting old webapp server with ID $CURRENT_SERVER_ID"
+  curl -s -X DELETE -H "Content-Type: application/json" \
+    -H "Authorization: Bearer $API_TOKEN" \
+    https://api.hetzner.cloud/v1/servers/$CURRENT_SERVER_ID
+fi
diff --git a/devops/run-job.sh b/devops/run-job.sh
index dac00e4..c94b3b1 100755
--- a/devops/run-job.sh
+++ b/devops/run-job.sh
@@ -28,7 +28,7 @@ test -f $API_TOKEN_SECRET || { echo >&2 "File $API_TOKEN_SECRET does not exist."
 source $API_TOKEN_SECRET
 
 
-if [[ "$1" == "" ]]; then
+if [[ "$1" = "" ]]; then
   echo "No argument given. Please use 'screenshotter' or 'spider' as arguments."
   exit 1
 fi
@@ -69,6 +69,12 @@ function create_server()
   # Get IP:
   SERVER_IP=$(echo $CREATE_RESPONSE | jq -r .server.public_net.ipv4.ip)
 
+  if [ "$SERVER_ID" = "null" ]; then
+    echo "No server created."
+    echo "$CREATE_RESPONSE" | jq .
+    exit 1
+  fi
+
   echo "Created server $SERVERNAME with ID $SERVER_ID and IP $SERVER_IP"
 }
 
diff --git a/index.yaml b/index.yaml
new file mode 100644
index 0000000..a7a4ba0
--- /dev/null
+++ b/index.yaml
@@ -0,0 +1,9 @@
+# Google cloud datastore index config
+indexes:
+
+- kind: spider-results
+  properties:
+  - name: created
+    direction: desc
+  - name: meta
+  - name: score
diff --git a/kubernetes/green-spider-screenshotter-job.yaml b/kubernetes/green-spider-screenshotter-job.yaml
new file mode 100644
index 0000000..da93d93
--- /dev/null
+++ b/kubernetes/green-spider-screenshotter-job.yaml
@@ -0,0 +1,29 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: green-spider-screenshotter
+spec:
+  template:
+    spec:
+      containers:
+      - name: screenshotter
+        image: quay.io/netzbegruenung/green-spider-screenshotter:latest
+        imagePullPolicy: Always
+        volumeMounts:
+        - name: secrets
+          mountPath: "/secrets"
+          readOnly: true
+        resources:
+          requests:
+            cpu: 800m
+            memory: 4000M
+      restartPolicy: Never
+      volumes:
+      - name: secrets
+        secret:
+          secretName: green-spider
+          items:
+          - key: datastore-writer.json
+            path: datastore-writer.json
+          - key: screenshots-uploader.json
+            path: screenshots-uploader.json
diff --git a/kubernetes/green-spider-spider-job.yaml b/kubernetes/green-spider-spider-job.yaml
new file mode 100644
index 0000000..21f051a
--- /dev/null
+++ b/kubernetes/green-spider-spider-job.yaml
@@ -0,0 +1,37 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: green-spider-spider
+spec:
+  parallelism: 1
+  template:
+    spec:
+      containers:
+      - name: spider
+        image: quay.io/netzbegruenung/green-spider:latest
+        imagePullPolicy: Always
+        args:
+        - "--credentials-path=/secrets/datastore-writer.json"
+        - "--loglevel=info"
+        - spider
+        volumeMounts:
+        - name: secrets
+          mountPath: "/secrets"
+          readOnly: true
+        - name: shared
+          mountPath: /dev/shm
+        resources:
+          requests:
+            cpu: 900m
+            memory: 2000M
+      restartPolicy: OnFailure
+      volumes:
+      - name: secrets
+        secret:
+          secretName: green-spider
+          items:
+          - key: datastore-writer.json
+            path: datastore-writer.json
+      - name: shared
+        emptyDir: {}
+
diff --git a/spider/spider.py b/spider/spider.py
index d1cf6de..2b27d96 100644
--- a/spider/spider.py
+++ b/spider/spider.py
@@ -88,7 +88,7 @@ def work_of_queue(datastore_client, entity_kind):
         logging.info("Job %s writing to DB", job["url"])
 
         key = datastore_client.key(entity_kind, job["url"])
-        entity = datastore.Entity(key=key, exclude_from_indexes=['results'])
+        entity = datastore.Entity(key=key)
         record = {
             'created': datetime.utcnow(),
             'meta': result['meta'],