Browse Source

Add retry for get_job_from_queue

pull/60/head
Marian Steinbach 4 years ago
parent
commit
545ea671d8
  1. 2
      Dockerfile
  2. 4
      spider.py

2
Dockerfile

@@ -12,7 +12,7 @@ RUN apt-get update \
&& wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
&& dpkg -i google-chrome-stable_current_amd64.deb \
&& rm google-chrome-stable_current_amd64.deb \
-&& pip3 install GitPython idna PyYAML beautifulsoup4==4.6.0 requests==2.18.4 responses==0.9.0 selenium==3.11.0 smmap2==2.0.3 urllib3==1.22 google-cloud-datastore==1.7.0 \
+&& pip3 install GitPython idna PyYAML beautifulsoup4==4.6.0 requests==2.18.4 responses==0.9.0 selenium==3.11.0 smmap2==2.0.3 urllib3==1.22 google-cloud-datastore==1.7.0 tenacity==5.0.2 \
&& wget https://chromedriver.storage.googleapis.com/2.38/chromedriver_linux64.zip \
&& unzip chromedriver_linux64.zip \
&& rm chromedriver_linux64.zip \

4
spider.py

@@ -18,11 +18,13 @@ from urllib.parse import urlparse
import requests
import yaml
import tenacity
from bs4 import BeautifulSoup
from git import Repo
from selenium import webdriver
from google.cloud import datastore
from google.api_core.exceptions import Aborted
from google.api_core.exceptions import InvalidArgument
@@ -713,6 +715,8 @@ def check_site(entry):
return result
+@tenacity.retry(wait=tenacity.wait_exponential(),
+                retry=tenacity.retry_if_exception_type(Aborted))
def get_job_from_queue():
"""
Returns a URL from the queue

Loading…
Cancel
Save