mirror of
https://github.com/netzbegruenung/green-spider.git
synced 2024-05-08 03:43:41 +02:00
Replace PhantomJS with Chromedriver
This commit is contained in:
parent
54b6d24b61
commit
25e5fc936c
13
Dockerfile
13
Dockerfile
|
@ -7,22 +7,11 @@ RUN apt-get update \
|
|||
libpango-1.0-0 libpangocairo-1.0-0 libx11-6 libx11-xcb1 libxcb1 \
|
||||
libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 \
|
||||
libxrandr2 libxrender1 libxss1 libxtst6 lsb-release xdg-utils \
|
||||
python3 python3-pip unzip \
|
||||
python3 python3-pip unzip chromium-driver \
|
||||
&& apt-get clean \
|
||||
&& wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
|
||||
&& dpkg -i google-chrome-stable_current_amd64.deb \
|
||||
&& rm google-chrome-stable_current_amd64.deb \
|
||||
&& pip3 install GitPython idna PyYAML beautifulsoup4==4.6.0 requests==2.18.4 responses==0.9.0 selenium==3.11.0 smmap2==2.0.3 urllib3==1.22 google-cloud-datastore==1.7.0 tenacity==5.0.2 \
|
||||
&& wget https://chromedriver.storage.googleapis.com/2.38/chromedriver_linux64.zip \
|
||||
&& unzip chromedriver_linux64.zip \
|
||||
&& rm chromedriver_linux64.zip \
|
||||
&& apt-get clean
|
||||
|
||||
RUN wget https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-2.1.1-linux-x86_64.tar.bz2 \
|
||||
&& tar xjf phantomjs-2.1.1-linux-x86_64.tar.bz2 \
|
||||
&& mv phantomjs-2.1.1-linux-x86_64/bin/phantomjs /usr/local/bin/ \
|
||||
&& rm -rf phantomjs-2.1.1-linux-x86_64
|
||||
|
||||
ADD spider.py /
|
||||
ADD spider_test.py /
|
||||
ADD data_export.py /
|
||||
|
|
10
spider.py
10
spider.py
|
@ -263,8 +263,14 @@ def check_responsiveness(url):
|
|||
(1920, 1080), # Full HD horizontal
|
||||
)
|
||||
|
||||
# Our selenium user agent using PhantomJS/Webkit as an engine
|
||||
driver = webdriver.PhantomJS()
|
||||
# Our selenium user agent using Chrome headless as an engine
|
||||
chrome_options = webdriver.ChromeOptions()
|
||||
chrome_options.add_argument('--headless')
|
||||
chrome_options.add_argument('--disable-dev-shm-usage')
|
||||
chrome_options.add_argument('--disable-gpu')
|
||||
chrome_options.add_argument('--no-sandbox')
|
||||
chrome_options.add_argument('--disable-extensions')
|
||||
driver = webdriver.Chrome(chrome_options=chrome_options)
|
||||
driver.set_window_size(sizes[0][0], sizes[0][1])
|
||||
driver.get(url)
|
||||
|
||||
|
|
Loading…
Reference in a new issue