mirror of
https://github.com/netzbegruenung/green-spider.git
synced 2024-05-01 16:44:51 +02:00
Add check responsive_layout
This commit is contained in:
parent
62033d587a
commit
818e072756
|
@ -1,5 +1,6 @@
|
|||
"""
|
||||
The checks module contains the individual checks we perform with a page
|
||||
The checks module contains the functionality to get information and test certain
|
||||
functionality of a site or individual pages.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
@ -12,6 +13,7 @@ from checks import generator
|
|||
from checks import html_head
|
||||
from checks import http_and_https
|
||||
from checks import page_content
|
||||
from checks import responsive_layout
|
||||
from checks import url_reachability
|
||||
from checks import url_canonicalization
|
||||
|
||||
|
@ -37,6 +39,7 @@ def perform_checks(input_url):
|
|||
('charset', charset),
|
||||
('html_head', html_head),
|
||||
('generator', generator),
|
||||
('responsive_layout', responsive_layout),
|
||||
]
|
||||
|
||||
results = {}
|
||||
|
|
69
checks/responsive_layout.py
Normal file
69
checks/responsive_layout.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
"""
|
||||
Check for responsive layout.
|
||||
|
||||
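This relies on Selenium driving a headless Chrome browser to measure the rendered document width at several viewport sizes.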
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
from selenium import webdriver
|
||||
|
||||
from checks.abstract_checker import AbstractChecker
|
||||
|
||||
|
||||
class Checker(AbstractChecker):
    """
    Measure how wide a page renders at a set of viewport sizes.

    Every URL in ``self.config.urls`` is opened in a headless Chrome
    instance driven by Selenium. The browser window is resized to each
    entry of ``sizes`` and the resulting ``document.body.scrollWidth``
    is recorded.
    """

    # seconds handed to the driver's set_page_load_timeout()
    page_load_timeout = 20

    # sizes we check for (width, height)
    sizes = (
        (320, 480),    # old smartphone
        (360, 640),    # slightly newer smartphone
        (768, 1024),   # older tablet or newer smartphone
        (1024, 768),   # older desktop or horiz. tablet
        (1920, 1080),  # Full HD horizontal
    )

    def __init__(self, config, previous_results=None):
        super().__init__(config, previous_results)

    def run(self):
        """
        Check all configured URLs and return the measurements.

        Returns a dict mapping each URL to the list produced by
        check_responsiveness(). The browser is always shut down before
        returning, also when one of the checks raises.
        """
        # Our selenium user agent using Chrome headless as an engine
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-extensions')
        # NOTE(review): newer Selenium releases name this keyword
        # `options` - confirm against the pinned Selenium version.
        self.driver = webdriver.Chrome(chrome_options=chrome_options)

        try:
            self.driver.set_page_load_timeout(self.page_load_timeout)

            results = {}
            for url in self.config.urls:
                results[url] = self.check_responsiveness(url)
            return results
        finally:
            # Without this, every run leaves a Chrome process behind.
            self.driver.quit()

    def check_responsiveness(self, url):
        """
        Load `url` and record the document width for every viewport size.

        Requires ``self.driver`` to be set (run() takes care of that).
        Returns a list with one dict per entry in ``sizes``, each having
        the keys 'viewport_width' and 'document_width'.
        """
        result = []

        # set window to the first size initially
        self.driver.set_window_size(self.sizes[0][0], self.sizes[0][1])
        self.driver.get(url)

        # give the page some time to load
        time.sleep(2)

        for (width, height) in self.sizes:
            self.driver.set_window_size(width, height)

            # wait for re-render/re-flow
            time.sleep(1.0)
            doc_width = self.driver.execute_script("return document.body.scrollWidth")

            result.append({
                'viewport_width': width,
                'document_width': int(doc_width),
            })

        return result
|
65
spider.py
65
spider.py
|
@ -15,7 +15,6 @@ from urllib.parse import urlparse
|
|||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from selenium import webdriver
|
||||
|
||||
from google.api_core.exceptions import InvalidArgument
|
||||
from google.cloud import datastore
|
||||
|
@ -37,53 +36,6 @@ def normalize_title(title):
|
|||
return title
|
||||
|
||||
|
||||
def check_responsiveness(url):
    """
    Checks
    - whether a page adapts to different viewport sizes
    - whether a viewport meta tag exists
    and returns details

    The returned dict has two keys: 'document_width' maps a
    "<width>x<height>" string to the measured document.body.scrollWidth
    for that window size, and 'viewport_meta_tag' holds the content
    attribute of the viewport meta tag, or None if it could not be read.
    """
    details = {
        'document_width': {},
        'viewport_meta_tag': None,
    }

    # sizes we check for (width, height)
    sizes = (
        (320, 480),    # old smartphone
        (768, 1024),   # older tablet or newer smartphone
        (1024, 768),   # older desktop or horiz. tablet
        (1920, 1080),  # Full HD horizontal
    )

    # Our selenium user agent using Chrome headless as an engine
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-extensions')
    driver = webdriver.Chrome(chrome_options=chrome_options)

    try:
        driver.set_page_load_timeout(60)
        driver.set_window_size(sizes[0][0], sizes[0][1])
        driver.get(url)
        time.sleep(1)

        for (width, height) in sizes:
            driver.set_window_size(width, height)
            key = "%sx%s" % (width, height)
            # separate name so the loop's viewport width is not overwritten
            doc_width = driver.execute_script("return document.body.scrollWidth")
            details['document_width'][key] = int(doc_width)

        try:
            element = driver.find_element_by_xpath("//meta[@name='viewport']")
            details['viewport_meta_tag'] = element.get_attribute('content')
        except Exception:
            # Best effort: leave 'viewport_meta_tag' as None when the tag
            # cannot be read. Unlike a bare except, this lets
            # KeyboardInterrupt and SystemExit propagate.
            pass
    finally:
        # Always shut the browser down so no Chrome process is left behind.
        driver.quit()

    return details
|
||||
|
||||
|
||||
def check_content(req):
|
||||
"""
|
||||
Adds details to check regarding content of the page
|
||||
|
@ -242,20 +194,21 @@ def check_site(entry):
|
|||
nextgen_results = checks.perform_checks(entry['url'])
|
||||
|
||||
pprint(nextgen_results['dns_resolution'])
|
||||
pprint(nextgen_results['url_reachability'])
|
||||
pprint(nextgen_results['charset'])
|
||||
pprint(nextgen_results['html_head'])
|
||||
pprint(nextgen_results['generator'])
|
||||
|
||||
result['details']['hostnames'] = nextgen_results['domain_variations'].items()
|
||||
result['details']['hostnames'] = nextgen_results['dns_resolution'].values()
|
||||
#logging.debug("result[details][hostnames]: %r" % result['details']['hostnames'])
|
||||
|
||||
result['details']['ipv4_addresses'] = collect_ipv4_addresses(nextgen_results['domain_variations'])
|
||||
result['details']['ipv4_addresses'] = collect_ipv4_addresses(nextgen_results['dns_resolution'])
|
||||
#logging.debug("result[details][ipv4_addresses]: %r" % result['details']['ipv4_addresses'])
|
||||
|
||||
result['details']['resolvable_urls'] = sorted(nextgen_results['url_reachability'].items(), key=lambda url: url['url'])
|
||||
result['details']['resolvable_urls'] = sorted(nextgen_results['url_reachability'].values(), key=lambda url: url['url'])
|
||||
|
||||
result['details']['canonical_urls'] = sorted(nextgen_results['url_canonicalization'].items())
|
||||
result['details']['canonical_urls'] = sorted(nextgen_results['url_canonicalization'])
|
||||
|
||||
|
||||
|
||||
# TODO: continue with content checks
|
||||
logging.info("Waiting 10 seconds...")
|
||||
|
@ -284,12 +237,6 @@ def check_site(entry):
|
|||
if req.status_code < 300:
|
||||
check['content'] = check_content(req)
|
||||
|
||||
# Responsiveness check
|
||||
try:
|
||||
check['responsive'] = check_responsiveness(check_url)
|
||||
except Exception as exc:
|
||||
logging.error("Error when checking responsiveness for '%s': %s", check_url, exc)
|
||||
|
||||
except requests.exceptions.ConnectionError as exc:
|
||||
logging.error(str(exc) + " " + check_url)
|
||||
check['error'] = "connection"
|
||||
|
|
Loading…
Reference in a new issue