def check_responsiveness(url):
    """
    Checks
    - whether a page adapts to different viewport sizes
    - whether a viewport meta tag exists
    and returns details

    The page is loaded once in a headless PhantomJS browser. The window is
    then resized to each of the tested sizes and the resulting width of the
    document body is recorded.

    Args:
        url: The URL of the page to load.

    Returns:
        A dict with two keys:
        - 'document_width': dict mapping "<width>x<height>" (the window
          size that was set) to the measured document.body.scrollWidth
          as an int.
        - 'viewport_meta_tag': the `content` attribute of the page's
          <meta name="viewport"> tag, or None if it could not be read.

    Raises:
        Whatever the selenium driver raises while starting the browser,
        loading the page or measuring the document width. Only a failure
        to read the viewport meta tag is tolerated.
    """
    # Imported locally so the block does not depend on a new file-level
    # import; selenium itself is already a dependency of this module.
    from selenium.common.exceptions import WebDriverException

    details = {
        'document_width': {},
        'viewport_meta_tag': None,
    }

    # sizes we check for (width, height)
    sizes = (
        (320, 480),    # old smartphone
        (768, 1024),   # older tablet or newer smartphone
        (1024, 768),   # older desktop or horiz. tablet
        (1920, 1080),  # Full HD horizontal
    )

    # Our selenium user agent using PhantomJS/Webkit as an engine
    driver = webdriver.PhantomJS()
    try:
        # Start with the smallest size so the page is first laid out
        # for a narrow viewport.
        driver.set_window_size(sizes[0][0], sizes[0][1])
        driver.get(url)

        for (width, height) in sizes:
            driver.set_window_size(width, height)
            key = "%sx%s" % (width, height)
            # Separate name: do not overwrite the requested window width.
            document_width = driver.execute_script(
                "return document.body.scrollWidth")
            details['document_width'][key] = int(document_width)

        try:
            element = driver.find_element_by_xpath("//meta[@name='viewport']")
            details['viewport_meta_tag'] = element.get_attribute('content')
        except WebDriverException:
            # Best effort: a missing (or unreadable) viewport tag simply
            # leaves 'viewport_meta_tag' at None. Non-selenium errors and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
    finally:
        # Always shut the browser down; otherwise every checked URL leaves
        # a PhantomJS process behind.
        driver.quit()

    return details
c['responsive']['viewport_meta_tag'] is not None: + viewports.add(c['responsive']['viewport_meta_tag']) + widths = c['responsive']['document_width'].values() + if min(widths) < min_width: + min_width = min(widths) + result['details']['responsive'] = { + 'viewport_meta_tag': list(viewports), + 'min_width': min_width, + } + # detect CMS for c in result['details']['urlchecks']: if c['content'] is None: @@ -503,6 +568,13 @@ def check_site(entry): elif val < 1000: result['result']['HTTP_RESPONSE_DURATION']['score'] = 0.5 + # RESPONSIVE + if result['details']['responsive'] is not None: + if (result['details']['responsive']['min_width'] < 500 and + len(result['details']['responsive']['viewport_meta_tag']) > 0): + result['result']['RESPONSIVE']['value'] = True + result['result']['RESPONSIVE']['score'] = 1 + # Overall score for item in result['result'].keys(): result['score'] += result['result'][item]['score'] diff --git a/webapp/dist/index.html b/webapp/dist/index.html index 99ed7a5..3f1bf6e 100644 --- a/webapp/dist/index.html +++ b/webapp/dist/index.html @@ -61,9 +61,10 @@ Erreichbar Antwortzeit Icon + HTTPS www. optional Kanonische URL - HTTPS + Responsive Feed Screenshots CMS diff --git a/webapp/src/index.js b/webapp/src/index.js index 1a58ebf..366dad0 100644 --- a/webapp/src/index.js +++ b/webapp/src/index.js @@ -68,6 +68,10 @@ $(function(){ var icon = item.result.FAVICON.value; row.append('' + (icon ? ('') : '❌') + ''); + // HTTPS + var hasHTTPS = item.result.HTTPS.value; + row.append('' + (hasHTTPS ? '✅' : '❌') + ''); + // WWW_OPTIONAL var wwwOptional = item.result.WWW_OPTIONAL.value; row.append('' + (wwwOptional ? '✅' : '❌') + ''); @@ -76,9 +80,8 @@ $(function(){ var canonical = item.result.CANONICAL_URL.value; row.append('' + (canonical ? '✅' : '❌') + ''); - // https - var hasHTTPS = item.result.HTTPS.value; - row.append('' + (hasHTTPS ? '✅' : '❌') + ''); + var responsive = item.result.RESPONSIVE.value; + row.append('' + (responsive ? 
'✅' : '❌') + ''); // feeds var feeds = item.result.FEEDS.value;