Check third party cookies
This commit is contained in:
parent
5e8347916c
commit
56f9f1ba86
|
@ -5,11 +5,15 @@ Information includes:
|
|||
|
||||
- whether the document width adapts well to viewports as little as 360 pixels wide
|
||||
- whether javascript errors or errors from missing resources occur
|
||||
- collects CSS font-family properties in use
|
||||
- what CSS font-family properties are in use
|
||||
- what cookies are set while loading the page
|
||||
"""
|
||||
|
||||
import logging
|
||||
import math
|
||||
import shutil
|
||||
import time
|
||||
import sqlite3
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import StaleElementReferenceException
|
||||
|
@ -18,10 +22,11 @@ import tenacity
|
|||
|
||||
from checks.abstract_checker import AbstractChecker
|
||||
|
||||
from pprint import pprint
|
||||
|
||||
class Checker(AbstractChecker):
|
||||
|
||||
page_load_timeout = 20
|
||||
page_load_timeout = 30
|
||||
|
||||
# sizes we check for (width, height)
|
||||
sizes = (
|
||||
|
@ -40,6 +45,13 @@ class Checker(AbstractChecker):
|
|||
chrome_options.add_argument('--disable-gpu')
|
||||
chrome_options.add_argument('--no-sandbox')
|
||||
chrome_options.add_argument('--disable-extensions')
|
||||
|
||||
# path where to get cookies from
|
||||
chrome_options.add_argument("--user-data-dir=/opt/chrome-userdir")
|
||||
|
||||
# empty /opt/chrome-userdir
|
||||
shutil.rmtree('/opt/chrome-userdir', ignore_errors=True)
|
||||
|
||||
self.driver = webdriver.Chrome(options=chrome_options)
|
||||
self.driver.set_page_load_timeout(self.page_load_timeout)
|
||||
|
||||
|
@ -71,6 +83,15 @@ class Checker(AbstractChecker):
|
|||
logging.warn("RetryError when checking responsiveness for %s: %s" % (url, re))
|
||||
pass
|
||||
|
||||
try:
|
||||
self.scroll_to_bottom()
|
||||
except TimeoutException as e:
|
||||
logging.warn("TimeoutException in scroll_to_bottom for %s: %s" % (url, e))
|
||||
pass
|
||||
except tenacity.RetryError as re:
|
||||
logging.warn("RetryError in scroll_to_bottom for %s: %s" % (url, re))
|
||||
pass
|
||||
|
||||
# CSS collection
|
||||
font_families = None
|
||||
|
||||
|
@ -91,18 +112,35 @@ class Checker(AbstractChecker):
|
|||
|
||||
except TimeoutException as e:
|
||||
logging.warn("TimeoutException when collecting CSS elements for %s: %s" % (url, e))
|
||||
pass
|
||||
|
||||
# get cookies
|
||||
try:
|
||||
cookies = self.driver.get_cookies()
|
||||
results[url]['cookies'] = cookies
|
||||
results[url]['cookies'] = self.get_cookies()
|
||||
pprint(results[url]['cookies'])
|
||||
except TimeoutException as e:
|
||||
logging.warn("TimeoutException when collecting CSS elements for %s: %s" % (url, e))
|
||||
logging.warn("TimeoutException when collecting cookies %s: %s" % (url, e))
|
||||
pass
|
||||
except tenacity.RetryError as re:
|
||||
logging.warn("RetryError when collecting cookies for %s: %s" % (url, re))
|
||||
pass
|
||||
|
||||
self.driver.quit()
|
||||
|
||||
return results
|
||||
|
||||
def get_cookies(self, db_path='/opt/chrome-userdir/Default/Cookies'):
    """
    Read all cookies from Chrome's cookie database.

    The SQLite file is read directly (instead of asking the driver) so
    that third party cookies are included, too.

    db_path: path of the SQLite cookie database to read. Defaults to the
        database inside the Chrome user data directory.

    Returns a list with one dict per row of the `cookies` table, keyed by
    the selected column names.

    Raises sqlite3.Error if the database cannot be opened or queried.
    """
    query = ("SELECT creation_utc, host_key, name, path, expires_utc, "
             "is_secure, is_httponly, has_expires, is_persistent, "
             "firstpartyonly FROM cookies")

    db = sqlite3.connect(db_path)
    try:
        # sqlite3.Row allows converting each row into a dict by column name
        db.row_factory = sqlite3.Row
        return [dict(row) for row in db.execute(query)]
    finally:
        # always release the file handle, even if the query fails
        db.close()
|
||||
|
||||
@tenacity.retry(stop=tenacity.stop_after_attempt(3),
|
||||
retry=tenacity.retry_if_exception_type(TimeoutException))
|
||||
|
@ -129,10 +167,23 @@ class Checker(AbstractChecker):
|
|||
|
||||
def capture_log(self):
    """
    Return the browser log entries whose level is "WARNING" or "SEVERE".
    """
    wanted_levels = ('WARNING', 'SEVERE')
    browser_log = self.driver.get_log('browser')
    return [item for item in browser_log if item['level'] in wanted_levels]
|
||||
|
||||
@tenacity.retry(stop=tenacity.stop_after_attempt(3),
                retry=tenacity.retry_if_exception_type(TimeoutException))
def scroll_to_bottom(self):
    """
    Scroll through the entire page once to trigger loading of all resources.

    The page is scrolled down in steps of 1000 pixels with a short pause
    after each step. The call is attempted up to three times if a
    TimeoutException is raised.
    """
    step = 1000

    height = int(self.driver.execute_script("return document.body.scrollHeight"))

    # Round up: the trailing partial step has to be scrolled as well,
    # otherwise pages shorter than one step are never scrolled and the
    # bottom of longer pages may not be reached.
    steps = math.ceil(height / step)

    for _ in range(steps):
        self.driver.execute_script("window.scrollBy(0,1000)")
        # give the page a moment to start loading lazily loaded resources
        time.sleep(0.2)
|
||||
|
|
|
@ -12,6 +12,7 @@ from rating import feeds
|
|||
from rating import https
|
||||
from rating import no_network_errors
|
||||
from rating import no_script_errors
|
||||
from rating import no_third_party_cookies
|
||||
from rating import reachable
|
||||
from rating import resolvable
|
||||
from rating import response_duration
|
||||
|
@ -40,6 +41,7 @@ def calculate_rating(results):
|
|||
'HTTP_RESPONSE_DURATION': response_duration,
|
||||
'NO_NETWORK_ERRORS': no_network_errors,
|
||||
'NO_SCRIPT_ERRORS': no_script_errors,
|
||||
'NO_THIRD_PARTY_COOKIES': no_third_party_cookies,
|
||||
'RESPONSIVE': responsive_layout,
|
||||
'SITE_REACHABLE': reachable,
|
||||
'SOCIAL_MEDIA_LINKS': social_media_links,
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
"""
|
||||
Adds a point if the site sets no third party cookies.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from rating.abstract_rater import AbstractRater
|
||||
|
||||
class Rater(AbstractRater):
    """
    Gives a point if none of the checked URLs has a third party cookie.

    A cookie counts as first party if its host equals the host of the
    checked URL or is a parent domain of it. Every other cookie is
    treated as a third party cookie.
    """

    rating_type = 'boolean'
    default_value = False
    depends_on_checks = ['load_in_browser']
    max_score = 1

    def __init__(self, check_results):
        super().__init__(check_results)

    @staticmethod
    def _is_first_party(site_host, cookie_host):
        """
        Return True if cookie_host equals site_host or is a parent domain of it.

        A leading dot on the cookie host (domain cookie) is ignored and the
        comparison is case-insensitive. The match is done on label
        boundaries, so "notexample.com" does not match "example.com".
        """
        domain = (cookie_host or '').lstrip('.').lower()
        host = (site_host or '').lower()
        if not domain:
            # a cookie without a host cannot be attributed to a third party
            return True
        return host == domain or host.endswith('.' + domain)

    def rate(self):
        """
        Return the rating dict with the keys type, value, score and max_score.

        value is True and score equals max_score only if at least one URL
        was checked and no URL has a third party cookie.
        """
        value = self.default_value
        score = 0

        found_urls = 0
        found_urls_with_third_party_cookie = 0

        browser_results = self.check_results['load_in_browser']

        for url in browser_results:
            found_urls += 1

            result = browser_results[url]
            cookies = result.get('cookies') if isinstance(result, dict) else None

            if not isinstance(cookies, list) or not cookies:
                # no (usable) cookie information for this URL
                continue

            # hostname (unlike netloc) carries neither port nor credentials
            site_host = urlparse(url).hostname

            # scan cookies for URL match
            for cookie in cookies:
                cookie_host = cookie.get('host_key', '')

                if self._is_first_party(site_host, cookie_host):
                    # first party cookie
                    logging.debug("Cookie with host_key %s matches site URL %s" % (cookie_host, site_host))
                    continue

                # third party cookie
                logging.debug("Cookie with host_key %s is a third party cookie" % cookie_host)
                found_urls_with_third_party_cookie += 1
                # one third party cookie is enough to flag this URL
                break

        if found_urls > 0 and found_urls_with_third_party_cookie == 0:
            value = True
            score = self.max_score

        return {
            'type': self.rating_type,
            'value': value,
            'score': score,
            'max_score': self.max_score,
        }
|
Loading…
Reference in New Issue