Check third party cookies

This commit is contained in:
Marian Steinbach 2019-04-29 10:09:25 +02:00
parent 5e8347916c
commit 56f9f1ba86
3 changed files with 119 additions and 7 deletions

View File

@ -5,11 +5,15 @@ Information includes:
- whether the document width adapts well to viewports as little as 360 pixels wide
- whether javascript errors or errors from missing resources occur
- collects CSS font-family properties in use
- what CSS font-family properties are in use
- what cookies are set during loading the page
"""
import logging
import math
import shutil
import time
import sqlite3
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
@ -18,10 +22,11 @@ import tenacity
from checks.abstract_checker import AbstractChecker
from pprint import pprint
class Checker(AbstractChecker):
page_load_timeout = 20
page_load_timeout = 30
# sizes we check for (width, height)
sizes = (
@ -40,6 +45,13 @@ class Checker(AbstractChecker):
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-extensions')
# path where to get cookies from
chrome_options.add_argument("--user-data-dir=/opt/chrome-userdir")
# empty /opt/chrome-userdir
shutil.rmtree('/opt/chrome-userdir', ignore_errors=True)
self.driver = webdriver.Chrome(options=chrome_options)
self.driver.set_page_load_timeout(self.page_load_timeout)
@ -71,6 +83,15 @@ class Checker(AbstractChecker):
logging.warn("RetryError when checking responsiveness for %s: %s" % (url, re))
pass
try:
self.scroll_to_bottom()
except TimeoutException as e:
logging.warn("TimeoutException in scroll_to_bottom for %s: %s" % (url, e))
pass
except tenacity.RetryError as re:
logging.warn("RetryError in scroll_to_bottom for %s: %s" % (url, re))
pass
# CSS collection
font_families = None
@ -91,18 +112,35 @@ class Checker(AbstractChecker):
except TimeoutException as e:
logging.warn("TimeoutException when collecting CSS elements for %s: %s" % (url, e))
pass
# get cookies
try:
cookies = self.driver.get_cookies()
results[url]['cookies'] = cookies
results[url]['cookies'] = self.get_cookies()
pprint(results[url]['cookies'])
except TimeoutException as e:
logging.warn("TimeoutException when collecting CSS elements for %s: %s" % (url, e))
logging.warn("TimeoutException when collecting cookies %s: %s" % (url, e))
pass
except tenacity.RetryError as re:
logging.warn("RetryError when collecting cookies for %s: %s" % (url, re))
pass
self.driver.quit()
return results
def get_cookies(self, db_path='/opt/chrome-userdir/Default/Cookies'):
    """
    Read cookies straight from Chrome's on-disk SQLite cookie database,
    so that third party cookies are included too (the webdriver cookie
    API used elsewhere only returns cookies visible to the current page).

    db_path: path to Chrome's "Cookies" SQLite file. Defaults to the
             profile directory configured via --user-data-dir.

    Returns a list of dicts, one per cookie row.
    """
    query = ("SELECT creation_utc, host_key, name, path, expires_utc, "
             "is_secure, is_httponly, has_expires, is_persistent, "
             "firstpartyonly FROM cookies")
    db = sqlite3.connect(db_path)
    try:
        # sqlite3.Row lets us convert each row to a dict keyed by column name.
        db.row_factory = sqlite3.Row
        cursor = db.cursor()
        try:
            cursor.execute(query)
            return [dict(row) for row in cursor.fetchall()]
        finally:
            cursor.close()
    finally:
        # Always release the DB handle, even if the query fails
        # (e.g. schema differences between Chrome versions).
        db.close()
@tenacity.retry(stop=tenacity.stop_after_attempt(3),
retry=tenacity.retry_if_exception_type(TimeoutException))
@ -129,10 +167,23 @@ class Checker(AbstractChecker):
def capture_log(self):
    """
    Return browser console log entries with level "SEVERE" or "WARNING".
    """
    relevant_levels = ('WARNING', 'SEVERE')
    return [entry for entry in self.driver.get_log('browser')
            if entry['level'] in relevant_levels]
@tenacity.retry(stop=tenacity.stop_after_attempt(3),
                retry=tenacity.retry_if_exception_type(TimeoutException))
def scroll_to_bottom(self):
    """
    Scroll through the entire page once to trigger loading of all resources.

    Retried up to 3 times if the page load times out.
    """
    height = int(self.driver.execute_script("return document.body.scrollHeight"))
    # ceil, not floor: with floor the last partial 1000px segment
    # (up to 999 pixels at the bottom of the page) was never scrolled,
    # so resources lazy-loaded near the bottom could be missed.
    steps = math.ceil(height / 1000)
    for _ in range(steps):
        self.driver.execute_script("window.scrollBy(0,1000)")
        # short pause so lazy-loading has a chance to kick in per step
        time.sleep(0.2)

View File

@ -12,6 +12,7 @@ from rating import feeds
from rating import https
from rating import no_network_errors
from rating import no_script_errors
from rating import no_third_party_cookies
from rating import reachable
from rating import resolvable
from rating import response_duration
@ -40,6 +41,7 @@ def calculate_rating(results):
'HTTP_RESPONSE_DURATION': response_duration,
'NO_NETWORK_ERRORS': no_network_errors,
'NO_SCRIPT_ERRORS': no_script_errors,
'NO_THIRD_PARTY_COOKIES': no_third_party_cookies,
'RESPONSIVE': responsive_layout,
'SITE_REACHABLE': reachable,
'SOCIAL_MEDIA_LINKS': social_media_links,

View File

@ -0,0 +1,59 @@
"""
Adds a point if the site sets no third party cookies.
"""
import logging
from urllib.parse import urlparse
from rating.abstract_rater import AbstractRater
class Rater(AbstractRater):
    """
    Adds a point if the site sets no third party cookies.
    """

    rating_type = 'boolean'
    default_value = False
    depends_on_checks = ['load_in_browser']
    max_score = 1

    def __init__(self, check_results):
        super().__init__(check_results)

    def rate(self):
        """
        Inspect the cookies recorded by the load_in_browser check for
        every URL and award the point only if at least one URL was
        checked and none of them set a third party cookie.
        """
        value = self.default_value
        score = 0

        found_urls = 0
        found_urls_with_third_party_cookie = 0

        for url, result in self.check_results['load_in_browser'].items():
            found_urls += 1

            cookies = result['cookies']
            # Skip URLs with no cookie information. Anything that is not
            # a non-empty list (None, [], unexpected types) counts as
            # "no cookies seen", same as before.
            if not isinstance(cookies, list) or not cookies:
                continue

            # Hostname of the checked URL (urlparse.hostname strips any
            # port and lowercases, unlike netloc).
            site_host = (urlparse(url).hostname or '').lower()

            for cookie in cookies:
                # Chrome stores domain cookies with a leading dot
                # (".example.com"); strip it before comparing. A plain
                # suffix check would (a) reject the first-party cookie
                # ".example.com" on the site "example.com" and
                # (b) falsely accept "notexample.com" as matching
                # "example.com", so match on a dot boundary instead.
                cookie_host = cookie['host_key'].lstrip('.').lower()
                if site_host == cookie_host or site_host.endswith('.' + cookie_host):
                    # first party cookie
                    logging.debug("Cookie with host_key %s matches site URL %s" % (cookie['host_key'], site_host))
                    continue

                # third party cookie; one is enough to flag this URL
                logging.debug("Cookie with host_key %s is a third party cookie" % cookie['host_key'])
                found_urls_with_third_party_cookie += 1
                break

        if found_urls > 0 and found_urls_with_third_party_cookie == 0:
            value = True
            score = self.max_score

        return {
            'type': self.rating_type,
            'value': value,
            'score': score,
            'max_score': self.max_score,
        }