This commit is contained in:
Marian Steinbach 2019-05-05 22:41:34 +02:00
commit 9f7efddc61
5 changed files with 91 additions and 21 deletions

View File

@ -5,21 +5,22 @@ functionality of a site or individual pages.
import logging
from checks import charset
from checks import certificate
from checks import charset
from checks import dns_resolution
from checks import duplicate_content
from checks import domain_variations
from checks import duplicate_content
from checks import frameset
from checks import generator
from checks import html_head
from checks import http_and_https
from checks import hyperlinks
from checks import page_content
from checks import load_favicons
from checks import load_feeds
from checks import load_in_browser
from checks import url_reachability
from checks import page_content
from checks import url_canonicalization
from checks import url_reachability
from checks.config import Config
@ -46,6 +47,7 @@ def perform_checks(input_url):
('frameset', frameset),
('hyperlinks', hyperlinks),
('generator', generator),
('load_favicons', load_favicons),
('load_feeds', load_feeds),
('load_in_browser', load_in_browser),
]

35
checks/load_favicons.py Normal file
View File

@ -0,0 +1,35 @@
"""
Checks whether /favicon.ico exists for every configured URL, so it can serve as a fallback when no icon has been found otherwise
"""
import logging
from time import mktime
from datetime import datetime
from urllib.parse import urlparse
import requests
from checks.abstract_checker import AbstractChecker
class Checker(AbstractChecker):
    """
    Probes the conventional /favicon.ico location of every configured URL.

    The result returned by run() is a dict that maps each input URL whose
    /favicon.ico answered a HEAD request with status 200 to a dict of the
    form {'url': <icon URL>}. URLs without such an icon are left out.
    """

    # Seconds to wait for the server. requests applies no timeout by default,
    # so without this a single stalled server would block the whole run.
    request_timeout = 10

    def __init__(self, config, previous_results=None):
        super().__init__(config, previous_results)
        # Maps input URL -> {'url': icon URL} for every favicon found.
        self.favicons = {}

    def run(self):
        """
        Probes all URLs in self.config.urls and returns the collected
        favicons dict.
        """
        for url in self.config.urls:
            self.load_favicon(url)

        return self.favicons

    def load_favicon(self, url):
        """
        This loads /favicon.ico for the site's URL

        The icon URL is built from the scheme, host name and (if given)
        port of url; path, query and fragment are discarded. On a 200
        response the icon URL is stored in self.favicons under url.
        Unusable URLs and failed requests are treated as "no favicon".
        """
        parsed = urlparse(url)

        try:
            # Accessing .port raises ValueError for a malformed port.
            port = parsed.port
        except ValueError:
            logging.debug("Invalid port in URL %s, skipping favicon check", url)
            return

        host = parsed.hostname
        if not parsed.scheme or not host:
            # Relative or scheme-less URL: there is no origin to probe.
            logging.debug("Cannot derive favicon URL from %s", url)
            return

        if ":" in host:
            # urlparse strips the brackets from IPv6 literals; restore them.
            host = "[" + host + "]"
        if port is not None:
            # Keep an explicit port, otherwise a different origin is probed.
            host = "%s:%d" % (host, port)

        ico_url = parsed.scheme + "://" + host + "/favicon.ico"

        try:
            r = requests.head(ico_url, timeout=self.request_timeout)
        except requests.exceptions.RequestException as exc:
            # An unreachable site simply has no favicon we can load; do not
            # let one failing request abort the checks for the other URLs.
            logging.debug("HEAD request for %s failed: %s", ico_url, exc)
            return

        if r.status_code == 200:
            self.favicons[url] = {
                'url': ico_url,
            }

View File

@ -0,0 +1,43 @@
from pprint import pprint
import httpretty
from httpretty import httprettified
import unittest
from checks import load_favicons
from checks.config import Config
@httprettified
class TestFavicons(unittest.TestCase):
    """Exercises the load_favicons checker against mocked HTTP endpoints."""

    def test_favicons(self):
        """Only the site whose /favicon.ico HEAD request succeeds is reported."""
        # First site: the icon exists (no explicit status is registered)
        existing_icon = 'http://example1.com/favicon.ico'
        httpretty.register_uri(
            httpretty.HEAD,
            existing_icon,
            body='',
            adding_headers={"Content-type": "image/x-ico"},
        )

        # Second site: the icon request is answered with 404
        missing_icon = 'http://example2.com/favicon.ico'
        httpretty.register_uri(
            httpretty.HEAD,
            missing_icon,
            status=404,
            body='Not found',
            adding_headers={"Content-type": "text/plain"},
        )

        site_urls = ['http://example1.com/path/', 'http://example2.com/']
        checker = load_favicons.Checker(config=Config(urls=site_urls))

        result = checker.run()
        pprint(result)

        # The result is keyed by the input URL, not by the icon URL
        expected = {
            'http://example1.com/path/': {
                'url': 'http://example1.com/favicon.ico'
            }
        }
        self.assertEqual(result, expected)

View File

@ -26,21 +26,5 @@ class TestLoadInBrowser(unittest.TestCase):
self.assertEqual(result[url]['font_families'], ['"times new roman"'])
def test_cookies(self):
    """Loads a cookie-setting page and checks the cookies the checker reports"""
    cookie_url = 'https://httpbin.org/cookies/set/cookiename/cookievalue'

    checker = load_in_browser.Checker(
        config=Config(urls=[cookie_url]),
        previous_results={},
    )
    reported_cookies = checker.run()[cookie_url]['cookies']

    # Exactly one cookie is expected, carrying the name/value from the URL
    expected_cookie = {
        'domain': 'httpbin.org',
        'httpOnly': False,
        'name': 'cookiename',
        'path': '/',
        'secure': False,
        'value': 'cookievalue'
    }
    self.assertEqual(reported_cookies, [expected_cookie])
if __name__ == '__main__':
unittest.main()

View File

@ -8,7 +8,7 @@ class Rater(AbstractRater):
rating_type = 'boolean'
default_value = False
depends_on_checks = ['html_head']
depends_on_checks = ['html_head', 'load_favicons']
max_score = 1
def __init__(self, check_results):
@ -23,6 +23,12 @@ class Rater(AbstractRater):
value = True
score = self.max_score
break
# /favicon.ico as fall back
if url in self.check_results['load_favicons']:
value = True
score = self.max_score
break
return {
'type': self.rating_type,