Merge branch 'master' of https://github.com/netzbegruenung/green-spider
This commit is contained in:
commit
9f7efddc61
|
@ -5,21 +5,22 @@ functionality of a site or individual pages.
|
|||
|
||||
import logging
|
||||
|
||||
from checks import charset
|
||||
from checks import certificate
|
||||
from checks import charset
|
||||
from checks import dns_resolution
|
||||
from checks import duplicate_content
|
||||
from checks import domain_variations
|
||||
from checks import duplicate_content
|
||||
from checks import frameset
|
||||
from checks import generator
|
||||
from checks import html_head
|
||||
from checks import http_and_https
|
||||
from checks import hyperlinks
|
||||
from checks import page_content
|
||||
from checks import load_favicons
|
||||
from checks import load_feeds
|
||||
from checks import load_in_browser
|
||||
from checks import url_reachability
|
||||
from checks import page_content
|
||||
from checks import url_canonicalization
|
||||
from checks import url_reachability
|
||||
|
||||
from checks.config import Config
|
||||
|
||||
|
@ -46,6 +47,7 @@ def perform_checks(input_url):
|
|||
('frameset', frameset),
|
||||
('hyperlinks', hyperlinks),
|
||||
('generator', generator),
|
||||
('load_favicons', load_favicons),
|
||||
('load_feeds', load_feeds),
|
||||
('load_in_browser', load_in_browser),
|
||||
]
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
"""
|
||||
Requests /favicon.ico from each site, as a fallback for sites where no icon has been found otherwise
|
||||
"""
|
||||
|
||||
import logging
|
||||
from time import mktime
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
from checks.abstract_checker import AbstractChecker
|
||||
|
||||
class Checker(AbstractChecker):
    """Check whether the host of each configured URL serves ``/favicon.ico``.

    The result maps every input URL whose host answered the HEAD request
    with status 200 to ``{'url': <favicon URL>}``. URLs whose host did not
    answer with 200, could not be reached, or could not be parsed are
    simply absent from the result.
    """

    # Upper bound in seconds for a single favicon request, so that one
    # unresponsive host cannot stall the whole check run.
    FAVICON_TIMEOUT_SECONDS = 10

    def __init__(self, config, previous_results=None):
        """Set up the checker with an empty result mapping."""
        super().__init__(config, previous_results)
        self.favicons = {}

    def run(self):
        """Probe every URL in ``self.config.urls`` and return the result dict."""
        for url in self.config.urls:
            self.load_favicon(url)

        return self.favicons

    def load_favicon(self, url):
        """
        Send a HEAD request for /favicon.ico on the host of ``url``.

        Only the scheme and hostname of ``url`` are used to build the
        favicon URL; path, query and port are dropped. On a 200 response
        the favicon URL is recorded in ``self.favicons`` under ``url``.
        """
        parsed = urlparse(url)

        # urlparse() yields hostname None (and an empty scheme) for input
        # like "example.com/path"; concatenating that would raise TypeError.
        if not parsed.scheme or not parsed.hostname:
            logging.debug("Cannot derive favicon URL from %r", url)
            return

        ico_url = parsed.scheme + "://" + parsed.hostname + "/favicon.ico"

        try:
            r = requests.head(ico_url, timeout=self.FAVICON_TIMEOUT_SECONDS)
        except requests.exceptions.RequestException as exc:
            # An unreachable host means "no favicon found" for this URL; it
            # must not abort the check for the remaining URLs.
            logging.debug("HEAD request for %s failed: %s", ico_url, exc)
            return

        if r.status_code == 200:
            self.favicons[url] = {
                'url': ico_url,
            }
|
|
@ -0,0 +1,43 @@
|
|||
from pprint import pprint
|
||||
|
||||
import httpretty
|
||||
from httpretty import httprettified
|
||||
import unittest
|
||||
|
||||
from checks import load_favicons
|
||||
from checks.config import Config
|
||||
|
||||
@httprettified
class TestFavicons(unittest.TestCase):
    """Run the load_favicons checker against mocked HEAD responses."""

    def test_favicons(self):
        """Only the site whose /favicon.ico request succeeds is reported."""
        # First site: a favicon is registered (no explicit status given).
        existing_icon = 'http://example1.com/favicon.ico'
        httpretty.register_uri(
            httpretty.HEAD,
            existing_icon,
            body='',
            adding_headers={"Content-type": "image/x-ico"},
        )

        # Second site: the favicon request is answered with 404.
        missing_icon = 'http://example2.com/favicon.ico'
        httpretty.register_uri(
            httpretty.HEAD,
            missing_icon,
            status=404,
            body='Not found',
            adding_headers={"Content-type": "text/plain"},
        )

        site_urls = ['http://example1.com/path/', 'http://example2.com/']
        checker = load_favicons.Checker(config=Config(urls=site_urls))

        result = checker.run()
        pprint(result)

        # The result is keyed by the input URL, not by the favicon URL.
        expected = {
            'http://example1.com/path/': {
                'url': 'http://example1.com/favicon.ico'
            }
        }
        self.assertEqual(result, expected)
|
||||
|
|
@ -26,21 +26,5 @@ class TestLoadInBrowser(unittest.TestCase):
|
|||
self.assertEqual(result[url]['font_families'], ['"times new roman"'])
|
||||
|
||||
|
||||
def test_cookies(self):
    """Load a page that sets a cookie and compare the reported cookies."""
    url = 'https://httpbin.org/cookies/set/cookiename/cookievalue'
    checker = load_in_browser.Checker(
        config=Config(urls=[url]),
        previous_results={},
    )
    result = checker.run()

    # The single cookie expected in the checker result for this URL.
    expected_cookie = {
        'domain': 'httpbin.org',
        'httpOnly': False,
        'name': 'cookiename',
        'path': '/',
        'secure': False,
        'value': 'cookievalue'
    }
    self.assertEqual(result[url]['cookies'], [expected_cookie])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -8,7 +8,7 @@ class Rater(AbstractRater):
|
|||
|
||||
rating_type = 'boolean'
|
||||
default_value = False
|
||||
depends_on_checks = ['html_head']
|
||||
depends_on_checks = ['html_head', 'load_favicons']
|
||||
max_score = 1
|
||||
|
||||
def __init__(self, check_results):
|
||||
|
@ -23,6 +23,12 @@ class Rater(AbstractRater):
|
|||
value = True
|
||||
score = self.max_score
|
||||
break
|
||||
|
||||
# /favicon.ico as fall back
|
||||
if url in self.check_results['load_favicons']:
|
||||
value = True
|
||||
score = self.max_score
|
||||
break
|
||||
|
||||
return {
|
||||
'type': self.rating_type,
|
||||
|
|
Loading…
Reference in New Issue