You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
99 lines
3.5 KiB
Python
99 lines
3.5 KiB
Python
"""
|
|
The checks module provides the functionality to gather information about a
site or individual pages and to test certain features of them.
|
|
"""
|
|
|
|
import logging
|
|
|
|
from checks import certificate
|
|
from checks import charset
|
|
from checks import dns_resolution
|
|
from checks import domain_variations
|
|
from checks import duplicate_content
|
|
from checks import frameset
|
|
from checks import generator
|
|
from checks import html_head
|
|
from checks import http_and_https
|
|
from checks import hyperlinks
|
|
from checks import load_favicons
|
|
from checks import load_feeds
|
|
from checks import load_in_browser
|
|
from checks import page_content
|
|
from checks import url_canonicalization
|
|
from checks import url_reachability
|
|
|
|
from checks.config import Config
|
|
|
|
|
|
def perform_checks(input_url):
    """
    Executes all our URL/site checks and returns the collected results.

    The checks run in a fixed sequence. Every check receives the current
    config and the results of all checks executed before it. After a check
    has run, its (possibly modified) config is handed on to the next check.

    A check whose declared result dependencies are missing or empty is
    skipped entirely and gets no entry in the returned dict.

    Args:
        input_url: The URL to check. It is passed to Config as the only
            entry of the initial ``urls`` list.

    Returns:
        A dict mapping each executed check's name to the value returned by
        its ``run()`` method, or to the value returned by its
        ``post_hook()`` if that value is not None.
    """

    # The sequence of checks to run. Order is important!
    # Checks which expand the URLs list must come first.
    # After that, dependencies (encoded in the checks) have to be fulfilled.
    check_modules = [
        ('domain_variations', domain_variations),
        ('http_and_https', http_and_https),
        ('dns_resolution', dns_resolution),
        ('url_reachability', url_reachability),
        ('certificate', certificate),
        ('url_canonicalization', url_canonicalization),
        ('page_content', page_content),
        ('duplicate_content', duplicate_content),
        ('charset', charset),
        ('html_head', html_head),
        ('frameset', frameset),
        ('hyperlinks', hyperlinks),
        ('generator', generator),
        ('load_favicons', load_favicons),
        ('load_feeds', load_feeds),
        ('load_in_browser', load_in_browser),
    ]

    results = {}

    # TODO:
    # Set screenshot_bucket_name and storage_credentials_path
    # based on flags.
    config = Config(urls=[input_url],
                    user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) ' +
                    'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 ' +
                    'Safari/537.36 green-spider/0.2',
                    screenshot_bucket_name='green-spider-screenshots.sendung.de',
                    screenshot_datastore_kind='webscreenshot',
                    storage_credentials_path='/secrets/screenshots-uploader.json',
                    datastore_credentials_path='/secrets/datastore-writer.json')

    # Iterate over all checks.
    for check_name, check in check_modules:

        # checker is the individual test/assertion handler we instantiate
        # for each check step.
        checker = check.Checker(config=config,
                                previous_results=results)

        # Ensure that dependencies are met for the checker.
        # A dependency counts as unmet if the named check has no result
        # yet, or if its result is None, an empty dict or an empty list.
        dependencies_met = True
        for dep in checker.depends_on_results():
            if (dep not in results or results[dep] is None
                    or results[dep] == {} or results[dep] == []):
                logging.debug("Skipping check %s as dependency %s is not met",
                              check_name, dep)
                dependencies_met = False

        # The skip has to happen here, at the level of the outer loop.
        # A `continue` inside the dependency loop above would only move on
        # to the next dependency and the check would still be executed.
        if not dependencies_met:
            continue

        # Execute the checker's main function.
        result = checker.run()
        results[check_name] = result

        # Execute any cleanup/aftermath function (if given) for the checker.
        modified_results = checker.post_hook(result)
        if modified_results is not None:
            results[check_name] = modified_results

        # Update config for the next check(s) in the sequence.
        config = checker.config
        logging.debug("config after check %s: %r", check_name, config)

    return results
|