Detect frameset (#102)
* Add frameset checker * Remove unused variable (unrelated)
This commit is contained in:
parent
deff95306b
commit
3063a4488d
|
@ -10,6 +10,7 @@ from checks import certificate
|
|||
from checks import dns_resolution
|
||||
from checks import duplicate_content
|
||||
from checks import domain_variations
|
||||
from checks import frameset
|
||||
from checks import generator
|
||||
from checks import html_head
|
||||
from checks import http_and_https
|
||||
|
@ -41,6 +42,7 @@ def perform_checks(input_url):
|
|||
('duplicate_content', duplicate_content),
|
||||
('charset', charset),
|
||||
('html_head', html_head),
|
||||
('frameset', frameset),
|
||||
('hyperlinks', hyperlinks),
|
||||
('generator', generator),
|
||||
('load_in_browser', load_in_browser),
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
"""
|
||||
Collects information on usage of the frameset tag
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from checks.abstract_checker import AbstractChecker
|
||||
|
||||
class Checker(AbstractChecker):
    """Reports, for every configured URL, whether its HTML uses a frameset tag."""

    def __init__(self, config, previous_results=None):
        super().__init__(config, previous_results)

    def depends_on_results(self):
        """Return the names of the checks whose results this checker reads."""
        return ['page_content']

    def run(self):
        """
        Return a dict mapping each URL in self.config.urls to the value
        produced by get_framesets() for that URL.
        """
        assert 'page_content' in self.previous_results

        return {url: self.get_framesets(url) for url in self.config.urls}

    def get_framesets(self, url):
        """
        Tell whether the HTML stored for the given URL contains a frameset tag.

        Expects self.previous_results['page_content'][url]['content'] to
        carry the HTML content.

        Returns None when that content is None, otherwise a dict of the
        form {'frameset': True} or {'frameset': False}.
        """
        page_content = self.previous_results['page_content'][url]
        assert 'content' in page_content

        if page_content['content'] is None:
            return None

        soup = BeautifulSoup(page_content['content'], 'html.parser')

        # Only presence matters, so a single lookup replaces counting
        # every match.
        return {'frameset': soup.find('frameset') is not None}
|
|
@ -0,0 +1,85 @@
|
|||
import httpretty
|
||||
from httpretty import httprettified
|
||||
import unittest
|
||||
|
||||
from checks import frameset
|
||||
from checks import page_content
|
||||
from checks.config import Config
|
||||
|
||||
@httprettified
class TestFrameset(unittest.TestCase):
    """Runs the frameset checker against mocked pages with and without a frameset."""

    def _run_frameset_check(self, url, page_body):
        """
        Serve page_body for GET requests to url, run the page_content
        checker followed by the frameset checker, and return a tuple of
        (frameset result, config URLs after the run).
        """
        httpretty.register_uri(httpretty.GET, url, body=page_body)

        page_content_checker = page_content.Checker(
            config=Config(urls=[url]),
            previous_results={})
        previous = {'page_content': page_content_checker.run()}

        frameset_checker = frameset.Checker(
            config=page_content_checker.config,
            previous_results=previous)
        outcome = frameset_checker.run()

        return outcome, frameset_checker.config.urls

    def test_frameset_positive(self):
        """A page built around a frameset tag is reported as True."""
        page_body = """
            <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
            <html>
                <head>
                    <title>A frameset page</title>
                </head>
                <frameset framespacing="0" border="false" frameborder="0" rows="30,*">
                    <frame name="top" src="top.htm" scrolling="no">
                    <frame name="base" src="titel.htm" target="_top">
                    <noframes>
                        <body>
                            <p>Here we have some body content</p>
                        </body>
                    </noframes>
                </frameset>
            </html>
        """

        result, urls_after = self._run_frameset_check(
            'http://example.com/', page_body)

        self.assertEqual(result, {
            'http://example.com/': {'frameset': True}
        })
        self.assertEqual(urls_after, ['http://example.com/'])

    def test_frameset_negative(self):
        """A page without any frameset tag is reported as False."""
        page_body = """
            <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
            <html>
                <head>
                    <title>A frameset page</title>
                </head>
                <body>
                    <p>Here we have some body content</p>
                </body>
            </html>
        """

        result, urls_after = self._run_frameset_check(
            'http://example.com/', page_body)

        self.assertEqual(result, {
            'http://example.com/': {'frameset': False}
        })
        self.assertEqual(urls_after, ['http://example.com/'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -10,7 +10,6 @@ from checks.config import Config
|
|||
class TestHyperlinks(unittest.TestCase):
|
||||
|
||||
def test_links(self):
|
||||
self.maxDiff = 2000
|
||||
page_body = """
|
||||
<html>
|
||||
<head>
|
||||
|
|
Loading…
Reference in New Issue