import httpretty
from httpretty import httprettified
import unittest
from checks import duplicate_content
from checks import page_content
from checks.config import Config
@httprettified
class TestDuplicateContent(unittest.TestCase):
    """Tests for the ``duplicate_content`` check."""

    def test_identical(self):
        """Two URLs that serve the same body are reported as duplicates.

        Both URLs are registered with httpretty to return an identical
        body. The ``page_content`` check runs first and its output is
        handed to the ``duplicate_content`` check as previous results.
        The test then expects a similarity of 1.0 for the URL pair and
        only the first URL to remain in the checker's config.
        """
        # Spelled out piece by piece so the served body is independent of
        # how this source file happens to be indented.
        page_body = (
            "\n"
            "Title\n"
            "Headline\n"
            "Second paragraph with strong words\n"
            "Third paragraph\n"
        )

        url1 = 'http://example.com/'
        url2 = 'http://www.example.com/'
        for url in (url1, url2):
            httpretty.register_uri(httpretty.GET, url, body=page_body)

        config = Config(urls=[url1, url2])

        # The duplicate check consumes the page_content results, so that
        # check has to run first.
        content_checker = page_content.Checker(config=config, previous_results={})
        previous = {'page_content': content_checker.run()}

        # Reuse the config object held by the page_content checker.
        duplicate_checker = duplicate_content.Checker(
            config=content_checker.config,
            previous_results=previous,
        )
        outcome = duplicate_checker.run()
        remaining_urls = duplicate_checker.config.urls

        expected = {
            'http://example.com/ http://www.example.com/': {
                'exception': None,
                'similarity': 1.0
            }
        }
        self.assertEqual(outcome, expected)
        self.assertEqual(remaining_urls, ['http://example.com/'])
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()