mirror of
https://github.com/netzbegruenung/green-spider.git
synced 2024-05-01 16:44:51 +02:00
Add test for duplicate_content check
This commit is contained in:
parent
4b7537fd17
commit
b6ed615e08
56
checks/duplicate_content_test.py
Normal file
56
checks/duplicate_content_test.py
Normal file
|
@ -0,0 +1,56 @@
|
|||
import httpretty
|
||||
from httpretty import httprettified
|
||||
import unittest
|
||||
|
||||
from checks import duplicate_content
|
||||
from checks import page_content
|
||||
from checks.config import Config
|
||||
|
||||
@httprettified
class TestDuplicateContent(unittest.TestCase):
    """Tests for the duplicate_content check, run against mocked HTTP."""

    def test_similar(self):
        """Two URLs serving the same body are reported with similarity 1.0.

        The test also expects that, once the check has run, the checker's
        config lists only the first of the two URLs.
        """
        html = """
<html>
<head>
<title>Title</title>
</head>
<body>
<h1 class="title">Headline</h1>
<p class="intro">Second paragraph with <strong>strong words</strong></p>
<p class="text">Third paragraph</p>
<ul class="somelist">
<li>A list item</li>
</ul>
</body>
</html>
"""

        plain_url = 'http://example.com/'
        www_url = 'http://www.example.com/'

        # Serve the identical document under both host names.
        for address in (plain_url, www_url):
            httpretty.register_uri(httpretty.GET, address, body=html)

        # The duplicate_content checker is handed the output of the
        # page_content check, so that one runs first.
        content_checker = page_content.Checker(
            config=Config(urls=[plain_url, www_url]),
            previous_results={},
        )
        earlier = {'page_content': content_checker.run()}

        dup_checker = duplicate_content.Checker(
            config=content_checker.config,
            previous_results=earlier,
        )
        outcome = dup_checker.run()
        # Read the URL list only after run() has completed.
        remaining_urls = dup_checker.config.urls

        expected = {
            'http://example.com/ http://www.example.com/': {
                'exception': None,
                'similarity': 1.0,
            },
        }
        self.assertEqual(outcome, expected)
        self.assertEqual(remaining_urls, ['http://example.com/'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Allow running this test module directly as a script.
    unittest.main()
|
Loading…
Reference in a new issue