green-spider/test.py

65 lines
1.9 KiB
Python
Raw Normal View History

2018-05-03 11:44:06 +02:00
import unittest
2018-05-04 00:38:44 +02:00
import requests
import responses
2018-05-03 11:44:06 +02:00
import spider
class TestSpider(unittest.TestCase):
2018-05-03 13:55:34 +02:00
def test_derive_test_hostnames(self):
# case 1
2018-05-03 11:44:06 +02:00
hn = spider.derive_test_hostnames('www.my-domain.de')
expected = ['my-domain.de', 'www.my-domain.de']
self.assertEqual(hn, expected)
2018-05-03 13:55:34 +02:00
# case 2
2018-05-03 11:44:06 +02:00
hn = spider.derive_test_hostnames('domain.de')
2018-05-03 11:46:03 +02:00
expected = ['domain.de', 'www.domain.de']
2018-05-03 11:44:06 +02:00
self.assertEqual(hn, expected)
2018-05-03 13:55:34 +02:00
def test_reduce_urls(self):
# This is our testdata
testdata = [
{'url': 'one', 'error': None, 'redirects_to': None},
{'url': 'two', 'error': 'Yes', 'redirects_to': None},
{'url': 'three', 'error': None, 'redirects_to': 'five'},
]
expected_result = ['five', 'one']
result = spider.reduce_urls(testdata)
self.assertEqual(result, expected_result)
2018-05-04 00:38:44 +02:00
@responses.activate
def test_check_content1(self):
"""
Very basic test of our content analysis function
"""
url = 'http://my.url'
responses.add(responses.GET, url, status=200,
content_type='text/html',
body='''
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>The title</title>
</head>
</html>
''')
r = requests.get(url)
result = spider.check_content(r)
del result['html'] # don't want to have the messy HTML part in comparison
expected_result = {
'icon': None,
'title': 'The title',
'generator': None,
'feeds': [],
'encoding': 'ISO-8859-1',
'canonical_link': None,
'opengraph': None
}
self.assertDictEqual(result, expected_result)
2018-05-03 13:55:34 +02:00
2018-05-03 11:44:06 +02:00
if __name__ == '__main__':
unittest.main()