mirror of
https://github.com/netzbegruenung/green-spider.git
synced 2024-05-02 00:54:52 +02:00
Fixing several bugs in spider code
This commit is contained in:
parent
3b9ead330d
commit
3b8328d804
|
@ -90,7 +90,8 @@ class Checker(AbstractChecker):
|
||||||
if max_date is None or timestamp > max_date:
|
if max_date is None or timestamp > max_date:
|
||||||
max_date = timestamp
|
max_date = timestamp
|
||||||
|
|
||||||
return datetime.fromtimestamp(max_date)
|
if max_date is not None:
|
||||||
|
return datetime.fromtimestamp(max_date)
|
||||||
|
|
||||||
|
|
||||||
def find_first_entry(self, entries):
|
def find_first_entry(self, entries):
|
||||||
|
@ -101,4 +102,5 @@ class Checker(AbstractChecker):
|
||||||
if min_date is None or timestamp < min_date:
|
if min_date is None or timestamp < min_date:
|
||||||
min_date = timestamp
|
min_date = timestamp
|
||||||
|
|
||||||
return datetime.fromtimestamp(min_date)
|
if min_date is not None:
|
||||||
|
return datetime.fromtimestamp(min_date)
|
||||||
|
|
|
@ -70,5 +70,53 @@ class TestFeed(unittest.TestCase):
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_feed_rss2(self):
|
||||||
|
"""
|
||||||
|
Checks an empty RSS 2.0 feed (channel without any items)
|
||||||
|
"""
|
||||||
|
|
||||||
|
feed = """<?xml version="1.0"?>
|
||||||
|
<rss version="2.0">
|
||||||
|
<channel>
|
||||||
|
<title>Empty Feed</title>
|
||||||
|
<link>http://example.com/</link>
|
||||||
|
<pubDate></pubDate>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
||||||
|
"""
|
||||||
|
|
||||||
|
feed_url = 'http://example.com/feed.xml'
|
||||||
|
httpretty.register_uri(httpretty.GET, feed_url,
|
||||||
|
body=feed,
|
||||||
|
adding_headers={
|
||||||
|
"Content-type": "application/rss+xml",
|
||||||
|
})
|
||||||
|
|
||||||
|
# mocking a previous result from some page
|
||||||
|
results = {
|
||||||
|
'html_head': {
|
||||||
|
'http://example.com/': {
|
||||||
|
'link_rss_atom': ['http://example.com/feed.xml']
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
config = Config(urls=['http://example.com/'])
|
||||||
|
checker = load_feeds.Checker(config=config, previous_results=results)
|
||||||
|
|
||||||
|
result = checker.run()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
self.assertEqual(result, {
|
||||||
|
'http://example.com/feed.xml': {
|
||||||
|
'exception': None,
|
||||||
|
'title': 'Empty Feed',
|
||||||
|
'latest_entry': None,
|
||||||
|
'first_entry': None,
|
||||||
|
'average_interval': None,
|
||||||
|
'num_entries': 0,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -78,7 +78,7 @@ def work_of_queue(datastore_client, entity_kind):
|
||||||
logging.info("Starting job %s", job["url"])
|
logging.info("Starting job %s", job["url"])
|
||||||
result = check_and_rate_site(entry=job)
|
result = check_and_rate_site(entry=job)
|
||||||
|
|
||||||
logging.debug("Full JSON representation of returned result: %s", json.dumps(result))
|
logging.debug("Full JSON representation of returned result: %s", json.dumps(result, default=str))
|
||||||
|
|
||||||
logging.info("Job %s finished checks", job["url"])
|
logging.info("Job %s finished checks", job["url"])
|
||||||
logging.info("Job %s writing to DB", job["url"])
|
logging.info("Job %s writing to DB", job["url"])
|
||||||
|
|
Loading…
Reference in a new issue