From 3b8328d804a4faedc2b253c08062dd9258b24333 Mon Sep 17 00:00:00 2001 From: Marian Steinbach Date: Mon, 17 Dec 2018 17:31:09 +0100 Subject: [PATCH] Fixing several bugs in spider code --- checks/load_feeds.py | 6 +++-- checks/load_feeds_test.py | 48 +++++++++++++++++++++++++++++++++++++++ spider/spider.py | 2 +- 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/checks/load_feeds.py b/checks/load_feeds.py index bdecec5..206f721 100644 --- a/checks/load_feeds.py +++ b/checks/load_feeds.py @@ -90,7 +90,8 @@ class Checker(AbstractChecker): if max_date is None or timestamp > max_date: max_date = timestamp - return datetime.fromtimestamp(max_date) + if max_date is not None: + return datetime.fromtimestamp(max_date) def find_first_entry(self, entries): @@ -101,4 +102,5 @@ class Checker(AbstractChecker): if min_date is None or timestamp < min_date: min_date = timestamp - return datetime.fromtimestamp(min_date) + if min_date is not None: + return datetime.fromtimestamp(min_date) diff --git a/checks/load_feeds_test.py b/checks/load_feeds_test.py index ec69d6e..d008889 100644 --- a/checks/load_feeds_test.py +++ b/checks/load_feeds_test.py @@ -70,5 +70,53 @@ class TestFeed(unittest.TestCase): }) + def test_empty_feed_rss2(self): + """ + Checks RSS 2.0 + """ + + feed = """ + + + Empty Feed + http://example.com/ + + + + """ + + feed_url = 'http://example.com/feed.xml' + httpretty.register_uri(httpretty.GET, feed_url, + body=feed, + adding_headers={ + "Content-type": "application/rss+xml", + }) + + # mocking a previous result from some page + results = { + 'html_head': { + 'http://example.com/': { + 'link_rss_atom': ['http://example.com/feed.xml'] + } + } + } + config = Config(urls=['http://example.com/']) + checker = load_feeds.Checker(config=config, previous_results=results) + + result = checker.run() + print(result) + + self.assertEqual(result, { + 'http://example.com/feed.xml': { + 'exception': None, + 'title': 'Empty Feed', + 'latest_entry': None, + 'first_entry': None, + 'average_interval': None, + 'num_entries': 0, + } + }) + + if __name__ == '__main__': unittest.main() diff --git a/spider/spider.py b/spider/spider.py index 3c82f8b..e47c701 100644 --- a/spider/spider.py +++ b/spider/spider.py @@ -78,7 +78,7 @@ def work_of_queue(datastore_client, entity_kind): logging.info("Starting job %s", job["url"]) result = check_and_rate_site(entry=job) - logging.debug("Full JSON representation of returned result: %s", json.dumps(result)) + logging.debug("Full JSON representation of returned result: %s", json.dumps(result, default=str)) logging.info("Job %s finished checks", job["url"]) logging.info("Job %s writing to DB", job["url"])