Add icon retrieval

This commit is contained in:
Marian Steinbach 2018-04-09 23:02:12 +02:00
parent 313f56f39e
commit 9672e41dba
1 changed file with 15 additions and 3 deletions

View File

@@ -4,6 +4,7 @@ from bs4 import BeautifulSoup
from git import Repo
from multiprocessing import Pool
from socket import gethostbyname_ex
from urllib.parse import urljoin
from urllib.parse import urlparse
import certifi
import json
@@ -148,7 +149,17 @@ def check_content(r):
result['canonical_link'] = None
link = soup.find('link', rel='canonical')
if link:
result['canonical_link'] = link.get('href')
result['canonical_link'] = urljoin(r.url, link.get('href'))
# icon
result['icon'] = None
link = soup.find('link', rel='icon')
if link:
result['icon'] = urljoin(r.url, link.get('href'))
else:
link = soup.find('link', rel='shortcut icon')
if link:
result['icon'] = urljoin(r.url, link.get('href'))
# feed links
result['feeds'] = []
@@ -157,10 +168,10 @@ def check_content(r):
if len(rss_links) > 0:
for l in rss_links:
result['feeds'].append(l.get('href'))
result['feeds'].append(urljoin(r.url, l.get('href')))
if len(atom_links) > 0:
for l in rss_links:
result['feeds'].append(l.get('href'))
result['feeds'].append(urljoin(r.url, l.get('href')))
# generator meta tag
result['generator'] = None
@@ -180,6 +191,7 @@ def check_content(r):
return result
def check_site(url):
"""
Performs our site check and returns results as a dict.