Add icon retrieval

This commit is contained in:
Marian Steinbach 2018-04-09 23:02:12 +02:00
parent 313f56f39e
commit 9672e41dba

View file

@ -4,6 +4,7 @@ from bs4 import BeautifulSoup
from git import Repo from git import Repo
from multiprocessing import Pool from multiprocessing import Pool
from socket import gethostbyname_ex from socket import gethostbyname_ex
from urllib.parse import urljoin
from urllib.parse import urlparse from urllib.parse import urlparse
import certifi import certifi
import json import json
@ -148,7 +149,17 @@ def check_content(r):
result['canonical_link'] = None result['canonical_link'] = None
link = soup.find('link', rel='canonical') link = soup.find('link', rel='canonical')
if link: if link:
result['canonical_link'] = link.get('href') result['canonical_link'] = urljoin(r.url, link.get('href'))
# icon
result['icon'] = None
link = soup.find('link', rel='icon')
if link:
result['icon'] = urljoin(r.url, link.get('href'))
else:
link = soup.find('link', rel='shortcut icon')
if link:
result['icon'] = urljoin(r.url, link.get('href'))
# feed links # feed links
result['feeds'] = [] result['feeds'] = []
@ -157,10 +168,10 @@ def check_content(r):
if len(rss_links) > 0: if len(rss_links) > 0:
for l in rss_links: for l in rss_links:
result['feeds'].append(l.get('href')) result['feeds'].append(urljoin(r.url, l.get('href')))
if len(atom_links) > 0: if len(atom_links) > 0:
for l in rss_links: for l in rss_links:
result['feeds'].append(l.get('href')) result['feeds'].append(urljoin(r.url, l.get('href')))
# generator meta tag # generator meta tag
result['generator'] = None result['generator'] = None
@ -180,6 +191,7 @@ def check_content(r):
return result return result
def check_site(url): def check_site(url):
""" """
Performs our site check and returns results as a dict. Performs our site check and returns results as a dict.