WIP: add verification (verified-badge) info to the social-media scrapers

This commit is contained in:
Lukas Mehl 2019-09-25 15:44:29 +02:00
parent fdde736b91
commit f7456fd9b3
2 changed files with 22 additions and 16 deletions

View file

@ -24,33 +24,35 @@ def scrapeInstagramData(username):
else:
print("No data found for", username, file=sys.stderr)
def scrapeFacebookData(username):
    """Scrape the public Facebook page of *username*.

    Fetches https://www.facebook.com/<username> and extracts the like
    count plus whether the page carries the blue verification badge.

    Returns:
        tuple[int, bool]: ``(likes, verified)``; ``likes`` is 0 when the
        count could not be found in the page (a notice is printed to
        stderr in that case).
    """
    url = "https://www.facebook.com/" + username
    r = requests.get(url)
    s = str(r.content)
    # German-locale marker text for the blue verification badge.
    verified = "Das blaue Verifizierungsabzeichen" in s
    # German-locale like count: "Gefällt <n> Mal", '.' as thousands separator.
    pattern = r"Gefällt ([\d\.]+) Mal"
    result = re.search(pattern, s)
    if result:
        # Strip the thousands separators before converting to int.
        return int(result[1].replace(".", "")), verified
    else:
        print("No data found for", username, file=sys.stderr)
        return 0, verified
def scrapeTwitterData(username):
    """Scrape the public Twitter profile of *username*.

    Fetches https://www.twitter.com/<username> and extracts the follower
    count plus whether the profile shows the verified badge.

    Returns:
        tuple[int, bool]: ``(followers, verified)``; ``followers`` is 0
        when the count could not be found in the page (a notice is
        printed to stderr in that case).
    """
    url = "https://www.twitter.com/" + username
    r = requests.get(url)
    s = str(r.content)
    # CSS class of the badge container in the profile header markup.
    verified = "ProfileHeaderCard-badges" in s
    # German-locale follower count in a link title, '.' as thousands separator.
    pattern = r' title="([\d\.]+) Follower"'
    result = re.search(pattern, s)
    if result:
        # Strip the thousands separators before converting to int.
        return int(result[1].replace(".", "")), verified
    else:
        print("No data found for", username, file=sys.stderr)
        return 0, verified
if __name__ == '__main__':
    # Ad-hoc smoke test: scrape the German Green party's public profiles
    # and print the raw results.
    print(scrapeFacebookData("B90DieGruenen"))
    print(scrapeTwitterData("Die_Gruenen"))
    print(scrapeInstagramData("die_gruenen")["edge_followed_by"]["count"])

View file

@ -6,7 +6,7 @@ from pprint import pprint
import sys
import re
import json
from scraper import scrapeFacebookLikes, scrapeInstagramData, scrapeTwitterFollowers
from scraper import scrapeFacebookData, scrapeInstagramData, scrapeTwitterData
from time import sleep
# Git repo for our data
@ -107,11 +107,14 @@ def main():
for entry in dir_entries():
fbname = "--"
fbLikes = 0
fbVerified = False
twtname = "--"
twtFollower = 0
twtVerified = False
instaName = "--"
instaFollower = 0
instaVerified = False
if not entry.get("urls"):
continue
for url in entry["urls"]:
@ -119,26 +122,26 @@ def main():
fbname = getFacebookName(url["url"])
if fbname:
try:
fbLikes = scrapeFacebookLikes(fbname)
fbLikes, fbVerified = scrapeFacebookData(fbname)
sleep(0.1)
except Exception as e:
print("FACEBOOK ERROR for", url["url"], "--", fbname, file=sys.stderr)
print(e, file=sys.stderr)
continue
print(" FB", fbname, fbLikes)
print(" FB", fbname, fbLikes, fbVerified)
fbcount += 1
elif url["type"] == "TWITTER":
twtname = getTwitterName(url["url"])
try:
twtFollower = scrapeTwitterFollowers(twtname)
twtFollower, twtVerified = scrapeTwitterData(twtname)
sleep(0.1)
except Exception as e:
print("TWITTER ERROR for", url["url"], "--", twtname, file=sys.stderr)
print(e, file=sys.stderr)
continue
twtcount += 1
print(" TWITTER", twtname, twtFollower)
print(" TWITTER", twtname, twtFollower, twtVerified)
elif url["type"] == "INSTAGRAM":
instaName = getInstagramName(url["url"])
@ -146,13 +149,14 @@ def main():
instaData = scrapeInstagramData(instaName)
if instaData:
instaFollower = instaData["edge_followed_by"]["count"]
instaVerified = instaData["is_verified"]
sleep(0.1)
except Exception as e:
print("INSTAGRAM ERROR for", url["url"], "--", instaName, file=sys.stderr)
print(e, file=sys.stderr)
continue
instacount += 1
print(" INSTA", instaName, instaFollower)
print(" INSTA", instaName, instaFollower, instaVerified)
typ = entry.get("type")
level = entry.get("level", "")