mirror of
https://github.com/netzbegruenung/social-spider.git
synced 2024-05-04 03:33:42 +02:00
wip on add verification info
This commit is contained in:
parent
fdde736b91
commit
f7456fd9b3
20
scraper.py
20
scraper.py
|
@ -24,33 +24,35 @@ def scrapeInstagramData(username):
|
|||
else:
|
||||
print("No data found for", username, file=sys.stderr)
|
||||
|
||||
def scrapeFacebookData(username):
    """Scrape the like count and verification flag of a Facebook page.

    Fetches ``https://www.facebook.com/<username>`` and searches the
    returned markup for the "Gefällt ... Mal" like counter and for the
    verification-badge text.

    Args:
        username: Facebook page name; appended to the base URL as-is.

    Returns:
        A ``(likes, verified)`` tuple. ``likes`` is an int (0 when no
        counter is found in the page), ``verified`` is a bool telling
        whether the badge text occurs in the page.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    url = "https://www.facebook.com/" + username
    # A timeout keeps one unresponsive request from stalling the whole run.
    r = requests.get(url, timeout=30)

    # Decode the body into real text. str(r.content) would produce the
    # bytes repr ("b'...'") with every non-ASCII byte escaped, so the
    # umlaut in the pattern below could never match.
    s = r.content.decode("utf-8", errors="replace")

    verified = "Das blaue Verifizierungsabzeichen" in s
    # NOTE(review): the pattern assumes the page is served in German, where
    # "." is the thousands separator (e.g. "Gefällt 1.234 Mal") -- confirm.
    pattern = r"Gefällt ([\d\.]+) Mal"
    result = re.search(pattern, s)
    if result:
        digits = result[1].replace(".", "")
        # The character class also accepts a run of bare dots; treat that
        # as "no data" instead of letting int("") raise.
        if digits:
            return int(digits), verified

    print("No data found for", username, file=sys.stderr)
    return 0, verified
|
||||
|
||||
def scrapeTwitterData(username):
    """Scrape the follower count and verification flag of a Twitter profile.

    Fetches ``https://www.twitter.com/<username>`` and searches the returned
    markup for a ``title="<n> Follower"`` attribute and for the
    ``ProfileHeaderCard-badges`` marker.

    Args:
        username: Twitter handle; appended to the base URL as-is.

    Returns:
        A ``(followers, verified)`` tuple. ``followers`` is an int (0 when
        no counter is found in the page), ``verified`` is a bool telling
        whether the badge marker occurs in the page.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    url = "https://www.twitter.com/" + username
    # A timeout keeps one unresponsive request from stalling the whole run.
    r = requests.get(url, timeout=30)

    # Match against decoded text rather than the bytes repr that
    # str(r.content) would yield (escaped bytes, "b'...'" wrapper).
    s = r.content.decode("utf-8", errors="replace")

    verified = "ProfileHeaderCard-badges" in s
    # "." is stripped below as a thousands separator, e.g. "12.345 Follower".
    pattern = r' title="([\d\.]+) Follower"'
    result = re.search(pattern, s)
    if result:
        digits = result[1].replace(".", "")
        # The character class also accepts a run of bare dots; treat that
        # as "no data" instead of letting int("") raise.
        if digits:
            return int(digits), verified

    print("No data found for", username, file=sys.stderr)
    return 0, verified
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: queries the live sites for a few sample accounts,
    # so it needs network access.
    print(scrapeFacebookData("B90DieGruenen"))
    print(scrapeTwitterData("Die_Gruenen"))

    # NOTE(review): scrapeInstagramData appears to return nothing usable
    # when no data is found (it logs "No data found" itself), so guard the
    # lookup instead of subscripting the result blindly.
    instaData = scrapeInstagramData("die_gruenen")
    if instaData:
        print(instaData["edge_followed_by"]["count"])
|
||||
|
|
18
spider.py
18
spider.py
|
@ -6,7 +6,7 @@ from pprint import pprint
|
|||
import sys
|
||||
import re
|
||||
import json
|
||||
from scraper import scrapeFacebookLikes, scrapeInstagramData, scrapeTwitterFollowers
|
||||
from scraper import scrapeFacebookData, scrapeInstagramData, scrapeTwitterData
|
||||
from time import sleep
|
||||
|
||||
# Git repo for our data
|
||||
|
@ -107,11 +107,14 @@ def main():
|
|||
for entry in dir_entries():
|
||||
fbname = "--"
|
||||
fbLikes = 0
|
||||
fbVerified = False
|
||||
twtname = "--"
|
||||
twtFollower = 0
|
||||
twtVerified = False
|
||||
instaName = "--"
|
||||
instaFollower = 0
|
||||
|
||||
instaVerified = False
|
||||
|
||||
if not entry.get("urls"):
|
||||
continue
|
||||
for url in entry["urls"]:
|
||||
|
@ -119,26 +122,26 @@ def main():
|
|||
fbname = getFacebookName(url["url"])
|
||||
if fbname:
|
||||
try:
|
||||
fbLikes = scrapeFacebookLikes(fbname)
|
||||
fbLikes, fbVerified = scrapeFacebookData(fbname)
|
||||
sleep(0.1)
|
||||
except Exception as e:
|
||||
print("FACEBOOK ERROR for", url["url"], "--", fbname, file=sys.stderr)
|
||||
print(e, file=sys.stderr)
|
||||
continue
|
||||
print(" FB", fbname, fbLikes)
|
||||
print(" FB", fbname, fbLikes, fbVerified)
|
||||
fbcount += 1
|
||||
|
||||
elif url["type"] == "TWITTER":
|
||||
twtname = getTwitterName(url["url"])
|
||||
try:
|
||||
twtFollower = scrapeTwitterFollowers(twtname)
|
||||
twtFollower, twtVerified = scrapeTwitterData(twtname)
|
||||
sleep(0.1)
|
||||
except Exception as e:
|
||||
print("TWITTER ERROR for", url["url"], "--", twtname, file=sys.stderr)
|
||||
print(e, file=sys.stderr)
|
||||
continue
|
||||
twtcount += 1
|
||||
print(" TWITTER", twtname, twtFollower)
|
||||
print(" TWITTER", twtname, twtFollower, twtVerified)
|
||||
|
||||
elif url["type"] == "INSTAGRAM":
|
||||
instaName = getInstagramName(url["url"])
|
||||
|
@ -146,13 +149,14 @@ def main():
|
|||
instaData = scrapeInstagramData(instaName)
|
||||
if instaData:
|
||||
instaFollower = instaData["edge_followed_by"]["count"]
|
||||
instaVerified = instaData["is_verified"]
|
||||
sleep(0.1)
|
||||
except Exception as e:
|
||||
print("INSTAGRAM ERROR for", url["url"], "--", instaName, file=sys.stderr)
|
||||
print(e, file=sys.stderr)
|
||||
continue
|
||||
instacount += 1
|
||||
print(" INSTA", instaName, instaFollower)
|
||||
print(" INSTA", instaName, instaFollower, instaVerified)
|
||||
|
||||
typ = entry.get("type")
|
||||
level = entry.get("level", "")
|
||||
|
|
Loading…
Reference in a new issue