2018-07-20 18:00:02 +02:00
|
|
|
import requests
|
|
|
|
import json
|
|
|
|
from pprint import pprint
|
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
|
2018-07-21 17:37:45 +02:00
|
|
|
def decode(s):
    """Reverse the backslash escaping left over from ``str(response.content)``.

    Two substitutions, in order: ``\\'`` becomes ``'``, then ``\\\\"``
    becomes ``\\"`` — enough to make the embedded JSON blob parseable.
    """
    partially_unescaped = s.replace(r"\'", "'")
    return partially_unescaped.replace(r'\\"', r'\"')
|
2018-07-20 18:00:02 +02:00
|
|
|
|
2018-07-21 13:07:00 +02:00
|
|
|
def scrapeInstagramData(username):
    """Scrape follower count and verified flag from an Instagram profile page.

    Fetches ``https://www.instagram.com/<username>``, extracts the JSON blob
    Instagram embeds in the ``window._sharedData`` script tag, and reads the
    follower count and verification status out of it.

    Returns:
        ``(follower_count, is_verified)``; ``(0, False)`` when the
        shared-data blob cannot be found in the response.
    """
    url = "https://www.instagram.com/" + username
    r = requests.get(url)
    s = str(r.content)

    # The profile data sits as a JSON literal between these two markers.
    part1 = """<script type="text/javascript">window._sharedData = """
    part2 = """;</script>"""
    pattern = part1 + "(.*?)" + part2
    result = re.search(pattern, s)

    if result:
        # str(bytes) leaves backslash escapes in the text; undo them
        # before handing the blob to the JSON parser.
        decoded = decode(result[1])
        data = json.loads(decoded)
        user = data["entry_data"]["ProfilePage"][0]["graphql"]["user"]
        return user["edge_followed_by"]["count"], user["is_verified"]
    else:
        print("No data found for", username, file=sys.stderr)
        # Match the failure contract of the other scrapers instead of
        # implicitly returning None (which would break tuple unpacking).
        return 0, False
|
2018-07-20 18:00:02 +02:00
|
|
|
|
2019-09-25 15:44:29 +02:00
|
|
|
def scrapeFacebookData(username):
    """Scrape like count and verified badge from a Facebook page.

    Looks for the German-locale "Gefällt ... Mal" like counter and the
    blue-badge marker text in the raw page HTML.

    Returns:
        ``(like_count, is_verified)``; the count falls back to 0 when the
        like counter is not present in the response.
    """
    page = requests.get("https://www.facebook.com/" + username)
    html = str(page.content)

    # The blue verification badge shows up as this German tooltip text.
    verified = "Das blaue Verifizierungsabzeichen" in html

    match = re.search(r"Gefällt ([\d\.]+) Mal", html)
    if match is None:
        print("No data found for", username, file=sys.stderr)
        return 0, verified
    # German number formatting: "." is the thousands separator.
    return int(match[1].replace(".", "")), verified
|
2018-07-20 18:00:02 +02:00
|
|
|
|
2019-09-25 15:44:29 +02:00
|
|
|
def scrapeTwitterData(username):
    """Scrape follower count and verified badge from a Twitter profile.

    Reads the German-locale follower tooltip and the profile badge CSS
    class marker out of the raw page HTML.

    Returns:
        ``(follower_count, is_verified)``; the count falls back to 0 when
        the follower tooltip is not present in the response.
    """
    response = requests.get("https://www.twitter.com/" + username)
    body = str(response.content)

    # Verified accounts carry the badge container in their profile header.
    verified = "ProfileHeaderCard-badges" in body

    hit = re.search(r' title="([\d\.]+) Follower"', body)
    if hit:
        # "." is the German thousands separator — strip it before parsing.
        followers = int(hit[1].replace(".", ""))
        return followers, verified

    print("No data found for", username, file=sys.stderr)
    return 0, verified
|
2019-06-14 20:24:33 +02:00
|
|
|
|
2018-07-20 18:00:02 +02:00
|
|
|
if __name__ == '__main__':
    # Smoke-test each scraper against the German Green Party's accounts,
    # printing the (count, verified) tuple for every platform in turn.
    checks = (
        (scrapeFacebookData, "B90DieGruenen"),
        (scrapeTwitterData, "Die_Gruenen"),
        (scrapeInstagramData, "die_gruenen"),
    )
    for scraper, handle in checks:
        print(scraper(handle))
|