Add Instagram scraper

2024-05-05 04:03:41 +02:00 · 2018-07-20 18:00:02 +02:00 · 2018-07-20 18:00:02 +02:00 · 2d4fb22e35
parent 3c12e67b97
commit 2d4fb22e35
1 changed files with 27 additions and 0 deletions
--- a/insta.py
+++ b/insta.py
@ -0,0 +1,27 @@
+import requests
+import json
+from pprint import pprint
+import re
+import sys
+
+
+def scrape(username, timeout=10):
+    """Fetch an Instagram profile page and return its embedded user data.
+
+    The profile HTML is searched for a ``window._sharedData = {...};``
+    script tag. The JSON object assigned there is parsed and the
+    ``graphql.user`` entry of the first ``ProfilePage`` is returned.
+
+    Args:
+        username: Account name, appended verbatim to the Instagram base URL.
+        timeout: Seconds ``requests`` waits for the server before giving up.
+
+    Returns:
+        The user dict, with its ``edge_owner_to_timeline_media.edges`` entry
+        replaced by the placeholder string ``"----"``, or ``None`` when the
+        page contains no ``window._sharedData`` script tag (a message is
+        written to stderr in that case).
+
+    Raises:
+        requests.RequestException: If the HTTP request fails or times out.
+        json.JSONDecodeError: If the embedded payload is not valid JSON.
+        KeyError, IndexError: If the payload lacks the expected
+            ``entry_data.ProfilePage[0].graphql.user`` structure.
+    """
+    url = "https://www.instagram.com/" + username
+    response = requests.get(url, timeout=timeout)
+
+    # Search the decoded text. str(response.content) would give the repr of
+    # a bytes object ("b'...'"), whose added backslash escapes corrupt the
+    # embedded JSON.
+    html = response.text
+    prefix = '<script type="text/javascript">window._sharedData = '
+    suffix = ";</script>"
+    # Escape the literal markers so the "." in "window._sharedData" is not a
+    # wildcard; DOTALL lets the payload span several lines of real text.
+    pattern = re.escape(prefix) + "(.*?)" + re.escape(suffix)
+    match = re.search(pattern, html, re.DOTALL)
+    if match is None:
+        print("No data found", file=sys.stderr)
+        return None
+
+    data = json.loads(match.group(1))
+    user = data["entry_data"]["ProfilePage"][0]["graphql"]["user"]
+    # Replace the timeline post list with a placeholder before returning
+    # (presumably to keep the printed result short -- confirm intent).
+    user["edge_owner_to_timeline_media"]["edges"] = "----"
+    return user
+        
+
+if __name__ == "__main__":
+    # Script entry point: scrape one fixed account and pretty-print the
+    # returned user data (or None when nothing was found).
+    profile = scrape("die_gruenen")
+    pprint(profile)