From 82c2e43e2e3fa23357c3d13dc7b7948fc56a3455 Mon Sep 17 00:00:00 2001 From: Lukas Mehl Date: Sat, 19 Oct 2019 15:26:42 +0200 Subject: [PATCH] add verification info to result, html page --- docs/index.html | 61 ++++++++++++++++++++++++++++++++-------------- docs/verified.png | Bin 0 -> 550 bytes scraper.py | 4 +-- spider.py | 9 +++---- 4 files changed, 49 insertions(+), 25 deletions(-) create mode 100644 docs/verified.png diff --git a/docs/index.html b/docs/index.html index e3a50e1..7fecf87 100644 --- a/docs/index.html +++ b/docs/index.html @@ -83,26 +83,51 @@ var tbl_body = document.createElement("tbody"); $.each(data, function() { var tbl_row = tbl_body.insertRow(); + var name; + var follower; + var isVerified; $.each(this, function(idx , v) { - var cell = tbl_row.insertCell(); - txt = v.toString(); - if(idx == 5 && txt != "--"){ - var a = document.createElement('a'); - a.appendChild(document.createTextNode(txt)); - a.href = "https://www.facebook.com/" + txt; - cell.appendChild(a); - } else if (idx == 7 && txt != "--") { - var a = document.createElement('a'); - a.appendChild(document.createTextNode(txt)); - a.href = "https://www.twitter.com/" + txt; - cell.appendChild(a); - } else if (idx == 9 && txt != "--") { - var a = document.createElement('a'); - a.appendChild(document.createTextNode(txt)); - a.href = "https://www.instagram.com/" + txt; - cell.appendChild(a); - } else { + if (idx < 5){ + var cell = tbl_row.insertCell(); + txt = v.toString(); cell.appendChild(document.createTextNode(txt)); + } else if (idx == 5 || idx == 8 || idx === 11) { + name = v.toString(); + } else if (idx == 6 || idx == 9 || idx === 12) { + follower = v.toString(); + } else if (idx == 7 || idx == 10 || idx === 13) { + var cell = tbl_row.insertCell(); + var a = document.createElement('a'); + isVerified = v; + if (isVerified) + { + verification = document.createElement("div"); + icon = document.createElement("img") + icon.setAttribute("src", "verified.png") + icon.setAttribute("style", "height:19px;width:19px;float:left;margin-right:3px;"); + verification.appendChild(icon) + text = document.createElement("div") + text.setAttribute("style", "float:left;text-decoration:underline;"); + text.appendChild(document.createTextNode(name)) + verification.appendChild(text); + a.appendChild(verification) + } else { + a.appendChild(document.createTextNode(name)); + } + if (idx == 7 && name != "--") { + a.href = "https://www.facebook.com/" + name; + cell.appendChild(a); + } else if (idx == 10 && name != "--") { + a.href = "https://www.twitter.com/" + name; + cell.appendChild(a); + } else if (idx == 13 && name != "--") { + a.href = "https://www.instagram.com/" + name; + cell.appendChild(a); + } else { + cell.appendChild(document.createTextNode(name)); + } + var cell2 = tbl_row.insertCell(); + cell2.appendChild(document.createTextNode(follower)); } }) }) diff --git a/docs/verified.png b/docs/verified.png new file mode 100644 index 0000000000000000000000000000000000000000..af8b2bbde8c93698bb88e964ab42140624d9f12e GIT binary patch literal 550 zcmV+>0@?kEP)Px#1ZP1_K>z@;j|==^1poj532;bRa{vGi!~g&e!~vBn4jTXf0lP^=K~y+Tt(40v zL{S{azg~ri1yWKd!oo%}lsp=1DTyoy$<}{hYh`6&Wo9P}W$CeCV@4AtWkFsWg$44O z$%5bacg~&eJQy?i)a{=0yXW`4=lmWg>r22tkyI@F3GAH$hnK+h1Mu1+L={{hp7nqdC zjpMIkSNA%O!S!=xu-%kIM2@t5VvQEZU&UU-I*!s*1uTpK^_6Dc)d&oE0H%F;2e^^i zbMqE4Zxt)b*Krh2Jy2Z+%nt*z{lJXZ?3ASwkgUg3A_YxUthPeOQGAlwtz-Ep3OpJE ze{=<`1?+-2_+z4CY!Nps;}uEy`hl6>J_1Xd#-0qvUps(xGUrQP9v#v$mv_M3vo*xQ z)>@n}y{bFoV7y1aCBPW{Bt+=vG`)%u2A#c@4x_T3eh!JZhV&X+B;~CkJ8}z=1To^= oXQ2yIQ>-JVpIJ-mKFb{78@i5|6t2PYz5oCK07*qoM6N<$g3Dg#`~Uy| literal 0 HcmV?d00001 diff --git a/scraper.py b/scraper.py index c36bc95..42e617d 100644 --- a/scraper.py +++ b/scraper.py @@ -20,7 +20,7 @@ def scrapeInstagramData(username): decoded = decode(result[1]) data = json.loads(decoded) data["entry_data"]["ProfilePage"][0]["graphql"]["user"]["edge_owner_to_timeline_media"]["edges"] = "----" - return data["entry_data"]["ProfilePage"][0]["graphql"]["user"] + return data["entry_data"]["ProfilePage"][0]["graphql"]["user"]["edge_followed_by"]["count"], data["entry_data"]["ProfilePage"][0]["graphql"]["user"]["is_verified"] else: print("No data found for", username, file=sys.stderr) @@ -55,4 +55,4 @@ def scrapeTwitterData(username): if __name__ == '__main__': print(scrapeFacebookData("B90DieGruenen")) print(scrapeTwitterData("Die_Gruenen")) - print(scrapeInstagramData("die_gruenen")["edge_followed_by"]["count"]) + print(scrapeInstagramData("die_gruenen")) diff --git a/spider.py b/spider.py index d33e0d7..9768ad1 100644 --- a/spider.py +++ b/spider.py @@ -146,10 +146,7 @@ def main(): elif url["type"] == "INSTAGRAM": instaName = getInstagramName(url["url"]) try: - instaData = scrapeInstagramData(instaName) - if instaData: - instaFollower = instaData["edge_followed_by"]["count"] - instaVerified = instaData["is_verified"] + instaFollower, instaVerified = scrapeInstagramData(instaName) sleep(0.1) except Exception as e: print("INSTAGRAM ERROR for", url["url"], "--", instaName, file=sys.stderr) @@ -167,8 +164,10 @@ def main(): fbname = "--" if fbLikes + twtFollower + instaFollower > 0: key = "//".join([typ, level, land, kreis, stadt]) - result.update({key: [typ, level, land, kreis, stadt, fbname, fbLikes, twtname, twtFollower, instaName, instaFollower]}) + result.update({key: [typ, level, land, kreis, stadt, fbname, fbLikes, fbVerified, twtname, twtFollower, twtVerified, instaName, instaFollower, instaVerified]}) idx += 1 + if idx == 50: + break with open("docs/result.json", "w") as f: json.dump(result, f)