mirror of
https://github.com/netzbegruenung/social-spider.git
synced 2024-05-04 19:53:40 +02:00
get facebook infos
This commit is contained in:
parent
f0531b059a
commit
115a765de4
|
@ -5,6 +5,7 @@ import shutil
|
|||
from ruamel import yaml
|
||||
from pprint import pprint
|
||||
import sys
|
||||
import re
|
||||
|
||||
# Git repo for our data
|
||||
green_directory_repo = 'https://github.com/netzbegruenung/green-directory.git'
|
||||
|
@ -38,11 +39,11 @@ def dir_entries():
|
|||
continue
|
||||
|
||||
with open(filepath, 'r') as yamlfile:
|
||||
for doc in yaml.load_all(yamlfile):
|
||||
for doc in yaml.load_all(yamlfile, Loader=yaml.Loader):
|
||||
yield doc
|
||||
|
||||
|
||||
def onerror(func, path, exc_info):
|
||||
def onerror(func, path, _):
|
||||
"""
|
||||
Error handler for ``shutil.rmtree``.
|
||||
|
||||
|
@ -61,14 +62,61 @@ def onerror(func, path, exc_info):
|
|||
else:
|
||||
raise
|
||||
|
||||
def getFacebookName(url):
    """
    Derive the identifier used to look up a Facebook page from its URL.

    Resolution order:

    1. Empty URLs and URLs containing ``/groups/`` yield ``None``.
    2. If the URL contains a hyphen directly followed by digits, the digit
       run after the last such hyphen is taken as the page ID. IDs shorter
       than 10 digits are reported on stderr and rejected (``None``).
    3. Otherwise the last path segment is used, or the one before it when
       the URL ends with a slash. A query string attached to the segment
       (``?fref=ts`` and similar) is not part of the name and is dropped.

    :param url: Facebook URL as a string.
    :return: page ID or page name, or ``None`` if nothing usable was found.
    """
    if not url or "/groups/" in url:
        return None

    # Match once and reuse the result; greedy ``.+`` makes the group capture
    # the digits following the *last* "-<digits>" occurrence.
    numeric_suffix = re.match(r".+-(\d+)", url)
    if numeric_suffix:
        page_id = numeric_suffix.group(1)
        if len(page_id) < 10:
            # NOTE(review): presumably too short to be a real page ID, so the
            # URL is logged for manual inspection -- confirm the threshold.
            print(url, "--", page_id, file=sys.stderr)
            return None
        return page_id

    # Look at the last segment first, then the one before it (trailing
    # slash). Slicing with [-2:] is safe for URLs without any slash.
    for segment in reversed(url.split("/")[-2:]):
        name = segment.partition("?")[0]
        if name:
            return name
    return None
|
||||
|
||||
def getTwitterName(url):
    """
    Derive the Twitter handle from a profile URL.

    The last path segment is used, or the one before it when the URL ends
    with a slash. A query string attached to the segment (``?lang=de`` and
    similar) is dropped.

    :param url: Twitter URL as a string.
    :return: the handle, or ``None`` if the URL is empty or has no usable
        segment.
    """
    if not url:
        return None

    # Slicing with [-2:] avoids the IndexError that indexing [-2] raises on
    # a string without any slash.
    for segment in reversed(url.split("/")[-2:]):
        handle = segment.partition("?")[0]
        if handle:
            return handle
    return None
|
||||
|
||||
def main():
    """
    Collect Facebook page data for directory entries and write it to disk.

    Calls ``get_green_directory()`` and then walks every entry yielded by
    ``dir_entries()``. For each URL of type ``FACEBOOK`` for which
    ``getFacebookName`` returns a name, the ``fan_count``, ``username``,
    ``verification_status`` and ``website`` fields are requested from the
    Graph API and stored on the entry under ``facebookData`` and
    ``facebookID``. The enriched entries are dumped to ``result.yaml`` and
    the number of successful lookups is printed.

    Returns early, after a message on stderr, when ``access_token`` is
    empty.
    """
    get_green_directory()
    if not access_token:
        print("No access token found", file=sys.stderr)
        return

    graph = facebook.GraphAPI(access_token=access_token)
    fields = "fan_count,username,verification_status,website"

    doc = []
    count = 0
    for entry in dir_entries():
        if not entry.get("urls"):
            continue
        for url in entry["urls"]:
            if url["type"] == "FACEBOOK":
                fbname = getFacebookName(url["url"])
                if not fbname:
                    continue
                try:
                    fbdata = graph.get_object(fbname, fields=fields)
                except Exception as e:
                    # Best effort: one failing lookup must not abort the
                    # whole run, so report it and move on to the next URL.
                    print(e, file=sys.stderr)
                    continue
                entry.update({"facebookData": fbdata, "facebookID": fbname})
                print(fbname)
                print(fbdata)
                # NOTE(review): an entry with several resolvable Facebook
                # URLs is appended (and counted) once per URL -- confirm
                # whether that is intended.
                doc.append(entry)
                count += 1
            elif url["type"] == "TWITTER":
                # TODO: the handle is extracted but not used yet.
                twtname = getTwitterName(url["url"])

    with open("result.yaml", "w") as f:
        yaml.dump_all(doc, f)
    print(count)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
Loading…
Reference in a new issue