Fetch screenshot data from database

pull/55/head
Marian Steinbach 4 years ago
parent d6f3f93244
commit 989ed53acf
  1. 2
      Makefile
  2. 21
      docs/data/screenshots.json
  3. 79
      screenshots.py
  4. 21
      webapp/dist/data/screenshots.json

@ -19,7 +19,7 @@ test: dockerimage
screenshots: venv
docker pull netzbegruenung/green-spider-screenshotter:latest
venv/bin/python ./screenshots.py
venv/bin/python ./screenshots.py secrets/screenshot-reader.json
webapp/node_modules:
cd webapp && npm install

@ -44,12 +44,10 @@
"http://gruene-braunschweig.de/": "76d9736e07d81bbff413a6ee615c4f5b.png",
"http://gruene-brookmerland-hinte-suedbrookmerland.de/": "2b6773aae630a9fa1a4c6af68f379a35.png",
"http://gruene-bruchkoebel.de/": "6384cfd8940edac48c93c68478fea13a.png",
"http://gruene-brunnthal.de/": "2872dca208a7ae5cc158457e8dbe8804.png",
"http://gruene-bubenreuth.de/": "3424d463afb975cb63e7e30b25376985.png",
"http://gruene-cadolzburg.de/": "c7c9318bd992cbd9d923257b83a56fbb.png",
"http://gruene-coelbe.de/": "bc6e8a939231bd88889fdb10c38dee7f.png",
"http://gruene-dachau.de/": "80923fafe3338087e2c4fe77d8fd3e59.png",
"http://gruene-dieburg.de/": "ae9569d6d92500673a25c7b91f0b436c.png",
"http://gruene-dietzenbach.de/": "d4b5d9ba4744e01429bd1b5b41b6cb68.png",
"http://gruene-dinkelsbuehl.de/": "7e2262ee48d36557ec258baaa4128c6e.png",
"http://gruene-dortmund.de/": "59484f851fa0e9119294c7e169d9caab.png",
@ -72,7 +70,6 @@
"http://gruene-freudenstadt.de/": "626bc0f70005f4a7c8ead873e8231815.png",
"http://gruene-gescher.de/": "ee5384e403f31dafdd28a7cafdf618b3.png",
"http://gruene-graefenberg.de/": "4e22c78d1fa01637e47f82abcedf2882.png",
"http://gruene-grafrath.de/": "9c22a0e941136269e1c683f55de70509.png",
"http://gruene-griesheim.de/": "bbe0d591202150b2f28e92f92b34e821.png",
"http://gruene-guenzburg.de/": "457a9a6bd103ed71b978a167102e38be.png",
"http://gruene-gundelfingen.de/": "b20074c0c41c75b145c469aae0acc521.png",
@ -115,7 +112,6 @@
"http://gruene-landsberg.de/": "cc932a3cce7208b8d9788ddcbe7eaa5d.png",
"http://gruene-langenzenn.de/": "1df9054ffa3072f3cad51dce454c9447.png",
"http://gruene-lichtenberg.de/": "8e4f9985b1cfaca3beadb089274a8ab3.png",
"http://gruene-linnich.de/": "14ceb538cb393cf38e58b55f59135dd9.png",
"http://gruene-lorsch.de/": "c3d5e30266a6e10fc682bc22cb1f6926.png",
"http://gruene-luechow-dannenberg.de/": "4cbf3bdf703a8fb52aab6974691a9f7d.png",
"http://gruene-luedinghausen.de/": "63934cef314b1afdf0d50e49b00cb944.png",
@ -139,7 +135,6 @@
"http://gruene-nideggen.de/": "603b84a8e5a061ea8bc862892dc8b7e5.png",
"http://gruene-niederaula.de/": "28743857b40f1d609028c23beea2e4e7.png",
"http://gruene-niederrhein-wupper.de/": "96e85f536a9f7613ace09a9ee2b56be5.png",
"http://gruene-niederzier.de/": "31c70d174e24ca1b6f9a0cd292dc3ac3.png",
"http://gruene-nks.de/": "40c05c1139cbf3044bf400aca8b08ceb.png",
"http://gruene-norderney.de/": "ab0e40e89f8a68c0c8b106796f42b297.png",
"http://gruene-nuernberg.de/": "2cd3d615e6a00c669aca6ff4fda1aa74.png",
@ -193,9 +188,7 @@
"http://gruene-suedwest.de/": "eee459730829261190961daa3f25bda5.png",
"http://gruene-swisttal.de/": "b62e46625a1f9f44773458ab6af4e80b.png",
"http://gruene-taufkirchen.de/": "87b114edd59252325d139212ca80dc88.png",
"http://gruene-tecklenburg.de/": "a77fe4e93c6218f3f78937df0d88a882.png",
"http://gruene-tholey.de/": "02abc75e9e11b427e3f96161d7218f43.png",
"http://gruene-titz.de/": "7fadc96a34327f0c5b16dbfd7401e163.png",
"http://gruene-tostedt.de/": "578133a2ea884eec37d52c0b564b2320.png",
"http://gruene-troisdorf.de/": "e893ae3f46780b3544b61c1c0da64795.png",
"http://gruene-uedem.de/": "d2ac6378177edf99a922f457d144fcac.png",
@ -382,7 +375,6 @@
"http://www.gruenekaarst.de": "bf252d8baa2833d7d82d97471a37e2d6.png",
"http://www.gruenestadterfurt.de/": "f289930c624dffd2881fc2652f192c7e.png",
"http://www.gruenewesseling.de/": "bdc13beadf59bc7c82bfdc5eb77a4660.png",
"http://www.xn--grne-burbach-elb.de/": "06ed2f735715d3162d94cee98c702ec7.png",
"http://www.xn--grne-speyer-uhb.de/": "7219577a288c3137899aaec80642566b.png",
"http://www.xn--salzlandgrne-mlb.de/": "5ad1ecb92eecff58134fb86d484e4cb8.png",
"http://xn--grne-deggendorf-0vb.de/": "46ba6d5fb20305a2a9763d839ca14af7.png",
@ -618,7 +610,6 @@
"https://gruene-magdeburg.de/start/": "94e95a004da17ae66edc25a749b00b7d.png",
"https://gruene-main-kinzig.de/home/": "9af6e6c1ae63fd8aa325d5f88c49b53c.png",
"https://gruene-main-tauber.de/startseite.html": "5e3555d5d3b000abae9724de15c21856.png",
"https://gruene-maisach.de/": "e753776a10620ac66824a3f055ac1621.png",
"https://gruene-mandelbachtal.de/": "647b371613f324fedf3fa6094ef1ce0c.png",
"https://gruene-mansfeld-suedharz.de/home/": "7703c3517ed12817d888b2cb5774d4af.png",
"https://gruene-meitingen.de/startseite/": "86cb2c5cc5f192c870ab0695071f27cc.png",
@ -957,7 +948,6 @@
"https://www.gruene-lahn-dill.de/home/": "5ff6a1541c3d2a44768f56955561e9f9.png",
"https://www.gruene-lampertheim.de/startseite/": "b29648f5179c78615d57bed12c84c2e1.png",
"https://www.gruene-landau.de/": "ab51d90be5514ea512c5240d13ebdfc2.png",
"https://www.gruene-langen.de/": "994effc6fa44d94b8769ecc41e4b56e8.png",
"https://www.gruene-lauffen.de/home/": "ed5fd891d4b72ae3b68bda5dafffe17c.png",
"https://www.gruene-lehrte.de/home/": "855f4cf0b999a76d110b4c28078c3145.png",
"https://www.gruene-leingarten.de/ortsverbandleingarten/": "d61fd10d5300bd06aa42fe87558b8139.png",
@ -1010,7 +1000,6 @@
"https://www.gruene-owl.de/aktuell-aus-owl/": "7096fd293c280205abaef38a41d0e756.png",
"https://www.gruene-peine.de/home/": "c705083557219c5a1a4bcbb02122b8ce.png",
"https://www.gruene-pi.de/home/": "a061d21132727e0fbd8ea6692cfbfc88.png",
"https://www.gruene-preussischoldendorf.de/": "4d8a8327d510dad698bd71645054d201.png",
"https://www.gruene-puchheim.de/home/": "60d35f3926eab452c764da9b58517dee.png",
"https://www.gruene-pulheim.de/": "20678f80d9afdb5254649b4d06785860.png",
"https://www.gruene-rastede.de/home/": "408e9e5430ff03a7f8e88ad0a715c6b4.png",
@ -1093,13 +1082,5 @@
"https://www.grueneprignitz.de/aktuelles/": "cab6938910d59f29a74e51b13820b9f8.png",
"https://www.mein-wachtberg.de/": "3323e371bd2cae2591aca4db6a325da1.png",
"https://www.rotenburger-gruene.de/": "4ae543cdcccdc74b598ec7c41c3079c7.png",
"https://www.wds-gruene.de/ortsverband/": "46b150ed54a30b29036bb09a1f3a1171.png",
"https://www.xn--grne-alternative-liste-flrsheim-vdd8o.de/": "b794965c6573c1d3df064883ebfdfb67.png",
"https://www.xn--grne-idstein-elb.de/index.php?id=158": "41bfce9871338e072e47fad735c6cd54.png",
"https://www.xn--grne-mnster-uhbe.de/": "ce680ef05bd61a75733e5f4f8169952e.png",
"https://www.xn--grne-stadtallendorf-69b.de/": "7fdfd4272232dae3ecb94b90ecc634e7.png",
"https://www.xn--grne-teltow-uhb.de/": "1da5c54e1b3354e690f20aff1117fddd.png",
"https://www.xn--grne-wf-o2a.de/startseite/": "fb8c08c887930872d2da8510398c7811.png",
"https://www.xn--padergrn-d6a.de/": "1ac623c3d37173d1595d3d7242651938.png",
"https://xn--grne-lippe-beb.de/startseite/": "a055a381e09977afb9b334621a577d72.png"
"https://www.wds-gruene.de/ortsverband/": "46b150ed54a30b29036bb09a1f3a1171.png"
}

@ -1,79 +1,28 @@
from google.cloud import storage
import hashlib
from google.cloud import datastore
import json
import subprocess
import os
import sys
import os
json_file = 'webapp/dist/data/spider_result.json'
bucket_name = "green-spider-screenshots.sendung.de"
if len(sys.argv) == 1:
print("Error: please provide path to Google Storage API system account JSON file as argument")
sys.exit(1)
key_path = sys.argv[1]
client = None
bucket = None
# result dict. key: url, value: file name
urls_done = {}
def main():
global client
global bucket
client = storage.Client.from_service_account_json(key_path)
bucket = client.get_bucket(bucket_name)
with open(json_file, 'r', encoding="utf8") as jsonfile:
data = json.load(jsonfile)
for entry in data:
urls = entry['details'].get('canonical_urls')
if urls is None or len(urls) == 0:
continue
url = urls[0]
if url in urls_done:
continue
if len(sys.argv) == 1:
print("Error: please provide path to Google Storage API system account JSON file as argument")
sys.exit(1)
filename = make_screenshots(url)
key_path = sys.argv[1]
client = datastore.Client.from_service_account_json(key_path)
urls_done[url] = filename
out = {}
query = client.query(kind='webscreenshot')
for item in query.fetch():
print(item['url'], os.path.basename(item['screenshot_url']))
out[item['url']] = os.path.basename(item['screenshot_url'])
output_filename = "./webapp/dist/data/screenshots.json"
with open(output_filename, 'w', encoding="utf8") as jsonfile:
json.dump(urls_done, jsonfile, indent=2, sort_keys=True, ensure_ascii=False)
json.dump(out, jsonfile, indent=2, sort_keys=True, ensure_ascii=False)
def make_screenshots(url):
"""
Creates screenshots in various sizes, uploads them to
Google Cloud Storage and returns the output filename
"""
sizes = ([320, 640], [1500, 1500])
for size in sizes:
print("Screenshotting size %s for %s" % (size, url))
sizeargument = "%spx*%spx" % (size[0], size[1])
subfolder = "%sx%s" % (size[0], size[1])
filename = hashlib.md5(bytearray(url, 'utf-8')).hexdigest() + ".png"
command = [
"docker", "run", "--rm", "-v",
os.getenv("PWD") + "/temp/%s:/srv" % subfolder,
"netzbegruenung/green-spider-screenshotter:latest",
url, filename, sizeargument
]
subprocess.run(command)
blob = bucket.blob('%s/%s' % (subfolder, filename))
local_path = './temp/%s/%s' % (subfolder, filename)
if os.path.exists(local_path):
with open(local_path, 'rb') as my_file:
blob.upload_from_file(my_file, content_type="image/png")
blob.make_public()
else:
print("Error: No screenshot created: size=%s, url='%s'" % (size, url))
return filename
if __name__ == "__main__":
main()

@ -44,12 +44,10 @@
"http://gruene-braunschweig.de/": "76d9736e07d81bbff413a6ee615c4f5b.png",
"http://gruene-brookmerland-hinte-suedbrookmerland.de/": "2b6773aae630a9fa1a4c6af68f379a35.png",
"http://gruene-bruchkoebel.de/": "6384cfd8940edac48c93c68478fea13a.png",
"http://gruene-brunnthal.de/": "2872dca208a7ae5cc158457e8dbe8804.png",
"http://gruene-bubenreuth.de/": "3424d463afb975cb63e7e30b25376985.png",
"http://gruene-cadolzburg.de/": "c7c9318bd992cbd9d923257b83a56fbb.png",
"http://gruene-coelbe.de/": "bc6e8a939231bd88889fdb10c38dee7f.png",
"http://gruene-dachau.de/": "80923fafe3338087e2c4fe77d8fd3e59.png",
"http://gruene-dieburg.de/": "ae9569d6d92500673a25c7b91f0b436c.png",
"http://gruene-dietzenbach.de/": "d4b5d9ba4744e01429bd1b5b41b6cb68.png",
"http://gruene-dinkelsbuehl.de/": "7e2262ee48d36557ec258baaa4128c6e.png",
"http://gruene-dortmund.de/": "59484f851fa0e9119294c7e169d9caab.png",
@ -72,7 +70,6 @@
"http://gruene-freudenstadt.de/": "626bc0f70005f4a7c8ead873e8231815.png",
"http://gruene-gescher.de/": "ee5384e403f31dafdd28a7cafdf618b3.png",
"http://gruene-graefenberg.de/": "4e22c78d1fa01637e47f82abcedf2882.png",
"http://gruene-grafrath.de/": "9c22a0e941136269e1c683f55de70509.png",
"http://gruene-griesheim.de/": "bbe0d591202150b2f28e92f92b34e821.png",
"http://gruene-guenzburg.de/": "457a9a6bd103ed71b978a167102e38be.png",
"http://gruene-gundelfingen.de/": "b20074c0c41c75b145c469aae0acc521.png",
@ -115,7 +112,6 @@
"http://gruene-landsberg.de/": "cc932a3cce7208b8d9788ddcbe7eaa5d.png",
"http://gruene-langenzenn.de/": "1df9054ffa3072f3cad51dce454c9447.png",
"http://gruene-lichtenberg.de/": "8e4f9985b1cfaca3beadb089274a8ab3.png",
"http://gruene-linnich.de/": "14ceb538cb393cf38e58b55f59135dd9.png",
"http://gruene-lorsch.de/": "c3d5e30266a6e10fc682bc22cb1f6926.png",
"http://gruene-luechow-dannenberg.de/": "4cbf3bdf703a8fb52aab6974691a9f7d.png",
"http://gruene-luedinghausen.de/": "63934cef314b1afdf0d50e49b00cb944.png",
@ -139,7 +135,6 @@
"http://gruene-nideggen.de/": "603b84a8e5a061ea8bc862892dc8b7e5.png",
"http://gruene-niederaula.de/": "28743857b40f1d609028c23beea2e4e7.png",
"http://gruene-niederrhein-wupper.de/": "96e85f536a9f7613ace09a9ee2b56be5.png",
"http://gruene-niederzier.de/": "31c70d174e24ca1b6f9a0cd292dc3ac3.png",
"http://gruene-nks.de/": "40c05c1139cbf3044bf400aca8b08ceb.png",
"http://gruene-norderney.de/": "ab0e40e89f8a68c0c8b106796f42b297.png",
"http://gruene-nuernberg.de/": "2cd3d615e6a00c669aca6ff4fda1aa74.png",
@ -193,9 +188,7 @@
"http://gruene-suedwest.de/": "eee459730829261190961daa3f25bda5.png",
"http://gruene-swisttal.de/": "b62e46625a1f9f44773458ab6af4e80b.png",
"http://gruene-taufkirchen.de/": "87b114edd59252325d139212ca80dc88.png",
"http://gruene-tecklenburg.de/": "a77fe4e93c6218f3f78937df0d88a882.png",
"http://gruene-tholey.de/": "02abc75e9e11b427e3f96161d7218f43.png",
"http://gruene-titz.de/": "7fadc96a34327f0c5b16dbfd7401e163.png",
"http://gruene-tostedt.de/": "578133a2ea884eec37d52c0b564b2320.png",
"http://gruene-troisdorf.de/": "e893ae3f46780b3544b61c1c0da64795.png",
"http://gruene-uedem.de/": "d2ac6378177edf99a922f457d144fcac.png",
@ -382,7 +375,6 @@
"http://www.gruenekaarst.de": "bf252d8baa2833d7d82d97471a37e2d6.png",
"http://www.gruenestadterfurt.de/": "f289930c624dffd2881fc2652f192c7e.png",
"http://www.gruenewesseling.de/": "bdc13beadf59bc7c82bfdc5eb77a4660.png",
"http://www.xn--grne-burbach-elb.de/": "06ed2f735715d3162d94cee98c702ec7.png",
"http://www.xn--grne-speyer-uhb.de/": "7219577a288c3137899aaec80642566b.png",
"http://www.xn--salzlandgrne-mlb.de/": "5ad1ecb92eecff58134fb86d484e4cb8.png",
"http://xn--grne-deggendorf-0vb.de/": "46ba6d5fb20305a2a9763d839ca14af7.png",
@ -618,7 +610,6 @@
"https://gruene-magdeburg.de/start/": "94e95a004da17ae66edc25a749b00b7d.png",
"https://gruene-main-kinzig.de/home/": "9af6e6c1ae63fd8aa325d5f88c49b53c.png",
"https://gruene-main-tauber.de/startseite.html": "5e3555d5d3b000abae9724de15c21856.png",
"https://gruene-maisach.de/": "e753776a10620ac66824a3f055ac1621.png",
"https://gruene-mandelbachtal.de/": "647b371613f324fedf3fa6094ef1ce0c.png",
"https://gruene-mansfeld-suedharz.de/home/": "7703c3517ed12817d888b2cb5774d4af.png",
"https://gruene-meitingen.de/startseite/": "86cb2c5cc5f192c870ab0695071f27cc.png",
@ -957,7 +948,6 @@
"https://www.gruene-lahn-dill.de/home/": "5ff6a1541c3d2a44768f56955561e9f9.png",
"https://www.gruene-lampertheim.de/startseite/": "b29648f5179c78615d57bed12c84c2e1.png",
"https://www.gruene-landau.de/": "ab51d90be5514ea512c5240d13ebdfc2.png",
"https://www.gruene-langen.de/": "994effc6fa44d94b8769ecc41e4b56e8.png",
"https://www.gruene-lauffen.de/home/": "ed5fd891d4b72ae3b68bda5dafffe17c.png",
"https://www.gruene-lehrte.de/home/": "855f4cf0b999a76d110b4c28078c3145.png",
"https://www.gruene-leingarten.de/ortsverbandleingarten/": "d61fd10d5300bd06aa42fe87558b8139.png",
@ -1010,7 +1000,6 @@
"https://www.gruene-owl.de/aktuell-aus-owl/": "7096fd293c280205abaef38a41d0e756.png",
"https://www.gruene-peine.de/home/": "c705083557219c5a1a4bcbb02122b8ce.png",
"https://www.gruene-pi.de/home/": "a061d21132727e0fbd8ea6692cfbfc88.png",
"https://www.gruene-preussischoldendorf.de/": "4d8a8327d510dad698bd71645054d201.png",
"https://www.gruene-puchheim.de/home/": "60d35f3926eab452c764da9b58517dee.png",
"https://www.gruene-pulheim.de/": "20678f80d9afdb5254649b4d06785860.png",
"https://www.gruene-rastede.de/home/": "408e9e5430ff03a7f8e88ad0a715c6b4.png",
@ -1093,13 +1082,5 @@
"https://www.grueneprignitz.de/aktuelles/": "cab6938910d59f29a74e51b13820b9f8.png",
"https://www.mein-wachtberg.de/": "3323e371bd2cae2591aca4db6a325da1.png",
"https://www.rotenburger-gruene.de/": "4ae543cdcccdc74b598ec7c41c3079c7.png",
"https://www.wds-gruene.de/ortsverband/": "46b150ed54a30b29036bb09a1f3a1171.png",
"https://www.xn--grne-alternative-liste-flrsheim-vdd8o.de/": "b794965c6573c1d3df064883ebfdfb67.png",
"https://www.xn--grne-idstein-elb.de/index.php?id=158": "41bfce9871338e072e47fad735c6cd54.png",
"https://www.xn--grne-mnster-uhbe.de/": "ce680ef05bd61a75733e5f4f8169952e.png",
"https://www.xn--grne-stadtallendorf-69b.de/": "7fdfd4272232dae3ecb94b90ecc634e7.png",
"https://www.xn--grne-teltow-uhb.de/": "1da5c54e1b3354e690f20aff1117fddd.png",
"https://www.xn--grne-wf-o2a.de/startseite/": "fb8c08c887930872d2da8510398c7811.png",
"https://www.xn--padergrn-d6a.de/": "1ac623c3d37173d1595d3d7242651938.png",
"https://xn--grne-lippe-beb.de/startseite/": "a055a381e09977afb9b334621a577d72.png"
"https://www.wds-gruene.de/ortsverband/": "46b150ed54a30b29036bb09a1f3a1171.png"
}
Loading…
Cancel
Save