diff --git a/Makefile b/Makefile index b4dd4d1..fee3b94 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ test: dockerimage screenshots: venv docker pull netzbegruenung/green-spider-screenshotter:latest - venv/bin/python ./screenshots.py + venv/bin/python ./screenshots.py secrets/screenshot-reader.json webapp/node_modules: cd webapp && npm install diff --git a/docs/data/screenshots.json b/docs/data/screenshots.json index d321a47..5e99df3 100644 --- a/docs/data/screenshots.json +++ b/docs/data/screenshots.json @@ -44,12 +44,10 @@ "http://gruene-braunschweig.de/": "76d9736e07d81bbff413a6ee615c4f5b.png", "http://gruene-brookmerland-hinte-suedbrookmerland.de/": "2b6773aae630a9fa1a4c6af68f379a35.png", "http://gruene-bruchkoebel.de/": "6384cfd8940edac48c93c68478fea13a.png", - "http://gruene-brunnthal.de/": "2872dca208a7ae5cc158457e8dbe8804.png", "http://gruene-bubenreuth.de/": "3424d463afb975cb63e7e30b25376985.png", "http://gruene-cadolzburg.de/": "c7c9318bd992cbd9d923257b83a56fbb.png", "http://gruene-coelbe.de/": "bc6e8a939231bd88889fdb10c38dee7f.png", "http://gruene-dachau.de/": "80923fafe3338087e2c4fe77d8fd3e59.png", - "http://gruene-dieburg.de/": "ae9569d6d92500673a25c7b91f0b436c.png", "http://gruene-dietzenbach.de/": "d4b5d9ba4744e01429bd1b5b41b6cb68.png", "http://gruene-dinkelsbuehl.de/": "7e2262ee48d36557ec258baaa4128c6e.png", "http://gruene-dortmund.de/": "59484f851fa0e9119294c7e169d9caab.png", @@ -72,7 +70,6 @@ "http://gruene-freudenstadt.de/": "626bc0f70005f4a7c8ead873e8231815.png", "http://gruene-gescher.de/": "ee5384e403f31dafdd28a7cafdf618b3.png", "http://gruene-graefenberg.de/": "4e22c78d1fa01637e47f82abcedf2882.png", - "http://gruene-grafrath.de/": "9c22a0e941136269e1c683f55de70509.png", "http://gruene-griesheim.de/": "bbe0d591202150b2f28e92f92b34e821.png", "http://gruene-guenzburg.de/": "457a9a6bd103ed71b978a167102e38be.png", "http://gruene-gundelfingen.de/": "b20074c0c41c75b145c469aae0acc521.png", @@ -115,7 +112,6 @@ "http://gruene-landsberg.de/": "cc932a3cce7208b8d9788ddcbe7eaa5d.png", "http://gruene-langenzenn.de/": "1df9054ffa3072f3cad51dce454c9447.png", "http://gruene-lichtenberg.de/": "8e4f9985b1cfaca3beadb089274a8ab3.png", - "http://gruene-linnich.de/": "14ceb538cb393cf38e58b55f59135dd9.png", "http://gruene-lorsch.de/": "c3d5e30266a6e10fc682bc22cb1f6926.png", "http://gruene-luechow-dannenberg.de/": "4cbf3bdf703a8fb52aab6974691a9f7d.png", "http://gruene-luedinghausen.de/": "63934cef314b1afdf0d50e49b00cb944.png", @@ -139,7 +135,6 @@ "http://gruene-nideggen.de/": "603b84a8e5a061ea8bc862892dc8b7e5.png", "http://gruene-niederaula.de/": "28743857b40f1d609028c23beea2e4e7.png", "http://gruene-niederrhein-wupper.de/": "96e85f536a9f7613ace09a9ee2b56be5.png", - "http://gruene-niederzier.de/": "31c70d174e24ca1b6f9a0cd292dc3ac3.png", "http://gruene-nks.de/": "40c05c1139cbf3044bf400aca8b08ceb.png", "http://gruene-norderney.de/": "ab0e40e89f8a68c0c8b106796f42b297.png", "http://gruene-nuernberg.de/": "2cd3d615e6a00c669aca6ff4fda1aa74.png", @@ -193,9 +188,7 @@ "http://gruene-suedwest.de/": "eee459730829261190961daa3f25bda5.png", "http://gruene-swisttal.de/": "b62e46625a1f9f44773458ab6af4e80b.png", "http://gruene-taufkirchen.de/": "87b114edd59252325d139212ca80dc88.png", - "http://gruene-tecklenburg.de/": "a77fe4e93c6218f3f78937df0d88a882.png", "http://gruene-tholey.de/": "02abc75e9e11b427e3f96161d7218f43.png", - "http://gruene-titz.de/": "7fadc96a34327f0c5b16dbfd7401e163.png", "http://gruene-tostedt.de/": "578133a2ea884eec37d52c0b564b2320.png", "http://gruene-troisdorf.de/": "e893ae3f46780b3544b61c1c0da64795.png", "http://gruene-uedem.de/": "d2ac6378177edf99a922f457d144fcac.png", @@ -382,7 +375,6 @@ "http://www.gruenekaarst.de": "bf252d8baa2833d7d82d97471a37e2d6.png", "http://www.gruenestadterfurt.de/": "f289930c624dffd2881fc2652f192c7e.png", "http://www.gruenewesseling.de/": "bdc13beadf59bc7c82bfdc5eb77a4660.png", - "http://www.xn--grne-burbach-elb.de/": "06ed2f735715d3162d94cee98c702ec7.png", "http://www.xn--grne-speyer-uhb.de/": "7219577a288c3137899aaec80642566b.png", "http://www.xn--salzlandgrne-mlb.de/": "5ad1ecb92eecff58134fb86d484e4cb8.png", "http://xn--grne-deggendorf-0vb.de/": "46ba6d5fb20305a2a9763d839ca14af7.png", @@ -618,7 +610,6 @@ "https://gruene-magdeburg.de/start/": "94e95a004da17ae66edc25a749b00b7d.png", "https://gruene-main-kinzig.de/home/": "9af6e6c1ae63fd8aa325d5f88c49b53c.png", "https://gruene-main-tauber.de/startseite.html": "5e3555d5d3b000abae9724de15c21856.png", - "https://gruene-maisach.de/": "e753776a10620ac66824a3f055ac1621.png", "https://gruene-mandelbachtal.de/": "647b371613f324fedf3fa6094ef1ce0c.png", "https://gruene-mansfeld-suedharz.de/home/": "7703c3517ed12817d888b2cb5774d4af.png", "https://gruene-meitingen.de/startseite/": "86cb2c5cc5f192c870ab0695071f27cc.png", @@ -957,7 +948,6 @@ "https://www.gruene-lahn-dill.de/home/": "5ff6a1541c3d2a44768f56955561e9f9.png", "https://www.gruene-lampertheim.de/startseite/": "b29648f5179c78615d57bed12c84c2e1.png", "https://www.gruene-landau.de/": "ab51d90be5514ea512c5240d13ebdfc2.png", - "https://www.gruene-langen.de/": "994effc6fa44d94b8769ecc41e4b56e8.png", "https://www.gruene-lauffen.de/home/": "ed5fd891d4b72ae3b68bda5dafffe17c.png", "https://www.gruene-lehrte.de/home/": "855f4cf0b999a76d110b4c28078c3145.png", "https://www.gruene-leingarten.de/ortsverbandleingarten/": "d61fd10d5300bd06aa42fe87558b8139.png", @@ -1010,7 +1000,6 @@ "https://www.gruene-owl.de/aktuell-aus-owl/": "7096fd293c280205abaef38a41d0e756.png", "https://www.gruene-peine.de/home/": "c705083557219c5a1a4bcbb02122b8ce.png", "https://www.gruene-pi.de/home/": "a061d21132727e0fbd8ea6692cfbfc88.png", - "https://www.gruene-preussischoldendorf.de/": "4d8a8327d510dad698bd71645054d201.png", "https://www.gruene-puchheim.de/home/": "60d35f3926eab452c764da9b58517dee.png", "https://www.gruene-pulheim.de/": "20678f80d9afdb5254649b4d06785860.png", "https://www.gruene-rastede.de/home/": "408e9e5430ff03a7f8e88ad0a715c6b4.png", @@ -1093,13 +1082,5 @@ "https://www.grueneprignitz.de/aktuelles/": "cab6938910d59f29a74e51b13820b9f8.png", "https://www.mein-wachtberg.de/": "3323e371bd2cae2591aca4db6a325da1.png", "https://www.rotenburger-gruene.de/": "4ae543cdcccdc74b598ec7c41c3079c7.png", - "https://www.wds-gruene.de/ortsverband/": "46b150ed54a30b29036bb09a1f3a1171.png", - "https://www.xn--grne-alternative-liste-flrsheim-vdd8o.de/": "b794965c6573c1d3df064883ebfdfb67.png", - "https://www.xn--grne-idstein-elb.de/index.php?id=158": "41bfce9871338e072e47fad735c6cd54.png", - "https://www.xn--grne-mnster-uhbe.de/": "ce680ef05bd61a75733e5f4f8169952e.png", - "https://www.xn--grne-stadtallendorf-69b.de/": "7fdfd4272232dae3ecb94b90ecc634e7.png", - "https://www.xn--grne-teltow-uhb.de/": "1da5c54e1b3354e690f20aff1117fddd.png", - "https://www.xn--grne-wf-o2a.de/startseite/": "fb8c08c887930872d2da8510398c7811.png", - "https://www.xn--padergrn-d6a.de/": "1ac623c3d37173d1595d3d7242651938.png", - "https://xn--grne-lippe-beb.de/startseite/": "a055a381e09977afb9b334621a577d72.png" + "https://www.wds-gruene.de/ortsverband/": "46b150ed54a30b29036bb09a1f3a1171.png" } \ No newline at end of file diff --git a/screenshots.py b/screenshots.py index 256b96e..411a9bc 100644 --- a/screenshots.py +++ b/screenshots.py @@ -1,79 +1,28 @@ -from google.cloud import storage -import hashlib +from google.cloud import datastore import json -import subprocess -import os import sys +import os -json_file = 'webapp/dist/data/spider_result.json' - -bucket_name = "green-spider-screenshots.sendung.de" - -if len(sys.argv) == 1: - print("Error: please provide path to Google Storage API system account JSON file as argument") - sys.exit(1) - -key_path = sys.argv[1] - -client = None -bucket = None - -# result dict. key: url, value: file name -urls_done = {} def main(): - global client - global bucket + if len(sys.argv) == 1: + print("Error: please provide path to Google Storage API system account JSON file as argument") + sys.exit(1) - client = storage.Client.from_service_account_json(key_path) - bucket = client.get_bucket(bucket_name) + key_path = sys.argv[1] + client = datastore.Client.from_service_account_json(key_path) - with open(json_file, 'r', encoding="utf8") as jsonfile: - data = json.load(jsonfile) - for entry in data: - urls = entry['details'].get('canonical_urls') - if urls is None or len(urls) == 0: - continue - url = urls[0] - - if url in urls_done: - continue - - filename = make_screenshots(url) - - urls_done[url] = filename + out = {} + query = client.query(kind='webscreenshot') + for item in query.fetch(): + print(item['url'], os.path.basename(item['screenshot_url'])) + out[item['url']] = os.path.basename(item['screenshot_url']) + output_filename = "./webapp/dist/data/screenshots.json" with open(output_filename, 'w', encoding="utf8") as jsonfile: - json.dump(urls_done, jsonfile, indent=2, sort_keys=True, ensure_ascii=False) + json.dump(out, jsonfile, indent=2, sort_keys=True, ensure_ascii=False) -def make_screenshots(url): - """ - Creates screenshots in various sizes, uploads them to - Google Cloud Storage and returns the output filename - """ - sizes = ([320, 640], [1500, 1500]) - for size in sizes: - print("Screenshotting size %s for %s" % (size, url)) - sizeargument = "%spx*%spx" % (size[0], size[1]) - subfolder = "%sx%s" % (size[0], size[1]) - filename = hashlib.md5(bytearray(url, 'utf-8')).hexdigest() + ".png" - command = [ - "docker", "run", "--rm", "-v", - os.getenv("PWD") + "/temp/%s:/srv" % subfolder, - "netzbegruenung/green-spider-screenshotter:latest", - url, filename, sizeargument - ] - subprocess.run(command) - blob = bucket.blob('%s/%s' % (subfolder, filename)) - local_path = './temp/%s/%s' % (subfolder, filename) - if os.path.exists(local_path): - with open(local_path, 'rb') as my_file: - blob.upload_from_file(my_file, content_type="image/png") - blob.make_public() - else: - print("Error: No screenshot created: size=%s, url='%s'" % (size, url)) - return filename if __name__ == "__main__": main() diff --git a/webapp/dist/data/screenshots.json b/webapp/dist/data/screenshots.json index d321a47..5e99df3 100644 --- a/webapp/dist/data/screenshots.json +++ b/webapp/dist/data/screenshots.json @@ -44,12 +44,10 @@ "http://gruene-braunschweig.de/": "76d9736e07d81bbff413a6ee615c4f5b.png", "http://gruene-brookmerland-hinte-suedbrookmerland.de/": "2b6773aae630a9fa1a4c6af68f379a35.png", "http://gruene-bruchkoebel.de/": "6384cfd8940edac48c93c68478fea13a.png", - "http://gruene-brunnthal.de/": "2872dca208a7ae5cc158457e8dbe8804.png", "http://gruene-bubenreuth.de/": "3424d463afb975cb63e7e30b25376985.png", "http://gruene-cadolzburg.de/": "c7c9318bd992cbd9d923257b83a56fbb.png", "http://gruene-coelbe.de/": "bc6e8a939231bd88889fdb10c38dee7f.png", "http://gruene-dachau.de/": "80923fafe3338087e2c4fe77d8fd3e59.png", - "http://gruene-dieburg.de/": "ae9569d6d92500673a25c7b91f0b436c.png", "http://gruene-dietzenbach.de/": "d4b5d9ba4744e01429bd1b5b41b6cb68.png", "http://gruene-dinkelsbuehl.de/": "7e2262ee48d36557ec258baaa4128c6e.png", "http://gruene-dortmund.de/": "59484f851fa0e9119294c7e169d9caab.png", @@ -72,7 +70,6 @@ "http://gruene-freudenstadt.de/": "626bc0f70005f4a7c8ead873e8231815.png", "http://gruene-gescher.de/": "ee5384e403f31dafdd28a7cafdf618b3.png", "http://gruene-graefenberg.de/": "4e22c78d1fa01637e47f82abcedf2882.png", - "http://gruene-grafrath.de/": "9c22a0e941136269e1c683f55de70509.png", "http://gruene-griesheim.de/": "bbe0d591202150b2f28e92f92b34e821.png", "http://gruene-guenzburg.de/": "457a9a6bd103ed71b978a167102e38be.png", "http://gruene-gundelfingen.de/": "b20074c0c41c75b145c469aae0acc521.png", @@ -115,7 +112,6 @@ "http://gruene-landsberg.de/": "cc932a3cce7208b8d9788ddcbe7eaa5d.png", "http://gruene-langenzenn.de/": "1df9054ffa3072f3cad51dce454c9447.png", "http://gruene-lichtenberg.de/": "8e4f9985b1cfaca3beadb089274a8ab3.png", - "http://gruene-linnich.de/": "14ceb538cb393cf38e58b55f59135dd9.png", "http://gruene-lorsch.de/": "c3d5e30266a6e10fc682bc22cb1f6926.png", "http://gruene-luechow-dannenberg.de/": "4cbf3bdf703a8fb52aab6974691a9f7d.png", "http://gruene-luedinghausen.de/": "63934cef314b1afdf0d50e49b00cb944.png", @@ -139,7 +135,6 @@ "http://gruene-nideggen.de/": "603b84a8e5a061ea8bc862892dc8b7e5.png", "http://gruene-niederaula.de/": "28743857b40f1d609028c23beea2e4e7.png", "http://gruene-niederrhein-wupper.de/": "96e85f536a9f7613ace09a9ee2b56be5.png", - "http://gruene-niederzier.de/": "31c70d174e24ca1b6f9a0cd292dc3ac3.png", "http://gruene-nks.de/": "40c05c1139cbf3044bf400aca8b08ceb.png", "http://gruene-norderney.de/": "ab0e40e89f8a68c0c8b106796f42b297.png", "http://gruene-nuernberg.de/": "2cd3d615e6a00c669aca6ff4fda1aa74.png", @@ -193,9 +188,7 @@ "http://gruene-suedwest.de/": "eee459730829261190961daa3f25bda5.png", "http://gruene-swisttal.de/": "b62e46625a1f9f44773458ab6af4e80b.png", "http://gruene-taufkirchen.de/": "87b114edd59252325d139212ca80dc88.png", - "http://gruene-tecklenburg.de/": "a77fe4e93c6218f3f78937df0d88a882.png", "http://gruene-tholey.de/": "02abc75e9e11b427e3f96161d7218f43.png", - "http://gruene-titz.de/": "7fadc96a34327f0c5b16dbfd7401e163.png", "http://gruene-tostedt.de/": "578133a2ea884eec37d52c0b564b2320.png", "http://gruene-troisdorf.de/": "e893ae3f46780b3544b61c1c0da64795.png", "http://gruene-uedem.de/": "d2ac6378177edf99a922f457d144fcac.png", @@ -382,7 +375,6 @@ "http://www.gruenekaarst.de": "bf252d8baa2833d7d82d97471a37e2d6.png", "http://www.gruenestadterfurt.de/": "f289930c624dffd2881fc2652f192c7e.png", "http://www.gruenewesseling.de/": "bdc13beadf59bc7c82bfdc5eb77a4660.png", - "http://www.xn--grne-burbach-elb.de/": "06ed2f735715d3162d94cee98c702ec7.png", "http://www.xn--grne-speyer-uhb.de/": "7219577a288c3137899aaec80642566b.png", "http://www.xn--salzlandgrne-mlb.de/": "5ad1ecb92eecff58134fb86d484e4cb8.png", "http://xn--grne-deggendorf-0vb.de/": "46ba6d5fb20305a2a9763d839ca14af7.png", @@ -618,7 +610,6 @@ "https://gruene-magdeburg.de/start/": "94e95a004da17ae66edc25a749b00b7d.png", "https://gruene-main-kinzig.de/home/": "9af6e6c1ae63fd8aa325d5f88c49b53c.png", "https://gruene-main-tauber.de/startseite.html": "5e3555d5d3b000abae9724de15c21856.png", - "https://gruene-maisach.de/": "e753776a10620ac66824a3f055ac1621.png", "https://gruene-mandelbachtal.de/": "647b371613f324fedf3fa6094ef1ce0c.png", "https://gruene-mansfeld-suedharz.de/home/": "7703c3517ed12817d888b2cb5774d4af.png", "https://gruene-meitingen.de/startseite/": "86cb2c5cc5f192c870ab0695071f27cc.png", @@ -957,7 +948,6 @@ "https://www.gruene-lahn-dill.de/home/": "5ff6a1541c3d2a44768f56955561e9f9.png", "https://www.gruene-lampertheim.de/startseite/": "b29648f5179c78615d57bed12c84c2e1.png", "https://www.gruene-landau.de/": "ab51d90be5514ea512c5240d13ebdfc2.png", - "https://www.gruene-langen.de/": "994effc6fa44d94b8769ecc41e4b56e8.png", "https://www.gruene-lauffen.de/home/": "ed5fd891d4b72ae3b68bda5dafffe17c.png", "https://www.gruene-lehrte.de/home/": "855f4cf0b999a76d110b4c28078c3145.png", "https://www.gruene-leingarten.de/ortsverbandleingarten/": "d61fd10d5300bd06aa42fe87558b8139.png", @@ -1010,7 +1000,6 @@ "https://www.gruene-owl.de/aktuell-aus-owl/": "7096fd293c280205abaef38a41d0e756.png", "https://www.gruene-peine.de/home/": "c705083557219c5a1a4bcbb02122b8ce.png", "https://www.gruene-pi.de/home/": "a061d21132727e0fbd8ea6692cfbfc88.png", - "https://www.gruene-preussischoldendorf.de/": "4d8a8327d510dad698bd71645054d201.png", "https://www.gruene-puchheim.de/home/": "60d35f3926eab452c764da9b58517dee.png", "https://www.gruene-pulheim.de/": "20678f80d9afdb5254649b4d06785860.png", "https://www.gruene-rastede.de/home/": "408e9e5430ff03a7f8e88ad0a715c6b4.png", @@ -1093,13 +1082,5 @@ "https://www.grueneprignitz.de/aktuelles/": "cab6938910d59f29a74e51b13820b9f8.png", "https://www.mein-wachtberg.de/": "3323e371bd2cae2591aca4db6a325da1.png", "https://www.rotenburger-gruene.de/": "4ae543cdcccdc74b598ec7c41c3079c7.png", - "https://www.wds-gruene.de/ortsverband/": "46b150ed54a30b29036bb09a1f3a1171.png", - "https://www.xn--grne-alternative-liste-flrsheim-vdd8o.de/": "b794965c6573c1d3df064883ebfdfb67.png", - "https://www.xn--grne-idstein-elb.de/index.php?id=158": "41bfce9871338e072e47fad735c6cd54.png", - "https://www.xn--grne-mnster-uhbe.de/": "ce680ef05bd61a75733e5f4f8169952e.png", - "https://www.xn--grne-stadtallendorf-69b.de/": "7fdfd4272232dae3ecb94b90ecc634e7.png", - "https://www.xn--grne-teltow-uhb.de/": "1da5c54e1b3354e690f20aff1117fddd.png", - "https://www.xn--grne-wf-o2a.de/startseite/": "fb8c08c887930872d2da8510398c7811.png", - "https://www.xn--padergrn-d6a.de/": "1ac623c3d37173d1595d3d7242651938.png", - "https://xn--grne-lippe-beb.de/startseite/": "a055a381e09977afb9b334621a577d72.png" + "https://www.wds-gruene.de/ortsverband/": "46b150ed54a30b29036bb09a1f3a1171.png" } \ No newline at end of file