Merge pull request #45 from netzbegruenung/pull-icons

Site-Icons herunterladen und als Teil der Webapp hosten
This commit is contained in:
Marian Steinbach 2018-05-25 19:26:24 +02:00 committed by GitHub
commit 54ac84175d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 67 additions and 11 deletions

View file

@ -9,7 +9,10 @@ dockerimage:
# Run spider in docker image
spider: dockerimage
docker run --rm -ti -v $(PWD)/webapp/dist/data:/out spider
docker run --rm -ti \
-v $(PWD)/webapp/dist/data:/out \
-v $(PWD)/docs/siteicons:/icons \
spider
test: dockerimage
docker run --rm -ti spider /spider_test.py
@ -24,7 +27,6 @@ webapp/node_modules:
# Build webapp
webapp: webapp/node_modules
cd webapp && npx webpack --config webpack.config.js
rm -rf ./docs/*
cp -r webapp/dist/* ./docs/
serve-webapp:

View file

@ -8,6 +8,7 @@ from socket import gethostbyname_ex
from urllib.parse import urljoin
from urllib.parse import urlparse
import certifi
import hashlib
import json
import logging
import os
@ -34,11 +35,13 @@ read_timeout = 10
# Git repo for our data
green_directory_repo = 'https://github.com/netzbegruenung/green-directory.git'
# folder in that repo that holds the data
green_direcory_data_path = 'data/countries/de'
green_direcory_data_path = 'data/countries/de/bb'
green_directory_local_path = './cache/green-directory'
result_path = '/out'
siteicons_path = '/icons'
# IP address of the newthinking GCMS server
gcms_ip = "91.102.13.20"
@ -60,7 +63,7 @@ def dir_entries():
Iterator over all data files in the cloned green directory
"""
path = os.path.join(green_directory_local_path, green_direcory_data_path)
for root, dirs, files in os.walk(path):
for root, _, files in os.walk(path):
for fname in files:
filepath = os.path.join(root, fname)
@ -133,6 +136,49 @@ def normalize_title(s):
s = s.strip()
return s
def download_icon(icon_url):
    """
    Download an icon from the given URL and store it in
    siteicons_path under a file name of <hash>.<ending>.

    The file extension is taken from the URL path if present,
    otherwise derived from the response's Content-Type header.

    Returns the stored file name, or None if no extension could
    be determined. Raises requests exceptions (connection errors,
    HTTP error status) from the download itself.
    """
    # Fallback mapping from Content-Type to file extension,
    # used when the URL path carries no usable extension.
    default_endings = {
        "image/x-icon": "ico",
        "image/vnd.microsoft.icon": "ico",
        "image/png": "png",
        "image/jpeg": "jpg",
    }

    # Download the icon
    r = requests.get(icon_url)
    r.raise_for_status()

    # Content-addressed name: identical icons from different URLs
    # collapse into a single file.
    content_hash = hashlib.md5(r.content).hexdigest()

    extension = ""
    # Derive the extension from the URL path. urlparse strips any
    # query string / fragment first. (The previous code indexed
    # [-1] into the basename, yielding a single character, so this
    # branch was effectively dead.)
    file_name = os.path.basename(urlparse(icon_url).path)
    if "." in file_name:
        ext = file_name.rsplit(".", 1)[-1]
        if ext != "":
            extension = ext

    if extension == "":
        # derive from content type
        t = r.headers.get('content-type')
        try:
            extension = default_endings[t]
        except KeyError:
            logging.error("No file ending defined for icon type '%s'" % t)
            return None

    filename = content_hash + "." + extension.lower()
    path = os.path.join(siteicons_path, filename)
    with open(path, 'wb') as iconfile:
        iconfile.write(r.content)

    return filename
def check_responsiveness(url):
"""
Checks
@ -425,12 +471,12 @@ def check_site(entry):
except requests.exceptions.ConnectionError as e:
logging.error(str(e) + " " + check_url)
check['error'] = "connection"
except requests.exceptions.Timeout as e:
logging.error(str(e) + " " + check_url)
check['error'] = "connection_timeout"
except requests.exceptions.ReadTimeout as e:
logging.error(str(e) + " " + check_url)
check['error'] = "read_timeout"
except requests.exceptions.Timeout as e:
logging.error(str(e) + " " + check_url)
check['error'] = "connection_timeout"
except Exception as e:
logging.error(str(e) + " " + check_url)
check['error'] = "unknown"
@ -450,7 +496,14 @@ def check_site(entry):
continue
if c['content']['icon'] is not None:
icons.add(c['content']['icon'])
result['details']['icons'] = sorted(list(icons))
downloaded_icons = set()
for icon_url in icons:
logging.info("Getting icon %s" % icon_url)
try:
downloaded_icons.add(download_icon(icon_url))
except Exception as e:
logging.error("Could not download icon: %s" % e)
result['details']['icons'] = sorted(list(downloaded_icons))
# collect feeds
feeds = set()
@ -630,7 +683,7 @@ def main():
"district": entry.get("district"),
"city": entry.get("city"),
})
except NameError as ne:
except NameError:
logging.error("Error in %s: 'url' key missing (%s)" % (repr_entry(entry), entry['urls'][n]))

File diff suppressed because one or more lines are too long

View file

@ -66,7 +66,8 @@ $(function(){
// FAVICON
var icon = item.result.FAVICON.value;
row.append('<td class="' + (icon ? 'good' : 'bad') + ' text-center">' + (icon ? ('<img src="' + item.details.icons[0] + '" class="icon">') : '❌') + '</td>');
var iconFile = (icon ? item.details.icons[0] : '');
row.append('<td class="' + (icon ? 'good' : 'bad') + ' text-center" data-order="'+ iconFile +'">' + (icon ? ('<img src="/siteicons/' + iconFile + '" class="icon" title="'+ iconFile +'">') : '❌') + '</td>');
// HTTPS
var hasHTTPS = item.result.HTTPS.value;