#!/usr/bin/python
#
# Generate html pages based on the Mirmon data
#
# This script will parse the json files that contain the mirmon
# data and have been generated before. The json data is used to
# generate html pages that visualize the mirmon data in a human
# readable way.
#
# Leaflet is used to visualize the mirrors on a world map. Further-
# more Datatables are used to display the mirrors.
#

import datetime
import json
import os
import socket
import tempfile
import urllib.request
import xml.etree.ElementTree as ET

import jinja2

html_folder = "/var/www/mirrorstats.gentoo.org/htdocs/"
template_folder = "/var/www/mirrorstats.gentoo.org/gentoo-mirrorstats/html/"
cache_path = "/var/www/mirrorstats.gentoo.org/var/html_data_cache.json"

mirrorstats = [
    'https://mirrorstats.gentoo.org/rsync/state.json',
    'https://mirrorstats.gentoo.org/distfiles/state.json',
    'https://mirrorstats.gentoo.org/snapshots/state.json',
    'https://mirrorstats.gentoo.org/releases/state.json',
    'https://mirrorstats.gentoo.org/experimental/state.json'
]

# Persistent lookup cache, loaded from and written back to cache_path:
#   cache_data['hosts'] maps hostname -> resolved IP address
#   cache_data['ips']   maps IP address -> location string from ipinfo.io
cache_data = {}


#
# Retrieve and parse the JSON at the given URL
#
# Returns the decoded JSON document. Network and parse errors propagate
# to the caller.
#
def getJson(url):
    req = urllib.request.Request(url)
    # close the HTTP response deterministically instead of leaking it
    with urllib.request.urlopen(req) as response:
        raw = response.read()
    return json.loads(raw.decode('utf-8'))


#
# Retrieve and parse the XML at the given URL
#
# Returns the root Element of the parsed document.
#
def getXML(url):
    req = urllib.request.Request(url)
    with urllib.request.urlopen(req) as response:
        raw = response.read()
    # ElementTree has no loads(); fromstring() parses the document and
    # already returns the root element, so no getroot() call is needed.
    # The raw bytes are passed on so the parser honours the encoding
    # declared in the document.
    return ET.fromstring(raw)


#
# Get the ip of the given host
#
# Returns the IPv4 address as a string, or "" if the name cannot be
# resolved.
#
def getIp(hostname):
    try:
        return socket.gethostbyname(hostname)
    except (OSError, UnicodeError):
        # OSError covers socket.gaierror / socket.herror; UnicodeError is
        # raised for host names that cannot be IDNA-encoded.
        return ""


#
# Render the stats template for a given page, i.e. all, rsync, distfiles...
#
# The page "all" combines every entry of `mirrorstats` and is written to
# index.html; any other page is written to <page>.html.
# Returns the formatted time of the last mirmon update.
#
def renderStatsTemplate(templateEnv, page):
    if page == "all":
        mirrorstatsList = mirrorstats
        page_name = "index"
    else:
        mirrorstatsList = ['https://mirrorstats.gentoo.org/' + page + '/state.json']
        page_name = page

    lastUpdate, hostList = getHostList(mirrorstatsList)

    template = templateEnv.get_template("stats.jinja2")
    template.stream(lastUpdate=lastUpdate,
                    type=page,
                    mirrors=hostList,
                    locations=json.dumps(hostList)).dump(html_folder + page_name + ".html")
    return lastUpdate


#
# Collect all mirrors that are present in the given list of state.json URLs
#
# Returns a tuple (lastUpdate, hosts). lastUpdate is the formatted
# 'LastUpdate' timestamp of the last URL processed ("" for an empty list).
# hosts maps each hostname to a dict with the keys 'Hostname', 'Stats',
# 'Protocols' and, when they could be determined, 'Ip' and 'Location'.
#
# Reads and updates the module-level cache_data.
#
def getHostList(mirrorstatsList):
    hosts = {}
    lastUpdate = ""

    # process all mirrors
    for mirror_url in mirrorstatsList:
        mirrorData = getJson(mirror_url)
        lastUpdate = datetime.datetime.fromtimestamp(
            int(mirrorData['LastUpdate'])).strftime('%Y-%m-%d %H:%M:%S')
        # e.g. ".../rsync/state.json" -> "rsync"
        mirrorType = mirror_url.replace("https://mirrorstats.gentoo.org/", "").replace("/state.json", "")

        for mirror in mirrorData['Mirrors']:
            for mirrorHost in mirrorData['Mirrors'][mirror]:
                hostname = mirrorHost['Host']
                if hostname not in hosts:
                    hosts[hostname] = {'Hostname': hostname, 'Stats': []}
                    # An empty cached value is treated as a miss and failed
                    # lookups are not cached, so a transient DNS failure
                    # does not hide the mirror forever.
                    ip = cache_data['hosts'].get(hostname) or getIp(hostname)
                    if ip:
                        # populate cache with new value
                        cache_data['hosts'][hostname] = ip
                        hosts[hostname]['Ip'] = ip

                mirrorHost['Type'] = mirrorType
                hosts[hostname]['Stats'].append(mirrorHost)

    # compute available protocols (sorted to keep the output stable
    # between runs)
    for host in hosts.values():
        host['Protocols'] = sorted({stat['Protocol'] for stat in host['Stats']})

    # compute mirror locations
    for host in hosts.values():
        ip = host.get('Ip')
        if not ip:
            continue

        location = cache_data['ips'].get(ip)
        if location is None:
            mirrorGeoData = getJson("https://ipinfo.io/" + ip + "/json")
            location = mirrorGeoData.get('loc')
            if location is None:
                # No coordinates in the reply: leave this host without a
                # location instead of aborting the whole run.
                # NOTE(review): assumes the templates cope with a host that
                # has 'Ip' but no 'Location' -- confirm.
                continue
            # populate cache with new value
            cache_data['ips'][ip] = location
        host['Location'] = location

    return lastUpdate, hosts


#
# read the cache
#
# The cache is best-effort: an unreadable or corrupt file is ignored and
# simply rebuilt during this run.
#
if os.path.exists(cache_path):
    try:
        with open(cache_path, mode='rt') as json_file:
            loaded_cache = json.load(json_file)
    except (OSError, ValueError):
        loaded_cache = None
    if isinstance(loaded_cache, dict):
        cache_data = loaded_cache

# make sure both sections exist, also on the first run or after a
# corrupt cache file, since getHostList() indexes them directly
for cache_section in ('hosts', 'ips'):
    if not isinstance(cache_data.get(cache_section), dict):
        cache_data[cache_section] = {}

#
# render jinja2
#
templateLoader = jinja2.FileSystemLoader(searchpath=template_folder)
templateEnv = jinja2.Environment(loader=templateLoader)

## stats
lastUpdate = renderStatsTemplate(templateEnv, "all")
renderStatsTemplate(templateEnv, "rsync")
renderStatsTemplate(templateEnv, "distfiles")
renderStatsTemplate(templateEnv, "snapshots")
renderStatsTemplate(templateEnv, "releases")
renderStatsTemplate(templateEnv, "experimental")

## about
template = templateEnv.get_template("help.jinja2")
template.stream(lastUpdate=lastUpdate).dump(html_folder + "help.html")

#
# write the cache
#
# Written to a temporary file in the same directory and then moved into
# place, so readers never see a partially written cache.
#
with tempfile.NamedTemporaryFile(dir=os.path.dirname(cache_path), delete=False, mode='wt') as fout:
    json.dump(cache_data, fout)
# the file is closed (and therefore flushed) before it is published;
# os.chmod() needs an integer mode, not a string
os.chmod(fout.name, 0o644)
os.replace(fout.name, cache_path)

#
# finish
#
#print("Finished.")