#!/usr/bin/python
#
# Generate html pages based on the Mirmon data
#
# This script will parse the json files that contain the mirmon
# data and have been generated before. The json data is used to
# generate html pages that visualize the mirmon data in a human
# readable way.
#
#
# Leaflet is used to visualize the mirrors on a world map. Furthermore,
# DataTables is used to display the mirrors.
#
import datetime
import socket
import os
import tempfile
import urllib.request, json
import xml.etree.ElementTree as ET
import jinja2
# Directory the rendered html pages are written to.
html_folder = "/var/www/mirrorstats.gentoo.org/htdocs/"
# Directory the jinja2 templates are loaded from.
template_folder = "/var/www/mirrorstats.gentoo.org/gentoo-mirrorstats/html/"
# JSON file that persists cache_data between runs.
cache_path = "/var/www/mirrorstats.gentoo.org/var/html_data_cache.json"
# One mirmon state file per mirror category; together they make up the
# "all" page.
mirrorstats = [
    'https://mirrorstats.gentoo.org/' + section + '/state.json'
    for section in ('rsync', 'distfiles', 'snapshots', 'releases', 'experimental')
]
# In-memory cache; starts empty and is replaced by the content of
# cache_path when that file can be loaded.
cache_data = {}
#
# Retrieve and parse the JSON at the given URL
#
def getJson(url):
    """Fetch the document at *url* and return it parsed as JSON.

    The response body is decoded as UTF-8 before parsing.

    Raises whatever ``urllib.request.urlopen`` raises when the request
    fails, and ``ValueError`` when the body is not valid UTF-8 encoded JSON.
    """
    # Use the response as a context manager so the connection is always
    # closed, even when reading fails.
    with urllib.request.urlopen(urllib.request.Request(url)) as response:
        payload = response.read()
    return json.loads(payload.decode('utf-8'))
#
# Retrieve and parse the XML at the given URL
#
def getXML(url):
    """Fetch the document at *url* and return its root XML element.

    Returns an ``xml.etree.ElementTree.Element``.

    Raises whatever ``urllib.request.urlopen`` raises when the request
    fails, and ``xml.etree.ElementTree.ParseError`` when the body is not
    well-formed XML.
    """
    with urllib.request.urlopen(urllib.request.Request(url)) as response:
        payload = response.read()
    # ElementTree has no loads(); fromstring() parses a document and already
    # returns the root element, so no getroot() call is needed.
    return ET.fromstring(payload.decode('utf-8'))
#
# Get the ip of the given host
#
def getIp(hostname):
    """Resolve *hostname* to an IPv4 address string.

    Returns the address reported by ``socket.gethostbyname``, or an empty
    string when the name cannot be resolved.
    """
    try:
        return socket.gethostbyname(hostname)
    except (OSError, ValueError):
        # OSError covers resolver failures (socket.gaierror / socket.herror);
        # ValueError covers names that cannot be encoded for the lookup
        # (UnicodeError is a ValueError). Anything else, in particular
        # KeyboardInterrupt, is allowed to propagate.
        return ""
#
# Render the stats template for a given page, i.e. all, rsync, distfiles...
#
def renderStatsTemplate(templateEnv, page):
    """Render the stats page for *page* and write it below html_folder.

    *page* is either "all", which covers every state file in mirrorstats and
    is written to index.html, or the name of a single category (rsync,
    distfiles, ...), which is written to <page>.html.

    Returns the last-update string reported by getHostList.
    """
    if page != "all":
        output_name = page
        state_urls = ['https://mirrorstats.gentoo.org/' + page + '/state.json']
    else:
        output_name = "index"
        state_urls = mirrorstats

    last_update, host_list = getHostList(state_urls)

    stats_template = templateEnv.get_template("stats.jinja2")
    rendered = stats_template.stream(
        lastUpdate=last_update,
        type=page,
        mirrors=host_list,
        # the same host data again, serialized as JSON
        locations=json.dumps(host_list),
    )
    rendered.dump(html_folder + output_name + ".html")
    return last_update
# read the cache
if os.path.exists(cache_path):
    with open(cache_path, mode='rt') as json_file:
        try:
            cache_data = json.load(json_file)
        except ValueError:
            # Unparsable cache file (invalid JSON or undecodable bytes):
            # carry on with an empty cache, it is rewritten at the end.
            cache_data = {}
# The lookups index cache_data['hosts'] and cache_data['ips'] directly, so
# both tables must exist even on the very first run or after a corrupt or
# malformed cache file.
if not isinstance(cache_data, dict):
    cache_data = {}
cache_data.setdefault('hosts', {})
cache_data.setdefault('ips', {})
#
# Collect all mirror hosts that are present in the given list
#
def getHostList(mirrorstatsList):
    """Collect the mirror hosts found in the given mirmon state files.

    *mirrorstatsList* is a list of state.json URLs. Every host entry found
    under 'Mirrors' is grouped by its 'Host' value.

    Returns a tuple (lastUpdate, hosts):

    * lastUpdate -- 'LastUpdate' of the last state file processed, formatted
      as 'YYYY-MM-DD HH:MM:SS' ("" when the list is empty).
    * hosts -- dict keyed by hostname; each value holds 'Hostname', 'Stats'
      (the raw entries, each tagged with a 'Type'), 'Protocols', and, when
      they could be determined, 'Ip' and 'Location'.

    Resolved addresses and locations are stored in the module-level
    cache_data so later calls do not look them up again.
    """
    # A missing or unreadable cache file leaves cache_data without these
    # tables; create them on demand instead of failing with KeyError.
    host_cache = cache_data.setdefault('hosts', {})
    ip_cache = cache_data.setdefault('ips', {})

    hosts = {}
    lastUpdate = ""

    # process all mirrors
    for mirror_url in mirrorstatsList:
        mirrorData = getJson(mirror_url)
        lastUpdate = datetime.datetime.fromtimestamp(int(mirrorData['LastUpdate'])).strftime('%Y-%m-%d %H:%M:%S')
        # e.g. ".../rsync/state.json" -> "rsync"
        mirror_type = mirror_url.replace("https://mirrorstats.gentoo.org/", "").replace("/state.json", "")

        for mirror_hosts in mirrorData['Mirrors'].values():
            for mirrorHost in mirror_hosts:
                hostname = mirrorHost['Host']

                if hostname not in hosts:
                    if hostname in host_cache:
                        ip = host_cache[hostname]
                    else:
                        ip = getIp(hostname)
                        # populate cache with new value
                        host_cache[hostname] = ip

                    entry = {'Hostname': hostname}
                    # "" marks a host that could not be resolved
                    if ip != "":
                        entry['Ip'] = ip
                    entry['Stats'] = []
                    hosts[hostname] = entry

                mirrorHost['Type'] = mirror_type
                hosts[hostname]['Stats'].append(mirrorHost)

    # compute available protocols (sorted so the output is stable)
    for entry in hosts.values():
        entry['Protocols'] = sorted({stat['Protocol'] for stat in entry['Stats']})

    # compute mirror locations
    for entry in hosts.values():
        if 'Ip' not in entry:
            continue
        ip = entry['Ip']

        if ip not in ip_cache:
            location = getJson("https://ipinfo.io/" + ip + "/json").get('loc')
            if location is None:
                # The response carried no 'loc' field (presumably possible
                # for addresses the service cannot place -- confirm); leave
                # the host without a location, like unresolved hosts.
                continue
            # populate cache with new value
            ip_cache[ip] = location

        entry['Location'] = ip_cache[ip]

    return lastUpdate, hosts
#
# render jinja2
#
# Templates are looked up in template_folder.
templateLoader = jinja2.FileSystemLoader(searchpath=template_folder)
templateEnv = jinja2.Environment(loader=templateLoader)

## stats
# The combined page is rendered first; its timestamp is reused for the help
# page below.
lastUpdate = renderStatsTemplate(templateEnv, "all")
for _page in ("rsync", "distfiles", "snapshots", "releases", "experimental"):
    renderStatsTemplate(templateEnv, _page)

## help
template = templateEnv.get_template("help.jinja2")
template.stream(lastUpdate=lastUpdate).dump(html_folder + "help.html")
#
# write the cache
#
# Write the cache to a temporary file in the destination directory first,
# then move it over the old cache with os.replace() so readers never see a
# partially written file.
with tempfile.NamedTemporaryFile(dir=os.path.dirname(cache_path), delete=False, mode='wt') as fout:
    json.dump(cache_data, fout)
# The file is closed (and therefore flushed) at this point.
# os.chmod() takes an integer mode; the string '0644' raises TypeError.
os.chmod(fout.name, 0o644)
os.replace(fout.name, cache_path)
#
# finish
#
#print("Finished.")