#!/usr/bin/env python
#-*- coding:utf-8 -*-

"""Mirrorselect 2.x
 Tool for selecting Gentoo source and rsync mirrors.

Copyright 2005-2012 Gentoo Foundation

	Copyright (C) 2005 Colin Kingsley
	Copyright (C) 2008 Zac Medico
	Copyright (C) 2009 Sebastian Pipping
	Copyright (C) 2009 Christian Ruppert
	Copyright (C) 2012 Brian Dolbec

Distributed under the terms of the GNU General Public License v2
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, version 2 of the License.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.

"""

import math
import os
import signal
import socket
import subprocess
import sys
import time
import urllib
import urlparse
import hashlib
from mirrorselect.mirrorparser3 import MirrorParser3

import codecs


class _TimeoutException(Exception):
    """Raised from the SIGALRM handler when a deep-mode step runs too long."""


def _timeout_handler(signum, frame):
    """SIGALRM handler that turns an expired alarm into _TimeoutException."""
    raise _TimeoutException()


class Extractor(object):
    """The Extractor employs a MirrorParser3 object to get a list of valid
    mirrors, and then filters them. Only the mirrors that should be tested,
    based on user input are saved. They will be in the hosts attribute."""

    def __init__(self, list_url, options, output):
        """Fetch the mirror list from list_url and apply protocol filters.

        options.rsync disables protocol filtering altogether; otherwise
        options.ftp and options.http each restrict the list in turn.
        output is the object used for all messages (write / print_info /
        print_err).
        """
        self.output = output
        parser = MirrorParser3()
        self.hosts = []

        hosts = self.getlist(parser, list_url)
        self.output.write('Extractor(): fetched mirrors.xml,'
                ' %s hosts before filtering\n' % len(hosts), 2)

        if not options.rsync:
            if options.ftp:
                hosts = self.restrict_protocall('ftp', hosts)
            if options.http:
                hosts = self.restrict_protocall('http', hosts)

        self.hosts = hosts

    def restrict_protocall(self, prot, hosts):
        """
        Removes hosts that are not of the specified type.
        "prot" must always be exactly 'http' or 'ftp'.

        The test is a plain prefix match on the URL (host[0]), so 'http'
        also keeps URLs that start with 'https'.
        """
        self.output.print_info('Limiting test to %s hosts. ' % prot )

        myhosts = [host for host in hosts if host[0].startswith(prot)]

        self.output.write('%s of %s removed.\n' % (len(hosts) - len(myhosts),
                len(hosts)) )

        return myhosts

    def getlist(self, parser, url):
        """
        Uses the supplied parser to get a list of urls.
        Takes a parser object and the url of the mirror list.

        A failed download is not fatal here: the failure is logged at
        debug level and the "no mirrors" error below reports it to the user.
        Returns whatever parser.tuples() yields.
        """
        self.output.write('getlist(): fetching ' + url + '\n', 2)

        self.output.print_info('Downloading a list of mirrors...')

        try:
            fetched = urllib.urlopen(url)
            try:
                parser.parse(fetched.read())
            finally:
                # Always release the connection, even if parsing/reading fails.
                fetched.close()
        except EnvironmentError as e:
            self.output.write('getlist(): fetching %s failed: %s\n' % (url, e), 2)

        tuples = parser.tuples()

        if len(tuples) == 0:
            self.output.print_err('Could not get mirror list. Check your internet'
                    ' connection.')

        self.output.write(' Got %d mirrors.\n' % len(tuples))

        return tuples


class Shallow(object):
    """handles rapid server selection via netselect"""

    def __init__(self, hosts, options, output):
        """Run netselect over hosts, in blocks when options.blocksize is set.

        The chosen URLs end up in self.urls.
        """
        self.output = output
        self.urls = []

        if options.blocksize is not None:
            self.netselect_split(hosts, options.servers, options.blocksize)
        else:
            self.netselect(hosts, options.servers)

        if len(self.urls) == 0:
            self.output.print_err('Netselect failed to return any mirrors.'
                    ' Try again using block mode.')

    def netselect(self, hosts, number, quiet=False):
        """
        Uses Netselect to choose the closest hosts, _very_ quickly

        With quiet=False, hosts is a list of (url, ...) sequences and the
        result is stored in self.urls (nothing is returned).
        With quiet=True, hosts is already a list of URL strings and the
        method returns (top_hosts, top_host_dict), where top_host_dict maps
        the score string printed by netselect to the host.
        """
        if not quiet:
            hosts = [host[0] for host in hosts]
        top_host_dict = {}
        top_hosts = []

        if not quiet:
            self.output.print_info('Using netselect to choose the top %d mirrors...' \
                    % number)

        host_string = ' '.join(hosts)

        self.output.write('\nnetselect(): running "netselect -s%d %s"\n'
                % (int(number), host_string), 2)

        proc = subprocess.Popen(
                ['netselect', '-s%d' % (number,)] + hosts,
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        out, err = proc.communicate()

        if err:
            self.output.write('netselect(): netselect stderr: %s\n' % err, 2)

        for line in out.splitlines():
            fields = line.split()
            if len(fields) < 2:
                continue
            # Each useful line is "<score> <host>".
            top_hosts.append(fields[1])
            top_host_dict[fields[0]] = fields[1]

        if not quiet:
            self.output.write('Done.\n')

        self.output.write('\nnetselect(): returning %s and %s\n'
                % (top_hosts, top_host_dict), 2)

        if quiet:
            return top_hosts, top_host_dict
        else:
            self.urls = top_hosts

    @staticmethod
    def _score_key(score):
        """Sort key that orders netselect score strings numerically.

        Scores that are not integers sort after all numeric ones.
        """
        try:
            return (0, int(score), score)
        except ValueError:
            return (1, 0, score)

    def netselect_split(self, hosts, number, block_size):
        """
        This uses netselect to test mirrors in chunks, each at most block_size in length.
        This is done in a tournament style.

        The best `number` hosts over all blocks are stored in self.urls.
        """
        hosts = [host[0] for host in hosts]

        self.output.write('netselect_split() got %s hosts.\n' % len(hosts), 2)

        host_blocks = self.host_blocks(hosts, block_size)

        self.output.write(' split into %s blocks\n' % len(host_blocks), 2)

        # (score, host) pairs from every block.  A list is used rather than
        # a dict keyed by score so that hosts from different blocks that
        # happen to get the same score do not overwrite each other.
        ranked = []

        block_index = 0
        for block in host_blocks:
            self.output.print_info('Using netselect to choose the top '
                    '%d hosts, in blocks of %s. %s of %s blocks complete.'
                    % (number, block_size, block_index, len(host_blocks)))

            host_dict = self.netselect(block, len(block), quiet=True)[1]

            self.output.write('ran netselect(%s, %s), and got %s\n'
                    % (block, len(block), host_dict), 2)

            ranked.extend(host_dict.items())
            block_index += 1

        sys.stderr.write('\rUsing netselect to choose the top '
                '%d hosts, in blocks of %s. %s of %s blocks complete.\n'
                % (number, block_size, block_index, len(host_blocks)))

        # Scores arrive as strings; compare them as numbers so that
        # e.g. "20" ranks ahead of "100".
        ranked.sort(key=lambda pair: self._score_key(pair[0]))
        top_hosts = [host for _score, host in ranked[:number]]

        self.output.write('netselect_split(): returns %s\n' % top_hosts, 2)

        self.urls = top_hosts

    def host_blocks(self, hosts, block_size):
        """
        Takes a list of hosts and a block size, and returns a list of lists of URLs.
        Each of the sublists is at most block_size in length.

        The input list is left untouched.  At least one (possibly empty)
        block is always returned.  Raises ValueError if block_size is
        smaller than 1.
        """
        if block_size < 1:
            raise ValueError('block_size must be at least 1, got %s' % (block_size,))

        host_array = [hosts[start:start + block_size]
                for start in range(0, len(hosts), block_size)]
        if not host_array:
            host_array.append([])

        self.output.write('\n_host_blocks(): returns %s blocks, each about %s in size\n'
                % (len(host_array), len(host_array[0])), 2)

        return host_array


class Deep(object):
    """handles deep mode mirror selection."""

    # md5 of the reference file distfiles/mirrorselect-test; a mirror serving
    # different content is rejected.
    _TEST_FILE_MD5 = "bdf077b2e683c506bf9e8f2494eeb044"

    def __init__(self, hosts, options, output):
        """Time a test download from every host; results go to self.urls.

        options.timeout is used for the dns, connect and download limits,
        options.servers is the number of mirrors to keep, and
        options.ipv4 / options.ipv6 restrict the address families tried.
        """
        self.output = output
        self.urls = []
        self._hosts = hosts
        self._number = options.servers
        self._dns_timeout = options.timeout
        self._connect_timeout = options.timeout
        self._download_timeout = options.timeout

        addr_families = []
        if options.ipv4:
            addr_families.append(socket.AF_INET)
        elif options.ipv6:
            addr_families.append(socket.AF_INET6)
        else:
            addr_families.append(socket.AF_INET)
            if socket.has_ipv6:
                addr_families.append(socket.AF_INET6)

        self._addr_families = addr_families

        self.deeptest()

    def deeptest(self):
        """
        Takes a list of hosts and returns the fastest, using _deeptime()
        Doesn't waste time finishing a test that has already taken longer than
        the slowest mirror we've already got.
        """
        top_hosts = {}
        maxtime = self._download_timeout
        hosts = [host[0] for host in self._hosts]
        num_hosts = len(hosts)

        for index, host in enumerate(hosts):
            self.output.print_info('Downloading 100k files from each mirror... [%s of %s]'\
                    % (index + 1, num_hosts) )

            mytime, ignore = self.deeptime(host, maxtime)

            if not ignore and mytime < maxtime:
                maxtime, top_hosts = self._list_add((mytime, host), \
                        maxtime, top_hosts, self._number)

        self.output.write('deeptest(): got %s hosts, and returned %s\n' % (num_hosts, \
                str(top_hosts.values())), 2)

        self.output.write('\n')	#this just makes output nicer

        #can't just return the dict.values, because we want the fastest mirror first...
        self.urls = [top_hosts[key] for key in sorted(top_hosts)]

    def deeptime(self, url, maxtime):
        """
        Takes a single url and downloads the test file from it.
        maxtime is the time budget (seconds) for the timed download.

        Returns (seconds, False) on success and (None, True) when the
        mirror must be ignored (dns failure, connect failure, download
        error/timeout or wrong file content).
        """
        self.output.write('\n_deeptime(): maxtime is %s\n' % maxtime, 2)

        if url.endswith('/'):	#append the path to the testfile to the URL
            url = url + 'distfiles/mirrorselect-test'
        else:
            url = url + '/distfiles/mirrorselect-test'

        url_parts = urlparse.urlparse(url)
        hostname = url_parts.hostname

        signal.signal(signal.SIGALRM, _timeout_handler)
        try:
            ips = self._resolve_ips(hostname)
            if not ips:
                self.output.write('deeptime(): unable to resolve ip for host %s\n' % \
                        (hostname,), 2)
                return (None, True)

            ip = self._wake_up(url, hostname, ips)
            if ip is None:
                self.output.write('deeptime(): unable to ' + \
                        'connect to host %s\n' % \
                        (hostname,), 2)
                return (None, True)

            delta = self._timed_download(url, hostname, ip, maxtime)
            if delta is None:
                return (None, True)
        finally:
            # Put the default handler back on every exit path, not only
            # after a successful download.
            signal.signal(signal.SIGALRM, signal.SIG_DFL)

        self.output.write('deeptime(): download completed.\n', 2)
        self.output.write('deeptime(): %s seconds for host %s\n' % (delta, url), 2)
        return (delta, False)

    def _resolve_ips(self, hostname):
        """Resolve hostname for each configured address family.

        Returns the list of addresses found (IPv6 ones wrapped in
        brackets); the list is empty when nothing could be resolved.
        """
        ips = []
        for family in self._addr_families:
            ipv6 = family == socket.AF_INET6
            try:
                # The alarm is cancelled in an inner finally so that it
                # cannot fire while one of the handlers below is running.
                try:
                    signal.alarm(self._dns_timeout)
                    for addrinfo in socket.getaddrinfo(hostname, None,
                            family, socket.SOCK_STREAM):
                        ip = addrinfo[4][0]
                        if ipv6:
                            ip = "[%s]" % ip
                        ips.append(ip)
                finally:
                    signal.alarm(0)
            except socket.error as e:
                self.output.write('deeptime(): dns error for host %s: %s\n' % \
                        (hostname, e), 2)
            except _TimeoutException:
                self.output.write('deeptime(): dns timeout for host %s\n' % \
                        (hostname,), 2)
        return ips

    def _wake_up(self, url, hostname, ips):
        """Open and close one untimed connection to "wake up" the route.

        Returns the ip associated with the successful attempt, or None when
        every attempt failed.
        """
        f = None
        ip = None
        # NOTE(review): each attempt opens `url` itself, not the individual
        # ip, so the loop acts as one retry per resolved address.
        for ip in ips:
            try:
                try:
                    signal.alarm(self._connect_timeout)
                    f = urllib.urlopen(url)
                    break
                finally:
                    signal.alarm(0)
            except EnvironmentError as e:
                self.output.write(('deeptime(): connection to host %s ' + \
                        'failed for ip %s: %s\n') % \
                        (hostname, ip, e), 2)
            except _TimeoutException:
                self.output.write(('deeptime(): connection to host %s ' + \
                        'timed out for ip %s\n') % \
                        (hostname, ip), 2)

        if f is None:
            return None

        try:
            # Close the initial "wake up" connection.
            try:
                signal.alarm(self._connect_timeout)
                f.close()
            finally:
                signal.alarm(0)
        except EnvironmentError as e:
            self.output.write(('deeptime(): close connection to host %s ' + \
                    'failed for ip %s: %s\n') % \
                    (hostname, ip, e), 2)
        except _TimeoutException:
            self.output.write(('deeptime(): close connection to host %s ' + \
                    'timed out for ip %s\n') % \
                    (hostname, ip), 2)

        return ip

    def _timed_download(self, url, hostname, ip, maxtime):
        """Download the test file under a maxtime alarm.

        Returns the elapsed seconds, or None if the download failed, timed
        out, or the content does not match the expected checksum.
        """
        try:
            # The first connection serves to "wake up" the route between
            # the local and remote machines. A second connection is used
            # for the timed run.
            try:
                signal.alarm(int(math.ceil(maxtime)))
                stime = time.time()
                f = urllib.urlopen(url)
                try:
                    digest = hashlib.md5(f.read()).hexdigest()
                    delta = time.time() - stime
                finally:
                    # Release the connection on success and on mismatch.
                    f.close()
            finally:
                signal.alarm(0)
        except EnvironmentError as e:
            self.output.write(('deeptime(): download from host %s ' + \
                    'failed for ip %s: %s\n') % \
                    (hostname, ip, e), 2)
            return None
        except _TimeoutException:
            self.output.write(('deeptime(): download from host %s ' + \
                    'timed out for ip %s\n') % \
                    (hostname, ip), 2)
            return None

        if digest != self._TEST_FILE_MD5:
            self.output.write('deeptime(): unexpected test file content '
                    'from host %s\n' % (hostname,), 2)
            return None

        return delta

    def _list_add(self, time_host, maxtime, host_dict, maxlen):
        """
        Takes arguments ((time, host), maxtime, host_dict, maxlen)
        Adds a new time:host pair to the dictionary of top hosts.
        If the dictionary is full, the slowest host is removed to make space.
        Returns the new maxtime, be it the specified timeout, or the slowest host.
        """
        mytime, host = time_host

        if len(host_dict) < maxlen:	#still have room, and host is fast. add it.
            self.output.write('_list_add(): added host %s. with a time of %s\n' %
                    (host, mytime), 2)
        else: #We need to make room in the dict before we add. Kill the slowest.
            self.output.write('_list_add(): Adding host %s with a time of %s\n' %
                    (host, mytime), 2)
            slowest = max(host_dict)
            self.output.write('_list_add(): removing %s\n' % host_dict[slowest], 2)
            del host_dict[slowest]

        host_dict[mytime] = host

        #done adding. now return the appropriate time
        if len(host_dict) < maxlen:	#check again to choose new timeout
            self.output.write('_list_add(): host_dict is not full yet.'
                    ' reusing timeout of %s sec.\n' % maxtime, 2)
            retval = maxtime
        else:
            self.output.write('_list_add(): host_dict is full. Selecting the best'
                    ' timeout\n', 2)
            retval = min(max(host_dict), maxtime)

        self.output.write('_list_add(): new max time is %s seconds,'
                ' and now len(host_dict)= %s\n' % (retval, len(host_dict)), 2)

        return retval, host_dict


class Interactive(object):
    """Handles interactive host selection."""

    def __init__(self, hosts, options, output):
        """Show the selection dialog; exit with status 1 if nothing is chosen."""
        self.output = output
        self.urls = []

        self.interactive(hosts, options)
        self.output.write('Interactive.interactive(): self.urls = %s\n' % self.urls, 2)

        if len(self.urls[0]) == 0:
            sys.exit(1)

    def interactive(self, hosts, options):
        """
        Presents the mirrors in a dialog(1) menu and stores the selection
        (one URL per list entry) in self.urls.

        rsync mode uses a radiolist (single choice); otherwise a checklist.
        Entries are sorted by country, then name.
        """
        if options.rsync:
            dialog = ['dialog', '--stdout', '--title', 'Gentoo RSYNC Mirrors',
                    '--radiolist', 'Please select your desired mirror:']
        else:
            prompt = 'Please select your desired mirrors:'
            if not options.ipv4 and not options.ipv6:
                prompt += '\n* = supports ipv6'
            dialog = ['dialog', '--separate-output', '--stdout', '--title',
                    'Gentoo Download Mirrors', '--checklist', prompt]

        dialog += ['20', '110', '14']

        for (url, args) in sorted(hosts, key = lambda x: (x[1]['country'].lower(), x[1]['name'].lower()) ):
            marker = ""
            if options.rsync and not url.endswith("/gentoo-portage"):
                url+="/gentoo-portage"
            if (not options.ipv6 and not options.ipv4) and args['ipv6'] == 'y':
                marker = "* "
            if options.ipv6 and ( args['ipv6'] == 'n' ): continue
            if options.ipv4 and ( args['ipv4'] == 'n' ): continue

            dialog += [url, '%s%s: %s' % (marker, args['country'], args['name']), 'OFF']

        # The mirror data comes from the network, so it is handed to dialog
        # as an argument vector instead of being pasted into a shell command
        # line, where quotes, `$` or backticks would be interpreted.
        argv = [codecs.encode(arg, 'utf8') for arg in dialog]
        try:
            proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
            mirrors = proc.communicate()[0]
        except OSError as e:
            # dialog is missing or not executable: report it and fall through
            # with an empty selection.
            self.output.print_err('Could not run dialog: %s' % (e,))
            mirrors = ''

        self.urls = mirrors.rstrip().split('\n')