aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Pipping <sebastian@pipping.org>2009-07-07 23:51:20 +0200
committerSebastian Pipping <sebastian@pipping.org>2009-07-07 23:51:20 +0200
commita0bca12d90e145d361fb9f6d20e0ac5d89bf5a58 (patch)
treea9ca88698cf4346aca185b15afd8438182966c11 /mirrorselect
parentAdd parser for mirrors3.xml and migrate to it (diff)
downloadmirrorselect-a0bca12d90e145d361fb9f6d20e0ac5d89bf5a58.tar.gz
mirrorselect-a0bca12d90e145d361fb9f6d20e0ac5d89bf5a58.tar.bz2
mirrorselect-a0bca12d90e145d361fb9f6d20e0ac5d89bf5a58.zip
Move mirror parsers to module "mirrorselect"
Diffstat (limited to 'mirrorselect')
-rwxr-xr-xmirrorselect892
-rw-r--r--mirrorselect/__init__.py0
-rw-r--r--mirrorselect/mirrorparser.py90
-rw-r--r--mirrorselect/mirrorparser3.py58
4 files changed, 148 insertions, 892 deletions
diff --git a/mirrorselect b/mirrorselect
deleted file mode 100755
index 41dcf1f..0000000
--- a/mirrorselect
+++ /dev/null
@@ -1,892 +0,0 @@
-#!/usr/bin/python
-
-# Mirrorselect 1.x
-# Tool for selecting Gentoo source and rsync mirrors.
-#
-# Copyright (C) 2005 Colin Kingsley <tercel@gentoo.org>
-# Copyright (C) 2008 Zac Medico <zmedico@gentoo.org>
-# Copyright (C) 2009 Sebastian Pipping <sebastian@pipping.org>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
-
-__revision__ = '1.4'
-
-import math
-import os
-import re
-import shlex
-import shutil
-import signal
-import socket
-import string
-import subprocess
-import sys
-import time
-import urllib
-import urlparse
-from optparse import IndentedHelpFormatter, OptionParser
-from mirrorparser3 import MirrorParser3, MIRRORS_3_XML
-import codecs
-
-class Output(object):
- """Handles text output. Only prints messages with level <= verbosity.
- Therefore, verbosity=2 is everything (debug), and verbosity=0 is urgent
- messages only (quiet)."""
-
- def __init__(self, verbosity=1, out=sys.stderr):
- esc_seq = "\x1b["
- codes={}
-
- codes["reset"] = esc_seq + "39;49;00m"
- codes["bold"] = esc_seq + "01m"
- codes["blue"] = esc_seq + "34;01m"
- codes["green"] = esc_seq + "32;01m"
- codes["yellow"] = esc_seq + "33;01m"
- codes["red"] = esc_seq + "31;01m"
-
- self.codes = codes
- del codes
-
- self.verbosity = verbosity
- self.file = out
-
- def red(self, text):
- return self.codes["red"]+text+self.codes["reset"]
-
- def green(self, text):
- return self.codes["green"]+text+self.codes["reset"]
-
- def white(self, text):
- return self.codes["bold"]+text+self.codes["reset"]
-
- def blue(self, text):
- return self.codes["blue"]+text+self.codes["reset"]
-
- def yellow(self, text):
- return self.codes["yellow"]+text+self.codes["reset"]
-
- def print_info(self, message, level=1):
- """Prints an info message with a green star, like einfo."""
- if level <= self.verbosity:
- self.file.write('\r' + self.green('* ') + message)
- self.file.flush()
-
- def print_warn(self, message, level=1):
- """Prints a warning."""
- if level <= self.verbosity:
- self.file.write(self.yellow('Warning: ') + message)
- self.file.flush()
-
- def print_err(self, message, level=0):
- """prints an error message with a big red ERROR."""
- if level <= self.verbosity:
- self.file.write(self.red('\nERROR: ') + message + '\n')
- self.file.flush()
- sys.exit(1)
-
- def write(self, message, level=1):
- """A wrapper arounf stderr.write, to enforce verbosity settings."""
- if level <= self.verbosity:
- self.file.write(message)
- self.file.flush()
-
-
-class ColoredFormatter(IndentedHelpFormatter):
-
- """HelpFormatter with colorful output.
-
- Extends format_option.
- Overrides format_heading.
- """
-
- def format_heading(self, heading):
- """Return a colorful heading."""
- return "%*s%s:\n" % (self.current_indent, "", output.white(heading))
-
- def format_option(self, option):
- """Return colorful formatted help for an option."""
- option = IndentedHelpFormatter.format_option(self, option)
- # long options with args
- option = re.sub(
- r"--([a-zA-Z]*)=([a-zA-Z]*)",
- lambda m: "-%s %s" % (output.green(m.group(1)),
- output.blue(m.group(2))),
- option)
- # short options with args
- option = re.sub(
- r"-([a-zA-Z]) ?([0-9A-Z]+)",
- lambda m: " -" + output.green(m.group(1)) + ' ' + output.blue(m.group(2)),
- option)
- # options without args
- option = re.sub(
- r"-([a-zA-Z\d]+)", lambda m: "-" + output.green(m.group(1)),
- option)
- return option
-
- def format_description(self, description):
- """Do not wrap."""
- return description + '\n'
-
-
-class Extractor(object):
- """The Extractor employs a MirrorParser3 object to get a list of valid
- mirrors, and then filters them. Only the mirrors that should be tested, based on
- user input are saved. They will be in the hosts attribute."""
-
- def __init__(self, list_url, options):
- parser = MirrorParser3()
- self.hosts = []
-
- if options.rsync:
- output.write('using hardcoded rsync mirrors\n', 2)
- self.hosts = [
- ('rsync://rsync.gentoo.org/gentoo-portage', 'Default'),
- ('rsync://rsync.namerica.gentoo.org/gentoo-portage', 'North'
- ' America'),
- ('rsync://rsync.samerica.gentoo.org/gentoo-portage', 'South'
- ' America'),
- ('rsync://rsync.europe.gentoo.org/gentoo-portage', 'Europe'),
- ('rsync://rsync.asia.gentoo.org/gentoo-portage', 'Asia'),
- ('rsync://rsync.au.gentoo.org/gentoo-portage', 'Australia')]
- else:
- hosts = self.getlist(parser, list_url)
- output.write('Extractor(): fetched mirrors.xml,'
- ' %s hosts before filtering\n' % len(hosts), 2)
-
- if options.ftp:
- hosts = self.restrict_protocall('ftp', hosts)
- if options.http:
- hosts = self.restrict_protocall('http', hosts)
-
- for host in hosts:
- if not host[0].startswith('rsync'):
- self.hosts.append(host)
-
- def restrict_protocall(self, prot, hosts):
- """
- Removes hosts that are not of the specified type.
- "prot" must always be exactly 'http' or 'ftp'.
- """
- myhosts = []
-
- output.print_info('Limiting test to %s hosts. ' % prot )
-
- for host in hosts:
- if host[0].startswith(prot):
- myhosts.append(host)
-
- output.write('%s of %s removed.\n' % (len(hosts) - len(myhosts),
- len(hosts)) )
-
- return myhosts
-
-
- def getlist(self, parser, url):
- """
- Uses the supplied parser to get a list of urls.
- Takes a parser object, url, and filering options.
- """
-
- output.write('getlist(): fetching ' + url + '\n', 2)
-
- output.print_info('Downloading a list of mirrors...')
-
- try:
- parser.parse(urllib.urlopen(url).read())
- except EnvironmentError:
- pass
-
- def fix_tuple(tuple):
- url, name = tuple
- return (url, codecs.encode(name, 'utf8'))
-
- tuples = [fix_tuple(t) for t in parser.tuples()]
- if len(tuples) == 0:
- output.print_err('Could not get mirror list. Check your internet'
- ' connection.')
-
- output.write(' Got %d mirrors.\n' % len(tuples))
-
- return tuples
-
-
-class Shallow(object):
- """handles rapid server selection via netselect"""
-
- def __init__(self, hosts, options):
- self.urls = []
-
- if options.blocksize is not None:
- self.netselect_split(hosts, options.servers,
- options.blocksize)
- else:
- self.netselect(hosts, options.servers)
-
- if len(self.urls) == 0:
- output.print_err('Netselect failed to return any mirrors.'
- ' Try again using block mode.')
-
-
- def netselect(self, hosts, number, quiet=False):
- """
- Uses Netselect to choose the closest hosts, _very_ quickly
- """
- if not quiet:
- hosts = [host[0] for host in hosts]
- top_host_dict = {}
- top_hosts = []
-
- if not quiet:
- output.print_info('Using netselect to choose the top %d mirrors...' \
- % number)
-
- host_string = ' '.join(hosts)
-
- output.write('\nnetselect(): running "netselect -s%d %s"\n' % (int(number),
- host_string), 2)
-
- proc = subprocess.Popen( ['netselect', '-s%d' % (number,)] + hosts,
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
- out, err = proc.communicate()
-
- if err:
- output.write('netselect(): netselect stderr: %s\n' % err, 2)
-
- for line in out.splitlines():
- line = line.split()
- if len(line) < 2:
- continue
- top_hosts.append(line[1])
- top_host_dict[line[0]] = line[1]
-
- if not quiet:
- output.write('Done.\n')
-
- output.write('\nnetselect(): returning %s and %s\n' % (top_hosts,
- top_host_dict), 2)
-
- if quiet:
- return top_hosts, top_host_dict
- else:
- self.urls = top_hosts
-
-
- def netselect_split(self, hosts, number, block_size):
- """
- This uses netselect to test mirrors in chunks, each at most block_size in length.
- This is done in a tournament style.
- """
- hosts = [host[0] for host in hosts]
-
- output.write('netselect_split() got %s hosts.\n' % len(hosts), 2)
-
- host_blocks = self.host_blocks(hosts, block_size)
-
- output.write(' split into %s blocks\n' % len(host_blocks), 2)
-
- top_hosts = []
- ret_hosts = {}
-
- block_index = 0
- for block in host_blocks:
- output.print_info('Using netselect to choose the top '
- '%d hosts, in blocks of %s. %s of %s blocks complete.'
- % (number, block_size, block_index, len(host_blocks)))
-
- host_dict = self.netselect(block, len(block), quiet=True)[1]
-
- output.write('ran netselect(%s, %s), and got %s\n' % (block, len(block),
- host_dict), 2)
-
- for key in host_dict.keys():
- ret_hosts[key] = host_dict[key]
- block_index += 1
-
- sys.stderr.write('\rUsing netselect to choose the top'
- '%d hosts, in blocks of %s. %s of %s blocks complete.\n'
- % (number, block_size, block_index, len(host_blocks)))
-
- host_ranking_keys = ret_hosts.keys()
- host_ranking_keys.sort()
-
- for rank in host_ranking_keys[:number]:
- top_hosts.append(ret_hosts[rank])
-
- output.write('netselect_split(): returns %s\n' % top_hosts, 2)
-
- self.urls = top_hosts
-
-
- def host_blocks(self, hosts, block_size):
- """
- Takes a list of hosts and a block size, and returns an list of lists of URLs.
- Each of the sublists is at most block_size in length.
- """
- host_array = []
- mylist = []
-
- while len(hosts) > block_size:
- while (len(mylist) < block_size):
- mylist.append(hosts.pop())
- host_array.append(mylist)
- mylist = []
- host_array.append(hosts)
-
- output.write('\n_host_blocks(): returns %s blocks, each about %s in size\n'
- % (len(host_array), len(host_array[0])), 2)
-
- return host_array
-
-
-class Deep(object):
- """handles deep mode mirror selection."""
-
- _bufsize = 4096
-
- def __init__(self, hosts, options):
- self.urls = []
- self._hosts = hosts
- self._number = options.servers
- self._dns_timeout = options.timeout
- self._connect_timeout = options.timeout
- self._download_timeout = options.timeout
-
- addr_families = []
- if options.ipv4:
- addr_families.append(socket.AF_INET)
- elif options.ipv6:
- addr_families.append(socket.AF_INET6)
- else:
- addr_families.append(socket.AF_INET)
- if socket.has_ipv6:
- addr_families.append(socket.AF_INET6)
-
- self._addr_families = addr_families
-
- self.deeptest()
-
- def deeptest(self):
- """
- Takes a list of hosts and returns the fastest, using _deeptime()
- Doesn't waste time finnishing a test that has already taken longer than
- the slowest mirror weve already got.
- """
- top_hosts = {}
- prog = 0
- maxtime = self._download_timeout
- hosts = [host[0] for host in self._hosts]
- num_hosts = len(hosts)
-
- for host in hosts:
-
- prog += 1
- output.print_info('Downloading 100k files from each mirror... [%s of %s]'\
- % (prog, num_hosts) )
-
- mytime, ignore = self.deeptime(host, maxtime)
-
- if not ignore and mytime < maxtime:
- maxtime, top_hosts = self._list_add((mytime, host), \
- maxtime, top_hosts, self._number)
- else:
- continue
-
- output.write('deeptest(): got %s hosts, and returned %s\n' % (num_hosts, \
- str(top_hosts.values())), 2)
-
- output.write('\n') #this just makes output nicer
-
- #can't just return the dict.valuse, because we want the fastest mirror first...
- keys = top_hosts.keys()
- keys.sort()
-
- rethosts = []
- for key in keys:
- rethosts.append(top_hosts[key])
-
- self.urls = rethosts
-
-
- def deeptime(self, url, maxtime):
- """
- Takes a single url and fetch command, and downloads the test file.
- Can be given an optional timeout, for use with a clever algorithm.
- Like mine.
- """
- output.write('\n_deeptime(): maxtime is %s\n' % maxtime, 2)
-
- if url.endswith('/'): #append the path to the testfile to the URL
- url = url + 'distfiles/mirrorselect-test'
- else:
- url = url + '/distfiles/mirrorselect-test'
-
- url_parts = urlparse.urlparse(url)
-
- class TimeoutException(Exception):
- pass
-
- def timeout_handler(signum, frame):
- raise TimeoutException()
-
- signal.signal(signal.SIGALRM, timeout_handler)
-
- ips = []
- for family in self._addr_families:
- ipv6 = family == socket.AF_INET6
- try:
- try:
- signal.alarm(self._dns_timeout)
- for family, socktype, proto, canonname, sockaddr in \
- socket.getaddrinfo(url_parts.hostname, None,
- family, socket.SOCK_STREAM):
- ip = sockaddr[0]
- if ipv6:
- ip = "[%s]" % ip
- ips.append(ip)
- finally:
- signal.alarm(0)
- except socket.error, e:
- output.write('deeptime(): dns error for host %s: %s\n' % \
- (url_parts.hostname, e), 2)
- except TimeoutException:
- output.write('deeptime(): dns timeout for host %s\n' % \
- (url_parts.hostname,), 2)
-
- if not ips:
- output.write('deeptime(): unable to resolve ip for host %s\n' % \
- (url_parts.hostname,), 2)
- return (None, True)
-
- delta = 0
- f = None
-
- for ip in ips:
- try:
- ip_url = url.replace(url_parts.hostname, ip, 1)
- try:
- signal.alarm(self._connect_timeout)
- f = urllib.urlopen(ip_url)
- break
- finally:
- signal.alarm(0)
- except EnvironmentError, e:
- output.write(('deeptime(): connection to host %s ' + \
- 'failed for ip %s: %s\n') % \
- (url_parts.hostname, ip, e), 2)
- except TimeoutException:
- output.write(('deeptime(): connection to host %s ' + \
- 'timed out for ip %s\n') % \
- (url_parts.hostname, ip), 2)
-
- if f is None:
- output.write('deeptime(): unable to ' + \
- 'connect to host %s\n' % \
- (url_parts.hostname,), 2)
- return (None, True)
-
- try:
-
- # Close the initial "wake up" connection.
- try:
- signal.alarm(self._connect_timeout)
- f.close()
- finally:
- signal.alarm(0)
-
- except EnvironmentError, e:
- output.write(('deeptime(): close connection to host %s ' + \
- 'failed for ip %s: %s\n') % \
- (url_parts.hostname, ip, e), 2)
- except TimeoutException:
- output.write(('deeptime(): close connection to host %s ' + \
- 'timed out for ip %s\n') % \
- (url_parts.hostname, ip), 2)
-
- try:
-
- # The first connection serves to "wake up" the route between
- # the local and remote machines. A second connection is used
- # for the timed run.
- try:
- signal.alarm(int(math.ceil(maxtime)))
- stime = time.time()
- f = urllib.urlopen(ip_url)
- while f.read(self._bufsize):
- pass
- delta = time.time() - stime
- f.close()
- finally:
- signal.alarm(0)
-
- except EnvironmentError, e:
- output.write(('deeptime(): download from host %s ' + \
- 'failed for ip %s: %s\n') % \
- (url_parts.hostname, ip, e), 2)
- return (None, True)
- except TimeoutException:
- output.write(('deeptime(): download from host %s ' + \
- 'timed out for ip %s\n') % \
- (url_parts.hostname, ip), 2)
- return (None, True)
-
- signal.signal(signal.SIGALRM, signal.SIG_DFL)
-
- output.write('deeptime(): download completed.\n', 2)
- output.write('deeptime(): %s seconds for host %s\n' % (delta, url), 2)
- return (delta, False)
-
- def _list_add(self, time_host, maxtime, host_dict, maxlen):
- """
- Takes argumets ((time, host), maxtime, host_dict, maxlen)
- Adds a new time:host pair to the dictionary of top hosts.
- If the dictionary is full, the slowest host is removed to make space.
- Returns the new maxtime, be it the specified timeout, or the slowest host.
- """
- if len(host_dict) < maxlen: #still have room, and host is fast. add it.
-
- output.write('_list_add(): added host %s. with a time of %s\n' %
- (time_host[1], time_host[0]), 2)
-
- host_dict.update(dict([time_host]))
- times = host_dict.keys()
- times.sort()
-
- else: #We need to make room in the dict before we add. Kill the slowest.
- output.write('_list_add(): Adding host %s with a time of %s\n' %
- (time_host[1], time_host[0]), 2)
- times = host_dict.keys()
- times.sort()
- output.write('_list_add(): removing %s\n' % host_dict[times[-1]],
- 2)
- del host_dict[times[-1]]
- host_dict.update(dict([time_host]))
- #done adding. now return the appropriate time
- times = host_dict.keys()
- times.sort()
-
- if len(host_dict) < maxlen: #check again to choose new timeout
- output.write('_list_add(): host_dict is not full yet.'
- ' reusing timeout of %s sec.\n' % maxtime, 2)
- retval = maxtime
- else:
- output.write('_list_add(): host_dict is full. Selecting the best'
- ' timeout\n', 2)
- if times[-1] < maxtime:
- retval = times[-1]
- else:
- retval = maxtime
-
- output.write('_list_add(): new max time is %s seconds,'
- ' and now len(host_dict)= %s\n' % (retval, len(host_dict)), 2)
-
- return retval, host_dict
-
-
-class Interactive(object):
- """Handles interactive host selection."""
-
- def __init__(self, hosts, rsync):
- self.urls = []
-
- self.interactive(hosts, rsync)
- output.write('Interactive.interactive(): self.urls = %s\n' % self.urls, 2)
-
- if len(self.urls[0]) == 0:
- sys.exit(1)
-
-
- def interactive(self, hosts, rsync):
- """
- Some sort of interactive menu thingy.
- """
- if rsync:
- dialog = 'dialog --stdout --title "Gentoo RSYNC Mirrors"'\
- ' --radiolist "Please select your desired mirror:" 20 110 14'
- else:
- dialog = 'dialog --separate-output --stdout --title'\
- ' "Gentoo Download Mirrors" --checklist "Please'\
- ' select your desired mirrors:\n* = supports ipv6" 20 110 14'
-
- dialog += ' ' + ' '.join(['"%s" "%s" "OFF"' % host for host in hosts])
-
- mirror_fd = os.popen('%s' % dialog)
- mirrors = mirror_fd.read()
- mirror_fd.close()
-
- self.urls = mirrors.split('\n')
-
-
-def _have_bin(name):
- """
- Determines whether a particular binary is available on the host system.
- """
- for path_dir in os.environ.get("PATH", "").split(":"):
- if not path_dir:
- continue
- file_path = os.path.join(path_dir, name)
- if os.path.isfile(file_path) and os.access(file_path, os.X_OK):
- return file_path
- return None
-
-
-def handler(signum, frame):
- output.print_err('Caught signal %s. Exiting' % signum)
-
-def write_config(hosts, out, path, sync=False):
- """
- Writes the make.conf style string to the given file, or to stdout.
- """
- if sync:
- var = 'SYNC'
- else:
- var = 'GENTOO_MIRRORS'
-
- mirror_string = '%s="%s"' % (var, ' '.join(hosts))
-
- if out:
- print
- print mirror_string
- sys.exit(0)
-
-
- output.write('\n')
- output.print_info('Modifying make.conf with new mirrors...\n')
- try:
- config = open(path, 'r')
- output.write('\tReading make.conf\n')
- lines = config.readlines()
- config.close()
- output.write('\tMoving to %s.backup\n' % path)
- shutil.move(path, path + '.backup')
- except IOError:
- lines = []
-
- regex = re.compile('^%s=.*' % var)
- for line in lines:
- if regex.match(line):
- lines.remove(line)
-
- lines.append(mirror_string)
-
- output.write('\tWriting new make.conf\n')
- config = open(path, 'w')
- for line in lines:
- config.write(line)
- config.write('\n')
- config.close()
-
- output.print_info('Done.\n')
- sys.exit(0)
-
-def get_filesystem_mirrors(out, path, sync=False):
- """
- Read the current mirrors and retain mounted filesystems mirrors
- """
- fsmirrors = []
-
- if sync:
- var = 'SYNC'
- else:
- var = 'GENTOO_MIRRORS'
-
- try:
- f = open(path,'r')
- except IOError,e:
- return fsmirrors
-
- """ Search for 'var' in make.conf and extract value """
- try:
- lex = shlex.shlex(f, posix=True)
- lex.wordchars=string.digits+string.letters+"~!@#$%*_\:;?,./-+{}"
- lex.quotes="\"'"
- while 1:
- key = lex.get_token()
- if key == var:
- equ = lex.get_token()
-
- if (equ == ''):
- break;
- elif (equ != '='):
- break;
-
- val = lex.get_token()
- if val is None:
- break;
-
- """ Look for mounted filesystem in value """
- mirrorlist = val.rsplit()
- p = re.compile('rsync://|http://|ftp://',re.IGNORECASE)
- for mirror in mirrorlist:
- if (p.match(mirror) == None):
- fsmirrors.append(mirror)
- break
- elif key is None:
- break
- except Exception, e:
- fsmirrors = []
-
- return fsmirrors
-
-def parse_args(argv):
- """
- Does argument parsing and some sanity checks.
- Returns an optparse Options object.
-
- The descriptions, grouping, and possibly the amount sanity checking
- need some finishing touches.
- """
- desc = "\n".join((
- output.white("examples:"),
- "",
- output.white(" automatic:"),
- " # mirrorselect -s5",
- " # mirrorselect -s3 -b10 -o >> /mnt/gentoo/etc/make.conf",
- " # mirrorselect -D -s4",
- "",
- output.white(" interactive:"),
- " # mirrorselect -i -r",
- ))
- parser = OptionParser(
- formatter=ColoredFormatter(), description=desc,
- version='Mirrorselect version: %s' % __revision__)
-
- group = parser.add_option_group("Main modes")
- group.add_option(
- "-i", "--interactive", action="store_true", default=False,
- help="Interactive Mode, this will present a list "
- "to make it possible to select mirrors you wish to use.")
- group.add_option(
- "-D", "--deep", action="store_true", default=False,
- help="Deep mode. This is used to give a more accurate "
- "speed test. It will download a 100k file from "
- "each server. Because of this you should only use "
- "this option if you have a good connection.")
-
- group = parser.add_option_group(
- "Server type selection (choose at most one)")
- group.add_option(
- "-F", "--ftp", action="store_true", default=False,
- help="ftp only mode. Will not consider hosts of other "
- "types.")
- group.add_option(
- "-H", "--http", action="store_true", default=False,
- help="http only mode. Will not consider hosts of other types")
- group.add_option(
- "-r", "--rsync", action="store_true", default=False,
- help="rsync mode. Allows you to interactively select your"
- " rsync mirror. Requires -i to be used.")
- group.add_option(
- "-4", "--ipv4", action="store_true", default=False,
- help="only use IPv4")
- group.add_option(
- "-6", "--ipv6", action="store_true", default=False,
- help="only use IPv6")
-
- group = parser.add_option_group("Other options")
- group.add_option(
- "-o", "--output", action="store_true", default=False,
- help="Output Only Mode, this is especially useful "
- "when being used during installation, to redirect "
- "output to a file other than /etc/make.conf")
- group.add_option(
- "-b", "--blocksize", action="store", type="int",
- help="This is to be used in automatic mode "
- "and will split the hosts into blocks of BLOCKSIZE for "
- "use with netselect. This is required for certain "
- "routers which block 40+ requests at any given time. "
- "Recommended parameters to pass are: -s3 -b10")
- group.add_option(
- "-t", "--timeout", action="store", type="int",
- default="10", help="Timeout for deep mode. Defaults to 10 seconds.")
- group.add_option(
- "-s", "--servers", action="store", type="int", default=1,
- help="Specify Number of servers for Automatic Mode "
- "to select. this is only valid for download mirrors. "
- "If this is not specified, a default of 1 is used.")
- group.add_option(
- "-d", "--debug", action="store_const", const=2, dest="verbosity",
- default=1, help="debug mode")
- group.add_option(
- "-q", "--quiet", action="store_const", const=0, dest="verbosity",
- help="Quiet mode")
-
- if len(argv) == 1:
- parser.print_help()
- sys.exit(1)
-
- options, args = parser.parse_args(argv[1:])
-
- # sanity checks
-
- # hack: check if more than one of these is set
- if options.http + options.ftp + options.rsync > 1:
- output.print_err('Choose at most one of -H, -f and -r')
-
- if options.ipv4 and options.ipv6:
- output.print_err('Choose at most one of --ipv4 and --ipv6')
-
- if options.ipv6 and not socket.has_ipv6:
- options.ipv6 = False
- output.print_err('The --ipv6 option requires python ipv6 support')
-
- if options.rsync and not options.interactive:
- output.print_err('rsync servers can only be selected with -i')
-
- if options.interactive and (
- options.deep or
- options.blocksize or
- options.servers > 1):
- output.print_err('Invalid option combination with -i')
-
- if (not options.deep) and (not _have_bin('netselect') ):
- output.print_err(
- 'You do not appear to have netselect on your system. '
- 'You must use the -D flag')
-
- if (os.getuid() != 0) and not options.output:
- output.print_err('Must be root to write to /etc/make.conf!\n')
-
- if args:
- output.print_err('Unexpected arguments passed.')
-
- # return results
- return options
-
-
-output = Output() #the only FUCKING global. Damnit.
-def main(argv):
- """Lets Rock!"""
- config_path = '/etc/make.conf'
-
- signal.signal(signal.SIGINT, handler)
-
- options = parse_args(argv)
- output.verbosity = options.verbosity
-
- fsmirrors = get_filesystem_mirrors(options.output, config_path, options.rsync)
- hosts = Extractor(MIRRORS_3_XML, options).hosts
-
- if options.interactive:
- selector = Interactive(hosts, options.rsync)
- elif options.deep:
- selector = Deep(hosts, options)
- else:
- selector = Shallow(hosts, options)
-
- write_config(fsmirrors + selector.urls, options.output, config_path, options.rsync)
-
-
-if __name__ == '__main__':
- main(sys.argv)
diff --git a/mirrorselect/__init__.py b/mirrorselect/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/mirrorselect/__init__.py
diff --git a/mirrorselect/mirrorparser.py b/mirrorselect/mirrorparser.py
new file mode 100644
index 0000000..4b07f0f
--- /dev/null
+++ b/mirrorselect/mirrorparser.py
@@ -0,0 +1,90 @@
+# Mirrorselect 1.x
+# Tool for selecting Gentoo source and rsync mirrors.
+#
+# Copyright (C) 2005 Colin Kingsley <tercel@gentoo.org>
+# Copyright (C) 2008 Zac Medico <zmedico@gentoo.org>
+# Copyright (C) 2009 Sebastian Pipping <sebastian@pipping.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+
+from HTMLParser import HTMLParser
+
+MIRRORS_XML = 'http://www.gentoo.org/main/en/mirrors.xml?passthru=1'
+
+class MirrorParser(HTMLParser):
+ """
+ MirrorParser objects are fed an html input stream via the feed() method.
+ After the instance is closed, the lines atribute contains an array with
+ elements of the form: (url, description)
+ """
+
+ def __init__(self):
+ HTMLParser.__init__(self)
+
+ self.lines = []
+ self.line = []
+
+ self.get_desc = False
+ self.in_sect = False
+ self.sect_good = False
+ self.check_title = False
+
+ self.sects = ('North America', 'South America', 'Europe', 'Australia',
+ 'Asia', 'Other Mirrors:', 'Partial Mirrors')
+
+ def handle_starttag(self, tag, attrs):
+ if tag == 'section':
+ self.in_sect = True
+ if (tag == 'title') and self.in_sect:
+ self.check_title = True
+ if (tag == 'uri') and self.sect_good: #This is a good one
+ self.line.append(dict(attrs)['link']) #url
+ self.get_desc = True #the next data block is the description
+
+ def handle_data(self, data):
+ if self.check_title and (data in self.sects):
+ self.sect_good = True
+ if self.get_desc:
+ if data.endswith('*'):
+ data = data.replace('*', '')
+ data = '* ' + data
+ self.line.append(data)
+ self.get_desc = False
+
+ def handle_endtag(self, tag):
+ if tag == 'section':
+ self.in_sect = False
+ self.sect_good = False
+ if (tag == 'uri') and (len(self.line) == 2):
+ self.lines.append(tuple(self.line))
+ self.line = []
+
+ def tuples(self):
+ return self.lines
+
+ def uris(self):
+ return [url for url, description in self.lines]
+
+if __name__ == '__main__':
+ import urllib
+ parser = MirrorParser()
+ try:
+ parser.feed(urllib.urlopen(MIRRORS_XML).read())
+ except EnvironmentError:
+ pass
+ parser.close()
+ print '===== tuples'
+ print parser.tuples()
+ print '===== uris'
+ print parser.uris()
diff --git a/mirrorselect/mirrorparser3.py b/mirrorselect/mirrorparser3.py
new file mode 100644
index 0000000..6efcc3d
--- /dev/null
+++ b/mirrorselect/mirrorparser3.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python
+
+# Mirrorselect 1.x
+# Tool for selecting Gentoo source and rsync mirrors.
+#
+# Copyright (C) 2009 Sebastian Pipping <sebastian@pipping.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+
+from xml.etree import ElementTree as ET
+
+MIRRORS_3_XML = 'http://www.gentoo.org/main/en/mirrors3.xml'
+
+class MirrorParser3:
+ def __init__(self):
+ self._reset()
+
+ def _reset(self):
+ self._dict = {}
+
+ def parse(self, text):
+ self._reset()
+ for mirrorgroup in ET.XML(text):
+ for mirror in mirrorgroup:
+ name = ''
+ for e in mirror:
+ if e.tag == 'name':
+ name = e.text
+ if e.tag == 'uri':
+ uri = e.text
+ self._dict[uri] = name
+
+ def tuples(self):
+ return [(url, name) for url, name in self._dict.items()]
+
+ def uris(self):
+ return [url for url, name in self._dict.items()]
+
+if __name__ == '__main__':
+ import urllib
+ parser = MirrorParser3()
+ parser.parse(urllib.urlopen(MIRRORS_3_XML).read())
+ print '===== tuples'
+ print parser.tuples()
+ print '===== uris'
+ print parser.uris()