# getbinpkg.py -- Portage binary-package helper functions # Copyright 2003-2004 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 # $Id: /var/cvsroot/gentoo-src/portage/pym/getbinpkg.py,v 1.12.2.3 2005/01/16 02:35:33 carpaski Exp $ from output import * import htmllib,HTMLParser,string,formatter,sys,os,xpak,time,tempfile,cPickle,base64 try: import ftplib except SystemExit, e: raise except Exception, e: sys.stderr.write(red("!!! CANNOT IMPORT FTPLIB: ")+str(e)+"\n") try: import httplib except SystemExit, e: raise except Exception, e: sys.stderr.write(red("!!! CANNOT IMPORT HTTPLIB: ")+str(e)+"\n") def make_metadata_dict(data): myid,myglob = data mydict = {} for x in xpak.getindex_mem(myid): mydict[x] = xpak.getitem(data,x) return mydict class ParseLinks(HTMLParser.HTMLParser): """Parser class that overrides HTMLParser to grab all anchors from an html page and provide suffix and prefix limitors""" def __init__(self): self.PL_anchors = [] HTMLParser.HTMLParser.__init__(self) def get_anchors(self): return self.PL_anchors def get_anchors_by_prefix(self,prefix): newlist = [] for x in self.PL_anchors: if (len(x) >= len(prefix)) and (x[:len(suffix)] == prefix): if x not in newlist: newlist.append(x[:]) return newlist def get_anchors_by_suffix(self,suffix): newlist = [] for x in self.PL_anchors: if (len(x) >= len(suffix)) and (x[-len(suffix):] == suffix): if x not in newlist: newlist.append(x[:]) return newlist def handle_endtag(self,tag): pass def handle_starttag(self,tag,attrs): if tag == "a": for x in attrs: if x[0] == 'href': if x[1] not in self.PL_anchors: self.PL_anchors.append(x[1]) def create_conn(baseurl,conn=None): """(baseurl,conn) --- Takes a protocol://site:port/address url, and an optional connection. If connection is already active, it is passed on. baseurl is reduced to address and is returned in tuple (conn,address)""" parts = string.split(baseurl, "://", 1) if len(parts) != 2: raise ValueError, "Provided URL does not contain protocol identifier. '%s'" % baseurl protocol,url_parts = parts del parts host,address = string.split(url_parts, "/", 1) del url_parts address = "/"+address userpass_host = string.split(host, "@", 1) if len(userpass_host) == 1: host = userpass_host[0] userpass = ["anonymous"] else: host = userpass_host[1] userpass = string.split(userpass_host[0], ":") del userpass_host if len(userpass) > 2: raise ValueError, "Unable to interpret username/password provided." elif len(userpass) == 2: username = userpass[0] password = userpass[1] elif len(userpass) == 1: username = userpass[0] password = None del userpass http_headers = {} http_params = {} if username and password: http_headers = { "Authorization": "Basic %s" % string.replace( base64.encodestring("%s:%s" % (username, password)), "\012", "" ), } if not conn: if protocol == "https": conn = httplib.HTTPSConnection(host) elif protocol == "http": conn = httplib.HTTPConnection(host) elif protocol == "ftp": passive = 1 if(host[-1] == "*"): passive = 0 host = host[:-1] conn = ftplib.FTP(host) if password: conn.login(username,password) else: sys.stderr.write(yellow(" * No password provided for username")+" '"+str(username)+"'\n\n") conn.login(username) conn.set_pasv(passive) conn.set_debuglevel(0) else: raise NotImplementedError, "%s is not a supported protocol." % protocol return (conn,protocol,address, http_params, http_headers) def make_ftp_request(conn, address, rest=None, dest=None): """(conn,address,rest) --- uses the conn object to request the data from address and issuing a rest if it is passed.""" try: if dest: fstart_pos = dest.tell() conn.voidcmd("TYPE I") fsize = conn.size(address) if (rest != None) and (rest < 0): rest = fsize+int(rest) if rest < 0: rest = 0 if rest != None: mysocket = conn.transfercmd("RETR "+str(address), rest) else: mysocket = conn.transfercmd("RETR "+str(address)) mydata = "" while 1: somedata = mysocket.recv(8192) if somedata: if dest: dest.write(somedata) else: mydata = mydata + somedata else: break if dest: data_size = fstart_pos - dest.tell() else: data_size = len(mydata) mysocket.close() conn.voidresp() conn.voidcmd("TYPE A") return mydata,not (fsize==data_size),"" except ValueError, e: return None,int(str(e)[:4]),str(e) def make_http_request(conn, address, params={}, headers={}, dest=None): """(conn,address,params,headers) --- uses the conn object to request the data from address, performing Location forwarding and using the optional params and headers.""" rc = 0 response = None while (rc == 0) or (rc == 301) or (rc == 302): try: if (rc != 0): conn,ignore,ignore,ignore,ignore = create_conn(address) conn.request("GET", address, params, headers) except SystemExit, e: raise except Exception, e: return None,None,"Server request failed: "+str(e) response = conn.getresponse() rc = response.status # 301 means that the page address is wrong. if ((rc == 301) or (rc == 302)): ignored_data = response.read() del ignored_data for x in string.split(str(response.msg), "\n"): parts = string.split(x, ": ", 1) if parts[0] == "Location": if (rc == 301): sys.stderr.write(red("Location has moved: ")+str(parts[1])+"\n") if (rc == 302): sys.stderr.write(red("Location has temporarily moved: ")+str(parts[1])+"\n") address = parts[1] break if (rc != 200) and (rc != 206): sys.stderr.write(str(response.msg)+"\n") sys.stderr.write(response.read()+"\n") sys.stderr.write("address: "+address+"\n") return None,rc,"Server did not respond successfully ("+str(response.status)+": "+str(response.reason)+")" if dest: dest.write(response.read()) return "",0,"" return response.read(),0,"" def match_in_array(array, prefix="", suffix="", match_both=1, allow_overlap=0): myarray = [] if not (prefix and suffix): match_both = 0 for x in array: add_p = 0 if prefix and (len(x) >= len(prefix)) and (x[:len(prefix)] == prefix): add_p = 1 if match_both: if prefix and not add_p: # Require both, but don't have first one. continue else: if add_p: # Only need one, and we have it. myarray.append(x[:]) continue if not allow_overlap: # Not allow to overlap prefix and suffix if len(x) >= (len(prefix)+len(suffix)): y = x[len(prefix):] else: continue # Too short to match. else: y = x # Do whatever... We're overlapping. if suffix and (len(x) >= len(suffix)) and (x[-len(suffix):] == suffix): myarray.append(x) # It matches else: continue # Doesn't match. return myarray def dir_get_list(baseurl,conn=None): """(baseurl[,connection]) -- Takes a base url to connect to and read from. URL should be in the for ://[:port] Connection is used for persistent connection instances.""" if not conn: keepconnection = 0 else: keepconnection = 1 conn,protocol,address,params,headers = create_conn(baseurl, conn) listing = None if protocol in ["http","https"]: page,rc,msg = make_http_request(conn,address,params,headers) if page: parser = ParseLinks() parser.feed(page) del page listing = parser.get_anchors() else: raise Exception, "Unable to get listing: %s %s" % (rc,msg) elif protocol in ["ftp"]: if address[-1] == '/': olddir = conn.pwd() conn.cwd(address) listing = conn.nlst() conn.cwd(olddir) del olddir else: listing = conn.nlst(address) else: raise TypeError, "Unknown protocol. '%s'" % protocol if not keepconnection: conn.close() return listing def file_get_metadata(baseurl,conn=None, chunk_size=3000): """(baseurl[,connection]) -- Takes a base url to connect to and read from. URL should be in the for ://[:port] Connection is used for persistent connection instances.""" if not conn: keepconnection = 0 else: keepconnection = 1 conn,protocol,address,params,headers = create_conn(baseurl, conn) if protocol in ["http","https"]: headers["Range"] = "bytes=-"+str(chunk_size) data,rc,msg = make_http_request(conn, address, params, headers) elif protocol in ["ftp"]: data,rc,msg = make_ftp_request(conn, address, -chunk_size) else: raise TypeError, "Unknown protocol. '%s'" % protocol if data: xpaksize = xpak.decodeint(data[-8:-4]) if (xpaksize+8) > chunk_size: myid = file_get_metadata(baseurl, conn, (xpaksize+8)) if not keepconnection: conn.close() return myid else: xpak_data = data[len(data)-(xpaksize+8):-8] del data myid = xpak.xsplit_mem(xpak_data) if not myid: myid = None,None del xpak_data else: myid = None,None if not keepconnection: conn.close() return myid def file_get(baseurl,dest,conn=None,fcmd=None): """(baseurl,dest,fcmd=) -- Takes a base url to connect to and read from. URL should be in the for ://[user[:pass]@][:port]""" if not fcmd: return file_get_lib(baseurl,dest,conn) fcmd = string.replace(fcmd, "${DISTDIR}", dest) fcmd = string.replace(fcmd, "${URI}", baseurl) fcmd = string.replace(fcmd, "${FILE}", os.path.basename(baseurl)) mysplit = string.split(fcmd) mycmd = mysplit[0] myargs = [os.path.basename(mycmd)]+mysplit[1:] mypid=os.fork() if mypid == 0: try: os.execv(mycmd,myargs) except OSError: pass sys.stderr.write("!!! Failed to spawn fetcher.\n") sys.exit(1) retval=os.waitpid(mypid,0)[1] if (retval & 0xff) == 0: retval = retval >> 8 else: sys.stderr.write("Spawned processes caught a signal.\n") sys.exit(1) if retval != 0: sys.stderr.write("Fetcher exited with a failure condition.\n") return 0 return 1 def file_get_lib(baseurl,dest,conn=None): """(baseurl[,connection]) -- Takes a base url to connect to and read from. URL should be in the for ://[:port] Connection is used for persistent connection instances.""" if not conn: keepconnection = 0 else: keepconnection = 1 conn,protocol,address,params,headers = create_conn(baseurl, conn) sys.stderr.write("Fetching '"+str(os.path.basename(address)+"'\n")) if protocol in ["http","https"]: data,rc,msg = make_http_request(conn, address, params, headers, dest=dest) elif protocol in ["ftp"]: data,rc,msg = make_ftp_request(conn, address, dest=dest) else: raise TypeError, "Unknown protocol. '%s'" % protocol if not keepconnection: conn.close() return rc def dir_get_metadata(baseurl, conn=None, chunk_size=3000, verbose=1, usingcache=1, makepickle=None): """(baseurl,conn,chunk_size,verbose) -- """ if not conn: keepconnection = 0 else: keepconnection = 1 if makepickle == None: makepickle = "/var/cache/edb/metadata.idx.most_recent" conn,protocol,address,params,headers = create_conn(baseurl, conn) filedict = {} try: metadatafile = open("/var/cache/edb/remote_metadata.pickle") metadata = cPickle.load(metadatafile) sys.stderr.write("Loaded metadata pickle.\n") metadatafile.close() except SystemExit, e: raise except: metadata = {} if not metadata.has_key(baseurl): metadata[baseurl]={} if not metadata[baseurl].has_key("indexname"): metadata[baseurl]["indexname"]="" if not metadata[baseurl].has_key("timestamp"): metadata[baseurl]["timestamp"]=0 if not metadata[baseurl].has_key("unmodified"): metadata[baseurl]["unmodified"]=0 if not metadata[baseurl].has_key("data"): metadata[baseurl]["data"]={} filelist = dir_get_list(baseurl, conn) tbz2list = match_in_array(filelist, suffix=".tbz2") metalist = match_in_array(filelist, prefix="metadata.idx") del filelist # Determine if our metadata file is current. metalist.sort() metalist.reverse() # makes the order new-to-old. havecache=0 for mfile in metalist: if usingcache and \ ((metadata[baseurl]["indexname"] != mfile) or \ (metadata[baseurl]["timestamp"] < int(time.time()-(60*60*24)))): # Try to download new cache until we succeed on one. data="" for trynum in [1,2,3]: mytempfile = tempfile.TemporaryFile() try: file_get(baseurl+"/"+mfile, mytempfile, conn) if mytempfile.tell() > len(data): mytempfile.seek(0) data = mytempfile.read() except ValueError, e: sys.stderr.write("--- "+str(e)+"\n") if trynum < 3: sys.stderr.write("Retrying...\n") mytempfile.close() continue if match_in_array([mfile],suffix=".gz"): sys.stderr.write("gzip'd\n") try: import gzip mytempfile.seek(0) gzindex = gzip.GzipFile(mfile[:-3],'rb',9,mytempfile) data = gzindex.read() except SystemExit, e: raise except Exception, e: mytempfile.close() sys.stderr.write("!!! Failed to use gzip: "+str(e)+"\n") mytempfile.close() try: metadata[baseurl]["data"] = cPickle.loads(data) del data metadata[baseurl]["indexname"] = mfile metadata[baseurl]["timestamp"] = int(time.time()) metadata[baseurl]["modified"] = 0 # It's not, right after download. sys.stderr.write("Pickle loaded.\n") break except SystemExit, e: raise except Exception, e: sys.stderr.write("!!! Failed to read data from index: "+str(mfile)+"\n") sys.stderr.write("!!! "+str(e)+"\n") try: metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+") cPickle.dump(metadata,metadatafile) metadatafile.close() except SystemExit, e: raise except Exception, e: sys.stderr.write("!!! Failed to write binary metadata to disk!\n") sys.stderr.write("!!! "+str(e)+"\n") break # We may have metadata... now we run through the tbz2 list and check. sys.stderr.write(yellow("cache miss: 'x'")+" --- "+green("cache hit: 'o'")+"\n") for x in tbz2list: x = os.path.basename(x) if ((not metadata[baseurl]["data"].has_key(x)) or \ (x not in metadata[baseurl]["data"].keys())): sys.stderr.write(yellow("x")) metadata[baseurl]["modified"] = 1 myid = file_get_metadata(baseurl+"/"+x, conn, chunk_size) if myid[0]: metadata[baseurl]["data"][x] = make_metadata_dict(myid) elif verbose: sys.stderr.write(red("!!! Failed to retrieve metadata on: ")+str(x)+"\n") else: sys.stderr.write(green("o")) sys.stderr.write("\n") try: if metadata[baseurl].has_key("modified") and metadata[baseurl]["modified"]: metadata[baseurl]["timestamp"] = int(time.time()) metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+") cPickle.dump(metadata,metadatafile) metadatafile.close() if makepickle: metadatafile = open(makepickle, "w") cPickle.dump(metadata[baseurl]["data"],metadatafile) metadatafile.close() except SystemExit, e: raise except Exception, e: sys.stderr.write("!!! Failed to write binary metadata to disk!\n") sys.stderr.write("!!! "+str(e)+"\n") if not keepconnection: conn.close() return metadata[baseurl]["data"]