# Source blob: b695e9b9bb1583a56c831b99f25c62edfcf47e48
from lxml.html import fromstring
import urllib

class pypi_checker(object):
    """Fetch the PyPI package index page and collect the versions it lists.

    ``pkgs`` maps Portage package atoms to the corresponding PyPI project
    names; ``url`` is the index page that ``fetch_and_parse_all`` downloads.
    """

    # TODO: Shall we use some kind of yaml or json based format instead?
    # Or move this data into DB?
    pkgs = {
        # Package in Portage            :  Package in PyPi
        'dev-python/flask'              : 'Flask',
        'dev-python/flask-openid'       : 'Flask-OpenID',
        'dev-python/flask-sqlalchemy'   : 'Flask-SQLAlchemy',
        'dev-python/mako'               : 'Mako',
        'dev-python/sphinx'             : 'Sphinx',
        'dev-python/virtualenv'         : 'virtualenv',
        'dev-python/vobject'            : 'vobject',
        'dev-python/werkzeug'           : 'Werkzeug',
        'dev-python/xlwt'               : 'xlwt',
        'dev-python/yolk'               : 'yolk',
        'dev-python/yolk-portage'       : 'yolk-portage',
    }

    # Url for fetching version information
    url = 'http://pypi.python.org/pypi?:action=index'

    def __init__(self):
        """Nothing to set up; all configuration lives in class attributes."""
        pass

    @staticmethod
    def _group_versions(labels):
        """Group "name<NBSP>version" labels into ``{name: [version, ...]}``.

        ``labels`` is an iterable of text labels (``None`` entries allowed).
        Each label is UTF-8 encoded and split on the encoded no-break space;
        only labels that split into exactly two parts are kept. Versions are
        appended in the order the labels are seen.
        """
        items = {}
        for label in labels:
            # An element without direct text yields None; nothing to split.
            if label is None:
                continue
            # b'\xc2\xa0' is the UTF-8 encoding of U+00A0. On Python 2 the
            # b'' literal is identical to the plain str literal.
            parts = label.encode('utf-8').split(b'\xc2\xa0')
            if len(parts) == 2:
                # setdefault avoids rebuilding the key list for every row.
                items.setdefault(parts[0], []).append(parts[1])
        return items

    def fetch_and_parse_all(self):
        """Download and parse package version information.

        Returns a dict mapping each package name found in the first
        ``table.list`` element of the page at ``self.url`` to the list of
        versions listed for it.

        Raises IndexError when the page contains no ``table.list`` element,
        plus whatever ``urllib.urlopen`` raises when the download fails.
        """
        f = urllib.urlopen(self.url)
        #f = open('utils/pypi.html')
        try:
            # The whole document is parsed into memory here, so the
            # response can be closed before the rows are walked.
            table = fromstring(f.read()).cssselect('table.list')[0]
        finally:
            # Always release the connection, even when parsing fails.
            f.close()

        labels = []
        for row in table.getchildren():
            # Rows without any cell (or non-element children such as
            # comments) have nothing to index into.
            if not len(row):
                continue
            links = row[0].getchildren()
            if links:
                labels.append(links[0].text)
        return self._group_versions(labels)

if __name__ == '__main__':
    # Manual smoke test: download the live index and dump the parsed mapping.
    # The class defined in this module is pypi_checker. A parenthesised
    # single-argument print behaves the same as a statement and as a function.
    print(pypi_checker().fetch_and_parse_all())