gs_ctan/ctan_db.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
    ctan_db.py
    ~~~~~~~~~~
    
    CTAN package database
    
    :copyright: (c) 2013 by Jauhien Piatlicki
    :license: GPL-2, see LICENSE for more details.
"""

import itertools
import re

import portage

from g_sorcery.g_collections import Dependency, Package, serializable_elist
from g_sorcery.package_db import DBGenerator
from g_sorcery.logger import Logger

class CtanDBGenerator(DBGenerator):
    """
    Implementation of database generator for CTAN LaTeX backend.

    The generator downloads the ``texlive.tlpdb`` package list, parses
    its ``Package`` entries and adds one ``dev-tex`` package per entry
    to the package database.
    """
    def __init__(self, package_db_class):
        """
        Initialize the generator and detect the architecture suffix.

        Args:
            package_db_class: Passed unchanged to the ``DBGenerator``
                constructor.
        """
        super(CtanDBGenerator, self).__init__(package_db_class)

        logger = Logger()
        gentoo_arch = portage.settings['ARCH']
        # self.arch is compared with the second dot-separated component
        # of package names and substituted for "ARCH" in dependencies.
        # It stays empty when the Gentoo arch has no known counterpart.
        known_arches = {"x86": "i386-linux", "amd64": "x86_64-linux"}
        self.arch = known_arches.get(gentoo_arch, "")
        if not self.arch:
            logger.warning("not supported arch: " + gentoo_arch)


    def get_download_uries(self, common_config, config):
        """
        Get download URI.

        Args:
            common_config: Not used by this backend.
            config: Mapping that provides ``"repo_uri"``.

        Returns:
            A one-element list with the URI of ``texlive.tlpdb.xz``.
        """
        return [config["repo_uri"] + "/tlpkg/texlive.tlpdb.xz"]

    def parse_data(self, data_f):
        """
        Parse downloaded data.

        Parsed data is a list of dictionaries.
        Each dictionary corresponds to one package.

        The text is split into entries on empty lines and only entries
        whose second line is ``category Package`` are kept. Inside an
        entry every line is split on single spaces; the first field is
        the key:

        * a key ending with ``files`` opens a file section, stored as a
          dictionary of the line's ``key=value`` attributes plus a
          ``"files"`` list;
        * a line with an empty key (i.e. starting with a space) is
          appended to the ``"files"`` list of the open file section;
        * ``depend`` lines are collected in a list under ``"depend"``;
        * any other line is stored as ``key -> rest of the line``; a line
          repeating the most recently stored key is appended to the
          existing value, separated by a space.

        Entries whose name has a second dot-separated component that
        differs from ``self.arch`` are dropped.

        Args:
            data_f: Open file with data.

        Returns:
            Parsed data.
        """
        lines = data_f.read().split("\n")

        #entries are separated by empty lines
        entries = [list(group)
                   for non_empty, group in itertools.groupby(lines, bool)
                   if non_empty]

        result = []

        for entry in entries:
            #we need only Package entries; the length check keeps a
            #one-line entry from raising IndexError
            if len(entry) < 2 or entry[1] != "category Package":
                continue

            res_entry = {}
            previous_key = ""
            current_key = ""
            for line in entry:
                fields = line.split(" ")
                key = fields[0]
                values = fields[1:]
                if key.endswith("files"):
                    #header of a file section, e.g. "runfiles size=1"
                    current_key = key
                    section = {}
                    for attribute in values:
                        attr_name, attr_value = attribute.split("=")
                        section[attr_name] = attr_value
                    section["files"] = []
                    res_entry[current_key] = section
                elif not key:
                    #line starts with a space: file of the open section
                    res_entry[current_key]["files"].append(" ".join(values))
                elif key == "depend":
                    res_entry.setdefault("depend", []).append(" ".join(values))
                elif previous_key == key:
                    #the same key again: continuation of its value
                    res_entry[previous_key] += " " + " ".join(values)
                else:
                    res_entry[key] = " ".join(values)
                    previous_key = key
                    current_key = ""

            #skip packages built for an architecture other than ours
            parts = res_entry["name"].split(".")
            if len(parts) > 1 and parts[1] != self.arch:
                continue

            result.append(res_entry)

        return result

    def process_data(self, pkg_db, data, common_config, config):
        """
        Process parsed data and fill database.

        Args:
            pkg_db: Package database; ``add_category`` and
                ``add_package`` are called on it.
            data: Mapping whose ``"texlive.tlpdb"`` item is the list of
                package dictionaries produced by ``parse_data``.
            common_config: Common configuration, consulted together with
                ``config`` for the ``"exclude"`` and ``"licenses"``
                entries.
            config: Backend configuration.
        """

        category = "dev-tex"

        pkg_db.add_category(category)

        ARCH_LENGTH = len("ARCH")

        #todo: work on common data vars processing: external deps, filtering etc.
        #Only these characters are kept in descriptions. The set is built
        #once and includes the range ends 'z', 'Z' and '9'.
        allowed_chars = frozenset("abcdefghijklmnopqrstuvwxyz"
                                  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                  "0123456789"
                                  "+_- .()[]{},")

        def keep_allowed(text):
            """Return text with all characters outside allowed_chars removed."""
            return "".join([char for char in text if char in allowed_chars])

        #todo better version checking and processing
        #Dot-separated groups of digits with an optional trailing letter;
        #empty components such as in "1..2" are rejected.
        version_re = re.compile(r"^[0-9]+(\.[0-9]+)*[a-z]?$")

        for entry in data["texlive.tlpdb"]:
            realname = entry["name"]

            #dots in the upstream name are replaced with underscores
            pkgname = realname
            parts = pkgname.split(".")
            if len(parts) > 1:
                pkgname = "_".join(parts)

            if self.in_config([common_config, config], "exclude", pkgname):
                continue

            if "shortdesc" in entry:
                description = entry["shortdesc"]
            else:
                description = entry["name"]
            description = keep_allowed(description)

            if "longdesc" in entry:
                longdescription = keep_allowed(entry["longdesc"])
            else:
                longdescription = description

            #use the catalogue version only when it looks like a plain
            #version number, otherwise fall back to the revision
            version = entry.get("catalogue-version")
            if version is None or not version_re.match(version):
                version = entry["revision"]

            if "catalogue-license" in entry:
                pkg_license = self.convert([common_config, config],
                                           "licenses",
                                           entry["catalogue-license"])
            else:
                pkg_license = "unknown"

            if "catalogue-ctan" in entry:
                source_type = "zip"
                base_src_uri = "ftp://tug.ctan.org/pub/tex-archive"
                catalogue = entry["catalogue-ctan"]
                homepage = "http://www.ctan.org/tex-archive" + catalogue
                #NOTE(review): this cuts len(realname) trailing characters,
                #which assumes the CTAN path ends with the package
                #name - confirm for packages where it does not.
                catalogue = catalogue[:-len(realname)]
            else:
                source_type = "tar.xz"
                base_src_uri = "http://mirror.ctan.org/systems/texlive/tlnet/archive/"
                catalogue = ""
                homepage = "http://www.ctan.org/tex-archive/systems/texlive/tlnet"

            dependencies = serializable_elist(separator="\n\t")

            for dependency in entry.get("depend", []):
                if dependency.endswith("ARCH"):
                    #"name.ARCH" becomes "name_<arch>", the same form that
                    #pkgname takes for architecture specific packages
                    dependency = dependency[:-ARCH_LENGTH-1] + "_" + self.arch
                dependencies.append(Dependency(category, dependency))

            ebuild_data = {"realname" : realname,
                           "description" : description,
                           "homepage" : homepage,
                           "license" : pkg_license,
                           "source_type" : source_type,
                           "base_src_uri" : base_src_uri,
                           "catalogue" : catalogue,
                           "dependencies" : dependencies,
                           "depend" : dependencies,
                           "rdepend" : dependencies,
            #eclass entry
                           'eclasses' : ['gs-ctan'],
            #metadata entries
                           'maintainer' : [{'email' : 'piatlicki@gmail.com',
                                            'name' : 'Jauhien Piatlicki'}],
                           'longdescription' : longdescription
                          }

            pkg_db.add_package(Package(category, pkgname, version), ebuild_data)