1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
import xml.etree.ElementTree as ET
from flask import json
import requests
from .. import app, db
from .models import Category, Maintainer, Package, PackageVersion
# Location of the Gentoo projects metadata (XML) fetched by get_project_data().
proj_url = "https://api.gentoo.org/metastructure/projects.xml"
# Base URL the sync functions below append their JSON paths to
# (e.g. "categories.json", "categories/<name>.json", "packages/<full_name>.json").
pkg_url_base = "https://packages.gentoo.org/"
# One requests session shared by the HTTP calls in the functions below.
http_session = requests.session()
def get_project_data():
    """Download projects.xml and parse it into plain dictionaries.

    Returns:
        A dict keyed by project email. Each value is a dict holding the text
        of whichever of the ``email``, ``name``, ``url`` and ``description``
        tags were present, plus (only when present in the XML):

        * ``members``: list of dicts with the ``email``/``name``/``role``
          tag texts found, and ``is_lead: True`` for members whose
          ``is-lead`` attribute is ``"1"``. Members without an email are
          dropped.
        * ``subprojects``: list of ``(ref, inherit_members)`` tuples, where
          ``inherit_members`` is a bool.

        ``None`` when the HTTP response is falsy or the document root is not
        a ``projects`` element (a message is printed in both cases).
    """
    data = http_session.get(proj_url)
    if not data:
        print("Failed retrieving projects.xml")
        return None
    root = ET.fromstring(data.text)
    projects = {}
    # Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 2016-11-10
    if root.tag.lower() != 'projects':
        print("Downloaded projects.xml root tag isn't 'projects'")
        return None
    for proj_elem in root:
        if proj_elem.tag.lower() != 'project':
            print("Skipping unknown <projects> subtag <%s>" % proj_elem.tag)
            continue
        proj = {}
        for elem in proj_elem:
            tag = elem.tag.lower()
            if tag in ('email', 'name', 'url', 'description'):
                proj[tag] = elem.text
            elif tag == 'member':
                member = {}
                if elem.attrib.get('is-lead') == '1':
                    member['is_lead'] = True
                for member_elem in elem:
                    member_tag = member_elem.tag.lower()
                    if member_tag in ('email', 'name', 'role'):
                        member[member_tag] = member_elem.text
                if 'email' in member:
                    # TODO: Sync the members (it's valid as email is given) - maybe at the end,
                    # after we have synced the project data, so we can add him to the project directly
                    proj.setdefault('members', []).append(member)
            elif tag == 'subproject':
                if 'ref' in elem.attrib:
                    # subprojects will be a list of (subproject_email, inherit-members) tuples
                    # where inherit-members is True or False.
                    # TODO: Might change if sync code will want it differently
                    inherit_members = elem.attrib.get('inherit-members') == '1'
                    proj.setdefault('subprojects', []).append((elem.attrib['ref'], inherit_members))
                else:
                    # The fallback must apply to the email only: "%" binds tighter
                    # than a conditional expression, so an unparenthesised
                    # "msg % email if ... else '<unknown>'" would print just
                    # "<unknown>" when the email is missing.
                    print("Invalid <subproject> tag inside project %s - required 'ref' attribute missing"
                          % proj.get('email', "<unknown>"))
            else:
                print("Skipping unknown <project> subtag <%s>" % tag)
        if 'email' in proj:
            projects[proj['email']] = proj
        else:
            print("Skipping incomplete project data due to lack of required email identifier: %s" % (proj,))
    return projects
def sync_projects():
    """Create or update a Maintainer row for every project in projects.xml.

    Existing maintainers (matched by email) are flagged as projects and get
    their description, name and url refreshed for whichever of those fields
    the project data carries. Unknown emails are inserted as new project
    maintainers. All changes are committed once at the end.

    Does nothing (apart from printing a message) when the project data could
    not be retrieved.
    """
    projects = get_project_data()
    if projects is None:
        # get_project_data() returns None on download/parse failure; without
        # this guard the loop below would die with an AttributeError.
        print("Skipping project sync: no project data available")
        return

    optional_fields = ('description', 'name', 'url')
    # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5
    existing_maintainers = {m.email: m for m in Maintainer.query.all()}
    for email, data in projects.items():
        maintainer = existing_maintainers.get(email)
        if maintainer is not None:
            print("Updating project %s" % email)
            maintainer.is_project = True
            for field in optional_fields:
                if field in data:
                    setattr(maintainer, field, data[field])
        else:
            print("Adding project %s" % email)
            # Only pass the fields that were actually parsed, mirroring the
            # update path; indexing them unconditionally raised KeyError for
            # projects lacking a description, name or url.
            present = {field: data[field] for field in optional_fields if field in data}
            db.session.add(Maintainer(email=email, is_project=True, **present))
    db.session.commit()
def sync_categories():
    """Create or update Category rows from the remote categories.json.

    Categories already in the database (matched by name) get their
    description refreshed; unknown ones are inserted. All changes are
    committed once at the end.

    Does nothing (apart from printing a message) when the HTTP response is
    falsy, matching how the other sync functions treat failed downloads.
    """
    url = pkg_url_base + "categories.json"
    data = http_session.get(url)
    if not data:
        print("Failed retrieving categories.json")
        return
    categories = json.loads(data.text)
    # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5
    existing_categories = {cat.name: cat for cat in Category.query.all()}
    for category in categories:
        known = existing_categories.get(category['name'])
        if known is not None:
            known.description = category['description']
        else:
            db.session.add(Category(name=category['name'], description=category['description']))
    db.session.commit()
def sync_packages():
    """Insert Package rows for packages listed remotely but missing locally.

    For every Category in the database the matching
    ``categories/<name>.json`` is downloaded and each entry of its
    ``packages`` list that the category does not have yet (matched by name)
    is added. Categories whose download fails are reported and skipped. All
    additions are committed once at the end.
    """
    for category in Category.query.all():
        data = http_session.get(pkg_url_base + "categories/" + category.name + ".json")
        if not data:
            print("No JSON data for category %s" % category.name)  # FIXME: Better handling; mark category as inactive/gone?
            continue
        packages = json.loads(data.text)['packages']
        # TODO: Use UPSERT instead (on_conflict_do_update)
        # Compare against this category's packages only. Keying every package
        # in the database by bare name made a package look "existing" when a
        # same-named package lived in a different category, so it was never
        # added; it also re-read the whole table for each category.
        known_names = {pkg.name for pkg in category.packages.all()}
        for package in packages:
            name = package['name']
            if name in known_names:
                continue  # TODO: Update description once we keep that in DB
            db.session.add(Package(category_id=category.id, name=name))
            # Guard against the same name appearing twice in one listing.
            known_names.add(name)
    db.session.commit()
def sync_versions():
    """Debug stub: pretty-print the JSON of the first package that has any.

    Walks the packages in the database, reports those whose
    ``packages/<full_name>.json`` download fails, and stops right after
    dumping the first one that succeeds. Nothing is written to the database.
    """
    from pprint import pprint

    for pkg in Package.query.all():
        response = http_session.get(pkg_url_base + "packages/" + pkg.full_name + ".json")
        if response:
            pprint(json.loads(response.text))
            break
        # FIXME: Handle better; e.g mark the package as removed if no pkgmove update
        print("No JSON data for package %s" % pkg.full_name)
|