[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: fe8d9eedef4fa5b406f304c83e064d62860d35df
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Mon Jan 23 00:06:46 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Mon Jan 23 00:06:46 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=fe8d9eed

sync: fix a missing .items to iterate on dict

 backend/lib/sync.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 25b6ea0..c3ed83c 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -242,7 +242,7 @@ def sync_versions():
 db.session.delete(kwd_obj)
 
 # 3.2 cleanup dead revisions
-for version, ver_obj in pkg_versions:
+for version, ver_obj in pkg_versions.items():
 if version not in pkg['versions']:
 db.session.delete(ver_obj)
 



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: edc09cb3b2f3862e6fc5d5277041fbce091d3281
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 17:45:56 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 17:45:56 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=edc09cb3

sync: add version and keyword synchronization

 backend/lib/sync.py | 42 ++
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 22008ea..25b6ea0 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -5,7 +5,7 @@ from datetime import datetime
 import requests
 
 from .. import app, db
-from .models import Category, Maintainer, Package, PackageVersion
+from .models import Category, Keyword, Maintainer, Package, PackageVersion
 
 SYNC_BUFFER_SECS = 60*60 #1 hour
 proj_url = "https://api.gentoo.org/metastructure/projects.xml"
@@ -165,6 +165,8 @@ def sync_versions():
 for maintainer in Maintainer.query.all():
 existing_maintainers[maintainer.email] = maintainer
 
+all_keywords = {kwd.name: kwd for kwd in Keyword.query.all()}
+
 packages_to_sync = Package.query.filter(Package.last_sync_ts < 
ts).order_by(Package.last_sync_ts).all()
 print("Going to sync %d packages%s" % (len(packages_to_sync), (" (oldest 
sync UTC timestamp: %s)" % packages_to_sync[0].last_sync_ts if 
len(packages_to_sync) else "")))
 
@@ -183,7 +185,7 @@ def sync_versions():
 if 'description' in pkg:
 package.description = pkg['description']
 
-   # 2. refresh maintainers
+# 2. refresh maintainers
 maintainers = []
 for maint in pkg.get('maintainers', []):
 if 'email' not in maint or 'type' not in maint:
@@ -208,9 +210,41 @@ def sync_versions():
 # Intentionally outside if 'maintainers' in pkg, because if there are 
no maintainers in JSON, it's falled to maintainer-needed and we need to clean 
out old maintainer entries
 package.maintainers = maintainers # TODO: Retain order to know who is 
primary; retain description associated with the maintainership
 
-# TODO: 3. refresh versions
+# 3.1. refresh versions
+pkg_versions = {pkgver.version: pkgver for pkgver in package.versions}
+for version in pkg['versions']:
+if version['version'] not in pkg_versions:
+pkgver = PackageVersion(version=version['version'],
+package=package)
+db.session.add(pkgver)
+else:
+pkgver = pkg_versions[version['version']]
+
+pkg_keywords = {kwd.name: kwd for kwd in pkgver.keywords}
+
+# 4.1. synchronize new keywords
+for keyword in version['keywords']:
+if keyword in pkg_keywords:
+continue
+
+# TODO: keywords should be initialized earlier to not have to
+# worry about their existence here
+if keyword not in all_keywords:
+kwd = Keyword(name=keyword)
+db.session.add(kwd)
+all_keywords[keyword] = kwd
+
+pkgver.keywords.append(all_keywords[keyword])
+
+# 4.2. cleanup removed keywords
+for keyword, kwd_obj in pkg_keywords.items():
+if keyword not in version['keywords']:
+db.session.delete(kwd_obj)
 
-# TODO: 4. refresh keywords
+# 3.2 cleanup dead revisions
+for version, ver_obj in pkg_versions:
+if version not in pkg['versions']:
+db.session.delete(ver_obj)
 
 # 5. mark package as refreshed
 package.last_sync_ts = now



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: 01fe45522776507f8b9e5d973c2982f66d78b6db
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 17:12:53 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 17:12:53 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=01fe4552

sync: add detail points to sync_versions

 backend/lib/sync.py | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 02e1116..22008ea 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -152,6 +152,12 @@ def sync_packages():
 db.session.commit()
 
 def sync_versions():
+"""Synchronize packages version data from packages.gentoo.org.
+
+For each package that has not been updated in the last SYNC_BUFFER_SECS,
+pull package information and refresh its description, maintainers,
+versions and keywords.
+"""
 cnt = 0
 ts = datetime.utcfromtimestamp(time.time() - SYNC_BUFFER_SECS)
 now = datetime.utcnow()
@@ -172,9 +178,12 @@ def sync_versions():
 pkg = data.json()
 
 print ("Updating package: %s" % package.full_name)
+
+# 1. refresh description
 if 'description' in pkg:
 package.description = pkg['description']
 
+   # 2. refresh maintainers
 maintainers = []
 for maint in pkg.get('maintainers', []):
 if 'email' not in maint or 'type' not in maint:
@@ -198,6 +207,12 @@ def sync_versions():
 
 # Intentionally outside if 'maintainers' in pkg, because if there are 
no maintainers in JSON, it's falled to maintainer-needed and we need to clean 
out old maintainer entries
 package.maintainers = maintainers # TODO: Retain order to know who is 
primary; retain description associated with the maintainership
+
+# TODO: 3. refresh versions
+
+# TODO: 4. refresh keywords
+
+# 5. mark package as refreshed
 package.last_sync_ts = now
 
 if not cnt % 100:



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: b888c93b7892c532385626c9d2a55a8b11661e99
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 12:35:17 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 12:35:17 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=b888c93b

sync: use dict facilities for key retrieval with a default

 backend/lib/sync.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 4cbfe1b..723c3af 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -192,7 +192,7 @@ def sync_versions():
 if maint['type'] == 'project':
 is_project = True
 print("Adding %s maintainer %s" % ("project" if is_project 
else "individual", email))
-new_maintainer = Maintainer(email=email, 
is_project=is_project, name=maint['name'] if 'name' in maint else None)
+new_maintainer = Maintainer(email=email, 
is_project=is_project, name=maint.get('name'))
 db.session.add(new_maintainer)
 existing_maintainers[email] = new_maintainer
 maintainers.append(new_maintainer)



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: f969ccffe04df2d1eeb014dfe67d58177da476fb
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 12:34:13 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 12:34:13 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=f969ccff

sync: reduce unneeded conditional evaluation

tags cannot be evaluated to go through these branches after the first if, so 
switch to elif.

 backend/lib/sync.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 429d14b..4cbfe1b 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -36,7 +36,7 @@ def get_project_data():
 tag = elem.tag.lower()
 if tag in ['email']:
 proj[tag] = elem.text.lower()
-if tag in ['name', 'url', 'description']:
+elif tag in ['name', 'url', 'description']:
 proj[tag] = elem.text
 elif tag == 'member':
 member = {}
@@ -46,7 +46,7 @@ def get_project_data():
 member_tag = member_elem.tag.lower()
 if member_tag in ['email']:
 member[member_tag] = member_elem.text.lower()
-if member_tag in ['name', 'role']:
+elif member_tag in ['name', 'role']:
 member[member_tag] = member_elem.text
 if 'email' in member:
 proj['members'].append(member)



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: c71c75d3fbf28528c844f8280e0ef499dacb1819
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 12:35:58 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 12:35:58 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=c71c75d3

sync: use dict facilities for key retrieval with a default

 backend/lib/sync.py | 39 +++
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 723c3af..02e1116 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -176,26 +176,25 @@ def sync_versions():
 package.description = pkg['description']
 
 maintainers = []
-if 'maintainers' in pkg:
-for maint in pkg['maintainers']:
-if 'email' not in maint or 'type' not in maint:
-raise ValueError(
-"Package %s maintainer %s entry not GLEP 67 valid" %
-(package.full_name, maint)
-)
-
-email = maint['email'].lower()
-if email in existing_maintainers:
-maintainers.append(existing_maintainers[email])
-else:
-is_project = False
-if maint['type'] == 'project':
-is_project = True
-print("Adding %s maintainer %s" % ("project" if is_project 
else "individual", email))
-new_maintainer = Maintainer(email=email, 
is_project=is_project, name=maint.get('name'))
-db.session.add(new_maintainer)
-existing_maintainers[email] = new_maintainer
-maintainers.append(new_maintainer)
+for maint in pkg.get('maintainers', []):
+if 'email' not in maint or 'type' not in maint:
+raise ValueError(
+"Package %s maintainer %s entry not GLEP 67 valid" %
+(package.full_name, maint)
+)
+
+email = maint['email'].lower()
+if email in existing_maintainers:
+maintainers.append(existing_maintainers[email])
+else:
+is_project = False
+if maint['type'] == 'project':
+is_project = True
+print("Adding %s maintainer %s" % ("project" if is_project 
else "individual", email))
+new_maintainer = Maintainer(email=email, 
is_project=is_project, name=maint.get('name'))
+db.session.add(new_maintainer)
+existing_maintainers[email] = new_maintainer
+maintainers.append(new_maintainer)
 
 # Intentionally outside if 'maintainers' in pkg, because if there are 
no maintainers in JSON, it's falled to maintainer-needed and we need to clean 
out old maintainer entries
 package.maintainers = maintainers # TODO: Retain order to know who is 
primary; retain description associated with the maintainership



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: e8f79bda15a675e5802b0daad41144b082d20247
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 12:07:52 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 12:23:56 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=e8f79bda

sync: sort imports according to PEP8

 backend/lib/sync.py | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index c837c23..5e8240d 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,7 +1,9 @@
-import xml.etree.ElementTree as ET
-import requests
 import time
+import xml.etree.ElementTree as ET
 from datetime import datetime
+
+import requests
+
 from .. import app, db
 from .models import Category, Maintainer, Package, PackageVersion
 
@@ -10,6 +12,7 @@ proj_url = "https://api.gentoo.org/metastructure/projects.xml"
 pkg_url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
 
+
 def get_project_data():
 projects = {}
 data = http_session.get(proj_url)



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: 29a6bea1536dd23adbc84454aacb2c81d0499f82
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 12:21:36 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 12:23:56 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=29a6bea1

sync: replace assert with ValueError raise

Simpler expression, probably here to stay.

 backend/lib/sync.py | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 0aab3bc..429d14b 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -178,10 +178,11 @@ def sync_versions():
 maintainers = []
 if 'maintainers' in pkg:
 for maint in pkg['maintainers']:
-assert (
-'email' in maint and 'type' in maint,
-"Package %s maintainer %s entry not GLEP 67 valid" % 
(package.full_name, maint)
-)
+if 'email' not in maint or 'type' not in maint:
+raise ValueError(
+"Package %s maintainer %s entry not GLEP 67 valid" %
+(package.full_name, maint)
+)
 
 email = maint['email'].lower()
 if email in existing_maintainers:



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: 5e7347647516660603dddeedcf570d0cfef27b1a
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 12:18:00 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 12:23:56 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=5e734764

sync: define project keys default values

Costs less than checking for it in each loop iteration and does no
harm later to loop on empty lists.

 backend/lib/sync.py | 64 +++--
 1 file changed, 33 insertions(+), 31 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 5e8240d..0aab3bc 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -28,7 +28,10 @@ def get_project_data():
 if proj_elem.tag.lower() != 'project':
 print("Skipping unknown  subtag <%s>" % proj_elem.tag)
 continue
-proj = {}
+proj = {
+'members': [],
+'subprojects': [],
+}
 for elem in proj_elem:
 tag = elem.tag.lower()
 if tag in ['email']:
@@ -46,14 +49,11 @@ def get_project_data():
 if member_tag in ['name', 'role']:
 member[member_tag] = member_elem.text
 if 'email' in member:
-if 'members' not in proj:
-proj['members'] = []
 proj['members'].append(member)
 elif tag == 'subproject':
 if 'ref' in elem.attrib:
-if 'subprojects' not in proj:
-proj['subprojects'] = []
-# subprojects will be a list of (subproject_email, 
inherit-members) tuples where inherit-members is True or False. TODO: Might 
change if sync code will want it differently
+# subprojects will be a list of (subproject_email, 
inherit-members) tuples where inherit-members is True or False.
+# TODO: Might change if sync code will want it differently
 proj['subprojects'].append((elem.attrib['ref'].lower(), 
True if ('inherit-members' in elem.attrib and elem.attrib['inherit-members'] == 
'1') else False))
 else:
 print("Invalid  tag inside project %s - 
required 'ref' attribute missing" % proj['email'] if 'email' in proj else 
"")
@@ -86,32 +86,34 @@ def sync_projects():
 new_maintainer = Maintainer(email=email, is_project=True, 
description=data['description'], name=data['name'], url=data['url'])
 db.session.add(new_maintainer)
 existing_maintainers[email] = new_maintainer
+
 members = []
-if 'subprojects' in data:
-for subproject_email, inherit_members in data['subprojects']:
-# TODO: How should we handle inherit_members?
-if subproject_email in existing_maintainers:
-members.append(existing_maintainers[subproject_email])
-else:
-print("Creating new project entry for subproject: %s" % 
subproject_email)
-new_subproject = Maintainer(email=subproject_email, 
is_project=True)
-db.session.add(new_subproject)
-existing_maintainers[subproject_email] = new_subproject
-members.append(new_subproject)
-if 'members' in data:
-for member in data['members']:
-if member['email'] in existing_maintainers:
-# TODO: Stop overwriting the name from master data, 
if/once we have a proper sync source for individual maintainers (Gentoo LDAP?)
-if 'name' in member:
-existing_maintainers[member['email']].name = 
member['name']
-members.append(existing_maintainers[member['email']])
-else:
-print("Adding individual maintainer %s" % member['email'])
-new_maintainer = Maintainer(email=member['email'], 
is_project=False, name=member['name'] if 'name' in member else None)
-db.session.add(new_maintainer)
-existing_maintainers[member['email']] = new_maintainer
-members.append(new_maintainer)
-# TODO: Include role information in the association?
+
+for subproject_email, inherit_members in data['subprojects']:
+# TODO: How should we handle inherit_members?
+if subproject_email in existing_maintainers:
+members.append(existing_maintainers[subproject_email])
+else:
+print("Creating new project entry for subproject: %s" % 
subproject_email)
+new_subproject = Maintainer(email=subproject_email, 
is_project=True)
+db.session.add(new_subproject)
+existing_maintainers[subproject_email] = new_subproject
+

[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Mart Raudsepp
commit: ed727d30df105b6852f5118baa5a454965b6f4ba
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Sun Jan 22 12:07:48 2017 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Sun Jan 22 12:07:48 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=ed727d30

sync: Use dict comprehension in sync_categories as well

 backend/lib/sync.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index dbb44c2..c837c23 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -117,10 +117,8 @@ def sync_categories():
 data = http_session.get(url)
 # TODO: Handle response error (if not data)
 categories = data.json()
-existing_categories = {}
 # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on 
postgresql:9.5
-for cat in Category.query.all():
-existing_categories[cat.name] = cat
+existing_categories = {cat.name: cat for cat in Category.query.all()}
 for category in categories:
 if category['name'] in existing_categories:
 existing_categories[category['name']].description = 
category['description']



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: 24047d7602bbdbaae60f88e6811dc8570227161f
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 11:58:33 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 12:00:24 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=24047d76

sync: use ORM magics in sync_packages

ORM knows how to map objects to ids through relationships so skip the
details and focus on the thing you want to do.

 backend/lib/sync.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index ba31477..dbb44c2 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -144,7 +144,7 @@ def sync_packages():
 if package['name'] in existing_packages:
 continue # TODO: Update description once we keep that in DB
 else:
-new_pkg = Package(category_id=category.id, 
name=package['name'])
+new_pkg = Package(category=category, name=package['name'])
 db.session.add(new_pkg)
 db.session.commit()
 



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: 793722996da7f8c9120c678b16350363d30c6bf1
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 11:39:41 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 12:00:20 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=79372299

sync: use assert for GLEP67 compliance check

Should never be raised actually but who knows.

 backend/lib/sync.py | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 7c499b5..ba31477 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -175,15 +175,17 @@ def sync_versions():
 maintainers = []
 if 'maintainers' in pkg:
 for maint in pkg['maintainers']:
-if 'email' not in maint:
-print("WARNING: Package %s was told to have a maintainer 
without an e-mail identifier" % package.full_name)
-continue
+assert (
+'email' in maint and 'type' in maint,
+"Package %s maintainer %s entry not GLEP 67 valid" % 
(package.full_name, maint)
+)
+
 email = maint['email'].lower()
 if email in existing_maintainers:
 maintainers.append(existing_maintainers[email])
 else:
 is_project = False
-if 'type' in maint and maint['type'] == 'project':
+if maint['type'] == 'project':
 is_project = True
 print("Adding %s maintainer %s" % ("project" if is_project 
else "individual", email))
 new_maintainer = Maintainer(email=email, 
is_project=is_project, name=maint['name'] if 'name' in maint else None)



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Mart Raudsepp
commit: fab9c6f0ce09830aa95fc3bdfe09c03663094660
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Sun Jan 22 11:57:24 2017 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Sun Jan 22 11:57:24 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=fab9c6f0

sync: Fix pkg sync for packages that have a same named pkg in another category

Also fixes an InstrumentedList issue caused by commit 8d90fa1009, which changed
the categories.packages relationship from dynamic loading to select and broke
this earlier

 backend/lib/sync.py | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index d292291..7c499b5 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -131,12 +131,6 @@ def sync_categories():
 
 def sync_packages():
 for category in Category.query.all():
-if not category.packages:
-print('Category %s has no packages' % category.name)
-existing_packages = []
-else:
-existing_packages = category.packages.all()
-
 data = http_session.get(pkg_url_base + "categories/" + category.name + 
".json")
 if not data:
 print("No JSON data for category %s" % category.name) # FIXME: 
Better handling; mark category as inactive/gone?
@@ -144,7 +138,7 @@ def sync_packages():
 packages = data.json()['packages']
 # TODO: Use UPSERT instead (on_conflict_do_update)
 
-existing_packages = {pkg.name: pkg for pkg in Package.query.all()}
+existing_packages = {pkg.name: pkg for pkg in category.packages}
 
 for package in packages:
 if package['name'] in existing_packages:



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: cd3166150bd42dc8b516e2776d4093418b19d423
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 11:03:03 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 11:04:36 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=cd316615

sync: fix broken sync_packages

I think there is a problem in the logic here but at least this gets me
past the initial sync.

 backend/lib/sync.py | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 744811b..48629cc 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -131,7 +131,12 @@ def sync_categories():
 
 def sync_packages():
 for category in Category.query.all():
-existing_packages = category.packages.all()
+if not category.packages:
+print('Category %s has no packages' % category.name)
+existing_packages = []
+else:
+existing_packages = category.packages.all()
+
 data = http_session.get(pkg_url_base + "categories/" + category.name + 
".json")
 if not data:
 print("No JSON data for category %s" % category.name) # FIXME: 
Better handling; mark category as inactive/gone?



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2017-01-22 Thread Gilles Dartiguelongue
commit: 5f53c4b92b93e9206089a15ff3851925ed3b8952
Author: Gilles Dartiguelongue  gentoo  org>
AuthorDate: Sun Jan 22 11:04:12 2017 +
Commit: Gilles Dartiguelongue  gentoo  org>
CommitDate: Sun Jan 22 11:04:40 2017 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=5f53c4b9

sync: use dict-comprehension in sync_packages

 backend/lib/sync.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 48629cc..d292291 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -143,9 +143,9 @@ def sync_packages():
 continue
 packages = data.json()['packages']
 # TODO: Use UPSERT instead (on_conflict_do_update)
-existing_packages = {}
-for pkg in Package.query.all():
-existing_packages[pkg.name] = pkg
+
+existing_packages = {pkg.name: pkg for pkg in Package.query.all()}
+
 for package in packages:
 if package['name'] in existing_packages:
 continue # TODO: Update description once we keep that in DB



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-06 Thread Mart Raudsepp
commit: 8d90fa100941d73a026a7270f64d16fbe65dc8a5
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Wed Dec  7 07:09:52 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Wed Dec  7 07:09:52 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=8d90fa10

models: Add preliminary model and fields for keyword and p.mask storage

 backend/lib/models.py | 24 ++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/backend/lib/models.py b/backend/lib/models.py
index 2eb9e8c..010d58f 100644
--- a/backend/lib/models.py
+++ b/backend/lib/models.py
@@ -2,6 +2,18 @@ from datetime import datetime
 from .. import db
 
 
+class Keyword(db.Model):
+id = db.Column(db.Integer, primary_key=True)
+# current longest entries would be of length 16 with "~sparc64-freebsd" 
and "~sparc64-solaris"
+name = db.Column(db.Unicode(20), unique=True, nullable=False) # TODO: 
Force lower case?
+
+@property
+def stable(self):
+return not self.name.startswith('~')
+
+def __repr__(self):
+return "" % self.name
+
 class Category(db.Model):
 id = db.Column(db.Integer, primary_key=True)
 name = db.Column(db.Unicode(30), unique=True, nullable=False)
@@ -19,12 +31,13 @@ class Package(db.Model):
 id = db.Column(db.Integer, primary_key=True)
 name = db.Column(db.Unicode(128), nullable=False)
 category_id = db.Column(db.Integer, db.ForeignKey('category.id'), 
nullable=False)
-category = db.relationship('Category', backref=db.backref('packages', 
lazy='dynamic'))
+category = db.relationship('Category', backref=db.backref('packages', 
lazy='select'))
 description = db.Column(db.Unicode(500))
 last_sync_ts = db.Column(db.TIMESTAMP, nullable=False, 
default=datetime.utcfromtimestamp(0))
 maintainers = db.relationship("Maintainer",
 secondary=package_maintainer_rel_table,
 backref='directly_maintained_packages')
+# versions backref
 
 @property
 def full_name(self):
@@ -33,11 +46,18 @@ class Package(db.Model):
 def __repr__(self):
 return "" % (self.category.name, self.name)
 
+package_version_keywords_rel_table = db.Table('package_version_keywords_rel',
+db.Column('package_version_id', db.Integer, 
db.ForeignKey('package_version.id')),
+db.Column('keyword_id', db.Integer, db.ForeignKey('keyword.id')),
+)
+
 class PackageVersion(db.Model):
 id = db.Column(db.Integer, primary_key=True)
 version = db.Column(db.Unicode(128), nullable=False)
 package_id = db.Column(db.Integer, db.ForeignKey('package.id'), 
nullable=False)
-package = db.relationship('Package', backref=db.backref('versions', 
lazy='dynamic'))
+package = db.relationship('Package', backref=db.backref('versions', 
lazy='select'))
+keywords = db.relationship("Keyword", 
secondary=package_version_keywords_rel_table)
+masks = db.Column(db.UnicodeText, nullable=True) # Concatenated mask 
reasons if p.masked, NULL if not a masked version. TODO: arch specific masks
 
 def __repr__(self):
 return "" % (self.package.category.name, 
self.package.name, self.version)



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-06 Thread Mart Raudsepp
commit: 5f3073d21e0748a9414fbd516c3e032d0456ab35
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Wed Dec  7 04:41:46 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Wed Dec  7 04:41:46 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=5f3073d2

sync: Always handle e-mails in lower case to not end up with duplicates

Suggested-by: Doug Freed  mtu.edu>

 backend/lib/models.py |  1 +
 backend/lib/sync.py   | 24 ++--
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/backend/lib/models.py b/backend/lib/models.py
index ba20622..2eb9e8c 100644
--- a/backend/lib/models.py
+++ b/backend/lib/models.py
@@ -50,6 +50,7 @@ maintainer_project_membership_rel_table = 
db.Table('maintainer_project_membershi
 
 class Maintainer(db.Model):
 id = db.Column(db.Integer, primary_key=True)
+# TODO: This has to be unique case insensitive. Currently we have to 
always force lower() to guarantee this and find the proper maintainer entry; 
later we might want to use some sort of NOCASE collate rules here to keep the 
capitalization as preferred per master data
 email = db.Column(db.Unicode(50), nullable=False, unique=True)
 is_project = db.Column(db.Boolean, nullable=False, server_default='f', 
default=False)
 name = db.Column(db.Unicode(128))

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 7ba583d..744811b 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -28,7 +28,9 @@ def get_project_data():
 proj = {}
 for elem in proj_elem:
 tag = elem.tag.lower()
-if tag in ['email', 'name', 'url', 'description']:
+if tag in ['email']:
+proj[tag] = elem.text.lower()
+if tag in ['name', 'url', 'description']:
 proj[tag] = elem.text
 elif tag == 'member':
 member = {}
@@ -36,19 +38,20 @@ def get_project_data():
 member['is_lead'] = True
 for member_elem in elem:
 member_tag = member_elem.tag.lower()
-if member_tag in ['email', 'name', 'role']:
+if member_tag in ['email']:
+member[member_tag] = member_elem.text.lower()
+if member_tag in ['name', 'role']:
 member[member_tag] = member_elem.text
 if 'email' in member:
 if 'members' not in proj:
 proj['members'] = []
 proj['members'].append(member)
-pass
 elif tag == 'subproject':
 if 'ref' in elem.attrib:
 if 'subprojects' not in proj:
 proj['subprojects'] = []
 # subprojects will be a list of (subproject_email, 
inherit-members) tuples where inherit-members is True or False. TODO: Might 
change if sync code will want it differently
-proj['subprojects'].append((elem.attrib['ref'], True if 
('inherit-members' in elem.attrib and elem.attrib['inherit-members'] == '1') 
else False))
+proj['subprojects'].append((elem.attrib['ref'].lower(), 
True if ('inherit-members' in elem.attrib and elem.attrib['inherit-members'] == 
'1') else False))
 else:
 print("Invalid  tag inside project %s - 
required 'ref' attribute missing" % proj['email'] if 'email' in proj else 
"")
 else:
@@ -77,7 +80,7 @@ def sync_projects():
 existing_maintainers[email].url = data['url']
 else:
 print ("Adding project %s" % email)
-new_maintainer = Maintainer(email=data['email'], is_project=True, 
description=data['description'], name=data['name'], url=data['url'])
+new_maintainer = Maintainer(email=email, is_project=True, 
description=data['description'], name=data['name'], url=data['url'])
 db.session.add(new_maintainer)
 existing_maintainers[email] = new_maintainer
 members = []
@@ -176,16 +179,17 @@ def sync_versions():
 if 'email' not in maint:
 print("WARNING: Package %s was told to have a maintainer 
without an e-mail identifier" % package.full_name)
 continue
-if maint['email'] in existing_maintainers: # FIXME: Some 
proxy-maintainers are using mixed case e-mail address, right now we'd be 
creating duplicates right now if the case is different across different packages
-maintainers.append(existing_maintainers[maint['email']])
+email = maint['email'].lower()
+if email in existing_maintainers:
+maintainers.append(existing_maintainers[email])
 else:
 is_project = False
 if 'type' in maint and maint['type'] == 'project':
 is_project = True

[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-06 Thread Mart Raudsepp
commit: 0522c4ccf0f4ca737572b8164cde6bb9c498ba7f
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Wed Dec  7 02:52:48 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Wed Dec  7 02:52:48 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=0522c4cc

sync: Increase the sync delta to 1 hour and print the sync count and oldest TS 
at start

 backend/lib/sync.py | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 8c687c6..7ba583d 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -5,7 +5,7 @@ from datetime import datetime
 from .. import app, db
 from .models import Category, Maintainer, Package, PackageVersion
 
-SYNC_BUFFER_SECS = 30*60
+SYNC_BUFFER_SECS = 60*60 #1 hour
 proj_url = "https://api.gentoo.org/metastructure/projects.xml"
 pkg_url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
@@ -154,7 +154,10 @@ def sync_versions():
 for maintainer in Maintainer.query.all():
 existing_maintainers[maintainer.email] = maintainer
 
-for package in Package.query.filter(Package.last_sync_ts < 
ts).order_by(Package.last_sync_ts).all():
+packages_to_sync = Package.query.filter(Package.last_sync_ts < 
ts).order_by(Package.last_sync_ts).all()
+print("Going to sync %d packages%s" % (len(packages_to_sync), (" (oldest 
sync UTC timestamp: %s)" % packages_to_sync[0].last_sync_ts if 
len(packages_to_sync) else "")))
+
+for package in packages_to_sync:
 cnt += 1
 data = http_session.get(pkg_url_base + "packages/" + package.full_name 
+ ".json")
 if not data:



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-06 Thread Mart Raudsepp
commit: c6f4ea5ccc10c9441345f83d9ea6b0d2a121ede4
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Wed Dec  7 02:39:40 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Wed Dec  7 02:39:40 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=c6f4ea5c

sync: Don't forget to commit db transaction after all packages are synced

Sometimes the run finishes without us having to cancel out, so also commit
the updates made after the last multiple of 100 packages to the DB :)

 backend/lib/sync.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 0250fba..8c687c6 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -193,3 +193,5 @@ def sync_versions():
 print("%d packages updated, committing DB transaction" % cnt)
 db.session.commit()
 now = datetime.utcnow()
+
+db.session.commit()



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-06 Thread Mart Raudsepp
commit: 32483c9459bcfc4f7e3848b3c0e3dc6c1c41829d
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Wed Dec  7 02:08:03 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Wed Dec  7 02:08:03 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=32483c94

sync: Order package details syncing based on how old the last sync was

This way if we got stuck and re-run much later (or it has exceeded the buffer
time constant), we'll at least sync the oldest ones first, so we always end
up being less out of date with the oldest sync ts.

 backend/lib/sync.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 567da2d..0250fba 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -154,7 +154,7 @@ def sync_versions():
 for maintainer in Maintainer.query.all():
 existing_maintainers[maintainer.email] = maintainer
 
-for package in Package.query.filter(Package.last_sync_ts < ts).all():
+for package in Package.query.filter(Package.last_sync_ts < 
ts).order_by(Package.last_sync_ts).all():
 cnt += 1
 data = http_session.get(pkg_url_base + "packages/" + package.full_name 
+ ".json")
 if not data:



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-06 Thread Mart Raudsepp
commit: dde4a3a9c8fbe76897219886f21d046392d65730
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Wed Dec  7 01:56:00 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Wed Dec  7 01:56:00 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=dde4a3a9

sync: Add package description and maintainers sync

Maintains a sync timestamp to skip recently synced packages, so if a
previous run got stuck, we can skip re-doing it too soon.
Saves the DB transaction after every 100 packages, because packages.g.o
seems to rate-limit us, so at least we will have things saved into DB
periodically to cancel out when we get stuck and restart.

 backend/lib/sync.py | 49 +
 1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index e53fa9b..567da2d 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,8 +1,11 @@
 import xml.etree.ElementTree as ET
 import requests
+import time
+from datetime import datetime
 from .. import app, db
 from .models import Category, Maintainer, Package, PackageVersion
 
+SYNC_BUFFER_SECS = 30*60
 proj_url = "https://api.gentoo.org/metastructure/projects.xml"
 pkg_url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
@@ -144,11 +147,49 @@ def sync_packages():
 db.session.commit()
 
 def sync_versions():
-for package in Package.query.all():
+cnt = 0
+ts = datetime.utcfromtimestamp(time.time() - SYNC_BUFFER_SECS)
+now = datetime.utcnow()
+existing_maintainers = {}
+for maintainer in Maintainer.query.all():
+existing_maintainers[maintainer.email] = maintainer
+
+for package in Package.query.filter(Package.last_sync_ts < ts).all():
+cnt += 1
 data = http_session.get(pkg_url_base + "packages/" + package.full_name 
+ ".json")
 if not data:
 print("No JSON data for package %s" % package.full_name) # FIXME: 
Handle better; e.g mark the package as removed if no pkgmove update
 continue
-from pprint import pprint
-pprint(data.json())
-break
+
+pkg = data.json()
+
+print ("Updating package: %s" % package.full_name)
+if 'description' in pkg:
+package.description = pkg['description']
+
+maintainers = []
+if 'maintainers' in pkg:
+for maint in pkg['maintainers']:
+if 'email' not in maint:
+print("WARNING: Package %s was told to have a maintainer 
without an e-mail identifier" % package.full_name)
+continue
+if maint['email'] in existing_maintainers: # FIXME: Some 
proxy-maintainers are using mixed case e-mail address, right now we'd be 
creating duplicates right now if the case is different across different packages
+maintainers.append(existing_maintainers[maint['email']])
+else:
+is_project = False
+if 'type' in maint and maint['type'] == 'project':
+is_project = True
+print("Adding %s maintainer %s" % ("project" if is_project 
else "individual", maint['email']))
+new_maintainer = Maintainer(email=maint['email'], 
is_project=is_project, name=maint['name'] if 'name' in maint else None)
+db.session.add(new_maintainer)
+existing_maintainers[maint['email']] = new_maintainer
+maintainers.append(new_maintainer)
+
+# Intentionally outside if 'maintainers' in pkg, because if there are 
no maintainers in JSON, it's falled to maintainer-needed and we need to clean 
out old maintainer entries
+package.maintainers = maintainers # TODO: Retain order to know who is 
primary; retain description associated with the maintainership
+package.last_sync_ts = now
+
+if not cnt % 100:
+print("%d packages updated, committing DB transaction" % cnt)
+db.session.commit()
+now = datetime.utcnow()



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-06 Thread Mart Raudsepp
commit: ed46487bc107c4f404d23e6429e0e4050616459b
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Wed Dec  7 01:55:18 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Wed Dec  7 01:55:18 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=ed46487b

models: Add package maintainers relationship table and ORM relationships

 backend/lib/models.py | 9 +
 1 file changed, 9 insertions(+)

diff --git a/backend/lib/models.py b/backend/lib/models.py
index e06dcf8..ba20622 100644
--- a/backend/lib/models.py
+++ b/backend/lib/models.py
@@ -10,6 +10,11 @@ class Category(db.Model):
 def __repr__(self):
 return "" % self.name
 
+package_maintainer_rel_table = db.Table('package_maintainer_rel',
+db.Column('package_id', db.Integer, db.ForeignKey('package.id')),
+db.Column('maintainer_id', db.Integer, db.ForeignKey('maintainer.id')),
+)
+
 class Package(db.Model):
 id = db.Column(db.Integer, primary_key=True)
 name = db.Column(db.Unicode(128), nullable=False)
@@ -17,6 +22,9 @@ class Package(db.Model):
 category = db.relationship('Category', backref=db.backref('packages', 
lazy='dynamic'))
 description = db.Column(db.Unicode(500))
 last_sync_ts = db.Column(db.TIMESTAMP, nullable=False, 
default=datetime.utcfromtimestamp(0))
+maintainers = db.relationship("Maintainer",
+secondary=package_maintainer_rel_table,
+backref='directly_maintained_packages')
 
 @property
 def full_name(self):
@@ -54,6 +62,7 @@ class Maintainer(db.Model):
 
secondaryjoin=id==maintainer_project_membership_rel_table.c.maintainer_id,
 backref='projects')
 # projects relationship backref ^^
+# directly_maintained_packages backref - list of packages maintained 
directly by given project or individual maintainer (as opposed to a bigger list 
that includes packages maintained by parent/child projects or projects the 
given individual maintainer is part of)
 
 def __repr__(self):
 return "" % ("project" if self.is_project else 
"individual", self.email)



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-06 Thread Mart Raudsepp
commit: f1a5e9bb01bb7fd802e7cf87b4e9dd675e910140
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Wed Dec  7 00:30:06 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Wed Dec  7 00:30:06 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=f1a5e9bb

models: Add description and last_sync_ts columns for Package

description we will get from package.g.o per-package detailed json,
last_sync_ts will be used to record when that detailed json was
last pulled, so that we can rate-limit as-needed.

If still using sqlite, can DROP TABLE package; and re-create with
./manage.py init
or add the columns manually
ALTER TABLE package ADD COLUMN description VARCHAR(500);
ALTER TABLE package ADD COLUMN last_sync_ts TIMESTAMP NOT NULL;

though that NOT NULL vs default on sqlalchemy's side for now might
pose an issue, solving of which is an easy exercise for those that care
instead of recreating.

 backend/lib/models.py | 4 
 1 file changed, 4 insertions(+)

diff --git a/backend/lib/models.py b/backend/lib/models.py
index f842a8a..e06dcf8 100644
--- a/backend/lib/models.py
+++ b/backend/lib/models.py
@@ -1,3 +1,4 @@
+from datetime import datetime
 from .. import db
 
 
@@ -14,6 +15,8 @@ class Package(db.Model):
 name = db.Column(db.Unicode(128), nullable=False)
 category_id = db.Column(db.Integer, db.ForeignKey('category.id'), 
nullable=False)
 category = db.relationship('Category', backref=db.backref('packages', 
lazy='dynamic'))
+description = db.Column(db.Unicode(500))
+last_sync_ts = db.Column(db.TIMESTAMP, nullable=False, 
default=datetime.utcfromtimestamp(0))
 
 @property
 def full_name(self):
@@ -31,6 +34,7 @@ class PackageVersion(db.Model):
 def __repr__(self):
 return "" % (self.package.category.name, 
self.package.name, self.version)
 
+
 maintainer_project_membership_rel_table = 
db.Table('maintainer_project_membership_rel',
 db.Column('project_id', db.Integer, db.ForeignKey('maintainer.id')),
 db.Column('maintainer_id', db.Integer, db.ForeignKey('maintainer.id')),



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-05 Thread Mart Raudsepp
commit: 8c264ac120faebd8463f9b6fadde65f40df2ddb0
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Mon Dec  5 17:44:25 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Mon Dec  5 17:44:25 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=8c264ac1

sync: return empty dict on projects retrieval error, so the caller won't error

 backend/lib/sync.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 2d6244c..e53fa9b 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -8,16 +8,16 @@ pkg_url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
 
 def get_project_data():
+projects = {}
 data = http_session.get(proj_url)
 if not data:
 print("Failed retrieving projects.xml")
-return
+return projects
 root = ET.fromstring(data.content)
-projects = {}
 # Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 
2016-11-10
 if root.tag.lower() != 'projects':
 print("Downloaded projects.xml root tag isn't 'projects'")
-return
+return projects
 for proj_elem in root:
 if proj_elem.tag.lower() != 'project':
 print("Skipping unknown  subtag <%s>" % proj_elem.tag)



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-04 Thread Mart Raudsepp
commit: 9664464413b7cd59f861eff01148454974e23030
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Sun Dec  4 08:02:10 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Sun Dec  4 08:02:10 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=96644644

sync: use requests response.json() directly instead of json.loads

This should ensure requests will handle UTF-8 fully correctly for us

Suggested-by: Doug Freed  mtu.edu>

 backend/lib/sync.py | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 22419bf..2d6244c 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,5 +1,4 @@
 import xml.etree.ElementTree as ET
-from flask import json
 import requests
 from .. import app, db
 from .models import Category, Maintainer, Package, PackageVersion
@@ -111,7 +110,7 @@ def sync_categories():
 url = pkg_url_base + "categories.json"
 data = http_session.get(url)
 # TODO: Handle response error (if not data)
-categories = json.loads(data.text)
+categories = data.json()
 existing_categories = {}
 # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on 
postgresql:9.5
 for cat in Category.query.all():
@@ -131,7 +130,7 @@ def sync_packages():
 if not data:
 print("No JSON data for category %s" % category.name) # FIXME: 
Better handling; mark category as inactive/gone?
 continue
-packages = json.loads(data.text)['packages']
+packages = data.json()['packages']
 # TODO: Use UPSERT instead (on_conflict_do_update)
 existing_packages = {}
 for pkg in Package.query.all():
@@ -151,5 +150,5 @@ def sync_versions():
 print("No JSON data for package %s" % package.full_name) # FIXME: 
Handle better; e.g mark the package as removed if no pkgmove update
 continue
 from pprint import pprint
-pprint(json.loads(data.text))
+pprint(data.json())
 break



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-04 Thread Mart Raudsepp
commit: dac532df96cb16626f4f1656b5aa2f82b8383c8d
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Sun Dec  4 07:59:39 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Sun Dec  4 07:59:39 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=dac532df

sync: Fix UTF-8 handling for projects.xml import

Need to feed response.content bytestring into ElementTree, not response.text.
With the latter ET seems to figure it's already decoded and goes all latin-1 on 
us.
From response.content bytestream it notices the UTF-8 encoding XML markup and
does things right.

Diagnosed-by: Doug Freed  mtu.edu>

 backend/lib/sync.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 4894315..22419bf 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -13,7 +13,7 @@ def get_project_data():
 if not data:
 print("Failed retrieving projects.xml")
 return
-root = ET.fromstring(data.text)
+root = ET.fromstring(data.content)
 projects = {}
 # Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 
2016-11-10
 if root.tag.lower() != 'projects':



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-03 Thread Mart Raudsepp
commit: 080e857b7081db90f874c73fd271d8bd699195d6
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Sun Dec  4 07:43:13 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Sun Dec  4 07:43:13 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=080e857b

sync: Update individual maintainer names during projects sync for the time being

... for as long as we don't have master data for this that we shouldn't overwrite.
Also remove a now done TODO item and tweak a debug string I messed up 
pre-commit.

 backend/lib/sync.py | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 57a7cb1..4894315 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -37,7 +37,6 @@ def get_project_data():
 if member_tag in ['email', 'name', 'role']:
 member[member_tag] = member_elem.text
 if 'email' in member:
-# TODO: Sync the members (it's valid as email is given) - 
maybe at the end, after we have synced the project data, so we can add him to 
the project directly
 if 'members' not in proj:
 proj['members'] = []
 proj['members'].append(member)
@@ -94,9 +93,12 @@ def sync_projects():
 if 'members' in data:
 for member in data['members']:
 if member['email'] in existing_maintainers:
+# TODO: Stop overwriting the name from master data, 
if/once we have a proper sync source for individual maintainers (Gentoo LDAP?)
+if 'name' in member:
+existing_maintainers[member['email']].name = 
member['name']
 members.append(existing_maintainers[member['email']])
 else:
-print("Adding individual%s" % member['email'])
+print("Adding individual maintainer %s" % member['email'])
 new_maintainer = Maintainer(email=member['email'], 
is_project=False, name=member['name'] if 'name' in member else None)
 db.session.add(new_maintainer)
 existing_maintainers[member['email']] = new_maintainer



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-03 Thread Mart Raudsepp
commit: a0e5f8b3559f243236d9dd1170a00d4405042631
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Sun Dec  4 06:24:39 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Sun Dec  4 06:24:39 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=a0e5f8b3

models: Add association table and ORM relationship between projects and members

 backend/lib/models.py | 12 
 1 file changed, 12 insertions(+)

diff --git a/backend/lib/models.py b/backend/lib/models.py
index bc6cd20..f842a8a 100644
--- a/backend/lib/models.py
+++ b/backend/lib/models.py
@@ -31,6 +31,11 @@ class PackageVersion(db.Model):
 def __repr__(self):
 return "" % (self.package.category.name, 
self.package.name, self.version)
 
+maintainer_project_membership_rel_table = 
db.Table('maintainer_project_membership_rel',
+db.Column('project_id', db.Integer, db.ForeignKey('maintainer.id')),
+db.Column('maintainer_id', db.Integer, db.ForeignKey('maintainer.id')),
+)
+
 class Maintainer(db.Model):
 id = db.Column(db.Integer, primary_key=True)
 email = db.Column(db.Unicode(50), nullable=False, unique=True)
@@ -39,5 +44,12 @@ class Maintainer(db.Model):
 url = db.Column(db.Unicode())
 description = db.Column(db.Unicode(500))
 
+members = db.relationship("Maintainer",
+secondary=maintainer_project_membership_rel_table,
+primaryjoin=id==maintainer_project_membership_rel_table.c.project_id,
+
secondaryjoin=id==maintainer_project_membership_rel_table.c.maintainer_id,
+backref='projects')
+# projects relationship backref ^^
+
 def __repr__(self):
 return "" % ("project" if self.is_project else 
"individual", self.email)



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-03 Thread Mart Raudsepp
commit: d1965a898e3f92f94accb630d4daf68d156a0d0c
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Sun Dec  4 06:26:47 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Sun Dec  4 06:26:47 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=d1965a89

sync: Project members and subprojects syncing to DB

 backend/lib/sync.py | 26 +-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 6ed8e01..57a7cb1 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -78,9 +78,33 @@ def sync_projects():
 print ("Adding project %s" % email)
 new_maintainer = Maintainer(email=data['email'], is_project=True, 
description=data['description'], name=data['name'], url=data['url'])
 db.session.add(new_maintainer)
+existing_maintainers[email] = new_maintainer
+members = []
+if 'subprojects' in data:
+for subproject_email, inherit_members in data['subprojects']:
+# TODO: How should we handle inherit_members?
+if subproject_email in existing_maintainers:
+members.append(existing_maintainers[subproject_email])
+else:
+print("Creating new project entry for subproject: %s" % 
subproject_email)
+new_subproject = Maintainer(email=subproject_email, 
is_project=True)
+db.session.add(new_subproject)
+existing_maintainers[subproject_email] = new_subproject
+members.append(new_subproject)
+if 'members' in data:
+for member in data['members']:
+if member['email'] in existing_maintainers:
+members.append(existing_maintainers[member['email']])
+else:
+print("Adding individual%s" % member['email'])
+new_maintainer = Maintainer(email=member['email'], 
is_project=False, name=member['name'] if 'name' in member else None)
+db.session.add(new_maintainer)
+existing_maintainers[member['email']] = new_maintainer
+members.append(new_maintainer)
+# TODO: Include role information in the association?
+existing_maintainers[email].members = members
 db.session.commit()
 
-
 def sync_categories():
 url = pkg_url_base + "categories.json"
 data = http_session.get(url)



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-03 Thread Mart Raudsepp
commit: df4ddb601efbef157147fcfd6057afd01636acab
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Sun Dec  4 05:26:10 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Sun Dec  4 05:26:10 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=df4ddb60

sync: Initial projects syncing to DB without members

 backend/lib/sync.py | 35 ---
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index fbc653a..6ed8e01 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -2,19 +2,19 @@ import xml.etree.ElementTree as ET
 from flask import json
 import requests
 from .. import app, db
-from .models import Category, Package, PackageVersion
+from .models import Category, Maintainer, Package, PackageVersion
 
 proj_url = "https://api.gentoo.org/metastructure/projects.xml"
 pkg_url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
 
-def sync_projects():
+def get_project_data():
 data = http_session.get(proj_url)
 if not data:
 print("Failed retrieving projects.xml")
 return
 root = ET.fromstring(data.text)
-projects = []
+projects = {}
 # Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 
2016-11-10
 if root.tag.lower() != 'projects':
 print("Downloaded projects.xml root tag isn't 'projects'")
@@ -53,12 +53,33 @@ def sync_projects():
 else:
 print("Skipping unknown  subtag <%s>" % tag)
 if 'email' in proj:
-projects.append(proj)
+projects[proj['email']] = proj
 else:
 print("Skipping incomplete project data due to lack of required 
email identifier: %s" % (proj,))
-from pprint import pprint
-print("Found the following projects and data:")
-pprint(projects)
+return projects
+
+def sync_projects():
+projects = get_project_data()
+existing_maintainers = {}
+# TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on 
postgresql:9.5
+for maintainer in Maintainer.query.all():
+existing_maintainers[maintainer.email] = maintainer
+for email, data in projects.items():
+if email in existing_maintainers:
+print ("Updating project %s" % email)
+existing_maintainers[email].is_project = True
+if 'description' in data:
+existing_maintainers[email].description = data['description']
+if 'name' in data:
+existing_maintainers[email].name = data['name']
+if 'url' in data:
+existing_maintainers[email].url = data['url']
+else:
+print ("Adding project %s" % email)
+new_maintainer = Maintainer(email=data['email'], is_project=True, 
description=data['description'], name=data['name'], url=data['url'])
+db.session.add(new_maintainer)
+db.session.commit()
+
 
 def sync_categories():
 url = pkg_url_base + "categories.json"



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-03 Thread Mart Raudsepp
commit: a46c779bf33cf558d287f8bcf11a5e483046bb17
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Sun Dec  4 05:24:45 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Sun Dec  4 05:25:29 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=a46c779b

models: Add Maintainer model

As this is a new table, just re-doing "./manage.py init" should add it to db,
while keeping old data.

 backend/lib/models.py | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/backend/lib/models.py b/backend/lib/models.py
index 57f3e64..bc6cd20 100644
--- a/backend/lib/models.py
+++ b/backend/lib/models.py
@@ -30,3 +30,14 @@ class PackageVersion(db.Model):
 
 def __repr__(self):
 return "" % (self.package.category.name, 
self.package.name, self.version)
+
+class Maintainer(db.Model):
+id = db.Column(db.Integer, primary_key=True)
+email = db.Column(db.Unicode(50), nullable=False, unique=True)
+is_project = db.Column(db.Boolean, nullable=False, server_default='f', 
default=False)
+name = db.Column(db.Unicode(128))
+url = db.Column(db.Unicode())
+description = db.Column(db.Unicode(500))
+
+def __repr__(self):
+return "" % ("project" if self.is_project else 
"individual", self.email)



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-12-03 Thread Mart Raudsepp
commit: 20275e6f354929fe3d702fb9b296f828704eb5a1
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Sun Dec  4 04:48:07 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Sun Dec  4 04:48:07 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=20275e6f

models: Use sqlalchemy Unicode columns instead of String

 backend/lib/models.py | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/backend/lib/models.py b/backend/lib/models.py
index 8f7637d..57f3e64 100644
--- a/backend/lib/models.py
+++ b/backend/lib/models.py
@@ -3,15 +3,15 @@ from .. import db
 
 class Category(db.Model):
 id = db.Column(db.Integer, primary_key=True)
-name = db.Column(db.String(30), unique=True, nullable=False)
-description = db.Column(db.String(500))
+name = db.Column(db.Unicode(30), unique=True, nullable=False)
+description = db.Column(db.Unicode(500))
 
 def __repr__(self):
 return "" % self.name
 
 class Package(db.Model):
 id = db.Column(db.Integer, primary_key=True)
-name = db.Column(db.String(128), nullable=False)
+name = db.Column(db.Unicode(128), nullable=False)
 category_id = db.Column(db.Integer, db.ForeignKey('category.id'), 
nullable=False)
 category = db.relationship('Category', backref=db.backref('packages', 
lazy='dynamic'))
 
@@ -24,7 +24,7 @@ class Package(db.Model):
 
 class PackageVersion(db.Model):
 id = db.Column(db.Integer, primary_key=True)
-version = db.Column(db.String(128), nullable=False)
+version = db.Column(db.Unicode(128), nullable=False)
 package_id = db.Column(db.Integer, db.ForeignKey('package.id'), 
nullable=False)
 package = db.relationship('Package', backref=db.backref('versions', 
lazy='dynamic'))
 



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-11-10 Thread Mart Raudsepp
commit: 5972da09a9d9faaa7dbf45929a6c09a0d07d0691
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Fri Nov 11 01:22:04 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Fri Nov 11 01:22:04 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=5972da09

Add parsed project members to the result dict

 backend/lib/sync.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 291d701..fbc653a 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -38,6 +38,9 @@ def sync_projects():
 member[member_tag] = member_elem.text
 if 'email' in member:
 # TODO: Sync the members (it's valid as email is given) - 
maybe at the end, after we have synced the project data, so we can add him to 
the project directly
+if 'members' not in proj:
+proj['members'] = []
+proj['members'].append(member)
 pass
 elif tag == 'subproject':
 if 'ref' in elem.attrib:



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-11-10 Thread Mart Raudsepp
commit: c11a83cc5a9e9b0ce885caddef5a3b593fc4
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Thu Nov 10 15:50:27 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Thu Nov 10 15:50:27 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=c11a8333

Normalize subproject inherit-members to True or False during parsing

 backend/lib/sync.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 7139119..291d701 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -43,8 +43,8 @@ def sync_projects():
 if 'ref' in elem.attrib:
 if 'subprojects' not in proj:
 proj['subprojects'] = []
-# subprojects will be a list of (subproject_email, 
inherit-members) tuples where inherit-members is None, 0 or 1 (if dtd is 
followed). TODO: Might change if sync code will want it differently
-proj['subprojects'].append((elem.attrib['ref'], 
elem.attrib['inherit-members'] if 'inherit-members' in elem.attrib else None))
+# subprojects will be a list of (subproject_email, 
inherit-members) tuples where inherit-members is True or False. TODO: Might 
change if sync code will want it differently
+proj['subprojects'].append((elem.attrib['ref'], True if 
('inherit-members' in elem.attrib and elem.attrib['inherit-members'] == '1') 
else False))
 else:
 print("Invalid  tag inside project %s - 
required 'ref' attribute missing" % proj['email'] if 'email' in proj else 
"")
 else:



[gentoo-commits] proj/grumpy:master commit in: /, backend/lib/

2016-11-10 Thread Mart Raudsepp
commit: d7dbfa3ba07dcd2cbc1f0be9f9575c436c9a82e3
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Thu Nov 10 15:43:16 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Thu Nov 10 15:43:40 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=d7dbfa3b

Initial projects.xml parsing code with debug printout

 backend/lib/sync.py | 59 +
 manage.py   |  8 +++-
 2 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index ce54937..7139119 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,13 +1,64 @@
+import xml.etree.ElementTree as ET
 from flask import json
 import requests
 from .. import app, db
 from .models import Category, Package, PackageVersion
 
-url_base = "https://packages.gentoo.org/"
+proj_url = "https://api.gentoo.org/metastructure/projects.xml"
+pkg_url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
 
+def sync_projects():
+data = http_session.get(proj_url)
+if not data:
+print("Failed retrieving projects.xml")
+return
+root = ET.fromstring(data.text)
+projects = []
+# Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 
2016-11-10
+if root.tag.lower() != 'projects':
+print("Downloaded projects.xml root tag isn't 'projects'")
+return
+for proj_elem in root:
+if proj_elem.tag.lower() != 'project':
+print("Skipping unknown <projects> subtag <%s>" % proj_elem.tag)
+continue
+proj = {}
+for elem in proj_elem:
+tag = elem.tag.lower()
+if tag in ['email', 'name', 'url', 'description']:
+proj[tag] = elem.text
+elif tag == 'member':
+member = {}
+if 'is-lead' in elem.attrib and elem.attrib['is-lead'] == '1':
+member['is_lead'] = True
+for member_elem in elem:
+member_tag = member_elem.tag.lower()
+if member_tag in ['email', 'name', 'role']:
+member[member_tag] = member_elem.text
+if 'email' in member:
+# TODO: Sync the members (it's valid as email is given) - 
maybe at the end, after we have synced the project data, so we can add him to 
the project directly
+pass
+elif tag == 'subproject':
+if 'ref' in elem.attrib:
+if 'subprojects' not in proj:
+proj['subprojects'] = []
+# subprojects will be a list of (subproject_email, 
inherit-members) tuples where inherit-members is None, 0 or 1 (if dtd is 
followed). TODO: Might change if sync code will want it differently
+proj['subprojects'].append((elem.attrib['ref'], 
elem.attrib['inherit-members'] if 'inherit-members' in elem.attrib else None))
+else:
+print("Invalid <subproject> tag inside project %s - 
required 'ref' attribute missing" % proj['email'] if 'email' in proj else 
"")
+else:
+print("Skipping unknown <project> subtag <%s>" % tag)
+if 'email' in proj:
+projects.append(proj)
+else:
+print("Skipping incomplete project data due to lack of required 
email identifier: %s" % (proj,))
+from pprint import pprint
+print("Found the following projects and data:")
+pprint(projects)
+
 def sync_categories():
-url = url_base + "categories.json"
+url = pkg_url_base + "categories.json"
 data = http_session.get(url)
 # TODO: Handle response error (if not data)
 categories = json.loads(data.text)
@@ -26,7 +77,7 @@ def sync_categories():
 def sync_packages():
 for category in Category.query.all():
 existing_packages = category.packages.all()
-data = http_session.get(url_base + "categories/" + category.name + 
".json")
+data = http_session.get(pkg_url_base + "categories/" + category.name + 
".json")
 if not data:
 print("No JSON data for category %s" % category.name) # FIXME: 
Better handling; mark category as inactive/gone?
 continue
@@ -45,7 +96,7 @@ def sync_packages():
 
 def sync_versions():
 for package in Package.query.all():
-data = http_session.get(url_base + "packages/" + package.full_name + 
".json")
+data = http_session.get(pkg_url_base + "packages/" + package.full_name 
+ ".json")
 if not data:
 print("No JSON data for package %s" % package.full_name) # FIXME: 
Handle better; e.g mark the package as removed if no pkgmove update
 continue

diff --git a/manage.py b/manage.py
index 359c63a..a31b96c 100755
--- a/manage.py
+++ b/manage.py
@@ -21,12 +21,18 @@ def init():
 
 @manager.command
 def sync_gentoo():
-"""Synchronize Gentoo data from packages.gentoo.org API"""
+

[gentoo-commits] proj/grumpy:master commit in: /, backend/lib/

2016-11-10 Thread Mart Raudsepp
commit: d584775a6820f23561c5b8922a46644920bbf2e6
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Thu Nov 10 09:09:42 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Thu Nov 10 09:09:42 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=d584775a

Add dirty sync_versions debug code

This just prints the first packages versions JSON data out and exits,
so just some initial debug code out of the way to sync in projects.xml
first, as sync_versions will need to reference projects and maintainers,
so better to finish projects.xml sync first.

 backend/lib/sync.py | 12 +++-
 manage.py   |  2 --
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index a6aef23..ce54937 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,7 +1,7 @@
 from flask import json
 import requests
 from .. import app, db
-from .models import Category, Package
+from .models import Category, Package, PackageVersion
 
 url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
@@ -42,3 +42,13 @@ def sync_packages():
 new_pkg = Package(category_id=category.id, 
name=package['name'])
 db.session.add(new_pkg)
 db.session.commit()
+
+def sync_versions():
+for package in Package.query.all():
+data = http_session.get(url_base + "packages/" + package.full_name + 
".json")
+if not data:
+print("No JSON data for package %s" % package.full_name) # FIXME: 
Handle better; e.g mark the package as removed if no pkgmove update
+continue
+from pprint import pprint
+pprint(json.loads(data.text))
+break

diff --git a/manage.py b/manage.py
index 4634518..359c63a 100755
--- a/manage.py
+++ b/manage.py
@@ -36,12 +36,10 @@ def sync_packages():
 """Synchronize only Gentoo packages base data (without details)"""
 sync.sync_packages()
 
-'''
 @manager.command
 def sync_versions():
 """Synchronize only Gentoo package details"""
 sync.sync_versions()
-'''
 
 if __name__ == '__main__':
 manager.run()



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-09-24 Thread Mart Raudsepp
commit: 6113941adc9693cac0a4aa12cdac82f75c7921bd
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Sat Sep 24 07:01:30 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Sat Sep 24 07:01:30 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=6113941a

Add a full_name property to package and remove some debug spam on sync

 backend/lib/models.py | 4 
 backend/lib/sync.py   | 1 -
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/backend/lib/models.py b/backend/lib/models.py
index 8e47d56..8f7637d 100644
--- a/backend/lib/models.py
+++ b/backend/lib/models.py
@@ -15,6 +15,10 @@ class Package(db.Model):
 category_id = db.Column(db.Integer, db.ForeignKey('category.id'), 
nullable=False)
 category = db.relationship('Category', backref=db.backref('packages', 
lazy='dynamic'))
 
+@property
+def full_name(self):
+return "%s/%s" % (self.category.name, self.name)
+
 def __repr__(self):
 return "" % (self.category.name, self.name)
 

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 6dcb6b9..a6aef23 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -26,7 +26,6 @@ def sync_categories():
 def sync_packages():
 for category in Category.query.all():
 existing_packages = category.packages.all()
-print("Existing packages in DB for category %s: %s" % (category.name, 
existing_packages,))
 data = http_session.get(url_base + "categories/" + category.name + 
".json")
 if not data:
 print("No JSON data for category %s" % category.name) # FIXME: 
Better handling; mark category as inactive/gone?



[gentoo-commits] proj/grumpy:master commit in: /, backend/lib/

2016-09-07 Thread Mart Raudsepp
commit: 1e826829e42b0524365770dd329af5217a5f6b54
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Wed Sep  7 20:20:20 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Wed Sep  7 20:20:20 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=1e826829

Add syncing of packages in categories from packages.g.o (just name)

Also add manage.py commands to call the sync steps individually for testing

 backend/lib/sync.py | 28 ++--
 manage.py   | 25 ++---
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 3cfb746..6dcb6b9 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,15 +1,18 @@
 from flask import json
 import requests
 from .. import app, db
-from .models import Category
+from .models import Category, Package
 
+url_base = "https://packages.gentoo.org/"
 http_session = requests.session()
 
 def sync_categories():
-url = "https://packages.gentoo.org/categories.json"
+url = url_base + "categories.json"
 data = http_session.get(url)
+# TODO: Handle response error (if not data)
 categories = json.loads(data.text)
 existing_categories = {}
+# TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on 
postgresql:9.5
 for cat in Category.query.all():
 existing_categories[cat.name] = cat
 for category in categories:
@@ -19,3 +22,24 @@ def sync_categories():
 new_cat = Category(name=category['name'], 
description=category['description'])
 db.session.add(new_cat)
 db.session.commit()
+
+def sync_packages():
+for category in Category.query.all():
+existing_packages = category.packages.all()
+print("Existing packages in DB for category %s: %s" % (category.name, 
existing_packages,))
+data = http_session.get(url_base + "categories/" + category.name + 
".json")
+if not data:
+print("No JSON data for category %s" % category.name) # FIXME: 
Better handling; mark category as inactive/gone?
+continue
+packages = json.loads(data.text)['packages']
+# TODO: Use UPSERT instead (on_conflict_do_update)
+existing_packages = {}
+for pkg in Package.query.all():
+existing_packages[pkg.name] = pkg
+for package in packages:
+if package['name'] in existing_packages:
+continue # TODO: Update description once we keep that in DB
+else:
+new_pkg = Package(category_id=category.id, 
name=package['name'])
+db.session.add(new_pkg)
+db.session.commit()

diff --git a/manage.py b/manage.py
index 4f123aa..4634518 100755
--- a/manage.py
+++ b/manage.py
@@ -4,7 +4,7 @@
 from flask_script import Manager, Shell
 
 from backend import app, db
-from backend.lib.sync import sync_categories
+from backend.lib import sync
 
 
 manager = Manager(app)
@@ -21,8 +21,27 @@ def init():
 
 @manager.command
 def sync_gentoo():
-"""Syncronize Gentoo data from packages.gentoo.org API"""
-sync_categories()
+"""Synchronize Gentoo data from packages.gentoo.org API"""
+sync.sync_categories()
+sync.sync_packages()
+#sync_versions()
+
+@manager.command
+def sync_categories():
+"""Synchronize only Gentoo categories data"""
+sync.sync_categories()
+
+@manager.command
+def sync_packages():
+"""Synchronize only Gentoo packages base data (without details)"""
+sync.sync_packages()
+
+'''
+@manager.command
+def sync_versions():
+"""Synchronize only Gentoo package details"""
+sync.sync_versions()
+'''
 
 if __name__ == '__main__':
 manager.run()



[gentoo-commits] proj/grumpy:master commit in: backend/lib/

2016-09-07 Thread Mart Raudsepp
commit: 724bb757e8b08382dcbdd460cbef533b91e6338f
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Wed Sep  7 20:17:51 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Wed Sep  7 20:17:51 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=724bb757

Don't double-quote debug output for full atoms from %r usage

 backend/lib/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/lib/models.py b/backend/lib/models.py
index 5088e3e..8e47d56 100644
--- a/backend/lib/models.py
+++ b/backend/lib/models.py
@@ -16,7 +16,7 @@ class Package(db.Model):
 category = db.relationship('Category', backref=db.backref('packages', 
lazy='dynamic'))
 
 def __repr__(self):
-return "<Package %r/%r>" % (self.category.name, self.name)
+return "<Package %s/%s>" % (self.category.name, self.name)
 
 class PackageVersion(db.Model):
 id = db.Column(db.Integer, primary_key=True)
@@ -25,4 +25,4 @@ class PackageVersion(db.Model):
 package = db.relationship('Package', backref=db.backref('versions', 
lazy='dynamic'))
 
 def __repr__(self):
-return "" % (self.package.category.name, 
self.package.name, self.version)
+return "" % (self.package.category.name, 
self.package.name, self.version)



[gentoo-commits] proj/grumpy:master commit in: backend/lib/, /, backend/

2016-09-06 Thread Mart Raudsepp
commit: 6d5b0a5ba688677a127d1df1439080482c9709d1
Author: Mart Raudsepp  gentoo  org>
AuthorDate: Tue Sep  6 21:19:29 2016 +
Commit: Mart Raudsepp  gentoo  org>
CommitDate: Tue Sep  6 21:20:02 2016 +
URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=6d5b0a5b

Add initial code to sync categories from packages.g.o with associated plumbing

Now this should make http://localhost:5000 show the available categories:
./manage.py init
./manage.py sync_gentoo
./manage.py runserver

 backend/__init__.py   | 15 ++-
 backend/lib/models.py | 28 
 backend/lib/sync.py   | 21 +
 manage.py | 15 +--
 requirements.txt  |  2 ++
 5 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/backend/__init__.py b/backend/__init__.py
index 81ca7eb..b03432b 100644
--- a/backend/__init__.py
+++ b/backend/__init__.py
@@ -1,7 +1,20 @@
 from flask import Flask
+from flask_sqlalchemy import SQLAlchemy
 
 app = Flask(__name__)
+app.config['SQLALCHEMY_DATABASE_URI'] = "sqlite:///grumpy.db" # FIXME: 
configuration support
+db = SQLAlchemy(app)
+
+from .lib import models
+
 
 @app.route("/")
 def hello_world():
-return "Hello World!"
+categories = models.Category.query.all()
+text = ""
+for cat in categories:
+text += "%s: %s" % (cat.name, cat.description)
+return "Hello World! These are the package categories I know about:%s" 
% text
+
+
+__all__ = ["app", "db"]

diff --git a/backend/lib/models.py b/backend/lib/models.py
new file mode 100644
index 0000000..5088e3e
--- /dev/null
+++ b/backend/lib/models.py
@@ -0,0 +1,28 @@
+from .. import db
+
+
+class Category(db.Model):
+id = db.Column(db.Integer, primary_key=True)
+name = db.Column(db.String(30), unique=True, nullable=False)
+description = db.Column(db.String(500))
+
+def __repr__(self):
+return "" % self.name
+
+class Package(db.Model):
+id = db.Column(db.Integer, primary_key=True)
+name = db.Column(db.String(128), nullable=False)
+category_id = db.Column(db.Integer, db.ForeignKey('category.id'), 
nullable=False)
+category = db.relationship('Category', backref=db.backref('packages', 
lazy='dynamic'))
+
+def __repr__(self):
+return "" % (self.category.name, self.name)
+
+class PackageVersion(db.Model):
+id = db.Column(db.Integer, primary_key=True)
+version = db.Column(db.String(128), nullable=False)
+package_id = db.Column(db.Integer, db.ForeignKey('package.id'), 
nullable=False)
+package = db.relationship('Package', backref=db.backref('versions', 
lazy='dynamic'))
+
+def __repr__(self):
+return "" % (self.package.category.name, 
self.package.name, self.version)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
new file mode 100644
index 0000000..3cfb746
--- /dev/null
+++ b/backend/lib/sync.py
@@ -0,0 +1,21 @@
+from flask import json
+import requests
+from .. import app, db
+from .models import Category
+
+http_session = requests.session()
+
+def sync_categories():
+url = "https://packages.gentoo.org/categories.json"
+data = http_session.get(url)
+categories = json.loads(data.text)
+existing_categories = {}
+for cat in Category.query.all():
+existing_categories[cat.name] = cat
+for category in categories:
+if category['name'] in existing_categories:
+existing_categories[category['name']].description = 
category['description']
+else:
+new_cat = Category(name=category['name'], 
description=category['description'])
+db.session.add(new_cat)
+db.session.commit()

diff --git a/manage.py b/manage.py
index b28d93a..4f123aa 100755
--- a/manage.py
+++ b/manage.py
@@ -3,15 +3,26 @@
 
 from flask_script import Manager, Shell
 
-from backend import app
+from backend import app, db
+from backend.lib.sync import sync_categories
 
 
 manager = Manager(app)
 
 def shell_context():
-return dict(app=manager.app)
+return dict(app=manager.app, db=db)
 
 manager.add_command('shell', Shell(make_context=shell_context))
 
+@manager.command
+def init():
+"""Initialize empty database with tables"""
+db.create_all()
+
+@manager.command
+def sync_gentoo():
+"""Syncronize Gentoo data from packages.gentoo.org API"""
+sync_categories()
+
 if __name__ == '__main__':
 manager.run()

diff --git a/requirements.txt b/requirements.txt
index eaf59ef..78e4b2b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,4 @@
 Flask
+Flask-SQLAlchemy
 Flask-Script  #manage.py
+requests