commit:     1e826829e42b0524365770dd329af5217a5f6b54
Author:     Mart Raudsepp <leio <AT> gentoo <DOT> org>
AuthorDate: Wed Sep  7 20:20:20 2016 +0000
Commit:     Mart Raudsepp <leio <AT> gentoo <DOT> org>
CommitDate: Wed Sep  7 20:20:20 2016 +0000
URL:        https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=1e826829

Add syncing of packages in categories from packages.g.o (just name)

Also add manage.py commands to call the sync steps individually for testing

 backend/lib/sync.py | 28 ++++++++++++++++++++++++++--
 manage.py           | 25 ++++++++++++++++++++++---
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/backend/lib/sync.py b/backend/lib/sync.py
index 3cfb746..6dcb6b9 100644
--- a/backend/lib/sync.py
+++ b/backend/lib/sync.py
@@ -1,15 +1,18 @@
 from flask import json
 import requests
 from .. import app, db
-from .models import Category
+from .models import Category, Package
 
+url_base = "https://packages.gentoo.org/";
 http_session = requests.session()
 
 def sync_categories():
-    url = "https://packages.gentoo.org/categories.json";
+    url = url_base + "categories.json"
     data = http_session.get(url)
+    # TODO: Handle response error (if not data)
     categories = json.loads(data.text)
     existing_categories = {}
+    # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5
     for cat in Category.query.all():
         existing_categories[cat.name] = cat
     for category in categories:
@@ -19,3 +22,24 @@ def sync_categories():
             new_cat = Category(name=category['name'], description=category['description'])
             db.session.add(new_cat)
     db.session.commit()
+
+def sync_packages():
+    for category in Category.query.all():
+        existing_packages = category.packages.all()
+        print("Existing packages in DB for category %s: %s" % (category.name, existing_packages,))
+        data = http_session.get(url_base + "categories/" + category.name + ".json")
+        if not data:
+            print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone?
+            continue
+        packages = json.loads(data.text)['packages']
+        # TODO: Use UPSERT instead (on_conflict_do_update)
+        existing_packages = {}
+        for pkg in Package.query.all():
+            existing_packages[pkg.name] = pkg
+        for package in packages:
+            if package['name'] in existing_packages:
+                continue # TODO: Update description once we keep that in DB
+            else:
+                new_pkg = Package(category_id=category.id, name=package['name'])
+                db.session.add(new_pkg)
+    db.session.commit()

diff --git a/manage.py b/manage.py
index 4f123aa..4634518 100755
--- a/manage.py
+++ b/manage.py
@@ -4,7 +4,7 @@
 from flask_script import Manager, Shell
 
 from backend import app, db
-from backend.lib.sync import sync_categories
+from backend.lib import sync
 
 
 manager = Manager(app)
@@ -21,8 +21,27 @@ def init():
 
 @manager.command
 def sync_gentoo():
-    """Syncronize Gentoo data from packages.gentoo.org API"""
-    sync_categories()
+    """Synchronize Gentoo data from packages.gentoo.org API"""
+    sync.sync_categories()
+    sync.sync_packages()
+    #sync_versions()
+
+@manager.command
+def sync_categories():
+    """Synchronize only Gentoo categories data"""
+    sync.sync_categories()
+
+@manager.command
+def sync_packages():
+    """Synchronize only Gentoo packages base data (without details)"""
+    sync.sync_packages()
+
+'''
+@manager.command
+def sync_versions():
+    """Synchronize only Gentoo package details"""
+    sync.sync_versions()
+'''
 
 if __name__ == '__main__':
     manager.run()

Reply via email to