commit:     285a95500835248045b0736469e382a1f73fc6be
Author:     Mart Raudsepp <leio <AT> gentoo <DOT> org>
AuthorDate: Mon Mar 20 22:52:44 2017 +0000
Commit:     Mart Raudsepp <leio <AT> gentoo <DOT> org>
CommitDate: Mon Mar 20 23:11:10 2017 +0000
URL:        
https://gitweb.gentoo.org/proj/gentoo-bumpchecker.git/commit/?id=285a9550

gnome: make the cache.json requests parallel; reduces a run from 3m01 to 0m23 
for me

This relies on the requests-futures package, which in turn relies on 
python-3.2+ Futures
(or a backport of it). If the requests-futures import fails, it will fall back to 
the old, slower
one-by-one fetching.

 modules/gnome_module.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/modules/gnome_module.py b/modules/gnome_module.py
index afba235..e6544b6 100644
--- a/modules/gnome_module.py
+++ b/modules/gnome_module.py
@@ -4,8 +4,16 @@
 # vim: set sts=4 sw=4 et tw=0 :
 
 import requests
+try:
+    from requests_futures.sessions import FuturesSession
+    parallel_requests = True
+except:
+    print("requests-futures not found for parallel fetching - will fallback to 
slower one-by-one version retrieval for latest version")
+    parallel_requests = False
+
 import package_module, clioptions_module
 
+MAX_WORKERS = 10
 DEBUG = False
 
 
@@ -34,12 +42,17 @@ class GNOME:
             gnome_release_list[1] = str(int(gnome_release_list[1]) + 1)
         self.gnome_release = ".".join(gnome_release_list[:2])
 
-        self.http = requests.session()
+        if parallel_requests:
+            self.http = FuturesSession(max_workers=MAX_WORKERS)
+        else:
+            self.http = requests.session()
         self.url_base = "https://download.gnome.org/";
         self.release_versions_file_path = self.url_base + 'teams/releng/'
 
     def generate_data_from_versions_markup(self, url):
         data = self.http.get(url)
+        if parallel_requests:
+            data = data.result()
         if not data:
             raise ValueError("Couldn't open %s" % url)
 
@@ -61,11 +74,20 @@ class GNOME:
 
     def generate_data_individual(self, release_packages):
         ret = []
+        # First query all results; if parallel_requests==True, this will run 
in parallel
+        for pkg in release_packages:
+            name = pkg.name.split('/')[-1]
+            if name in name_mapping:
+                name = name_mapping[name]
+            pkg.requests_result = self.http.get(self.url_base + '/sources/' + 
name + '/cache.json')
+
+        # And now handle the results - this is a separate loop for parallel 
fetch support
         for pkg in release_packages:
             name = pkg.name.split('/')[-1]
             if name in name_mapping:
                 name = name_mapping[name]
-            data = self.http.get(self.url_base + '/sources/' + name + 
'/cache.json')
+            # pkg.requests_result is a Future if parallel_requests (call 
result() on it to wait for and retrieve the Response); otherwise it is 
already the Response
+            data = pkg.requests_result.result() if parallel_requests else 
pkg.requests_results
             if not data:
                 print("Warning: Unable to read cache.json for %s" % pkg.name)
                 continue

Reply via email to