commit: 285a95500835248045b0736469e382a1f73fc6be
Author: Mart Raudsepp <leio <AT> gentoo <DOT> org>
AuthorDate: Mon Mar 20 22:52:44 2017 +0000
Commit: Mart Raudsepp <leio <AT> gentoo <DOT> org>
CommitDate: Mon Mar 20 23:11:10 2017 +0000
URL:        https://gitweb.gentoo.org/proj/gentoo-bumpchecker.git/commit/?id=285a9550
gnome: make the cache.json requests parallel; reduces a run from 3m01 to 0m23 for me
This relies on the requests-futures package, which in turn relies on Python 3.2+
futures (or a backport of it). If the requests-futures import fails, it will fall
back to the old, slower one-by-one fetching.
modules/gnome_module.py | 26 ++++++++++++++++++++++++--
1 file changed, 24 insertions(+), 2 deletions(-)
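As the message explains, the change boils down to an optional import of
requests-futures with a graceful fallback to plain requests. A minimal standalone
sketch of that pattern, assuming only the names used in the patch (the
make_session helper is illustrative, not part of the module):

import requests

try:
    from requests_futures.sessions import FuturesSession
    parallel_requests = True
except ImportError:
    parallel_requests = False

MAX_WORKERS = 10

def make_session():
    # With requests-futures available, session.get() returns a Future backed
    # by a thread pool; otherwise a plain requests session returns Responses.
    if parallel_requests:
        return FuturesSession(max_workers=MAX_WORKERS)
    return requests.session()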
diff --git a/modules/gnome_module.py b/modules/gnome_module.py
index afba235..e6544b6 100644
--- a/modules/gnome_module.py
+++ b/modules/gnome_module.py
@@ -4,8 +4,16 @@
 # vim: set sts=4 sw=4 et tw=0 :
 import requests
+try:
+    from requests_futures.sessions import FuturesSession
+    parallel_requests = True
+except:
+    print("requests-futures not found for parallel fetching - will fallback to slower one-by-one version retrieval for latest version")
+    parallel_requests = False
+
 import package_module, clioptions_module
+MAX_WORKERS = 10
 DEBUG = False
@@ -34,12 +42,17 @@ class GNOME:
             gnome_release_list[1] = str(int(gnome_release_list[1]) + 1)
         self.gnome_release = ".".join(gnome_release_list[:2])
-        self.http = requests.session()
+        if parallel_requests:
+            self.http = FuturesSession(max_workers=MAX_WORKERS)
+        else:
+            self.http = requests.session()
         self.url_base = "https://download.gnome.org/"
         self.release_versions_file_path = self.url_base + 'teams/releng/'
     def generate_data_from_versions_markup(self, url):
         data = self.http.get(url)
+        if parallel_requests:
+            data = data.result()
         if not data:
             raise ValueError("Couldn't open %s" % url)
@@ -61,11 +74,20 @@ class GNOME:
     def generate_data_individual(self, release_packages):
         ret = []
+        # First query all results; if parallel_requests==True, this will run in parallel
+        for pkg in release_packages:
+            name = pkg.name.split('/')[-1]
+            if name in name_mapping:
+                name = name_mapping[name]
+            pkg.requests_result = self.http.get(self.url_base + '/sources/' + name + '/cache.json')
+
+        # And now handle the results - this is a separate loop for parallel fetch support
         for pkg in release_packages:
             name = pkg.name.split('/')[-1]
             if name in name_mapping:
                 name = name_mapping[name]
-            data = self.http.get(self.url_base + '/sources/' + name + '/cache.json')
+            # pkg.requests_result is a Future that we need to call result() on to wait for/retrieve the Response when parallel_requests, otherwise it is already the Response
+            data = pkg.requests_result.result() if parallel_requests else pkg.requests_result
             if not data:
                 print("Warning: Unable to read cache.json for %s" % pkg.name)
                 continue
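For reference, the two loops added above boil down to a two-phase pattern:
issue every GET first, then resolve the results. A minimal standalone sketch,
assuming http is either a FuturesSession or a plain requests session (fetch_all
and its urls argument are illustrative, not part of the patch):

def fetch_all(http, urls, parallel):
    # Phase 1: issue every GET up front; with a FuturesSession these run
    # concurrently on its worker threads and come back as Futures.
    pending = [http.get(url) for url in urls]
    # Phase 2: collect the responses; Futures must be resolved with .result(),
    # plain Responses can be used directly.
    return [p.result() if parallel else p for p in pending]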