IMPALA-3872: allow providing PyPi mirror for python packages

We still rely on the python.org JSON API, which doesn't seem to be
mirrored (instead, the mirrors implement an HTML-based index
format).

The mirror can be provided by setting the PYPI_MIRROR environment
variable. The default is "https://pypi.python.org".

Change-Id: Ibc11f010332c0225121c86c9930e35c7ac01409c
Reviewed-on: http://gerrit.cloudera.org:8080/4770
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/51b13106
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/51b13106
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/51b13106

Branch: refs/heads/hadoop-next
Commit: 51b1310681d07308fb508a038a4fdf5a1e73b5e8
Parents: 381e719
Author: Tim Armstrong <[email protected]>
Authored: Wed Oct 19 11:09:35 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Tue Nov 8 05:34:50 2016 +0000

----------------------------------------------------------------------
 infra/python/deps/pip_download.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/51b13106/infra/python/deps/pip_download.py
----------------------------------------------------------------------
diff --git a/infra/python/deps/pip_download.py 
b/infra/python/deps/pip_download.py
index 658d0cc..a3c6a09 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -22,6 +22,7 @@
 # This script requires Python 2.6+.
 
 import json
+import os
 import os.path
 import sys
 from hashlib import md5
@@ -30,6 +31,8 @@ from urllib import urlopen, URLopener
 
 NUM_TRIES = 3
 
+PYPI_MIRROR = os.environ.get("PYPI_MIRROR", "https://pypi.python.org")
+
 def check_md5sum(filename, expected_md5):
   actual_md5 = md5(open(filename).read()).hexdigest()
   return actual_md5 == expected_md5
@@ -55,6 +58,8 @@ def download_package(pkg_name, pkg_version):
   '''Download the required package. Sometimes the download can be flaky, so we 
use the
   retry decorator.'''
   pkg_type = 'sdist' # Don't download wheel archives for now
+  # This JSON endpoint is not provided by PyPI mirrors so we always need to 
get this
+  # from pypi.python.org.
   pkg_info = json.loads(urlopen('https://pypi.python.org/pypi/%s/json' % 
pkg_name).read())
 
   downloader = URLopener()
@@ -65,8 +70,9 @@ def download_package(pkg_name, pkg_version):
       if os.path.isfile(filename) and check_md5sum(filename, expected_md5):
         print "File with matching md5sum already exists, skipping %s" % 
filename
         return True
-      print "Downloading %s from %s " % (filename, pkg['url'])
-      downloader.retrieve(pkg['url'], filename)
+      pkg_url = "{0}/packages/{1}".format(PYPI_MIRROR, pkg['path'])
+      print "Downloading %s from %s" % (filename, pkg_url)
+      downloader.retrieve(pkg_url, filename)
       actual_md5 = md5(open(filename).read()).hexdigest()
       if check_md5sum(filename, expected_md5):
         return True

Reply via email to