IMPALA-3872: allow providing PyPI mirror for Python packages. We still rely on the python.org JSON API, which doesn't seem to be mirrored (the mirrors instead implement an HTML-based index format).
The mirror can be provided by setting the PYPI_MIRROR environment variable. The default is "https://pypi.python.org". Change-Id: Ibc11f010332c0225121c86c9930e35c7ac01409c Reviewed-on: http://gerrit.cloudera.org:8080/4770 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/51b13106 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/51b13106 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/51b13106 Branch: refs/heads/hadoop-next Commit: 51b1310681d07308fb508a038a4fdf5a1e73b5e8 Parents: 381e719 Author: Tim Armstrong <[email protected]> Authored: Wed Oct 19 11:09:35 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Tue Nov 8 05:34:50 2016 +0000 ---------------------------------------------------------------------- infra/python/deps/pip_download.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/51b13106/infra/python/deps/pip_download.py ---------------------------------------------------------------------- diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py index 658d0cc..a3c6a09 100755 --- a/infra/python/deps/pip_download.py +++ b/infra/python/deps/pip_download.py @@ -22,6 +22,7 @@ # This script requires Python 2.6+. import json +import os import os.path import sys from hashlib import md5 @@ -30,6 +31,8 @@ from urllib import urlopen, URLopener NUM_TRIES = 3 +PYPI_MIRROR = os.environ.get("PYPI_MIRROR", "https://pypi.python.org") + def check_md5sum(filename, expected_md5): actual_md5 = md5(open(filename).read()).hexdigest() return actual_md5 == expected_md5 @@ -55,6 +58,8 @@ def download_package(pkg_name, pkg_version): '''Download the required package. 
Sometimes the download can be flaky, so we use the retry decorator.''' pkg_type = 'sdist' # Don't download wheel archives for now + # This JSON endpoint is not provided by PyPI mirrors so we always need to get this + # from pypi.python.org. pkg_info = json.loads(urlopen('https://pypi.python.org/pypi/%s/json' % pkg_name).read()) downloader = URLopener() @@ -65,8 +70,9 @@ def download_package(pkg_name, pkg_version): if os.path.isfile(filename) and check_md5sum(filename, expected_md5): print "File with matching md5sum already exists, skipping %s" % filename return True - print "Downloading %s from %s " % (filename, pkg['url']) - downloader.retrieve(pkg['url'], filename) + pkg_url = "{0}/packages/{1}".format(PYPI_MIRROR, pkg['path']) + print "Downloading %s from %s" % (filename, pkg_url) + downloader.retrieve(pkg_url, filename) actual_md5 = md5(open(filename).read()).hexdigest() if check_md5sum(filename, expected_md5): return True
