IMPALA-3763: download_requirements fixes * Download to infra/python/deps instead of the current directory. * Download the correct virtualenv version, to match the version on cdh5-trunk. * Don't re-download packages repeatedly; instead, check the md5sum.
Testing: Tested manually on the ASF tree, then made sure that bootstrap_virtualenv completed successfully to make sure we had all of the requirements downloaded successfully. Change-Id: I5a3c42236dddfd8a456c82605dc1fdc199a2bc48 Reviewed-on: http://gerrit.cloudera.org:8080/3416 Reviewed-by: Jim Apple <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/fc3ff1c5 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/fc3ff1c5 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/fc3ff1c5 Branch: refs/heads/master Commit: fc3ff1c52f3b3bfa5f94cc48b76025dd3e9b30b0 Parents: 6e71e90 Author: Tim Armstrong <[email protected]> Authored: Mon Jun 20 17:42:18 2016 -0700 Committer: Tim Armstrong <[email protected]> Committed: Tue Jun 21 00:37:54 2016 -0700 ---------------------------------------------------------------------- infra/python/deps/download_requirements | 6 ++++-- infra/python/deps/pip_download.py | 21 ++++++++++++++++----- 2 files changed, 20 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc3ff1c5/infra/python/deps/download_requirements ---------------------------------------------------------------------- diff --git a/infra/python/deps/download_requirements b/infra/python/deps/download_requirements index fa06cd6..a7dc432 100755 --- a/infra/python/deps/download_requirements +++ b/infra/python/deps/download_requirements @@ -22,5 +22,7 @@ DIR=$(dirname "$0") # For virtualenv, other scripts rely on the .tar.gz package (not a .whl package). # kudu-python is downloaded separately because pip install attempts to execute a # setup.py subcommand for kudu-python that can fail even if the download succeeds. 
-$PYTHON $DIR/pip_download.py virtualenv 13.0.1 -$PYTHON $DIR/pip_download.py kudu-python 0.1.1 +pushd $DIR +$PYTHON ./pip_download.py virtualenv 13.1.0 +$PYTHON ./pip_download.py kudu-python 0.1.1 +popd http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fc3ff1c5/infra/python/deps/pip_download.py ---------------------------------------------------------------------- diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py index 34a0897..b1e1fa7 100755 --- a/infra/python/deps/pip_download.py +++ b/infra/python/deps/pip_download.py @@ -3,6 +3,7 @@ # over which archive type is downloaded and what post-download steps are executed. import hashlib import json +import os.path from urllib import urlopen, URLopener import sys @@ -11,16 +12,26 @@ pkg_version = sys.argv[2] pkg_type = 'sdist' # Don't download wheel archives for now pkg_info = json.loads(urlopen('https://pypi.python.org/pypi/%s/json' % pkg_name).read()) +def check_md5sum(filename, expected_md5): + expected_md5 = pkg['md5_digest'] + actual_md5 = hashlib.md5(open(filename).read()).hexdigest() + return actual_md5 == expected_md5 + found = False downloader = URLopener() for pkg in pkg_info['releases'][pkg_version]: if pkg['packagetype'] == pkg_type: - print "Downloading %s from %s " % (pkg['filename'], pkg['url']) - downloader.retrieve(pkg['url'], pkg['filename']) + filename = pkg['filename'] expected_md5 = pkg['md5_digest'] - actual_md5 = hashlib.md5(open(pkg['filename']).read()).hexdigest() - if actual_md5 != expected_md5: - print "MD5 mismatch: %s v. %s" % (expected_md5, actual_md5) + print "Downloading %s from %s " % (filename, pkg['url']) + if os.path.isfile(filename) and check_md5sum(filename, expected_md5): + print "File with matching md5sum already exists, skipping download." 
+ found = True + break + downloader.retrieve(pkg['url'], filename) + actual_md5 = hashlib.md5(open(filename).read()).hexdigest() + if not check_md5sum(filename, expected_md5): + print "MD5 mismatch in file %s." % filename sys.exit(1) found = True break
