Repository: incubator-impala
Updated Branches:
  refs/heads/master f03900a80 -> 3bdde74a7


IMPALA-6027: Retry downloading toolchain components.

We've seen intermittent 500 errors when downloading the toolchain from S3 over the HTTPS URLs. As a first stab, this commit retries 3 times, with some jitter.

I also changed the threadpool introduced previously to have a limit of 4 threads, because that's sufficient to get the speed improvement.

The 500 errors have been observed both before and after the threadpool change.

For testing, I ran the straight-forward case directly. I introduced a broken version string to observe that retries would happen on any error from wget.

Change-Id: I7669c7d41240aa0eb43c30d5bf2bd5c01b66180b
Reviewed-on: http://gerrit.cloudera.org:8080/8258
Reviewed-by: Thomas Tauber-Marshall <[email protected]>
Reviewed-by: Michael Brown <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/3bdde74a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/3bdde74a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/3bdde74a

Branch: refs/heads/master
Commit: 3bdde74a70ce8f202faee8ce52b157ea88860ecf
Parents: f03900a
Author: Philip Zeyliger <[email protected]>
Authored: Wed Oct 11 09:28:37 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Wed Oct 11 21:45:40 2017 +0000

----------------------------------------------------------------------
 bin/bootstrap_toolchain.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3bdde74a/bin/bootstrap_toolchain.py
----------------------------------------------------------------------
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 76d4c5a..bea3a99 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -29,12 +29,14 @@
 #
 #     python bootstrap_toolchain.py
 import os
+import random
 import re
 import sh
 import shutil
 import subprocess
 import sys
 import tempfile
+import time
 
 HOST = "https://native-toolchain.s3.amazonaws.com/build"
 
@@ -77,9 +79,18 @@ def get_platform_release_label(release=None):
 
 def wget_and_unpack_package(download_path, file_name, destination, wget_no_clobber):
   print "URL {0}".format(download_path)
-  print "Downloading {0} to {1}".format(file_name, destination)
-  # --no-clobber avoids downloading the file if a file with the name already exists
-  sh.wget(download_path, directory_prefix=destination, no_clobber=wget_no_clobber)
+  NUM_ATTEMPTS = 3
+  for attempt in range(1, NUM_ATTEMPTS + 1):
+    print "Downloading {0} to {1} (attempt {2})".format(file_name, destination, attempt)
+    # --no-clobber avoids downloading the file if a file with the name already exists
+    try:
+      sh.wget(download_path, directory_prefix=destination, no_clobber=wget_no_clobber)
+      break
+    except Exception, e:
+      if attempt == NUM_ATTEMPTS:
+        raise
+      print "Download failed; retrying after sleep: " + str(e)
+      time.sleep(10 + random.random() * 5) # Sleep between 10 and 15 seconds.
   print "Extracting {0}".format(file_name)
   sh.tar(z=True, x=True, f=os.path.join(destination, file_name), directory=destination)
   sh.rm(os.path.join(destination, file_name))
@@ -310,7 +321,7 @@ def execute_many(f, args):
   pool = None
   try:
     import multiprocessing.pool
-    pool = multiprocessing.pool.ThreadPool()
+    pool = multiprocessing.pool.ThreadPool(processes=min(multiprocessing.cpu_count(), 4))
     return pool.map(f, args, 1)
   except ImportError:
     # multiprocessing was introduced in Python 2.6.
