IMPALA-6731: Use private index in bootstrap_virtualenv This change switches to using a private PyPI index URL when a private PyPI mirror is configured. This allows the tests to run without relying on the public Python PyPI mirrors.
Some packages can not detect their dependencies correctly when they get installed together with the dependencies in the same call to pip. This change adds a second stage of package installation to separate these packages from their dependencies. It also adds a few missing packages and updates some packages to newer versions. Testing: Ran this on a box where I blocked DNS resolution to Python's upstream pypi. Change-Id: I85f75f1f1a305f3043e0910ab88a880eeb30f00b Reviewed-on: http://gerrit.cloudera.org:8080/9798 Reviewed-by: Philip Zeyliger <[email protected]> Tested-by: Lars Volker <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5ce2bc44 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5ce2bc44 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5ce2bc44 Branch: refs/heads/2.x Commit: 5ce2bc4475ed45f216ae622d6dde18fff21c1d92 Parents: 46c95b5 Author: Lars Volker <[email protected]> Authored: Fri Mar 23 21:50:34 2018 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Tue Mar 27 03:35:00 2018 +0000 ---------------------------------------------------------------------- infra/python/bootstrap_virtualenv.py | 33 ++++++++++++++++++++---- infra/python/deps/compiled-requirements.txt | 5 ++-- infra/python/deps/pip_download.py | 5 ++-- infra/python/deps/requirements.txt | 11 ++++---- infra/python/deps/stage2-requirements.txt | 32 +++++++++++++++++++++++ 5 files changed, 71 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/bootstrap_virtualenv.py ---------------------------------------------------------------------- diff --git a/infra/python/bootstrap_virtualenv.py b/infra/python/bootstrap_virtualenv.py index e26aaf3..fe7695d 100644 --- a/infra/python/bootstrap_virtualenv.py +++ b/infra/python/bootstrap_virtualenv.py @@ -21,6 +21,8 
@@ # A multi-step bootstrapping process is required to build and install all of the # dependencies: # 1. install basic non-C/C++ packages into the virtualenv +# 1b. install packages that depend on step 1 but cannot be installed together with their +# dependencies # 2. use the virtualenv Python to bootstrap the toolchain # 3. use toolchain gcc to build C/C++ packages # 4. build the kudu-python package with toolchain gcc and Cython @@ -49,9 +51,13 @@ LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0]) DEPS_DIR = os.path.join(os.path.dirname(__file__), "deps") ENV_DIR = os.path.join(os.path.dirname(__file__), "env") -# Generated using "pip install --download <DIR> -r requirements.txt" +# Requirements file with packages we need for our build and tests. REQS_PATH = os.path.join(DEPS_DIR, "requirements.txt") +# Second stage of requirements which cannot be installed together with their dependencies +# in requirements.txt. +REQS2_PATH = os.path.join(DEPS_DIR, "stage2-requirements.txt") + # Requirements for the next bootstrapping step that builds compiled requirements # with toolchain gcc. COMPILED_REQS_PATH = os.path.join(DEPS_DIR, "compiled-requirements.txt") @@ -140,9 +146,23 @@ def exec_pip_install(args, cc="no-cc-available", env=None): # # --no-cache-dir is used to prevent caching of compiled artifacts, which may be built # with different compilers or settings. - exec_cmd([os.path.join(ENV_DIR, "bin", "python"), os.path.join(ENV_DIR, "bin", "pip"), - "install", "--no-binary", "--no-index", "--no-cache-dir", "--find-links", - "file://%s" % urllib.pathname2url(os.path.abspath(DEPS_DIR))] + args, env=env) + cmd = [os.path.join(ENV_DIR, "bin", "python"), os.path.join(ENV_DIR, "bin", "pip"), + "install", "-v", "--no-binary", "--no-cache-dir"] + + # When using a custom mirror, we also must use the index of that mirror. 
+ if "PYPI_MIRROR" in os.environ: + cmd.extend(["--index-url", "%s/simple" % os.environ["PYPI_MIRROR"]]) + else: + # Prevent fetching additional packages from the index. If we forget to add a package + # to one of the requirements.txt files, this should trigger an error. However, we will + # still access the index for version/dependency resolution, hence we need to change it + # when using a private mirror. + cmd.append("--no-index") + + cmd.extend(["--find-links", + "file://%s" % urllib.pathname2url(os.path.abspath(DEPS_DIR))]) + cmd.extend(args) + exec_cmd(cmd, env=env) def find_file(*paths): @@ -181,6 +201,9 @@ def install_deps(): LOG.info("Installing packages into the virtualenv") exec_pip_install(["-r", REQS_PATH]) mark_reqs_installed(REQS_PATH) + LOG.info("Installing stage 2 packages into the virtualenv") + exec_pip_install(["-r", REQS2_PATH]) + mark_reqs_installed(REQS2_PATH) def have_toolchain(): '''Return true if the Impala toolchain is available''' @@ -335,7 +358,7 @@ def reqs_are_installed(reqs_path): installed_reqs_file.close() def setup_virtualenv_if_not_exists(): - if not reqs_are_installed(REQS_PATH): + if not (reqs_are_installed(REQS_PATH) and reqs_are_installed(REQS2_PATH)): delete_virtualenv_if_exist() create_virtualenv() install_deps() http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/compiled-requirements.txt ---------------------------------------------------------------------- diff --git a/infra/python/deps/compiled-requirements.txt b/infra/python/deps/compiled-requirements.txt index b3f9f4d..289a78e 100644 --- a/infra/python/deps/compiled-requirements.txt +++ b/infra/python/deps/compiled-requirements.txt @@ -26,12 +26,13 @@ Fabric == 1.10.2 impyla == 0.14.0 bitarray == 0.8.1 sasl == 0.1.3 - six == 1.9.0 + six == 1.11.0 # Thrift usually comes from the thirdparty dir but in case the virtualenv is needed # before thirdparty is built thrift will be installed anyways. 
thrift == 0.9.0 - thrift_sasl == 0.1.0 + thrift-sasl == 0.1.0 psutil == 0.7.1 # Required for Kudu: Cython == 0.23.4 numpy == 1.10.4 + pytz == 2018.3 http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/pip_download.py ---------------------------------------------------------------------- diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py index 2e84426..6fbb683 100755 --- a/infra/python/deps/pip_download.py +++ b/infra/python/deps/pip_download.py @@ -35,8 +35,9 @@ NUM_DOWNLOAD_ATTEMPTS = 8 PYPI_MIRROR = os.environ.get('PYPI_MIRROR', 'https://pypi.python.org') # The requirement files that list all of the required packages and versions. -REQUIREMENTS_FILES = ['requirements.txt', 'compiled-requirements.txt', - 'kudu-requirements.txt', 'adls-requirements.txt'] +REQUIREMENTS_FILES = ['requirements.txt', 'stage2-requirements.txt', + 'compiled-requirements.txt', 'kudu-requirements.txt', + 'adls-requirements.txt'] def check_digest(filename, algorithm, expected_digest): http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/requirements.txt ---------------------------------------------------------------------- diff --git a/infra/python/deps/requirements.txt b/infra/python/deps/requirements.txt index abf5d7d..bea16f4 100644 --- a/infra/python/deps/requirements.txt +++ b/infra/python/deps/requirements.txt @@ -27,6 +27,7 @@ boto3 == 1.2.3 simplejson == 3.3.0 # For python version 2.6 botocore == 1.3.30 python_dateutil == 2.5.2 + six == 1.11.0 docutils == 0.12 jmespath == 0.9.0 futures == 3.0.5 @@ -47,13 +48,11 @@ pexpect == 3.3 pg8000 == 1.10.2 prettytable == 0.7.2 pyparsing == 2.0.3 -pytest == 2.9.2 - py == 1.4.32 -pytest-random == 0.02 -pytest-xdist == 1.15.0 python-magic == 0.4.11 -pywebhdfs == 0.3.2 - pbr == 1.8.1 +# pbr is required for pywebhdfs but must be installed in a separate call to pip before +# attempting to install pywebhdfs (https://github.com/pywebhdfs/pywebhdfs/issues/52). 
+# pywebhdfs itself will be installed in stage 2. + pbr == 3.1.1 requests == 2.7.0 # Newer versions of setuptools don't support Python 2.6 setuptools == 36.8.0 http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/stage2-requirements.txt ---------------------------------------------------------------------- diff --git a/infra/python/deps/stage2-requirements.txt b/infra/python/deps/stage2-requirements.txt new file mode 100644 index 0000000..eda2cd3 --- /dev/null +++ b/infra/python/deps/stage2-requirements.txt @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This file contains packages that have dependencies in requirements.txt and that have to +# be installed in a separate invocation of pip. + +# Remember, all modules below need to support python 2.6. + +# Requires setuptools-scm +pytest == 2.9.2 + py == 1.4.32 + pytest-forked == 0.2 + pytest-random == 0.02 + pytest-runner == 4.2 + pytest-xdist == 1.17.1 + +# Requires pbr +pywebhdfs == 0.3.2
