IMPALA-6731: Use private index in bootstrap_virtualenv

This change switches to using a private PyPI index URL when using a
private PyPI mirror. This allows the tests to run without relying on the
public Python PyPI mirrors.

Some packages cannot detect their dependencies correctly when they are
installed together with those dependencies in the same call to pip. This
change adds a second stage of package installation to separate these
packages from their dependencies.

It also adds a few missing packages and updates some packages to newer
versions.

Testing: Ran this on a box where I blocked DNS resolution to Python's
upstream pypi.

Change-Id: I85f75f1f1a305f3043e0910ab88a880eeb30f00b
Reviewed-on: http://gerrit.cloudera.org:8080/9798
Reviewed-by: Philip Zeyliger <[email protected]>
Tested-by: Lars Volker <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5ce2bc44
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5ce2bc44
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5ce2bc44

Branch: refs/heads/2.x
Commit: 5ce2bc4475ed45f216ae622d6dde18fff21c1d92
Parents: 46c95b5
Author: Lars Volker <[email protected]>
Authored: Fri Mar 23 21:50:34 2018 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Tue Mar 27 03:35:00 2018 +0000

----------------------------------------------------------------------
 infra/python/bootstrap_virtualenv.py        | 33 ++++++++++++++++++++----
 infra/python/deps/compiled-requirements.txt |  5 ++--
 infra/python/deps/pip_download.py           |  5 ++--
 infra/python/deps/requirements.txt          | 11 ++++----
 infra/python/deps/stage2-requirements.txt   | 32 +++++++++++++++++++++++
 5 files changed, 71 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/bootstrap_virtualenv.py
----------------------------------------------------------------------
diff --git a/infra/python/bootstrap_virtualenv.py 
b/infra/python/bootstrap_virtualenv.py
index e26aaf3..fe7695d 100644
--- a/infra/python/bootstrap_virtualenv.py
+++ b/infra/python/bootstrap_virtualenv.py
@@ -21,6 +21,8 @@
 # A multi-step bootstrapping process is required to build and install all of 
the
 # dependencies:
 # 1. install basic non-C/C++ packages into the virtualenv
+# 1b. install packages that depend on step 1 but cannot be installed together 
with their
+#     dependencies
 # 2. use the virtualenv Python to bootstrap the toolchain
 # 3. use toolchain gcc to build C/C++ packages
 # 4. build the kudu-python package with toolchain gcc and Cython
@@ -49,9 +51,13 @@ LOG = 
logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
 DEPS_DIR = os.path.join(os.path.dirname(__file__), "deps")
 ENV_DIR = os.path.join(os.path.dirname(__file__), "env")
 
-# Generated using "pip install --download <DIR> -r requirements.txt"
+# Requirements file with packages we need for our build and tests.
 REQS_PATH = os.path.join(DEPS_DIR, "requirements.txt")
 
+# Second stage of requirements which cannot be installed together with their 
dependencies
+# in requirements.txt.
+REQS2_PATH = os.path.join(DEPS_DIR, "stage2-requirements.txt")
+
 # Requirements for the next bootstrapping step that builds compiled 
requirements
 # with toolchain gcc.
 COMPILED_REQS_PATH = os.path.join(DEPS_DIR, "compiled-requirements.txt")
@@ -140,9 +146,23 @@ def exec_pip_install(args, cc="no-cc-available", env=None):
   #
   # --no-cache-dir is used to prevent caching of compiled artifacts, which may 
be built
   # with different compilers or settings.
-  exec_cmd([os.path.join(ENV_DIR, "bin", "python"), os.path.join(ENV_DIR, 
"bin", "pip"),
-    "install", "--no-binary", "--no-index", "--no-cache-dir", "--find-links",
-    "file://%s" % urllib.pathname2url(os.path.abspath(DEPS_DIR))] + args, 
env=env)
+  cmd = [os.path.join(ENV_DIR, "bin", "python"), os.path.join(ENV_DIR, "bin", 
"pip"),
+    "install", "-v", "--no-binary", "--no-cache-dir"]
+
+  # When using a custom mirror, we also must use the index of that mirror.
+  if "PYPI_MIRROR" in os.environ:
+    cmd.extend(["--index-url", "%s/simple" % os.environ["PYPI_MIRROR"]])
+  else:
+    # Prevent fetching additional packages from the index. If we forget to add 
a package
+    # to one of the requirements.txt files, this should trigger an error. 
However, we will
+    # still access the index for version/dependency resolution, hence we need 
to change it
+    # when using a private mirror.
+    cmd.append("--no-index")
+
+  cmd.extend(["--find-links",
+      "file://%s" % urllib.pathname2url(os.path.abspath(DEPS_DIR))])
+  cmd.extend(args)
+  exec_cmd(cmd, env=env)
 
 
 def find_file(*paths):
@@ -181,6 +201,9 @@ def install_deps():
   LOG.info("Installing packages into the virtualenv")
   exec_pip_install(["-r", REQS_PATH])
   mark_reqs_installed(REQS_PATH)
+  LOG.info("Installing stage 2 packages into the virtualenv")
+  exec_pip_install(["-r", REQS2_PATH])
+  mark_reqs_installed(REQS2_PATH)
 
 def have_toolchain():
   '''Return true if the Impala toolchain is available'''
@@ -335,7 +358,7 @@ def reqs_are_installed(reqs_path):
     installed_reqs_file.close()
 
 def setup_virtualenv_if_not_exists():
-  if not reqs_are_installed(REQS_PATH):
+  if not (reqs_are_installed(REQS_PATH) and reqs_are_installed(REQS2_PATH)):
     delete_virtualenv_if_exist()
     create_virtualenv()
     install_deps()

http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/compiled-requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/compiled-requirements.txt 
b/infra/python/deps/compiled-requirements.txt
index b3f9f4d..289a78e 100644
--- a/infra/python/deps/compiled-requirements.txt
+++ b/infra/python/deps/compiled-requirements.txt
@@ -26,12 +26,13 @@ Fabric == 1.10.2
 impyla == 0.14.0
   bitarray == 0.8.1
   sasl == 0.1.3
-  six == 1.9.0
+  six == 1.11.0
   # Thrift usually comes from the thirdparty dir but in case the virtualenv is 
needed
   # before thirdparty is built thrift will be installed anyways.
   thrift == 0.9.0
-  thrift_sasl == 0.1.0
+  thrift-sasl == 0.1.0
 psutil == 0.7.1
 # Required for Kudu:
   Cython == 0.23.4
   numpy == 1.10.4
+  pytz == 2018.3

http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/pip_download.py
----------------------------------------------------------------------
diff --git a/infra/python/deps/pip_download.py 
b/infra/python/deps/pip_download.py
index 2e84426..6fbb683 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -35,8 +35,9 @@ NUM_DOWNLOAD_ATTEMPTS = 8
 PYPI_MIRROR = os.environ.get('PYPI_MIRROR', 'https://pypi.python.org')
 
 # The requirement files that list all of the required packages and versions.
-REQUIREMENTS_FILES = ['requirements.txt', 'compiled-requirements.txt',
-                      'kudu-requirements.txt', 'adls-requirements.txt']
+REQUIREMENTS_FILES = ['requirements.txt', 'stage2-requirements.txt',
+                      'compiled-requirements.txt', 'kudu-requirements.txt',
+                      'adls-requirements.txt']
 
 
 def check_digest(filename, algorithm, expected_digest):

http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/requirements.txt 
b/infra/python/deps/requirements.txt
index abf5d7d..bea16f4 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -27,6 +27,7 @@ boto3 == 1.2.3
   simplejson == 3.3.0 # For python version 2.6
   botocore == 1.3.30
   python_dateutil == 2.5.2
+    six == 1.11.0
   docutils == 0.12
   jmespath == 0.9.0
   futures == 3.0.5
@@ -47,13 +48,11 @@ pexpect == 3.3
 pg8000 == 1.10.2
 prettytable == 0.7.2
 pyparsing == 2.0.3
-pytest == 2.9.2
-  py == 1.4.32
-pytest-random == 0.02
-pytest-xdist == 1.15.0
 python-magic == 0.4.11
-pywebhdfs == 0.3.2
-  pbr == 1.8.1
+# pbr is required for pywebhdfs but must be installed in a separate call to 
pip before
+# attempting to install pywebhdfs 
(https://github.com/pywebhdfs/pywebhdfs/issues/52).
+# pywebhdfs itself will be installed in stage 2.
+  pbr == 3.1.1
 requests == 2.7.0
 # Newer versions of setuptools don't support Python 2.6
 setuptools == 36.8.0

http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/stage2-requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/stage2-requirements.txt 
b/infra/python/deps/stage2-requirements.txt
new file mode 100644
index 0000000..eda2cd3
--- /dev/null
+++ b/infra/python/deps/stage2-requirements.txt
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This file contains packages that have dependencies in requirements.txt and 
that have to
+# be installed in a separate invocation of pip.
+
+# Remember, all modules below need to support python 2.6.
+
+# Requires setuptools-scm
+pytest == 2.9.2
+  py == 1.4.32
+  pytest-forked == 0.2
+  pytest-random == 0.02
+  pytest-runner == 4.2
+  pytest-xdist == 1.17.1
+
+# Requires pbr
+pywebhdfs == 0.3.2

Reply via email to