This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 12325eb7ecda3754895ca825003ae3215bc060a1
Author: Michael Smith <[email protected]>
AuthorDate: Mon Oct 9 16:41:24 2023 -0700

    IMPALA-12515: Build modules for extra pythons
    
    Adds IMPALA_EXTRA_PACKAGE_PYTHONS to build impala-shell tarball
    dependencies for additional Python targets. That can be used to build a
    tarball that supports multiple Python 3 minor versions at once.
    
    Updates the impala-shell script to provide a clear error message when
    attempting to use the tarball with a Python version that it hasn't been
    built for.
    
    Change-Id: I13720a9e3c50f348bef41f5e91f810204e416f13
    Reviewed-on: http://gerrit.cloudera.org:8080/20617
    Reviewed-by: Joe McDonnell <[email protected]>
    Tested-by: Michael Smith <[email protected]>
---
 ...eate_py3_virtualenv.sh => create_virtualenv.sh} | 25 +++----
 bin/impala-config.sh                               |  5 ++
 shell/CMakeLists.txt                               | 83 +++++++++++++---------
 shell/impala-shell                                 | 15 +++-
 shell/make_shell_tarball.sh                        | 58 ++++++---------
 tests/shell/util.py                                |  4 +-
 6 files changed, 103 insertions(+), 87 deletions(-)

diff --git a/bin/cmake_aux/create_py3_virtualenv.sh 
b/bin/cmake_aux/create_virtualenv.sh
similarity index 71%
rename from bin/cmake_aux/create_py3_virtualenv.sh
rename to bin/cmake_aux/create_virtualenv.sh
index d632fc952..2afb4dc69 100755
--- a/bin/cmake_aux/create_py3_virtualenv.sh
+++ b/bin/cmake_aux/create_virtualenv.sh
@@ -16,29 +16,27 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-# Create a python3 virtualenv. When system python is 3.6 or higher,
+# Create a python virtualenv. When system python is 3.6 or higher,
 # we can just use the built-in venv module to create the virtualenv.
 # If system python is older or the built-in venv module fails, then
 # this falls back to impala-virtualenv, which uses python2 to
-# initialize a virtualenv using python3.
+# initialize a virtualenv using the specified interpreter.
 #
-# This takes a single argument, which is the destination directory
-# for the virtualenv:
-# create_py3_virtualenv.sh venv_dir
+# This takes two arguments, which are the interpreter to use and the
+# destination directory for the virtualenv:
+# create_virtualenv.sh pythonX venv_dir
 
 set -euo pipefail
 
-# We should only be calling this when system python3 is available
-[[ -n ${IMPALA_SYSTEM_PYTHON3} ]]
+PYTHON_EXE=$1
+VENV_DIR=$2
 
-VENV_DIR=$1
-
-IS_PY36_OR_HIGHER=$(${IMPALA_SYSTEM_PYTHON3} -c \
-  "import sys; print('true' if sys.version_info.minor >= 6 else 'false')")
+IS_PY36_OR_HIGHER=$(${PYTHON_EXE} -c "import sys; print(\
+  'true' if sys.version_info.major >= 3 and sys.version_info.minor >= 6 else 'false')")
 
 # If using Python >= 3.6, try to use the builtin venv package.
 if $IS_PY36_OR_HIGHER ; then
-  if ${IMPALA_SYSTEM_PYTHON3} -m venv ${VENV_DIR} ; then
+  if ${PYTHON_EXE} -m venv ${VENV_DIR} ; then
     # Success
     exit 0
   fi
@@ -51,5 +49,4 @@ if $IS_PY36_OR_HIGHER ; then
 fi
 
 # Fall back to using the old impala-virtualenv method
-impala-virtualenv --python ${IMPALA_SYSTEM_PYTHON3} ${VENV_DIR}
-
+impala-virtualenv --python ${PYTHON_EXE} ${VENV_DIR}
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 74c24c558..8c6cea8fe 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -225,6 +225,11 @@ unset IMPALA_THRIFT_PY_URL
 export IMPALA_SYSTEM_PYTHON2="${IMPALA_SYSTEM_PYTHON2_OVERRIDE-$(command -v 
python2)}"
 export IMPALA_SYSTEM_PYTHON3="${IMPALA_SYSTEM_PYTHON3_OVERRIDE-$(command -v 
python3)}"
 
+# Additional Python versions to use when building the impala-shell prebuilt tarball
+# via make_shell_tarball.sh. That tarball includes precompiled packages, so it can be
+# used without additional system dependencies needed for pip install.
+# export IMPALA_EXTRA_PACKAGE_PYTHONS="python3.6;python3.10"
+
 if [[ $OSTYPE == "darwin"* ]]; then
   IMPALA_CYRUS_SASL_VERSION=2.1.26
   unset IMPALA_CYRUS_SASL_URL
diff --git a/shell/CMakeLists.txt b/shell/CMakeLists.txt
index 3d97b7e28..513400e5a 100644
--- a/shell/CMakeLists.txt
+++ b/shell/CMakeLists.txt
@@ -15,44 +15,58 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# These virtualenvs serve two purposes:
-# 1. They have system python with wheel installed, and they can be used to 
produce
-#    wheels for external dependencies for the shell tarball build.
-# 2. We pip install impala-shell into them for use in tests.
-# The initial virtualenv creation includes the "pip install wheel" command to
-# satisfy #1. #2 is a separate step and has no interaction with #1.
-set(PYTHON2_VENV "${CMAKE_SOURCE_DIR}/shell/build/py2_venv")
-set(PYTHON3_VENV "${CMAKE_SOURCE_DIR}/shell/build/py3_venv")
-# IMPALA-12117: Use separate pip cache directories to avoid concurrency
-# issues. The standard location is in ~/.cache/pip, so this uses directories
-# inside ~/.cache. These typical consume a couple MB each.
-set(PYTHON2_PIP_CACHE "~/.cache/impala_py2_pip")
-set(PYTHON3_PIP_CACHE "~/.cache/impala_py3_pip")
-
-add_custom_command(OUTPUT "${PYTHON2_VENV}" DEPENDS impala_python
-  COMMAND impala-virtualenv --python "$ENV{IMPALA_SYSTEM_PYTHON2}" 
"${PYTHON2_VENV}"
-  COMMAND "${PYTHON2_VENV}/bin/pip" install --cache-dir "${PYTHON2_PIP_CACHE}" 
wheel
-)
-
-# In cases where system python3 is old, this can use impala-virtualenv, so it
-# needs to depend on impala_python.
-add_custom_command(OUTPUT "${PYTHON3_VENV}" DEPENDS impala_python
-  COMMAND "${CMAKE_SOURCE_DIR}/bin/cmake_aux/create_py3_virtualenv.sh" 
"${PYTHON3_VENV}"
-  COMMAND "${PYTHON3_VENV}/bin/pip" install --cache-dir "${PYTHON3_PIP_CACHE}" 
wheel
-)
-
 # The shell tarball build only needs the build virtualenvs for the system
 # pythons that are installed.
 set(IMPALA_PYTHON_BUILD_VENVS "")
-if (NOT $ENV{IMPALA_SYSTEM_PYTHON2} EQUAL "")
-  list(APPEND IMPALA_PYTHON_BUILD_VENVS "${PYTHON2_VENV}")
+
+set(VENV_LOC "${CMAKE_SOURCE_DIR}/shell/build")
+set(PIP_LOC "~/.cache/impala_pip")
+
+# Tests depend on installing system pythons to specific locations, so we error if they
+# won't match what's expected in this config and make_shell_tarball.sh.
+set(PYTHON_EXES $ENV{IMPALA_EXTRA_PACKAGE_PYTHONS})
+if (NOT $ENV{IMPALA_SYSTEM_PYTHON2} STREQUAL "")
+  get_filename_component(PYTHON_NAME $ENV{IMPALA_SYSTEM_PYTHON2} NAME)
+  if (NOT ${PYTHON_NAME} STREQUAL "python2")
+    message(FATAL_ERROR "IMPALA_SYSTEM_PYTHON2 must be a binary named python2")
+  endif()
+  list(APPEND PYTHON_EXES $ENV{IMPALA_SYSTEM_PYTHON2})
 endif()
-if (NOT $ENV{IMPALA_SYSTEM_PYTHON3} EQUAL "")
-  list(APPEND IMPALA_PYTHON_BUILD_VENVS "${PYTHON3_VENV}")
+if (NOT $ENV{IMPALA_SYSTEM_PYTHON3} STREQUAL "")
+  get_filename_component(PYTHON_NAME $ENV{IMPALA_SYSTEM_PYTHON3} NAME)
+  if (NOT ${PYTHON_NAME} STREQUAL "python3")
+    message(FATAL_ERROR "IMPALA_SYSTEM_PYTHON3 must be a binary named python3")
+  endif()
+  list(APPEND PYTHON_EXES $ENV{IMPALA_SYSTEM_PYTHON3})
 endif()
+message(STATUS "Packaging for ${PYTHON_EXES}")
+
+foreach(PYTHON_EXE IN LISTS PYTHON_EXES)
+  get_filename_component(PYTHON_NAME "${PYTHON_EXE}" NAME)
+  # These virtualenvs serve two purposes:
+  # 1. They have system python with wheel installed, and they can be used to 
produce
+  #    wheels for external dependencies for the shell tarball build.
+  # 2. We pip install impala-shell into them for use in tests.
+  # The initial virtualenv creation includes the "pip install wheel" command to
+  # satisfy #1. #2 is a separate step and has no interaction with #1.
+  set(VENV "${VENV_LOC}/${PYTHON_NAME}_venv")
+  # IMPALA-12117: Use separate pip cache directories to avoid concurrency
+  # issues. The standard location is in ~/.cache/pip, so this uses directories
+  # inside ~/.cache. These typically consume a couple MB each.
+  set(PIP_CACHE "${PIP_LOC}/${PYTHON_NAME}")
+
+  # Supports fallback to impala-virtualenv for older Python versions.
+  add_custom_command(OUTPUT "${VENV}" DEPENDS impala_python
+    COMMAND "${CMAKE_SOURCE_DIR}/bin/cmake_aux/create_virtualenv.sh"
+            "${PYTHON_EXE}" "${VENV}"
+    COMMAND "${VENV}/bin/pip" install --cache-dir "${PIP_CACHE}" wheel
+  )
+
+  list(APPEND IMPALA_PYTHON_BUILD_VENVS "${VENV}")
+endforeach()
 
 add_custom_target(shell_tarball DEPENDS gen-deps "${IMPALA_PYTHON_BUILD_VENVS}"
-  COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh"
+  COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh" ${PYTHON_EXES}
 )
 
 add_custom_target(shell_pypi_package DEPENDS shell_tarball impala_python
@@ -73,10 +87,13 @@ add_custom_target(shell_pypi_test_package DEPENDS 
shell_tarball impala_python
     "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh"
 )
 
+# Tests expect to find venvs at 'python2_venv' and 'python3_venv' in 
tests/shell/util.py.
+set(PYTHON2_VENV "${VENV_LOC}/python2_venv")
 add_custom_target(shell_python2_install DEPENDS "${PYTHON2_VENV}" 
shell_pypi_test_package
-  COMMAND "${PYTHON2_VENV}/bin/pip" install --cache-dir "${PYTHON2_PIP_CACHE}" 
"${SHELL_TEST_PKG}"
+  COMMAND "${PYTHON2_VENV}/bin/pip" install --cache-dir "${PIP_LOC}/python2" 
"${SHELL_TEST_PKG}"
 )
 
+set(PYTHON3_VENV "${VENV_LOC}/python3_venv")
 add_custom_target(shell_python3_install DEPENDS "${PYTHON3_VENV}" 
shell_pypi_test_package
-  COMMAND "${PYTHON3_VENV}/bin/pip" install --cache-dir "${PYTHON3_PIP_CACHE}" 
"${SHELL_TEST_PKG}"
+  COMMAND "${PYTHON3_VENV}/bin/pip" install --cache-dir "${PIP_LOC}/python3" 
"${SHELL_TEST_PKG}"
 )
diff --git a/shell/impala-shell b/shell/impala-shell
index 5f5dd93b7..7f8e385f9 100755
--- a/shell/impala-shell
+++ b/shell/impala-shell
@@ -45,10 +45,21 @@ fi
 PYTHONPATH="${SHELL_HOME}/gen-py:${SHELL_HOME}/lib:${PYTHONPATH}"
 
 # External dependencies are installed in /ext-py${PYTHON_VERSION}
-PYTHON_VERSION=$("${PYTHON_EXE}" -c 'import sys; print(sys.version_info[0])')
+PYTHON_VERSION=$("${PYTHON_EXE}" -c 'import sys; \
+  print("{}.{}".format(sys.version_info.major, sys.version_info.minor))')
+if [ ! -d "${SHELL_HOME}/ext-py${PYTHON_VERSION}" ]; then
+  # List all ext-py* dirs, remove ext-py prefix, and join into a 
comma-separated string.
+  dirs=( $(cd ${SHELL_HOME} && echo ext-py*) )
+  vers="${dirs[@]#ext-py}"
+  pretty="$(printf "%s, " ${vers[@]})"
+  echo "This impala-shell package was not built to support Python ${PYTHON_VERSION}." \
+       "Supported Python versions are: ${pretty%, }."
+  exit 1
+fi
+
 PYTHONPATH="${SHELL_HOME}/ext-py${PYTHON_VERSION}:${PYTHONPATH}"
 
-if [ ${PYTHON_VERSION} -eq 2 ]; then
+if [ "${PYTHON_VERSION}" = "2.7" ]; then
   # Python 2 requires the legacy pkg_resources.py code
   PYTHONPATH="${PYTHONPATH}:${SHELL_HOME}/legacy"
 fi
diff --git a/shell/make_shell_tarball.sh b/shell/make_shell_tarball.sh
index 20704e86a..09123549b 100755
--- a/shell/make_shell_tarball.sh
+++ b/shell/make_shell_tarball.sh
@@ -31,6 +31,11 @@ if [ "x${IMPALA_HOME}" == "x" ]; then
   exit 1
 fi
 
+if [ $# -eq 0 ]; then
+  echo "Must specify at least one python interpreter"
+  exit 1
+fi
+
 # Detect whether IMPALA_HOME is a git repository. This is used below to allow 
extra
 # checks when building ext-py.
 pushd ${IMPALA_HOME}
@@ -66,11 +71,9 @@ THRIFT_GEN_PY_DIR="${SHELL_HOME}/gen-py"
 echo "Deleting all files in ${TARBALL_ROOT}/{gen-py,lib,ext-py*,legacy}"
 rm -rf ${TARBALL_ROOT}/lib/* 2>&1 > /dev/null
 rm -rf ${TARBALL_ROOT}/gen-py/* 2>&1 > /dev/null
-rm -rf ${TARBALL_ROOT}/ext-py*/* 2>&1 > /dev/null
+rm -rf ${TARBALL_ROOT}/ext-py* 2>&1 > /dev/null
 rm -rf ${TARBALL_ROOT}/legacy/* 2>&1 > /dev/null
 mkdir -p ${TARBALL_ROOT}/lib
-mkdir -p ${TARBALL_ROOT}/ext-py2
-mkdir -p ${TARBALL_ROOT}/ext-py3
 mkdir -p ${TARBALL_ROOT}/legacy
 
 rm -f ${THRIFT_GEN_PY_DIR}/impala_build_version.py
@@ -126,46 +129,29 @@ for MODULE in ${SHELL_HOME}/ext-py/*; do
     continue;
   fi
   pushd ${MODULE} > /dev/null 2>&1
-  if [ ! -z "${IMPALA_SYSTEM_PYTHON2:-}" ]; then
-    echo "Cleaning up old build artifacts."
-    rm -rf dist 2>&1 > /dev/null
-    rm -rf build 2>&1 > /dev/null
-    echo "Building ${MODULE} with Python 2"
-    # Use the py2_venv to get the wheel package needed for bdist_wheel below.
-    # python2 is now the virtualenv's python2, which is $IMPALA_SYSTEM_PYTHON2
-    source ${IMPALA_HOME}/shell/build/py2_venv/bin/activate
-    if [[ "$MODULE" == *"/bitarray"* ]]; then
-      # Need to use setuptools to build wheel for bitarray module
-      python2 -c "import setuptools; exec(open('setup.py').read())" \
-          -q bdist_wheel
-    else
-      python2 setup.py -q bdist_wheel clean
-    fi
-    # pip install the wheel into the python 2 external dependencies directory
-    PYTHON2_PIP_CACHE="~/.cache/impala_py2_pip"
-    pip install --no-deps --cache "${PYTHON2_PIP_CACHE}" \
-      --target ${TARBALL_ROOT}/ext-py2 dist/*.whl
-  fi
-  if [ ! -z "${IMPALA_SYSTEM_PYTHON3:-}" ]; then
+  for PYTHON_EXE in $*; do
     echo "Cleaning up old build artifacts."
     rm -rf dist 2>&1 > /dev/null
     rm -rf build 2>&1 > /dev/null
-    echo "Building ${MODULE} with Python 3"
-    # Use the py3_venv to get the wheel package needed for bdist_wheel below.
-    # python3 is now the virtualenv's python3, which is $IMPALA_SYSTEM_PYTHON3
-    source ${IMPALA_HOME}/shell/build/py3_venv/bin/activate
+    echo "Building ${MODULE} with ${PYTHON_EXE}"
+    # Use the venv to get the wheel package needed for bdist_wheel below.
+    PYTHON_NAME=$(basename ${PYTHON_EXE})
+    source ${IMPALA_HOME}/shell/build/${PYTHON_NAME}_venv/bin/activate
     if [[ "$MODULE" == *"/bitarray"* ]]; then
       # Need to use setuptools to build wheel for bitarray module
-      python3 -c "import setuptools; exec(open('setup.py').read())" \
-          -q bdist_wheel
+      python -c "import setuptools; exec(open('setup.py').read())" -q bdist_wheel
     else
-      python3 setup.py -q bdist_wheel clean
+      python setup.py -q bdist_wheel clean
     fi
-    # pip install the wheel into the python 2 external dependencies directory
-    PYTHON3_PIP_CACHE="~/.cache/impala_py3_pip"
-    pip install --no-deps --cache "${PYTHON3_PIP_CACHE}" \
-      --target ${TARBALL_ROOT}/ext-py3 dist/*.whl
-  fi
+    # pip install the wheel into the external dependencies directory
+    PIP_CACHE="~/.cache/impala_pip/${PYTHON_NAME}"
+    PYTHON_VERSION=$(${PYTHON_EXE} -c 'import sys; \
+      print("{}.{}".format(sys.version_info.major, sys.version_info.minor))')
+    # Use --upgrade to suppress warnings about replacement when $* includes duplicate
+    # Python minor versions.
+    pip install --upgrade --no-deps --cache "${PIP_CACHE}" \
+      --target ${TARBALL_ROOT}/ext-py${PYTHON_VERSION} dist/*.whl
+  done
   popd 2>&1 > /dev/null
 done
 
diff --git a/tests/shell/util.py b/tests/shell/util.py
index 06f62842d..892ef22f1 100755
--- a/tests/shell/util.py
+++ b/tests/shell/util.py
@@ -394,6 +394,6 @@ def get_impala_shell_executable(vector):
   return {
     'dev': [impala_shell_executable],
     'dev3': ['env', 'IMPALA_PYTHON_EXECUTABLE=python3', 
impala_shell_executable],
-    'python2': [os.path.join(IMPALA_HOME, 
'shell/build/py2_venv/bin/impala-shell')],
-    'python3': [os.path.join(IMPALA_HOME, 
'shell/build/py3_venv/bin/impala-shell')]
+    'python2': [os.path.join(IMPALA_HOME, 
'shell/build/python2_venv/bin/impala-shell')],
+    'python3': [os.path.join(IMPALA_HOME, 
'shell/build/python3_venv/bin/impala-shell')]
   }[vector.get_value_with_default('impala_shell', 'dev')]

Reply via email to