This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 07d5a93de642b8c10ff4893ffd1bfa8ef0a92147 Author: Joe McDonnell <[email protected]> AuthorDate: Mon Jun 19 12:27:40 2023 -0700 IMPALA-12220: pip install ext-py dependencies in the shell tarball The impala-shell tarball ships its external dependencies by building eggs and including them in the ext-py* directories. On Redhat 9 and Ubuntu 22, the impala-shell tarball encountered a regression where the sasl package could not access its Client class: Error connecting: AttributeError, module 'sasl' has no attribute 'Client' This only occurs when using eggs (which are zip files). The virtualenv installs worked fine. Unpacking the eggs and using the content directly also avoids the problem. This reworks the shell tarball to instead build wheels and install them with 'pip install'. This means that the external dependencies are not packaged in eggs, and this avoids the issue with sasl. This is a minimal change to avoid the issue until the shell tarball build can be reworked more extensively. Testing: - Ran shell tests on Redhat 9 Change-Id: I49403979c559b7f8bbe038865c06db6024468d72 Reviewed-on: http://gerrit.cloudera.org:8080/20095 Reviewed-by: Michael Smith <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- shell/CMakeLists.txt | 56 ++++++++++++++++++++++++++++++--------------- shell/impala-shell | 29 ++++++++--------------- shell/make_shell_tarball.sh | 48 ++++++++++++++++++++++++++------------ 3 files changed, 80 insertions(+), 53 deletions(-) diff --git a/shell/CMakeLists.txt b/shell/CMakeLists.txt index 3c670a9c7..3d97b7e28 100644 --- a/shell/CMakeLists.txt +++ b/shell/CMakeLists.txt @@ -15,7 +15,43 @@ # specific language governing permissions and limitations # under the License. -add_custom_target(shell_tarball DEPENDS gen-deps +# These virtualenvs serve two purposes: +# 1. They have system python with wheel installed, and they can be used to produce +# wheels for external dependencies for the shell tarball build. +# 2. 
We pip install impala-shell into them for use in tests. +# The initial virtualenv creation includes the "pip install wheel" command to +# satisfy #1. #2 is a separate step and has no interaction with #1. +set(PYTHON2_VENV "${CMAKE_SOURCE_DIR}/shell/build/py2_venv") +set(PYTHON3_VENV "${CMAKE_SOURCE_DIR}/shell/build/py3_venv") +# IMPALA-12117: Use separate pip cache directories to avoid concurrency +# issues. The standard location is in ~/.cache/pip, so this uses directories +# inside ~/.cache. These typically consume a couple MB each. +set(PYTHON2_PIP_CACHE "~/.cache/impala_py2_pip") +set(PYTHON3_PIP_CACHE "~/.cache/impala_py3_pip") + +add_custom_command(OUTPUT "${PYTHON2_VENV}" DEPENDS impala_python + COMMAND impala-virtualenv --python "$ENV{IMPALA_SYSTEM_PYTHON2}" "${PYTHON2_VENV}" + COMMAND "${PYTHON2_VENV}/bin/pip" install --cache-dir "${PYTHON2_PIP_CACHE}" wheel +) + +# In cases where system python3 is old, this can use impala-virtualenv, so it +# needs to depend on impala_python. +add_custom_command(OUTPUT "${PYTHON3_VENV}" DEPENDS impala_python + COMMAND "${CMAKE_SOURCE_DIR}/bin/cmake_aux/create_py3_virtualenv.sh" "${PYTHON3_VENV}" + COMMAND "${PYTHON3_VENV}/bin/pip" install --cache-dir "${PYTHON3_PIP_CACHE}" wheel +) + +# The shell tarball build only needs the build virtualenvs for the system +# pythons that are installed. 
+set(IMPALA_PYTHON_BUILD_VENVS "") +if (NOT $ENV{IMPALA_SYSTEM_PYTHON2} EQUAL "") + list(APPEND IMPALA_PYTHON_BUILD_VENVS "${PYTHON2_VENV}") +endif() +if (NOT $ENV{IMPALA_SYSTEM_PYTHON3} EQUAL "") + list(APPEND IMPALA_PYTHON_BUILD_VENVS "${PYTHON3_VENV}") +endif() + +add_custom_target(shell_tarball DEPENDS gen-deps "${IMPALA_PYTHON_BUILD_VENVS}" COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh" ) @@ -37,28 +73,10 @@ add_custom_target(shell_pypi_test_package DEPENDS shell_tarball impala_python "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh" ) -set(PYTHON2_VENV "${CMAKE_SOURCE_DIR}/shell/build/py2_venv") -set(PYTHON3_VENV "${CMAKE_SOURCE_DIR}/shell/build/py3_venv") -# IMPALA-12117: Use separate pip cache directories to avoid concurrency -# issues. The standard location is in ~/.cache/pip, so this uses directories -# inside ~/.cache. These typical consume a couple MB each. -set(PYTHON2_PIP_CACHE "~/.cache/impala_py2_pip") -set(PYTHON3_PIP_CACHE "~/.cache/impala_py3_pip") - -add_custom_command(OUTPUT "${PYTHON2_VENV}" DEPENDS impala_python - COMMAND impala-virtualenv --python "$ENV{IMPALA_SYSTEM_PYTHON2}" "${PYTHON2_VENV}" -) - add_custom_target(shell_python2_install DEPENDS "${PYTHON2_VENV}" shell_pypi_test_package COMMAND "${PYTHON2_VENV}/bin/pip" install --cache-dir "${PYTHON2_PIP_CACHE}" "${SHELL_TEST_PKG}" ) -# In cases where system python3 is old, this can use impala-virtualenv, so it -# needs to depend on impala_python. 
-add_custom_command(OUTPUT "${PYTHON3_VENV}" DEPENDS impala_python - COMMAND "${CMAKE_SOURCE_DIR}/bin/cmake_aux/create_py3_virtualenv.sh" "${PYTHON3_VENV}" -) - add_custom_target(shell_python3_install DEPENDS "${PYTHON3_VENV}" shell_pypi_test_package COMMAND "${PYTHON3_VENV}/bin/pip" install --cache-dir "${PYTHON3_PIP_CACHE}" "${SHELL_TEST_PKG}" ) diff --git a/shell/impala-shell b/shell/impala-shell index 651db863d..5f5dd93b7 100755 --- a/shell/impala-shell +++ b/shell/impala-shell @@ -28,7 +28,7 @@ # gen-py/ -- containing the Python thrift stubs for connecting to Impalad # lib/ -- containing the Hive Beeswax libraries; note this directory exists only in the # shell tarball, not the Impala repository, see make_shell_tarball.sh for details -# ext-py${VERSION}/ -- containing all the eggs for the modules packaged with the shell. +# ext-py${VERSION}/ -- containing the external dependencies for the shell. SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" SHELL_HOME=${IMPALA_SHELL_HOME:-${SCRIPT_DIR}} @@ -36,32 +36,21 @@ SHELL_HOME=${IMPALA_SHELL_HOME:-${SCRIPT_DIR}} # Set the envrionment's locale settings to allow for utf-8 compatibility export LC_CTYPE=${LC_CTYPE:-en_US.UTF-8} -# User can configure EGG_CACHE by setting PYTHON_EGG_CACHE. -# By default it is set to a per-user temporary location, -# which follows what hue does. -PYTHON_EGG_CACHE=${PYTHON_EGG_CACHE:-/tmp/impala-shell-python-egg-cache-${USER}} - -if [ ! -d ${PYTHON_EGG_CACHE} ]; then - mkdir ${PYTHON_EGG_CACHE} -fi - # Select python version; prefer 2, use 3 if 2's absent. Allow override with envvar PYTHON_EXE="${IMPALA_PYTHON_EXECUTABLE:-python}" if ! 
command -v "${PYTHON_EXE}" > /dev/null; then PYTHON_EXE=python3 fi -# External module eggs are located in /ext-py, append them to the PYTHONPATH -# Loads eggs based on Python version +PYTHONPATH="${SHELL_HOME}/gen-py:${SHELL_HOME}/lib:${PYTHONPATH}" + +# External dependencies are installed in /ext-py${PYTHON_VERSION} PYTHON_VERSION=$("${PYTHON_EXE}" -c 'import sys; print(sys.version_info[0])') -EGG_PATH='' -for EGG in $(ls ${SHELL_HOME}/ext-py${PYTHON_VERSION}/*.egg); do - EGG_PATH="${EGG}:${EGG_PATH}" -done +PYTHONPATH="${SHELL_HOME}/ext-py${PYTHON_VERSION}:${PYTHONPATH}" -LEGACY= if [ ${PYTHON_VERSION} -eq 2 ]; then - LEGACY=":${SHELL_HOME}/legacy" + # Python 2 requires the legacy pkg_resources.py code + PYTHONPATH="${PYTHONPATH}:${SHELL_HOME}/legacy" fi -PYTHONPATH="${EGG_PATH}${SHELL_HOME}/gen-py:${SHELL_HOME}/lib:${PYTHONPATH}${LEGACY}" \ - PYTHONIOENCODING='utf-8' exec ${PYTHON_EXE} ${SHELL_HOME}/impala_shell.py "$@" +PYTHONPATH="${PYTHONPATH}" PYTHONIOENCODING='utf-8' exec ${PYTHON_EXE} \ + ${SHELL_HOME}/impala_shell.py "$@" diff --git a/shell/make_shell_tarball.sh b/shell/make_shell_tarball.sh index 775a63c48..8e0658f41 100755 --- a/shell/make_shell_tarball.sh +++ b/shell/make_shell_tarball.sh @@ -104,8 +104,16 @@ def get_build_date(): return "${BUILD_DATE}" EOF -# Building all eggs. -echo "Building all external modules into eggs" +# Building all external dependencies +# +# This builds each package to a wheel, then pip installs that wheel into the external +# dependencies directory for that Python version. The result directories are similar +# to the lib/python${version}/site-packages directory for a virtualenv with impala-shell +# installed. +# +# These use the same pip caches as the virtualenvs to avoid extra downloads. This +# script is a prerequisite for the pypi packaging, so there is no concurrency issue. 
+echo "Building all external dependencies" for MODULE in ${SHELL_HOME}/ext-py/*; do # Sometimes there are leftover module directories from version changes. If IMPALA_HOME # is a git repository, then we can check if the module directory is tracked by git. @@ -122,29 +130,41 @@ for MODULE in ${SHELL_HOME}/ext-py/*; do echo "Cleaning up old build artifacts." rm -rf dist 2>&1 > /dev/null rm -rf build 2>&1 > /dev/null - echo "Creating a Python 2 egg for ${MODULE}" + echo "Building ${MODULE} with Python 2" + # Use the py2_venv to get the wheel package needed for bdist_wheel below. + # python2 is now the virtualenv's python2, which is $IMPALA_SYSTEM_PYTHON2 + source ${IMPALA_HOME}/shell/build/py2_venv/bin/activate if [[ "$MODULE" == *"/bitarray"* ]]; then - # Need to use setuptools to build egg for bitarray module - ${IMPALA_SYSTEM_PYTHON2} -c "import setuptools; exec(open('setup.py').read())" \ - -q bdist_egg + # Need to use setuptools to build wheel for bitarray module + python2 -c "import setuptools; exec(open('setup.py').read())" \ + -q bdist_wheel else - ${IMPALA_SYSTEM_PYTHON2} setup.py -q bdist_egg clean + python2 setup.py -q bdist_wheel clean fi - cp dist/*.egg ${TARBALL_ROOT}/ext-py2 + # pip install the wheel into the python 2 external dependencies directory + PYTHON2_PIP_CACHE="~/.cache/impala_py2_pip" + pip install --no-deps --cache "${PYTHON2_PIP_CACHE}" \ + --target ${TARBALL_ROOT}/ext-py2 dist/*.whl fi if [ ! -z "${IMPALA_SYSTEM_PYTHON3:-}" ]; then echo "Cleaning up old build artifacts." rm -rf dist 2>&1 > /dev/null rm -rf build 2>&1 > /dev/null - echo "Creating a Python 3 egg for ${MODULE}" + echo "Building ${MODULE} with Python 3" + # Use the py3_venv to get the wheel package needed for bdist_wheel below. 
+ # python3 is now the virtualenv's python3, which is $IMPALA_SYSTEM_PYTHON3 + source ${IMPALA_HOME}/shell/build/py3_venv/bin/activate if [[ "$MODULE" == *"/bitarray"* ]]; then - # Need to use setuptools to build egg for bitarray module - ${IMPALA_SYSTEM_PYTHON3} -c "import setuptools; exec(open('setup.py').read())" \ - -q bdist_egg + # Need to use setuptools to build wheel for bitarray module + python3 -c "import setuptools; exec(open('setup.py').read())" \ + -q bdist_wheel else - ${IMPALA_SYSTEM_PYTHON3} setup.py -q bdist_egg clean + python3 setup.py -q bdist_wheel clean fi - cp dist/*.egg ${TARBALL_ROOT}/ext-py3 + # pip install the wheel into the python 3 external dependencies directory + PYTHON3_PIP_CACHE="~/.cache/impala_py3_pip" + pip install --no-deps --cache "${PYTHON3_PIP_CACHE}" \ + --target ${TARBALL_ROOT}/ext-py3 dist/*.whl fi popd 2>&1 > /dev/null done
