This is an automated email from the ASF dual-hosted git repository. kaxilnaik pushed a commit to branch v1-10-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 591dc992977f2a0c9ac1bbd06d0a1730cb260e44 Author: Jarek Potiuk <[email protected]> AuthorDate: Tue Dec 1 09:51:24 2020 +0100 Improve verification of images with PIP check (#12718) Verification of images with PIP is done in separate jobs and they provide useful information to committers and contributors when the pip check fails. (cherry picked from commit ebc8fcf199d3304d8c55f6c3908108701c05f9ad) --- .github/workflows/ci.yml | 47 +++++++++ scripts/ci/images/ci_prepare_prod_image_on_ci.sh | 1 - ..._wait_for_ci_image.sh => ci_verify_ci_image.sh} | 49 +++++---- scripts/ci/images/ci_verify_prod_image.sh | 116 +++++++++++++++++++++ scripts/ci/images/ci_wait_for_ci_image.sh | 18 ---- scripts/ci/images/ci_wait_for_prod_image.sh | 18 ---- scripts/ci/libraries/_build_images.sh | 61 +++++++++++ 7 files changed, 250 insertions(+), 60 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 77cbf65..9655955 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -177,6 +177,28 @@ jobs: run: ./scripts/ci/images/ci_wait_for_all_ci_images.sh if: needs.build-info.outputs.waitForImage == 'true' + verify-ci-images: + timeout-minutes: 20 + name: "Verify CI Image Py${{matrix.python-version}}" + runs-on: ubuntu-20.04 + needs: [build-info, ci-images] + strategy: + matrix: + python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} + env: + BACKEND: sqlite + if: needs.build-info.outputs.image-build == 'true' + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v2 + if: needs.build-info.outputs.waitForImage == 'true' + - name: "Free space" + run: ./scripts/ci/tools/ci_free_space_on_ci.sh + if: needs.build-info.outputs.waitForImage == 'true' + - name: "Verify CI image Py${{matrix.python-version}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" + run: ./scripts/ci/images/ci_verify_ci_image.sh + if: needs.build-info.outputs.waitForImage == 'true' + static-checks: timeout-minutes: 30 name: "Static checks" @@ -593,6 +615,27 @@ jobs: run: ./scripts/ci/images/ci_wait_for_all_prod_images.sh if: needs.build-info.outputs.waitForImage == 'true' + verify-prod-images: + timeout-minutes: 20 + name: "Verify Prod Image Py${{matrix.python-version}}" + runs-on: ubuntu-20.04 + needs: [build-info, prod-images] + strategy: + matrix: + python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} + env: + BACKEND: sqlite + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v2 + if: needs.build-info.outputs.waitForImage == 'true' + - name: "Free space" + run: ./scripts/ci/tools/ci_free_space_on_ci.sh + if: needs.build-info.outputs.waitForImage == 'true' + - name: "Verify PROD image Py${{matrix.python-version}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}" + run: ./scripts/ci/images/ci_verify_prod_image.sh + if: needs.build-info.outputs.waitForImage == 'true' + tests-kubernetes: timeout-minutes: 50 name: K8s ${{matrix.python-version}} ${{matrix.kubernetes-version}} ${{matrix.kubernetes-mode}} @@ -681,6 +724,7 @@ jobs: - tests-mysql - tests-kubernetes - prod-images + - verify-prod-images - docs if: > (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/v1-10-test') && @@ -717,6 +761,7 @@ jobs: - tests-mysql - tests-kubernetes - ci-images + - verify-ci-images - docs if: > (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/v1-10-test' ) && @@ -781,6 +826,8 @@ jobs: needs: - build-info - constraints + - verify-ci-images + - verify-prod-images - static-checks - tests-sqlite - tests-mysql diff --git a/scripts/ci/images/ci_prepare_prod_image_on_ci.sh b/scripts/ci/images/ci_prepare_prod_image_on_ci.sh index f8cdcd2..d38182b 100755 --- a/scripts/ci/images/ci_prepare_prod_image_on_ci.sh +++ b/scripts/ci/images/ci_prepare_prod_image_on_ci.sh @@ -47,5 +47,4 @@ function build_prod_images_on_ci() { unset FORCE_BUILD } - build_prod_images_on_ci diff --git a/scripts/ci/images/ci_wait_for_ci_image.sh b/scripts/ci/images/ci_verify_ci_image.sh similarity index 57% copy from scripts/ci/images/ci_wait_for_ci_image.sh copy to scripts/ci/images/ci_verify_ci_image.sh index 2c0bdf2..e1f2b98 100755 --- a/scripts/ci/images/ci_wait_for_ci_image.sh +++ b/scripts/ci/images/ci_verify_ci_image.sh @@ -16,37 +16,40 @@ # specific language governing permissions and limitations # under the License. # shellcheck source=scripts/ci/libraries/_script_init.sh -. "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh" +. "$(dirname "${BASH_SOURCE[0]}")/../libraries/_script_init.sh" + +function verify_ci_image_dependencies() { -function verify_ci_image_dependencies { echo - echo "Checking if Airflow dependencies are non-conflicting in CI image." + echo "Checking if Airflow dependencies are non-conflicting in ${AIRFLOW_CI_IMAGE} image." echo - push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_CI_IMAGE}" \ - "${GITHUB_REGISTRY_AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}" - - # TODO: remove after we have it fully working - docker run --rm --entrypoint /bin/bash "${AIRFLOW_CI_IMAGE}" -c 'pip check' || true + set +e + docker run --rm --entrypoint /bin/bash "${AIRFLOW_CI_IMAGE}" -c 'pip check' + local res=$? + if [[ ${res} != "0" ]]; then + echo -e " \e[31mERROR: ^^^ Some dependencies are conflicting. See instructions below on how to deal with it.\e[0m" + echo + build_images::inform_about_pip_check "" + else + echo + echo -e " \e[32mOK. The ${AIRFLOW_PROD_IMAGE} image dependencies are consistent.\e[0m" + echo + fi + set -e + exit ${res} } -push_pull_remove_images::check_if_github_registry_wait_for_image_enabled - -push_pull_remove_images::check_if_jq_installed - -build_image::login_to_github_registry_if_needed - -export AIRFLOW_CI_IMAGE_NAME="${BRANCH_NAME}-python${PYTHON_MAJOR_MINOR_VERSION}-ci" +function pull_ci_image() { + local image_name_with_tag="${GITHUB_REGISTRY_AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}" + echo + echo "Pulling the ${image_name_with_tag} image." + echo -echo -echo "Waiting for image to appear: ${AIRFLOW_CI_IMAGE_NAME}" -echo + push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_CI_IMAGE}" "${image_name_with_tag}" +} -push_pull_remove_images::wait_for_github_registry_image \ - "${AIRFLOW_CI_IMAGE_NAME}" "${GITHUB_REGISTRY_PULL_IMAGE_TAG}" -echo -echo "Verifying the ${AIRFLOW_CI_IMAGE_NAME} image after pulling it" -echo +build_images::prepare_ci_build verify_ci_image_dependencies diff --git a/scripts/ci/images/ci_verify_prod_image.sh b/scripts/ci/images/ci_verify_prod_image.sh new file mode 100755 index 0000000..b330904 --- /dev/null +++ b/scripts/ci/images/ci_verify_prod_image.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# shellcheck source=scripts/ci/libraries/_script_init.sh +. "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh" + +function verify_prod_image_has_airflow_and_providers { + echo + echo "Airflow folders installed in the image:" + echo + + AIRFLOW_INSTALLATION_LOCATION="/home/airflow/.local" + + docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c \ + 'find '"${AIRFLOW_INSTALLATION_LOCATION}"'/lib/python*/site-packages/airflow/ -type d' + + EXPECTED_MIN_AIRFLOW_DIRS_COUNT="60" + readonly EXPECTED_MIN_AIRFLOW_DIRS_COUNT + + COUNT_AIRFLOW_DIRS=$(docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c \ + 'find '"${AIRFLOW_INSTALLATION_LOCATION}"'/lib/python*/site-packages/airflow/ -type d | grep -c -v "/airflow/providers"') + + echo + echo "Number of airflow dirs: ${COUNT_AIRFLOW_DIRS}" + echo + + if [[ "${COUNT_AIRFLOW_DIRS}" -lt "${EXPECTED_MIN_AIRFLOW_DIRS_COUNT}" ]]; then + >&2 echo + >&2 echo Number of airflow folders installed is less than ${EXPECTED_MIN_AIRFLOW_DIRS_COUNT} + >&2 echo This is unexpected. Please investigate, looking at the output above! + >&2 echo + exit 1 + else + echo + echo -e " \e[32mOK. Airflow is installed.\e[0m" + echo + fi + + EXPECTED_MIN_PROVIDERS_DIRS_COUNT="240" + readonly EXPECTED_MIN_PROVIDERS_DIRS_COUNT + + COUNT_AIRFLOW_PROVIDERS_DIRS=$(docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c \ + 'find '"${AIRFLOW_INSTALLATION_LOCATION}"'/lib/python*/site-packages/airflow/providers -type d | grep -c "" | xargs') + + echo + echo "Number of providers dirs: ${COUNT_AIRFLOW_PROVIDERS_DIRS}" + echo + + if [ "${COUNT_AIRFLOW_PROVIDERS_DIRS}" -lt "${EXPECTED_MIN_PROVIDERS_DIRS_COUNT}" ]; then + >&2 echo + >&2 echo Number of providers folders installed is less than ${EXPECTED_MIN_PROVIDERS_DIRS_COUNT} + >&2 echo This is unexpected. Please investigate, looking at the output above! + >&2 echo + exit 1 + else + echo + echo -e " \e[32mOK. Airflow Providers are installed.\e[0m" + echo + fi +} + + +function verify_prod_image_dependencies { + + echo + echo "Checking if Airflow dependencies are non-conflicting in ${AIRFLOW_PROD_IMAGE} image." + echo + + set +e + docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c 'pip check' + local res=$? + if [[ ${res} != "0" ]]; then + echo -e " \e[31mERROR: ^^^ Some dependencies are conflicting. See instructions below on how to deal with it.\e[0m" + echo + build_images::inform_about_pip_check "--production " + # TODO(potiuk) - enable the comment once https://github.com/apache/airflow/pull/12188 is merged + # exit ${res} + else + echo + echo " \e[32mOK. The ${AIRFLOW_PROD_IMAGE} image dependencies are consistent.\e[0m" + echo + fi + set -e + +} + +function pull_prod_image() { + local image_name_with_tag="${GITHUB_REGISTRY_AIRFLOW_PROD_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}" + + echo + echo "Pulling the ${image_name_with_tag} image." + echo + + push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_PROD_IMAGE}" "${image_name_with_tag}" +} + +build_images::prepare_prod_build + + +verify_prod_image_has_airflow_and_providers + +verify_prod_image_dependencies diff --git a/scripts/ci/images/ci_wait_for_ci_image.sh b/scripts/ci/images/ci_wait_for_ci_image.sh index 2c0bdf2..0c3ea08 100755 --- a/scripts/ci/images/ci_wait_for_ci_image.sh +++ b/scripts/ci/images/ci_wait_for_ci_image.sh @@ -18,18 +18,6 @@ # shellcheck source=scripts/ci/libraries/_script_init.sh . "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh" -function verify_ci_image_dependencies { - echo - echo "Checking if Airflow dependencies are non-conflicting in CI image." - echo - - push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_CI_IMAGE}" \ - "${GITHUB_REGISTRY_AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}" - - # TODO: remove after we have it fully working - docker run --rm --entrypoint /bin/bash "${AIRFLOW_CI_IMAGE}" -c 'pip check' || true -} - push_pull_remove_images::check_if_github_registry_wait_for_image_enabled push_pull_remove_images::check_if_jq_installed @@ -44,9 +32,3 @@ echo push_pull_remove_images::wait_for_github_registry_image \ "${AIRFLOW_CI_IMAGE_NAME}" "${GITHUB_REGISTRY_PULL_IMAGE_TAG}" - -echo -echo "Verifying the ${AIRFLOW_CI_IMAGE_NAME} image after pulling it" -echo - -verify_ci_image_dependencies diff --git a/scripts/ci/images/ci_wait_for_prod_image.sh b/scripts/ci/images/ci_wait_for_prod_image.sh index e53aec1..1c7cef5 100755 --- a/scripts/ci/images/ci_wait_for_prod_image.sh +++ b/scripts/ci/images/ci_wait_for_prod_image.sh @@ -18,18 +18,6 @@ # shellcheck source=scripts/ci/libraries/_script_init.sh . "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh" -function verify_prod_image_dependencies { - echo - echo "Checking if Airflow dependencies are non-conflicting in PROD image." - echo - - push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_PROD_IMAGE}" \ - "${GITHUB_REGISTRY_AIRFLOW_PROD_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}" - - # TODO: remove the | true after we fixed pip check for prod image - docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c 'pip check' || true -} - push_pull_remove_images::check_if_github_registry_wait_for_image_enabled push_pull_remove_images::check_if_jq_installed @@ -44,9 +32,3 @@ echo push_pull_remove_images::wait_for_github_registry_image \ "${AIRFLOW_PROD_IMAGE_NAME}" "${GITHUB_REGISTRY_PULL_IMAGE_TAG}" - -echo -echo "Verifying the ${AIRFLOW_PROD_IMAGE_NAME} image after pulling it" -echo - -verify_prod_image_dependencies diff --git a/scripts/ci/libraries/_build_images.sh b/scripts/ci/libraries/_build_images.sh index 8de58db..30e7a85 100644 --- a/scripts/ci/libraries/_build_images.sh +++ b/scripts/ci/libraries/_build_images.sh @@ -849,3 +849,64 @@ function build_images::determine_docker_cache_strategy() { verbosity::print_info "Using ${DOCKER_CACHE} cache strategy for the build." verbosity::print_info } + + +# Useful information for people who stumble upon a pip check failure +function build_images::inform_about_pip_check() { + >&2 echo """ + +The image did not pass 'pip check' verification. This means that there are some conflicting dependencies +in the image. Usually it means that some setup.py or setup.cfg limits need to be adjusted to fix it. + +Usually it happens when one of the dependencies gets upgraded and it has more strict requirements +than the other dependencies and they are conflicting. + +In case you did not update setup.py or any of your dependencies, this error might happen in case +someone accidentally merges conflicting dependencies in master. This +should not happen as we are running 'pip check' as dependency before we upgrade the constrained +dependencies, but we could miss some edge cases (thank you for your patience). Please let committer now +and apologies for the troubles. You do not have to do anything in this case. You might be asked to +rebase to the latest master after the problem is fixed. + +In case you actually updated setup.py, there are some steps you can take to address that: + +* first of all ask the committer to set 'upgrade to newer dependencies' and 'full tests needed' labels + for your PR. This will turn your PR in mode where all the dependencies are upgraded to latest matching + dependencies and the checks will run for all python versions + +* run locally the image that is failing with Breeze - this will make it easy to manually try to update + the setup.py and test the consequences of changing constraints. You can do it by checking out your PR + and running this command: + + ./breeze ${1}--github-image-id ${GITHUB_REGISTRY_PULL_IMAGE_TAG} --backend ${BACKEND} --python ${PYTHON_MAJOR_MINOR_VERSION} + +* your setup.py and setup.cfg will be mounted to the container and you will be able to iterate with + different setup.py versions. + +* run 'pipdeptree' to figure out where the dependency conflict comes from. Useful commands that can help you + to find out dependencies you have are: + * 'pipdeptree | less' (you can then search through the dependencies with vim-like shortcuts) + * 'pipdeptree > /files/pipdeptree.txt' - this will produce a pipdeptree.txt file in your source + 'files' directory and you can open it in editor of your choice, + * 'pipdeptree | grep YOUR_DEPENDENCY' - to see all the requirements your dependency has as specified + by other packages + +* figure out which dependency limits should be upgraded. First try to upgrade them in setup.py extras + and run pip to upgrade your dependencies accordingly: + + pip install '.[all]' --upgrade --upgrade-strategy eager + +* run pip check to figure out if the dependencies have been fixed (it should let you know which dependencies + are conflicting or (hurray!) if there are no conflicts: + + pip check + +* in some, rare, cases, pip will not limit the requirement in case you specify it in extras, you might + need to add such requirement in 'install_requires' section of setup.cfg in order to have pip take it into + account. This will happen if higher version of your dependency is already installed in 'install_requires' + section. In such case update 'setup.cfg' and run pip install/pip check from the previous steps + +* iterate until all such dependency conflicts are fixed. + +""" +}
