This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch v2-0-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit d3b0a717b6a9204192003f8969e8fa45ce6b4b5f
Author: Jarek Potiuk <[email protected]>
AuthorDate: Sun Mar 14 21:31:46 2021 +0100

    Only rebuilds base python image when upgrading to newer deps (#14783)
    
    The base python image is only updated when manually triggered and
    in case of checking for upgraded dependencies in master build.
    
    While automated upgrade to latest Python image is good for
    security, it can cause a number of problems when run automatically
    in the CI:
    
    * cache invalidation - thus longer builds
    * sudden test failures
    
    This happened in the past already quite a number of times so it
    is time to switch to a bit different mode. Python images will only
    be automatically upgraded in those cases:
    
    1) When Master CI build is run in scheduled nightly build - to check
       that tests still pass for latest version of the image
    
    2) When manually refreshed with --force-pull-base-python-image
    
    3) When DockerHub official images (from tags) are built.
    
    The procedure to refresh the images manually in our CI has been
    added to the documentation.
    
    (cherry picked from commit 4762396b8bd84885a647268d85a50ad3e796859d)
---
 .github/workflows/build-images-workflow-run.yml  |  4 ++
 BREEZE.rst                                       | 43 ++++++++++++++++----
 IMAGES.rst                                       | 44 ++++++++++++++++++---
 breeze                                           | 40 ++++++++++++++++---
 breeze-complete                                  |  2 +-
 scripts/ci/images/ci_build_dockerhub.sh          |  1 +
 scripts/ci/libraries/_push_pull_remove_images.sh | 50 +++++++++++++-----------
 7 files changed, 141 insertions(+), 43 deletions(-)

diff --git a/.github/workflows/build-images-workflow-run.yml 
b/.github/workflows/build-images-workflow-run.yml
index 65ba7aa..b158fd2 100644
--- a/.github/workflows/build-images-workflow-run.yml
+++ b/.github/workflows/build-images-workflow-run.yml
@@ -278,6 +278,8 @@ jobs:
       UPGRADE_TO_NEWER_DEPENDENCIES: ${{ 
needs.build-info.outputs.upgradeToNewerDependencies }}
       CONTINUE_ON_PIP_CHECK_FAILURE: "true"
       DOCKER_CACHE: ${{ needs.cancel-workflow-runs.outputs.cacheDirective }}
+      FORCE_PULL_BASE_PYTHON_IMAGE: >
+        ${{ needs.cancel-workflow-runs.sourceEvent == 'schedule' && 'true' || 
'false' }}
     steps:
       - name: >
           Checkout [${{ needs.cancel-workflow-runs.outputs.sourceEvent }}]
@@ -405,6 +407,8 @@ jobs:
       GITHUB_REGISTRY_PULL_IMAGE_TAG: ${{ github.event.workflow_run.id }}
       UPGRADE_TO_NEWER_DEPENDENCIES: ${{ 
needs.build-info.outputs.upgradeToNewerDependencies }}
       DOCKER_CACHE: ${{ needs.cancel-workflow-runs.outputs.cacheDirective }}
+      FORCE_PULL_BASE_PYTHON_IMAGE: >
+        ${{ needs.cancel-workflow-runs.sourceEvent == 'schedule' && 'true' || 
'false' }}
       VERSION_SUFFIX_FOR_PYPI: "dev"
       VERSION_SUFFIX_FOR_SVN: "dev"
     steps:
diff --git a/BREEZE.rst b/BREEZE.rst
index d608d5e..c5d643d 100644
--- a/BREEZE.rst
+++ b/BREEZE.rst
@@ -296,7 +296,7 @@ can check whether your problem is fixed.
 
 1. If you are on macOS, check if you have enough disk space for Docker.
 2. Restart Breeze with ``./breeze restart``.
-3. Delete the ``.build`` directory and run ``./breeze build-image 
--force-pull-images``.
+3. Delete the ``.build`` directory and run ``./breeze build-image``.
 4. Clean up Docker images via ``breeze cleanup-image`` command.
 5. Restart your Docker Engine and try again.
 6. Restart your machine and try again.
@@ -1255,16 +1255,24 @@ This is the current syntax for  `./breeze <./breeze>`_:
   breeze build-image [FLAGS]
 
         Builds docker image (CI or production) without entering the container. 
You can pass
-        additional options to this command, such as '--force-build-image',
-        '--force-pull-image', '--python', '--build-cache-local' or 
'-build-cache-pulled'
-        in order to modify build behaviour.
+        additional options to this command, such as:
+
+        Choosing python version:
+          '--python'
+
+        Choosing cache option:
+           '--build-cache-local' or '--build-cache-pulled', or 
'--build-cache-disabled'
+
+        Choosing whether to force pull images or force build the image:
+            '--force-build-images',
+             '--force-pull-images', '--force-pull-base-python-image'
 
         You can also pass '--production-image' flag to build production image 
rather than CI image.
 
-        For DockerHub pull --dockerhub-user and --dockerhub-repo flags can be 
used to specify
-        the repository to pull from. For GitHub repository, the 
--github-repository
+        For DockerHub pull. '--dockerhub-user' and '--dockerhub-repo' flags 
can be used to specify
+        the repository to pull from. For GitHub repository, the 
'--github-repository'
         flag can be used for the same purpose. You can also use
-        --github-image-id <COMMIT_SHA>|<RUN_ID> in case you want to pull the 
image with
+        '--github-image-id <COMMIT_SHA>|<RUN_ID>' in case you want to pull the 
image with
         specific COMMIT_SHA tag or RUN_ID.
 
   Flags:
@@ -1334,6 +1342,13 @@ This is the current syntax for  `./breeze <./breeze>`_:
           images are pulled by default only for the first time you run the
           environment, later the locally build images are used as cache.
 
+  --force-pull-base-python-image
+          Forces pulling of Python base image from DockerHub before building to
+          populate cache. This should only be run in case we need to update to 
latest available
+          Python base image. This should be a rare and manually triggered 
event. Also this flag
+          is used in the scheduled run in CI when we rebuild all the images 
from the scratch
+          and run the tests to see if the latest python images do not fail our 
tests.
+
   Customization options:
 
   -E, --extras EXTRAS
@@ -1982,6 +1997,13 @@ This is the current syntax for  `./breeze <./breeze>`_:
           images are pulled by default only for the first time you run the
           environment, later the locally build images are used as cache.
 
+  --force-pull-base-python-image
+          Forces pulling of Python base image from DockerHub before building to
+          populate cache. This should only be run in case we need to update to 
latest available
+          Python base image. This should be a rare and manually triggered 
event. Also this flag
+          is used in the scheduled run in CI when we rebuild all the images 
from the scratch
+          and run the tests to see if the latest python images do not fail our 
tests.
+
   Customization options:
 
   -E, --extras EXTRAS
@@ -2570,6 +2592,13 @@ This is the current syntax for  `./breeze <./breeze>`_:
           images are pulled by default only for the first time you run the
           environment, later the locally build images are used as cache.
 
+  --force-pull-base-python-image
+          Forces pulling of Python base image from DockerHub before building to
+          populate cache. This should only be run in case we need to update to 
latest available
+          Python base image. This should be a rare and manually triggered 
event. Also this flag
+          is used in the scheduled run in CI when we rebuild all the images 
from the scratch
+          and run the tests to see if the latest python images do not fail our 
tests.
+
   Customization options:
 
   -E, --extras EXTRAS
diff --git a/IMAGES.rst b/IMAGES.rst
index 2871ba0..0cf5b8c 100644
--- a/IMAGES.rst
+++ b/IMAGES.rst
@@ -757,12 +757,9 @@ significant changes have been made to apt packages or even 
the base python image
 Pulling the Latest Images
 -------------------------
 
-Sometimes the image needs to be rebuilt from scratch. This is required, for 
example,
-when there is a security update of the Python version that all the images are 
based on and new version
-of the image is pushed to the repository. In this case it is usually faster to 
pull the latest
-images rather than rebuild them from scratch.
-
-You can do it via the ``--force-pull-images`` flag to force pulling the latest 
images from the Docker Hub.
+Sometimes the image needs to be refreshed from the registry in DockerHub - 
because you have an outdated
+version. You can do it via the ``--force-pull-images`` flag to force pulling 
the latest images from the
+DockerHub.
 
 For production image:
 
@@ -777,6 +774,41 @@ however uou can also force it with the same flag.
 
   ./breeze build-image --force-pull-images
 
+Refreshing Base Python images
+=============================
+
+Python base images are updated from time-to-time, usually as a result of 
implementing security fixes.
+When you build your image locally using ``docker build`` you use the version 
of image that you have locally.
+For the CI builds using ``breeze`` we use the image that is stored in our 
repository in order to use cache
+efficiently. However, we can refresh the image to the latest available by specifying
+``--force-pull-base-python-image`` and running it manually (you need to have 
access to DockerHub and our
+GitHub Registries in order to be able to do that).
+
+.. code-block:: bash
+
+    #!/bin/bash
+    export DOCKERHUB_USER="apache"
+    export GITHUB_REPOSITORY="apache/airflow"
+    export FORCE_ANSWER_TO_QUESTIONS="true"
+    export CI="true"
+
+    for python_version in "3.6" "3.7" "3.8"
+    do
+            ./breeze build-image --python ${python_version} 
--build-cache-local \
+                    --force-pull-base-python-image --verbose
+            ./breeze build-image --python ${python_version} 
--build-cache-local \
+                    --production-image --verbose
+            ./breeze push-image
+            ./breeze push-image --github-registry ghcr.io
+            ./breeze push-image --github-registry docker.pkg.github.com
+            ./breeze push-image --production-image
+            ./breeze push-image --github-registry ghcr.io --production-image
+            ./breeze push-image --github-registry docker.pkg.github.com 
--production-image
+    done
+
+
+
+
 Embedded image scripts
 ======================
 
diff --git a/breeze b/breeze
index 6c352c9..7e3432d 100755
--- a/breeze
+++ b/breeze
@@ -116,6 +116,10 @@ function breeze::setup_default_breeze_constants() {
     # This can be overridden by '--force-pull-images' flag
     export FORCE_PULL_IMAGES="false"
 
+    # By default we do not pull python base image. We should do that only when 
we run upgrade check in
+    # CI master and when we manually refresh the images to latest versions
+    export FORCE_PULL_BASE_PYTHON_IMAGE="false"
+
     # Forward common host credentials to docker (gcloud, aws etc.).
     export FORWARD_CREDENTIALS="false"
 
@@ -983,6 +987,15 @@ function breeze::parse_arguments() {
             export FORCE_ANSWER_TO_QUESTIONS="yes"
             shift
             ;;
+        --force-pull-base-python-image)
+            echo "Force pulling base python image. Uses pulled images as 
cache."
+            echo
+            export FORCE_PULL_BASE_PYTHON_IMAGE="true"
+            export FORCE_BUILD_IMAGES="true"
+            # if you want to force  build an image - assume you want to build 
it :)
+            export FORCE_ANSWER_TO_QUESTIONS="yes"
+            shift
+            ;;
         -I | --production-image)
             export PRODUCTION_IMAGE="true"
             export SQLITE_URL=
@@ -1719,16 +1732,24 @@ ${CMDNAME} build-docs [-- <EXTRA_ARGS>]
 ${CMDNAME} build-image [FLAGS]
 
       Builds docker image (CI or production) without entering the container. 
You can pass
-      additional options to this command, such as '--force-build-image',
-      '--force-pull-image', '--python', '--build-cache-local' or 
'-build-cache-pulled'
-      in order to modify build behaviour.
+      additional options to this command, such as:
+
+      Choosing python version:
+        '--python'
+
+      Choosing cache option:
+         '--build-cache-local' or '-build-cache-pulled', or 
'--build-cache-none'
+
+      Choosing whether to force pull images or force build the image:
+          '--force-build-image',
+           '--force-pull-image', '--force-pull-base-python-image'
 
       You can also pass '--production-image' flag to build production image 
rather than CI image.
 
-      For DockerHub pull --dockerhub-user and --dockerhub-repo flags can be 
used to specify
-      the repository to pull from. For GitHub repository, the 
--github-repository
+      For DockerHub pull. '--dockerhub-user' and '--dockerhub-repo' flags can 
be used to specify
+      the repository to pull from. For GitHub repository, the 
'--github-repository'
       flag can be used for the same purpose. You can also use
-      --github-image-id <COMMIT_SHA>|<RUN_ID> in case you want to pull the 
image with
+      '--github-image-id <COMMIT_SHA>|<RUN_ID>' in case you want to pull the 
image with
       specific COMMIT_SHA tag or RUN_ID.
 
 Flags:
@@ -2580,6 +2601,13 @@ function breeze::flag_build_docker_images() {
         images are pulled by default only for the first time you run the
         environment, later the locally build images are used as cache.
 
+--force-pull-base-python-image
+        Forces pulling of Python base image from DockerHub before building to
+        populate cache. This should only be run in case we need to update to 
latest available
+        Python base image. This should be a rare and manually triggered event. 
Also this flag
+        is used in the scheduled run in CI when we rebuild all the images from 
the scratch
+        and run the tests to see if the latest python images do not fail our 
tests.
+
 Customization options:
 
 -E, --extras EXTRAS
diff --git a/breeze-complete b/breeze-complete
index 7789552..72007cb 100644
--- a/breeze-complete
+++ b/breeze-complete
@@ -164,7 +164,7 @@ help python: backend: integration:
 kubernetes-mode: kubernetes-version: helm-version: kind-version:
 skip-mounting-local-sources mount-all-local-sources install-airflow-version: 
install-airflow-reference: db-reset
 verbose assume-yes assume-no assume-quit forward-credentials init-script:
-force-build-images force-pull-images production-image extras: 
force-clean-images skip-rebuild-check
+force-build-images force-pull-base-python-image production-image extras: 
force-clean-images skip-rebuild-check
 build-cache-local build-cache-pulled build-cache-disabled disable-pip-cache
 dockerhub-user: dockerhub-repo: use-github-registry github-registry: 
github-repository: github-image-id: generate-constraints-mode:
 postgres-version: mysql-version:
diff --git a/scripts/ci/images/ci_build_dockerhub.sh 
b/scripts/ci/images/ci_build_dockerhub.sh
index 16d926a..7a52782 100755
--- a/scripts/ci/images/ci_build_dockerhub.sh
+++ b/scripts/ci/images/ci_build_dockerhub.sh
@@ -111,6 +111,7 @@ else
     export INSTALL_PROVIDERS_FROM_SOURCES="false"
     export AIRFLOW_PRE_CACHED_PIP_PACKAGES="false"
     export DOCKER_CACHE="local"
+    export FORCE_PULL_BASE_PYTHON_IMAGE="true"
     # Name the image based on the TAG rather than based on the branch name
     export FORCE_AIRFLOW_PROD_BASE_TAG="${DOCKER_TAG}"
     export INSTALL_AIRFLOW_VERSION="${DOCKER_TAG%-python*}"
diff --git a/scripts/ci/libraries/_push_pull_remove_images.sh 
b/scripts/ci/libraries/_push_pull_remove_images.sh
index 5090c5b..3624a9a 100644
--- a/scripts/ci/libraries/_push_pull_remove_images.sh
+++ b/scripts/ci/libraries/_push_pull_remove_images.sh
@@ -104,10 +104,13 @@ function 
push_pull_remove_images::pull_image_github_dockerhub() {
     set -e
 }
 
-# Force pulls the python base image
-function push_pull_remove_images::force_pull_python_base_image() {
+# Rebuilds python base image from the latest available Python version
+function push_pull_remove_images::rebuild_python_base_image() {
+   echo
+   echo "Rebuilding ${AIRFLOW_PYTHON_BASE_IMAGE} from latest 
${PYTHON_BASE_IMAGE}"
+   echo
    docker pull "${PYTHON_BASE_IMAGE}"
-    echo "FROM ${PYTHON_BASE_IMAGE}" | \
+   echo "FROM ${PYTHON_BASE_IMAGE}" | \
         docker build \
             --label 
"org.opencontainers.image.source=https://github.com/${GITHUB_REPOSITORY}" \
             -t "${AIRFLOW_PYTHON_BASE_IMAGE}" -
@@ -115,34 +118,33 @@ function 
push_pull_remove_images::force_pull_python_base_image() {
 
 # Pulls the base Python image. This image is used as base for CI and PROD 
images, depending on the parameters used:
 #
-# * if FORCE_PULL_IMAGES is true or UPGRADE_TO_NEWER_DEPENDENCIES != false, 
then it pulls the latest Python image available first and
-#     adds `org.opencontainers.image.source` label to it, so that it is linked 
to Airflow repository when
-#     we push it to GHCR registry
+# * if FORCE_PULL_BASE_PYTHON_IMAGE is "true", then it rebuilds the image using the 
latest Python image available
+#     and adds `org.opencontainers.image.source` label to it, so that it is 
linked to Airflow
+#     repository when we push it to GHCR registry
 # * Otherwise it pulls the Python base image from either GitHub registry or 
from DockerHub
 #     depending on USE_GITHUB_REGISTRY variable. In case we pull specific 
build image (via suffix)
 #     it will pull the right image using the specified suffix
 function push_pull_remove_images::pull_base_python_image() {
+    if [[ ${FORCE_PULL_BASE_PYTHON_IMAGE} == "true" ]] ; then
+        push_pull_remove_images::rebuild_python_base_image
+        return
+    fi
     echo
-    echo "Force pull python base image ${AIRFLOW_PYTHON_BASE_IMAGE}. Upgrade 
to newer dependencies: ${UPGRADE_TO_NEWER_DEPENDENCIES}"
+    echo "Docker pulling base python image. Upgrade to newer deps: 
${UPGRADE_TO_NEWER_DEPENDENCIES}"
     echo
     if [[ -n ${DETECTED_TERMINAL=} ]]; then
-        echo -n "
-Docker pulling ${AIRFLOW_PYTHON_BASE_IMAGE}. Upgrade to newer dependencies 
${UPGRADE_TO_NEWER_DEPENDENCIES}
+        echo -n "Docker pulling base python image. Upgrade to newer deps: 
${UPGRADE_TO_NEWER_DEPENDENCIES}
 " > "${DETECTED_TERMINAL}"
     fi
-    if [[ "${FORCE_PULL_IMAGES}" == "true" || ${UPGRADE_TO_NEWER_DEPENDENCIES} 
!= "false" ]]; then
-        push_pull_remove_images::force_pull_python_base_image
-    else
-        if [[ ${USE_GITHUB_REGISTRY} == "true" ]]; then
-            PYTHON_TAG_SUFFIX=""
-            if [[ ${GITHUB_REGISTRY_PULL_IMAGE_TAG} != "latest" ]]; then
-                PYTHON_TAG_SUFFIX="-${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-            fi
-            push_pull_remove_images::pull_image_github_dockerhub 
"${AIRFLOW_PYTHON_BASE_IMAGE}" \
-                "${GITHUB_REGISTRY_PYTHON_BASE_IMAGE}${PYTHON_TAG_SUFFIX}"
-        else
-            docker pull "${AIRFLOW_PYTHON_BASE_IMAGE}"
+    if [[ ${USE_GITHUB_REGISTRY} == "true" ]]; then
+        PYTHON_TAG_SUFFIX=""
+        if [[ ${GITHUB_REGISTRY_PULL_IMAGE_TAG} != "latest" ]]; then
+            PYTHON_TAG_SUFFIX="-${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
         fi
+        push_pull_remove_images::pull_image_github_dockerhub 
"${AIRFLOW_PYTHON_BASE_IMAGE}" \
+            "${GITHUB_REGISTRY_PYTHON_BASE_IMAGE}${PYTHON_TAG_SUFFIX}"
+    else
+        docker pull "${AIRFLOW_PYTHON_BASE_IMAGE}"
     fi
 }
 
@@ -150,7 +152,8 @@ Docker pulling ${AIRFLOW_PYTHON_BASE_IMAGE}. Upgrade to 
newer dependencies ${UPG
 function push_pull_remove_images::pull_ci_images_if_needed() {
     local python_image_hash
     python_image_hash=$(docker images -q "${AIRFLOW_PYTHON_BASE_IMAGE}" 2> 
/dev/null || true)
-    if [[ -z "${python_image_hash=}" || "${FORCE_PULL_IMAGES}" == "true" ]]; 
then
+    if [[ -z "${python_image_hash=}" || "${FORCE_PULL_IMAGES}" == "true" || \
+            ${FORCE_PULL_BASE_PYTHON_IMAGE} == "true" ]]; then
         push_pull_remove_images::pull_base_python_image
     fi
     if [[ "${DOCKER_CACHE}" == "pulled" ]]; then
@@ -168,7 +171,8 @@ function 
push_pull_remove_images::pull_ci_images_if_needed() {
 function push_pull_remove_images::pull_prod_images_if_needed() {
     local python_image_hash
     python_image_hash=$(docker images -q "${AIRFLOW_PYTHON_BASE_IMAGE}" 2> 
/dev/null || true)
-    if [[ -z "${python_image_hash=}" || "${FORCE_PULL_IMAGES}" == "true" ]]; 
then
+    if [[ -z "${python_image_hash=}" || "${FORCE_PULL_IMAGES}" == "true"  || \
+            ${FORCE_PULL_BASE_PYTHON_IMAGE} == "true" ]]; then
         push_pull_remove_images::pull_base_python_image
     fi
     if [[ "${DOCKER_CACHE}" == "pulled" ]]; then

Reply via email to