This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch fix-image-cache in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 89a3c70b9654c7f8812e87d49bc4dfec6979486d Author: Jarek Potiuk <[email protected]> AuthorDate: Sun Mar 24 21:21:03 2024 +0100 Fix image cache optimizations - speeding up the build The recent workflow refactors broke the way the cache was used in the CI builds. This PR restores the optimizations by using the cache and rebuilding it. --- .github/workflows/additional-ci-image-checks.yml | 54 ++++++++--------- .github/workflows/build-images.yml | 3 +- .github/workflows/ci-image-build.yml | 17 +++--- .github/workflows/finalize-tests.yml | 69 ++++++++++++++-------- .github/workflows/prod-image-build.yml | 9 ++- .github/workflows/prod-image-extra-checks.yml | 3 + .github/workflows/push-image-cache.yml | 62 ++++++++++--------- .../airflow_breeze/utils/docker_command_utils.py | 1 + 8 files changed, 122 insertions(+), 96 deletions(-) diff --git a/.github/workflows/additional-ci-image-checks.yml b/.github/workflows/additional-ci-image-checks.yml index 1bee163b0f..8cfc9acefc 100644 --- a/.github/workflows/additional-ci-image-checks.yml +++ b/.github/workflows/additional-ci-image-checks.yml @@ -89,30 +89,31 @@ jobs: # delay cache refresh. It does not attempt to upgrade to newer dependencies. # We only push CI cache as PROD cache usually does not gain as much from fresh cache because # it uses prepared airflow and provider packages that invalidate the cache anyway most of the time - # push-early-buildx-cache-to-github-registry: - # name: Push Early Image Cache - # uses: ./.github/workflows/push-image-cache.yml - # permissions: - # contents: read - # # This write is only given here for `push` events from "apache/airflow" repo. It is not given for PRs - # # from forks. This is to prevent malicious PRs from creating images in the "apache/airflow" repo. 
- # # For regular build for PRS this "build-prod-images" workflow will be skipped anyway by the - # # "in-workflow-build" condition - # packages: write - # secrets: inherit - # with: - # runs-on: ${{ inputs.runs-on }} - # cache-type: "Early" - # include-prod-images: "false" - # push-latest-images: "false" - # image-tag: ${{ inputs.image-tag }} - # python-versions: ${{ inputs.python-versions }} - # branch: ${{ inputs.branch }} - # use-uv: "true" - # include-success-outputs: ${{ inputs.include-success-outputs }} - # constraints-branch: ${{ inputs.constraints-branch }} - # docker-cache: ${{ inputs.docker-cache }} - # if: inputs.canary-run == 'true' && inputs.branch == 'main' + push-early-buildx-cache-to-github-registry: + name: Push Early Image Cache + uses: ./.github/workflows/push-image-cache.yml + permissions: + contents: read + # This write is only given here for `push` events from "apache/airflow" repo. It is not given for PRs + # from forks. This is to prevent malicious PRs from creating images in the "apache/airflow" repo. 
+ # For regular build for PRS this "build-prod-images" workflow will be skipped anyway by the + # "in-workflow-build" condition + packages: write + secrets: inherit + with: + # Runs on Public runners + cache-type: "Early" + include-prod-images: "false" + push-latest-images: "false" + image-tag: ${{ inputs.image-tag }} + platform: "linux/amd64" + python-versions: ${{ inputs.python-versions }} + branch: ${{ inputs.branch }} + constraints-branch: ${{ inputs.constraints-branch }} + use-uv: "true" + include-success-outputs: ${{ inputs.include-success-outputs }} + docker-cache: ${{ inputs.docker-cache }} + if: inputs.canary-run == 'true' && inputs.branch == 'main' # Check that after earlier cache push, breeze command will build quickly check-that-image-builds-quickly: @@ -121,7 +122,6 @@ jobs: runs-on: ["ubuntu-22.04"] env: UPGRADE_TO_NEWER_DEPENDENCIES: false - PLATFORM: "linux/amd64" PYTHON_MAJOR_MINOR_VERSION: ${{ inputs.default-python-version }} PYTHON_VERSION: ${{ inputs.default-python-version }} IMAGE_TAG: ${{ inputs.image-tag }} @@ -142,7 +142,7 @@ jobs: - name: "Login to ghcr.io" run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - name: "Check that image builds quickly" - run: breeze shell --max-time 120 + run: breeze shell --max-time 120 --platform "linux/amd64" # This is only a check if ARM images are successfully building when committer runs PR from # Apache repository. 
This is needed in case you want to fix failing cache job in "canary" run @@ -156,11 +156,11 @@ jobs: packages: write secrets: inherit with: - platform: "arm64" push-image: "false" runs-on: ${{ inputs.runs-on }} image-tag: ${{ inputs.image-tag }} python-versions: ${{ inputs.python-versions }} + platform: "linux/arm64" branch: ${{ inputs.branch }} constraints-branch: ${{ inputs.constraints-branch }} use-uv: "true" diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index d2d574c806..32a5056848 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -181,12 +181,12 @@ jobs: use-uv: "true" image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} + platform: "linux/amd64" branch: ${{ needs.build-info.outputs.default-branch }} constraints-branch: ${{ needs.build-info.outputs.constraints-branch }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} docker-cache: ${{ needs.build-info.outputs.cache-directive }} - build-prod-images: name: Build PROD images permissions: @@ -210,6 +210,7 @@ jobs: use-uv: "true" image-tag: ${{ needs.build-info.outputs.image-tag }} python-versions: ${{ needs.build-info.outputs.python-versions }} + platform: "linux/amd64" branch: ${{ needs.build-info.outputs.default-branch }} constraints-branch: ${{ needs.build-info.outputs.constraints-branch }} build-provider-packages: "true" diff --git a/.github/workflows/ci-image-build.yml b/.github/workflows/ci-image-build.yml index 9eff0ced3a..177cd39fbb 100644 --- a/.github/workflows/ci-image-build.yml +++ b/.github/workflows/ci-image-build.yml @@ -48,9 +48,8 @@ on: # yamllint disable-line rule:truthy default: "false" type: string platform: - description: > - Name of the platform for the build - 'amd64/arm64' - default: "amd64" + description: "Platform for the build - 'linux/amd64' or 'linux/arm64'" + required: true type: string 
push-image: description: "Whether to push image to the registry (true/false)" @@ -107,8 +106,8 @@ jobs: timeout-minutes: 110 name: "\ ${{ inputs.do-build == 'true' && 'Build' || 'Skip building' }} \ -CI ${{inputs.platform}} image\ -${{matrix.python-version}}${{ inputs.do-build == 'true' && ':' || '' }}\ +CI ${{ inputs.platform }} image\ +${{ matrix.python-version }}${{ inputs.do-build == 'true' && ':' || '' }}\ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" runs-on: ${{ fromJSON(inputs.runs-on) }} env: @@ -185,7 +184,7 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" if: inputs.do-build == 'true' && inputs.upgrade-to-newer-dependencies != 'false' - name: "Start ARM instance" run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh - if: inputs.do-build == 'true' && inputs.platform == 'arm64' + if: inputs.do-build == 'true' && inputs.platform == 'linux/arm64' - name: Login to ghcr.io run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin if: inputs.do-build == 'true' @@ -195,19 +194,19 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" run: > breeze ci-image build --tag-as-latest --image-tag "${{ inputs.image-tag }}" --python "${{ matrix.python-version }}" - --platform "linux/${{ inputs.platform }}" + --platform "${{ inputs.platform }}" env: DOCKER_CACHE: ${{ inputs.docker-cache }} INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} UPGRADE_TO_NEWER_DEPENDENCIES: ${{ inputs.upgrade-to-newer-dependencies }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - BUILDER: ${{ inputs.platform == 'amd64' && 'default' || 'airflow_cache' }} + BUILDER: "airflow_cache" PUSH: ${{ inputs.push-image }} VERBOSE: "true" if: inputs.do-build == 'true' - name: "Stop ARM instance" run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() && inputs.do-build == 'true' && inputs.platform == 'arm64' + if: always() && inputs.do-build == 'true' && inputs.platform == 'linux/arm64' - 
name: "Source constraints: ${{ matrix.python-version }}" shell: bash run: > diff --git a/.github/workflows/finalize-tests.yml b/.github/workflows/finalize-tests.yml index 48971ff789..3c97b5e757 100644 --- a/.github/workflows/finalize-tests.yml +++ b/.github/workflows/finalize-tests.yml @@ -120,31 +120,50 @@ jobs: run: git push - # Push BuildX cache to GitHub Registry in Apache repository, if all tests are successful and build - # is executed as result of direct push to "main" or one of the "vX-Y-test" branches - # It rebuilds all images using just-pushed constraints using buildx and pushes them to registry - # It will automatically check if a new python image was released and will pull the latest one if needed - # push-buildx-cache-to-github-registry: - # name: Push Regular Image Cache - # needs: [update-constraints] - # uses: ./.github/workflows/push-image-cache.yml - # permissions: - # contents: read - # packages: write - # secrets: inherit - # with: - # runs-on: ${{ inputs.runs-on }} - # cache-type: "Regular" - # include-prod-images: "true" - # push-latest-images: "true" - # use-uv: "true" - # image-tag: ${{ inputs.image-tag }} - # python-versions: ${{ inputs.python-versions }} - # branch: ${{ inputs.branch }} - # constraints-branch: ${{ inputs.constraints-branch }} - # include-success-outputs: ${{ inputs.include-success-outputs }} - # docker-cache: ${{ inputs.docker-cache }} - # if: inputs.canary-run == 'true' + push-buildx-cache-to-github-registry-amd: + name: Push Regular Image Cache + needs: [update-constraints] + uses: ./.github/workflows/push-image-cache.yml + permissions: + contents: read + packages: write + secrets: inherit + with: + cache-type: "Regular" + include-prod-images: "true" + push-latest-images: "true" + image-tag: ${{ inputs.image-tag }} + platform: "linux/amd64" + python-versions: ${{ inputs.python-versions }} + branch: ${{ inputs.branch }} + constraints-branch: ${{ inputs.constraints-branch }} + use-uv: "true" + include-success-outputs: ${{ 
inputs.include-success-outputs }} + docker-cache: ${{ inputs.docker-cache }} + if: inputs.canary-run == 'true' + + push-buildx-cache-to-github-registry-arm: + name: Push Regular Image Cache + needs: [update-constraints] + uses: ./.github/workflows/push-image-cache.yml + permissions: + contents: read + packages: write + secrets: inherit + with: + runs-on: ${{ inputs.runs-on }} + cache-type: "Regular" + include-prod-images: "true" + push-latest-images: "true" + image-tag: ${{ inputs.image-tag }} + platform: "linux/arm64" + python-versions: ${{ inputs.python-versions }} + branch: ${{ inputs.branch }} + constraints-branch: ${{ inputs.constraints-branch }} + use-uv: "true" + include-success-outputs: ${{ inputs.include-success-outputs }} + docker-cache: ${{ inputs.docker-cache }} + if: inputs.canary-run == 'true' summarize-warnings: timeout-minutes: 15 diff --git a/.github/workflows/prod-image-build.yml b/.github/workflows/prod-image-build.yml index 6005e44d56..27cd62d42c 100644 --- a/.github/workflows/prod-image-build.yml +++ b/.github/workflows/prod-image-build.yml @@ -77,6 +77,10 @@ on: # yamllint disable-line rule:truthy description: "JSON-formatted array of Python versions to build images from" required: true type: string + platform: + description: "Platform for the build - 'linux/amd64' or 'linux/arm64'" + required: true + type: string branch: description: "Branch used to run the CI jobs in (main/v2_*_test)." 
required: true @@ -111,8 +115,8 @@ jobs: timeout-minutes: 80 name: "\ ${{ inputs.do-build == 'true' && 'Build' || 'Skip building' }} \ -PROD ${{inputs.build-type}} image\ -${{matrix.python-version}}${{ inputs.do-build == 'true' && ':' || '' }}\ +PROD ${{ inputs.build-type }} image\ +${{ matrix.python-version }}${{ inputs.do-build == 'true' && ':' || '' }}\ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" runs-on: ${{ fromJSON(inputs.runs-on) }} env: @@ -248,6 +252,7 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" PUSH: ${{ inputs.push-image }} DOCKER_CACHE: ${{ inputs.docker-cache }} DEBIAN_VERSION: ${{ inputs.debian-version }} + BUILDER: "airflow_cache" INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} UPGRADE_TO_NEWER_DEPENDENCIES: ${{ inputs.upgrade-to-newer-dependencies }} INCLUDE_NOT_READY_PROVIDERS: "true" diff --git a/.github/workflows/prod-image-extra-checks.yml b/.github/workflows/prod-image-extra-checks.yml index 69dee4e4a9..faa904e438 100644 --- a/.github/workflows/prod-image-extra-checks.yml +++ b/.github/workflows/prod-image-extra-checks.yml @@ -59,6 +59,7 @@ jobs: image-tag: bullseye-${{ inputs.image-tag }} debian-version: "bullseye" python-versions: ${{ inputs.python-versions }} + platform: "linux/amd64" branch: ${{ inputs.branch }} # Always build images during the extra checks and never push them push-image: "false" @@ -76,6 +77,7 @@ jobs: image-tag: mysql-${{ inputs.image-tag }} install-mysql-client-type: "mysql" python-versions: ${{ inputs.python-versions }} + platform: "linux/amd64" branch: ${{ inputs.branch }} # Always build images during the extra checks and never push them push-image: "false" @@ -93,6 +95,7 @@ jobs: image-tag: mysql-${{ inputs.image-tag }} install-mysql-client-type: "mysql" python-versions: ${{ inputs.python-versions }} + platform: "linux/amd64" branch: ${{ inputs.branch }} # Always build images during the extra checks and never push them push-image: "false" diff --git 
a/.github/workflows/push-image-cache.yml b/.github/workflows/push-image-cache.yml index dbc8893a8f..798897f27d 100644 --- a/.github/workflows/push-image-cache.yml +++ b/.github/workflows/push-image-cache.yml @@ -23,14 +23,14 @@ on: # yamllint disable-line rule:truthy runs-on: description: "The array of labels (in json form) determining type of the runner to use for the build." required: false - default: '["self-hosted", "Linux", "X64"]' + default: "[\"ubuntu-22.04\"]" type: string cache-type: description: "Type of cache to push (Early / Regular)." required: true type: string include-prod-images: - description: "Whether to include prod images in the cache (true/false)." + description: "Whether to build PROD image cache additionally to CI image cache (true/false)." required: true type: string push-latest-images: @@ -45,14 +45,14 @@ on: # yamllint disable-line rule:truthy description: "MySQL client type to use during build (mariadb/mysql)" type: string default: "mariadb" - use-uv: - description: "Whether to use uv to build the image (true/false)" - required: true - type: string image-tag: description: "Tag to set for the image" required: true type: string + platform: + description: "Platform for the build - 'linux/amd64' or 'linux/arm64'" + required: true + type: string python-versions: description: "JSON-formatted array of Python versions to build images from" required: true @@ -65,6 +65,10 @@ on: # yamllint disable-line rule:truthy description: "Branch used to construct constraints URL from." required: true type: string + use-uv: + description: "Whether to use uv to build the image (true/false)" + required: true + type: string include-success-outputs: description: "Whether to include success outputs (true/false)." 
required: true @@ -74,15 +78,14 @@ on: # yamllint disable-line rule:truthy required: true type: string jobs: - push-ci-image-cache: - name: "Push CI image cache" + push-image-cache: + name: "Push image cache" runs-on: ${{ fromJSON(inputs.runs-on) }} strategy: fail-fast: false matrix: - python: ${{fromJson(inputs.python-versions)}} + python: ${{ fromJson(inputs.python-versions) }} env: - RUNS_ON: '["self-hosted", "Linux", "X64"]' DEBIAN_VERSION: ${{ inputs.debian-version }} INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} USE_UV: ${{ inputs.use-uv }} @@ -92,7 +95,6 @@ jobs: UPGRADE_TO_NEWER_DEPENDENCIES: "false" DOCKER_CACHE: ${{ inputs.docker-cache }} VERSION_SUFFIX_FOR_PYPI: "dev0" - PLATFORM: "linux/amd64,linux/arm64" GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COMMIT_SHA: ${{ github.sha }} VERBOSE: "true" @@ -109,25 +111,23 @@ jobs: uses: ./.github/actions/cleanup-docker - name: "Install Breeze" uses: ./.github/actions/breeze - - name: "Cleanup dist and context file" - run: rm -fv ./dist/* ./docker-context-files/* - if: inputs.include-prod-images == 'true' - name: "Start ARM instance" run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh + if: inputs.platform == 'linux/arm64' - name: Login to ghcr.io run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: "Push CI ${{ inputs.cache-type }} cache: ${{ matrix.python }} (AMD/ARM)" + - name: "Push CI ${{ inputs.cache-type }} cache: ${{ matrix.python }} ${{ inputs.platform }}" run: > - breeze ci-image build --builder airflow_cache - --prepare-buildx-cache --run-in-parallel --platform "linux/amd64,linux/arm64" - --python ${{ matrix.python }} + breeze ci-image build --builder airflow_cache --prepare-buildx-cache + --platform "${{ inputs.platform }}" --python ${{ matrix.python }} - name: "Stop ARM instance" run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() - - name: "Push CI latest images: ${{ matrix.python }} (AMD ONLY)" 
+ if: always() && inputs.platform == 'linux/arm64' + - name: "Push CI latest images: ${{ matrix.python }} (linux/amd64 only)" run: > - breeze ci-image build --tag-as-latest --push --python "${{matrix.python}}" --platform "linux/amd64" - if: inputs.push-latest-images == 'true' + breeze ci-image build --tag-as-latest --push + --python "${{ matrix.python }}" --platform "${{ inputs.platform }}" + if: inputs.push-latest-images == 'true' && inputs.platform == 'linux/amd64' push-prod-image-cache: name: "Push PROD image cache" @@ -135,9 +135,8 @@ jobs: strategy: fail-fast: false matrix: - python: ${{fromJson(inputs.python-versions)}} + python: ${{ fromJson(inputs.python-versions) }} env: - RUNS_ON: '["self-hosted", "Linux", "X64"]' DEBIAN_VERSION: ${{ inputs.debian-version }} INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} USE_UV: ${{ inputs.use-uv }} @@ -147,7 +146,6 @@ jobs: UPGRADE_TO_NEWER_DEPENDENCIES: "false" DOCKER_CACHE: ${{ inputs.docker-cache }} VERSION_SUFFIX_FOR_PYPI: "dev0" - PLATFORM: "linux/amd64,linux/arm64" GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COMMIT_SHA: ${{ github.sha }} VERBOSE: "true" @@ -177,23 +175,23 @@ jobs: run: cp -v --no-preserve=mode,ownership ./dist/*.whl ./docker-context-files - name: "Start ARM instance" run: ./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh + if: inputs.platform == 'linux/arm64' - name: Login to ghcr.io run: echo "${{ env.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: "Push PROD ${{ inputs.cache-type }} cache: ${{ matrix.python-version }} (AMD/ARM)" + - name: "Push PROD ${{ inputs.cache-type }} cache: ${{ matrix.python-version }} ${{ inputs.platform }}" run: > breeze prod-image build --builder airflow_cache - --prepare-buildx-cache --run-in-parallel --platform "linux/amd64,linux/arm64" + --prepare-buildx-cache --platform "${{ inputs.platform }}" --install-packages-from-context --airflow-constraints-mode constraints-source-providers --python 
${{ matrix.python }} - if: inputs.include-prod-images == 'true' - name: "Stop ARM instance" run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() + if: always() && inputs.platform == 'linux/arm64' # We only push "AMD" images as it is really only needed for any kind of automated builds in CI # and currently there is not an easy way to make multi-platform image from two separate builds # and we can do it after we stopped the ARM instance as it is not needed anymore - - name: "Push PROD latest image: ${{ matrix.python }} (AMD ONLY)" + - name: "Push PROD latest image: ${{ matrix.python }} (linux/amd64 ONLY)" run: > breeze prod-image build --tag-as-latest --install-packages-from-context - --push --python ${{ matrix.python}} --platform "linux/amd64" - if: inputs.push-latest-images == 'true' + --push --python ${{ matrix.python}} --platform "${{ inputs.platform }}" + if: inputs.push-latest-images == 'true' && inputs.platform == 'linux/amd64' diff --git a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py index 3ad92a19e5..84f638d88a 100644 --- a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py @@ -658,6 +658,7 @@ def autodetect_docker_context(): def get_and_use_docker_context(context: str): if context == "autodetect": context = autodetect_docker_context() + run_command(["docker", "context", "create", context], check=False) output = run_command(["docker", "context", "use", context], check=False) if output.returncode != 0: get_console().print(
