This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch v2-8-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 9a5f702e8d743729f8eda7ca5160bba26b249a89 Author: Jarek Potiuk <[email protected]> AuthorDate: Fri Jan 12 01:29:31 2024 +0100 Add support for reproducible build date epoch for Airflow releases (#36726) Hatch has built-in support for reproducible builds, however it uses a hard-coded 2020 date to generate the reproducible binaries, which produces whl, tar.gz files that contain file dates that are pretty old. This might be confusing for anyone who is looking at the file contents and timestamp inside. This PR adds support (similar to provider approach) to store current reproducible date in the repository - so that it can be committed and tagged together with Airflow sources. It is updated fully automatically by pre-commit whenever release notes change, which basically means that whenever release notes are updated just before release, the reproducible date is updated to current date. For now we only check if the packages produced by hatchling build are reproducible. (cherry picked from commit a2d6c389f69034c526554b3291874dc4d66c4529) --- .pre-commit-config.yaml | 12 +++++- .rat-excludes | 1 + STATIC_CODE_CHECKS.rst | 2 + airflow/reproducible_build.yaml | 2 + dev/README_RELEASE_AIRFLOW.md | 42 ++++++++++++++++++++ .../commands/release_management_commands.py | 5 ++- dev/breeze/src/airflow_breeze/pre_commit_ids.py | 1 + images/breeze/output_static-checks.svg | 6 +-- images/breeze/output_static-checks.txt | 2 +- .../pre_commit_update_source_date_epoch.py | 45 ++++++++++++++++++++++ .../in_container/run_prepare_airflow_packages.py | 13 ++++++- 11 files changed, 122 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f50076ba61..f7b4d0ae6c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -125,7 +125,7 @@ repos: - --fuzzy-match-generates-todo - id: insert-license name: Add license for all YAML files except Helm templates - exclude: ^\.github/.*$|^.*/.*_vendor/|^chart/templates/.* + exclude: 
^\.github/.*$|^.*/.*_vendor/|^chart/templates/.*|^airflow/reproducible_build.yaml$ types: [yaml] files: \.ya?ml$ args: @@ -244,7 +244,7 @@ repos: name: Check YAML files with yamllint entry: yamllint -c yamllint-config.yml --strict types: [yaml] - exclude: ^.*airflow\.template\.yaml$|^.*init_git_sync\.template\.yaml$|^.*/.*_vendor/|^chart/(?:templates|files)/.*\.yaml$|openapi/.*\.yaml$|^\.pre-commit-config\.yaml$ + exclude: ^.*airflow\.template\.yaml$|^.*init_git_sync\.template\.yaml$|^.*/.*_vendor/|^chart/(?:templates|files)/.*\.yaml$|openapi/.*\.yaml$|^\.pre-commit-config\.yaml$|^airflow/reproducible_build.yaml$ - repo: https://github.com/ikamensh/flynt rev: '1.0.1' hooks: @@ -794,6 +794,14 @@ repos: files: ^dev/breeze/pyproject\.toml$|^dev/breeze/README\.md$ pass_filenames: false require_serial: true + - id: update-reproducible-source-date-epoch + name: Update Source Date Epoch for reproducible builds + language: python + entry: ./scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py + files: ^RELEASE_NOTES.rst$ + additional_dependencies: ['rich>=12.4.4', 'pyyaml'] + pass_filenames: false + require_serial: true - id: check-breeze-top-dependencies-limited name: Breeze should have small number of top-level dependencies language: python diff --git a/.rat-excludes b/.rat-excludes index ab2296b487..9822c9ac41 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -86,6 +86,7 @@ PULL_REQUEST_TEMPLATE.md PROVIDER_CHANGES*.md manifests/* redirects.txt +reproducible_build.yaml # Locally mounted files .*egg-info/* diff --git a/STATIC_CODE_CHECKS.rst b/STATIC_CODE_CHECKS.rst index 68a0aef451..3f748579b0 100644 --- a/STATIC_CODE_CHECKS.rst +++ b/STATIC_CODE_CHECKS.rst @@ -394,6 +394,8 @@ require Breeze Docker image to be built locally. 
+-----------------------------------------------------------+--------------------------------------------------------------+---------+ | update-providers-dependencies | Update dependencies for provider packages | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ +| update-reproducible-source-date-epoch | Update Source Date Epoch for reproducible builds | | ++-----------------------------------------------------------+--------------------------------------------------------------+---------+ | update-spelling-wordlist-to-be-sorted | Sort alphabetically and uniquify spelling_wordlist.txt | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | update-supported-versions | Updates supported versions in documentation | | diff --git a/airflow/reproducible_build.yaml b/airflow/reproducible_build.yaml new file mode 100644 index 0000000000..2bd6204c80 --- /dev/null +++ b/airflow/reproducible_build.yaml @@ -0,0 +1,2 @@ +release-notes-hash: 81a945804fc42c18f416b5aa1f4b0fde +source-date-epoch: 1704922121 diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md index dabc02ea2d..50ee3b280d 100644 --- a/dev/README_RELEASE_AIRFLOW.md +++ b/dev/README_RELEASE_AIRFLOW.md @@ -34,6 +34,7 @@ - [Licence check](#licence-check) - [Signature check](#signature-check) - [SHA512 sum check](#sha512-sum-check) + - [Reproducible package check](#reproducible-package-check) - [Source code check](#source-code-check) - [Verify the release candidate by Contributors](#verify-the-release-candidate-by-contributors) - [Installing release candidate in your local virtual environment](#installing-release-candidate-in-your-local-virtual-environment) @@ -578,6 +579,47 @@ Checking apache_airflow-2.0.2rc4-py2.py3-none-any.whl.sha512 Checking apache-airflow-2.0.2rc4-source.tar.gz.sha512 ``` +## Reproducible package check + 
+Airflow supports reproducible builds, which means that the packages prepared from the same sources should +produce binary-identical packages in a reproducible way. You should check if the packages can be +binary-reproduced when built from the sources. + +Check out the Airflow sources and build packages in the dist folder: + +```shell script +git checkout X.Y.Zrc1 +export AIRFLOW_REPO_ROOT=$(pwd) +rm -rf dist/* +breeze release-management prepare-airflow-package --package-format both +``` + +That should produce `.whl` and `.tar.gz` packages in the dist folder. + +Change to the directory where you have the packages from svn: + +```shell script +# First clone the repo if you do not have it +cd .. +[ -d asf-dist ] || svn checkout --depth=immediates https://dist.apache.org/repos/dist asf-dist +svn update --set-depth=infinity asf-dist/dev/airflow + +# Then compare the packages +cd asf-dist/dev/airflow/X.Y.Zrc1 +for i in ${AIRFLOW_REPO_ROOT}/dist/* +do + echo "Checking if $(basename $i) is the same as $i" + diff "$(basename $i)" "$i" && echo "OK" +done +``` + +Each "Checking if ..." line should be followed by `OK` with no diff output (files are identical). +In case the files are different, you should see: + +``` +Binary files apache_airflow-2.9.0.dev0.tar.gz and .../apache_airflow-2.9.0.dev0.tar.gz differ +``` + ## Source code check You should check if the sources in the packages produced are the same as coming from the tag in git. 
diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index 39211c9658..ca2041bd94 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -209,12 +209,13 @@ GITPYTHON_VERSION = "3.1.40" RICH_VERSION = "13.7.0" NODE_VERSION = "21.2.0" PRE_COMMIT_VERSION = "3.5.0" +PYYAML_VERSION = "6.0.1" AIRFLOW_BUILD_DOCKERFILE = f""" FROM python:{DEFAULT_PYTHON_MAJOR_MINOR_VERSION}-slim-{ALLOWED_DEBIAN_VERSIONS[0]} RUN apt-get update && apt-get install -y --no-install-recommends git -RUN pip install pip=={AIRFLOW_PIP_VERSION} hatch==1.9.1 \ - gitpython=={GITPYTHON_VERSION} rich=={RICH_VERSION} pre-commit=={PRE_COMMIT_VERSION} +RUN pip install pip=={AIRFLOW_PIP_VERSION} hatch==1.9.1 pyyaml=={PYYAML_VERSION}\ + gitpython=={GITPYTHON_VERSION} rich=={RICH_VERSION} pre-commit=={PRE_COMMIT_VERSION} COPY . 
/opt/airflow """ diff --git a/dev/breeze/src/airflow_breeze/pre_commit_ids.py b/dev/breeze/src/airflow_breeze/pre_commit_ids.py index e5ad08afa1..8e64e536c7 100644 --- a/dev/breeze/src/airflow_breeze/pre_commit_ids.py +++ b/dev/breeze/src/airflow_breeze/pre_commit_ids.py @@ -129,6 +129,7 @@ PRE_COMMIT_LIST = [ "update-local-yml-file", "update-migration-references", "update-providers-dependencies", + "update-reproducible-source-date-epoch", "update-spelling-wordlist-to-be-sorted", "update-supported-versions", "update-vendored-in-k8s-json-schema", diff --git a/images/breeze/output_static-checks.svg b/images/breeze/output_static-checks.svg index 4212e283ea..acbc21e6b4 100644 --- a/images/breeze/output_static-checks.svg +++ b/images/breeze/output_static-checks.svg @@ -348,9 +348,9 @@ </text><text class="breeze-static-checks-r5" x="0" y="1118" textLength="12.2" clip-path="url(#breeze-static-checks-line-45)">│</text><text class="breeze-static-checks-r7" x="451.4" y="1118" textLength="988.2" clip-path="url(#breeze-static-checks-line-45)">update-er-diagram | update-extras | update-in-the-wild-to-be-sorted |            </text><text class="breeze-static-checks-r5" x="1451.8" y="1118" textL [...] </text><text class="breeze-static-checks-r5" x="0" y="1142.4" textLength="12.2" clip-path="url(#breeze-static-checks-line-46)">│</text><text class="breeze-static-checks-r7" x="451.4" y="1142.4" textLength="988.2" clip-path="url(#breeze-static-checks-line-46)">update-inlined-dockerfile-scripts | update-installed-providers-to-be-sorted |    </text><text class="breeze-static-checks-r5" x="1451.8" y="1142.4" textLength="12.2" clip-path="url(#breeze-static-c [...] 
</text><text class="breeze-static-checks-r5" x="0" y="1166.8" textLength="12.2" clip-path="url(#breeze-static-checks-line-47)">│</text><text class="breeze-static-checks-r7" x="451.4" y="1166.8" textLength="988.2" clip-path="url(#breeze-static-checks-line-47)">update-local-yml-file | update-migration-references |                           &# [...] -</text><text class="breeze-static-checks-r5" x="0" y="1191.2" textLength="12.2" clip-path="url(#breeze-static-checks-line-48)">│</text><text class="breeze-static-checks-r7" x="451.4" y="1191.2" textLength="988.2" clip-path="url(#breeze-static-checks-line-48)">update-providers-dependencies | update-spelling-wordlist-to-be-sorted |          </text><text class="breeze-static-checks-r5" x="1451.8" y="1191.2" textLength="12.2" c [...] -</text><text class="breeze-static-checks-r5" x="0" y="1215.6" textLength="12.2" clip-path="url(#breeze-static-checks-line-49)">│</text><text class="breeze-static-checks-r7" x="451.4" y="1215.6" textLength="988.2" clip-path="url(#breeze-static-checks-line-49)">update-supported-versions | update-vendored-in-k8s-json-schema | update-version |</text><text class="breeze-static-checks-r5" x="1451.8" y="1215.6" textLength="12.2" clip-path="url(#breeze-static-checks-line [...] -</text><text class="breeze-static-checks-r5" x="0" y="1240" textLength="12.2" clip-path="url(#breeze-static-checks-line-50)">│</text><text class="breeze-static-checks-r7" x="451.4" y="1240" textLength="988.2" clip-path="url(#breeze-static-checks-line-50)">yamllint)                                       [...] 
+</text><text class="breeze-static-checks-r5" x="0" y="1191.2" textLength="12.2" clip-path="url(#breeze-static-checks-line-48)">│</text><text class="breeze-static-checks-r7" x="451.4" y="1191.2" textLength="988.2" clip-path="url(#breeze-static-checks-line-48)">update-providers-dependencies | update-reproducible-source-date-epoch |          </text><text class="breeze-static-checks-r5" x="1451.8" y="1191.2" textLength="12.2" c [...] +</text><text class="breeze-static-checks-r5" x="0" y="1215.6" textLength="12.2" clip-path="url(#breeze-static-checks-line-49)">│</text><text class="breeze-static-checks-r7" x="451.4" y="1215.6" textLength="988.2" clip-path="url(#breeze-static-checks-line-49)">update-spelling-wordlist-to-be-sorted | update-supported-versions |              </text><text class="breeze-static-checks-r5" x="1451.8" y="1215.6" [...] +</text><text class="breeze-static-checks-r5" x="0" y="1240" textLength="12.2" clip-path="url(#breeze-static-checks-line-50)">│</text><text class="breeze-static-checks-r7" x="451.4" y="1240" textLength="988.2" clip-path="url(#breeze-static-checks-line-50)">update-vendored-in-k8s-json-schema | update-version | yamllint)                  </text><text class="breeze-static-checks-r5" [...] </text><text class="breeze-static-checks-r5" x="0" y="1264.4" textLength="12.2" clip-path="url(#breeze-static-checks-line-51)">│</text><text class="breeze-static-checks-r4" x="24.4" y="1264.4" textLength="12.2" clip-path="url(#breeze-static-checks-line-51)">-</text><text class="breeze-static-checks-r4" x="36.6" y="1264.4" textLength="61" clip-path="url(#breeze-static-checks-line-51)">-show</text><text class="breeze-static-checks-r4" x="97.6" y="1264.4" textLength="195.2" clip-path="url(# [...] 
</text><text class="breeze-static-checks-r5" x="0" y="1288.8" textLength="12.2" clip-path="url(#breeze-static-checks-line-52)">│</text><text class="breeze-static-checks-r4" x="24.4" y="1288.8" textLength="12.2" clip-path="url(#breeze-static-checks-line-52)">-</text><text class="breeze-static-checks-r4" x="36.6" y="1288.8" textLength="134.2" clip-path="url(#breeze-static-checks-line-52)">-initialize</text><text class="breeze-static-checks-r4" x="170.8" y="1288.8" textLength="146.4" clip-p [...] </text><text class="breeze-static-checks-r5" x="0" y="1313.2" textLength="12.2" clip-path="url(#breeze-static-checks-line-53)">│</text><text class="breeze-static-checks-r4" x="24.4" y="1313.2" textLength="12.2" clip-path="url(#breeze-static-checks-line-53)">-</text><text class="breeze-static-checks-r4" x="36.6" y="1313.2" textLength="48.8" clip-path="url(#breeze-static-checks-line-53)">-max</text><text class="breeze-static-checks-r4" x="85.4" y="1313.2" textLength="292.8" clip-path="url( [...] diff --git a/images/breeze/output_static-checks.txt b/images/breeze/output_static-checks.txt index 8f9982f8cf..b1b9c6d6be 100644 --- a/images/breeze/output_static-checks.txt +++ b/images/breeze/output_static-checks.txt @@ -1 +1 @@ -01342faaf5558fb2af9ce20c7ca0be8c +d466e07e997920503e147e798ed2b353 diff --git a/scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py b/scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py new file mode 100755 index 0000000000..6bd15d8bd3 --- /dev/null +++ b/scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import sys +from hashlib import md5 +from pathlib import Path +from time import time + +import yaml + +sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is importable + +from common_precommit_utils import AIRFLOW_SOURCES_ROOT_PATH + +RELEASE_NOTES_FILE_PATH = AIRFLOW_SOURCES_ROOT_PATH / "RELEASE_NOTES.rst" +REPRODUCIBLE_BUILD_FILE = AIRFLOW_SOURCES_ROOT_PATH / "airflow" / "reproducible_build.yaml" + +if __name__ == "__main__": + hash_md5 = md5() + hash_md5.update(RELEASE_NOTES_FILE_PATH.read_bytes()) + release_notes_hash = hash_md5.hexdigest() + reproducible_build_text = REPRODUCIBLE_BUILD_FILE.read_text() + reproducible_build = yaml.safe_load(reproducible_build_text) + old_hash = reproducible_build["release-notes-hash"] + if release_notes_hash != old_hash: + # Replace the hash in the file + reproducible_build["release-notes-hash"] = release_notes_hash + reproducible_build["source-date-epoch"] = int(time()) + REPRODUCIBLE_BUILD_FILE.write_text(yaml.dump(reproducible_build)) diff --git a/scripts/in_container/run_prepare_airflow_packages.py b/scripts/in_container/run_prepare_airflow_packages.py index 2f6f1912c2..66aae93ca4 100755 --- a/scripts/in_container/run_prepare_airflow_packages.py +++ b/scripts/in_container/run_prepare_airflow_packages.py @@ -26,11 +26,13 @@ from contextlib import contextmanager from pathlib import Path from shutil import rmtree +import yaml from rich.console import Console console = Console(color_system="standard", width=200) AIRFLOW_SOURCES_ROOT = 
Path(__file__).parents[2].resolve() +REPRODUCIBLE_BUILD_FILE = AIRFLOW_SOURCES_ROOT / "airflow" / "reproducible_build.yaml" AIRFLOW_INIT_FILE = AIRFLOW_SOURCES_ROOT / "airflow" / "__init__.py" WWW_DIRECTORY = AIRFLOW_SOURCES_ROOT / "airflow" / "www" VERSION_SUFFIX = os.environ.get("VERSION_SUFFIX_FOR_PYPI", "") @@ -81,8 +83,17 @@ def build_airflow_packages(package_format: str): if package_format in ["both", "sdist"]: build_command.extend(["-t", "sdist"]) + reproducible_date = yaml.safe_load(REPRODUCIBLE_BUILD_FILE.read_text())["source-date-epoch"] + + envcopy = os.environ.copy() + envcopy["SOURCE_DATE_EPOCH"] = str(reproducible_date) console.print(f"[bright_blue]Building packages: {package_format}\n") - build_process = subprocess.run(build_command, capture_output=False, cwd=AIRFLOW_SOURCES_ROOT) + build_process = subprocess.run( + build_command, + capture_output=False, + cwd=AIRFLOW_SOURCES_ROOT, + env=envcopy, + ) if build_process.returncode != 0: console.print("[red]Error building Airflow packages")
