This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch v2-8-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 9a5f702e8d743729f8eda7ca5160bba26b249a89
Author: Jarek Potiuk <[email protected]>
AuthorDate: Fri Jan 12 01:29:31 2024 +0100

    Add support for reproducible build date epoch for Airflow releases (#36726)
    
    Hatch has built-in support for reproducible builds, however it
    uses a hard-coded 2020 date to generate the reproducible binaries,
    which produces whl, tar.gz files that contain file dates that are
    pretty old. This might be confusing for anyone who is looking at
    the file contents and timestamp inside.
    
    This PR adds support (similar to provider approach) to store current
    reproducible date in the repository - so that it can be committed
    and tagged together with Airflow sources. It is updated fully
    automatically by pre-commit whenever release notes change, which
    basically means that whenever release notes are updated just
    before release, the reproducible date is updated to current date.
    
    For now we only check if the packages produced by hatchling
    build are reproducible.
    
    (cherry picked from commit a2d6c389f69034c526554b3291874dc4d66c4529)
---
 .pre-commit-config.yaml                            | 12 +++++-
 .rat-excludes                                      |  1 +
 STATIC_CODE_CHECKS.rst                             |  2 +
 airflow/reproducible_build.yaml                    |  2 +
 dev/README_RELEASE_AIRFLOW.md                      | 42 ++++++++++++++++++++
 .../commands/release_management_commands.py        |  5 ++-
 dev/breeze/src/airflow_breeze/pre_commit_ids.py    |  1 +
 images/breeze/output_static-checks.svg             |  6 +--
 images/breeze/output_static-checks.txt             |  2 +-
 .../pre_commit_update_source_date_epoch.py         | 45 ++++++++++++++++++++++
 .../in_container/run_prepare_airflow_packages.py   | 13 ++++++-
 11 files changed, 122 insertions(+), 9 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f50076ba61..f7b4d0ae6c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -125,7 +125,7 @@ repos:
           - --fuzzy-match-generates-todo
       - id: insert-license
         name: Add license for all YAML files except Helm templates
-        exclude: ^\.github/.*$|^.*/.*_vendor/|^chart/templates/.*
+        exclude: 
^\.github/.*$|^.*/.*_vendor/|^chart/templates/.*|^airflow/reproducible_build.yaml$
         types: [yaml]
         files: \.ya?ml$
         args:
@@ -244,7 +244,7 @@ repos:
         name: Check YAML files with yamllint
         entry: yamllint -c yamllint-config.yml --strict
         types: [yaml]
-        exclude: 
^.*airflow\.template\.yaml$|^.*init_git_sync\.template\.yaml$|^.*/.*_vendor/|^chart/(?:templates|files)/.*\.yaml$|openapi/.*\.yaml$|^\.pre-commit-config\.yaml$
+        exclude: 
^.*airflow\.template\.yaml$|^.*init_git_sync\.template\.yaml$|^.*/.*_vendor/|^chart/(?:templates|files)/.*\.yaml$|openapi/.*\.yaml$|^\.pre-commit-config\.yaml$|^airflow/reproducible_build.yaml$
   - repo: https://github.com/ikamensh/flynt
     rev: '1.0.1'
     hooks:
@@ -794,6 +794,14 @@ repos:
         files: ^dev/breeze/pyproject\.toml$|^dev/breeze/README\.md$
         pass_filenames: false
         require_serial: true
+      - id: update-reproducible-source-date-epoch
+        name: Update Source Date Epoch for reproducible builds
+        language: python
+        entry: ./scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py
+        files: ^RELEASE_NOTES.rst$
+        additional_dependencies: ['rich>=12.4.4', 'pyyaml']
+        pass_filenames: false
+        require_serial: true
       - id: check-breeze-top-dependencies-limited
         name: Breeze should have small number of top-level dependencies
         language: python
diff --git a/.rat-excludes b/.rat-excludes
index ab2296b487..9822c9ac41 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -86,6 +86,7 @@ PULL_REQUEST_TEMPLATE.md
 PROVIDER_CHANGES*.md
 manifests/*
 redirects.txt
+reproducible_build.yaml
 
 # Locally mounted files
 .*egg-info/*
diff --git a/STATIC_CODE_CHECKS.rst b/STATIC_CODE_CHECKS.rst
index 68a0aef451..3f748579b0 100644
--- a/STATIC_CODE_CHECKS.rst
+++ b/STATIC_CODE_CHECKS.rst
@@ -394,6 +394,8 @@ require Breeze Docker image to be built locally.
 
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
 | update-providers-dependencies                             | Update 
dependencies for provider packages                    |         |
 
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
+| update-reproducible-source-date-epoch                     | Update Source 
Date Epoch for reproducible builds             |         |
++-----------------------------------------------------------+--------------------------------------------------------------+---------+
 | update-spelling-wordlist-to-be-sorted                     | Sort 
alphabetically and uniquify spelling_wordlist.txt       |         |
 
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
 | update-supported-versions                                 | Updates 
supported versions in documentation                  |         |
diff --git a/airflow/reproducible_build.yaml b/airflow/reproducible_build.yaml
new file mode 100644
index 0000000000..2bd6204c80
--- /dev/null
+++ b/airflow/reproducible_build.yaml
@@ -0,0 +1,2 @@
+release-notes-hash: 81a945804fc42c18f416b5aa1f4b0fde
+source-date-epoch: 1704922121
diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md
index dabc02ea2d..50ee3b280d 100644
--- a/dev/README_RELEASE_AIRFLOW.md
+++ b/dev/README_RELEASE_AIRFLOW.md
@@ -34,6 +34,7 @@
   - [Licence check](#licence-check)
   - [Signature check](#signature-check)
   - [SHA512 sum check](#sha512-sum-check)
+  - [Reproducible package check](#reproducible-package-check)
   - [Source code check](#source-code-check)
 - [Verify the release candidate by 
Contributors](#verify-the-release-candidate-by-contributors)
   - [Installing release candidate in your local virtual 
environment](#installing-release-candidate-in-your-local-virtual-environment)
@@ -578,6 +579,47 @@ Checking 
apache_airflow-2.0.2rc4-py2.py3-none-any.whl.sha512
 Checking apache-airflow-2.0.2rc4-source.tar.gz.sha512
 ```
 
+## Reproducible package check
+
+Airflow supports reproducible builds, which means that the packages prepared 
from the same sources should
+produce binary identical packages in reproducible way. You should check if the 
packages can be
+binary-reproduced when built from the sources.
+
+Checkout airflow sources and build packages in dist folder:
+
+```shell script
+git checkout X.Y.Zrc1
+export AIRFLOW_REPO_ROOT=$(pwd)
+rm -rf dist/*
+breeze release-management prepare-airflow-package --package-format both
+```
+
+That should produce `.whl` and `.tar.gz` packages in dist folder.
+
+Change to the directory where you have the packages from svn:
+
+```shell script
+# First clone the repo if you do not have it
+cd ..
+[ -d asf-dist ] || svn checkout --depth=immediates 
https://dist.apache.org/repos/dist asf-dist
+svn update --set-depth=infinity asf-dist/dev/airflow
+
+# Then compare the packages
+cd asf-dist/dev/airflow/X.Y.Zrc1
+for i in ${AIRFLOW_REPO_ROOT}/dist/*
+do
+  echo "Checking if $(basename $i) is the same as $i"
+  diff "$(basename $i)" "$i" && echo "OK"
+done
+```
+
+The output should be empty (files are identical).
+In case the files are different, you should see:
+
+```
+Binary files apache_airflow-2.9.0.dev0.tar.gz and 
.../apache_airflow-2.9.0.dev0.tar.gz differ
+```
+
 ## Source code check
 
 You should check if the sources in the packages produced are the same as 
coming from the tag in git.
diff --git 
a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py 
b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py
index 39211c9658..ca2041bd94 100644
--- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py
+++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py
@@ -209,12 +209,13 @@ GITPYTHON_VERSION = "3.1.40"
 RICH_VERSION = "13.7.0"
 NODE_VERSION = "21.2.0"
 PRE_COMMIT_VERSION = "3.5.0"
+PYYAML_VERSION = "6.0.1"
 
 AIRFLOW_BUILD_DOCKERFILE = f"""
 FROM 
python:{DEFAULT_PYTHON_MAJOR_MINOR_VERSION}-slim-{ALLOWED_DEBIAN_VERSIONS[0]}
 RUN apt-get update && apt-get install -y --no-install-recommends git
-RUN pip install pip=={AIRFLOW_PIP_VERSION} hatch==1.9.1 \
-  gitpython=={GITPYTHON_VERSION} rich=={RICH_VERSION} 
pre-commit=={PRE_COMMIT_VERSION}
+RUN pip install pip=={AIRFLOW_PIP_VERSION} hatch==1.9.1 
pyyaml=={PYYAML_VERSION}\
+ gitpython=={GITPYTHON_VERSION} rich=={RICH_VERSION} 
pre-commit=={PRE_COMMIT_VERSION}
 COPY . /opt/airflow
 """
 
diff --git a/dev/breeze/src/airflow_breeze/pre_commit_ids.py 
b/dev/breeze/src/airflow_breeze/pre_commit_ids.py
index e5ad08afa1..8e64e536c7 100644
--- a/dev/breeze/src/airflow_breeze/pre_commit_ids.py
+++ b/dev/breeze/src/airflow_breeze/pre_commit_ids.py
@@ -129,6 +129,7 @@ PRE_COMMIT_LIST = [
     "update-local-yml-file",
     "update-migration-references",
     "update-providers-dependencies",
+    "update-reproducible-source-date-epoch",
     "update-spelling-wordlist-to-be-sorted",
     "update-supported-versions",
     "update-vendored-in-k8s-json-schema",
diff --git a/images/breeze/output_static-checks.svg 
b/images/breeze/output_static-checks.svg
index 4212e283ea..acbc21e6b4 100644
--- a/images/breeze/output_static-checks.svg
+++ b/images/breeze/output_static-checks.svg
@@ -348,9 +348,9 @@
 </text><text class="breeze-static-checks-r5" x="0" y="1118" textLength="12.2" 
clip-path="url(#breeze-static-checks-line-45)">│</text><text 
class="breeze-static-checks-r7" x="451.4" y="1118" textLength="988.2" 
clip-path="url(#breeze-static-checks-line-45)">update-er-diagram&#160;|&#160;update-extras&#160;|&#160;update-in-the-wild-to-be-sorted&#160;|&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="breeze-static-checks-r5" x="1451.8" y="1118" textL [...]
 </text><text class="breeze-static-checks-r5" x="0" y="1142.4" 
textLength="12.2" clip-path="url(#breeze-static-checks-line-46)">│</text><text 
class="breeze-static-checks-r7" x="451.4" y="1142.4" textLength="988.2" 
clip-path="url(#breeze-static-checks-line-46)">update-inlined-dockerfile-scripts&#160;|&#160;update-installed-providers-to-be-sorted&#160;|&#160;&#160;&#160;&#160;</text><text
 class="breeze-static-checks-r5" x="1451.8" y="1142.4" textLength="12.2" 
clip-path="url(#breeze-static-c [...]
 </text><text class="breeze-static-checks-r5" x="0" y="1166.8" 
textLength="12.2" clip-path="url(#breeze-static-checks-line-47)">│</text><text 
class="breeze-static-checks-r7" x="451.4" y="1166.8" textLength="988.2" 
clip-path="url(#breeze-static-checks-line-47)">update-local-yml-file&#160;|&#160;update-migration-references&#160;|&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#
 [...]
-</text><text class="breeze-static-checks-r5" x="0" y="1191.2" 
textLength="12.2" clip-path="url(#breeze-static-checks-line-48)">│</text><text 
class="breeze-static-checks-r7" x="451.4" y="1191.2" textLength="988.2" 
clip-path="url(#breeze-static-checks-line-48)">update-providers-dependencies&#160;|&#160;update-spelling-wordlist-to-be-sorted&#160;|&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="breeze-static-checks-r5" x="1451.8" y="1191.2" textLength="12.2" c [...]
-</text><text class="breeze-static-checks-r5" x="0" y="1215.6" 
textLength="12.2" clip-path="url(#breeze-static-checks-line-49)">│</text><text 
class="breeze-static-checks-r7" x="451.4" y="1215.6" textLength="988.2" 
clip-path="url(#breeze-static-checks-line-49)">update-supported-versions&#160;|&#160;update-vendored-in-k8s-json-schema&#160;|&#160;update-version&#160;|</text><text
 class="breeze-static-checks-r5" x="1451.8" y="1215.6" textLength="12.2" 
clip-path="url(#breeze-static-checks-line [...]
-</text><text class="breeze-static-checks-r5" x="0" y="1240" textLength="12.2" 
clip-path="url(#breeze-static-checks-line-50)">│</text><text 
class="breeze-static-checks-r7" x="451.4" y="1240" textLength="988.2" 
clip-path="url(#breeze-static-checks-line-50)">yamllint)&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160
 [...]
+</text><text class="breeze-static-checks-r5" x="0" y="1191.2" 
textLength="12.2" clip-path="url(#breeze-static-checks-line-48)">│</text><text 
class="breeze-static-checks-r7" x="451.4" y="1191.2" textLength="988.2" 
clip-path="url(#breeze-static-checks-line-48)">update-providers-dependencies&#160;|&#160;update-reproducible-source-date-epoch&#160;|&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="breeze-static-checks-r5" x="1451.8" y="1191.2" textLength="12.2" c [...]
+</text><text class="breeze-static-checks-r5" x="0" y="1215.6" 
textLength="12.2" clip-path="url(#breeze-static-checks-line-49)">│</text><text 
class="breeze-static-checks-r7" x="451.4" y="1215.6" textLength="988.2" 
clip-path="url(#breeze-static-checks-line-49)">update-spelling-wordlist-to-be-sorted&#160;|&#160;update-supported-versions&#160;|&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="breeze-static-checks-r5" x="1451.8" y="1215.6" [...]
+</text><text class="breeze-static-checks-r5" x="0" y="1240" textLength="12.2" 
clip-path="url(#breeze-static-checks-line-50)">│</text><text 
class="breeze-static-checks-r7" x="451.4" y="1240" textLength="988.2" 
clip-path="url(#breeze-static-checks-line-50)">update-vendored-in-k8s-json-schema&#160;|&#160;update-version&#160;|&#160;yamllint)&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="breeze-static-checks-r5"  [...]
 </text><text class="breeze-static-checks-r5" x="0" y="1264.4" 
textLength="12.2" clip-path="url(#breeze-static-checks-line-51)">│</text><text 
class="breeze-static-checks-r4" x="24.4" y="1264.4" textLength="12.2" 
clip-path="url(#breeze-static-checks-line-51)">-</text><text 
class="breeze-static-checks-r4" x="36.6" y="1264.4" textLength="61" 
clip-path="url(#breeze-static-checks-line-51)">-show</text><text 
class="breeze-static-checks-r4" x="97.6" y="1264.4" textLength="195.2" 
clip-path="url(# [...]
 </text><text class="breeze-static-checks-r5" x="0" y="1288.8" 
textLength="12.2" clip-path="url(#breeze-static-checks-line-52)">│</text><text 
class="breeze-static-checks-r4" x="24.4" y="1288.8" textLength="12.2" 
clip-path="url(#breeze-static-checks-line-52)">-</text><text 
class="breeze-static-checks-r4" x="36.6" y="1288.8" textLength="134.2" 
clip-path="url(#breeze-static-checks-line-52)">-initialize</text><text 
class="breeze-static-checks-r4" x="170.8" y="1288.8" textLength="146.4" clip-p 
[...]
 </text><text class="breeze-static-checks-r5" x="0" y="1313.2" 
textLength="12.2" clip-path="url(#breeze-static-checks-line-53)">│</text><text 
class="breeze-static-checks-r4" x="24.4" y="1313.2" textLength="12.2" 
clip-path="url(#breeze-static-checks-line-53)">-</text><text 
class="breeze-static-checks-r4" x="36.6" y="1313.2" textLength="48.8" 
clip-path="url(#breeze-static-checks-line-53)">-max</text><text 
class="breeze-static-checks-r4" x="85.4" y="1313.2" textLength="292.8" 
clip-path="url( [...]
diff --git a/images/breeze/output_static-checks.txt 
b/images/breeze/output_static-checks.txt
index 8f9982f8cf..b1b9c6d6be 100644
--- a/images/breeze/output_static-checks.txt
+++ b/images/breeze/output_static-checks.txt
@@ -1 +1 @@
-01342faaf5558fb2af9ce20c7ca0be8c
+d466e07e997920503e147e798ed2b353
diff --git a/scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py 
b/scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py
new file mode 100755
index 0000000000..6bd15d8bd3
--- /dev/null
+++ b/scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import sys
+from hashlib import md5
+from pathlib import Path
+from time import time
+
+import yaml
+
+sys.path.insert(0, str(Path(__file__).parent.resolve()))  # make sure 
common_precommit_utils is importable
+
+from common_precommit_utils import AIRFLOW_SOURCES_ROOT_PATH
+
+RELEASE_NOTES_FILE_PATH = AIRFLOW_SOURCES_ROOT_PATH / "RELEASE_NOTES.rst"
+REPRODUCIBLE_BUILD_FILE = AIRFLOW_SOURCES_ROOT_PATH / "airflow" / 
"reproducible_build.yaml"
+
+if __name__ == "__main__":
+    hash_md5 = md5()
+    hash_md5.update(RELEASE_NOTES_FILE_PATH.read_bytes())
+    release_notes_hash = hash_md5.hexdigest()
+    reproducible_build_text = REPRODUCIBLE_BUILD_FILE.read_text()
+    reproducible_build = yaml.safe_load(reproducible_build_text)
+    old_hash = reproducible_build["release-notes-hash"]
+    if release_notes_hash != old_hash:
+        # Replace the hash in the file
+        reproducible_build["release-notes-hash"] = release_notes_hash
+        reproducible_build["source-date-epoch"] = int(time())
+    REPRODUCIBLE_BUILD_FILE.write_text(yaml.dump(reproducible_build))
diff --git a/scripts/in_container/run_prepare_airflow_packages.py 
b/scripts/in_container/run_prepare_airflow_packages.py
index 2f6f1912c2..66aae93ca4 100755
--- a/scripts/in_container/run_prepare_airflow_packages.py
+++ b/scripts/in_container/run_prepare_airflow_packages.py
@@ -26,11 +26,13 @@ from contextlib import contextmanager
 from pathlib import Path
 from shutil import rmtree
 
+import yaml
 from rich.console import Console
 
 console = Console(color_system="standard", width=200)
 
 AIRFLOW_SOURCES_ROOT = Path(__file__).parents[2].resolve()
+REPRODUCIBLE_BUILD_FILE = AIRFLOW_SOURCES_ROOT / "airflow" / 
"reproducible_build.yaml"
 AIRFLOW_INIT_FILE = AIRFLOW_SOURCES_ROOT / "airflow" / "__init__.py"
 WWW_DIRECTORY = AIRFLOW_SOURCES_ROOT / "airflow" / "www"
 VERSION_SUFFIX = os.environ.get("VERSION_SUFFIX_FOR_PYPI", "")
@@ -81,8 +83,17 @@ def build_airflow_packages(package_format: str):
     if package_format in ["both", "sdist"]:
         build_command.extend(["-t", "sdist"])
 
+    reproducible_date = 
yaml.safe_load(REPRODUCIBLE_BUILD_FILE.read_text())["source-date-epoch"]
+
+    envcopy = os.environ.copy()
+    envcopy["SOURCE_DATE_EPOCH"] = str(reproducible_date)
     console.print(f"[bright_blue]Building packages: {package_format}\n")
-    build_process = subprocess.run(build_command, capture_output=False, 
cwd=AIRFLOW_SOURCES_ROOT)
+    build_process = subprocess.run(
+        build_command,
+        capture_output=False,
+        cwd=AIRFLOW_SOURCES_ROOT,
+        env=envcopy,
+    )
 
     if build_process.returncode != 0:
         console.print("[red]Error building Airflow packages")

Reply via email to