This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new a2d6c389f6 Add support for reproducible build date epoch for Airflow
releases (#36726)
a2d6c389f6 is described below
commit a2d6c389f69034c526554b3291874dc4d66c4529
Author: Jarek Potiuk <[email protected]>
AuthorDate: Fri Jan 12 01:29:31 2024 +0100
Add support for reproducible build date epoch for Airflow releases (#36726)
Hatch has built-in support for reproducible builds, however it
uses a hard-coded 2020 date to generate the reproducible binaries,
which produces whl, tar.gz files that contain file dates that are
pretty old. This might be confusing for anyone who is looking at
the file contents and timestamp inside.
This PR adds support (similar to provider approach) to store current
reproducible date in the repository - so that it can be committed
and tagged together with Airflow sources. It is updated fully
automatically by pre-commit whenever release notes change, which
basically means that whenever release notes are updated just
before release, the reproducible date is updated to the current date.
For now we only check if the packages produced by hatchling
build are reproducible.
---
.pre-commit-config.yaml | 12 +++++-
.rat-excludes | 1 +
STATIC_CODE_CHECKS.rst | 2 +
airflow/reproducible_build.yaml | 2 +
dev/README_RELEASE_AIRFLOW.md | 42 ++++++++++++++++++++
.../commands/release_management_commands.py | 5 ++-
dev/breeze/src/airflow_breeze/pre_commit_ids.py | 1 +
images/breeze/output_static-checks.svg | 6 +--
images/breeze/output_static-checks.txt | 2 +-
.../pre_commit_update_source_date_epoch.py | 45 ++++++++++++++++++++++
.../in_container/run_prepare_airflow_packages.py | 13 ++++++-
11 files changed, 122 insertions(+), 9 deletions(-)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1e1d7e4e99..da64ff457f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -125,7 +125,7 @@ repos:
- --fuzzy-match-generates-todo
- id: insert-license
name: Add license for all YAML files except Helm templates
- exclude: ^\.github/.*$|^.*/.*_vendor/|^chart/templates/.*
+ exclude:
^\.github/.*$|^.*/.*_vendor/|^chart/templates/.*|^airflow/reproducible_build.yaml$
types: [yaml]
files: \.ya?ml$
args:
@@ -244,7 +244,7 @@ repos:
name: Check YAML files with yamllint
entry: yamllint -c yamllint-config.yml --strict
types: [yaml]
- exclude:
^.*airflow\.template\.yaml$|^.*init_git_sync\.template\.yaml$|^.*/.*_vendor/|^chart/(?:templates|files)/.*\.yaml$|openapi/.*\.yaml$|^\.pre-commit-config\.yaml$
+ exclude:
^.*airflow\.template\.yaml$|^.*init_git_sync\.template\.yaml$|^.*/.*_vendor/|^chart/(?:templates|files)/.*\.yaml$|openapi/.*\.yaml$|^\.pre-commit-config\.yaml$|^airflow/reproducible_build.yaml$
- repo: https://github.com/ikamensh/flynt
rev: '1.0.1'
hooks:
@@ -794,6 +794,14 @@ repos:
files: ^dev/breeze/pyproject\.toml$|^dev/breeze/README\.md$
pass_filenames: false
require_serial: true
+ - id: update-reproducible-source-date-epoch
+ name: Update Source Date Epoch for reproducible builds
+ language: python
+ entry: ./scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py
+ files: ^RELEASE_NOTES.rst$
+ additional_dependencies: ['rich>=12.4.4', 'pyyaml']
+ pass_filenames: false
+ require_serial: true
- id: check-breeze-top-dependencies-limited
name: Breeze should have small number of top-level dependencies
language: python
diff --git a/.rat-excludes b/.rat-excludes
index ab2296b487..9822c9ac41 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -86,6 +86,7 @@ PULL_REQUEST_TEMPLATE.md
PROVIDER_CHANGES*.md
manifests/*
redirects.txt
+reproducible_build.yaml
# Locally mounted files
.*egg-info/*
diff --git a/STATIC_CODE_CHECKS.rst b/STATIC_CODE_CHECKS.rst
index 952bf5fd23..de5a25edbd 100644
--- a/STATIC_CODE_CHECKS.rst
+++ b/STATIC_CODE_CHECKS.rst
@@ -394,6 +394,8 @@ require Breeze Docker image to be built locally.
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
| update-providers-dependencies | Update
dependencies for provider packages | |
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
+| update-reproducible-source-date-epoch | Update Source
Date Epoch for reproducible builds | |
++-----------------------------------------------------------+--------------------------------------------------------------+---------+
| update-spelling-wordlist-to-be-sorted | Sort
alphabetically and uniquify spelling_wordlist.txt | |
+-----------------------------------------------------------+--------------------------------------------------------------+---------+
| update-supported-versions | Updates
supported versions in documentation | |
diff --git a/airflow/reproducible_build.yaml b/airflow/reproducible_build.yaml
new file mode 100644
index 0000000000..2bd6204c80
--- /dev/null
+++ b/airflow/reproducible_build.yaml
@@ -0,0 +1,2 @@
+release-notes-hash: 81a945804fc42c18f416b5aa1f4b0fde
+source-date-epoch: 1704922121
diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md
index ebebc577c8..6b51c9189f 100644
--- a/dev/README_RELEASE_AIRFLOW.md
+++ b/dev/README_RELEASE_AIRFLOW.md
@@ -34,6 +34,7 @@
- [Licence check](#licence-check)
- [Signature check](#signature-check)
- [SHA512 sum check](#sha512-sum-check)
+ - [Reproducible package check](#reproducible-package-check)
- [Source code check](#source-code-check)
- [Verify the release candidate by
Contributors](#verify-the-release-candidate-by-contributors)
- [Installing release candidate in your local virtual
environment](#installing-release-candidate-in-your-local-virtual-environment)
@@ -578,6 +579,47 @@ Checking
apache_airflow-2.0.2rc4-py2.py3-none-any.whl.sha512
Checking apache-airflow-2.0.2rc4-source.tar.gz.sha512
```
+## Reproducible package check
+
+Airflow supports reproducible builds, which means that the packages prepared
from the same sources should
+produce binary-identical packages in a reproducible way. You should check if the
packages can be
+binary-reproduced when built from the sources.
+
+Checkout airflow sources and build packages in dist folder:
+
+```shell script
+git checkout X.Y.Zrc1
+export AIRFLOW_REPO_ROOT=$(pwd)
+rm -rf dist/*
+breeze release-management prepare-airflow-package --package-format both
+```
+
+That should produce `.whl` and `.tar.gz` packages in dist folder.
+
+Change to the directory where you have the packages from svn:
+
+```shell script
+# First clone the repo if you do not have it
+cd ..
+[ -d asf-dist ] || svn checkout --depth=immediates
https://dist.apache.org/repos/dist asf-dist
+svn update --set-depth=infinity asf-dist/dev/airflow
+
+# Then compare the packages
+cd asf-dist/dev/airflow/X.Y.Zrc1
+for i in ${AIRFLOW_REPO_ROOT}/dist/*
+do
+ echo "Checking if $(basename $i) is the same as $i"
+ diff "$(basename $i)" "$i" && echo "OK"
+done
+```
+
+For identical files, `diff` prints nothing and each "Checking ..." line is followed by `OK`.
+In case the files are different, you should see:
+
+```
+Binary files apache_airflow-2.9.0.dev0.tar.gz and
.../apache_airflow-2.9.0.dev0.tar.gz differ
+```
+
## Source code check
You should check if the sources in the packages produced are the same as
coming from the tag in git.
diff --git
a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py
b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py
index 6e7bd2d06b..5bdfbf99a0 100644
--- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py
+++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py
@@ -210,12 +210,13 @@ GITPYTHON_VERSION = "3.1.40"
RICH_VERSION = "13.7.0"
NODE_VERSION = "21.2.0"
PRE_COMMIT_VERSION = "3.5.0"
+PYYAML_VERSION = "6.0.1"
AIRFLOW_BUILD_DOCKERFILE = f"""
FROM
python:{DEFAULT_PYTHON_MAJOR_MINOR_VERSION}-slim-{ALLOWED_DEBIAN_VERSIONS[0]}
RUN apt-get update && apt-get install -y --no-install-recommends git
-RUN pip install pip=={AIRFLOW_PIP_VERSION} hatch==1.9.1 \
- gitpython=={GITPYTHON_VERSION} rich=={RICH_VERSION}
pre-commit=={PRE_COMMIT_VERSION}
+RUN pip install pip=={AIRFLOW_PIP_VERSION} hatch==1.9.1
pyyaml=={PYYAML_VERSION}\
+ gitpython=={GITPYTHON_VERSION} rich=={RICH_VERSION}
pre-commit=={PRE_COMMIT_VERSION}
COPY . /opt/airflow
"""
diff --git a/dev/breeze/src/airflow_breeze/pre_commit_ids.py
b/dev/breeze/src/airflow_breeze/pre_commit_ids.py
index 773c7b5ff9..46ad4b907b 100644
--- a/dev/breeze/src/airflow_breeze/pre_commit_ids.py
+++ b/dev/breeze/src/airflow_breeze/pre_commit_ids.py
@@ -129,6 +129,7 @@ PRE_COMMIT_LIST = [
"update-local-yml-file",
"update-migration-references",
"update-providers-dependencies",
+ "update-reproducible-source-date-epoch",
"update-spelling-wordlist-to-be-sorted",
"update-supported-versions",
"update-vendored-in-k8s-json-schema",
diff --git a/images/breeze/output_static-checks.svg
b/images/breeze/output_static-checks.svg
index 5591b99cd1..45a1f74acd 100644
--- a/images/breeze/output_static-checks.svg
+++ b/images/breeze/output_static-checks.svg
@@ -348,9 +348,9 @@
</text><text class="breeze-static-checks-r5" x="0" y="1118" textLength="12.2"
clip-path="url(#breeze-static-checks-line-45)">│</text><text
class="breeze-static-checks-r7" x="451.4" y="1118" textLength="988.2"
clip-path="url(#breeze-static-checks-line-45)">update-er-diagram | update-extras | update-in-the-wild-to-be-sorted |            </text><text
class="breeze-static-checks-r5" x="1451.8" y="1118" textL [...]
</text><text class="breeze-static-checks-r5" x="0" y="1142.4"
textLength="12.2" clip-path="url(#breeze-static-checks-line-46)">│</text><text
class="breeze-static-checks-r7" x="451.4" y="1142.4" textLength="988.2"
clip-path="url(#breeze-static-checks-line-46)">update-inlined-dockerfile-scripts | update-installed-providers-to-be-sorted |    </text><text
class="breeze-static-checks-r5" x="1451.8" y="1142.4" textLength="12.2"
clip-path="url(#breeze-static-c [...]
</text><text class="breeze-static-checks-r5" x="0" y="1166.8"
textLength="12.2" clip-path="url(#breeze-static-checks-line-47)">│</text><text
class="breeze-static-checks-r7" x="451.4" y="1166.8" textLength="988.2"
clip-path="url(#breeze-static-checks-line-47)">update-local-yml-file | update-migration-references |                           &#
[...]
-</text><text class="breeze-static-checks-r5" x="0" y="1191.2"
textLength="12.2" clip-path="url(#breeze-static-checks-line-48)">│</text><text
class="breeze-static-checks-r7" x="451.4" y="1191.2" textLength="988.2"
clip-path="url(#breeze-static-checks-line-48)">update-providers-dependencies | update-spelling-wordlist-to-be-sorted |          </text><text
class="breeze-static-checks-r5" x="1451.8" y="1191.2" textLength="12.2" c [...]
-</text><text class="breeze-static-checks-r5" x="0" y="1215.6"
textLength="12.2" clip-path="url(#breeze-static-checks-line-49)">│</text><text
class="breeze-static-checks-r7" x="451.4" y="1215.6" textLength="988.2"
clip-path="url(#breeze-static-checks-line-49)">update-supported-versions | update-vendored-in-k8s-json-schema | update-version |</text><text
class="breeze-static-checks-r5" x="1451.8" y="1215.6" textLength="12.2"
clip-path="url(#breeze-static-checks-line [...]
-</text><text class="breeze-static-checks-r5" x="0" y="1240" textLength="12.2"
clip-path="url(#breeze-static-checks-line-50)">│</text><text
class="breeze-static-checks-r7" x="451.4" y="1240" textLength="988.2"
clip-path="url(#breeze-static-checks-line-50)">yamllint)                                      
[...]
+</text><text class="breeze-static-checks-r5" x="0" y="1191.2"
textLength="12.2" clip-path="url(#breeze-static-checks-line-48)">│</text><text
class="breeze-static-checks-r7" x="451.4" y="1191.2" textLength="988.2"
clip-path="url(#breeze-static-checks-line-48)">update-providers-dependencies | update-reproducible-source-date-epoch |          </text><text
class="breeze-static-checks-r5" x="1451.8" y="1191.2" textLength="12.2" c [...]
+</text><text class="breeze-static-checks-r5" x="0" y="1215.6"
textLength="12.2" clip-path="url(#breeze-static-checks-line-49)">│</text><text
class="breeze-static-checks-r7" x="451.4" y="1215.6" textLength="988.2"
clip-path="url(#breeze-static-checks-line-49)">update-spelling-wordlist-to-be-sorted | update-supported-versions |              </text><text
class="breeze-static-checks-r5" x="1451.8" y="1215.6" [...]
+</text><text class="breeze-static-checks-r5" x="0" y="1240" textLength="12.2"
clip-path="url(#breeze-static-checks-line-50)">│</text><text
class="breeze-static-checks-r7" x="451.4" y="1240" textLength="988.2"
clip-path="url(#breeze-static-checks-line-50)">update-vendored-in-k8s-json-schema | update-version | yamllint)                  </text><text
class="breeze-static-checks-r5" [...]
</text><text class="breeze-static-checks-r5" x="0" y="1264.4"
textLength="12.2" clip-path="url(#breeze-static-checks-line-51)">│</text><text
class="breeze-static-checks-r4" x="24.4" y="1264.4" textLength="12.2"
clip-path="url(#breeze-static-checks-line-51)">-</text><text
class="breeze-static-checks-r4" x="36.6" y="1264.4" textLength="61"
clip-path="url(#breeze-static-checks-line-51)">-show</text><text
class="breeze-static-checks-r4" x="97.6" y="1264.4" textLength="195.2"
clip-path="url(# [...]
</text><text class="breeze-static-checks-r5" x="0" y="1288.8"
textLength="12.2" clip-path="url(#breeze-static-checks-line-52)">│</text><text
class="breeze-static-checks-r4" x="24.4" y="1288.8" textLength="12.2"
clip-path="url(#breeze-static-checks-line-52)">-</text><text
class="breeze-static-checks-r4" x="36.6" y="1288.8" textLength="134.2"
clip-path="url(#breeze-static-checks-line-52)">-initialize</text><text
class="breeze-static-checks-r4" x="170.8" y="1288.8" textLength="146.4" clip-p
[...]
</text><text class="breeze-static-checks-r5" x="0" y="1313.2"
textLength="12.2" clip-path="url(#breeze-static-checks-line-53)">│</text><text
class="breeze-static-checks-r4" x="24.4" y="1313.2" textLength="12.2"
clip-path="url(#breeze-static-checks-line-53)">-</text><text
class="breeze-static-checks-r4" x="36.6" y="1313.2" textLength="48.8"
clip-path="url(#breeze-static-checks-line-53)">-max</text><text
class="breeze-static-checks-r4" x="85.4" y="1313.2" textLength="292.8"
clip-path="url( [...]
diff --git a/images/breeze/output_static-checks.txt
b/images/breeze/output_static-checks.txt
index d14e7c4faf..8799fd3895 100644
--- a/images/breeze/output_static-checks.txt
+++ b/images/breeze/output_static-checks.txt
@@ -1 +1 @@
-d3ff74b53801cfe8664bf29625ad953f
+29332d159ef565f70d92c02e76a40902
diff --git a/scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py
b/scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py
new file mode 100755
index 0000000000..6bd15d8bd3
--- /dev/null
+++ b/scripts/ci/pre_commit/pre_commit_update_source_date_epoch.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import sys
+from hashlib import md5
+from pathlib import Path
+from time import time
+
+import yaml
+
+sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure
common_precommit_utils is importable
+
+from common_precommit_utils import AIRFLOW_SOURCES_ROOT_PATH
+
+RELEASE_NOTES_FILE_PATH = AIRFLOW_SOURCES_ROOT_PATH / "RELEASE_NOTES.rst"
+REPRODUCIBLE_BUILD_FILE = AIRFLOW_SOURCES_ROOT_PATH / "airflow" /
"reproducible_build.yaml"
+
+if __name__ == "__main__":
+ hash_md5 = md5()
+ hash_md5.update(RELEASE_NOTES_FILE_PATH.read_bytes())
+ release_notes_hash = hash_md5.hexdigest()
+ reproducible_build_text = REPRODUCIBLE_BUILD_FILE.read_text()
+ reproducible_build = yaml.safe_load(reproducible_build_text)
+ old_hash = reproducible_build["release-notes-hash"]
+ if release_notes_hash != old_hash:
+ # Replace the hash in the file
+ reproducible_build["release-notes-hash"] = release_notes_hash
+ reproducible_build["source-date-epoch"] = int(time())
+ REPRODUCIBLE_BUILD_FILE.write_text(yaml.dump(reproducible_build))
diff --git a/scripts/in_container/run_prepare_airflow_packages.py
b/scripts/in_container/run_prepare_airflow_packages.py
index 2f6f1912c2..66aae93ca4 100755
--- a/scripts/in_container/run_prepare_airflow_packages.py
+++ b/scripts/in_container/run_prepare_airflow_packages.py
@@ -26,11 +26,13 @@ from contextlib import contextmanager
from pathlib import Path
from shutil import rmtree
+import yaml
from rich.console import Console
console = Console(color_system="standard", width=200)
AIRFLOW_SOURCES_ROOT = Path(__file__).parents[2].resolve()
+REPRODUCIBLE_BUILD_FILE = AIRFLOW_SOURCES_ROOT / "airflow" /
"reproducible_build.yaml"
AIRFLOW_INIT_FILE = AIRFLOW_SOURCES_ROOT / "airflow" / "__init__.py"
WWW_DIRECTORY = AIRFLOW_SOURCES_ROOT / "airflow" / "www"
VERSION_SUFFIX = os.environ.get("VERSION_SUFFIX_FOR_PYPI", "")
@@ -81,8 +83,17 @@ def build_airflow_packages(package_format: str):
if package_format in ["both", "sdist"]:
build_command.extend(["-t", "sdist"])
+ reproducible_date =
yaml.safe_load(REPRODUCIBLE_BUILD_FILE.read_text())["source-date-epoch"]
+
+ envcopy = os.environ.copy()
+ envcopy["SOURCE_DATE_EPOCH"] = str(reproducible_date)
console.print(f"[bright_blue]Building packages: {package_format}\n")
- build_process = subprocess.run(build_command, capture_output=False,
cwd=AIRFLOW_SOURCES_ROOT)
+ build_process = subprocess.run(
+ build_command,
+ capture_output=False,
+ cwd=AIRFLOW_SOURCES_ROOT,
+ env=envcopy,
+ )
if build_process.returncode != 0:
console.print("[red]Error building Airflow packages")