This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch v3-1-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v3-1-test by this push:
new bf48f9142d0 [v3-1-test] Review and modernize release check process for
Airflow: (#57203) (#57252)
bf48f9142d0 is described below
commit bf48f9142d0dce0622bd8e5f31b5d989db1871b2
Author: Jarek Potiuk <[email protected]>
AuthorDate: Sat Oct 25 19:18:48 2025 +0200
[v3-1-test] Review and modernize release check process for Airflow:
(#57203) (#57252)
* include task-sdk
* switch to RAT 0.17
* update .rat-excludes to be compatible with RAT 0.17
* Add .gitignore to task-sdk to not build release with accidental files
(cherry picked from commit 90e3926868d2059e4679c9035b9ee26f874233fd)
---
.gitignore | 6 +
.pre-commit-config.yaml | 1 +
.rat-excludes | 148 ++++++++++++++++++++-
dev/README_RELEASE_AIRFLOW.md | 41 ++++--
dev/check_files.py | 44 +++++-
.../ci/dockerfiles/apache-rat/build_and_push.sh | 4 +-
scripts/ci/prek/check_license.py | 12 +-
task-sdk/.gitignore | 14 ++
8 files changed, 247 insertions(+), 23 deletions(-)
diff --git a/.gitignore b/.gitignore
index d3f1810a46e..e7fb2a5403d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -275,3 +275,9 @@ _api/
#while running go tests inside the go-sdk, it can generate log files for dags,
ignore all logs
go-sdk/**/*.log
+
+# E2e tests
+_e2e_test_report.json
+
+# UV cache
+.uv-cache/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0f66d77e410..b3fd8a169f0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -763,6 +763,7 @@ repos:
^airflow-core/newsfragments/43349\.significant\.rst$|
^airflow-core/newsfragments/41368\.significant\.rst$|
.*/dist/.*|
+ .rat-excludes|
package-lock\.json$|
^providers/edge3/src/airflow/providers/edge3/plugins/www/pnpm-lock.yaml$
- id: check-base-operator-partial-arguments
diff --git a/.rat-excludes b/.rat-excludes
index 129d7210f07..9890235c23c 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -19,6 +19,7 @@
.rat-excludes
.stylelintignore
.stylelintrc
+.env
.venv
requirements
requirements.txt
@@ -176,9 +177,150 @@ auth_generated.py
www-hash.txt
# go setup files
-go.mod
-go.sum
-mocks/*
+**/go.mod
+**/go.sum
+**/protov1/*
+
+# go mocks
+**/mocks/*
+
+# Generated protobuf files
+.*proto
+.*pb.go
+.*_grpc.pb.go
# Kubernetes env
.env
+
+# SVG files
+**/*.svg
+
+# Doc only change marker file
+**/.latest-doc-only-change.txt
+**/*-gen/*
+
+
+# Redirects
+**/redirects.txt
+
+# Ignore files
+
+**/.git-blame-ignore-revs
+**/.gitattributes
+**/.rat-excludes
+**/.gitignore
+**/.prettierignore
+**/.prettierrc
+**/.airflowignore
+**/.airflowignore_glob
+
+
+# Vendor includes
+**/_vendor/
+
+# Generated files
+**/*-generated.yaml
+**/*-generated.py
+**/generated.py
+**/generated/*
+**/auth_generated.py
+
+# Lock files
+**/pnpm-lock.yaml
+**/yarn.lock
+**/Chart.lock
+**/uv.lock
+
+# Generated UI files
+**/ui/index.html
+**/ui/dev/index.html
+**/ui/dist/index.html
+**/_private_ui.yaml
+**/dist/**
+**/www/index.html
+
+# PNG files
+**/*.png
+
+# CSV files
+**/*.csv
+
+# LICENCE files
+**/LICENCE*.txt
+**/LICENSE*.txt
+
+
+# Checksum files
+**/*.sha256
+**/*.md5sum
+
+# Requirement files
+**/requirements.txt
+
+# Hashes
+**/command_hashes.txt
+**/www-hash.txt
+
+# Spelling wordlist
+**/spelling_wordlist.txt
+**/dictionary.txt
+
+# Empty files
+**/empty.txt
+
+# Script files
+**/script
+**/script.bteq
+**/script_utf16.bteq
+
+# Reproducible build files
+**/reproducible_build.yaml
+
+# Other files
+**/test_notifier.txt
+**/email.html
+**/*.log
+**/example_upload.txt
+**/dummy.pdf
+**/java_streaming_src/*
+**/kube_config
+**/prod_image_installed_providers.txt
+**/text.txt
+**/newsfragments/**
+**/warnings.txt
+**/rtd-deprecation/404.html
+**/.env
+**/*.jsonl
+
+# API files
+**/_api/**
+**/node_modules/**
+
+# Doc files
+/docs/.latest-doc-only-change.txt
+/docs/redirects.txt
+/docs/integration-logos/*.svg
+/docs/img/*.md5sum
+/docs/img/*.svg
+
+# Log files
+*.log
+
+# md5 sum files
+.*\.md5sum
+
+# Generated files
+*generated.*
+/src/airflow/providers/keycloak/auth_manager/openapi/v2-keycloak-auth-manager-generated.yaml
+/src/airflow/providers/edge3/plugins/www/*
+/src/airflow/providers/edge3/openapi/v2-edge-generated.yaml
+/src/airflow/providers/fab/auth_manager/api_fastapi/openapi/v2-fab-auth-manager-generated.yaml
+/src/airflow/providers/fab/www/static/dist/*
+/any/dag_id=dag_for_testing_redis_task_handler/run_id=test/task_id=task_for_testing_redis_log_handler/attempt=1.log
+/src/airflow/providers/google/ads/.gitignore
+
+# Vendored-in code
+/src/airflow/providers/google/_vendor/*
+
+# Git ignore file
+.gitignore
diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md
index 70374c64908..50d05e57408 100644
--- a/dev/README_RELEASE_AIRFLOW.md
+++ b/dev/README_RELEASE_AIRFLOW.md
@@ -614,11 +614,13 @@ you are checking):
```shell script
VERSION=X.Y.Zrc1
+TASK_SDK_VERSION=X.Y.Zrc1
git fetch apache --tags
git checkout ${VERSION}
export AIRFLOW_REPO_ROOT=$(pwd)
rm -rf dist/*
breeze release-management prepare-airflow-distributions --distribution-format
both
+breeze release-management prepare-task-sdk-distributions --distribution-format
both
breeze release-management prepare-airflow-tarball --version ${VERSION}
```
@@ -628,6 +630,7 @@ will be done in a docker container. However, if you have
`hatch` installed loc
```bash
breeze release-management prepare-airflow-distributions --distribution-format
both --use-local-hatch
+breeze release-management prepare-task-sdk-distributions --distribution-format
both --use-local-hatch
breeze release-management prepare-airflow-tarball --version ${VERSION}
```
@@ -635,7 +638,7 @@ This is generally faster and requires less
resources/network bandwidth. Note tha
do it before preparing the tarball as preparing packages cleans up dist folder
from
apache-airflow artifacts as it uses hatch's `-c` build flag.
-The `prepare-airflow-distributions` command (no matter if docker or local
hatch is used) should produce the
+The `prepare-*-distributions` commands (no matter if docker or local hatch is
used) should produce the
reproducible `.whl`, `.tar.gz` packages in the dist folder.
The tarball command should produce reproducible `-source.tar.gz` tarball of
sources.
@@ -650,10 +653,16 @@ svn update --set-depth=infinity asf-dist/dev/airflow
# Then compare the packages
cd asf-dist/dev/airflow/${VERSION}
-for i in ${AIRFLOW_REPO_ROOT}/dist/*
+for i in *.whl *.tar.gz
do
- echo "Checking if $(basename $i) is the same as $i"
- diff "$(basename $i)" "$i" && echo "OK"
+ echo "Checking if $(basename $i) is the same as
${AIRFLOW_REPO_ROOT}/dist/$(basename $i)"
+ diff "$(basename $i)" "${AIRFLOW_REPO_ROOT}/dist/$(basename $i)" && echo "OK"
+done
+cd ../task-sdk/${TASK_SDK_VERSION}
+for i in *.whl *.tar.gz
+do
+ echo "Checking if $(basename $i) is the same as
${AIRFLOW_REPO_ROOT}/dist/$(basename $i)"
+ diff "$(basename $i)" "${AIRFLOW_REPO_ROOT}/dist/$(basename $i)" && echo "OK"
done
```
@@ -703,21 +712,35 @@ cd $AIRFLOW_REPO_ROOT/dev
uv run check_files.py airflow -v ${VERSION} -p ${PATH_TO_SVN}
```
+
+```shell script
+cd $AIRFLOW_REPO_ROOT/dev
+uv run check_files.py task-sdk -v ${TASK_SDK_VERSION} -p
${PATH_TO_SVN}/task-sdk
+```
+
## Licence check
This can be done with the Apache RAT tool.
-* Download the latest jar from https://creadur.apache.org/rat/download_rat.cgi
(unpack the binary,
- the jar is inside)
-* Unpack the release source archive (the `<package + version>-source.tar.gz`
file) to a folder
-* Enter the sources folder run the check
+Download the latest jar from https://creadur.apache.org/rat/download_rat.cgi
(unpack the binary, the jar is inside)
+wget -qO-
https://dlcdn.apache.org//creadur/apache-rat-0.17/apache-rat-0.17-bin.tar.gz |
gunzip | tar -C /tmp -xvf -
+
+Unpack the release source archive (the `<package + version>-source.tar.gz`
file) to a folder
+
+```shell script
+rm -rf /tmp/apache-airflow-src && mkdir -p /tmp/apache-airflow-src && tar -xzf
${PATH_TO_SVN}/${VERSION}/apache-airflow-*-source.tar.gz -C
/tmp/apache-airflow-src
+```
+
+Run the check:
```shell script
-java -jar ../../apache-rat-0.13/apache-rat-0.13.jar -E .rat-excludes -d .
+java -jar /tmp/apache-rat-0.17/apache-rat-0.17.jar --input-exclude-file
${AIRFLOW_REPO_ROOT}/.rat-excludes /tmp/apache-airflow-src | grep "! "
```
where `.rat-excludes` is the file in the root of Airflow source code.
+You should see no files reported as Unknown or with wrong licence.
+
## Signature check
Make sure you have imported into your GPG the PGP key of the person signing
the release. You can find the valid keys in
diff --git a/dev/check_files.py b/dev/check_files.py
index e7f16b5c612..69d243667a2 100644
--- a/dev/check_files.py
+++ b/dev/check_files.py
@@ -46,6 +46,15 @@ RUN pip install "apache-airflow=={}"
"""
+TASK_SDK_DOCKER = """\
+FROM python:3.10
+
+# Upgrade
+RUN pip install "apache-airflow-task-sdk=={}"
+
+"""
+
+
DOCKER_UPGRADE = """\
FROM apache/airflow:1.10.15
@@ -131,7 +140,7 @@ def check_all_files(actual_files, expected_files):
return missing_list
-def check_release(files: list[str], version: str):
+def check_airflow_release(files: list[str], version: str):
print(f"Checking airflow release for version {version}:\n")
version = strip_rc_suffix(version)
@@ -147,6 +156,19 @@ def check_release(files: list[str], version: str):
return check_all_files(expected_files=expected_files, actual_files=files)
+def check_task_sdk_release(files: list[str], version: str):
+ print(f"Checking task-sdk release for version {version}:\n")
+ version = strip_rc_suffix(version)
+
+ expected_files = expand_name_variations(
+ [
+ f"apache_airflow_task_sdk-{version}.tar.gz",
+ f"apache_airflow_task_sdk-{version}-py3-none-any.whl",
+ ]
+ )
+ return check_all_files(expected_files=expected_files, actual_files=files)
+
+
def expand_name_variations(files):
return sorted(base + suffix for base, suffix in itertools.product(files,
["", ".asc", ".sha512"]))
@@ -225,13 +247,26 @@ def providers(ctx, path: str):
@click.pass_context
def airflow(ctx, path: str, version: str):
files = os.listdir(os.path.join(path, version))
- missing_files = check_release(files, version)
+ missing_files = check_airflow_release(files, version)
create_docker(AIRFLOW_DOCKER.format(version))
if missing_files:
warn_of_missing_files(missing_files)
return
[email protected](name="task-sdk")
+@path_option
+@version_option
[email protected]_context
+def task_sdk(ctx, path: str, version: str):
+ files = os.listdir(os.path.join(path, version))
+ missing_files = check_task_sdk_release(files, version)
+ create_docker(TASK_SDK_DOCKER.format(version))
+ if missing_files:
+ warn_of_missing_files(missing_files)
+ return
+
+
@click.command()
@path_option
@version_option
@@ -248,6 +283,7 @@ def upgrade_check(ctx, path: str, version: str):
cli.add_command(providers)
cli.add_command(airflow)
+cli.add_command(task_sdk)
cli.add_command(upgrade_check)
if __name__ == "__main__":
@@ -273,7 +309,7 @@ def test_check_release_pass():
"apache_airflow_core-2.8.1.tar.gz.asc",
"apache_airflow_core-2.8.1.tar.gz.sha512",
]
- assert check_release(files, version="2.8.1rc2") == []
+ assert check_airflow_release(files, version="2.8.1rc2") == []
def test_check_release_fail():
@@ -294,7 +330,7 @@ def test_check_release_fail():
"apache_airflow_core-2.8.1.tar.gz.sha512",
]
- missing_files = check_release(files, version="2.8.1rc2")
+ missing_files = check_airflow_release(files, version="2.8.1rc2")
assert missing_files == ["apache_airflow-2.8.1.tar.gz",
"apache_airflow_core-2.8.1.tar.gz"]
diff --git a/scripts/ci/dockerfiles/apache-rat/build_and_push.sh
b/scripts/ci/dockerfiles/apache-rat/build_and_push.sh
index ddd004d9337..0980c121d6c 100755
--- a/scripts/ci/dockerfiles/apache-rat/build_and_push.sh
+++ b/scripts/ci/dockerfiles/apache-rat/build_and_push.sh
@@ -19,10 +19,10 @@ set -euo pipefail
GITHUB_REPOSITORY=${GITHUB_REPOSITORY:="apache/airflow"}
readonly GITHUB_REPOSITORY
-APACHERAT_VERSION="0.16.1"
+APACHERAT_VERSION="0.17"
readonly APACHERAT_VERSION
-AIRFLOW_APACHERAT_VERSION="2024.03.23"
+AIRFLOW_APACHERAT_VERSION="2025.10.24"
readonly AIRFLOW_APACHERAT_VERSION
COMMIT_SHA=$(git rev-parse HEAD)
diff --git a/scripts/ci/prek/check_license.py b/scripts/ci/prek/check_license.py
index c60d6bc9fa9..bb50dca43ab 100755
--- a/scripts/ci/prek/check_license.py
+++ b/scripts/ci/prek/check_license.py
@@ -37,11 +37,10 @@ cmd = [
"--user",
f"{os.getuid()}:{os.getgid()}",
"--rm",
-
"ghcr.io/apache/airflow-apache-rat:0.16.1-2024.03.23@sha256:83c4d2610ec4a439d1809a67fadbdc9a1df089ab130b32209351bdd4527a3f02",
- "-d",
- "/opt/airflow",
- "--exclude-file",
+
"ghcr.io/apache/airflow-apache-rat:0.17-2025.10.24@sha256:63e965ecfa195d38cf0525b16ad801dff75833ee97d88cd763020537c36981c9",
+ "--input-exclude-file",
"/opt/airflow/.rat-excludes",
+ "/opt/airflow",
]
print("Running command:")
@@ -56,7 +55,10 @@ result = subprocess.run(
output = result.stdout
if result.returncode != 0:
print(f"\033[0;31mERROR: {result.returncode} when running rat\033[0m\n")
- print(output)
+ lines = output.splitlines()
+ for line in lines:
+ if "! " in line:
+ print(line)
sys.exit(result.returncode)
unknown_licences = [line for line in output.splitlines() if "??" in line]
if unknown_licences:
diff --git a/task-sdk/.gitignore b/task-sdk/.gitignore
new file mode 100644
index 00000000000..0da25061558
--- /dev/null
+++ b/task-sdk/.gitignore
@@ -0,0 +1,14 @@
+# Potentially created files
+.uv-cache
+dist
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg