This is an automated email from the ASF dual-hosted git repository.
kaxilnaik pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 4527afd5777 Add `breeze registry backfill` command for older provider
versions (#63269)
4527afd5777 is described below
commit 4527afd5777b959ba45c3b9429f06939908e4220
Author: Kaxil Naik <[email protected]>
AuthorDate: Tue Mar 10 15:02:40 2026 +0000
Add `breeze registry backfill` command for older provider versions (#63269)
Adds a new breeze subcommand that extracts runtime parameters and connection
types for previously released provider versions using `uv run --with` — no
Docker or breeze CI image needed.
Also includes:
- Unit tests for all helper functions (16 tests)
- Breeze docs for the backfill command
- GitHub Actions workflow (registry-backfill.yml) that runs providers in
parallel via matrix strategy, then publishes versions.json
- Fix providerVersions.js to use runtime module_counts from modules.json
instead of AST-based counts from providers.json
Also fixes two issues:
- `tomllib` is only available on Python 3.11+; use a try/except fallback to
`tomli` (same pattern as other breeze modules)
- `TestReadProviderYamlInfo` tests used real filesystem paths that depend
on `tomllib`; replaced with `tmp_path`-based mock files
---
.github/workflows/registry-backfill.yml | 266 +++++++++++++++++++++
dev/breeze/doc/11_registry_tasks.rst | 41 ++++
dev/breeze/doc/images/output_registry.svg | 24 +-
dev/breeze/doc/images/output_registry.txt | 2 +-
dev/breeze/doc/images/output_registry_backfill.svg | 126 ++++++++++
dev/breeze/doc/images/output_registry_backfill.txt | 1 +
.../output_setup_check-all-params-in-groups.svg | 4 +-
.../output_setup_check-all-params-in-groups.txt | 2 +-
.../output_setup_regenerate-command-images.svg | 2 +-
.../output_setup_regenerate-command-images.txt | 2 +-
.../airflow_breeze/commands/registry_commands.py | 191 ++++++++++++++-
.../commands/registry_commands_config.py | 10 +
dev/breeze/tests/test_registry_backfill.py | 189 +++++++++++++++
registry/src/_data/providerVersions.js | 11 +
14 files changed, 857 insertions(+), 14 deletions(-)
diff --git a/.github/workflows/registry-backfill.yml
b/.github/workflows/registry-backfill.yml
new file mode 100644
index 00000000000..5a0b39d661f
--- /dev/null
+++ b/.github/workflows/registry-backfill.yml
@@ -0,0 +1,266 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+---
+name: Registry Backfill
+on:  # yamllint disable-line rule:truthy
+  workflow_dispatch:  # manual trigger only; every input below is required
+    inputs:
+      destination:
+        description: >
+          Publish to live or staging S3 bucket
+        required: true
+        type: choice
+        options:
+          - staging
+          - live
+        default: staging
+      providers:
+        description: >
+          Space-separated provider IDs
+          (e.g. 'amazon google databricks')
+        required: true
+        type: string
+      versions:
+        description: >
+          Space-separated versions to backfill
+          (e.g. '9.15.0 9.14.0'). Applied to ALL providers.
+        required: true
+        type: string
+
+permissions:
+  contents: read  # read-only access to repository contents
+
+jobs:
+  prepare:  # exposes the provider matrix and S3 prefix as job outputs
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.matrix.outputs.matrix }}
+      bucket: ${{ steps.destination.outputs.bucket }}
+    steps:
+      - name: "Build provider matrix"  # blank tokens from extra whitespace are dropped
+        id: matrix
+        env:
+          PROVIDERS: ${{ inputs.providers }}
+        run: |
+          MATRIX=$(echo "${PROVIDERS}" \
+            | tr -s '[:space:]' '\n' | jq -R 'select(length > 0)' \
+            | jq -cs '{"provider": .}')
+          echo "matrix=${MATRIX}" >> "${GITHUB_OUTPUT}"
+
+      - name: "Determine S3 destination"  # anything other than "live" maps to staging
+        id: destination
+        env:
+          DESTINATION: ${{ inputs.destination }}
+        run: |
+          if [[ "${DESTINATION}" == "live" ]]; then
+            URL="s3://live-docs-airflow-apache-org"
+          else
+            URL="s3://staging-docs-airflow-apache-org"
+          fi
+          echo "bucket=${URL}/registry/" \
+            >> "${GITHUB_OUTPUT}"
+
+  backfill:  # one matrix job per provider listed in the `providers` input
+    needs: prepare
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    strategy:
+      fail-fast: false  # a failing provider does not cancel the other matrix jobs
+      matrix: ${{ fromJSON(needs.prepare.outputs.matrix) }}
+    name: "Backfill ${{ matrix.provider }}"
+    if: >
+      contains(fromJSON('[
+        "ashb",
+        "bugraoz93",
+        "eladkal",
+        "ephraimbuddy",
+        "jedcunningham",
+        "jscheffl",
+        "kaxil",
+        "pierrejeambrun",
+        "shahar1",
+        "potiuk",
+        "utkarsharma2",
+        "vincbeck"
+      ]'), github.event.sender.login)
+    steps:
+      - name: "Checkout repository"
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+          fetch-depth: 0
+
+      - name: "Fetch provider tags"  # a missing tag is reported but does not fail the step
+        env:
+          VERSIONS: ${{ inputs.versions }}
+          PROVIDER: ${{ matrix.provider }}
+        run: |
+          for VERSION in ${VERSIONS}; do
+            TAG="providers-${PROVIDER}/${VERSION}"
+            echo "Fetching tag: ${TAG}"
+            git fetch origin tag "${TAG}" \
+              2>/dev/null || echo "Tag not found"
+          done
+
+      - name: "Install uv"
+        uses: astral-sh/setup-uv@bd01e18f51369d5765a7df3681d34498e332e27e  # v6.3.1
+
+      - name: "Install Breeze"
+        uses: ./.github/actions/breeze
+        with:
+          python-version: "3.12"
+
+      - name: "Install AWS CLI v2"
+        run: |
+          curl -sSf \
+            "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" \
+            -o /tmp/awscliv2.zip
+          unzip -q /tmp/awscliv2.zip -d /tmp
+          rm /tmp/awscliv2.zip
+          sudo /tmp/aws/install --update
+          rm -rf /tmp/aws/
+
+      - name: "Configure AWS credentials"
+        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7  # v6.0.0
+        with:
+          aws-access-key-id: ${{ secrets.DOCS_AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.DOCS_AWS_SECRET_ACCESS_KEY }}
+          aws-region: us-east-2
+
+      - name: "Download existing providers.json"
+        env:
+          S3_BUCKET: ${{ needs.prepare.outputs.bucket }}
+        run: |
+          aws s3 cp \
+            "${S3_BUCKET}api/providers.json" \
+            dev/registry/providers.json || true  # best-effort: a failed download is ignored
+
+      - name: "Extract version metadata from git tags"
+        env:
+          VERSIONS: ${{ inputs.versions }}
+          PROVIDER: ${{ matrix.provider }}
+        run: |
+          VERSION_ARGS=()
+          for VERSION in ${VERSIONS}; do
+            VERSION_ARGS+=(--version "${VERSION}")
+          done
+          uv run python dev/registry/extract_versions.py \
+            --provider "${PROVIDER}" "${VERSION_ARGS[@]}" || true  # best-effort: failure is ignored
+
+      - name: "Run breeze registry backfill"
+        env:
+          VERSIONS: ${{ inputs.versions }}
+          PROVIDER: ${{ matrix.provider }}
+        run: |
+          VERSION_ARGS=()
+          for VERSION in ${VERSIONS}; do
+            VERSION_ARGS+=(--version "${VERSION}")
+          done
+          breeze registry backfill \
+            --provider "${PROVIDER}" "${VERSION_ARGS[@]}"
+
+      - name: "Download data files from S3 for build"  # unlike the earlier download, failure here fails the job
+        env:
+          S3_BUCKET: ${{ needs.prepare.outputs.bucket }}
+        run: |
+          aws s3 cp \
+            "${S3_BUCKET}api/providers.json" \
+            registry/src/_data/providers.json
+          aws s3 cp \
+            "${S3_BUCKET}api/modules.json" \
+            registry/src/_data/modules.json
+
+      - name: "Setup pnpm"
+        uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061  # v4.2.0
+        with:
+          version: 9
+
+      - name: "Setup Node.js"
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f  # v6.3.0
+        with:
+          node-version: 20
+          cache: 'pnpm'
+          cache-dependency-path: 'registry/pnpm-lock.yaml'
+
+      - name: "Install Node.js dependencies"
+        working-directory: registry
+        run: pnpm install --frozen-lockfile
+
+      - name: "Build registry site"
+        working-directory: registry
+        env:
+          REGISTRY_PATH_PREFIX: "/registry/"
+        run: pnpm build
+
+      - name: "Sync backfilled version pages to S3"  # uploads only this provider's requested versions
+        env:
+          S3_BUCKET: ${{ needs.prepare.outputs.bucket }}
+          CACHE_CONTROL: "public, max-age=300"
+          VERSIONS: ${{ inputs.versions }}
+          PROVIDER: ${{ matrix.provider }}
+        run: |
+          for VERSION in ${VERSIONS}; do
+            echo "Syncing ${PROVIDER}/${VERSION}..."
+            aws s3 sync \
+              "registry/_site/providers/${PROVIDER}/${VERSION}/" \
+              "${S3_BUCKET}providers/${PROVIDER}/${VERSION}/" \
+              --cache-control "${CACHE_CONTROL}"
+            aws s3 sync \
+              "registry/_site/api/providers/${PROVIDER}/${VERSION}/" \
+              "${S3_BUCKET}api/providers/${PROVIDER}/${VERSION}/" \
+              --cache-control "${CACHE_CONTROL}"
+          done
+
+  publish-versions:  # runs once, after the prepare job and every backfill matrix job
+    needs: [prepare, backfill]
+    runs-on: ubuntu-latest
+    name: "Publish versions.json"
+    steps:
+      - name: "Checkout repository"
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          persist-credentials: false
+
+      - name: "Install Breeze"
+        uses: ./.github/actions/breeze
+        with:
+          python-version: "3.12"
+
+      - name: "Install AWS CLI v2"
+        run: |
+          curl -sSf \
+            "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" \
+            -o /tmp/awscliv2.zip
+          unzip -q /tmp/awscliv2.zip -d /tmp
+          rm /tmp/awscliv2.zip
+          sudo /tmp/aws/install --update
+          rm -rf /tmp/aws/
+
+      - name: "Configure AWS credentials"
+        uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7  # v6.0.0
+        with:
+          aws-access-key-id: ${{ secrets.DOCS_AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.DOCS_AWS_SECRET_ACCESS_KEY }}
+          aws-region: us-east-2
+
+      - name: "Publish version metadata"  # the folded scalar joins the two lines below into one command
+        env:
+          S3_BUCKET: ${{ needs.prepare.outputs.bucket }}
+        run: >
+          breeze registry publish-versions
+          --s3-bucket "${S3_BUCKET}"
diff --git a/dev/breeze/doc/11_registry_tasks.rst
b/dev/breeze/doc/11_registry_tasks.rst
index c64828a8b7b..6b01d9065dc 100644
--- a/dev/breeze/doc/11_registry_tasks.rst
+++ b/dev/breeze/doc/11_registry_tasks.rst
@@ -50,6 +50,47 @@ Example usage:
# Extract with a specific Python version
breeze registry extract-data --python 3.12
+Backfilling older versions
+..........................
+
+The ``breeze registry backfill`` command extracts runtime parameters and connection
+types for older provider versions without Docker. It uses ``uv run --with`` to
+install the specific provider version in a temporary environment and runs
+``extract_parameters.py`` and ``extract_connections.py``.
+
+This is useful when you need to add pages for previously released versions that
+were not included in the initial registry build.
+
+.. image:: ./images/output_registry_backfill.svg
+ :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_registry_backfill.svg
+ :width: 100%
+ :alt: Breeze registry backfill
+
+Example usage:
+
+.. code-block:: bash
+
+ # Backfill a single version
+ breeze registry backfill --provider amazon --version 9.15.0
+
+ # Backfill multiple versions at once
+ breeze registry backfill --provider amazon --version 9.15.0 --version 9.14.0 --version 9.13.0
+
+ # Backfill a hyphenated provider
+ breeze registry backfill --provider microsoft-azure --version 11.0.0
+
+Output is written to ``registry/src/_data/versions/{provider}/{version}/``:
+
+- ``parameters.json`` — operator/sensor/hook parameters
+- ``connections.json`` — connection type definitions
+
+After backfilling, you still need to:
+
+1. Extract metadata from git tags: ``uv run python dev/registry/extract_versions.py --provider {id} --version {version}``
+2. Build the Eleventy site: ``cd registry && pnpm build``
+3. Sync new version pages to S3
+4. Run ``breeze registry publish-versions`` to update version dropdowns
+
Publishing version metadata
..........................
diff --git a/dev/breeze/doc/images/output_registry.svg
b/dev/breeze/doc/images/output_registry.svg
index 80d5d4def08..e4b4f92c4f8 100644
--- a/dev/breeze/doc/images/output_registry.svg
+++ b/dev/breeze/doc/images/output_registry.svg
@@ -1,4 +1,4 @@
-<svg class="rich-terminal" viewBox="0 0 1482 367.2"
xmlns="http://www.w3.org/2000/svg">
+<svg class="rich-terminal" viewBox="0 0 1482 440.4"
xmlns="http://www.w3.org/2000/svg">
<!-- Generated with Rich https://www.textualize.io -->
<style>
@@ -42,7 +42,7 @@
<defs>
<clipPath id="breeze-registry-clip-terminal">
- <rect x="0" y="0" width="1463.0" height="316.2" />
+ <rect x="0" y="0" width="1463.0" height="389.4" />
</clipPath>
<clipPath id="breeze-registry-line-0">
<rect x="0" y="1.5" width="1464" height="24.65"/>
@@ -80,9 +80,18 @@
<clipPath id="breeze-registry-line-11">
<rect x="0" y="269.9" width="1464" height="24.65"/>
</clipPath>
+<clipPath id="breeze-registry-line-12">
+ <rect x="0" y="294.3" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-line-13">
+ <rect x="0" y="318.7" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-line-14">
+ <rect x="0" y="343.1" width="1464" height="24.65"/>
+ </clipPath>
</defs>
- <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1"
x="1" y="1" width="1480" height="365.2" rx="8"/><text
class="breeze-registry-title" fill="#c5c8c6" text-anchor="middle" x="740"
y="27">Command: registry</text>
+ <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1"
x="1" y="1" width="1480" height="438.4" rx="8"/><text
class="breeze-registry-title" fill="#c5c8c6" text-anchor="middle" x="740"
y="27">Command: registry</text>
<g transform="translate(26,22)">
<circle cx="0" cy="0" r="7" fill="#ff5f57"/>
<circle cx="22" cy="0" r="7" fill="#febc2e"/>
@@ -102,9 +111,12 @@
</text><text class="breeze-registry-r5" x="0" y="190.8" textLength="1464"
clip-path="url(#breeze-registry-line-7)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-r1" x="1464" y="190.8" textLength="12.2"
clip-path="url(#breeze-registry-line-7)">
</text><text class="breeze-registry-r5" x="0" y="215.2" textLength="24.4"
clip-path="url(#breeze-registry-line-8)">╭─</text><text
class="breeze-registry-r5" x="24.4" y="215.2" textLength="231.8"
clip-path="url(#breeze-registry-line-8)"> Registry commands </text><text
class="breeze-registry-r5" x="256.2" y="215.2" textLength="1183.4"
clip-path="url(#breeze-registry-line-8)">─────────────────────────────────────────────────────────────────────────────────────────────────</te
[...]
</text><text class="breeze-registry-r5" x="0" y="239.6" textLength="12.2"
clip-path="url(#breeze-registry-line-9)">│</text><text
class="breeze-registry-r4" x="24.4" y="239.6" textLength="195.2"
clip-path="url(#breeze-registry-line-9)">extract-data    </text><text
class="breeze-registry-r1" x="244" y="239.6" textLength="1195.6"
clip-path="url(#breeze-registry-line-9)">Extract provider metadata, parameters, and connection types for
[...]
-</text><text class="breeze-registry-r5" x="0" y="264" textLength="12.2"
clip-path="url(#breeze-registry-line-10)">│</text><text
class="breeze-registry-r4" x="24.4" y="264" textLength="195.2"
clip-path="url(#breeze-registry-line-10)">publish-versions</text><text
class="breeze-registry-r1" x="244" y="264" textLength="1195.6"
clip-path="url(#breeze-registry-line-10)">Publish per-provider versions.json to S3 from deployed directories. Same pattern
[...]
-</text><text class="breeze-registry-r5" x="0" y="288.4" textLength="12.2"
clip-path="url(#breeze-registry-line-11)">│</text><text
class="breeze-registry-r1" x="244" y="288.4" textLength="1195.6"
clip-path="url(#breeze-registry-line-11)">release-management publish-docs-to-s3'.                                  &#
[...]
-</text><text class="breeze-registry-r5" x="0" y="312.8" textLength="1464"
clip-path="url(#breeze-registry-line-12)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-r1" x="1464" y="312.8" textLength="12.2"
clip-path="url(#breeze-registry-line-12)">
+</text><text class="breeze-registry-r5" x="0" y="264" textLength="12.2"
clip-path="url(#breeze-registry-line-10)">│</text><text
class="breeze-registry-r4" x="24.4" y="264" textLength="195.2"
clip-path="url(#breeze-registry-line-10)">backfill        </text><text
class="breeze-registry-r1" x="244" y="264" textLength="1037"
clip-path="url(#breeze-registry-line-10)">Extract runtime parameters and connections for older 
[...]
+</text><text class="breeze-registry-r5" x="0" y="288.4" textLength="12.2"
clip-path="url(#breeze-registry-line-11)">│</text><text
class="breeze-registry-r1" x="244" y="288.4" textLength="1195.6"
clip-path="url(#breeze-registry-line-11)">install the specific version in a temporary environment and runs extract_parameters.py +          </text><text
class="breeze-registry-r5" x="1451.8" y [...]
+</text><text class="breeze-registry-r5" x="0" y="312.8" textLength="12.2"
clip-path="url(#breeze-registry-line-12)">│</text><text
class="breeze-registry-r1" x="244" y="312.8" textLength="1195.6"
clip-path="url(#breeze-registry-line-12)">extract_connections.py. No Docker needed.                                 &
[...]
+</text><text class="breeze-registry-r5" x="0" y="337.2" textLength="12.2"
clip-path="url(#breeze-registry-line-13)">│</text><text
class="breeze-registry-r4" x="24.4" y="337.2" textLength="195.2"
clip-path="url(#breeze-registry-line-13)">publish-versions</text><text
class="breeze-registry-r1" x="244" y="337.2" textLength="1195.6"
clip-path="url(#breeze-registry-line-13)">Publish per-provider versions.json to S3 from deployed directories. Same p
[...]
+</text><text class="breeze-registry-r5" x="0" y="361.6" textLength="12.2"
clip-path="url(#breeze-registry-line-14)">│</text><text
class="breeze-registry-r1" x="244" y="361.6" textLength="1195.6"
clip-path="url(#breeze-registry-line-14)">release-management publish-docs-to-s3'.                                  &#
[...]
+</text><text class="breeze-registry-r5" x="0" y="386" textLength="1464"
clip-path="url(#breeze-registry-line-15)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-r1" x="1464" y="386" textLength="12.2"
clip-path="url(#breeze-registry-line-15)">
</text>
</g>
</g>
diff --git a/dev/breeze/doc/images/output_registry.txt
b/dev/breeze/doc/images/output_registry.txt
index 6888f6b8f6f..dae5504430b 100644
--- a/dev/breeze/doc/images/output_registry.txt
+++ b/dev/breeze/doc/images/output_registry.txt
@@ -1 +1 @@
-94b4d28badb1f32f4e3c2d24bf337d78
+297843509448a55e7941eed3c0485df8
diff --git a/dev/breeze/doc/images/output_registry_backfill.svg
b/dev/breeze/doc/images/output_registry_backfill.svg
new file mode 100644
index 00000000000..12b49bb0402
--- /dev/null
+++ b/dev/breeze/doc/images/output_registry_backfill.svg
@@ -0,0 +1,126 @@
+<svg class="rich-terminal" viewBox="0 0 1482 440.4"
xmlns="http://www.w3.org/2000/svg">
+ <!-- Generated with Rich https://www.textualize.io -->
+ <style>
+
+ @font-face {
+ font-family: "Fira Code";
+ src: local("FiraCode-Regular"),
+
url("https://cdnjs.cloudflare.com/ajax/libs/firacode/6.2.0/woff2/FiraCode-Regular.woff2")
format("woff2"),
+
url("https://cdnjs.cloudflare.com/ajax/libs/firacode/6.2.0/woff/FiraCode-Regular.woff")
format("woff");
+ font-style: normal;
+ font-weight: 400;
+ }
+ @font-face {
+ font-family: "Fira Code";
+ src: local("FiraCode-Bold"),
+
url("https://cdnjs.cloudflare.com/ajax/libs/firacode/6.2.0/woff2/FiraCode-Bold.woff2")
format("woff2"),
+
url("https://cdnjs.cloudflare.com/ajax/libs/firacode/6.2.0/woff/FiraCode-Bold.woff")
format("woff");
+ font-style: bold;
+ font-weight: 700;
+ }
+
+ .breeze-registry-backfill-matrix {
+ font-family: Fira Code, monospace;
+ font-size: 20px;
+ line-height: 24.4px;
+ font-variant-east-asian: full-width;
+ }
+
+ .breeze-registry-backfill-title {
+ font-size: 18px;
+ font-weight: bold;
+ font-family: arial;
+ }
+
+ .breeze-registry-backfill-r1 { fill: #c5c8c6 }
+.breeze-registry-backfill-r2 { fill: #d0b344 }
+.breeze-registry-backfill-r3 { fill: #c5c8c6;font-weight: bold }
+.breeze-registry-backfill-r4 { fill: #68a0b3;font-weight: bold }
+.breeze-registry-backfill-r5 { fill: #868887 }
+.breeze-registry-backfill-r6 { fill: #cc555a }
+.breeze-registry-backfill-r7 { fill: #8a4346 }
+.breeze-registry-backfill-r8 { fill: #8d7b39 }
+.breeze-registry-backfill-r9 { fill: #98a84b;font-weight: bold }
+ </style>
+
+ <defs>
+ <clipPath id="breeze-registry-backfill-clip-terminal">
+ <rect x="0" y="0" width="1463.0" height="389.4" />
+ </clipPath>
+ <clipPath id="breeze-registry-backfill-line-0">
+ <rect x="0" y="1.5" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-1">
+ <rect x="0" y="25.9" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-2">
+ <rect x="0" y="50.3" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-3">
+ <rect x="0" y="74.7" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-4">
+ <rect x="0" y="99.1" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-5">
+ <rect x="0" y="123.5" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-6">
+ <rect x="0" y="147.9" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-7">
+ <rect x="0" y="172.3" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-8">
+ <rect x="0" y="196.7" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-9">
+ <rect x="0" y="221.1" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-10">
+ <rect x="0" y="245.5" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-11">
+ <rect x="0" y="269.9" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-12">
+ <rect x="0" y="294.3" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-13">
+ <rect x="0" y="318.7" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-14">
+ <rect x="0" y="343.1" width="1464" height="24.65"/>
+ </clipPath>
+ </defs>
+
+ <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1"
x="1" y="1" width="1480" height="438.4" rx="8"/><text
class="breeze-registry-backfill-title" fill="#c5c8c6" text-anchor="middle"
x="740" y="27">Command: registry backfill</text>
+ <g transform="translate(26,22)">
+ <circle cx="0" cy="0" r="7" fill="#ff5f57"/>
+ <circle cx="22" cy="0" r="7" fill="#febc2e"/>
+ <circle cx="44" cy="0" r="7" fill="#28c840"/>
+ </g>
+
+ <g transform="translate(9, 41)"
clip-path="url(#breeze-registry-backfill-clip-terminal)">
+
+ <g class="breeze-registry-backfill-matrix">
+ <text class="breeze-registry-backfill-r1" x="1464" y="20"
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-0)">
+</text><text class="breeze-registry-backfill-r2" x="12.2" y="44.4"
textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-1)">Usage:</text><text
class="breeze-registry-backfill-r3" x="97.6" y="44.4" textLength="292.8"
clip-path="url(#breeze-registry-backfill-line-1)">breeze registry backfill</text><text
class="breeze-registry-backfill-r1" x="402.6" y="44.4" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-1)">[</text><text
class="breeze-registry-backfill [...]
+</text><text class="breeze-registry-backfill-r1" x="1464" y="68.8"
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-2)">
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="93.2"
textLength="1037"
clip-path="url(#breeze-registry-backfill-line-3)">Extract runtime parameters and connections for older provider versions. Uses 'uv run </text><text
class="breeze-registry-backfill-r4" x="1049.2" y="93.2" textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-3)">--with</text><text
class="breeze-registry-backfill-r1" x="1122.4" y="9 [...]
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="117.6"
textLength="1329.8"
clip-path="url(#breeze-registry-backfill-line-4)">version in a temporary environment and runs extract_parameters.py + extract_connections.py. No Docker needed.</text><text
class="breeze-registry-backfill-r1" x="1464" y="117.6" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-4)">
+</text><text class="breeze-registry-backfill-r1" x="1464" y="142"
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-5)">
+</text><text class="breeze-registry-backfill-r5" x="0" y="166.4"
textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-6)">╭─</text><text
class="breeze-registry-backfill-r5" x="24.4" y="166.4" textLength="195.2"
clip-path="url(#breeze-registry-backfill-line-6)"> Backfill flags </text><text
class="breeze-registry-backfill-r5" x="219.6" y="166.4" textLength="1220"
clip-path="url(#breeze-registry-backfill-line-6)">────────────────────────────────────────────────────
[...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="190.8"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-7)">│</text><text
class="breeze-registry-backfill-r6" x="24.4" y="190.8" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-7)">*</text><text
class="breeze-registry-backfill-r4" x="61" y="190.8" textLength="122"
clip-path="url(#breeze-registry-backfill-line-7)">--provider</text><text
class="breeze-registry-backfill-r1" x="207.4" y="190.8" textLengt [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="215.2"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-8)">│</text><text
class="breeze-registry-backfill-r6" x="24.4" y="215.2" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-8)">*</text><text
class="breeze-registry-backfill-r4" x="61" y="215.2" textLength="122"
clip-path="url(#breeze-registry-backfill-line-8)">--version </text><text
class="breeze-registry-backfill-r1" x="207.4" y="215.2" text [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="239.6"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-9)">│</text><text
class="breeze-registry-backfill-r8" x="207.4" y="239.6" textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-9)">(TEXT)</text><text
class="breeze-registry-backfill-r5" x="1451.8" y="239.6" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-9)">│</text><text
class="breeze-registry-backfill-r1" x="1464" y="239.6" textLeng [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="264"
textLength="1464"
clip-path="url(#breeze-registry-backfill-line-10)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-backfill-r1" x="1464" y="264" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-10)">
+</text><text class="breeze-registry-backfill-r5" x="0" y="288.4"
textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-11)">╭─</text><text
class="breeze-registry-backfill-r5" x="24.4" y="288.4" textLength="195.2"
clip-path="url(#breeze-registry-backfill-line-11)"> Common options </text><text
class="breeze-registry-backfill-r5" x="219.6" y="288.4" textLength="1220"
clip-path="url(#breeze-registry-backfill-line-11)">─────────────────────────────────────────────────
[...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="312.8"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-12)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="312.8" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-12)">--verbose</text><text
class="breeze-registry-backfill-r9" x="158.6" y="312.8" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-12)">-v</text><text
class="breeze-registry-backfill-r1" x="207.4" y="312.8" t [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="337.2"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-13)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="337.2" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-13)">--dry-run</text><text
class="breeze-registry-backfill-r9" x="158.6" y="337.2" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-13)">-D</text><text
class="breeze-registry-backfill-r1" x="207.4" y="337.2" t [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="361.6"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-14)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="361.6" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-14)">--help   </text><text
class="breeze-registry-backfill-r9" x="158.6" y="361.6" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-14)">-h</text><text
class="breeze-registry-backfill-r1" x="207 [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="386"
textLength="1464"
clip-path="url(#breeze-registry-backfill-line-15)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-backfill-r1" x="1464" y="386" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-15)">
+</text>
+ </g>
+ </g>
+</svg>
diff --git a/dev/breeze/doc/images/output_registry_backfill.txt
b/dev/breeze/doc/images/output_registry_backfill.txt
new file mode 100644
index 00000000000..78e2c611d76
--- /dev/null
+++ b/dev/breeze/doc/images/output_registry_backfill.txt
@@ -0,0 +1 @@
+e83ed21dca79179e4d064a17f8cd08be
diff --git a/dev/breeze/doc/images/output_setup_check-all-params-in-groups.svg
b/dev/breeze/doc/images/output_setup_check-all-params-in-groups.svg
index 34ca9601a79..b33ae7f03e5 100644
--- a/dev/breeze/doc/images/output_setup_check-all-params-in-groups.svg
+++ b/dev/breeze/doc/images/output_setup_check-all-params-in-groups.svg
@@ -203,8 +203,8 @@
</text><text class="breeze-setup-check-all-params-in-groups-r5" x="0"
y="288.4" textLength="12.2"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-11)">│</text><text
class="breeze-setup-check-all-params-in-groups-r6" x="158.6" y="288.4"
textLength="1171.2"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-11)">| k8s:create-cluster | k8s:delete-cluster | k8s:deploy-airflow | k8s:dev | k8s:k9s | k8s:logs |&#
[...]
</text><text class="breeze-setup-check-all-params-in-groups-r5" x="0"
y="312.8" textLength="12.2"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-12)">│</text><text
class="breeze-setup-check-all-params-in-groups-r6" x="158.6" y="312.8"
textLength="1281"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-12)">k8s:run-complete-tests | k8s:setup-env | k8s:shell | k8s:status | k8s:tests | k8s:upload-k8s-image |
[...]
</text><text class="breeze-setup-check-all-params-in-groups-r5" x="0"
y="337.2" textLength="12.2"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-13)">│</text><text
class="breeze-setup-check-all-params-in-groups-r6" x="158.6" y="337.2"
textLength="1256.6"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-13)">pr:auto-triage | prod-image | prod-image:build | prod-image:load | prod-image:pull | prod-image:save |
[...]
-</text><text class="breeze-setup-check-all-params-in-groups-r5" x="0"
y="361.6" textLength="12.2"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-14)">│</text><text
class="breeze-setup-check-all-params-in-groups-r6" x="158.6" y="361.6"
textLength="1268.8"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-14)">prod-image:verify | registry | registry:extract-data | registry:publish-versions | release-management | </t
[...]
-</text><text class="breeze-setup-check-all-params-in-groups-r5" x="0" y="386"
textLength="12.2"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-15)">│</text><text
class="breeze-setup-check-all-params-in-groups-r6" x="158.6" y="386"
textLength="1000.4"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-15)">release-management:add-back-references | release-management:check-release-files | </text><text
class="breeze-setup-check-all-params-in-groups [...]
+</text><text class="breeze-setup-check-all-params-in-groups-r5" x="0"
y="361.6" textLength="12.2"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-14)">│</text><text
class="breeze-setup-check-all-params-in-groups-r6" x="158.6" y="361.6"
textLength="1256.6"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-14)">prod-image:verify | registry | registry:backfill | registry:extract-data | registry:publish-versions | </te
[...]
+</text><text class="breeze-setup-check-all-params-in-groups-r5" x="0" y="386"
textLength="12.2"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-15)">│</text><text
class="breeze-setup-check-all-params-in-groups-r6" x="158.6" y="386"
textLength="1256.6"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-15)">release-management | release-management:add-back-references | release-management:check-release-files | </text><text
class="breeze-s [...]
</text><text class="breeze-setup-check-all-params-in-groups-r5" x="0"
y="410.4" textLength="12.2"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-16)">│</text><text
class="breeze-setup-check-all-params-in-groups-r6" x="158.6" y="410.4"
textLength="1183.4"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-16)">release-management:clean-old-provider-artifacts | release-management:constraints-version-check | </text><text
class="breeze-setup-check-a [...]
</text><text class="breeze-setup-check-all-params-in-groups-r5" x="0"
y="434.8" textLength="12.2"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-17)">│</text><text
class="breeze-setup-check-all-params-in-groups-r6" x="158.6" y="434.8"
textLength="1012.6"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-17)">release-management:create-minor-branch | release-management:generate-constraints | </text><text
class="breeze-setup-check-all-params-in-g [...]
</text><text class="breeze-setup-check-all-params-in-groups-r5" x="0"
y="459.2" textLength="12.2"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-18)">│</text><text
class="breeze-setup-check-all-params-in-groups-r6" x="158.6" y="459.2"
textLength="1268.8"
clip-path="url(#breeze-setup-check-all-params-in-groups-line-18)">release-management:generate-issue-content-core | release-management:generate-issue-content-helm-chart | </text><text
class="breeze-setup- [...]
diff --git a/dev/breeze/doc/images/output_setup_check-all-params-in-groups.txt
b/dev/breeze/doc/images/output_setup_check-all-params-in-groups.txt
index 71800db5118..4b4b042063c 100644
--- a/dev/breeze/doc/images/output_setup_check-all-params-in-groups.txt
+++ b/dev/breeze/doc/images/output_setup_check-all-params-in-groups.txt
@@ -1 +1 @@
-37967154c159533e69675ebf1a2ad104
+acfe23e5b4622df765994caf52f455d2
diff --git a/dev/breeze/doc/images/output_setup_regenerate-command-images.svg
b/dev/breeze/doc/images/output_setup_regenerate-command-images.svg
index b08e9e63156..d44be5b47ad 100644
--- a/dev/breeze/doc/images/output_setup_regenerate-command-images.svg
+++ b/dev/breeze/doc/images/output_setup_regenerate-command-images.svg
@@ -223,7 +223,7 @@
</text><text class="breeze-setup-regenerate-command-images-r5" x="0" y="337.2"
textLength="12.2"
clip-path="url(#breeze-setup-regenerate-command-images-line-13)">│</text><text
class="breeze-setup-regenerate-command-images-r6" x="195.2" y="337.2"
textLength="1244.4"
clip-path="url(#breeze-setup-regenerate-command-images-line-13)">k8s:deploy-airflow | k8s:dev | k8s:k9s | k8s:logs | k8s:run-complete-tests | k8s:setup-env | k8s:shel
[...]
</text><text class="breeze-setup-regenerate-command-images-r5" x="0" y="361.6"
textLength="12.2"
clip-path="url(#breeze-setup-regenerate-command-images-line-14)">│</text><text
class="breeze-setup-regenerate-command-images-r6" x="195.2" y="361.6"
textLength="1244.4"
clip-path="url(#breeze-setup-regenerate-command-images-line-14)">| k8s:status | k8s:tests | k8s:upload-k8s-image | pr | pr:auto-triage | prod-image | prod-image:
[...]
</text><text class="breeze-setup-regenerate-command-images-r5" x="0" y="386"
textLength="12.2"
clip-path="url(#breeze-setup-regenerate-command-images-line-15)">│</text><text
class="breeze-setup-regenerate-command-images-r6" x="195.2" y="386"
textLength="1061.4"
clip-path="url(#breeze-setup-regenerate-command-images-line-15)">| prod-image:load | prod-image:pull | prod-image:save | prod-image:verify | registry | </text><text
class="bre [...]
-</text><text class="breeze-setup-regenerate-command-images-r5" x="0" y="410.4"
textLength="12.2"
clip-path="url(#breeze-setup-regenerate-command-images-line-16)">│</text><text
class="breeze-setup-regenerate-command-images-r6" x="195.2" y="410.4"
textLength="890.6"
clip-path="url(#breeze-setup-regenerate-command-images-line-16)">registry:extract-data | registry:publish-versions | release-management | </text><text
class="breeze-setup-regenerate-command-images- [...]
+</text><text class="breeze-setup-regenerate-command-images-r5" x="0" y="410.4"
textLength="12.2"
clip-path="url(#breeze-setup-regenerate-command-images-line-16)">│</text><text
class="breeze-setup-regenerate-command-images-r6" x="195.2" y="410.4"
textLength="1134.6"
clip-path="url(#breeze-setup-regenerate-command-images-line-16)">registry:backfill | registry:extract-data | registry:publish-versions | release-management | </text><text
class="breeze-s [...]
</text><text class="breeze-setup-regenerate-command-images-r5" x="0" y="434.8"
textLength="12.2"
clip-path="url(#breeze-setup-regenerate-command-images-line-17)">│</text><text
class="breeze-setup-regenerate-command-images-r6" x="195.2" y="434.8"
textLength="1000.4"
clip-path="url(#breeze-setup-regenerate-command-images-line-17)">release-management:add-back-references | release-management:check-release-files | </text><text
class="breeze-setup-regenerate-command-images- [...]
</text><text class="breeze-setup-regenerate-command-images-r5" x="0" y="459.2"
textLength="12.2"
clip-path="url(#breeze-setup-regenerate-command-images-line-18)">│</text><text
class="breeze-setup-regenerate-command-images-r6" x="195.2" y="459.2"
textLength="1183.4"
clip-path="url(#breeze-setup-regenerate-command-images-line-18)">release-management:clean-old-provider-artifacts | release-management:constraints-version-check | </text><text
class="breeze-setup-regenerate- [...]
</text><text class="breeze-setup-regenerate-command-images-r5" x="0" y="483.6"
textLength="12.2"
clip-path="url(#breeze-setup-regenerate-command-images-line-19)">│</text><text
class="breeze-setup-regenerate-command-images-r6" x="195.2" y="483.6"
textLength="1012.6"
clip-path="url(#breeze-setup-regenerate-command-images-line-19)">release-management:create-minor-branch | release-management:generate-constraints | </text><text
class="breeze-setup-regenerate-command-images [...]
diff --git a/dev/breeze/doc/images/output_setup_regenerate-command-images.txt
b/dev/breeze/doc/images/output_setup_regenerate-command-images.txt
index ac78480ec0c..5cb537dfca9 100644
--- a/dev/breeze/doc/images/output_setup_regenerate-command-images.txt
+++ b/dev/breeze/doc/images/output_setup_regenerate-command-images.txt
@@ -1 +1 @@
-5bd6b8a7281b30045aadce6f7c2576c6
+cd2b1818493fedb67afad21a6a46f98d
diff --git a/dev/breeze/src/airflow_breeze/commands/registry_commands.py
b/dev/breeze/src/airflow_breeze/commands/registry_commands.py
index 68b74f6a113..b09b4be4c18 100644
--- a/dev/breeze/src/airflow_breeze/commands/registry_commands.py
+++ b/dev/breeze/src/airflow_breeze/commands/registry_commands.py
@@ -16,8 +16,10 @@
# under the License.
from __future__ import annotations
+import json
import sys
import uuid
+from pathlib import Path
import click
@@ -27,6 +29,8 @@ from airflow_breeze.params.shell_params import ShellParams
from airflow_breeze.utils.ci_group import ci_group
from airflow_breeze.utils.click_utils import BreezeGroup
from airflow_breeze.utils.docker_command_utils import
execute_command_in_shell, fix_ownership_using_docker
+from airflow_breeze.utils.path_utils import AIRFLOW_ROOT_PATH
+from airflow_breeze.utils.run_utils import run_command
@click.group(cls=BreezeGroup, name="registry", help="Tools for the Airflow
Provider Registry")
@@ -96,9 +100,192 @@ def extract_data(python: str, provider: str | None):
help="Path to providers.json. Auto-detected if not provided.",
)
def publish_versions(s3_bucket: str, providers_json: str | None):
- from pathlib import Path
-
from airflow_breeze.utils.publish_registry_versions import
publish_versions as _publish_versions
providers_path = Path(providers_json) if providers_json else None
_publish_versions(s3_bucket, providers_json_path=providers_path)
+
+
+# Root of the in-repo provider packages (one sub-directory per provider).
+PROVIDERS_DIR = AIRFLOW_ROOT_PATH / "providers"
+# Directory holding the registry extraction scripts and their working files.
+DEV_REGISTRY_DIR = AIRFLOW_ROOT_PATH / "dev" / "registry"
+
+# Working copy of providers.json that the backfill helpers create and patch.
+PROVIDERS_JSON_PATH = DEV_REGISTRY_DIR / "providers.json"
+
+# Scripts executed, in this order, for every backfilled provider version.
+EXTRACT_SCRIPTS = [
+    DEV_REGISTRY_DIR / script_name
+    for script_name in ("extract_parameters.py", "extract_connections.py")
+]
+
+
+def _find_provider_yaml(provider_id: str) -> Path:
+    """Find provider.yaml for a given provider ID (e.g. 'amazon', 'apache-beam', 'microsoft-azure').
+
+    Provider IDs use hyphens while the directory layout may use one directory per
+    hyphen-separated part (e.g. ``microsoft-azure`` -> ``microsoft/azure``). For IDs
+    containing a hyphen the nested layout is tried first, then the flat directory
+    named exactly like the ID.
+
+    :param provider_id: hyphen-separated provider ID
+    :return: path of the first candidate ``provider.yaml`` that exists
+    :raises click.ClickException: if none of the candidates exists
+    """
+    flat_candidate = PROVIDERS_DIR / provider_id / "provider.yaml"
+    parts = provider_id.split("-")
+    if len(parts) >= 2:
+        # joinpath() keeps the result under PROVIDERS_DIR even when a part is empty
+        # (e.g. an ID starting with '-'); joining the parts with "/" first would yield
+        # an absolute path and silently drop PROVIDERS_DIR.
+        candidates = [PROVIDERS_DIR.joinpath(*parts) / "provider.yaml", flat_candidate]
+    else:
+        candidates = [flat_candidate]
+    for candidate in candidates:
+        if candidate.exists():
+            return candidate
+    raise click.ClickException(
+        f"provider.yaml not found for '{provider_id}'. Tried: {', '.join(str(c) for c in candidates)}"
+    )
+
+
+def _read_provider_yaml_info(provider_id: str) -> tuple[str, list[str]]:
+    """Read package name from provider.yaml and extras from pyproject.toml.
+
+    :param provider_id: hyphen-separated provider ID, resolved via ``_find_provider_yaml``
+    :return: tuple of the ``package-name`` value from provider.yaml and the sorted keys
+        of ``[project.optional-dependencies]`` from the sibling pyproject.toml (an empty
+        list when that file or table is absent)
+    :raises click.ClickException: if provider.yaml cannot be found or has no ``package-name``
+    """
+    # tomllib is only in the standard library from Python 3.11; fall back to tomli.
+    try:
+        import tomllib
+    except ImportError:
+        import tomli as tomllib
+
+    import yaml
+
+    provider_yaml_path = _find_provider_yaml(provider_id)
+    with open(provider_yaml_path) as f:
+        data = yaml.safe_load(f)
+    # An empty file loads as None and a malformed one may lack the key; report both
+    # as a clean CLI error instead of a raw TypeError/KeyError traceback.
+    if not isinstance(data, dict) or "package-name" not in data:
+        raise click.ClickException(f"'package-name' not found in {provider_yaml_path}")
+    package_name = data["package-name"]
+
+    pyproject = provider_yaml_path.parent / "pyproject.toml"
+    extras: list[str] = []
+    if pyproject.exists():
+        with open(pyproject, "rb") as f:
+            toml_data = tomllib.load(f)
+        optional_deps = toml_data.get("project", {}).get("optional-dependencies", {})
+        extras = sorted(optional_deps.keys())
+
+    return package_name, extras
+
+
+def _build_pip_spec(package_name: str, extras: list[str], version: str) -> str:
+    """Build pip install spec, e.g. 'apache-airflow-providers-amazon[pandas,s3fs]==9.21.0'.
+
+    :param package_name: distribution name of the provider package
+    :param extras: extras to request; an empty list omits the ``[...]`` part entirely
+    :param version: exact version to pin with ``==``
+    :return: the requirement string
+    """
+    extras_suffix = f"[{','.join(extras)}]" if extras else ""
+    return f"{package_name}{extras_suffix}=={version}"
+
+
+def _ensure_providers_json(provider_id: str, package_name: str) -> Path:
+    """Ensure dev/registry/providers.json exists with the target provider.
+
+    The extraction scripts read this to determine which version to tag output with.
+    If it exists (from a previous extract-data or S3 download), use it.
+    If the provider is missing from an existing file, append it rather than replacing.
+
+    NOTE: Does NOT touch registry/src/_data/providers.json, which is used by
+    the Eleventy build and must contain all providers.
+
+    :param provider_id: provider ID to look for in the ``providers`` list
+    :param package_name: package name stored in a newly added entry
+    :return: ``PROVIDERS_JSON_PATH``
+    """
+    PROVIDERS_JSON_PATH.parent.mkdir(parents=True, exist_ok=True)
+    # Placeholder entry; the version is patched per run by the backfill command.
+    new_entry = {"id": provider_id, "package_name": package_name, "version": "0.0.0"}
+
+    if PROVIDERS_JSON_PATH.exists():
+        with open(PROVIDERS_JSON_PATH) as f:
+            data = json.load(f)
+        # setdefault keeps the lookup and the append consistent: a file without a
+        # "providers" key gets one instead of raising KeyError on append.
+        providers = data.setdefault("providers", [])
+        if any(p["id"] == provider_id for p in providers):
+            return PROVIDERS_JSON_PATH
+        # Provider not in file — append it rather than replacing
+        providers.append(new_entry)
+        click.echo(f"Added {provider_id} to existing {PROVIDERS_JSON_PATH}")
+    else:
+        data = {"providers": [new_entry]}
+        click.echo(f"Created minimal {PROVIDERS_JSON_PATH}")
+
+    with open(PROVIDERS_JSON_PATH, "w") as f:
+        json.dump(data, f, indent=2)
+    return PROVIDERS_JSON_PATH
+
+
+def _patch_providers_json(providers_json_path: Path, provider_id: str, version: str) -> str:
+    """Patch providers.json to set the target version. Returns the original version.
+
+    Only the first entry whose ``id`` equals ``provider_id`` is changed; the file is
+    rewritten in place with 2-space indentation.
+
+    :param providers_json_path: file to read and rewrite
+    :param provider_id: ``id`` of the provider entry to patch
+    :param version: version string to store in the entry
+    :return: the version the entry held before patching
+    :raises click.ClickException: if no entry has the given ``id`` (file is left untouched)
+    """
+    with open(providers_json_path) as f:
+        data = json.load(f)
+    entry = next((p for p in data["providers"] if p["id"] == provider_id), None)
+    if entry is None:
+        raise click.ClickException(f"Provider '{provider_id}' not found in {providers_json_path}")
+    original_version = entry["version"]
+    entry["version"] = version
+    with open(providers_json_path, "w") as f:
+        json.dump(data, f, indent=2)
+    return original_version
+
+
+# TODO: The backfill command processes versions sequentially because extract_parameters.py
+# and extract_connections.py write to shared files (modules.json, providers.json).
+# To parallelize, each provider would need its own isolated output directory so that
+# concurrent runs don't clobber each other. See also the registry-backfill.yml workflow
+# which uses a GitHub Actions matrix to run providers in parallel CI jobs.
+
+
+@registry_group.command(
+    name="backfill",
+    help="Extract runtime parameters and connections for older provider versions. "
+    "Uses 'uv run --with' to install the specific version in a temporary environment "
+    "and runs extract_parameters.py + extract_connections.py. No Docker needed.",
+)
+@click.option(
+    "--provider",
+    required=True,
+    help="Provider ID (e.g. 'amazon', 'google', 'microsoft-azure').",
+)
+@click.option(
+    "--version",
+    "versions",
+    required=True,
+    multiple=True,
+    help="Version(s) to extract. Can be specified multiple times: --version 9.21.0 --version 9.20.0",
+)
+@option_verbose
+@option_dry_run
+def backfill(provider: str, versions: tuple[str, ...]):
+    # Flow: resolve the package name and extras, make sure providers.json has an entry
+    # for the provider, then for each requested version temporarily patch that entry,
+    # run every extraction script through `uv run --with <spec>`, and restore the entry.
+    package_name, extras = _read_provider_yaml_info(provider)
+    providers_json_path = _ensure_providers_json(provider, package_name)
+
+    click.echo(f"Provider: {provider} ({package_name})")
+    click.echo(f"Versions: {', '.join(versions)}")
+    if extras:
+        click.echo(f"Extras: {', '.join(extras)}")
+    click.echo()
+
+    # Collected as "<version>/<script name>" for the final summary.
+    failed: list[str] = []
+
+    for version in versions:
+        click.echo(f"{'=' * 60}")
+        click.echo(f"Extracting {provider} {version}")
+        click.echo(f"{'=' * 60}")
+
+        original_version = _patch_providers_json(providers_json_path, provider, version)
+
+        try:
+            pip_spec = _build_pip_spec(package_name, extras, version)
+            base_spec = f"{package_name}=={version}"
+            for script in EXTRACT_SCRIPTS:
+                click.echo(f"\nRunning {script.name} with {pip_spec}...")
+                result = run_command(
+                    ["uv", "run", "--with", pip_spec, "python", str(script)],
+                    check=False,
+                    cwd=str(AIRFLOW_ROOT_PATH),
+                )
+                # The extras are read from the current pyproject.toml, so the requested
+                # spec may not install for this version; retry once with the bare package.
+                if result.returncode != 0 and pip_spec != base_spec:
+                    click.echo(f"Retrying {script.name} without extras...")
+                    result = run_command(
+                        ["uv", "run", "--with", base_spec, "python", str(script)],
+                        check=False,
+                        cwd=str(AIRFLOW_ROOT_PATH),
+                    )
+                if result.returncode != 0:
+                    click.echo(f"WARNING: {script.name} failed for {version} (exit {result.returncode})")
+                    failed.append(f"{version}/{script.name}")
+        finally:
+            # Always put the original version back, even if a script run raised.
+            _patch_providers_json(providers_json_path, provider, original_version)
+
+    click.echo(f"\n{'=' * 60}")
+    if failed:
+        click.echo(f"Completed with failures: {', '.join(failed)}")
+        sys.exit(1)
+    else:
+        click.echo(f"Successfully extracted {len(versions)} version(s) for {provider}")
+        click.echo(
+            f"\nOutput written to:\n"
+            f"  registry/src/_data/versions/{provider}/<version>/parameters.json\n"
+            f"  registry/src/_data/versions/{provider}/<version>/connections.json"
+        )
diff --git a/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py
b/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py
index 2e3f579e50b..fdd156d45a3 100644
--- a/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py
+++ b/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py
@@ -20,6 +20,7 @@ REGISTRY_COMMANDS: dict[str, str | list[str]] = {
"name": "Registry commands",
"commands": [
"extract-data",
+ "backfill",
"publish-versions",
],
}
@@ -34,6 +35,15 @@ REGISTRY_PARAMETERS: dict[str, list[dict[str, str |
list[str]]]] = {
],
},
],
+ "breeze registry backfill": [
+ {
+ "name": "Backfill flags",
+ "options": [
+ "--provider",
+ "--version",
+ ],
+ },
+ ],
"breeze registry publish-versions": [
{
"name": "Publish versions flags",
diff --git a/dev/breeze/tests/test_registry_backfill.py
b/dev/breeze/tests/test_registry_backfill.py
new file mode 100644
index 00000000000..2eb4b732eb5
--- /dev/null
+++ b/dev/breeze/tests/test_registry_backfill.py
@@ -0,0 +1,189 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Unit tests for the registry backfill command helpers."""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import patch
+
+import pytest
+
+from airflow_breeze.commands.registry_commands import (
+ _build_pip_spec,
+ _ensure_providers_json,
+ _find_provider_yaml,
+ _patch_providers_json,
+ _read_provider_yaml_info,
+)
+
+
+# ---------------------------------------------------------------------------
+# _find_provider_yaml
+# ---------------------------------------------------------------------------
+class TestFindProviderYaml:
+    """Lookup of provider.yaml against the provider directories of the checked-out repository."""
+
+    def test_simple_provider(self):
+        path = _find_provider_yaml("amazon")
+        assert path.name == "provider.yaml"
+        # as_posix() keeps the substring check independent of the OS path separator.
+        assert "providers/amazon" in path.as_posix()
+
+    def test_hyphenated_provider(self):
+        path = _find_provider_yaml("microsoft-azure")
+        assert path.name == "provider.yaml"
+        assert "providers/microsoft/azure" in path.as_posix()
+
+    def test_apache_prefixed_provider(self):
+        path = _find_provider_yaml("apache-beam")
+        assert path.name == "provider.yaml"
+        # Either the nested or the flat directory layout is acceptable here.
+        assert "providers/apache/beam" in path.as_posix() or "providers/apache-beam" in path.as_posix()
+
+    def test_unknown_provider_raises(self):
+        with pytest.raises(Exception, match="provider.yaml not found"):
+            _find_provider_yaml("nonexistent-provider-xyz")
+
+
+# ---------------------------------------------------------------------------
+# _read_provider_yaml_info
+# ---------------------------------------------------------------------------
+class TestReadProviderYamlInfo:
+    """Reading package name and extras from files created under ``tmp_path``."""
+
+    def test_reads_package_name_and_extras(self, tmp_path):
+        provider_dir = tmp_path / "providers" / "amazon"
+        provider_dir.mkdir(parents=True)
+        (provider_dir / "provider.yaml").write_text("package-name: apache-airflow-providers-amazon\n")
+        (provider_dir / "pyproject.toml").write_text(
+            '[project.optional-dependencies]\npandas = ["pandas>=2.1.2"]\ns3fs = ["s3fs>=2024.6.1"]\n'
+        )
+        # Point the lookup at the temporary tree instead of the real providers directory.
+        with patch("airflow_breeze.commands.registry_commands.PROVIDERS_DIR", tmp_path / "providers"):
+            package_name, extras = _read_provider_yaml_info("amazon")
+        assert package_name == "apache-airflow-providers-amazon"
+        assert extras == ["pandas", "s3fs"]
+
+    def test_no_pyproject_returns_empty_extras(self, tmp_path):
+        provider_dir = tmp_path / "providers" / "ftp"
+        provider_dir.mkdir(parents=True)
+        (provider_dir / "provider.yaml").write_text("package-name: apache-airflow-providers-ftp\n")
+        with patch("airflow_breeze.commands.registry_commands.PROVIDERS_DIR", tmp_path / "providers"):
+            package_name, extras = _read_provider_yaml_info("ftp")
+        assert package_name == "apache-airflow-providers-ftp"
+        assert extras == []
+
+    def test_pyproject_without_optional_deps(self, tmp_path):
+        provider_dir = tmp_path / "providers" / "sqlite"
+        provider_dir.mkdir(parents=True)
+        (provider_dir / "provider.yaml").write_text("package-name: apache-airflow-providers-sqlite\n")
+        (provider_dir / "pyproject.toml").write_text("[project]\nname = 'test'\n")
+        with patch("airflow_breeze.commands.registry_commands.PROVIDERS_DIR", tmp_path / "providers"):
+            _, extras = _read_provider_yaml_info("sqlite")
+        assert extras == []
+
+
+# ---------------------------------------------------------------------------
+# _build_pip_spec
+# ---------------------------------------------------------------------------
+class TestBuildPipSpec:
+ def test_with_extras(self):
+ result = _build_pip_spec("apache-airflow-providers-amazon", ["pandas",
"s3fs"], "9.21.0")
+ assert result == "apache-airflow-providers-amazon[pandas,s3fs]==9.21.0"
+
+ def test_without_extras(self):
+ result = _build_pip_spec("apache-airflow-providers-ftp", [], "1.0.0")
+ assert result == "apache-airflow-providers-ftp==1.0.0"
+
+ def test_single_extra(self):
+ result = _build_pip_spec("apache-airflow-providers-google",
["leveldb"], "10.0.0")
+ assert result == "apache-airflow-providers-google[leveldb]==10.0.0"
+
+
+# ---------------------------------------------------------------------------
+# _ensure_providers_json
+# ---------------------------------------------------------------------------
+class TestEnsureProvidersJson:
+    """Creation and extension of providers.json, with the module-level path patched to ``tmp_path``."""
+
+    def test_creates_new_file(self, tmp_path):
+        # Parent directories do not exist yet; the helper is expected to create them.
+        providers_json = tmp_path / "dev" / "registry" / "providers.json"
+        with patch(
+            "airflow_breeze.commands.registry_commands.PROVIDERS_JSON_PATH",
+            providers_json,
+        ):
+            result = _ensure_providers_json("amazon", "apache-airflow-providers-amazon")
+
+        assert result == providers_json
+        data = json.loads(providers_json.read_text())
+        assert len(data["providers"]) == 1
+        assert data["providers"][0]["id"] == "amazon"
+        assert data["providers"][0]["package_name"] == "apache-airflow-providers-amazon"
+
+    def test_appends_to_existing_file(self, tmp_path):
+        providers_json = tmp_path / "providers.json"
+        providers_json.write_text(
+            json.dumps({"providers": [{"id": "google", "package_name": "pkg-google", "version": "1.0.0"}]})
+        )
+        with patch(
+            "airflow_breeze.commands.registry_commands.PROVIDERS_JSON_PATH",
+            providers_json,
+        ):
+            _ensure_providers_json("amazon", "apache-airflow-providers-amazon")
+
+        data = json.loads(providers_json.read_text())
+        assert len(data["providers"]) == 2
+        ids = [p["id"] for p in data["providers"]]
+        assert "google" in ids
+        assert "amazon" in ids
+
+    def test_skips_if_provider_already_present(self, tmp_path):
+        providers_json = tmp_path / "providers.json"
+        original = {"providers": [{"id": "amazon", "package_name": "pkg", "version": "1.0.0"}]}
+        providers_json.write_text(json.dumps(original))
+        with patch(
+            "airflow_breeze.commands.registry_commands.PROVIDERS_JSON_PATH",
+            providers_json,
+        ):
+            _ensure_providers_json("amazon", "pkg")
+
+        # File should be unchanged: compare the full content, not only the entry count.
+        data = json.loads(providers_json.read_text())
+        assert len(data["providers"]) == 1
+        assert data == original
+
+
+# ---------------------------------------------------------------------------
+# _patch_providers_json
+# ---------------------------------------------------------------------------
+class TestPatchProvidersJson:
+    """Patching and restoring a provider's version in a providers.json under ``tmp_path``."""
+
+    def test_patches_version(self, tmp_path):
+        providers_json = tmp_path / "providers.json"
+        providers_json.write_text(json.dumps({"providers": [{"id": "amazon", "version": "9.22.0"}]}))
+        original = _patch_providers_json(providers_json, "amazon", "9.15.0")
+        assert original == "9.22.0"
+
+        data = json.loads(providers_json.read_text())
+        assert data["providers"][0]["version"] == "9.15.0"
+
+    def test_raises_for_missing_provider(self, tmp_path):
+        providers_json = tmp_path / "providers.json"
+        providers_json.write_text(json.dumps({"providers": [{"id": "google", "version": "1.0.0"}]}))
+        with pytest.raises(Exception, match="not found"):
+            _patch_providers_json(providers_json, "amazon", "9.15.0")
+
+    def test_restores_original_version(self, tmp_path):
+        providers_json = tmp_path / "providers.json"
+        providers_json.write_text(json.dumps({"providers": [{"id": "amazon", "version": "9.22.0"}]}))
+        # Patch to target version
+        _patch_providers_json(providers_json, "amazon", "9.15.0")
+        # Restore
+        _patch_providers_json(providers_json, "amazon", "9.22.0")
+
+        data = json.loads(providers_json.read_text())
+        assert data["providers"][0]["version"] == "9.22.0"
diff --git a/registry/src/_data/providerVersions.js
b/registry/src/_data/providerVersions.js
index 52bad7160d6..774a0342e5f 100644
--- a/registry/src/_data/providerVersions.js
+++ b/registry/src/_data/providerVersions.js
@@ -72,6 +72,17 @@ module.exports = function () {
const latestAirflow = provider.airflow_versions &&
provider.airflow_versions.length > 0
? provider.airflow_versions[provider.airflow_versions.length - 1]
: null;
+
+ // Compute module_counts from modules.json (runtime discovery) when
available,
+ // since providers.json may only have AST-based counts which undercount.
+ if (latestModules.length > 0) {
+ const counts = {};
+ for (const m of latestModules) {
+ counts[m.type] = (counts[m.type] || 0) + 1;
+ }
+ provider.module_counts = counts;
+ }
+
result.push({
provider,
version: provider.version,