This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch v3-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v3-2-test by this push:
new 35e0911526e Fix registry backfill with per-provider versions and
Docker extraction (#65223) (#66927)
35e0911526e is described below
commit 35e0911526e0a65a36ea63475c06e47070cad6c2
Author: Rahul Vats <[email protected]>
AuthorDate: Fri May 15 07:12:22 2026 +0530
Fix registry backfill with per-provider versions and Docker extraction
(#65223) (#66927)
Chain both extraction scripts in a single uv run invocation to avoid
creating two ephemeral environments per version.
(cherry picked from commit cda493d1b600d80ba5306eadd8a2fc9e1703b106)
Co-authored-by: Kaxil Naik <[email protected]>
---
.github/workflows/registry-backfill.yml | 110 ++++++++------
dev/breeze/doc/images/output_registry.svg | 4 +-
dev/breeze/doc/images/output_registry.txt | 2 +-
dev/breeze/doc/images/output_registry_backfill.svg | 56 +++++---
dev/breeze/doc/images/output_registry_backfill.txt | 2 +-
.../airflow_breeze/commands/registry_commands.py | 159 +++++++++++++++++----
.../commands/registry_commands_config.py | 2 +
dev/registry/.gitignore | 1 +
8 files changed, 244 insertions(+), 92 deletions(-)
diff --git a/.github/workflows/registry-backfill.yml
b/.github/workflows/registry-backfill.yml
index 65da127365f..138e15b66cd 100644
--- a/.github/workflows/registry-backfill.yml
+++ b/.github/workflows/registry-backfill.yml
@@ -29,23 +29,54 @@ on: # yamllint disable-line rule:truthy
- staging
- live
default: staging
- providers:
+ provider-versions:
description: >
- Space-separated provider IDs
- (e.g. 'amazon google databricks')
- required: true
- type: string
- versions:
- description: >
- Space-separated versions to backfill
- (e.g. '9.15.0 9.14.0'). Applied to ALL providers.
+ Space-separated provider/version pairs
+ (e.g. 'amazon/9.24.0 google/21.0.0 celery/3.17.2').
+ Multiple versions per provider are grouped into one job.
required: true
type: string
permissions:
contents: read
+ packages: read
jobs:
+ build-ci-image:
+ name: "Build CI image"
+ uses: ./.github/workflows/ci-image-build.yml
+ permissions:
+ contents: read
+ packages: write
+ if: >
+ contains(fromJSON('[
+ "ashb",
+ "bugraoz93",
+ "eladkal",
+ "ephraimbuddy",
+ "jedcunningham",
+ "jscheffl",
+ "kaxil",
+ "pierrejeambrun",
+ "shahar1",
+ "potiuk",
+ "utkarsharma2",
+ "vincbeck"
+ ]'), github.event.sender.login)
+ with:
+ runners: '["ubuntu-22.04"]'
+ platform: "linux/amd64"
+ push-image: "false"
+ upload-image-artifact: "true"
+ upload-mount-cache-artifact: "false"
+ python-versions: '["3.12"]'
+ branch: "main"
+ constraints-branch: "constraints-main"
+ use-uv: "true"
+ upgrade-to-newer-dependencies: "false"
+ docker-cache: "registry"
+ disable-airflow-repo-cache: "false"
+
prepare:
runs-on: ubuntu-latest
outputs:
@@ -55,12 +86,19 @@ jobs:
- name: "Build provider matrix"
id: matrix
env:
- PROVIDERS: ${{ inputs.providers }}
+ PROVIDER_VERSIONS: ${{ inputs.provider-versions }}
run: |
- MATRIX=$(echo "${PROVIDERS}" \
- | tr ' ' '\n' | jq -R . \
- | jq -cs '{"provider": .}')
+ # Parse provider/version pairs, group by provider
+ # Input: "amazon/9.24.0 google/21.0.0 amazon/9.23.0"
+ # Output: {"include": [{"provider":"amazon","versions":"9.24.0 9.23.0"}, ...]}
+ MATRIX=$(echo "${PROVIDER_VERSIONS}" | tr ' ' '\n' | grep '/' | \
+ jq -R 'split("/") | {provider: .[0], version: .[1]}' | \
+ jq -cs 'group_by(.provider) | map({
+ provider: .[0].provider,
+ versions: (map(.version) | join(" "))
+ }) | {include: .}')
echo "matrix=${MATRIX}" >> "${GITHUB_OUTPUT}"
+ echo "Matrix: ${MATRIX}"
- name: "Determine S3 destination"
id: destination
@@ -76,28 +114,16 @@ jobs:
>> "${GITHUB_OUTPUT}"
backfill:
- needs: prepare
+ needs: [prepare, build-ci-image]
runs-on: ubuntu-latest
timeout-minutes: 60
strategy:
fail-fast: false
matrix: ${{ fromJSON(needs.prepare.outputs.matrix) }}
- name: "Backfill ${{ matrix.provider }}"
- if: >
- contains(fromJSON('[
- "ashb",
- "bugraoz93",
- "eladkal",
- "ephraimbuddy",
- "jedcunningham",
- "jscheffl",
- "kaxil",
- "pierrejeambrun",
- "shahar1",
- "potiuk",
- "utkarsharma2",
- "vincbeck"
- ]'), github.event.sender.login)
+ name: "Backfill ${{ matrix.provider }} (${{ matrix.versions }})"
+ permissions:
+ contents: read
+ packages: read
steps:
- name: "Checkout repository"
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd #
v6.0.2
@@ -107,23 +133,23 @@ jobs:
- name: "Fetch provider tags"
env:
- VERSIONS: ${{ inputs.versions }}
+ VERSIONS: ${{ matrix.versions }}
PROVIDER: ${{ matrix.provider }}
run: |
for VERSION in ${VERSIONS}; do
TAG="providers-${PROVIDER}/${VERSION}"
echo "Fetching tag: ${TAG}"
git fetch origin tag "${TAG}" \
- 2>/dev/null || echo "Tag not found"
+ 2>/dev/null || echo "Tag not found: ${TAG}"
done
- - name: "Install uv"
- uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b #
v8.1.0
-
- - name: "Install Breeze"
- uses: ./.github/actions/breeze
+ - name: "Prepare breeze & CI image"
+ uses: ./.github/actions/prepare_breeze_and_image
with:
- python-version: "3.12"
+ python: "3.12"
+ platform: "linux/amd64"
+ use-uv: "true"
+ make-mnt-writeable-and-cleanup: "true"
- name: "Install AWS CLI v2"
run: |
@@ -152,7 +178,7 @@ jobs:
- name: "Extract version metadata from git tags"
env:
- VERSIONS: ${{ inputs.versions }}
+ VERSIONS: ${{ matrix.versions }}
PROVIDER: ${{ matrix.provider }}
run: |
VERSION_ARGS=""
@@ -164,7 +190,7 @@ jobs:
- name: "Run breeze registry backfill"
env:
- VERSIONS: ${{ inputs.versions }}
+ VERSIONS: ${{ matrix.versions }}
PROVIDER: ${{ matrix.provider }}
run: |
VERSION_ARGS=""
@@ -172,7 +198,7 @@ jobs:
VERSION_ARGS="${VERSION_ARGS} --version ${VERSION}"
done
breeze registry backfill \
- --provider "${PROVIDER}" ${VERSION_ARGS}
+ --provider "${PROVIDER}" --python 3.12 ${VERSION_ARGS}
- name: "Download data files from S3 for build"
env:
@@ -211,7 +237,7 @@ jobs:
env:
S3_BUCKET: ${{ needs.prepare.outputs.bucket }}
CACHE_CONTROL: "public, max-age=300"
- VERSIONS: ${{ inputs.versions }}
+ VERSIONS: ${{ matrix.versions }}
PROVIDER: ${{ matrix.provider }}
run: |
for VERSION in ${VERSIONS}; do
diff --git a/dev/breeze/doc/images/output_registry.svg
b/dev/breeze/doc/images/output_registry.svg
index 2a077a0ba4b..d836e0dd52c 100644
--- a/dev/breeze/doc/images/output_registry.svg
+++ b/dev/breeze/doc/images/output_registry.svg
@@ -116,8 +116,8 @@
</text><text class="breeze-registry-r5" x="0" y="239.6" textLength="12.2"
clip-path="url(#breeze-registry-line-9)">│</text><text
class="breeze-registry-r4" x="24.4" y="239.6" textLength="195.2"
clip-path="url(#breeze-registry-line-9)">extract-data    </text><text
class="breeze-registry-r1" x="244" y="239.6" textLength="1195.6"
clip-path="url(#breeze-registry-line-9)">Extract provider metadata, parameters, and connection types for
[...]
</text><text class="breeze-registry-r5" x="0" y="264" textLength="12.2"
clip-path="url(#breeze-registry-line-10)">│</text><text
class="breeze-registry-r4" x="24.4" y="264" textLength="195.2"
clip-path="url(#breeze-registry-line-10)">backfill        </text><text
class="breeze-registry-r1" x="244" y="264" textLength="1195.6"
clip-path="url(#breeze-registry-line-10)">Extract metadata, parameters, and connections for older
[...]
</text><text class="breeze-registry-r5" x="0" y="288.4" textLength="12.2"
clip-path="url(#breeze-registry-line-11)">│</text><text
class="breeze-registry-r1" x="244" y="288.4" textLength="1195.6"
clip-path="url(#breeze-registry-line-11)">extract_versions.py (host, git tags) for metadata.json, then extract_parameters.py +              </text><text
class="breeze-registry-r5" x="1451 [...]
-</text><text class="breeze-registry-r5" x="0" y="312.8" textLength="12.2"
clip-path="url(#breeze-registry-line-12)">│</text><text
class="breeze-registry-r1" x="244" y="312.8" textLength="427"
clip-path="url(#breeze-registry-line-12)">extract_connections.py via 'uv run </text><text
class="breeze-registry-r4" x="671" y="312.8" textLength="73.2"
clip-path="url(#breeze-registry-line-12)">--with</text><text
class="breeze-registry-r1" x="744.2" y="312.8" textLength="69 [...]
-</text><text class="breeze-registry-r5" x="0" y="337.2" textLength="12.2"
clip-path="url(#breeze-registry-line-13)">│</text><text
class="breeze-registry-r1" x="244" y="337.2" textLength="1195.6"
clip-path="url(#breeze-registry-line-13)">providers.json, so multiple providers can be backfilled in parallel.                        
[...]
+</text><text class="breeze-registry-r5" x="0" y="312.8" textLength="12.2"
clip-path="url(#breeze-registry-line-12)">│</text><text
class="breeze-registry-r1" x="244" y="312.8" textLength="854"
clip-path="url(#breeze-registry-line-12)">extract_connections.py inside the Breeze CI container (or via 'uv run </text><text
class="breeze-registry-r4" x="1098" y="312.8" textLength="73.2"
clip-path="url(#breeze-registry-line-12)">--with</text>< [...]
+</text><text class="breeze-registry-r5" x="0" y="337.2" textLength="12.2"
clip-path="url(#breeze-registry-line-13)">│</text><text
class="breeze-registry-r1" x="244" y="337.2" textLength="1195.6"
clip-path="url(#breeze-registry-line-13)">Each version uses an isolated providers.json, so multiple providers can be backfilled in parallel.</text><text
class="breeze-registry-r5" x="1451.8" y="337.2" textLength="12.2"
clip-path="ur [...]
</text><text class="breeze-registry-r5" x="0" y="361.6" textLength="12.2"
clip-path="url(#breeze-registry-line-14)">│</text><text
class="breeze-registry-r4" x="24.4" y="361.6" textLength="195.2"
clip-path="url(#breeze-registry-line-14)">publish-versions</text><text
class="breeze-registry-r1" x="244" y="361.6" textLength="1195.6"
clip-path="url(#breeze-registry-line-14)">Publish per-provider versions.json to S3 from deployed directories. Same p
[...]
</text><text class="breeze-registry-r5" x="0" y="386" textLength="12.2"
clip-path="url(#breeze-registry-line-15)">│</text><text
class="breeze-registry-r1" x="244" y="386" textLength="1195.6"
clip-path="url(#breeze-registry-line-15)">release-management publish-docs-to-s3'.                                   
[...]
</text><text class="breeze-registry-r5" x="0" y="410.4" textLength="1464"
clip-path="url(#breeze-registry-line-16)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-r1" x="1464" y="410.4" textLength="12.2"
clip-path="url(#breeze-registry-line-16)">
diff --git a/dev/breeze/doc/images/output_registry.txt
b/dev/breeze/doc/images/output_registry.txt
index 2c84fcf83d1..8ae38cfd5e5 100644
--- a/dev/breeze/doc/images/output_registry.txt
+++ b/dev/breeze/doc/images/output_registry.txt
@@ -1 +1 @@
-b1c2694af08bb5e10ae6f2c3b9bb2479
+27b4df2c81ed8e0d4c566e552e13bb6a
diff --git a/dev/breeze/doc/images/output_registry_backfill.svg
b/dev/breeze/doc/images/output_registry_backfill.svg
index 75ae72f45d2..92f5a0586cd 100644
--- a/dev/breeze/doc/images/output_registry_backfill.svg
+++ b/dev/breeze/doc/images/output_registry_backfill.svg
@@ -1,4 +1,4 @@
-<svg class="rich-terminal" viewBox="0 0 1482 464.79999999999995"
xmlns="http://www.w3.org/2000/svg">
+<svg class="rich-terminal" viewBox="0 0 1482 562.4"
xmlns="http://www.w3.org/2000/svg">
<!-- Generated with Rich https://www.textualize.io -->
<style>
@@ -37,15 +37,15 @@
.breeze-registry-backfill-r3 { fill: #c5c8c6;font-weight: bold }
.breeze-registry-backfill-r4 { fill: #68a0b3;font-weight: bold }
.breeze-registry-backfill-r5 { fill: #868887 }
-.breeze-registry-backfill-r6 { fill: #cc555a }
-.breeze-registry-backfill-r7 { fill: #8a4346 }
-.breeze-registry-backfill-r8 { fill: #8d7b39 }
-.breeze-registry-backfill-r9 { fill: #98a84b;font-weight: bold }
+.breeze-registry-backfill-r6 { fill: #98a84b;font-weight: bold }
+.breeze-registry-backfill-r7 { fill: #8d7b39 }
+.breeze-registry-backfill-r8 { fill: #cc555a }
+.breeze-registry-backfill-r9 { fill: #8a4346 }
</style>
<defs>
<clipPath id="breeze-registry-backfill-clip-terminal">
- <rect x="0" y="0" width="1463.0" height="413.79999999999995" />
+ <rect x="0" y="0" width="1463.0" height="511.4" />
</clipPath>
<clipPath id="breeze-registry-backfill-line-0">
<rect x="0" y="1.5" width="1464" height="24.65"/>
@@ -95,9 +95,21 @@
<clipPath id="breeze-registry-backfill-line-15">
<rect x="0" y="367.5" width="1464" height="24.65"/>
</clipPath>
+<clipPath id="breeze-registry-backfill-line-16">
+ <rect x="0" y="391.9" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-17">
+ <rect x="0" y="416.3" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-18">
+ <rect x="0" y="440.7" width="1464" height="24.65"/>
+ </clipPath>
+<clipPath id="breeze-registry-backfill-line-19">
+ <rect x="0" y="465.1" width="1464" height="24.65"/>
+ </clipPath>
</defs>
- <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1"
x="1" y="1" width="1480" height="462.8" rx="8"/><text
class="breeze-registry-backfill-title" fill="#c5c8c6" text-anchor="middle"
x="740" y="27">Command: registry backfill</text>
+ <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1"
x="1" y="1" width="1480" height="560.4" rx="8"/><text
class="breeze-registry-backfill-title" fill="#c5c8c6" text-anchor="middle"
x="740" y="27">Command: registry backfill</text>
<g transform="translate(26,22)">
<circle cx="0" cy="0" r="7" fill="#ff5f57"/>
<circle cx="22" cy="0" r="7" fill="#febc2e"/>
@@ -111,19 +123,23 @@
</text><text class="breeze-registry-backfill-r2" x="12.2" y="44.4"
textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-1)">Usage:</text><text
class="breeze-registry-backfill-r3" x="97.6" y="44.4" textLength="292.8"
clip-path="url(#breeze-registry-backfill-line-1)">breeze registry backfill</text><text
class="breeze-registry-backfill-r1" x="402.6" y="44.4" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-1)">[</text><text
class="breeze-registry-backfill [...]
</text><text class="breeze-registry-backfill-r1" x="1464" y="68.8"
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-2)">
</text><text class="breeze-registry-backfill-r1" x="12.2" y="93.2"
textLength="1427.4"
clip-path="url(#breeze-registry-backfill-line-3)">Extract metadata, parameters, and connections for older provider versions. Runs extract_versions.py (host, git tags) </text><text
class="breeze-registry-backfill-r1" x="1464" y="93.2" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-3)">
-</text><text class="breeze-registry-backfill-r1" x="12.2" y="117.6"
textLength="1012.6"
clip-path="url(#breeze-registry-backfill-line-4)">for metadata.json, then extract_parameters.py + extract_connections.py via 'uv run </text><text
class="breeze-registry-backfill-r4" x="1024.8" y="117.6" textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-4)">--with</text><text
class="breeze-registry-backfill-r1" x="1098" y="117.6" textLengt [...]
-</text><text class="breeze-registry-backfill-r1" x="12.2" y="142"
textLength="1134.6"
clip-path="url(#breeze-registry-backfill-line-5)">version uses an isolated providers.json, so multiple providers can be backfilled in parallel.</text><text
class="breeze-registry-backfill-r1" x="1464" y="142" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-5)">
-</text><text class="breeze-registry-backfill-r1" x="1464" y="166.4"
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-6)">
-</text><text class="breeze-registry-backfill-r5" x="0" y="190.8"
textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-7)">╭─</text><text
class="breeze-registry-backfill-r5" x="24.4" y="190.8" textLength="195.2"
clip-path="url(#breeze-registry-backfill-line-7)"> Backfill flags </text><text
class="breeze-registry-backfill-r5" x="219.6" y="190.8" textLength="1220"
clip-path="url(#breeze-registry-backfill-line-7)">────────────────────────────────────────────────────
[...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="215.2"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-8)">│</text><text
class="breeze-registry-backfill-r6" x="24.4" y="215.2" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-8)">*</text><text
class="breeze-registry-backfill-r4" x="61" y="215.2" textLength="122"
clip-path="url(#breeze-registry-backfill-line-8)">--provider</text><text
class="breeze-registry-backfill-r1" x="207.4" y="215.2" textLengt [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="239.6"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-9)">│</text><text
class="breeze-registry-backfill-r6" x="24.4" y="239.6" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-9)">*</text><text
class="breeze-registry-backfill-r4" x="61" y="239.6" textLength="122"
clip-path="url(#breeze-registry-backfill-line-9)">--version </text><text
class="breeze-registry-backfill-r1" x="207.4" y="239.6" text [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="264"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-10)">│</text><text
class="breeze-registry-backfill-r8" x="207.4" y="264" textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-10)">(TEXT)</text><text
class="breeze-registry-backfill-r5" x="1451.8" y="264" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-10)">│</text><text
class="breeze-registry-backfill-r1" x="1464" y="264" textLength="1 [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="288.4"
textLength="1464"
clip-path="url(#breeze-registry-backfill-line-11)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-backfill-r1" x="1464" y="288.4" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-11)">
-</text><text class="breeze-registry-backfill-r5" x="0" y="312.8"
textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-12)">╭─</text><text
class="breeze-registry-backfill-r5" x="24.4" y="312.8" textLength="195.2"
clip-path="url(#breeze-registry-backfill-line-12)"> Common options </text><text
class="breeze-registry-backfill-r5" x="219.6" y="312.8" textLength="1220"
clip-path="url(#breeze-registry-backfill-line-12)">─────────────────────────────────────────────────
[...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="337.2"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-13)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="337.2" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-13)">--verbose</text><text
class="breeze-registry-backfill-r9" x="158.6" y="337.2" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-13)">-v</text><text
class="breeze-registry-backfill-r1" x="207.4" y="337.2" t [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="361.6"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-14)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="361.6" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-14)">--dry-run</text><text
class="breeze-registry-backfill-r9" x="158.6" y="361.6" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-14)">-D</text><text
class="breeze-registry-backfill-r1" x="207.4" y="361.6" t [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="386"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-15)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="386" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-15)">--help   </text><text
class="breeze-registry-backfill-r9" x="158.6" y="386" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-15)">-h</text><text
class="breeze-registry-backfill-r1" x="207.4" y= [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="410.4"
textLength="1464"
clip-path="url(#breeze-registry-backfill-line-16)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-backfill-r1" x="1464" y="410.4" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-16)">
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="117.6"
textLength="1439.6"
clip-path="url(#breeze-registry-backfill-line-4)">for metadata.json, then extract_parameters.py + extract_connections.py inside the Breeze CI container (or via 'uv run </text><text
class="breeze-registry-backfill-r1" x="1464" y="117.6" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-4)">
+</text><text class="breeze-registry-backfill-r4" x="12.2" y="142"
textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-5)">--with</text><text
class="breeze-registry-backfill-r1" x="85.4" y="142" textLength="85.4"
clip-path="url(#breeze-registry-backfill-line-5)">' with </text><text
class="breeze-registry-backfill-r4" x="170.8" y="142" textLength="134.2"
clip-path="url(#breeze-registry-backfill-line-5)">--no-docker</text><text
class="breeze-registry-backfill-r1" [...]
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="166.4"
textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-6)">parallel.</text><text
class="breeze-registry-backfill-r1" x="1464" y="166.4" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-6)">
+</text><text class="breeze-registry-backfill-r1" x="1464" y="190.8"
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-7)">
+</text><text class="breeze-registry-backfill-r5" x="0" y="215.2"
textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-8)">╭─</text><text
class="breeze-registry-backfill-r5" x="24.4" y="215.2" textLength="195.2"
clip-path="url(#breeze-registry-backfill-line-8)"> Backfill flags </text><text
class="breeze-registry-backfill-r5" x="219.6" y="215.2" textLength="1220"
clip-path="url(#breeze-registry-backfill-line-8)">────────────────────────────────────────────────────
[...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="239.6"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-9)">│</text><text
class="breeze-registry-backfill-r4" x="61" y="239.6" textLength="292.8"
clip-path="url(#breeze-registry-backfill-line-9)">--python                </text><text
class="breeze-registry-backfill-r6" x="378.2" y="239.6" textLength="24.4"
clip-path="url(#breeze-registry- [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="264"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-10)">│</text><text
class="breeze-registry-backfill-r7" x="427" y="264" textLength="439.2"
clip-path="url(#breeze-registry-backfill-line-10)">(>3.10< | 3.11 | 3.12 | 3.13 | 3.14)</text><text
class="breeze-registry-backfill-r5" x="1451.8" y="264" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-10)">│</te [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="288.4"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-11)">│</text><text
class="breeze-registry-backfill-r8" x="24.4" y="288.4" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-11)">*</text><text
class="breeze-registry-backfill-r4" x="61" y="288.4" textLength="292.8"
clip-path="url(#breeze-registry-backfill-line-11)">--provider            &
[...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="312.8"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-12)">│</text><text
class="breeze-registry-backfill-r8" x="24.4" y="312.8" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-12)">*</text><text
class="breeze-registry-backfill-r4" x="61" y="312.8" textLength="292.8"
clip-path="url(#breeze-registry-backfill-line-12)">--version            &#
[...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="337.2"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-13)">│</text><text
class="breeze-registry-backfill-r1" x="427" y="337.2" textLength="85.4"
clip-path="url(#breeze-registry-backfill-line-13)">9.20.0 </text><text
class="breeze-registry-backfill-r9" x="512.4" y="337.2" textLength="122"
clip-path="url(#breeze-registry-backfill-line-13)">[required]</text><text
class="breeze-registry-backfill-r7" x="646.6" y=" [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="361.6"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-14)">│</text><text
class="breeze-registry-backfill-r4" x="61" y="361.6" textLength="146.4"
clip-path="url(#breeze-registry-backfill-line-14)">--use-docker</text><text
class="breeze-registry-backfill-r1" x="207.4" y="361.6" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-14)">/</text><text
class="breeze-registry-backfill-r4" x="219.6" y="361.6" t [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="386"
textLength="1464"
clip-path="url(#breeze-registry-backfill-line-15)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-backfill-r1" x="1464" y="386" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-15)">
+</text><text class="breeze-registry-backfill-r5" x="0" y="410.4"
textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-16)">╭─</text><text
class="breeze-registry-backfill-r5" x="24.4" y="410.4" textLength="195.2"
clip-path="url(#breeze-registry-backfill-line-16)"> Common options </text><text
class="breeze-registry-backfill-r5" x="219.6" y="410.4" textLength="1220"
clip-path="url(#breeze-registry-backfill-line-16)">─────────────────────────────────────────────────
[...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="434.8"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-17)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="434.8" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-17)">--verbose</text><text
class="breeze-registry-backfill-r6" x="158.6" y="434.8" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-17)">-v</text><text
class="breeze-registry-backfill-r1" x="207.4" y="434.8" t [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="459.2"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-18)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="459.2" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-18)">--dry-run</text><text
class="breeze-registry-backfill-r6" x="158.6" y="459.2" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-18)">-D</text><text
class="breeze-registry-backfill-r1" x="207.4" y="459.2" t [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="483.6"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-19)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="483.6" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-19)">--help   </text><text
class="breeze-registry-backfill-r6" x="158.6" y="483.6" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-19)">-h</text><text
class="breeze-registry-backfill-r1" x="207 [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="508"
textLength="1464"
clip-path="url(#breeze-registry-backfill-line-20)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-backfill-r1" x="1464" y="508" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-20)">
</text>
</g>
</g>
diff --git a/dev/breeze/doc/images/output_registry_backfill.txt
b/dev/breeze/doc/images/output_registry_backfill.txt
index a6c2643c303..6a923c1535f 100644
--- a/dev/breeze/doc/images/output_registry_backfill.txt
+++ b/dev/breeze/doc/images/output_registry_backfill.txt
@@ -1 +1 @@
-ae5a02b96d38958e1b732eef27e22743
+88cde3ee57bf72e6fb436203f64e6a14
diff --git a/dev/breeze/src/airflow_breeze/commands/registry_commands.py
b/dev/breeze/src/airflow_breeze/commands/registry_commands.py
index f831f727404..900ce65f2de 100644
--- a/dev/breeze/src/airflow_breeze/commands/registry_commands.py
+++ b/dev/breeze/src/airflow_breeze/commands/registry_commands.py
@@ -17,6 +17,7 @@
from __future__ import annotations
import json
+import shutil
import sys
import tempfile
import uuid
@@ -257,14 +258,131 @@ def _run_extract_script(
return result.returncode
+def _backfill_docker(
+ python: str,
+ provider: str,
+ versions: tuple[str, ...],
+ package_name: str,
+ extras: list[str],
+) -> list[str]:
+ """Run parameter/connection extraction inside the Breeze CI container."""
+ failed: list[str] = []
+ unique_project_name = f"breeze-backfill-{uuid.uuid4().hex[:8]}"
+
+ shell_params = ShellParams(
+ python=python,
+ project_name=unique_project_name,
+ quiet=True,
+ skip_environment_initialization=True,
+ extra_args=(),
+ )
+
+ rebuild_or_pull_ci_image_if_needed(command_params=shell_params)
+
+ # Place isolated providers.json under dev/registry/ so it's visible inside the container
+ # at /opt/airflow/dev/registry/
+ backfill_tmp_dir = DEV_REGISTRY_DIR / ".backfill_tmp"
+ backfill_tmp_dir.mkdir(exist_ok=True)
+
+ try:
+ for version in versions:
+ click.echo(f"{'=' * 60}")
+ click.echo(f"Extracting {provider} {version} (Docker)")
+ click.echo(f"{'=' * 60}")
+
+ providers_json = _create_isolated_providers_json(
+ provider, package_name, version, backfill_tmp_dir
+ )
+ container_providers_json = f"/opt/airflow/dev/registry/.backfill_tmp/{providers_json.name}"
+
+ pip_spec = _build_pip_spec(package_name, extras, version)
+ base_spec = f"{package_name}=={version}"
+
+ command = (
+ f"cd dev/registry && "
+ f"uv run --with '{pip_spec}' bash -c '"
+ f"python extract_parameters.py "
+ f"--provider {provider} --providers-json
{container_providers_json} && "
+ f"python extract_connections.py "
+ f"--provider {provider} --providers-json
{container_providers_json}'"
+ )
+
+ result = execute_command_in_shell(
+ shell_params=shell_params,
+ project_name=unique_project_name,
+ command=command,
+ preserve_backend=True,
+ )
+
+ if result.returncode != 0 and pip_spec != base_spec:
+ click.echo(f"Retrying without extras ({base_spec})...")
+ command_fallback = (
+ f"cd dev/registry && "
+ f"uv run --with '{base_spec}' bash -c '"
+ f"python extract_parameters.py "
+ f"--provider {provider} --providers-json
{container_providers_json} && "
+ f"python extract_connections.py "
+ f"--provider {provider} --providers-json
{container_providers_json}'"
+ )
+ result = execute_command_in_shell(
+ shell_params=shell_params,
+ project_name=unique_project_name,
+ command=command_fallback,
+ preserve_backend=True,
+ )
+
+ if result.returncode != 0:
+ click.echo(f"WARNING: extraction failed for {version} (exit {result.returncode})")
+ failed.append(f"{version}/docker-extraction")
+ finally:
+ shutil.rmtree(backfill_tmp_dir, ignore_errors=True)
+ fix_ownership_using_docker()
+
+ return failed
+
+
+def _backfill_uv(
+ provider: str,
+ versions: tuple[str, ...],
+ package_name: str,
+ extras: list[str],
+) -> list[str]:
+ """Run parameter/connection extraction via 'uv run --with' on the host."""
+ failed: list[str] = []
+
+ with tempfile.TemporaryDirectory(prefix=f"backfill-{provider}-") as
tmp_dir:
+ tmp_path = Path(tmp_dir)
+
+ for version in versions:
+ click.echo(f"{'=' * 60}")
+ click.echo(f"Extracting {provider} {version} (uv)")
+ click.echo(f"{'=' * 60}")
+
+ providers_json = _create_isolated_providers_json(provider,
package_name, version, tmp_path)
+
+ pip_spec = _build_pip_spec(package_name, extras, version)
+ base_spec = f"{package_name}=={version}"
+
+ for script in EXTRACT_SCRIPTS:
+ click.echo(f"\nRunning {script.name} with {pip_spec}...")
+ returncode = _run_extract_script(script, pip_spec, base_spec,
provider, providers_json)
+ if returncode != 0:
+ click.echo(f"WARNING: {script.name} failed for {version}
(exit {returncode})")
+ failed.append(f"{version}/{script.name}")
+
+ return failed
+
+
@registry_group.command(
name="backfill",
help="Extract metadata, parameters, and connections for older provider
versions. "
"Runs extract_versions.py (host, git tags) for metadata.json, then "
- "extract_parameters.py + extract_connections.py via 'uv run --with'. "
- "No Docker needed. Each version uses an isolated providers.json, so "
+ "extract_parameters.py + extract_connections.py inside the Breeze CI
container "
+ "(or via 'uv run --with' with --no-docker). "
+ "Each version uses an isolated providers.json, so "
"multiple providers can be backfilled in parallel.",
)
+@option_python
@click.option(
"--provider",
required=True,
@@ -277,21 +395,26 @@ def _run_extract_script(
multiple=True,
help="Version(s) to extract. Can be specified multiple times: --version
9.21.0 --version 9.20.0",
)
[email protected](
+ "--use-docker/--no-docker",
+ default=True,
+ help="Run extraction in CI Docker container (default) or via uv on host.",
+)
@option_verbose
@option_dry_run
-def backfill(provider: str, versions: tuple[str, ...]):
+def backfill(python: str, provider: str, versions: tuple[str, ...],
use_docker: bool):
package_name, extras = _read_provider_yaml_info(provider)
click.echo(f"Provider: {provider} ({package_name})")
click.echo(f"Versions: {', '.join(versions)}")
+ click.echo(f"Mode: {'Docker' if use_docker else 'uv (host)'}")
if extras:
click.echo(f"Extras: {', '.join(extras)}")
click.echo()
failed: list[str] = []
- # Step 1: extract_versions.py (host, reads git tags) → metadata.json
- # Without metadata.json, Eleventy won't generate version pages.
+ # Step 1: extract_versions.py (host, reads git tags) -> metadata.json
click.echo("Step 1: Extracting version metadata from git tags...")
for version in versions:
versions_cmd = [
@@ -309,28 +432,12 @@ def backfill(provider: str, versions: tuple[str, ...]):
click.echo(f"WARNING: extract_versions.py failed for {version}
(exit {result.returncode})")
failed.append(f"{version}/extract_versions.py")
- # Step 2: extract_parameters.py + extract_connections.py (uv run --with)
+ # Step 2: extract_parameters.py + extract_connections.py
click.echo("\nStep 2: Extracting parameters and connections...")
- with tempfile.TemporaryDirectory(prefix=f"backfill-{provider}-") as
tmp_dir:
- tmp_path = Path(tmp_dir)
-
- for version in versions:
- click.echo(f"{'=' * 60}")
- click.echo(f"Extracting {provider} {version}")
- click.echo(f"{'=' * 60}")
-
- # Each version gets its own isolated providers.json — no shared
state
- providers_json = _create_isolated_providers_json(provider,
package_name, version, tmp_path)
-
- pip_spec = _build_pip_spec(package_name, extras, version)
- base_spec = f"{package_name}=={version}"
-
- for script in EXTRACT_SCRIPTS:
- click.echo(f"\nRunning {script.name} with {pip_spec}...")
- returncode = _run_extract_script(script, pip_spec, base_spec,
provider, providers_json)
- if returncode != 0:
- click.echo(f"WARNING: {script.name} failed for {version}
(exit {returncode})")
- failed.append(f"{version}/{script.name}")
+ if use_docker:
+ failed.extend(_backfill_docker(python, provider, versions,
package_name, extras))
+ else:
+ failed.extend(_backfill_uv(provider, versions, package_name, extras))
click.echo(f"\n{'=' * 60}")
if failed:
diff --git a/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py b/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py
index 24b4e870ab0..d7b44036754 100644
--- a/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py
+++ b/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py
@@ -40,8 +40,10 @@ REGISTRY_PARAMETERS: dict[str, list[dict[str, str | list[str]]]] = {
{
"name": "Backfill flags",
"options": [
+ "--python",
"--provider",
"--version",
+ "--use-docker",
],
},
],
diff --git a/dev/registry/.gitignore b/dev/registry/.gitignore
index 893bb46fa43..41879335fec 100644
--- a/dev/registry/.gitignore
+++ b/dev/registry/.gitignore
@@ -1,4 +1,5 @@
.backfill-logs/
+.backfill_tmp/
.inventory_cache/
output/
runtime_modules.json