This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch v3-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v3-2-test by this push:
     new 35e0911526e Fix registry backfill with per-provider versions and Docker extraction (#65223) (#66927)
35e0911526e is described below

commit 35e0911526e0a65a36ea63475c06e47070cad6c2
Author: Rahul Vats <[email protected]>
AuthorDate: Fri May 15 07:12:22 2026 +0530

    Fix registry backfill with per-provider versions and Docker extraction (#65223) (#66927)
    
    Chain both extraction scripts in a single uv run invocation to avoid
    creating two ephemeral environments per version.
    
    (cherry picked from commit cda493d1b600d80ba5306eadd8a2fc9e1703b106)
    
    Co-authored-by: Kaxil Naik <[email protected]>
---
 .github/workflows/registry-backfill.yml            | 110 ++++++++------
 dev/breeze/doc/images/output_registry.svg          |   4 +-
 dev/breeze/doc/images/output_registry.txt          |   2 +-
 dev/breeze/doc/images/output_registry_backfill.svg |  56 +++++---
 dev/breeze/doc/images/output_registry_backfill.txt |   2 +-
 .../airflow_breeze/commands/registry_commands.py   | 159 +++++++++++++++++----
 .../commands/registry_commands_config.py           |   2 +
 dev/registry/.gitignore                            |   1 +
 8 files changed, 244 insertions(+), 92 deletions(-)

diff --git a/.github/workflows/registry-backfill.yml b/.github/workflows/registry-backfill.yml
index 65da127365f..138e15b66cd 100644
--- a/.github/workflows/registry-backfill.yml
+++ b/.github/workflows/registry-backfill.yml
@@ -29,23 +29,54 @@ on:  # yamllint disable-line rule:truthy
           - staging
           - live
         default: staging
-      providers:
+      provider-versions:
         description: >
-          Space-separated provider IDs
-          (e.g. 'amazon google databricks')
-        required: true
-        type: string
-      versions:
-        description: >
-          Space-separated versions to backfill
-          (e.g. '9.15.0 9.14.0'). Applied to ALL providers.
+          Space-separated provider/version pairs
+          (e.g. 'amazon/9.24.0 google/21.0.0 celery/3.17.2').
+          Multiple versions per provider are grouped into one job.
         required: true
         type: string
 
 permissions:
   contents: read
+  packages: read
 
 jobs:
+  build-ci-image:
+    name: "Build CI image"
+    uses: ./.github/workflows/ci-image-build.yml
+    permissions:
+      contents: read
+      packages: write
+    if: >
+      contains(fromJSON('[
+        "ashb",
+        "bugraoz93",
+        "eladkal",
+        "ephraimbuddy",
+        "jedcunningham",
+        "jscheffl",
+        "kaxil",
+        "pierrejeambrun",
+        "shahar1",
+        "potiuk",
+        "utkarsharma2",
+        "vincbeck"
+        ]'), github.event.sender.login)
+    with:
+      runners: '["ubuntu-22.04"]'
+      platform: "linux/amd64"
+      push-image: "false"
+      upload-image-artifact: "true"
+      upload-mount-cache-artifact: "false"
+      python-versions: '["3.12"]'
+      branch: "main"
+      constraints-branch: "constraints-main"
+      use-uv: "true"
+      upgrade-to-newer-dependencies: "false"
+      docker-cache: "registry"
+      disable-airflow-repo-cache: "false"
+
   prepare:
     runs-on: ubuntu-latest
     outputs:
@@ -55,12 +86,19 @@ jobs:
       - name: "Build provider matrix"
         id: matrix
         env:
-          PROVIDERS: ${{ inputs.providers }}
+          PROVIDER_VERSIONS: ${{ inputs.provider-versions }}
         run: |
-          MATRIX=$(echo "${PROVIDERS}" \
-            | tr ' ' '\n' | jq -R . \
-            | jq -cs '{"provider": .}')
+          # Parse provider/version pairs, group by provider
+          # Input: "amazon/9.24.0 google/21.0.0 amazon/9.23.0"
+          # Output: {"include": [{"provider":"amazon","versions":"9.24.0 9.23.0"}, ...]}
+          MATRIX=$(echo "${PROVIDER_VERSIONS}" | tr ' ' '\n' | grep '/' | \
+            jq -R 'split("/") | {provider: .[0], version: .[1]}' | \
+            jq -cs 'group_by(.provider) | map({
+              provider: .[0].provider,
+              versions: (map(.version) | join(" "))
+            }) | {include: .}')
           echo "matrix=${MATRIX}" >> "${GITHUB_OUTPUT}"
+          echo "Matrix: ${MATRIX}"
 
       - name: "Determine S3 destination"
         id: destination
@@ -76,28 +114,16 @@ jobs:
             >> "${GITHUB_OUTPUT}"
 
   backfill:
-    needs: prepare
+    needs: [prepare, build-ci-image]
     runs-on: ubuntu-latest
     timeout-minutes: 60
     strategy:
       fail-fast: false
       matrix: ${{ fromJSON(needs.prepare.outputs.matrix) }}
-    name: "Backfill ${{ matrix.provider }}"
-    if: >
-      contains(fromJSON('[
-        "ashb",
-        "bugraoz93",
-        "eladkal",
-        "ephraimbuddy",
-        "jedcunningham",
-        "jscheffl",
-        "kaxil",
-        "pierrejeambrun",
-        "shahar1",
-        "potiuk",
-        "utkarsharma2",
-        "vincbeck"
-        ]'), github.event.sender.login)
+    name: "Backfill ${{ matrix.provider }} (${{ matrix.versions }})"
+    permissions:
+      contents: read
+      packages: read
     steps:
       - name: "Checkout repository"
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
@@ -107,23 +133,23 @@ jobs:
 
       - name: "Fetch provider tags"
         env:
-          VERSIONS: ${{ inputs.versions }}
+          VERSIONS: ${{ matrix.versions }}
           PROVIDER: ${{ matrix.provider }}
         run: |
           for VERSION in ${VERSIONS}; do
             TAG="providers-${PROVIDER}/${VERSION}"
             echo "Fetching tag: ${TAG}"
             git fetch origin tag "${TAG}" \
-              2>/dev/null || echo "Tag not found"
+              2>/dev/null || echo "Tag not found: ${TAG}"
           done
 
-      - name: "Install uv"
-        uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b  # v8.1.0
-
-      - name: "Install Breeze"
-        uses: ./.github/actions/breeze
+      - name: "Prepare breeze & CI image"
+        uses: ./.github/actions/prepare_breeze_and_image
         with:
-          python-version: "3.12"
+          python: "3.12"
+          platform: "linux/amd64"
+          use-uv: "true"
+          make-mnt-writeable-and-cleanup: "true"
 
       - name: "Install AWS CLI v2"
         run: |
@@ -152,7 +178,7 @@ jobs:
 
       - name: "Extract version metadata from git tags"
         env:
-          VERSIONS: ${{ inputs.versions }}
+          VERSIONS: ${{ matrix.versions }}
           PROVIDER: ${{ matrix.provider }}
         run: |
           VERSION_ARGS=""
@@ -164,7 +190,7 @@ jobs:
 
       - name: "Run breeze registry backfill"
         env:
-          VERSIONS: ${{ inputs.versions }}
+          VERSIONS: ${{ matrix.versions }}
           PROVIDER: ${{ matrix.provider }}
         run: |
           VERSION_ARGS=""
@@ -172,7 +198,7 @@ jobs:
             VERSION_ARGS="${VERSION_ARGS} --version ${VERSION}"
           done
           breeze registry backfill \
-            --provider "${PROVIDER}" ${VERSION_ARGS}
+            --provider "${PROVIDER}" --python 3.12 ${VERSION_ARGS}
 
       - name: "Download data files from S3 for build"
         env:
@@ -211,7 +237,7 @@ jobs:
         env:
           S3_BUCKET: ${{ needs.prepare.outputs.bucket }}
           CACHE_CONTROL: "public, max-age=300"
-          VERSIONS: ${{ inputs.versions }}
+          VERSIONS: ${{ matrix.versions }}
           PROVIDER: ${{ matrix.provider }}
         run: |
           for VERSION in ${VERSIONS}; do
diff --git a/dev/breeze/doc/images/output_registry.svg b/dev/breeze/doc/images/output_registry.svg
index 2a077a0ba4b..d836e0dd52c 100644
--- a/dev/breeze/doc/images/output_registry.svg
+++ b/dev/breeze/doc/images/output_registry.svg
@@ -116,8 +116,8 @@
 </text><text class="breeze-registry-r5" x="0" y="239.6" textLength="12.2" 
clip-path="url(#breeze-registry-line-9)">│</text><text 
class="breeze-registry-r4" x="24.4" y="239.6" textLength="195.2" 
clip-path="url(#breeze-registry-line-9)">extract-data&#160;&#160;&#160;&#160;</text><text
 class="breeze-registry-r1" x="244" y="239.6" textLength="1195.6" 
clip-path="url(#breeze-registry-line-9)">Extract&#160;provider&#160;metadata,&#160;parameters,&#160;and&#160;connection&#160;types&#160;for&#16
 [...]
 </text><text class="breeze-registry-r5" x="0" y="264" textLength="12.2" 
clip-path="url(#breeze-registry-line-10)">│</text><text 
class="breeze-registry-r4" x="24.4" y="264" textLength="195.2" 
clip-path="url(#breeze-registry-line-10)">backfill&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="breeze-registry-r1" x="244" y="264" textLength="1195.6" 
clip-path="url(#breeze-registry-line-10)">Extract&#160;metadata,&#160;parameters,&#160;and&#160;connections&#160;for&#160;older
 [...]
 </text><text class="breeze-registry-r5" x="0" y="288.4" textLength="12.2" 
clip-path="url(#breeze-registry-line-11)">│</text><text 
class="breeze-registry-r1" x="244" y="288.4" textLength="1195.6" 
clip-path="url(#breeze-registry-line-11)">extract_versions.py&#160;(host,&#160;git&#160;tags)&#160;for&#160;metadata.json,&#160;then&#160;extract_parameters.py&#160;+&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="breeze-registry-r5" x="1451 [...]
-</text><text class="breeze-registry-r5" x="0" y="312.8" textLength="12.2" 
clip-path="url(#breeze-registry-line-12)">│</text><text 
class="breeze-registry-r1" x="244" y="312.8" textLength="427" 
clip-path="url(#breeze-registry-line-12)">extract_connections.py&#160;via&#160;&#x27;uv&#160;run&#160;</text><text
 class="breeze-registry-r4" x="671" y="312.8" textLength="73.2" 
clip-path="url(#breeze-registry-line-12)">--with</text><text 
class="breeze-registry-r1" x="744.2" y="312.8" textLength="69 [...]
-</text><text class="breeze-registry-r5" x="0" y="337.2" textLength="12.2" 
clip-path="url(#breeze-registry-line-13)">│</text><text 
class="breeze-registry-r1" x="244" y="337.2" textLength="1195.6" 
clip-path="url(#breeze-registry-line-13)">providers.json,&#160;so&#160;multiple&#160;providers&#160;can&#160;be&#160;backfilled&#160;in&#160;parallel.&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#1
 [...]
+</text><text class="breeze-registry-r5" x="0" y="312.8" textLength="12.2" 
clip-path="url(#breeze-registry-line-12)">│</text><text 
class="breeze-registry-r1" x="244" y="312.8" textLength="854" 
clip-path="url(#breeze-registry-line-12)">extract_connections.py&#160;inside&#160;the&#160;Breeze&#160;CI&#160;container&#160;(or&#160;via&#160;&#x27;uv&#160;run&#160;</text><text
 class="breeze-registry-r4" x="1098" y="312.8" textLength="73.2" 
clip-path="url(#breeze-registry-line-12)">--with</text>< [...]
+</text><text class="breeze-registry-r5" x="0" y="337.2" textLength="12.2" 
clip-path="url(#breeze-registry-line-13)">│</text><text 
class="breeze-registry-r1" x="244" y="337.2" textLength="1195.6" 
clip-path="url(#breeze-registry-line-13)">Each&#160;version&#160;uses&#160;an&#160;isolated&#160;providers.json,&#160;so&#160;multiple&#160;providers&#160;can&#160;be&#160;backfilled&#160;in&#160;parallel.</text><text
 class="breeze-registry-r5" x="1451.8" y="337.2" textLength="12.2" 
clip-path="ur [...]
 </text><text class="breeze-registry-r5" x="0" y="361.6" textLength="12.2" 
clip-path="url(#breeze-registry-line-14)">│</text><text 
class="breeze-registry-r4" x="24.4" y="361.6" textLength="195.2" 
clip-path="url(#breeze-registry-line-14)">publish-versions</text><text 
class="breeze-registry-r1" x="244" y="361.6" textLength="1195.6" 
clip-path="url(#breeze-registry-line-14)">Publish&#160;per-provider&#160;versions.json&#160;to&#160;S3&#160;from&#160;deployed&#160;directories.&#160;Same&#160;p
 [...]
 </text><text class="breeze-registry-r5" x="0" y="386" textLength="12.2" 
clip-path="url(#breeze-registry-line-15)">│</text><text 
class="breeze-registry-r1" x="244" y="386" textLength="1195.6" 
clip-path="url(#breeze-registry-line-15)">release-management&#160;publish-docs-to-s3&#x27;.&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;
 [...]
 </text><text class="breeze-registry-r5" x="0" y="410.4" textLength="1464" 
clip-path="url(#breeze-registry-line-16)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
 class="breeze-registry-r1" x="1464" y="410.4" textLength="12.2" 
clip-path="url(#breeze-registry-line-16)">
diff --git a/dev/breeze/doc/images/output_registry.txt b/dev/breeze/doc/images/output_registry.txt
index 2c84fcf83d1..8ae38cfd5e5 100644
--- a/dev/breeze/doc/images/output_registry.txt
+++ b/dev/breeze/doc/images/output_registry.txt
@@ -1 +1 @@
-b1c2694af08bb5e10ae6f2c3b9bb2479
+27b4df2c81ed8e0d4c566e552e13bb6a
diff --git a/dev/breeze/doc/images/output_registry_backfill.svg b/dev/breeze/doc/images/output_registry_backfill.svg
index 75ae72f45d2..92f5a0586cd 100644
--- a/dev/breeze/doc/images/output_registry_backfill.svg
+++ b/dev/breeze/doc/images/output_registry_backfill.svg
@@ -1,4 +1,4 @@
-<svg class="rich-terminal" viewBox="0 0 1482 464.79999999999995" xmlns="http://www.w3.org/2000/svg">
+<svg class="rich-terminal" viewBox="0 0 1482 562.4" xmlns="http://www.w3.org/2000/svg">
     <!-- Generated with Rich https://www.textualize.io -->
     <style>
 
@@ -37,15 +37,15 @@
 .breeze-registry-backfill-r3 { fill: #c5c8c6;font-weight: bold }
 .breeze-registry-backfill-r4 { fill: #68a0b3;font-weight: bold }
 .breeze-registry-backfill-r5 { fill: #868887 }
-.breeze-registry-backfill-r6 { fill: #cc555a }
-.breeze-registry-backfill-r7 { fill: #8a4346 }
-.breeze-registry-backfill-r8 { fill: #8d7b39 }
-.breeze-registry-backfill-r9 { fill: #98a84b;font-weight: bold }
+.breeze-registry-backfill-r6 { fill: #98a84b;font-weight: bold }
+.breeze-registry-backfill-r7 { fill: #8d7b39 }
+.breeze-registry-backfill-r8 { fill: #cc555a }
+.breeze-registry-backfill-r9 { fill: #8a4346 }
     </style>
 
     <defs>
     <clipPath id="breeze-registry-backfill-clip-terminal">
-      <rect x="0" y="0" width="1463.0" height="413.79999999999995" />
+      <rect x="0" y="0" width="1463.0" height="511.4" />
     </clipPath>
     <clipPath id="breeze-registry-backfill-line-0">
     <rect x="0" y="1.5" width="1464" height="24.65"/>
@@ -95,9 +95,21 @@
 <clipPath id="breeze-registry-backfill-line-15">
     <rect x="0" y="367.5" width="1464" height="24.65"/>
             </clipPath>
+<clipPath id="breeze-registry-backfill-line-16">
+    <rect x="0" y="391.9" width="1464" height="24.65"/>
+            </clipPath>
+<clipPath id="breeze-registry-backfill-line-17">
+    <rect x="0" y="416.3" width="1464" height="24.65"/>
+            </clipPath>
+<clipPath id="breeze-registry-backfill-line-18">
+    <rect x="0" y="440.7" width="1464" height="24.65"/>
+            </clipPath>
+<clipPath id="breeze-registry-backfill-line-19">
+    <rect x="0" y="465.1" width="1464" height="24.65"/>
+            </clipPath>
     </defs>
 
-    <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1" 
x="1" y="1" width="1480" height="462.8" rx="8"/><text 
class="breeze-registry-backfill-title" fill="#c5c8c6" text-anchor="middle" 
x="740" y="27">Command:&#160;registry&#160;backfill</text>
+    <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1" 
x="1" y="1" width="1480" height="560.4" rx="8"/><text 
class="breeze-registry-backfill-title" fill="#c5c8c6" text-anchor="middle" 
x="740" y="27">Command:&#160;registry&#160;backfill</text>
             <g transform="translate(26,22)">
             <circle cx="0" cy="0" r="7" fill="#ff5f57"/>
             <circle cx="22" cy="0" r="7" fill="#febc2e"/>
@@ -111,19 +123,23 @@
 </text><text class="breeze-registry-backfill-r2" x="12.2" y="44.4" 
textLength="73.2" 
clip-path="url(#breeze-registry-backfill-line-1)">Usage:</text><text 
class="breeze-registry-backfill-r3" x="97.6" y="44.4" textLength="292.8" 
clip-path="url(#breeze-registry-backfill-line-1)">breeze&#160;registry&#160;backfill</text><text
 class="breeze-registry-backfill-r1" x="402.6" y="44.4" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-1)">[</text><text 
class="breeze-registry-backfill [...]
 </text><text class="breeze-registry-backfill-r1" x="1464" y="68.8" 
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-2)">
 </text><text class="breeze-registry-backfill-r1" x="12.2" y="93.2" 
textLength="1427.4" 
clip-path="url(#breeze-registry-backfill-line-3)">Extract&#160;metadata,&#160;parameters,&#160;and&#160;connections&#160;for&#160;older&#160;provider&#160;versions.&#160;Runs&#160;extract_versions.py&#160;(host,&#160;git&#160;tags)&#160;</text><text
 class="breeze-registry-backfill-r1" x="1464" y="93.2" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-3)">
-</text><text class="breeze-registry-backfill-r1" x="12.2" y="117.6" 
textLength="1012.6" 
clip-path="url(#breeze-registry-backfill-line-4)">for&#160;metadata.json,&#160;then&#160;extract_parameters.py&#160;+&#160;extract_connections.py&#160;via&#160;&#x27;uv&#160;run&#160;</text><text
 class="breeze-registry-backfill-r4" x="1024.8" y="117.6" textLength="73.2" 
clip-path="url(#breeze-registry-backfill-line-4)">--with</text><text 
class="breeze-registry-backfill-r1" x="1098" y="117.6" textLengt [...]
-</text><text class="breeze-registry-backfill-r1" x="12.2" y="142" 
textLength="1134.6" 
clip-path="url(#breeze-registry-backfill-line-5)">version&#160;uses&#160;an&#160;isolated&#160;providers.json,&#160;so&#160;multiple&#160;providers&#160;can&#160;be&#160;backfilled&#160;in&#160;parallel.</text><text
 class="breeze-registry-backfill-r1" x="1464" y="142" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-5)">
-</text><text class="breeze-registry-backfill-r1" x="1464" y="166.4" 
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-6)">
-</text><text class="breeze-registry-backfill-r5" x="0" y="190.8" 
textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-7)">╭─</text><text 
class="breeze-registry-backfill-r5" x="24.4" y="190.8" textLength="195.2" 
clip-path="url(#breeze-registry-backfill-line-7)">&#160;Backfill&#160;flags&#160;</text><text
 class="breeze-registry-backfill-r5" x="219.6" y="190.8" textLength="1220" 
clip-path="url(#breeze-registry-backfill-line-7)">────────────────────────────────────────────────────
 [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="215.2" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-8)">│</text><text 
class="breeze-registry-backfill-r6" x="24.4" y="215.2" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-8)">*</text><text 
class="breeze-registry-backfill-r4" x="61" y="215.2" textLength="122" 
clip-path="url(#breeze-registry-backfill-line-8)">--provider</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="215.2" textLengt [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="239.6" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-9)">│</text><text 
class="breeze-registry-backfill-r6" x="24.4" y="239.6" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-9)">*</text><text 
class="breeze-registry-backfill-r4" x="61" y="239.6" textLength="122" 
clip-path="url(#breeze-registry-backfill-line-9)">--version&#160;</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="239.6" text [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="264" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-10)">│</text><text 
class="breeze-registry-backfill-r8" x="207.4" y="264" textLength="73.2" 
clip-path="url(#breeze-registry-backfill-line-10)">(TEXT)</text><text 
class="breeze-registry-backfill-r5" x="1451.8" y="264" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-10)">│</text><text 
class="breeze-registry-backfill-r1" x="1464" y="264" textLength="1 [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="288.4" 
textLength="1464" 
clip-path="url(#breeze-registry-backfill-line-11)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
 class="breeze-registry-backfill-r1" x="1464" y="288.4" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-11)">
-</text><text class="breeze-registry-backfill-r5" x="0" y="312.8" 
textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-12)">╭─</text><text 
class="breeze-registry-backfill-r5" x="24.4" y="312.8" textLength="195.2" 
clip-path="url(#breeze-registry-backfill-line-12)">&#160;Common&#160;options&#160;</text><text
 class="breeze-registry-backfill-r5" x="219.6" y="312.8" textLength="1220" 
clip-path="url(#breeze-registry-backfill-line-12)">─────────────────────────────────────────────────
 [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="337.2" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-13)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="337.2" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-13)">--verbose</text><text 
class="breeze-registry-backfill-r9" x="158.6" y="337.2" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-13)">-v</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="337.2" t [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="361.6" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-14)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="361.6" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-14)">--dry-run</text><text 
class="breeze-registry-backfill-r9" x="158.6" y="361.6" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-14)">-D</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="361.6" t [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="386" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-15)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="386" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-15)">--help&#160;&#160;&#160;</text><text
 class="breeze-registry-backfill-r9" x="158.6" y="386" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-15)">-h</text><text 
class="breeze-registry-backfill-r1" x="207.4" y= [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="410.4" 
textLength="1464" 
clip-path="url(#breeze-registry-backfill-line-16)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
 class="breeze-registry-backfill-r1" x="1464" y="410.4" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-16)">
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="117.6" 
textLength="1439.6" 
clip-path="url(#breeze-registry-backfill-line-4)">for&#160;metadata.json,&#160;then&#160;extract_parameters.py&#160;+&#160;extract_connections.py&#160;inside&#160;the&#160;Breeze&#160;CI&#160;container&#160;(or&#160;via&#160;&#x27;uv&#160;run&#160;</text><text
 class="breeze-registry-backfill-r1" x="1464" y="117.6" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-4)">
+</text><text class="breeze-registry-backfill-r4" x="12.2" y="142" 
textLength="73.2" 
clip-path="url(#breeze-registry-backfill-line-5)">--with</text><text 
class="breeze-registry-backfill-r1" x="85.4" y="142" textLength="85.4" 
clip-path="url(#breeze-registry-backfill-line-5)">&#x27;&#160;with&#160;</text><text
 class="breeze-registry-backfill-r4" x="170.8" y="142" textLength="134.2" 
clip-path="url(#breeze-registry-backfill-line-5)">--no-docker</text><text 
class="breeze-registry-backfill-r1"  [...]
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="166.4" 
textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-6)">parallel.</text><text 
class="breeze-registry-backfill-r1" x="1464" y="166.4" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-6)">
+</text><text class="breeze-registry-backfill-r1" x="1464" y="190.8" 
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-7)">
+</text><text class="breeze-registry-backfill-r5" x="0" y="215.2" 
textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-8)">╭─</text><text 
class="breeze-registry-backfill-r5" x="24.4" y="215.2" textLength="195.2" 
clip-path="url(#breeze-registry-backfill-line-8)">&#160;Backfill&#160;flags&#160;</text><text
 class="breeze-registry-backfill-r5" x="219.6" y="215.2" textLength="1220" 
clip-path="url(#breeze-registry-backfill-line-8)">────────────────────────────────────────────────────
 [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="239.6" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-9)">│</text><text 
class="breeze-registry-backfill-r4" x="61" y="239.6" textLength="292.8" 
clip-path="url(#breeze-registry-backfill-line-9)">--python&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="breeze-registry-backfill-r6" x="378.2" y="239.6" textLength="24.4" 
clip-path="url(#breeze-registry- [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="264" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-10)">│</text><text 
class="breeze-registry-backfill-r7" x="427" y="264" textLength="439.2" 
clip-path="url(#breeze-registry-backfill-line-10)">(&gt;3.10&lt;&#160;|&#160;3.11&#160;|&#160;3.12&#160;|&#160;3.13&#160;|&#160;3.14)</text><text
 class="breeze-registry-backfill-r5" x="1451.8" y="264" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-10)">│</te [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="288.4" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-11)">│</text><text 
class="breeze-registry-backfill-r8" x="24.4" y="288.4" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-11)">*</text><text 
class="breeze-registry-backfill-r4" x="61" y="288.4" textLength="292.8" 
clip-path="url(#breeze-registry-backfill-line-11)">--provider&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&
 [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="312.8" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-12)">│</text><text 
class="breeze-registry-backfill-r8" x="24.4" y="312.8" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-12)">*</text><text 
class="breeze-registry-backfill-r4" x="61" y="312.8" textLength="292.8" 
clip-path="url(#breeze-registry-backfill-line-12)">--version&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#
 [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="337.2" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-13)">│</text><text 
class="breeze-registry-backfill-r1" x="427" y="337.2" textLength="85.4" 
clip-path="url(#breeze-registry-backfill-line-13)">9.20.0&#160;</text><text 
class="breeze-registry-backfill-r9" x="512.4" y="337.2" textLength="122" 
clip-path="url(#breeze-registry-backfill-line-13)">[required]</text><text 
class="breeze-registry-backfill-r7" x="646.6" y=" [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="361.6" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-14)">│</text><text 
class="breeze-registry-backfill-r4" x="61" y="361.6" textLength="146.4" 
clip-path="url(#breeze-registry-backfill-line-14)">--use-docker</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="361.6" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-14)">/</text><text 
class="breeze-registry-backfill-r4" x="219.6" y="361.6" t [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="386" 
textLength="1464" 
clip-path="url(#breeze-registry-backfill-line-15)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
 class="breeze-registry-backfill-r1" x="1464" y="386" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-15)">
+</text><text class="breeze-registry-backfill-r5" x="0" y="410.4" 
textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-16)">╭─</text><text 
class="breeze-registry-backfill-r5" x="24.4" y="410.4" textLength="195.2" 
clip-path="url(#breeze-registry-backfill-line-16)">&#160;Common&#160;options&#160;</text><text
 class="breeze-registry-backfill-r5" x="219.6" y="410.4" textLength="1220" 
clip-path="url(#breeze-registry-backfill-line-16)">─────────────────────────────────────────────────
 [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="434.8" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-17)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="434.8" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-17)">--verbose</text><text 
class="breeze-registry-backfill-r6" x="158.6" y="434.8" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-17)">-v</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="434.8" t [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="459.2" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-18)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="459.2" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-18)">--dry-run</text><text 
class="breeze-registry-backfill-r6" x="158.6" y="459.2" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-18)">-D</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="459.2" t [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="483.6" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-19)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="483.6" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-19)">--help&#160;&#160;&#160;</text><text
 class="breeze-registry-backfill-r6" x="158.6" y="483.6" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-19)">-h</text><text 
class="breeze-registry-backfill-r1" x="207 [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="508" 
textLength="1464" 
clip-path="url(#breeze-registry-backfill-line-20)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
 class="breeze-registry-backfill-r1" x="1464" y="508" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-20)">
 </text>
     </g>
     </g>
diff --git a/dev/breeze/doc/images/output_registry_backfill.txt b/dev/breeze/doc/images/output_registry_backfill.txt
index a6c2643c303..6a923c1535f 100644
--- a/dev/breeze/doc/images/output_registry_backfill.txt
+++ b/dev/breeze/doc/images/output_registry_backfill.txt
@@ -1 +1 @@
-ae5a02b96d38958e1b732eef27e22743
+88cde3ee57bf72e6fb436203f64e6a14
diff --git a/dev/breeze/src/airflow_breeze/commands/registry_commands.py b/dev/breeze/src/airflow_breeze/commands/registry_commands.py
index f831f727404..900ce65f2de 100644
--- a/dev/breeze/src/airflow_breeze/commands/registry_commands.py
+++ b/dev/breeze/src/airflow_breeze/commands/registry_commands.py
@@ -17,6 +17,7 @@
 from __future__ import annotations
 
 import json
+import shutil
 import sys
 import tempfile
 import uuid
@@ -257,14 +258,131 @@ def _run_extract_script(
     return result.returncode
 
 
+def _backfill_docker(
+    python: str,
+    provider: str,
+    versions: tuple[str, ...],
+    package_name: str,
+    extras: list[str],
+) -> list[str]:
+    """Run parameter/connection extraction inside the Breeze CI container."""
+    failed: list[str] = []
+    unique_project_name = f"breeze-backfill-{uuid.uuid4().hex[:8]}"
+
+    shell_params = ShellParams(
+        python=python,
+        project_name=unique_project_name,
+        quiet=True,
+        skip_environment_initialization=True,
+        extra_args=(),
+    )
+
+    rebuild_or_pull_ci_image_if_needed(command_params=shell_params)
+
+    # Place isolated providers.json under dev/registry/ so it's visible inside the container
+    # at /opt/airflow/dev/registry/
+    backfill_tmp_dir = DEV_REGISTRY_DIR / ".backfill_tmp"
+    backfill_tmp_dir.mkdir(exist_ok=True)
+
+    try:
+        for version in versions:
+            click.echo(f"{'=' * 60}")
+            click.echo(f"Extracting {provider} {version} (Docker)")
+            click.echo(f"{'=' * 60}")
+
+            providers_json = _create_isolated_providers_json(
+                provider, package_name, version, backfill_tmp_dir
+            )
+            container_providers_json = f"/opt/airflow/dev/registry/.backfill_tmp/{providers_json.name}"
+
+            pip_spec = _build_pip_spec(package_name, extras, version)
+            base_spec = f"{package_name}=={version}"
+
+            command = (
+                f"cd dev/registry && "
+                f"uv run --with '{pip_spec}' bash -c '"
+                f"python extract_parameters.py "
+                f"--provider {provider} --providers-json {container_providers_json} && "
+                f"python extract_connections.py "
+                f"--provider {provider} --providers-json {container_providers_json}'"
+            )
+
+            result = execute_command_in_shell(
+                shell_params=shell_params,
+                project_name=unique_project_name,
+                command=command,
+                preserve_backend=True,
+            )
+
+            if result.returncode != 0 and pip_spec != base_spec:
+                click.echo(f"Retrying without extras ({base_spec})...")
+                command_fallback = (
+                    f"cd dev/registry && "
+                    f"uv run --with '{base_spec}' bash -c '"
+                    f"python extract_parameters.py "
+                    f"--provider {provider} --providers-json {container_providers_json} && "
+                    f"python extract_connections.py "
+                    f"--provider {provider} --providers-json {container_providers_json}'"
+                )
+                result = execute_command_in_shell(
+                    shell_params=shell_params,
+                    project_name=unique_project_name,
+                    command=command_fallback,
+                    preserve_backend=True,
+                )
+
+            if result.returncode != 0:
+                click.echo(f"WARNING: extraction failed for {version} (exit {result.returncode})")
+                failed.append(f"{version}/docker-extraction")
+    finally:
+        shutil.rmtree(backfill_tmp_dir, ignore_errors=True)
+        fix_ownership_using_docker()
+
+    return failed
+
+
+def _backfill_uv(
+    provider: str,
+    versions: tuple[str, ...],
+    package_name: str,
+    extras: list[str],
+) -> list[str]:
+    """Run parameter/connection extraction via 'uv run --with' on the host."""
+    failed: list[str] = []
+
+    with tempfile.TemporaryDirectory(prefix=f"backfill-{provider}-") as tmp_dir:
+        tmp_path = Path(tmp_dir)
+
+        for version in versions:
+            click.echo(f"{'=' * 60}")
+            click.echo(f"Extracting {provider} {version} (uv)")
+            click.echo(f"{'=' * 60}")
+
+            providers_json = _create_isolated_providers_json(provider, package_name, version, tmp_path)
+
+            pip_spec = _build_pip_spec(package_name, extras, version)
+            base_spec = f"{package_name}=={version}"
+
+            for script in EXTRACT_SCRIPTS:
+                click.echo(f"\nRunning {script.name} with {pip_spec}...")
+                returncode = _run_extract_script(script, pip_spec, base_spec, provider, providers_json)
+                if returncode != 0:
+                    click.echo(f"WARNING: {script.name} failed for {version} (exit {returncode})")
+                    failed.append(f"{version}/{script.name}")
+
+    return failed
+
+
 @registry_group.command(
     name="backfill",
     help="Extract metadata, parameters, and connections for older provider versions. "
     "Runs extract_versions.py (host, git tags) for metadata.json, then "
-    "extract_parameters.py + extract_connections.py via 'uv run --with'. "
-    "No Docker needed. Each version uses an isolated providers.json, so "
+    "extract_parameters.py + extract_connections.py inside the Breeze CI container "
+    "(or via 'uv run --with' with --no-docker). "
+    "Each version uses an isolated providers.json, so "
     "multiple providers can be backfilled in parallel.",
 )
+@option_python
 @click.option(
     "--provider",
     required=True,
@@ -277,21 +395,26 @@ def _run_extract_script(
     multiple=True,
     help="Version(s) to extract. Can be specified multiple times: --version 9.21.0 --version 9.20.0",
 )
+@click.option(
+    "--use-docker/--no-docker",
+    default=True,
+    help="Run extraction in CI Docker container (default) or via uv on host.",
+)
 @option_verbose
 @option_dry_run
-def backfill(provider: str, versions: tuple[str, ...]):
+def backfill(python: str, provider: str, versions: tuple[str, ...], use_docker: bool):
     package_name, extras = _read_provider_yaml_info(provider)
 
     click.echo(f"Provider: {provider} ({package_name})")
     click.echo(f"Versions: {', '.join(versions)}")
+    click.echo(f"Mode: {'Docker' if use_docker else 'uv (host)'}")
     if extras:
         click.echo(f"Extras: {', '.join(extras)}")
     click.echo()
 
     failed: list[str] = []
 
-    # Step 1: extract_versions.py (host, reads git tags) → metadata.json
-    # Without metadata.json, Eleventy won't generate version pages.
+    # Step 1: extract_versions.py (host, reads git tags) -> metadata.json
     click.echo("Step 1: Extracting version metadata from git tags...")
     for version in versions:
         versions_cmd = [
@@ -309,28 +432,12 @@ def backfill(provider: str, versions: tuple[str, ...]):
             click.echo(f"WARNING: extract_versions.py failed for {version} (exit {result.returncode})")
             failed.append(f"{version}/extract_versions.py")
 
-    # Step 2: extract_parameters.py + extract_connections.py (uv run --with)
+    # Step 2: extract_parameters.py + extract_connections.py
     click.echo("\nStep 2: Extracting parameters and connections...")
-    with tempfile.TemporaryDirectory(prefix=f"backfill-{provider}-") as tmp_dir:
-        tmp_path = Path(tmp_dir)
-
-        for version in versions:
-            click.echo(f"{'=' * 60}")
-            click.echo(f"Extracting {provider} {version}")
-            click.echo(f"{'=' * 60}")
-
-            # Each version gets its own isolated providers.json — no shared state
-            providers_json = _create_isolated_providers_json(provider, package_name, version, tmp_path)
-
-            pip_spec = _build_pip_spec(package_name, extras, version)
-            base_spec = f"{package_name}=={version}"
-
-            for script in EXTRACT_SCRIPTS:
-                click.echo(f"\nRunning {script.name} with {pip_spec}...")
-                returncode = _run_extract_script(script, pip_spec, base_spec, provider, providers_json)
-                if returncode != 0:
-                    click.echo(f"WARNING: {script.name} failed for {version} (exit {returncode})")
-                    failed.append(f"{version}/{script.name}")
+    if use_docker:
+        failed.extend(_backfill_docker(python, provider, versions, package_name, extras))
+    else:
+        failed.extend(_backfill_uv(provider, versions, package_name, extras))
 
     click.echo(f"\n{'=' * 60}")
     if failed:
diff --git a/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py b/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py
index 24b4e870ab0..d7b44036754 100644
--- a/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py
+++ b/dev/breeze/src/airflow_breeze/commands/registry_commands_config.py
@@ -40,8 +40,10 @@ REGISTRY_PARAMETERS: dict[str, list[dict[str, str | list[str]]]] = {
         {
             "name": "Backfill flags",
             "options": [
+                "--python",
                 "--provider",
                 "--version",
+                "--use-docker",
             ],
         },
     ],
diff --git a/dev/registry/.gitignore b/dev/registry/.gitignore
index 893bb46fa43..41879335fec 100644
--- a/dev/registry/.gitignore
+++ b/dev/registry/.gitignore
@@ -1,4 +1,5 @@
 .backfill-logs/
+.backfill_tmp/
 .inventory_cache/
 output/
 runtime_modules.json


Reply via email to