This is an automated email from the ASF dual-hosted git repository.

kaxilnaik pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new a9c0bf31a8f Enable parallel backfill by eliminating shared state between providers (#63288)
a9c0bf31a8f is described below

commit a9c0bf31a8fdd081561c48e9fe28d2c79ee43edc
Author: Kaxil Naik <[email protected]>
AuthorDate: Tue Mar 10 20:38:02 2026 +0000

    Enable parallel backfill by eliminating shared state between providers (#63288)
    
    Add --provider and --providers-json flags to extract_parameters.py and
    extract_connections.py so each backfill run uses an isolated temp
    providers.json and only scans the target provider. In --provider mode,
    modules.json is not written (it would be incomplete), so concurrent
    runs don't clobber each other.
    
    The backfill command now creates a TemporaryDirectory with per-version
    providers.json files instead of patching a shared file.
---
 dev/breeze/doc/11_registry_tasks.rst               |  11 ++
 dev/breeze/doc/images/output_registry.svg          |  18 ++-
 dev/breeze/doc/images/output_registry.txt          |   2 +-
 dev/breeze/doc/images/output_registry_backfill.svg |  38 +++---
 dev/breeze/doc/images/output_registry_backfill.txt |   2 +-
 .../airflow_breeze/commands/registry_commands.py   | 136 ++++++++++---------
 dev/breeze/tests/test_registry_backfill.py         | 145 ++++++++++++---------
 dev/registry/extract_connections.py                |  34 ++++-
 dev/registry/extract_parameters.py                 |  93 +++++++++----
 9 files changed, 288 insertions(+), 191 deletions(-)

diff --git a/dev/breeze/doc/11_registry_tasks.rst 
b/dev/breeze/doc/11_registry_tasks.rst
index 6b01d9065dc..9be90b576b3 100644
--- a/dev/breeze/doc/11_registry_tasks.rst
+++ b/dev/breeze/doc/11_registry_tasks.rst
@@ -79,6 +79,17 @@ Example usage:
      # Backfill a hyphenated provider
      breeze registry backfill --provider microsoft-azure --version 11.0.0
 
+Each run uses an isolated temporary ``providers.json``, so different providers
+can be backfilled in parallel from separate terminal sessions:
+
+.. code-block:: bash
+
+     # Terminal 1
+     breeze registry backfill --provider amazon --version 9.15.0 --version 9.14.0
+
+     # Terminal 2 (safe to run simultaneously)
+     breeze registry backfill --provider google --version 14.0.0 --version 13.0.0
+
 Output is written to ``registry/src/_data/versions/{provider}/{version}/``:
 
 - ``parameters.json`` — operator/sensor/hook parameters
diff --git a/dev/breeze/doc/images/output_registry.svg 
b/dev/breeze/doc/images/output_registry.svg
index e4b4f92c4f8..951851010e7 100644
--- a/dev/breeze/doc/images/output_registry.svg
+++ b/dev/breeze/doc/images/output_registry.svg
@@ -1,4 +1,4 @@
-<svg class="rich-terminal" viewBox="0 0 1482 440.4" xmlns="http://www.w3.org/2000/svg">
+<svg class="rich-terminal" viewBox="0 0 1482 464.79999999999995" xmlns="http://www.w3.org/2000/svg">
     <!-- Generated with Rich https://www.textualize.io -->
     <style>
 
@@ -42,7 +42,7 @@
 
     <defs>
     <clipPath id="breeze-registry-clip-terminal">
-      <rect x="0" y="0" width="1463.0" height="389.4" />
+      <rect x="0" y="0" width="1463.0" height="413.79999999999995" />
     </clipPath>
     <clipPath id="breeze-registry-line-0">
     <rect x="0" y="1.5" width="1464" height="24.65"/>
@@ -89,9 +89,12 @@
 <clipPath id="breeze-registry-line-14">
     <rect x="0" y="343.1" width="1464" height="24.65"/>
             </clipPath>
+<clipPath id="breeze-registry-line-15">
+    <rect x="0" y="367.5" width="1464" height="24.65"/>
+            </clipPath>
     </defs>
 
-    <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1" 
x="1" y="1" width="1480" height="438.4" rx="8"/><text 
class="breeze-registry-title" fill="#c5c8c6" text-anchor="middle" x="740" 
y="27">Command:&#160;registry</text>
+    <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1" 
x="1" y="1" width="1480" height="462.8" rx="8"/><text 
class="breeze-registry-title" fill="#c5c8c6" text-anchor="middle" x="740" 
y="27">Command:&#160;registry</text>
             <g transform="translate(26,22)">
             <circle cx="0" cy="0" r="7" fill="#ff5f57"/>
             <circle cx="22" cy="0" r="7" fill="#febc2e"/>
@@ -113,10 +116,11 @@
 </text><text class="breeze-registry-r5" x="0" y="239.6" textLength="12.2" 
clip-path="url(#breeze-registry-line-9)">│</text><text 
class="breeze-registry-r4" x="24.4" y="239.6" textLength="195.2" 
clip-path="url(#breeze-registry-line-9)">extract-data&#160;&#160;&#160;&#160;</text><text
 class="breeze-registry-r1" x="244" y="239.6" textLength="1195.6" 
clip-path="url(#breeze-registry-line-9)">Extract&#160;provider&#160;metadata,&#160;parameters,&#160;and&#160;connection&#160;types&#160;for&#16
 [...]
 </text><text class="breeze-registry-r5" x="0" y="264" textLength="12.2" 
clip-path="url(#breeze-registry-line-10)">│</text><text 
class="breeze-registry-r4" x="24.4" y="264" textLength="195.2" 
clip-path="url(#breeze-registry-line-10)">backfill&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="breeze-registry-r1" x="244" y="264" textLength="1037" 
clip-path="url(#breeze-registry-line-10)">Extract&#160;runtime&#160;parameters&#160;and&#160;connections&#160;for&#160;older&#160
 [...]
 </text><text class="breeze-registry-r5" x="0" y="288.4" textLength="12.2" 
clip-path="url(#breeze-registry-line-11)">│</text><text 
class="breeze-registry-r1" x="244" y="288.4" textLength="1195.6" 
clip-path="url(#breeze-registry-line-11)">install&#160;the&#160;specific&#160;version&#160;in&#160;a&#160;temporary&#160;environment&#160;and&#160;runs&#160;extract_parameters.py&#160;+&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="breeze-registry-r5" x="1451.8" y [...]
-</text><text class="breeze-registry-r5" x="0" y="312.8" textLength="12.2" 
clip-path="url(#breeze-registry-line-12)">│</text><text 
class="breeze-registry-r1" x="244" y="312.8" textLength="1195.6" 
clip-path="url(#breeze-registry-line-12)">extract_connections.py.&#160;No&#160;Docker&#160;needed.&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&
 [...]
-</text><text class="breeze-registry-r5" x="0" y="337.2" textLength="12.2" 
clip-path="url(#breeze-registry-line-13)">│</text><text 
class="breeze-registry-r4" x="24.4" y="337.2" textLength="195.2" 
clip-path="url(#breeze-registry-line-13)">publish-versions</text><text 
class="breeze-registry-r1" x="244" y="337.2" textLength="1195.6" 
clip-path="url(#breeze-registry-line-13)">Publish&#160;per-provider&#160;versions.json&#160;to&#160;S3&#160;from&#160;deployed&#160;directories.&#160;Same&#160;p
 [...]
-</text><text class="breeze-registry-r5" x="0" y="361.6" textLength="12.2" 
clip-path="url(#breeze-registry-line-14)">│</text><text 
class="breeze-registry-r1" x="244" y="361.6" textLength="1195.6" 
clip-path="url(#breeze-registry-line-14)">release-management&#160;publish-docs-to-s3&#x27;.&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#
 [...]
-</text><text class="breeze-registry-r5" x="0" y="386" textLength="1464" 
clip-path="url(#breeze-registry-line-15)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
 class="breeze-registry-r1" x="1464" y="386" textLength="12.2" 
clip-path="url(#breeze-registry-line-15)">
+</text><text class="breeze-registry-r5" x="0" y="312.8" textLength="12.2" 
clip-path="url(#breeze-registry-line-12)">│</text><text 
class="breeze-registry-r1" x="244" y="312.8" textLength="1195.6" 
clip-path="url(#breeze-registry-line-12)">extract_connections.py.&#160;No&#160;Docker&#160;needed.&#160;Each&#160;version&#160;uses&#160;an&#160;isolated&#160;providers.json,&#160;so&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="breeze-registry-r5" x="1451.8" y="312.8" textLe [...]
+</text><text class="breeze-registry-r5" x="0" y="337.2" textLength="12.2" 
clip-path="url(#breeze-registry-line-13)">│</text><text 
class="breeze-registry-r1" x="244" y="337.2" textLength="1195.6" 
clip-path="url(#breeze-registry-line-13)">multiple&#160;providers&#160;can&#160;be&#160;backfilled&#160;in&#160;parallel&#160;from&#160;separate&#160;terminal&#160;sessions.&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;</text><text
 class="br [...]
+</text><text class="breeze-registry-r5" x="0" y="361.6" textLength="12.2" 
clip-path="url(#breeze-registry-line-14)">│</text><text 
class="breeze-registry-r4" x="24.4" y="361.6" textLength="195.2" 
clip-path="url(#breeze-registry-line-14)">publish-versions</text><text 
class="breeze-registry-r1" x="244" y="361.6" textLength="1195.6" 
clip-path="url(#breeze-registry-line-14)">Publish&#160;per-provider&#160;versions.json&#160;to&#160;S3&#160;from&#160;deployed&#160;directories.&#160;Same&#160;p
 [...]
+</text><text class="breeze-registry-r5" x="0" y="386" textLength="12.2" 
clip-path="url(#breeze-registry-line-15)">│</text><text 
class="breeze-registry-r1" x="244" y="386" textLength="1195.6" 
clip-path="url(#breeze-registry-line-15)">release-management&#160;publish-docs-to-s3&#x27;.&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;
 [...]
+</text><text class="breeze-registry-r5" x="0" y="410.4" textLength="1464" 
clip-path="url(#breeze-registry-line-16)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
 class="breeze-registry-r1" x="1464" y="410.4" textLength="12.2" 
clip-path="url(#breeze-registry-line-16)">
 </text>
     </g>
     </g>
diff --git a/dev/breeze/doc/images/output_registry.txt 
b/dev/breeze/doc/images/output_registry.txt
index dae5504430b..fadd741e8b6 100644
--- a/dev/breeze/doc/images/output_registry.txt
+++ b/dev/breeze/doc/images/output_registry.txt
@@ -1 +1 @@
-297843509448a55e7941eed3c0485df8
+8c9be6264d33af7facd1fbdf435697b7
diff --git a/dev/breeze/doc/images/output_registry_backfill.svg 
b/dev/breeze/doc/images/output_registry_backfill.svg
index 12b49bb0402..4478565366e 100644
--- a/dev/breeze/doc/images/output_registry_backfill.svg
+++ b/dev/breeze/doc/images/output_registry_backfill.svg
@@ -1,4 +1,4 @@
-<svg class="rich-terminal" viewBox="0 0 1482 440.4" xmlns="http://www.w3.org/2000/svg">
+<svg class="rich-terminal" viewBox="0 0 1482 489.2" xmlns="http://www.w3.org/2000/svg">
     <!-- Generated with Rich https://www.textualize.io -->
     <style>
 
@@ -45,7 +45,7 @@
 
     <defs>
     <clipPath id="breeze-registry-backfill-clip-terminal">
-      <rect x="0" y="0" width="1463.0" height="389.4" />
+      <rect x="0" y="0" width="1463.0" height="438.2" />
     </clipPath>
     <clipPath id="breeze-registry-backfill-line-0">
     <rect x="0" y="1.5" width="1464" height="24.65"/>
@@ -92,9 +92,15 @@
 <clipPath id="breeze-registry-backfill-line-14">
     <rect x="0" y="343.1" width="1464" height="24.65"/>
             </clipPath>
+<clipPath id="breeze-registry-backfill-line-15">
+    <rect x="0" y="367.5" width="1464" height="24.65"/>
+            </clipPath>
+<clipPath id="breeze-registry-backfill-line-16">
+    <rect x="0" y="391.9" width="1464" height="24.65"/>
+            </clipPath>
     </defs>
 
-    <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1" 
x="1" y="1" width="1480" height="438.4" rx="8"/><text 
class="breeze-registry-backfill-title" fill="#c5c8c6" text-anchor="middle" 
x="740" y="27">Command:&#160;registry&#160;backfill</text>
+    <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1" 
x="1" y="1" width="1480" height="487.2" rx="8"/><text 
class="breeze-registry-backfill-title" fill="#c5c8c6" text-anchor="middle" 
x="740" y="27">Command:&#160;registry&#160;backfill</text>
             <g transform="translate(26,22)">
             <circle cx="0" cy="0" r="7" fill="#ff5f57"/>
             <circle cx="22" cy="0" r="7" fill="#febc2e"/>
@@ -108,18 +114,20 @@
 </text><text class="breeze-registry-backfill-r2" x="12.2" y="44.4" 
textLength="73.2" 
clip-path="url(#breeze-registry-backfill-line-1)">Usage:</text><text 
class="breeze-registry-backfill-r3" x="97.6" y="44.4" textLength="292.8" 
clip-path="url(#breeze-registry-backfill-line-1)">breeze&#160;registry&#160;backfill</text><text
 class="breeze-registry-backfill-r1" x="402.6" y="44.4" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-1)">[</text><text 
class="breeze-registry-backfill [...]
 </text><text class="breeze-registry-backfill-r1" x="1464" y="68.8" 
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-2)">
 </text><text class="breeze-registry-backfill-r1" x="12.2" y="93.2" 
textLength="1037" 
clip-path="url(#breeze-registry-backfill-line-3)">Extract&#160;runtime&#160;parameters&#160;and&#160;connections&#160;for&#160;older&#160;provider&#160;versions.&#160;Uses&#160;&#x27;uv&#160;run&#160;</text><text
 class="breeze-registry-backfill-r4" x="1049.2" y="93.2" textLength="73.2" 
clip-path="url(#breeze-registry-backfill-line-3)">--with</text><text 
class="breeze-registry-backfill-r1" x="1122.4" y="9 [...]
-</text><text class="breeze-registry-backfill-r1" x="12.2" y="117.6" 
textLength="1329.8" 
clip-path="url(#breeze-registry-backfill-line-4)">version&#160;in&#160;a&#160;temporary&#160;environment&#160;and&#160;runs&#160;extract_parameters.py&#160;+&#160;extract_connections.py.&#160;No&#160;Docker&#160;needed.</text><text
 class="breeze-registry-backfill-r1" x="1464" y="117.6" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-4)">
-</text><text class="breeze-registry-backfill-r1" x="1464" y="142" 
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-5)">
-</text><text class="breeze-registry-backfill-r5" x="0" y="166.4" 
textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-6)">╭─</text><text 
class="breeze-registry-backfill-r5" x="24.4" y="166.4" textLength="195.2" 
clip-path="url(#breeze-registry-backfill-line-6)">&#160;Backfill&#160;flags&#160;</text><text
 class="breeze-registry-backfill-r5" x="219.6" y="166.4" textLength="1220" 
clip-path="url(#breeze-registry-backfill-line-6)">────────────────────────────────────────────────────
 [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="190.8" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-7)">│</text><text 
class="breeze-registry-backfill-r6" x="24.4" y="190.8" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-7)">*</text><text 
class="breeze-registry-backfill-r4" x="61" y="190.8" textLength="122" 
clip-path="url(#breeze-registry-backfill-line-7)">--provider</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="190.8" textLengt [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="215.2" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-8)">│</text><text 
class="breeze-registry-backfill-r6" x="24.4" y="215.2" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-8)">*</text><text 
class="breeze-registry-backfill-r4" x="61" y="215.2" textLength="122" 
clip-path="url(#breeze-registry-backfill-line-8)">--version&#160;</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="215.2" text [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="239.6" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-9)">│</text><text 
class="breeze-registry-backfill-r8" x="207.4" y="239.6" textLength="73.2" 
clip-path="url(#breeze-registry-backfill-line-9)">(TEXT)</text><text 
class="breeze-registry-backfill-r5" x="1451.8" y="239.6" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-9)">│</text><text 
class="breeze-registry-backfill-r1" x="1464" y="239.6" textLeng [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="264" 
textLength="1464" 
clip-path="url(#breeze-registry-backfill-line-10)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
 class="breeze-registry-backfill-r1" x="1464" y="264" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-10)">
-</text><text class="breeze-registry-backfill-r5" x="0" y="288.4" 
textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-11)">╭─</text><text 
class="breeze-registry-backfill-r5" x="24.4" y="288.4" textLength="195.2" 
clip-path="url(#breeze-registry-backfill-line-11)">&#160;Common&#160;options&#160;</text><text
 class="breeze-registry-backfill-r5" x="219.6" y="288.4" textLength="1220" 
clip-path="url(#breeze-registry-backfill-line-11)">─────────────────────────────────────────────────
 [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="312.8" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-12)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="312.8" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-12)">--verbose</text><text 
class="breeze-registry-backfill-r9" x="158.6" y="312.8" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-12)">-v</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="312.8" t [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="337.2" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-13)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="337.2" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-13)">--dry-run</text><text 
class="breeze-registry-backfill-r9" x="158.6" y="337.2" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-13)">-D</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="337.2" t [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="361.6" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-14)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="361.6" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-14)">--help&#160;&#160;&#160;</text><text
 class="breeze-registry-backfill-r9" x="158.6" y="361.6" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-14)">-h</text><text 
class="breeze-registry-backfill-r1" x="207 [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="386" 
textLength="1464" 
clip-path="url(#breeze-registry-backfill-line-15)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
 class="breeze-registry-backfill-r1" x="1464" y="386" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-15)">
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="117.6" 
textLength="1403" 
clip-path="url(#breeze-registry-backfill-line-4)">version&#160;in&#160;a&#160;temporary&#160;environment&#160;and&#160;runs&#160;extract_parameters.py&#160;+&#160;extract_connections.py.&#160;No&#160;Docker&#160;needed.&#160;Each&#160;</text><text
 class="breeze-registry-backfill-r1" x="1464" y="117.6" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-4)">
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="142" 
textLength="1415.2" 
clip-path="url(#breeze-registry-backfill-line-5)">version&#160;uses&#160;an&#160;isolated&#160;providers.json,&#160;so&#160;multiple&#160;providers&#160;can&#160;be&#160;backfilled&#160;in&#160;parallel&#160;from&#160;separate&#160;terminal&#160;</text><text
 class="breeze-registry-backfill-r1" x="1464" y="142" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-5)">
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="166.4" 
textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-6)">sessions.</text><text 
class="breeze-registry-backfill-r1" x="1464" y="166.4" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-6)">
+</text><text class="breeze-registry-backfill-r1" x="1464" y="190.8" 
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-7)">
+</text><text class="breeze-registry-backfill-r5" x="0" y="215.2" 
textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-8)">╭─</text><text 
class="breeze-registry-backfill-r5" x="24.4" y="215.2" textLength="195.2" 
clip-path="url(#breeze-registry-backfill-line-8)">&#160;Backfill&#160;flags&#160;</text><text
 class="breeze-registry-backfill-r5" x="219.6" y="215.2" textLength="1220" 
clip-path="url(#breeze-registry-backfill-line-8)">────────────────────────────────────────────────────
 [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="239.6" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-9)">│</text><text 
class="breeze-registry-backfill-r6" x="24.4" y="239.6" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-9)">*</text><text 
class="breeze-registry-backfill-r4" x="61" y="239.6" textLength="122" 
clip-path="url(#breeze-registry-backfill-line-9)">--provider</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="239.6" textLengt [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="264" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-10)">│</text><text 
class="breeze-registry-backfill-r6" x="24.4" y="264" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-10)">*</text><text 
class="breeze-registry-backfill-r4" x="61" y="264" textLength="122" 
clip-path="url(#breeze-registry-backfill-line-10)">--version&#160;</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="264" textLengt [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="288.4" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-11)">│</text><text 
class="breeze-registry-backfill-r8" x="207.4" y="288.4" textLength="73.2" 
clip-path="url(#breeze-registry-backfill-line-11)">(TEXT)</text><text 
class="breeze-registry-backfill-r5" x="1451.8" y="288.4" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-11)">│</text><text 
class="breeze-registry-backfill-r1" x="1464" y="288.4" textL [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="312.8" 
textLength="1464" 
clip-path="url(#breeze-registry-backfill-line-12)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
 class="breeze-registry-backfill-r1" x="1464" y="312.8" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-12)">
+</text><text class="breeze-registry-backfill-r5" x="0" y="337.2" 
textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-13)">╭─</text><text 
class="breeze-registry-backfill-r5" x="24.4" y="337.2" textLength="195.2" 
clip-path="url(#breeze-registry-backfill-line-13)">&#160;Common&#160;options&#160;</text><text
 class="breeze-registry-backfill-r5" x="219.6" y="337.2" textLength="1220" 
clip-path="url(#breeze-registry-backfill-line-13)">─────────────────────────────────────────────────
 [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="361.6" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-14)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="361.6" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-14)">--verbose</text><text 
class="breeze-registry-backfill-r9" x="158.6" y="361.6" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-14)">-v</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="361.6" t [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="386" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-15)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="386" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-15)">--dry-run</text><text 
class="breeze-registry-backfill-r9" x="158.6" y="386" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-15)">-D</text><text 
class="breeze-registry-backfill-r1" x="207.4" y="386" textLengt [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="410.4" 
textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-16)">│</text><text 
class="breeze-registry-backfill-r4" x="24.4" y="410.4" textLength="109.8" 
clip-path="url(#breeze-registry-backfill-line-16)">--help&#160;&#160;&#160;</text><text
 class="breeze-registry-backfill-r9" x="158.6" y="410.4" textLength="24.4" 
clip-path="url(#breeze-registry-backfill-line-16)">-h</text><text 
class="breeze-registry-backfill-r1" x="207 [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="434.8" 
textLength="1464" 
clip-path="url(#breeze-registry-backfill-line-17)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
 class="breeze-registry-backfill-r1" x="1464" y="434.8" textLength="12.2" 
clip-path="url(#breeze-registry-backfill-line-17)">
 </text>
     </g>
     </g>
diff --git a/dev/breeze/doc/images/output_registry_backfill.txt 
b/dev/breeze/doc/images/output_registry_backfill.txt
index 78e2c611d76..cff872551db 100644
--- a/dev/breeze/doc/images/output_registry_backfill.txt
+++ b/dev/breeze/doc/images/output_registry_backfill.txt
@@ -1 +1 @@
-e83ed21dca79179e4d064a17f8cd08be
+5cddc0e9c5f9524a7e1baf6c21d74263
diff --git a/dev/breeze/src/airflow_breeze/commands/registry_commands.py 
b/dev/breeze/src/airflow_breeze/commands/registry_commands.py
index b09b4be4c18..9fb58256428 100644
--- a/dev/breeze/src/airflow_breeze/commands/registry_commands.py
+++ b/dev/breeze/src/airflow_breeze/commands/registry_commands.py
@@ -18,10 +18,12 @@ from __future__ import annotations
 
 import json
 import sys
+import tempfile
 import uuid
 from pathlib import Path
 
 import click
+import yaml
 
 from airflow_breeze.commands.ci_image_commands import 
rebuild_or_pull_ci_image_if_needed
 from airflow_breeze.commands.common_options import option_dry_run, 
option_python, option_verbose
@@ -109,8 +111,6 @@ def publish_versions(s3_bucket: str, providers_json: str | 
None):
 PROVIDERS_DIR = AIRFLOW_ROOT_PATH / "providers"
 DEV_REGISTRY_DIR = AIRFLOW_ROOT_PATH / "dev" / "registry"
 
-PROVIDERS_JSON_PATH = DEV_REGISTRY_DIR / "providers.json"
-
 EXTRACT_SCRIPTS = [
     DEV_REGISTRY_DIR / "extract_parameters.py",
     DEV_REGISTRY_DIR / "extract_connections.py",
@@ -140,8 +140,6 @@ def _read_provider_yaml_info(provider_id: str) -> 
tuple[str, list[str]]:
     except ImportError:
         import tomli as tomllib
 
-    import yaml
-
     provider_yaml_path = _find_provider_yaml(provider_id)
     with open(provider_yaml_path) as f:
         data = yaml.safe_load(f)
@@ -166,61 +164,69 @@ def _build_pip_spec(package_name: str, extras: list[str], 
version: str) -> str:
     return f"{package_name}=={version}"
 
 
-def _ensure_providers_json(provider_id: str, package_name: str) -> Path:
-    """Ensure dev/registry/providers.json exists with the target provider.
-
-    The extraction scripts read this to determine which version to tag output 
with.
-    If it exists (from a previous extract-data or S3 download), use it.
-    If the provider is missing from an existing file, append it rather than 
replacing.
+def _create_isolated_providers_json(provider_id: str, package_name: str, 
version: str, tmp_dir: Path) -> Path:
+    """Create a temp providers.json with only the target provider/version.
 
-    NOTE: Does NOT touch registry/src/_data/providers.json, which is used by
-    the Eleventy build and must contain all providers.
+    This allows multiple providers to run in parallel without conflicting over
+    the shared dev/registry/providers.json file.
     """
-    PROVIDERS_JSON_PATH.parent.mkdir(parents=True, exist_ok=True)
-
-    if PROVIDERS_JSON_PATH.exists():
-        with open(PROVIDERS_JSON_PATH) as f:
-            data = json.load(f)
-        if any(p["id"] == provider_id for p in data.get("providers", [])):
-            return PROVIDERS_JSON_PATH
-        # Provider not in file — append it rather than replacing
-        data["providers"].append({"id": provider_id, "package_name": 
package_name, "version": "0.0.0"})
-        click.echo(f"Added {provider_id} to existing {PROVIDERS_JSON_PATH}")
-    else:
-        data = {"providers": [{"id": provider_id, "package_name": 
package_name, "version": "0.0.0"}]}
-        click.echo(f"Created minimal {PROVIDERS_JSON_PATH}")
-
-    with open(PROVIDERS_JSON_PATH, "w") as f:
+    tmp_providers = tmp_dir / f"providers-{provider_id}-{version}.json"
+    data = {"providers": [{"id": provider_id, "package_name": package_name, 
"version": version}]}
+    with open(tmp_providers, "w") as f:
         json.dump(data, f, indent=2)
-    return PROVIDERS_JSON_PATH
+    return tmp_providers
 
 
-def _patch_providers_json(providers_json_path: Path, provider_id: str, 
version: str) -> str:
-    """Patch providers.json to set the target version. Returns the original 
version."""
-    with open(providers_json_path) as f:
-        data = json.load(f)
-    for p in data["providers"]:
-        if p["id"] == provider_id:
-            original_version = p["version"]
-            p["version"] = version
-            with open(providers_json_path, "w") as f:
-                json.dump(data, f, indent=2)
-            return original_version
-    raise click.ClickException(f"Provider '{provider_id}' not found in 
{providers_json_path}")
+def _run_extract_script(
+    script: Path,
+    pip_spec: str,
+    base_spec: str,
+    provider_id: str,
+    providers_json_path: Path,
+) -> int:
+    """Run an extraction script with --provider and --providers-json flags.
 
-
-# TODO: The backfill command processes versions sequentially because 
extract_parameters.py
-# and extract_connections.py write to shared files (modules.json, 
providers.json).
-# To parallelize, each provider would need its own isolated output directory 
so that
-# concurrent runs don't clobber each other. See also the registry-backfill.yml 
workflow
-# which uses a GitHub Actions matrix to run providers in parallel CI jobs.
+    Falls back to running without extras if the full spec fails.
+    Returns the exit code.
+    """
+    base_cmd = [
+        "uv",
+        "run",
+        "--with",
+        pip_spec,
+        "python",
+        str(script),
+        "--provider",
+        provider_id,
+        "--providers-json",
+        str(providers_json_path),
+    ]
+    result = run_command(base_cmd, check=False, cwd=str(AIRFLOW_ROOT_PATH))
+    if result.returncode != 0 and pip_spec != base_spec:
+        click.echo(f"Retrying {script.name} without extras...")
+        fallback_cmd = [
+            "uv",
+            "run",
+            "--with",
+            base_spec,
+            "python",
+            str(script),
+            "--provider",
+            provider_id,
+            "--providers-json",
+            str(providers_json_path),
+        ]
+        result = run_command(fallback_cmd, check=False, 
cwd=str(AIRFLOW_ROOT_PATH))
+    return result.returncode
 
 
 @registry_group.command(
     name="backfill",
     help="Extract runtime parameters and connections for older provider 
versions. "
     "Uses 'uv run --with' to install the specific version in a temporary 
environment "
-    "and runs extract_parameters.py + extract_connections.py. No Docker 
needed.",
+    "and runs extract_parameters.py + extract_connections.py. No Docker 
needed. "
+    "Each version uses an isolated providers.json, so multiple providers can 
be "
+    "backfilled in parallel from separate terminal sessions.",
 )
 @click.option(
     "--provider",
@@ -238,7 +244,6 @@ def _patch_providers_json(providers_json_path: Path, provider_id: str, version:
 @option_dry_run
 def backfill(provider: str, versions: tuple[str, ...]):
     package_name, extras = _read_provider_yaml_info(provider)
-    providers_json_path = _ensure_providers_json(provider, package_name)
 
     click.echo(f"Provider: {provider} ({package_name})")
     click.echo(f"Versions: {', '.join(versions)}")
@@ -248,35 +253,26 @@ def backfill(provider: str, versions: tuple[str, ...]):
 
     failed: list[str] = []
 
-    for version in versions:
-        click.echo(f"{'=' * 60}")
-        click.echo(f"Extracting {provider} {version}")
-        click.echo(f"{'=' * 60}")
+    with tempfile.TemporaryDirectory(prefix=f"backfill-{provider}-") as tmp_dir:
+        tmp_path = Path(tmp_dir)
 
-        original_version = _patch_providers_json(providers_json_path, provider, version)
+        for version in versions:
+            click.echo(f"{'=' * 60}")
+            click.echo(f"Extracting {provider} {version}")
+            click.echo(f"{'=' * 60}")
+
+            # Each version gets its own isolated providers.json — no shared state
+            providers_json = _create_isolated_providers_json(provider, package_name, version, tmp_path)
 
-        try:
             pip_spec = _build_pip_spec(package_name, extras, version)
             base_spec = f"{package_name}=={version}"
+
             for script in EXTRACT_SCRIPTS:
                 click.echo(f"\nRunning {script.name} with {pip_spec}...")
-                result = run_command(
-                    ["uv", "run", "--with", pip_spec, "python", str(script)],
-                    check=False,
-                    cwd=str(AIRFLOW_ROOT_PATH),
-                )
-                if result.returncode != 0 and pip_spec != base_spec:
-                    click.echo(f"Retrying {script.name} without extras...")
-                    result = run_command(
-                        ["uv", "run", "--with", base_spec, "python", str(script)],
-                        check=False,
-                        cwd=str(AIRFLOW_ROOT_PATH),
-                    )
-                if result.returncode != 0:
-                    click.echo(f"WARNING: {script.name} failed for {version} (exit {result.returncode})")
+                returncode = _run_extract_script(script, pip_spec, base_spec, provider, providers_json)
+                if returncode != 0:
+                    click.echo(f"WARNING: {script.name} failed for {version} (exit {returncode})")
                     failed.append(f"{version}/{script.name}")
-        finally:
-            _patch_providers_json(providers_json_path, provider, original_version)
 
     click.echo(f"\n{'=' * 60}")
     if failed:
diff --git a/dev/breeze/tests/test_registry_backfill.py b/dev/breeze/tests/test_registry_backfill.py
index 2eb4b732eb5..6a0cb1a1570 100644
--- a/dev/breeze/tests/test_registry_backfill.py
+++ b/dev/breeze/tests/test_registry_backfill.py
@@ -19,16 +19,16 @@
 from __future__ import annotations
 
 import json
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 
 from airflow_breeze.commands.registry_commands import (
     _build_pip_spec,
-    _ensure_providers_json,
+    _create_isolated_providers_json,
     _find_provider_yaml,
-    _patch_providers_json,
     _read_provider_yaml_info,
+    _run_extract_script,
 )
 
 
@@ -109,81 +109,98 @@ class TestBuildPipSpec:
 
 
 # ---------------------------------------------------------------------------
-# _ensure_providers_json
+# _create_isolated_providers_json
 # ---------------------------------------------------------------------------
-class TestEnsureProvidersJson:
-    def test_creates_new_file(self, tmp_path):
-        providers_json = tmp_path / "dev" / "registry" / "providers.json"
-        with patch(
-            "airflow_breeze.commands.registry_commands.PROVIDERS_JSON_PATH",
-            providers_json,
-        ):
-            result = _ensure_providers_json("amazon", "apache-airflow-providers-amazon")
+class TestCreateIsolatedProvidersJson:
+    def test_creates_file_with_correct_content(self, tmp_path):
+        result = _create_isolated_providers_json(
+            "amazon", "apache-airflow-providers-amazon", "9.15.0", tmp_path
+        )
 
-        assert result == providers_json
-        data = json.loads(providers_json.read_text())
+        assert result.exists()
+        data = json.loads(result.read_text())
         assert len(data["providers"]) == 1
         assert data["providers"][0]["id"] == "amazon"
         assert data["providers"][0]["package_name"] == "apache-airflow-providers-amazon"
+        assert data["providers"][0]["version"] == "9.15.0"
 
-    def test_appends_to_existing_file(self, tmp_path):
-        providers_json = tmp_path / "providers.json"
-        providers_json.write_text(
-            json.dumps({"providers": [{"id": "google", "package_name": "pkg-google", "version": "1.0.0"}]})
-        )
-        with patch(
-            "airflow_breeze.commands.registry_commands.PROVIDERS_JSON_PATH",
-            providers_json,
-        ):
-            _ensure_providers_json("amazon", "apache-airflow-providers-amazon")
-
-        data = json.loads(providers_json.read_text())
-        assert len(data["providers"]) == 2
-        ids = [p["id"] for p in data["providers"]]
-        assert "google" in ids
-        assert "amazon" in ids
-
-    def test_skips_if_provider_already_present(self, tmp_path):
-        providers_json = tmp_path / "providers.json"
-        original = {"providers": [{"id": "amazon", "package_name": "pkg", "version": "1.0.0"}]}
-        providers_json.write_text(json.dumps(original))
-        with patch(
-            "airflow_breeze.commands.registry_commands.PROVIDERS_JSON_PATH",
-            providers_json,
-        ):
-            _ensure_providers_json("amazon", "pkg")
+    def test_filename_includes_provider_and_version(self, tmp_path):
+        result = _create_isolated_providers_json("google", "pkg", "14.0.0", tmp_path)
+        assert result.name == "providers-google-14.0.0.json"
 
-        # File should be unchanged
-        data = json.loads(providers_json.read_text())
-        assert len(data["providers"]) == 1
+    def test_different_versions_produce_different_files(self, tmp_path):
+        f1 = _create_isolated_providers_json("amazon", "pkg", "9.15.0", tmp_path)
+        f2 = _create_isolated_providers_json("amazon", "pkg", "9.14.0", tmp_path)
+        assert f1 != f2
+        assert f1.exists()
+        assert f2.exists()
 
 
 # ---------------------------------------------------------------------------
-# _patch_providers_json
+# _run_extract_script
 # ---------------------------------------------------------------------------
-class TestPatchProvidersJson:
-    def test_patches_version(self, tmp_path):
+class TestRunExtractScript:
+    def test_success_on_first_try(self, tmp_path):
+        script = tmp_path / "extract.py"
         providers_json = tmp_path / "providers.json"
-        providers_json.write_text(json.dumps({"providers": [{"id": "amazon", "version": "9.22.0"}]}))
-        original = _patch_providers_json(providers_json, "amazon", "9.15.0")
-        assert original == "9.22.0"
 
-        data = json.loads(providers_json.read_text())
-        assert data["providers"][0]["version"] == "9.15.0"
+        mock_result = MagicMock(returncode=0)
+        with patch(
+            "airflow_breeze.commands.registry_commands.run_command", return_value=mock_result
+        ) as mock_run:
+            rc = _run_extract_script(script, "pkg[extra]==1.0", "pkg==1.0", "amazon", providers_json)
+
+        assert rc == 0
+        mock_run.assert_called_once()
+        cmd = mock_run.call_args[0][0]
+        assert "--provider" in cmd
+        assert "amazon" in cmd
+        assert "--providers-json" in cmd
+
+    def test_falls_back_without_extras_on_failure(self, tmp_path):
+        script = tmp_path / "extract.py"
+        providers_json = tmp_path / "providers.json"
 
-    def test_raises_for_missing_provider(self, tmp_path):
+        fail_result = MagicMock(returncode=1)
+        ok_result = MagicMock(returncode=0)
+        with patch(
+            "airflow_breeze.commands.registry_commands.run_command",
+            side_effect=[fail_result, ok_result],
+        ) as mock_run:
+            rc = _run_extract_script(script, "pkg[extra]==1.0", "pkg==1.0", "amazon", providers_json)
+
+        assert rc == 0
+        assert mock_run.call_count == 2
+        # First call uses extras, second uses base spec
+        first_cmd = mock_run.call_args_list[0][0][0]
+        second_cmd = mock_run.call_args_list[1][0][0]
+        assert "pkg[extra]==1.0" in first_cmd
+        assert "pkg==1.0" in second_cmd
+
+    def test_no_fallback_when_specs_are_identical(self, tmp_path):
+        script = tmp_path / "extract.py"
         providers_json = tmp_path / "providers.json"
-        providers_json.write_text(json.dumps({"providers": [{"id": "google", "version": "1.0.0"}]}))
-        with pytest.raises(Exception, match="not found"):
-            _patch_providers_json(providers_json, "amazon", "9.15.0")
 
-    def test_restores_original_version(self, tmp_path):
+        fail_result = MagicMock(returncode=1)
+        with patch(
+            "airflow_breeze.commands.registry_commands.run_command",
+            return_value=fail_result,
+        ) as mock_run:
+            rc = _run_extract_script(script, "pkg==1.0", "pkg==1.0", "amazon", providers_json)
+
+        assert rc == 1
+        mock_run.assert_called_once()
+
+    def test_returns_fallback_failure_code(self, tmp_path):
+        script = tmp_path / "extract.py"
         providers_json = tmp_path / "providers.json"
-        providers_json.write_text(json.dumps({"providers": [{"id": "amazon", "version": "9.22.0"}]}))
-        # Patch to target version
-        _patch_providers_json(providers_json, "amazon", "9.15.0")
-        # Restore
-        _patch_providers_json(providers_json, "amazon", "9.22.0")
-
-        data = json.loads(providers_json.read_text())
-        assert data["providers"][0]["version"] == "9.22.0"
+
+        fail_result = MagicMock(returncode=1)
+        with patch(
+            "airflow_breeze.commands.registry_commands.run_command",
+            return_value=fail_result,
+        ) as mock_run:
+            rc = _run_extract_script(script, "pkg[extra]==1.0", "pkg==1.0", "amazon", providers_json)
+
+        assert rc == 1
+        assert mock_run.call_count == 2
diff --git a/dev/registry/extract_connections.py b/dev/registry/extract_connections.py
index 73264f26249..0f74e677d29 100644
--- a/dev/registry/extract_connections.py
+++ b/dev/registry/extract_connections.py
@@ -35,6 +35,7 @@ Output:
 
 from __future__ import annotations
 
+import argparse
 import json
 import sys
 from collections import defaultdict
@@ -155,15 +156,31 @@ def build_custom_fields(
 
 
 def main():
+    parser = argparse.ArgumentParser(description="Extract provider connection metadata")
+    parser.add_argument(
+        "--provider",
+        default=None,
+        help="Only output connections for this provider ID (e.g. 'amazon').",
+    )
+    parser.add_argument(
+        "--providers-json",
+        default=None,
+        help="Path to providers.json (overrides default search paths).",
+    )
+    args = parser.parse_args()
+
     print("Airflow Registry Connection Metadata Extractor")
     print("=" * 50)
 
     # Load providers.json for provider_id -> latest_version + name mapping
-    providers_json_path = None
-    for candidate in PROVIDERS_JSON_CANDIDATES:
-        if candidate.exists():
-            providers_json_path = candidate
-            break
+    if args.providers_json:
+        providers_json_path = Path(args.providers_json)
+    else:
+        providers_json_path = None
+        for candidate in PROVIDERS_JSON_CANDIDATES:
+            if candidate.exists():
+                providers_json_path = candidate
+                break
 
     if providers_json_path is None:
         print("ERROR: providers.json not found. Run extract_metadata.py first.")
@@ -225,6 +242,13 @@ def main():
     print(f"  {total_with_custom} have custom fields")
     print(f"  {total_with_ui} have UI field customisation")
 
+    # Filter to single provider if requested
+    if args.provider:
+        provider_connections = {
+            pid: conns for pid, conns in provider_connections.items() if pid == args.provider
+        }
+        print(f"Filtering output to provider: {args.provider}")
+
     # Write per-provider files to versions/{pid}/{version}/connections.json
     for output_dir in OUTPUT_DIRS:
         if not output_dir.parent.exists():
diff --git a/dev/registry/extract_parameters.py b/dev/registry/extract_parameters.py
index 2a4e80e7e05..8ef31f75dc7 100644
--- a/dev/registry/extract_parameters.py
+++ b/dev/registry/extract_parameters.py
@@ -37,6 +37,7 @@ Output:
 
 from __future__ import annotations
 
+import argparse
 import concurrent.futures
 import importlib
 import inspect
@@ -839,10 +840,26 @@ def _fetch_inventories(
 
 
 def main():
+    parser = argparse.ArgumentParser(description="Extract provider parameters and modules")
+    parser.add_argument(
+        "--provider",
+        default=None,
+        help="Only process this provider ID (e.g. 'amazon'). Skips modules.json write.",
+    )
+    parser.add_argument(
+        "--providers-json",
+        default=None,
+        help="Path to providers.json (overrides default search paths).",
+    )
+    args = parser.parse_args()
+
     print("Airflow Registry Parameter & Module Extractor")
     print("=" * 50)
 
-    providers_json_path = find_json(PROVIDERS_JSON_CANDIDATES, "providers.json")
+    if args.providers_json:
+        providers_json_path = Path(args.providers_json)
+    else:
+        providers_json_path = find_json(PROVIDERS_JSON_CANDIDATES, "providers.json")
     with open(providers_json_path) as f:
         providers_data = json.load(f)
 
@@ -851,7 +868,7 @@ def main():
         provider_versions[p["id"]] = p["version"]
 
     generated_at = datetime.now(timezone.utc).isoformat()
-    _main_discover(provider_versions, generated_at)
+    _main_discover(provider_versions, generated_at, only_provider=args.provider)
 
     print("\nDone!")
 
@@ -859,8 +876,14 @@ def main():
 def _main_discover(
     provider_versions: dict[str, str],
     generated_at: str,
+    only_provider: str | None = None,
 ) -> None:
-    """Runtime discovery: find classes from provider.yaml files, produce modules.json and parameters."""
+    """Runtime discovery: find classes from provider.yaml files, produce modules.json and parameters.
+
+    When only_provider is set, only that provider is scanned and modules.json is NOT written
+    (it would be incomplete). This enables parallel backfills since the only output is
+    the per-provider parameters.json file.
+    """
     provider_yaml_paths = sorted(PROVIDERS_DIR.rglob("provider.yaml"))
     print(f"Found {len(provider_yaml_paths)} provider.yaml files")
 
@@ -878,6 +901,15 @@ def _main_discover(
             provider_yamls_by_id[pid] = py
             provider_paths_by_id[pid] = yaml_path
 
+    # Filter to single provider if requested
+    if only_provider:
+        if only_provider not in provider_paths_by_id:
+            print(f"ERROR: provider '{only_provider}' not found in provider.yaml files")
+            sys.exit(1)
+        provider_paths_by_id = {only_provider: provider_paths_by_id[only_provider]}
+        provider_yamls_by_id = {only_provider: provider_yamls_by_id[only_provider]}
+        print(f"Filtering to provider: {only_provider}")
+
     # Fetch Sphinx inventories in parallel
     print("Fetching Sphinx inventory files ...")
     inventories = _fetch_inventories(set(provider_yamls_by_id), provider_yamls_by_id)
@@ -911,31 +943,36 @@ def _main_discover(
     all_discovered = unique_modules
     print(f"Deduplicated to {len(all_discovered)} unique modules")
 
-    # Write modules.json (the canonical module catalog)
-    modules_json = {"modules": all_discovered}
-    output_dirs = [SCRIPT_DIR, AIRFLOW_ROOT / "registry" / "src" / "_data"]
-    for out_dir in output_dirs:
-        if not out_dir.parent.exists():
-            continue
-        out_dir.mkdir(parents=True, exist_ok=True)
-        with open(out_dir / "modules.json", "w") as f:
-            json.dump(modules_json, f, indent=2)
-        print(f"Wrote {len(all_discovered)} modules to {out_dir / 'modules.json'}")
-
-    # Write runtime_modules.json (debug/stats file)
-    runtime_output = {
-        "generated_at": generated_at,
-        "discovery_method": "runtime",
-        "stats": {
-            "total_classes": len(all_discovered),
-            "total_providers": len(providers_seen),
-        },
-        "classes": all_discovered,
-    }
-    runtime_json_path = SCRIPT_DIR / "runtime_modules.json"
-    with open(runtime_json_path, "w") as f:
-        json.dump(runtime_output, f, indent=2)
-    print(f"Wrote {runtime_json_path}")
+    # Write modules.json only when doing a full build (no --provider filter).
+    # With --provider, the output would be incomplete and would clobber the
+    # full modules.json from a previous build.
+    if not only_provider:
+        modules_json = {"modules": all_discovered}
+        output_dirs = [SCRIPT_DIR, AIRFLOW_ROOT / "registry" / "src" / "_data"]
+        for out_dir in output_dirs:
+            if not out_dir.parent.exists():
+                continue
+            out_dir.mkdir(parents=True, exist_ok=True)
+            with open(out_dir / "modules.json", "w") as f:
+                json.dump(modules_json, f, indent=2)
+            print(f"Wrote {len(all_discovered)} modules to {out_dir / 'modules.json'}")
+
+        # Write runtime_modules.json (debug/stats file)
+        runtime_output = {
+            "generated_at": generated_at,
+            "discovery_method": "runtime",
+            "stats": {
+                "total_classes": len(all_discovered),
+                "total_providers": len(providers_seen),
+            },
+            "classes": all_discovered,
+        }
+        runtime_json_path = SCRIPT_DIR / "runtime_modules.json"
+        with open(runtime_json_path, "w") as f:
+            json.dump(runtime_output, f, indent=2)
+        print(f"Wrote {runtime_json_path}")
+    else:
+        print("Skipping modules.json write (--provider mode)")
 
     # Extract parameters
     print("\nExtracting parameters from runtime-discovered classes...")

Reply via email to