This is an automated email from the ASF dual-hosted git repository.
kaxilnaik pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 475cdae229a Registry: single source of truth for module types (#63322)
475cdae229a is described below
commit 475cdae229a51ae8eda4c4f2dcb058f5e8af1c38
Author: Kaxil Naik <[email protected]>
AuthorDate: Wed Mar 11 04:15:43 2026 +0000
Registry: single source of truth for module types (#63322)
Consolidate ~25 duplicated module type definitions into
`dev/registry/registry_tools/types.py`. All extraction scripts now
import from this shared module, and a generated `types.json` feeds the
Eleventy frontend — so adding a new type means editing one Python dict
instead of ~10 files.
- Make `dev/registry` a uv workspace member with its own pyproject.toml
- Create `registry_tools/types.py` as canonical type registry
- Refactor extract_metadata, extract_parameters, extract_versions to
import from registry_tools.types instead of hardcoding
- Derive module counts from modules.json (runtime discovery) instead
of AST suffix matching — fixes Databricks operator undercount
- Generate types.json for frontend; templates and JS loop over it
- Remove stats grid from provider version page (redundant with filters)
- Add pre-commit hook to keep types.json in sync with types.py
- Add test_types.py for type registry validation
- Fix `"Base" in name` → `name.startswith("Base")` filter bug in
extract_versions.py (was dropping DatabaseOperator, etc.)
- Copy logos to registry/public/logos/ for local dev convenience
* Fix module counts on provider cards and version pages
Eleventy loads providers.json and providerVersions.js as separate data
objects — mutating provider objects in providerVersions.js doesn't
propagate to templates that read from providers.json directly.
Add moduleCountsByProvider.js data file that builds {provider_id: counts}
from modules.json. Templates now read counts from this dedicated source
instead of relying on in-place mutation.
* Merge into existing providers.json in incremental mode
When running extract_metadata.py --provider X, read existing
providers.json and merge rather than overwrite. This makes
parallel runs for different providers safe on the same filesystem.
* Fix statsData.js to read module counts from modules.json
statsData.js was reading p.module_counts from providers.json, which no
longer carries counts. Read from modules.json directly (same pattern as
moduleCountsByProvider.js). Fixes empty Popular Providers on homepage
and zero-count stats.
* Fix breeze registry commands for suspended providers and backfill
Two fixes:
1. extract-data: Install suspended providers (e.g. apache-beam) in the
breeze container before running extraction. These providers have source
code in the repo but aren't pre-installed in the CI image, so
extract_parameters.py couldn't discover their classes at runtime.
2. backfill: Run extract_versions.py as a first step to produce
metadata.json from git tags. Without metadata.json, Eleventy skips
generating version pages — so backfilled parameters/connections data
was invisible on the site.
---
.github/workflows/registry-backfill.yml | 2 +-
.github/workflows/registry-tests.yml | 14 +-
.pre-commit-config.yaml | 6 +
dev/breeze/doc/images/output_registry.svg | 8 +-
dev/breeze/doc/images/output_registry.txt | 2 +-
dev/breeze/doc/images/output_registry_backfill.svg | 38 +++--
dev/breeze/doc/images/output_registry_backfill.txt | 2 +-
.../airflow_breeze/commands/registry_commands.py | 56 ++++++-
dev/registry/.gitignore | 2 +
dev/registry/extract_metadata.py | 87 +++++------
dev/registry/extract_parameters.py | 28 +---
dev/registry/extract_versions.py | 19 +--
.../{tests/conftest.py => generate_types_json.py} | 28 +++-
dev/registry/{tests/conftest.py => pyproject.toml} | 25 ++-
.../conftest.py => registry_tools/__init__.py} | 8 -
dev/registry/registry_tools/types.py | 142 ++++++++++++++++++
dev/registry/tests/conftest.py | 7 -
dev/registry/tests/test_extract_metadata.py | 38 -----
dev/registry/tests/test_types.py | 109 ++++++++++++++
pyproject.toml | 1 +
registry/AGENTS.md | 10 +-
registry/CLAUDE.md | 1 +
registry/README.md | 19 +++
registry/src/_data/moduleCountsByProvider.js | 31 ++++
registry/src/_data/providerVersions.js | 10 --
registry/src/_data/statsData.js | 81 +++-------
registry/src/_data/types.json | 57 +++++++
registry/src/_includes/base.njk | 1 +
registry/src/_includes/provider-card.njk | 11 +-
registry/src/css/main.css | 144 ------------------
registry/src/js/search.js | 25 ++-
registry/src/provider-version.njk | 167 +++------------------
scripts/ci/prek/check_registry_types_json_sync.py | 101 +++++++++++++
33 files changed, 693 insertions(+), 587 deletions(-)
diff --git a/.github/workflows/registry-backfill.yml
b/.github/workflows/registry-backfill.yml
index 5a0b39d661f..fa4f7924df3 100644
--- a/.github/workflows/registry-backfill.yml
+++ b/.github/workflows/registry-backfill.yml
@@ -118,7 +118,7 @@ jobs:
done
- name: "Install uv"
- uses: astral-sh/setup-uv@bd01e18f51369d5765a7df3681d34498e332e27e # v6.3.1
+ uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0
- name: "Install Breeze"
uses: ./.github/actions/breeze
diff --git a/.github/workflows/registry-tests.yml
b/.github/workflows/registry-tests.yml
index 19d5932e54c..fb254be34a5 100644
--- a/.github/workflows/registry-tests.yml
+++ b/.github/workflows/registry-tests.yml
@@ -49,16 +49,10 @@ jobs:
with:
persist-credentials: false
- - name: "Setup Python"
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
- with:
- python-version: "3.12" # 3.11+ required for stdlib tomllib
-
- name: "Install uv"
- run: python -m pip install uv
-
- - name: "Install test dependencies"
- run: uv pip install --system pytest pyyaml
+ uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0
+ with:
+ python-version: "3.12"
- name: "Run registry extraction tests"
- run: pytest dev/registry/tests/ -v -o "addopts="
+ run: cd dev/registry && uv run --group dev pytest tests/ -v
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a68b3080d88..8dee4411144 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -422,6 +422,12 @@ repos:
language: python
pass_filenames: false
files:
^airflow-core/src/airflow/secrets/base_secrets\.py$|^task-sdk/src/airflow/sdk/execution_time/secrets/__init__\.py$
+ - id: check-registry-types-json-sync
+ name: Check registry types.json in sync with types.py
+ entry: ./scripts/ci/prek/check_registry_types_json_sync.py
+ language: python
+ pass_filenames: false
+ files:
^dev/registry/registry_tools/types\.py$|^registry/src/_data/types\.json$
- id: ruff
name: Run 'ruff' for extremely fast Python linting
description: "Run 'ruff' for extremely fast Python linting"
diff --git a/dev/breeze/doc/images/output_registry.svg
b/dev/breeze/doc/images/output_registry.svg
index 951851010e7..2a077a0ba4b 100644
--- a/dev/breeze/doc/images/output_registry.svg
+++ b/dev/breeze/doc/images/output_registry.svg
@@ -114,10 +114,10 @@
</text><text class="breeze-registry-r5" x="0" y="190.8" textLength="1464"
clip-path="url(#breeze-registry-line-7)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-r1" x="1464" y="190.8" textLength="12.2"
clip-path="url(#breeze-registry-line-7)">
</text><text class="breeze-registry-r5" x="0" y="215.2" textLength="24.4"
clip-path="url(#breeze-registry-line-8)">╭─</text><text
class="breeze-registry-r5" x="24.4" y="215.2" textLength="231.8"
clip-path="url(#breeze-registry-line-8)"> Registry commands </text><text
class="breeze-registry-r5" x="256.2" y="215.2" textLength="1183.4"
clip-path="url(#breeze-registry-line-8)">─────────────────────────────────────────────────────────────────────────────────────────────────</te
[...]
</text><text class="breeze-registry-r5" x="0" y="239.6" textLength="12.2"
clip-path="url(#breeze-registry-line-9)">│</text><text
class="breeze-registry-r4" x="24.4" y="239.6" textLength="195.2"
clip-path="url(#breeze-registry-line-9)">extract-data    </text><text
class="breeze-registry-r1" x="244" y="239.6" textLength="1195.6"
clip-path="url(#breeze-registry-line-9)">Extract provider metadata, parameters, and connection types for
[...]
-</text><text class="breeze-registry-r5" x="0" y="264" textLength="12.2"
clip-path="url(#breeze-registry-line-10)">│</text><text
class="breeze-registry-r4" x="24.4" y="264" textLength="195.2"
clip-path="url(#breeze-registry-line-10)">backfill        </text><text
class="breeze-registry-r1" x="244" y="264" textLength="1037"
clip-path="url(#breeze-registry-line-10)">Extract runtime parameters and connections for older 
[...]
-</text><text class="breeze-registry-r5" x="0" y="288.4" textLength="12.2"
clip-path="url(#breeze-registry-line-11)">│</text><text
class="breeze-registry-r1" x="244" y="288.4" textLength="1195.6"
clip-path="url(#breeze-registry-line-11)">install the specific version in a temporary environment and runs extract_parameters.py +          </text><text
class="breeze-registry-r5" x="1451.8" y [...]
-</text><text class="breeze-registry-r5" x="0" y="312.8" textLength="12.2"
clip-path="url(#breeze-registry-line-12)">│</text><text
class="breeze-registry-r1" x="244" y="312.8" textLength="1195.6"
clip-path="url(#breeze-registry-line-12)">extract_connections.py. No Docker needed. Each version uses an isolated providers.json, so        </text><text
class="breeze-registry-r5" x="1451.8" y="312.8" textLe [...]
-</text><text class="breeze-registry-r5" x="0" y="337.2" textLength="12.2"
clip-path="url(#breeze-registry-line-13)">│</text><text
class="breeze-registry-r1" x="244" y="337.2" textLength="1195.6"
clip-path="url(#breeze-registry-line-13)">multiple providers can be backfilled in parallel from separate terminal sessions.                 </text><text
class="br [...]
+</text><text class="breeze-registry-r5" x="0" y="264" textLength="12.2"
clip-path="url(#breeze-registry-line-10)">│</text><text
class="breeze-registry-r4" x="24.4" y="264" textLength="195.2"
clip-path="url(#breeze-registry-line-10)">backfill        </text><text
class="breeze-registry-r1" x="244" y="264" textLength="1195.6"
clip-path="url(#breeze-registry-line-10)">Extract metadata, parameters, and connections for older
[...]
+</text><text class="breeze-registry-r5" x="0" y="288.4" textLength="12.2"
clip-path="url(#breeze-registry-line-11)">│</text><text
class="breeze-registry-r1" x="244" y="288.4" textLength="1195.6"
clip-path="url(#breeze-registry-line-11)">extract_versions.py (host, git tags) for metadata.json, then extract_parameters.py +              </text><text
class="breeze-registry-r5" x="1451 [...]
+</text><text class="breeze-registry-r5" x="0" y="312.8" textLength="12.2"
clip-path="url(#breeze-registry-line-12)">│</text><text
class="breeze-registry-r1" x="244" y="312.8" textLength="427"
clip-path="url(#breeze-registry-line-12)">extract_connections.py via 'uv run </text><text
class="breeze-registry-r4" x="671" y="312.8" textLength="73.2"
clip-path="url(#breeze-registry-line-12)">--with</text><text
class="breeze-registry-r1" x="744.2" y="312.8" textLength="69 [...]
+</text><text class="breeze-registry-r5" x="0" y="337.2" textLength="12.2"
clip-path="url(#breeze-registry-line-13)">│</text><text
class="breeze-registry-r1" x="244" y="337.2" textLength="1195.6"
clip-path="url(#breeze-registry-line-13)">providers.json, so multiple providers can be backfilled in parallel.                        
[...]
</text><text class="breeze-registry-r5" x="0" y="361.6" textLength="12.2"
clip-path="url(#breeze-registry-line-14)">│</text><text
class="breeze-registry-r4" x="24.4" y="361.6" textLength="195.2"
clip-path="url(#breeze-registry-line-14)">publish-versions</text><text
class="breeze-registry-r1" x="244" y="361.6" textLength="1195.6"
clip-path="url(#breeze-registry-line-14)">Publish per-provider versions.json to S3 from deployed directories. Same p
[...]
</text><text class="breeze-registry-r5" x="0" y="386" textLength="12.2"
clip-path="url(#breeze-registry-line-15)">│</text><text
class="breeze-registry-r1" x="244" y="386" textLength="1195.6"
clip-path="url(#breeze-registry-line-15)">release-management publish-docs-to-s3'.                                   
[...]
</text><text class="breeze-registry-r5" x="0" y="410.4" textLength="1464"
clip-path="url(#breeze-registry-line-16)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-r1" x="1464" y="410.4" textLength="12.2"
clip-path="url(#breeze-registry-line-16)">
diff --git a/dev/breeze/doc/images/output_registry.txt
b/dev/breeze/doc/images/output_registry.txt
index fadd741e8b6..2c84fcf83d1 100644
--- a/dev/breeze/doc/images/output_registry.txt
+++ b/dev/breeze/doc/images/output_registry.txt
@@ -1 +1 @@
-8c9be6264d33af7facd1fbdf435697b7
+b1c2694af08bb5e10ae6f2c3b9bb2479
diff --git a/dev/breeze/doc/images/output_registry_backfill.svg
b/dev/breeze/doc/images/output_registry_backfill.svg
index 4478565366e..75ae72f45d2 100644
--- a/dev/breeze/doc/images/output_registry_backfill.svg
+++ b/dev/breeze/doc/images/output_registry_backfill.svg
@@ -1,4 +1,4 @@
-<svg class="rich-terminal" viewBox="0 0 1482 489.2"
xmlns="http://www.w3.org/2000/svg">
+<svg class="rich-terminal" viewBox="0 0 1482 464.79999999999995"
xmlns="http://www.w3.org/2000/svg">
<!-- Generated with Rich https://www.textualize.io -->
<style>
@@ -45,7 +45,7 @@
<defs>
<clipPath id="breeze-registry-backfill-clip-terminal">
- <rect x="0" y="0" width="1463.0" height="438.2" />
+ <rect x="0" y="0" width="1463.0" height="413.79999999999995" />
</clipPath>
<clipPath id="breeze-registry-backfill-line-0">
<rect x="0" y="1.5" width="1464" height="24.65"/>
@@ -95,12 +95,9 @@
<clipPath id="breeze-registry-backfill-line-15">
<rect x="0" y="367.5" width="1464" height="24.65"/>
</clipPath>
-<clipPath id="breeze-registry-backfill-line-16">
- <rect x="0" y="391.9" width="1464" height="24.65"/>
- </clipPath>
</defs>
- <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1"
x="1" y="1" width="1480" height="487.2" rx="8"/><text
class="breeze-registry-backfill-title" fill="#c5c8c6" text-anchor="middle"
x="740" y="27">Command: registry backfill</text>
+ <rect fill="#292929" stroke="rgba(255,255,255,0.35)" stroke-width="1"
x="1" y="1" width="1480" height="462.8" rx="8"/><text
class="breeze-registry-backfill-title" fill="#c5c8c6" text-anchor="middle"
x="740" y="27">Command: registry backfill</text>
<g transform="translate(26,22)">
<circle cx="0" cy="0" r="7" fill="#ff5f57"/>
<circle cx="22" cy="0" r="7" fill="#febc2e"/>
@@ -113,21 +110,20 @@
<text class="breeze-registry-backfill-r1" x="1464" y="20"
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-0)">
</text><text class="breeze-registry-backfill-r2" x="12.2" y="44.4"
textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-1)">Usage:</text><text
class="breeze-registry-backfill-r3" x="97.6" y="44.4" textLength="292.8"
clip-path="url(#breeze-registry-backfill-line-1)">breeze registry backfill</text><text
class="breeze-registry-backfill-r1" x="402.6" y="44.4" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-1)">[</text><text
class="breeze-registry-backfill [...]
</text><text class="breeze-registry-backfill-r1" x="1464" y="68.8"
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-2)">
-</text><text class="breeze-registry-backfill-r1" x="12.2" y="93.2"
textLength="1037"
clip-path="url(#breeze-registry-backfill-line-3)">Extract runtime parameters and connections for older provider versions. Uses 'uv run </text><text
class="breeze-registry-backfill-r4" x="1049.2" y="93.2" textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-3)">--with</text><text
class="breeze-registry-backfill-r1" x="1122.4" y="9 [...]
-</text><text class="breeze-registry-backfill-r1" x="12.2" y="117.6"
textLength="1403"
clip-path="url(#breeze-registry-backfill-line-4)">version in a temporary environment and runs extract_parameters.py + extract_connections.py. No Docker needed. Each </text><text
class="breeze-registry-backfill-r1" x="1464" y="117.6" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-4)">
-</text><text class="breeze-registry-backfill-r1" x="12.2" y="142"
textLength="1415.2"
clip-path="url(#breeze-registry-backfill-line-5)">version uses an isolated providers.json, so multiple providers can be backfilled in parallel from separate terminal </text><text
class="breeze-registry-backfill-r1" x="1464" y="142" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-5)">
-</text><text class="breeze-registry-backfill-r1" x="12.2" y="166.4"
textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-6)">sessions.</text><text
class="breeze-registry-backfill-r1" x="1464" y="166.4" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-6)">
-</text><text class="breeze-registry-backfill-r1" x="1464" y="190.8"
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-7)">
-</text><text class="breeze-registry-backfill-r5" x="0" y="215.2"
textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-8)">╭─</text><text
class="breeze-registry-backfill-r5" x="24.4" y="215.2" textLength="195.2"
clip-path="url(#breeze-registry-backfill-line-8)"> Backfill flags </text><text
class="breeze-registry-backfill-r5" x="219.6" y="215.2" textLength="1220"
clip-path="url(#breeze-registry-backfill-line-8)">────────────────────────────────────────────────────
[...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="239.6"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-9)">│</text><text
class="breeze-registry-backfill-r6" x="24.4" y="239.6" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-9)">*</text><text
class="breeze-registry-backfill-r4" x="61" y="239.6" textLength="122"
clip-path="url(#breeze-registry-backfill-line-9)">--provider</text><text
class="breeze-registry-backfill-r1" x="207.4" y="239.6" textLengt [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="264"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-10)">│</text><text
class="breeze-registry-backfill-r6" x="24.4" y="264" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-10)">*</text><text
class="breeze-registry-backfill-r4" x="61" y="264" textLength="122"
clip-path="url(#breeze-registry-backfill-line-10)">--version </text><text
class="breeze-registry-backfill-r1" x="207.4" y="264" textLengt [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="288.4"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-11)">│</text><text
class="breeze-registry-backfill-r8" x="207.4" y="288.4" textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-11)">(TEXT)</text><text
class="breeze-registry-backfill-r5" x="1451.8" y="288.4" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-11)">│</text><text
class="breeze-registry-backfill-r1" x="1464" y="288.4" textL [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="312.8"
textLength="1464"
clip-path="url(#breeze-registry-backfill-line-12)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-backfill-r1" x="1464" y="312.8" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-12)">
-</text><text class="breeze-registry-backfill-r5" x="0" y="337.2"
textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-13)">╭─</text><text
class="breeze-registry-backfill-r5" x="24.4" y="337.2" textLength="195.2"
clip-path="url(#breeze-registry-backfill-line-13)"> Common options </text><text
class="breeze-registry-backfill-r5" x="219.6" y="337.2" textLength="1220"
clip-path="url(#breeze-registry-backfill-line-13)">─────────────────────────────────────────────────
[...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="361.6"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-14)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="361.6" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-14)">--verbose</text><text
class="breeze-registry-backfill-r9" x="158.6" y="361.6" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-14)">-v</text><text
class="breeze-registry-backfill-r1" x="207.4" y="361.6" t [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="386"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-15)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="386" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-15)">--dry-run</text><text
class="breeze-registry-backfill-r9" x="158.6" y="386" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-15)">-D</text><text
class="breeze-registry-backfill-r1" x="207.4" y="386" textLengt [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="410.4"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-16)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="410.4" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-16)">--help   </text><text
class="breeze-registry-backfill-r9" x="158.6" y="410.4" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-16)">-h</text><text
class="breeze-registry-backfill-r1" x="207 [...]
-</text><text class="breeze-registry-backfill-r5" x="0" y="434.8"
textLength="1464"
clip-path="url(#breeze-registry-backfill-line-17)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-backfill-r1" x="1464" y="434.8" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-17)">
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="93.2"
textLength="1427.4"
clip-path="url(#breeze-registry-backfill-line-3)">Extract metadata, parameters, and connections for older provider versions. Runs extract_versions.py (host, git tags) </text><text
class="breeze-registry-backfill-r1" x="1464" y="93.2" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-3)">
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="117.6"
textLength="1012.6"
clip-path="url(#breeze-registry-backfill-line-4)">for metadata.json, then extract_parameters.py + extract_connections.py via 'uv run </text><text
class="breeze-registry-backfill-r4" x="1024.8" y="117.6" textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-4)">--with</text><text
class="breeze-registry-backfill-r1" x="1098" y="117.6" textLengt [...]
+</text><text class="breeze-registry-backfill-r1" x="12.2" y="142"
textLength="1134.6"
clip-path="url(#breeze-registry-backfill-line-5)">version uses an isolated providers.json, so multiple providers can be backfilled in parallel.</text><text
class="breeze-registry-backfill-r1" x="1464" y="142" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-5)">
+</text><text class="breeze-registry-backfill-r1" x="1464" y="166.4"
textLength="12.2" clip-path="url(#breeze-registry-backfill-line-6)">
+</text><text class="breeze-registry-backfill-r5" x="0" y="190.8"
textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-7)">╭─</text><text
class="breeze-registry-backfill-r5" x="24.4" y="190.8" textLength="195.2"
clip-path="url(#breeze-registry-backfill-line-7)"> Backfill flags </text><text
class="breeze-registry-backfill-r5" x="219.6" y="190.8" textLength="1220"
clip-path="url(#breeze-registry-backfill-line-7)">────────────────────────────────────────────────────
[...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="215.2"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-8)">│</text><text
class="breeze-registry-backfill-r6" x="24.4" y="215.2" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-8)">*</text><text
class="breeze-registry-backfill-r4" x="61" y="215.2" textLength="122"
clip-path="url(#breeze-registry-backfill-line-8)">--provider</text><text
class="breeze-registry-backfill-r1" x="207.4" y="215.2" textLengt [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="239.6"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-9)">│</text><text
class="breeze-registry-backfill-r6" x="24.4" y="239.6" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-9)">*</text><text
class="breeze-registry-backfill-r4" x="61" y="239.6" textLength="122"
clip-path="url(#breeze-registry-backfill-line-9)">--version </text><text
class="breeze-registry-backfill-r1" x="207.4" y="239.6" text [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="264"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-10)">│</text><text
class="breeze-registry-backfill-r8" x="207.4" y="264" textLength="73.2"
clip-path="url(#breeze-registry-backfill-line-10)">(TEXT)</text><text
class="breeze-registry-backfill-r5" x="1451.8" y="264" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-10)">│</text><text
class="breeze-registry-backfill-r1" x="1464" y="264" textLength="1 [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="288.4"
textLength="1464"
clip-path="url(#breeze-registry-backfill-line-11)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-backfill-r1" x="1464" y="288.4" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-11)">
+</text><text class="breeze-registry-backfill-r5" x="0" y="312.8"
textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-12)">╭─</text><text
class="breeze-registry-backfill-r5" x="24.4" y="312.8" textLength="195.2"
clip-path="url(#breeze-registry-backfill-line-12)"> Common options </text><text
class="breeze-registry-backfill-r5" x="219.6" y="312.8" textLength="1220"
clip-path="url(#breeze-registry-backfill-line-12)">─────────────────────────────────────────────────
[...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="337.2"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-13)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="337.2" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-13)">--verbose</text><text
class="breeze-registry-backfill-r9" x="158.6" y="337.2" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-13)">-v</text><text
class="breeze-registry-backfill-r1" x="207.4" y="337.2" t [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="361.6"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-14)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="361.6" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-14)">--dry-run</text><text
class="breeze-registry-backfill-r9" x="158.6" y="361.6" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-14)">-D</text><text
class="breeze-registry-backfill-r1" x="207.4" y="361.6" t [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="386"
textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-15)">│</text><text
class="breeze-registry-backfill-r4" x="24.4" y="386" textLength="109.8"
clip-path="url(#breeze-registry-backfill-line-15)">--help   </text><text
class="breeze-registry-backfill-r9" x="158.6" y="386" textLength="24.4"
clip-path="url(#breeze-registry-backfill-line-15)">-h</text><text
class="breeze-registry-backfill-r1" x="207.4" y= [...]
+</text><text class="breeze-registry-backfill-r5" x="0" y="410.4"
textLength="1464"
clip-path="url(#breeze-registry-backfill-line-16)">╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</text><text
class="breeze-registry-backfill-r1" x="1464" y="410.4" textLength="12.2"
clip-path="url(#breeze-registry-backfill-line-16)">
</text>
</g>
</g>
diff --git a/dev/breeze/doc/images/output_registry_backfill.txt
b/dev/breeze/doc/images/output_registry_backfill.txt
index cff872551db..a6c2643c303 100644
--- a/dev/breeze/doc/images/output_registry_backfill.txt
+++ b/dev/breeze/doc/images/output_registry_backfill.txt
@@ -1 +1 @@
-5cddc0e9c5f9524a7e1baf6c21d74263
+ae5a02b96d38958e1b732eef27e22743
diff --git a/dev/breeze/src/airflow_breeze/commands/registry_commands.py
b/dev/breeze/src/airflow_breeze/commands/registry_commands.py
index 9fb58256428..8a7457f1248 100644
--- a/dev/breeze/src/airflow_breeze/commands/registry_commands.py
+++ b/dev/breeze/src/airflow_breeze/commands/registry_commands.py
@@ -34,6 +34,23 @@ from airflow_breeze.utils.docker_command_utils import
execute_command_in_shell,
from airflow_breeze.utils.path_utils import AIRFLOW_ROOT_PATH
from airflow_breeze.utils.run_utils import run_command
+PROVIDERS_DIR = AIRFLOW_ROOT_PATH / "providers"
+
+
+def _get_suspended_provider_packages() -> list[str]:
+ """Return in-container pip-installable paths for providers with state: suspended."""
+ packages = []
+ for yaml_path in sorted(PROVIDERS_DIR.rglob("provider.yaml")):
+ if "src" in yaml_path.relative_to(PROVIDERS_DIR).parts:
+ continue
+ with open(yaml_path) as f:
+ data = yaml.safe_load(f)
+ if data.get("state") == "suspended":
+ # Use in-container path (providers/ is mounted at /opt/airflow/providers/)
+ rel = yaml_path.parent.relative_to(PROVIDERS_DIR)
+ packages.append(f"/opt/airflow/providers/{rel}")
+ return packages
+
@click.group(cls=BreezeGroup, name="registry", help="Tools for the Airflow
Provider Registry")
def registry_group():
@@ -65,8 +82,14 @@ def extract_data(python: str, provider: str | None):
rebuild_or_pull_ci_image_if_needed(command_params=shell_params)
+ # Install suspended providers that aren't in the CI image so runtime
+ # discovery (issubclass) can find their classes.
+ suspended_packages = _get_suspended_provider_packages()
+ install_cmd = f"pip install --quiet {' '.join(suspended_packages)} && " if suspended_packages else ""
+
provider_flag = f" --provider '{provider}'" if provider else ""
command = (
+ f"{install_cmd}"
f"python dev/registry/extract_metadata.py{provider_flag} && "
"python dev/registry/extract_parameters.py && "
"python dev/registry/extract_connections.py"
@@ -108,7 +131,6 @@ def publish_versions(s3_bucket: str, providers_json: str |
None):
_publish_versions(s3_bucket, providers_json_path=providers_path)
-PROVIDERS_DIR = AIRFLOW_ROOT_PATH / "providers"
DEV_REGISTRY_DIR = AIRFLOW_ROOT_PATH / "dev" / "registry"
EXTRACT_SCRIPTS = [
@@ -222,11 +244,11 @@ def _run_extract_script(
@registry_group.command(
name="backfill",
- help="Extract runtime parameters and connections for older provider versions. "
- "Uses 'uv run --with' to install the specific version in a temporary environment "
- "and runs extract_parameters.py + extract_connections.py. No Docker needed. "
- "Each version uses an isolated providers.json, so multiple providers can be "
- "backfilled in parallel from separate terminal sessions.",
+ help="Extract metadata, parameters, and connections for older provider versions. "
+ "Runs extract_versions.py (host, git tags) for metadata.json, then "
+ "extract_parameters.py + extract_connections.py via 'uv run --with'. "
+ "No Docker needed. Each version uses an isolated providers.json, so "
+ "multiple providers can be backfilled in parallel.",
)
@click.option(
"--provider",
@@ -253,6 +275,27 @@ def backfill(provider: str, versions: tuple[str, ...]):
failed: list[str] = []
+ # Step 1: extract_versions.py (host, reads git tags) → metadata.json
+ # Without metadata.json, Eleventy won't generate version pages.
+ click.echo("Step 1: Extracting version metadata from git tags...")
+ for version in versions:
+ versions_cmd = [
+ "uv",
+ "run",
+ "python",
+ str(DEV_REGISTRY_DIR / "extract_versions.py"),
+ "--provider",
+ provider,
+ "--version",
+ version,
+ ]
+ result = run_command(versions_cmd, check=False, cwd=str(AIRFLOW_ROOT_PATH))
+ if result.returncode != 0:
+ click.echo(f"WARNING: extract_versions.py failed for {version} (exit {result.returncode})")
+ failed.append(f"{version}/extract_versions.py")
+
+ # Step 2: extract_parameters.py + extract_connections.py (uv run --with)
+ click.echo("\nStep 2: Extracting parameters and connections...")
with tempfile.TemporaryDirectory(prefix=f"backfill-{provider}-") as tmp_dir:
tmp_path = Path(tmp_dir)
@@ -282,6 +325,7 @@ def backfill(provider: str, versions: tuple[str, ...]):
click.echo(f"Successfully extracted {len(versions)} version(s) for
{provider}")
click.echo(
f"\nOutput written to:\n"
+ f"
registry/src/_data/versions/{provider}/<version>/metadata.json\n"
f"
registry/src/_data/versions/{provider}/<version>/parameters.json\n"
f"
registry/src/_data/versions/{provider}/<version>/connections.json"
)
diff --git a/dev/registry/.gitignore b/dev/registry/.gitignore
index b042ba92933..80fcf7ec8aa 100644
--- a/dev/registry/.gitignore
+++ b/dev/registry/.gitignore
@@ -1,3 +1,5 @@
+.backfill-logs/
.inventory_cache/
output/
runtime_modules.json
+uv.lock
diff --git a/dev/registry/extract_metadata.py b/dev/registry/extract_metadata.py
index c1d2043d24d..1c5047c08e1 100644
--- a/dev/registry/extract_metadata.py
+++ b/dev/registry/extract_metadata.py
@@ -273,49 +273,6 @@ def extract_integrations_as_categories(provider_yaml:
dict[str, Any]) -> list[Ca
return list(categories.values())
-def count_modules_by_type(provider_yaml: dict[str, Any]) -> dict[str, int]:
- """Count modules by type from provider.yaml."""
- counts = {
- "operator": 0,
- "hook": 0,
- "sensor": 0,
- "trigger": 0,
- "transfer": 0,
- "notifier": 0,
- "secret": 0,
- "logging": 0,
- "executor": 0,
- "bundle": 0,
- "decorator": 0,
- }
-
- # Sections where each entry has a python-modules list
- MODULE_LEVEL = {
- "operators": "operator",
- "hooks": "hook",
- "sensors": "sensor",
- "triggers": "trigger",
- "bundles": "bundle",
- }
- for yaml_key, count_key in MODULE_LEVEL.items():
- for group in provider_yaml.get(yaml_key, []):
- counts[count_key] += len(group.get("python-modules", []))
-
- # Sections where each entry is a single item (flat list or class path)
- FLAT_LEVEL = {
- "transfers": "transfer",
- "notifications": "notifier",
- "secrets-backends": "secret",
- "logging": "logging",
- "executors": "executor",
- "task-decorators": "decorator",
- }
- for yaml_key, count_key in FLAT_LEVEL.items():
- counts[count_key] = len(provider_yaml.get(yaml_key, []))
-
- return counts
-
-
def module_path_to_file_path(module_path: str, provider_path: Path) -> Path:
"""Convert a Python module path to an actual file path.
@@ -448,9 +405,6 @@ def main():
versions = provider_yaml.get("versions", [])
version = versions[0] if versions else "0.0.0"
- # Count modules
- module_counts = count_modules_by_type(provider_yaml)
-
# Extract categories from integrations
categories = extract_integrations_as_categories(provider_yaml)
@@ -483,8 +437,11 @@ def main():
# Write logos to dev/registry/logos/ — this directory is mounted in
# breeze (unlike registry/public/) so copies survive the container.
+ # Also copy to registry/public/logos/ for local dev convenience.
logos_dest_dir = SCRIPT_DIR / "logos"
logos_dest_dir.mkdir(parents=True, exist_ok=True)
+ registry_logos_dir = SCRIPT_DIR.parent.parent / "registry" / "public" / "logos"
+ registry_logos_dir.mkdir(parents=True, exist_ok=True)
if integration_logos_dir.exists():
# First, check for priority logos for known providers
@@ -529,6 +486,14 @@ def main():
shutil.copy2(logo_source, logo_dest)
logo = f"/logos/{provider_id}-{logo_source.name}"
+ # Also copy to registry/public/logos/ so local `pnpm dev` works without
+ # the extra CI copy step.
+ if logo:
+ logo_filename = logo.split("/")[-1]
+ src = logos_dest_dir / logo_filename
+ if src.exists():
+ shutil.copy2(src, registry_logos_dir / logo_filename)
+
# Extract connection types from provider.yaml
# Link to the connections index page since individual connection pages
might not exist
connection_types = []
@@ -568,7 +533,6 @@ def main():
versions=versions,
airflow_versions=airflow_versions,
pypi_downloads=pypi_downloads,
- module_counts=module_counts,
categories=[asdict(c) for c in categories],
connection_types=connection_types,
requires_python=pyproject_data["requires_python"],
@@ -590,11 +554,30 @@ def main():
for provider in all_providers:
provider.related_providers = find_related_providers(provider.id,
all_provider_yamls)
- # Sort providers alphabetically by name
- all_providers.sort(key=lambda p: p.name.lower())
-
# Convert to JSON-serializable format
- providers_json = {"providers": [asdict(p) for p in all_providers]}
+ new_providers = [asdict(p) for p in all_providers]
+
+ # In incremental mode, merge new providers into existing providers.json
+ # so parallel runs for different providers don't clobber each other.
+ if requested_providers:
+ new_by_id = {p["id"]: p for p in new_providers}
+ for out_dir in [SCRIPT_DIR, OUTPUT_DIR]:
+ existing_path = out_dir / "providers.json"
+ if existing_path.exists():
+ try:
+ existing = json.loads(existing_path.read_text())
+ merged = [new_by_id.pop(p["id"], p) for p in existing["providers"]]
+ merged.extend(new_by_id.values())
+ new_providers = merged
+ print(
+ f"Merged {len(all_providers)} updated + {len(merged) - len(all_providers)} existing providers"
+ )
+ except (json.JSONDecodeError, KeyError):
+ pass
+ break
+
+ new_providers.sort(key=lambda p: p["name"].lower())
+ providers_json = {"providers": new_providers}
# Write output files to all output directories.
# Inside breeze, registry/ is not mounted so OUTPUT_DIR writes are lost.
@@ -608,7 +591,7 @@ def main():
out_dir.mkdir(parents=True, exist_ok=True)
with open(out_dir / "providers.json", "w") as f:
json.dump(providers_json, f, indent=2)
- print(f"\nWrote {len(all_providers)} providers to {out_dir}")
+ print(f"\nWrote {len(new_providers)} providers to {out_dir}")
print("\nDone!")
diff --git a/dev/registry/extract_parameters.py
b/dev/registry/extract_parameters.py
index 8ef31f75dc7..69d7c307653 100644
--- a/dev/registry/extract_parameters.py
+++ b/dev/registry/extract_parameters.py
@@ -53,6 +53,7 @@ from pathlib import Path
import yaml
from extract_metadata import fetch_provider_inventory, read_inventory
+from registry_tools.types import BASE_CLASS_IMPORTS, CLASS_LEVEL_SECTIONS, MODULE_LEVEL_SECTIONS
AIRFLOW_ROOT = Path(__file__).parent.parent.parent
SCRIPT_DIR = Path(__file__).parent
@@ -323,33 +324,6 @@ def find_json(candidates: list[Path], name: str) -> Path:
log = logging.getLogger(__name__)
-# Base class import paths, ordered so more-specific types are checked first
-# (sensor before operator, since BaseSensorOperator inherits BaseOperator).
-BASE_CLASS_IMPORTS: list[tuple[str, str]] = [
- ("sensor", "airflow.sdk.bases.sensor.BaseSensorOperator"),
- ("trigger", "airflow.triggers.base.BaseTrigger"),
- ("hook", "airflow.sdk.bases.hook.BaseHook"),
- ("bundle", "airflow.dag_processing.bundles.base.BaseDagBundle"),
- ("operator", "airflow.sdk.bases.operator.BaseOperator"),
-]
-
-# provider.yaml sections that list python-modules (module-level)
-MODULE_LEVEL_SECTIONS: dict[str, str] = {
- "operators": "operator",
- "hooks": "hook",
- "sensors": "sensor",
- "triggers": "trigger",
- "bundles": "bundle",
-}
-
-# provider.yaml sections that list full class paths (class-level)
-CLASS_LEVEL_SECTIONS: dict[str, str] = {
- "notifications": "notifier",
- "secrets-backends": "secret",
- "logging": "logging",
- "executors": "executor",
-}
-
def load_base_classes() -> dict[str, type]:
"""Import base classes for issubclass checks.
diff --git a/dev/registry/extract_versions.py b/dev/registry/extract_versions.py
index eb9ffc6f8c1..d52a31b5bf7 100644
--- a/dev/registry/extract_versions.py
+++ b/dev/registry/extract_versions.py
@@ -54,6 +54,7 @@ except ImportError:
print("ERROR: PyYAML required. Install with: pip install pyyaml")
sys.exit(1)
+from registry_tools.types import MODULE_LEVEL_SECTIONS, TYPE_SUFFIXES
AIRFLOW_ROOT = Path(__file__).parent.parent.parent
PROVIDERS_DIR = AIRFLOW_ROOT / "providers"
@@ -203,14 +204,7 @@ def extract_modules_from_yaml(
else:
base_source_url =
f"https://github.com/apache/airflow/blob/{tag}/providers/src"
- type_patterns = {
- "operator": ["Operator", "Command"],
- "hook": ["Hook"],
- "sensor": ["Sensor"],
- "trigger": ["Trigger"],
- "transfer": ["Operator", "Transfer"],
- "bundle": ["Bundle"],
- }
+ type_patterns = TYPE_SUFFIXES
def get_category(integration_name: str) -> str:
cat_id = integration_name.lower().replace(" ", "-").replace("(",
"").replace(")", "")
@@ -277,14 +271,7 @@ def extract_modules_from_yaml(
)
# Module-level sections (each group has integration-name + python-modules)
- MODULE_SECTIONS = {
- "operators": "operator",
- "hooks": "hook",
- "sensors": "sensor",
- "triggers": "trigger",
- "bundles": "bundle",
- }
- for yaml_key, mod_type in MODULE_SECTIONS.items():
+ for yaml_key, mod_type in MODULE_LEVEL_SECTIONS.items():
for group in provider_yaml.get(yaml_key, []):
integration = group.get("integration-name", "")
category = get_category(integration)
diff --git a/dev/registry/tests/conftest.py
b/dev/registry/generate_types_json.py
similarity index 53%
copy from dev/registry/tests/conftest.py
copy to dev/registry/generate_types_json.py
index a304d595804..8af0a77edeb 100644
--- a/dev/registry/tests/conftest.py
+++ b/dev/registry/generate_types_json.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -14,11 +15,30 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+"""Generate types.json for the registry frontend from registry_tools.types."""
+
from __future__ import annotations
-import sys
+import json
from pathlib import Path
-# Add dev/registry/ to sys.path so ``import extract_metadata`` works without
-# a pyproject.toml in that directory.
-sys.path.insert(0, str(Path(__file__).parent.parent))
+from registry_tools.types import MODULE_TYPES
+
+REGISTRY_DATA_DIR = Path(__file__).parent.parent.parent / "registry" / "src" / "_data"
+
+
+def main() -> None:
+ types_list = [
+ {"id": type_id, "label": info["label"], "icon": info["icon"]}
+ for type_id, info in MODULE_TYPES.items()
+ ]
+ output_path = REGISTRY_DATA_DIR / "types.json"
+ output_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(output_path, "w") as f:
+ json.dump(types_list, f, indent=2)
+ f.write("\n")
+ print(f"Wrote {len(types_list)} types to {output_path}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/dev/registry/tests/conftest.py b/dev/registry/pyproject.toml
similarity index 62%
copy from dev/registry/tests/conftest.py
copy to dev/registry/pyproject.toml
index a304d595804..8404efd5807 100644
--- a/dev/registry/tests/conftest.py
+++ b/dev/registry/pyproject.toml
@@ -14,11 +14,24 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-from __future__ import annotations
-import sys
-from pathlib import Path
+[build-system]
+requires = ["hatchling==1.29.0"]
+build-backend = "hatchling.build"
-# Add dev/registry/ to sys.path so ``import extract_metadata`` works without
-# a pyproject.toml in that directory.
-sys.path.insert(0, str(Path(__file__).parent.parent))
+[project]
+name = "apache-airflow-registry-tools"
+description = "Extraction and build tools for the Airflow Provider Registry"
+version = "0.0.1"
+requires-python = ">=3.10"
+classifiers = ["Private :: Do Not Upload"]
+dependencies = ["pyyaml>=6.0.3"]
+
+[dependency-groups]
+dev = ["pytest"]
+
+[tool.hatch.build.targets.wheel]
+packages = ["registry_tools"]
+
+[tool.pytest.ini_options]
+pythonpath = ["."]
diff --git a/dev/registry/tests/conftest.py
b/dev/registry/registry_tools/__init__.py
similarity index 76%
copy from dev/registry/tests/conftest.py
copy to dev/registry/registry_tools/__init__.py
index a304d595804..13a83393a91 100644
--- a/dev/registry/tests/conftest.py
+++ b/dev/registry/registry_tools/__init__.py
@@ -14,11 +14,3 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-from __future__ import annotations
-
-import sys
-from pathlib import Path
-
-# Add dev/registry/ to sys.path so ``import extract_metadata`` works without
-# a pyproject.toml in that directory.
-sys.path.insert(0, str(Path(__file__).parent.parent))
diff --git a/dev/registry/registry_tools/types.py
b/dev/registry/registry_tools/types.py
new file mode 100644
index 00000000000..7b266d15da2
--- /dev/null
+++ b/dev/registry/registry_tools/types.py
@@ -0,0 +1,142 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Single source of truth for provider module types.
+
+All extraction scripts and frontend data files derive their type
+definitions from this module. To add a new module type, add an
+entry to ``MODULE_TYPES`` (and optionally ``BASE_CLASS_IMPORTS``),
+then run ``generate_types_json.py`` to propagate to the frontend.
+"""
+
+from __future__ import annotations
+
+MODULE_TYPES: dict[str, dict] = {
+ "operator": {
+ "yaml_key": "operators",
+ "level": "module", # "module" = python-modules list
+ "suffixes": ["Operator", "Command"],
+ "label": "Operators",
+ "icon": "O",
+ },
+ "hook": {
+ "yaml_key": "hooks",
+ "level": "module",
+ "suffixes": ["Hook"],
+ "label": "Hooks",
+ "icon": "H",
+ },
+ "sensor": {
+ "yaml_key": "sensors",
+ "level": "module",
+ "suffixes": ["Sensor"],
+ "label": "Sensors",
+ "icon": "S",
+ },
+ "trigger": {
+ "yaml_key": "triggers",
+ "level": "module",
+ "suffixes": ["Trigger"],
+ "label": "Triggers",
+ "icon": "T",
+ },
+ "transfer": {
+ "yaml_key": "transfers",
+ "level": "flat", # "flat" = count only (len of list)
+ "suffixes": ["Operator", "Transfer"],
+ "label": "Transfers",
+ "icon": "X",
+ },
+ "bundle": {
+ "yaml_key": "bundles",
+ "level": "module",
+ "suffixes": ["Bundle"],
+ "label": "Bundles",
+ "icon": "B",
+ },
+ "notifier": {
+ "yaml_key": "notifications",
+ "level": "flat",
+ "suffixes": [],
+ "label": "Notifiers",
+ "icon": "N",
+ },
+ "secret": {
+ "yaml_key": "secrets-backends",
+ "level": "flat",
+ "suffixes": [],
+ "label": "Secrets Backend",
+ "icon": "K",
+ },
+ "logging": {
+ "yaml_key": "logging",
+ "level": "flat",
+ "suffixes": [],
+ "label": "Log Handler",
+ "icon": "L",
+ },
+ "executor": {
+ "yaml_key": "executors",
+ "level": "flat",
+ "suffixes": [],
+ "label": "Executors",
+ "icon": "E",
+ },
+ "decorator": {
+ "yaml_key": "task-decorators",
+ "level": "flat",
+ "suffixes": [],
+ "label": "Decorators",
+ "icon": "@",
+ },
+}
+
+# Runtime base class imports for issubclass checks (extract_parameters.py).
+# Ordered so more-specific types are checked first (sensor before operator,
+# since BaseSensorOperator inherits BaseOperator).
+BASE_CLASS_IMPORTS: list[tuple[str, str]] = [
+ ("sensor", "airflow.sdk.bases.sensor.BaseSensorOperator"),
+ ("trigger", "airflow.triggers.base.BaseTrigger"),
+ ("hook", "airflow.sdk.bases.hook.BaseHook"),
+ ("bundle", "airflow.dag_processing.bundles.base.BaseDagBundle"),
+ ("operator", "airflow.sdk.bases.operator.BaseOperator"),
+]
+
+# Derived lookups used by extraction scripts.
+# Maps yaml section key -> type id for module-level sections.
+MODULE_LEVEL_SECTIONS: dict[str, str] = {
+ info["yaml_key"]: type_id for type_id, info in MODULE_TYPES.items() if info["level"] == "module"
+}
+
+# Maps yaml section key -> type id for flat-count sections.
+FLAT_LEVEL_SECTIONS: dict[str, str] = {
+ info["yaml_key"]: type_id for type_id, info in MODULE_TYPES.items() if info["level"] == "flat"
+}
+
+# Maps type id -> list of class name suffixes for AST matching.
+TYPE_SUFFIXES: dict[str, list[str]] = {type_id: info["suffixes"] for type_id, info in MODULE_TYPES.items()}
+
+# Class-level sections used by extract_parameters.py (subset of flat that
+# list full class paths rather than simple entries).
+CLASS_LEVEL_SECTIONS: dict[str, str] = {
+ "notifications": "notifier",
+ "secrets-backends": "secret",
+ "logging": "logging",
+ "executors": "executor",
+}
+
+# All type ids, ordered consistently.
+ALL_TYPE_IDS: list[str] = list(MODULE_TYPES.keys())
diff --git a/dev/registry/tests/conftest.py b/dev/registry/tests/conftest.py
index a304d595804..21d298ede6e 100644
--- a/dev/registry/tests/conftest.py
+++ b/dev/registry/tests/conftest.py
@@ -15,10 +15,3 @@
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
-
-import sys
-from pathlib import Path
-
-# Add dev/registry/ to sys.path so ``import extract_metadata`` works without
-# a pyproject.toml in that directory.
-sys.path.insert(0, str(Path(__file__).parent.parent))
diff --git a/dev/registry/tests/test_extract_metadata.py
b/dev/registry/tests/test_extract_metadata.py
index ab817c3e8d1..138b41daae4 100644
--- a/dev/registry/tests/test_extract_metadata.py
+++ b/dev/registry/tests/test_extract_metadata.py
@@ -26,7 +26,6 @@ from unittest.mock import MagicMock, patch
import pytest
from extract_metadata import (
- count_modules_by_type,
determine_airflow_versions,
extract_integrations_as_categories,
fetch_provider_inventory,
@@ -98,43 +97,6 @@ class TestExtractIntegrationsAsCategories:
assert len(categories) == 1
-# ---------------------------------------------------------------------------
-# count_modules_by_type
-# ---------------------------------------------------------------------------
-class TestCountModulesByType:
- def test_empty_yaml_returns_all_zero(self):
- counts = count_modules_by_type({})
- assert len(counts) == 11
- assert all(v == 0 for v in counts.values())
-
- def test_operators_only(self):
- yaml_data = {
- "operators": [
- {"python-modules": ["mod1", "mod2"]},
- {"python-modules": ["mod3"]},
- ]
- }
- counts = count_modules_by_type(yaml_data)
- assert counts["operator"] == 3
- assert counts["hook"] == 0
-
- def test_mixed_module_types(self):
- yaml_data = {
- "operators": [{"python-modules": ["op1"]}],
- "hooks": [{"python-modules": ["h1", "h2"]}],
- "transfers": [{"source": "a", "target": "b"}],
- "notifications": ["notifier.Class"],
- "task-decorators": [{"name": "my_task", "class-name": "mod.func"}],
- }
- counts = count_modules_by_type(yaml_data)
- assert counts["operator"] == 1
- assert counts["hook"] == 2
- assert counts["transfer"] == 1
- assert counts["notifier"] == 1
- assert counts["decorator"] == 1
- assert counts["sensor"] == 0
-
-
# ---------------------------------------------------------------------------
# determine_airflow_versions
# ---------------------------------------------------------------------------
diff --git a/dev/registry/tests/test_types.py b/dev/registry/tests/test_types.py
new file mode 100644
index 00000000000..d5d2dfbf218
--- /dev/null
+++ b/dev/registry/tests/test_types.py
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Validates the registry_tools.types module — single source of truth for module types."""
+
+from __future__ import annotations
+
+import pytest
+from registry_tools.types import (
+ ALL_TYPE_IDS,
+ BASE_CLASS_IMPORTS,
+ CLASS_LEVEL_SECTIONS,
+ FLAT_LEVEL_SECTIONS,
+ MODULE_LEVEL_SECTIONS,
+ MODULE_TYPES,
+ TYPE_SUFFIXES,
+)
+
+REQUIRED_FIELDS = {"yaml_key", "level", "label", "icon", "suffixes"}
+VALID_LEVELS = {"module", "flat"}
+
+
+class TestModuleTypes:
+ def test_all_type_ids_lowercase_alphanumeric(self):
+ for type_id in ALL_TYPE_IDS:
+ assert type_id == type_id.lower(), f"{type_id} is not lowercase"
+ assert type_id.replace("_", "").isalnum(), f"{type_id} contains invalid chars"
+
+ def test_every_type_has_required_fields(self):
+ for type_id, info in MODULE_TYPES.items():
+ missing = REQUIRED_FIELDS - set(info.keys())
+ assert not missing, f"{type_id} missing fields: {missing}"
+
+ def test_no_duplicate_yaml_keys(self):
+ yaml_keys = [info["yaml_key"] for info in MODULE_TYPES.values()]
+ assert len(yaml_keys) == len(set(yaml_keys)), f"Duplicate yaml_keys: {yaml_keys}"
+
+ def test_valid_level_values(self):
+ for type_id, info in MODULE_TYPES.items():
+ assert info["level"] in VALID_LEVELS, f"{type_id} has invalid level: {info['level']}"
+
+ def test_icons_are_single_character(self):
+ for type_id, info in MODULE_TYPES.items():
+ assert len(info["icon"]) == 1, f"{type_id} icon is not single char: {info['icon']}"
+
+ def test_labels_are_nonempty(self):
+ for type_id, info in MODULE_TYPES.items():
+ assert info["label"].strip(), f"{type_id} has empty label"
+
+ def test_suffixes_are_lists(self):
+ for type_id, info in MODULE_TYPES.items():
+ assert isinstance(info["suffixes"], list), f"{type_id} suffixes is not a list"
+
+
+class TestDerivedLookups:
+ def test_module_level_sections_match_module_types(self):
+ for yaml_key, type_id in MODULE_LEVEL_SECTIONS.items():
+ assert type_id in MODULE_TYPES
+ assert MODULE_TYPES[type_id]["yaml_key"] == yaml_key
+ assert MODULE_TYPES[type_id]["level"] == "module"
+
+ def test_flat_level_sections_match_module_types(self):
+ for yaml_key, type_id in FLAT_LEVEL_SECTIONS.items():
+ assert type_id in MODULE_TYPES
+ assert MODULE_TYPES[type_id]["yaml_key"] == yaml_key
+ assert MODULE_TYPES[type_id]["level"] == "flat"
+
+ def test_all_types_covered_by_sections(self):
+ """Every type should appear in either MODULE_LEVEL or FLAT_LEVEL."""
+ covered = set(MODULE_LEVEL_SECTIONS.values()) | set(FLAT_LEVEL_SECTIONS.values())
+ assert covered == set(ALL_TYPE_IDS)
+
+ def test_type_suffixes_matches_module_types(self):
+ for type_id, suffixes in TYPE_SUFFIXES.items():
+ assert type_id in MODULE_TYPES
+ assert suffixes == MODULE_TYPES[type_id]["suffixes"]
+
+ def test_class_level_sections_are_subset_of_flat(self):
+ for yaml_key, type_id in CLASS_LEVEL_SECTIONS.items():
+ assert yaml_key in FLAT_LEVEL_SECTIONS
+ assert FLAT_LEVEL_SECTIONS[yaml_key] == type_id
+
+
+class TestBaseClassImports:
+ def test_all_entries_are_tuples(self):
+ for entry in BASE_CLASS_IMPORTS:
+ assert isinstance(entry, tuple)
+ assert len(entry) == 2
+
+ @pytest.mark.parametrize(("type_name", "import_path"), BASE_CLASS_IMPORTS)
+ def test_type_names_exist_in_module_types(self, type_name, import_path):
+ assert type_name in MODULE_TYPES
+
+ @pytest.mark.parametrize(("type_name", "import_path"), BASE_CLASS_IMPORTS)
+ def test_import_paths_are_dotted(self, type_name, import_path):
+ assert "." in import_path
diff --git a/pyproject.toml b/pyproject.toml
index b83c2fcd07d..c359eda1a9e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1473,6 +1473,7 @@ members = [
"airflow-core",
"airflow-e2e-tests",
"dev/breeze",
+ "dev/registry",
"airflow-ctl",
"airflow-ctl-tests",
"dev",
diff --git a/registry/AGENTS.md b/registry/AGENTS.md
index e2a0d5ab3f4..d036969c2d8 100644
--- a/registry/AGENTS.md
+++ b/registry/AGENTS.md
@@ -430,7 +430,15 @@ These changes mirror the existing `/docs/*` rewrite
pattern.
## Data Extraction (`dev/registry/`)
-The registry's JSON data is produced by four extraction scripts in
`dev/registry/`.
+The registry's JSON data is produced by four extraction scripts in `dev/registry/`,
+which is a Python package (workspace member) with shared code in `registry_tools/`.
+
+**Module type definitions** live in `dev/registry/registry_tools/types.py` — this is
+the single source of truth for all module types (operator, hook, sensor, trigger, etc.).
+The three Python extraction scripts and the frontend data file (`types.json`) all derive
+from this module. To add a new type, add it to `MODULE_TYPES` in `types.py` and run
+`generate_types_json.py`.
+
When modifying these scripts, understand the design decisions below.
### Why four separate scripts?
diff --git a/registry/CLAUDE.md b/registry/CLAUDE.md
new file mode 120000
index 00000000000..47dc3e3d863
--- /dev/null
+++ b/registry/CLAUDE.md
@@ -0,0 +1 @@
+AGENTS.md
\ No newline at end of file
diff --git a/registry/README.md b/registry/README.md
index 4ed7e62b014..828749d2792 100644
--- a/registry/README.md
+++ b/registry/README.md
@@ -67,6 +67,10 @@ development. In production the prefix defaults to
`/registry/`.
### Data Pipeline
```
+registry_tools/types.py ← Single source of truth for module type
definitions
+ │
+ ├─── generate_types_json.py → registry/src/_data/types.json (for
frontend)
+ │
provider.yaml files (providers/*/provider.yaml)
│
▼
@@ -79,6 +83,7 @@ extract_parameters.py ← Runtime class discovery +
parameter extraction
registry/src/_data/
├── providers.json ← Provider metadata (name, versions, downloads,
lifecycle, ...)
├── modules.json ← Individual modules (operators, hooks, sensors,
...)
+ ├── types.json ← Module type definitions (generated from
types.py)
└── versions/{id}/{ver}/ ← Per-version metadata, parameters, connections
│
▼
@@ -166,6 +171,7 @@ the same Sphinx build that generates the docs.
|---|---|---|
| `providers.json` | Generated | All providers with metadata, sorted
alphabetically |
| `modules.json` | Generated | All extracted modules (operators, hooks, etc.) |
+| `types.json` | Generated | Module type definitions (from
`registry_tools/types.py`) |
| `versions/` | Generated | Per-provider, per-version
metadata/parameters/connections |
| `exploreCategories.js` | Checked-in | Category definitions with keyword
lists for the Explore page |
| `statsData.js` | Checked-in | Computed statistics (lifecycle counts, top
providers, etc.) |
@@ -461,6 +467,19 @@ provider appears well in the registry:
class-level docstrings for module descriptions
4. **Publish to PyPI** — download stats are fetched automatically
+## Adding a New Module Type
+
+Module types (operator, hook, sensor, etc.) are defined in a single place:
+`dev/registry/registry_tools/types.py`. To add a new type (e.g., `auth_manager`):
+
+1. Add an entry to `MODULE_TYPES` in `dev/registry/registry_tools/types.py`
+2. Run `uv run python dev/registry/generate_types_json.py` to update
+ `registry/src/_data/types.json` (auto-propagates to frontend templates and JS)
+3. Add CSS variable `--color-auth-manager` and class `.auth-manager` in
+ `src/css/tokens.css` and `src/css/main.css`
+4. If runtime discovery is needed, add a base class entry to `BASE_CLASS_IMPORTS`
+ in `types.py`
+
## Development Tips
- Run `uv run python dev/registry/extract_metadata.py` whenever provider
metadata changes
diff --git a/registry/src/_data/moduleCountsByProvider.js
b/registry/src/_data/moduleCountsByProvider.js
new file mode 100644
index 00000000000..c682a1e72f3
--- /dev/null
+++ b/registry/src/_data/moduleCountsByProvider.js
@@ -0,0 +1,31 @@
+/*!
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// Builds { provider_id: { type: count } } from modules.json.
+// Used by provider-card.njk for module count badges on the listing page.
+const modulesData = require("./modules.json");
+
+module.exports = function () {
+ const counts = {};
+ for (const m of modulesData.modules) {
+ if (!counts[m.provider_id]) counts[m.provider_id] = {};
+ counts[m.provider_id][m.type] = (counts[m.provider_id][m.type] || 0) + 1;
+ }
+ return counts;
+};
diff --git a/registry/src/_data/providerVersions.js
b/registry/src/_data/providerVersions.js
index 774a0342e5f..8b9a721ba5e 100644
--- a/registry/src/_data/providerVersions.js
+++ b/registry/src/_data/providerVersions.js
@@ -73,16 +73,6 @@ module.exports = function () {
? provider.airflow_versions[provider.airflow_versions.length - 1]
: null;
- // Compute module_counts from modules.json (runtime discovery) when
available,
- // since providers.json may only have AST-based counts which undercount.
- if (latestModules.length > 0) {
- const counts = {};
- for (const m of latestModules) {
- counts[m.type] = (counts[m.type] || 0) + 1;
- }
- provider.module_counts = counts;
- }
-
result.push({
provider,
version: provider.version,
diff --git a/registry/src/_data/statsData.js b/registry/src/_data/statsData.js
index 58fd29f2984..02d40eeeb28 100644
--- a/registry/src/_data/statsData.js
+++ b/registry/src/_data/statsData.js
@@ -18,10 +18,19 @@
*/
const providersData = require('./providers.json');
+const modulesData = require('./modules.json');
+const typesData = require('./types.json');
module.exports = function() {
const providers = providersData.providers;
+ // Build module counts from modules.json (single source of truth)
+ const countsByProvider = {};
+ for (const m of modulesData.modules) {
+ if (!countsByProvider[m.provider_id]) countsByProvider[m.provider_id] = {};
+ countsByProvider[m.provider_id][m.type] = (countsByProvider[m.provider_id][m.type] || 0) + 1;
+ }
+
// Total providers count
const totalProviders = providers.length;
@@ -41,8 +50,9 @@ module.exports = function() {
// Aggregate module counts across all providers
const aggregateModuleCounts = providers.reduce((acc, p) => {
- if (p.module_counts) {
- Object.entries(p.module_counts).forEach(([type, count]) => {
+ const mc = countsByProvider[p.id];
+ if (mc) {
+ Object.entries(mc).forEach(([type, count]) => {
acc[type] = (acc[type] || 0) + count;
});
}
@@ -51,64 +61,11 @@ module.exports = function() {
const totalModules = Object.values(aggregateModuleCounts).reduce((a, b) => a
+ b, 0);
- // Module type metadata
- const moduleTypeInfo = {
- operator: {
- label: 'Operators',
- icon: 'O',
- colorClass: 'operator'
- },
- hook: {
- label: 'Hooks',
- icon: 'H',
- colorClass: 'hook'
- },
- sensor: {
- label: 'Sensors',
- icon: 'S',
- colorClass: 'sensor'
- },
- trigger: {
- label: 'Triggers',
- icon: 'T',
- colorClass: 'trigger'
- },
- transfer: {
- label: 'Transfers',
- icon: 'X',
- colorClass: 'transfer'
- },
- bundle: {
- label: 'Bundles',
- icon: 'B',
- colorClass: 'bundle'
- },
- notifier: {
- label: 'Notifiers',
- icon: 'N',
- colorClass: 'notifier'
- },
- secret: {
- label: 'Secrets Backend',
- icon: 'K',
- colorClass: 'secret'
- },
- logging: {
- label: 'Log Handler',
- icon: 'L',
- colorClass: 'logging'
- },
- executor: {
- label: 'Executors',
- icon: 'E',
- colorClass: 'executor'
- },
- decorator: {
- label: 'Decorators',
- icon: '@',
- colorClass: 'decorator'
- }
- };
+ // Module type metadata (from types.json)
+ const moduleTypeInfo = {};
+ for (const t of typesData) {
+ moduleTypeInfo[t.id] = { label: t.label, icon: t.icon, colorClass: t.id };
+ }
// Module type display data with counts and percentages
const moduleTypeStats = Object.entries(moduleTypeInfo).map(([type, info]) =>
{
@@ -141,8 +98,8 @@ module.exports = function() {
// Enriched provider list with totals
const enrichedProviders = [...providers].map(p => ({
...p,
- totalModules: p.module_counts
- ? Object.values(p.module_counts).reduce((a, b) => a + b, 0)
+ totalModules: countsByProvider[p.id]
+ ? Object.values(countsByProvider[p.id]).reduce((a, b) => a + b, 0)
: 0,
monthlyDownloads: (p.pypi_downloads && p.pypi_downloads.monthly) || 0,
weeklyDownloads: (p.pypi_downloads && p.pypi_downloads.weekly) || 0
diff --git a/registry/src/_data/types.json b/registry/src/_data/types.json
new file mode 100644
index 00000000000..000e26a172f
--- /dev/null
+++ b/registry/src/_data/types.json
@@ -0,0 +1,57 @@
+[
+ {
+ "id": "operator",
+ "label": "Operators",
+ "icon": "O"
+ },
+ {
+ "id": "hook",
+ "label": "Hooks",
+ "icon": "H"
+ },
+ {
+ "id": "sensor",
+ "label": "Sensors",
+ "icon": "S"
+ },
+ {
+ "id": "trigger",
+ "label": "Triggers",
+ "icon": "T"
+ },
+ {
+ "id": "transfer",
+ "label": "Transfers",
+ "icon": "X"
+ },
+ {
+ "id": "bundle",
+ "label": "Bundles",
+ "icon": "B"
+ },
+ {
+ "id": "notifier",
+ "label": "Notifiers",
+ "icon": "N"
+ },
+ {
+ "id": "secret",
+ "label": "Secrets Backend",
+ "icon": "K"
+ },
+ {
+ "id": "logging",
+ "label": "Log Handler",
+ "icon": "L"
+ },
+ {
+ "id": "executor",
+ "label": "Executors",
+ "icon": "E"
+ },
+ {
+ "id": "decorator",
+ "label": "Decorators",
+ "icon": "@"
+ }
+]
diff --git a/registry/src/_includes/base.njk b/registry/src/_includes/base.njk
index 02906819bb4..86e12b41891 100644
--- a/registry/src/_includes/base.njk
+++ b/registry/src/_includes/base.njk
@@ -181,6 +181,7 @@
</div>
</div>
+ <script id="types-data" type="application/json">{{ types | dump | safe
}}</script>
<script src="{{ '/js/theme.js' | url }}" defer></script>
<script src="{{ '/js/mobile-menu.js' | url }}" defer></script>
<script src="{{ '/js/copy-button.js' | url }}" defer></script>
diff --git a/registry/src/_includes/provider-card.njk
b/registry/src/_includes/provider-card.njk
index 029a0319f2d..37198e55b74 100644
--- a/registry/src/_includes/provider-card.njk
+++ b/registry/src/_includes/provider-card.njk
@@ -21,14 +21,15 @@
<p class="description">{{ provider.description }}</p>
<div class="modules">
- {% set counts = provider.module_counts or {} %}
- {% set totalMods = (counts.operator or 0) + (counts.hook or 0) +
(counts.sensor or 0) + (counts.trigger or 0) + (counts.transfer or 0) +
(counts.notifier or 0) + (counts.secret or 0) + (counts.logging or 0) +
(counts.executor or 0) + (counts.bundle or 0) + (counts.decorator or 0) %}
+ {% set counts = moduleCountsByProvider[provider.id] or {} %}
+ {% set totalMods = 0 %}
+ {% for t in types %}{% set totalMods = totalMods + (counts[t.id] or 0)
%}{% endfor %}
<span class="count">{{ totalMods }} module{{ "s" if totalMods != 1
}}</span>
{% if totalMods > 0 %}
<div class="breakdown">
- {% for mtype in ["operator", "hook", "sensor", "trigger", "transfer",
"executor", "notifier", "secret", "logging", "bundle", "decorator"] %}
- {% if counts[mtype] %}
- <div class="{{ mtype }}" style="width:{{ (counts[mtype] / totalMods *
100) }}%" data-tooltip="{{ counts[mtype] }} {{ mtype | capitalize }}{{ 's' if
counts[mtype] != 1 }}"></div>
+ {% for t in types %}
+ {% if counts[t.id] %}
+ <div class="{{ t.id }}" style="width:{{ (counts[t.id] / totalMods *
100) }}%" data-tooltip="{{ counts[t.id] }} {{ t.label }}"></div>
{% endif %}
{% endfor %}
</div>
diff --git a/registry/src/css/main.css b/registry/src/css/main.css
index 11dfe1f06a3..e602fd7f1b0 100644
--- a/registry/src/css/main.css
+++ b/registry/src/css/main.css
@@ -2723,150 +2723,6 @@ main {
}
/* Stats */
-.provider-detail-page header .stats {
- display: grid;
- grid-template-columns: repeat(2, 1fr);
- gap: var(--space-3);
- margin-bottom: var(--space-6);
-}
-
-@media (min-width: 640px) {
- .provider-detail-page header .stats {
- grid-template-columns: repeat(3, 1fr);
- }
-}
-
-@media (min-width: 1024px) {
- .provider-detail-page header .stats {
- grid-template-columns: repeat(6, 1fr);
- }
-}
-
-.provider-detail-page header .stat {
- background: rgb(from var(--bg-secondary) r g b / 0.5);
- border-radius: var(--radius-lg);
- padding: var(--space-3);
-}
-
-.provider-detail-page header .stat-row {
- display: flex;
- align-items: center;
- gap: var(--space-2);
- margin-bottom: var(--space-1);
-}
-
-.provider-detail-page header .stat.operator {
- border: 1px solid rgb(from var(--color-operator) r g b / 0.2);
-}
-
-.provider-detail-page header .stat.hook {
- border: 1px solid rgb(from var(--color-hook) r g b / 0.2);
-}
-
-.provider-detail-page header .stat.sensor {
- border: 1px solid rgb(from var(--color-sensor) r g b / 0.2);
-}
-
-.provider-detail-page header .stat.trigger {
- border: 1px solid rgb(from var(--color-trigger) r g b / 0.2);
-}
-
-.provider-detail-page header .stat.transfer {
- border: 1px solid rgb(from var(--color-transfer) r g b / 0.2);
-}
-
-.provider-detail-page header .stat.bundle {
- border: 1px solid rgb(from var(--color-bundle) r g b / 0.2);
-}
-
-.provider-detail-page header .stat.total {
- border: 1px solid rgb(from var(--accent-primary) r g b / 0.2);
-}
-
-.provider-detail-page header .stat .icon {
- width: 1.5rem;
- height: 1.5rem;
- border-radius: var(--radius-sm);
- display: flex;
- align-items: center;
- justify-content: center;
- font-size: var(--text-xs);
- font-weight: var(--font-bold);
-}
-
-.provider-detail-page header .stat.operator .icon {
- background: rgb(from var(--color-operator) r g b / 0.2);
- color: var(--color-operator);
-}
-
-.provider-detail-page header .stat.hook .icon {
- background: rgb(from var(--color-hook) r g b / 0.2);
- color: var(--color-hook);
-}
-
-.provider-detail-page header .stat.sensor .icon {
- background: rgb(from var(--color-sensor) r g b / 0.2);
- color: var(--color-sensor);
-}
-
-.provider-detail-page header .stat.trigger .icon {
- background: rgb(from var(--color-trigger) r g b / 0.2);
- color: var(--color-trigger);
-}
-
-.provider-detail-page header .stat.transfer .icon {
- background: rgb(from var(--color-transfer) r g b / 0.2);
- color: var(--color-transfer);
-}
-
-.provider-detail-page header .stat.bundle .icon {
- background: rgb(from var(--color-bundle) r g b / 0.2);
- color: var(--color-bundle);
-}
-
-.provider-detail-page header .stat.total .icon {
- background: rgb(from var(--accent-primary) r g b / 0.2);
- color: var(--accent-primary);
-}
-
-.provider-detail-page header .stat .count {
- font-size: var(--text-xl);
- font-weight: var(--font-bold);
-}
-
-.provider-detail-page header .stat.operator .count {
- color: var(--color-operator);
-}
-
-.provider-detail-page header .stat.hook .count {
- color: var(--color-hook);
-}
-
-.provider-detail-page header .stat.sensor .count {
- color: var(--color-sensor);
-}
-
-.provider-detail-page header .stat.trigger .count {
- color: var(--color-trigger);
-}
-
-.provider-detail-page header .stat.transfer .count {
- color: var(--color-transfer);
-}
-
-.provider-detail-page header .stat.bundle .count {
- color: var(--color-bundle);
-}
-
-.provider-detail-page header .stat.total .count {
- color: var(--accent-primary);
-}
-
-.provider-detail-page header .stat .label {
- font-size: var(--text-xs);
- color: var(--text-secondary);
-}
-
/* Header Footer */
.provider-detail-page header .footer {
display: flex;
diff --git a/registry/src/js/search.js b/registry/src/js/search.js
index 76416338d14..d7b13d523d6 100644
--- a/registry/src/js/search.js
+++ b/registry/src/js/search.js
@@ -24,19 +24,18 @@
let currentResults = [];
let searchId = 0;
- const typeLabels = {
- operator: 'Operator',
- hook: 'Hook',
- sensor: 'Sensor',
- trigger: 'Trigger',
- transfer: 'Transfer',
- bundle: 'Bundle',
- notifier: 'Notifier',
- secret: 'Secrets Backend',
- logging: 'Log Handler',
- executor: 'Executor',
- decorator: 'Decorator',
- };
+ // Type labels loaded from types.json (injected via base.njk)
+ const typeLabels = {};
+ try {
+ const typesEl = document.getElementById('types-data');
+ if (typesEl) {
+ for (const t of JSON.parse(typesEl.textContent)) {
+ typeLabels[t.id] = t.label;
+ }
+ }
+ } catch (_) {
+ // Fallback: empty object — badges will show raw type name
+ }
function escapeHtml(str) {
const div = document.createElement('div');
diff --git a/registry/src/provider-version.njk
b/registry/src/provider-version.njk
index 790b19172ba..51179078bab 100644
--- a/registry/src/provider-version.njk
+++ b/registry/src/provider-version.njk
@@ -14,7 +14,7 @@ eleventyComputed:
{# Choose data source: latest uses providers.json + modules.json, older uses
versionData #}
{% if pv.isLatest %}
- {% set moduleCounts = pv.provider.module_counts or {} %}
+ {% set moduleCounts = moduleCountsByProvider[pv.provider.id] or {} %}
{% set deps = pv.provider.dependencies or [] %}
{% set extras = pv.provider.optional_extras or {} %}
{% set conns = pv.provider.connection_types or [] %}
@@ -35,7 +35,8 @@ eleventyComputed:
{% set sourceUrl = pv.provider.source_url %}
{% endif %}
-{% set totalModules = (moduleCounts.operator or 0) + (moduleCounts.hook or 0)
+ (moduleCounts.sensor or 0) + (moduleCounts.trigger or 0) +
(moduleCounts.transfer or 0) + (moduleCounts.bundle or 0) +
(moduleCounts.notifier or 0) + (moduleCounts.secret or 0) +
(moduleCounts.logging or 0) + (moduleCounts.executor or 0) +
(moduleCounts.decorator or 0) %}
+{% set totalModules = 0 %}
+{% for t in types %}{% set totalModules = totalModules + (moduleCounts[t.id]
or 0) %}{% endfor %}
<div class="container">
{# Breadcrumb #}
@@ -94,77 +95,16 @@ eleventyComputed:
</div>
</div>
- {# Stats Grid #}
- <div class="stats">
- {% if moduleCounts.operator > 0 %}
- <div class="stat operator">
- <div class="stat-row">
- <span class="icon">O</span>
- <span class="count">{{ moduleCounts.operator }}</span>
- </div>
- <span class="label">Operators</span>
- </div>
- {% endif %}
- {% if moduleCounts.hook > 0 %}
- <div class="stat hook">
- <div class="stat-row">
- <span class="icon">H</span>
- <span class="count">{{ moduleCounts.hook }}</span>
- </div>
- <span class="label">Hooks</span>
- </div>
- {% endif %}
- {% if moduleCounts.sensor > 0 %}
- <div class="stat sensor">
- <div class="stat-row">
- <span class="icon">S</span>
- <span class="count">{{ moduleCounts.sensor }}</span>
- </div>
- <span class="label">Sensors</span>
- </div>
- {% endif %}
- {% if moduleCounts.trigger > 0 %}
- <div class="stat trigger">
- <div class="stat-row">
- <span class="icon">T</span>
- <span class="count">{{ moduleCounts.trigger }}</span>
- </div>
- <span class="label">Triggers</span>
- </div>
- {% endif %}
- {% if moduleCounts.transfer > 0 %}
- <div class="stat transfer">
- <div class="stat-row">
- <span class="icon">X</span>
- <span class="count">{{ moduleCounts.transfer }}</span>
- </div>
- <span class="label">Transfers</span>
- </div>
- {% endif %}
- {% if moduleCounts.bundle > 0 %}
- <div class="stat bundle">
- <div class="stat-row">
- <span class="icon">B</span>
- <span class="count">{{ moduleCounts.bundle }}</span>
- </div>
- <span class="label">Bundles</span>
- </div>
- {% endif %}
- <div class="stat total">
- <div class="stat-row">
- <span class="icon">Σ</span>
- <span class="count">{{ totalModules }}</span>
- </div>
- <span class="label">Total Modules</span>
- </div>
- </div>
-
- {# Bottom row: Downloads and actions #}
+ {# Bottom row: Downloads, module count, and actions #}
<div class="footer">
<div class="downloads">
<div class="count">{{ pv.provider.pypi_downloads.monthly |
formatDownloads }}</div>
<div class="label">Monthly Downloads</div>
</div>
+ <div class="downloads">
+ <div class="count">{{ totalModules }}</div>
+ <div class="label">Total Modules</div>
+ </div>
<div class="actions">
<a href="{{ docsUrl }}" target="_blank" rel="noopener"
class="btn-primary">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24"
aria-hidden="true">
@@ -339,61 +279,13 @@ eleventyComputed:
{# Module Type Tabs #}
<nav class="module-tabs">
<button class="module-tab active" data-type="all">All ({{ totalModules
}})</button>
- {% if moduleCounts.operator > 0 %}
- <button class="module-tab" data-type="operator">
- <span class="tab-icon operator">O</span> Operators ({{
moduleCounts.operator }})
- </button>
- {% endif %}
- {% if moduleCounts.hook > 0 %}
- <button class="module-tab" data-type="hook">
- <span class="tab-icon hook">H</span> Hooks ({{ moduleCounts.hook }})
- </button>
- {% endif %}
- {% if moduleCounts.sensor > 0 %}
- <button class="module-tab" data-type="sensor">
- <span class="tab-icon sensor">S</span> Sensors ({{ moduleCounts.sensor
}})
- </button>
- {% endif %}
- {% if moduleCounts.trigger > 0 %}
- <button class="module-tab" data-type="trigger">
- <span class="tab-icon trigger">T</span> Triggers ({{
moduleCounts.trigger }})
- </button>
- {% endif %}
- {% if moduleCounts.transfer > 0 %}
- <button class="module-tab" data-type="transfer">
- <span class="tab-icon transfer">X</span> Transfers ({{
moduleCounts.transfer }})
- </button>
- {% endif %}
- {% if moduleCounts.bundle > 0 %}
- <button class="module-tab" data-type="bundle">
- <span class="tab-icon bundle">B</span> Bundles ({{ moduleCounts.bundle
}})
- </button>
- {% endif %}
- {% if moduleCounts.notifier > 0 %}
- <button class="module-tab" data-type="notifier">
- <span class="tab-icon notifier">N</span> Notifiers ({{
moduleCounts.notifier }})
- </button>
- {% endif %}
- {% if moduleCounts.secret > 0 %}
- <button class="module-tab" data-type="secret">
- <span class="tab-icon secret">K</span> Secrets Backend ({{
moduleCounts.secret }})
- </button>
- {% endif %}
- {% if moduleCounts.logging > 0 %}
- <button class="module-tab" data-type="logging">
- <span class="tab-icon logging">L</span> Log Handler ({{
moduleCounts.logging }})
- </button>
- {% endif %}
- {% if moduleCounts.executor > 0 %}
- <button class="module-tab" data-type="executor">
- <span class="tab-icon executor">E</span> Executors ({{
moduleCounts.executor }})
- </button>
- {% endif %}
- {% if moduleCounts.decorator > 0 %}
- <button class="module-tab" data-type="decorator">
- <span class="tab-icon decorator">@</span> Decorators ({{
moduleCounts.decorator }})
+ {% for t in types %}
+ {% if moduleCounts[t.id] > 0 %}
+ <button class="module-tab" data-type="{{ t.id }}">
+ <span class="tab-icon {{ t.id }}">{{ t.icon }}</span> {{ t.label }} ({{
moduleCounts[t.id] }})
</button>
{% endif %}
+ {% endfor %}
</nav>
{# Main content with sidebar #}
@@ -419,36 +311,11 @@ eleventyComputed:
<div class="sidebar-section card">
<h3>Module Types</h3>
<nav class="category-nav">
- {% if moduleCounts.operator > 0 %}
- <button class="category-btn" data-type="operator"><span
class="tab-icon operator">O</span> <span>Operators</span> <span
class="count">{{ moduleCounts.operator }}</span></button>
- {% endif %}
- {% if moduleCounts.hook > 0 %}
- <button class="category-btn" data-type="hook"><span class="tab-icon
hook">H</span> <span>Hooks</span> <span class="count">{{ moduleCounts.hook
}}</span></button>
- {% endif %}
- {% if moduleCounts.sensor > 0 %}
- <button class="category-btn" data-type="sensor"><span
class="tab-icon sensor">S</span> <span>Sensors</span> <span class="count">{{
moduleCounts.sensor }}</span></button>
- {% endif %}
- {% if moduleCounts.trigger > 0 %}
- <button class="category-btn" data-type="trigger"><span
class="tab-icon trigger">T</span> <span>Triggers</span> <span class="count">{{
moduleCounts.trigger }}</span></button>
- {% endif %}
- {% if moduleCounts.transfer > 0 %}
- <button class="category-btn" data-type="transfer"><span
class="tab-icon transfer">X</span> <span>Transfers</span> <span
class="count">{{ moduleCounts.transfer }}</span></button>
- {% endif %}
- {% if moduleCounts.executor > 0 %}
- <button class="category-btn" data-type="executor"><span
class="tab-icon executor">E</span> <span>Executors</span> <span
class="count">{{ moduleCounts.executor }}</span></button>
- {% endif %}
- {% if moduleCounts.bundle > 0 %}
- <button class="category-btn" data-type="bundle"><span
class="tab-icon bundle">B</span> <span>Bundles</span> <span class="count">{{
moduleCounts.bundle }}</span></button>
- {% endif %}
- {% if moduleCounts.notifier > 0 %}
- <button class="category-btn" data-type="notifier"><span
class="tab-icon notifier">N</span> <span>Notifiers</span> <span
class="count">{{ moduleCounts.notifier }}</span></button>
- {% endif %}
- {% if moduleCounts.secret > 0 %}
- <button class="category-btn" data-type="secret"><span
class="tab-icon secret">K</span> <span>Secrets</span> <span class="count">{{
moduleCounts.secret }}</span></button>
- {% endif %}
- {% if moduleCounts.logging > 0 %}
- <button class="category-btn" data-type="logging"><span
class="tab-icon logging">L</span> <span>Log Handlers</span> <span
class="count">{{ moduleCounts.logging }}</span></button>
+ {% for t in types %}
+ {% if moduleCounts[t.id] > 0 %}
+ <button class="category-btn" data-type="{{ t.id }}"><span
class="tab-icon {{ t.id }}">{{ t.icon }}</span> <span>{{ t.label }}</span>
<span class="count">{{ moduleCounts[t.id] }}</span></button>
{% endif %}
+ {% endfor %}
</nav>
</div>
{% endif %}
diff --git a/scripts/ci/prek/check_registry_types_json_sync.py
b/scripts/ci/prek/check_registry_types_json_sync.py
new file mode 100644
index 00000000000..c726c4d01aa
--- /dev/null
+++ b/scripts/ci/prek/check_registry_types_json_sync.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Check that registry/src/_data/types.json is in sync with
registry_tools/types.py."""
+
+from __future__ import annotations
+
+import ast
+import json
+import sys
+from pathlib import Path
+
+AIRFLOW_ROOT = Path(__file__).parents[3].resolve()
+TYPES_PY = AIRFLOW_ROOT / "dev" / "registry" / "registry_tools" / "types.py"
+TYPES_JSON = AIRFLOW_ROOT / "registry" / "src" / "_data" / "types.json"
+
+
+def _extract_string(node: ast.expr) -> str | None:
+    """Return the text of *node* if it is a string literal constant, otherwise ``None``."""
+    if not isinstance(node, ast.Constant):
+        return None
+    literal = node.value
+    return literal if isinstance(literal, str) else None
+
+
+def load_types_from_py() -> list[dict]:
+    """Parse MODULE_TYPES from types.py using AST and build the expected JSON list.
+
+    The file is parsed, not imported. The first assignment (plain or annotated)
+    to ``MODULE_TYPES`` reached by ``ast.walk`` whose value is a dict literal is used.
+
+    Returns:
+        One ``{"id", "label", "icon"}`` dict per entry, in the order the entries
+        appear in the dict literal. An entry is skipped when its key is not a
+        string literal, its value is not a dict literal, or it has no ``label``
+        or ``icon`` key. A ``label``/``icon`` whose value is not a string
+        literal is recorded as ``None``.
+
+    Exits the process with status 1 when no such assignment is found.
+    """
+    # Python source is UTF-8 by default, so decode it explicitly instead of
+    # relying on the locale's preferred encoding.
+    tree = ast.parse(TYPES_PY.read_text(encoding="utf-8"), filename=str(TYPES_PY))
+
+    for node in ast.walk(tree):
+        target: ast.expr
+        # MODULE_TYPES uses annotated assignment: MODULE_TYPES: dict[str, dict] = {...}
+        if isinstance(node, ast.AnnAssign):
+            target = node.target
+            value = node.value
+        elif isinstance(node, ast.Assign) and len(node.targets) == 1:
+            target = node.targets[0]
+            value = node.value
+        else:
+            continue
+
+        if isinstance(target, ast.Name) and target.id == "MODULE_TYPES":
+            if not isinstance(value, ast.Dict):
+                # Not a dict literal (or a bare annotation); keep looking.
+                continue
+            result = []
+            for key_node, value_node in zip(value.keys, value.values):
+                # A None key is a ``**mapping`` unpacking inside the literal.
+                if key_node is None:
+                    continue
+                type_id = _extract_string(key_node)
+                if type_id is None or not isinstance(value_node, ast.Dict):
+                    continue
+                info = {}
+                for k, v in zip(value_node.keys, value_node.values):
+                    if k is None:
+                        continue
+                    field_name = _extract_string(k)
+                    # Only the fields that end up in types.json are collected.
+                    if field_name in ("label", "icon"):
+                        info[field_name] = _extract_string(v)
+                if "label" in info and "icon" in info:
+                    result.append({"id": type_id, "label": info["label"], "icon": info["icon"]})
+            return result
+
+    print(f"ERROR: Could not find MODULE_TYPES in {TYPES_PY}", file=sys.stderr)
+    sys.exit(1)
+
+
+def main() -> None:
+    """Exit 0 when types.json matches MODULE_TYPES in types.py, otherwise report and exit 1.
+
+    A missing, undecodable/malformed, or out-of-sync types.json is reported on
+    stderr together with the command that regenerates it.
+    """
+    if not TYPES_JSON.exists():
+        print(f"ERROR: {TYPES_JSON} does not exist.", file=sys.stderr)
+        print("Run: uv run python dev/registry/generate_types_json.py", file=sys.stderr)
+        sys.exit(1)
+
+    expected = load_types_from_py()
+    try:
+        # JSON text is UTF-8; do not depend on the locale's default encoding.
+        actual = json.loads(TYPES_JSON.read_text(encoding="utf-8"))
+    except (json.JSONDecodeError, UnicodeDecodeError) as err:
+        # A corrupt file is repaired the same way as a stale one, so print the
+        # same hint instead of letting an unhandled traceback escape.
+        print(f"ERROR: {TYPES_JSON} is not valid JSON: {err}", file=sys.stderr)
+        print("Run: uv run python dev/registry/generate_types_json.py", file=sys.stderr)
+        sys.exit(1)
+
+    if expected == actual:
+        sys.exit(0)
+
+    print("ERROR: registry/src/_data/types.json is out of sync with", file=sys.stderr)
+    print(" dev/registry/registry_tools/types.py", file=sys.stderr)
+    print("", file=sys.stderr)
+    print("Run: uv run python dev/registry/generate_types_json.py", file=sys.stderr)
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()