jscheffl commented on code in PR #62261:
URL: https://github.com/apache/airflow/pull/62261#discussion_r2837387846


##########
dev/registry/extract_connections.py:
##########
@@ -0,0 +1,413 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Airflow Registry Connection Metadata Extractor
+
+Extracts connection form metadata (custom fields, UI behaviour, standard field
+configuration) for all providers.  Reads from provider.yaml first (conn-fields
+and ui-field-behaviour), falling back to runtime inspection of hook classes for
+providers that have not yet migrated to YAML.
+
+Must be run inside breeze where all providers are installed.
+
+Usage:
+    breeze run python dev/registry/extract_connections.py
+
+Output:
+    - registry/src/_data/versions/{provider_id}/{version}/connections.json
+    - dev/registry/output/versions/{provider_id}/{version}/connections.json
+"""
+
+from __future__ import annotations
+
+import importlib
+import json
+import sys
+from collections import defaultdict
+from datetime import datetime, timezone
+from pathlib import Path
+
+import yaml
+
+AIRFLOW_ROOT = Path(__file__).parent.parent.parent
+SCRIPT_DIR = Path(__file__).parent
+PROVIDERS_DIR = AIRFLOW_ROOT / "providers"
+
+PROVIDERS_JSON_CANDIDATES = [
+    SCRIPT_DIR / "providers.json",
+    AIRFLOW_ROOT / "registry" / "src" / "_data" / "providers.json",
+]
+
+OUTPUT_DIRS = [
+    SCRIPT_DIR / "output",
+    AIRFLOW_ROOT / "registry" / "src" / "_data",
+]
+
+STANDARD_FIELDS = ["host", "port", "login", "password", "schema", "extra", 
"description"]
+
+DEFAULT_LABELS = {
+    "host": "Host",
+    "port": "Port",
+    "login": "Login",
+    "password": "Password",
+    "schema": "Schema",
+    "extra": "Extra",
+    "description": "Description",
+}
+
+
+def discover_provider_yamls() -> dict[str, tuple[str, dict]]:
+    """
+    Scan providers/ for all provider.yaml files.
+
+    Returns {provider_id: (yaml_path, parsed_yaml)}.
+    """
+    results: dict[str, tuple[str, dict]] = {}
+
+    for yaml_path in sorted(PROVIDERS_DIR.rglob("provider.yaml")):
+        relative = yaml_path.relative_to(PROVIDERS_DIR)
+        parts = relative.parts[:-1]
+        provider_id = "-".join(parts)
+
+        try:
+            with open(yaml_path) as f:
+                data = yaml.safe_load(f)
+            results[provider_id] = (str(yaml_path), data)
+        except Exception as e:
+            print(f"  WARN: Failed to parse {yaml_path}: {e}")
+
+    return results
+
+
+def import_hook_class(hook_class_name: str) -> type | None:
+    """Import a hook class by its fully-qualified name."""
+    parts = hook_class_name.rsplit(".", 1)
+    if len(parts) != 2:
+        return None
+    module_path, class_name = parts
+    try:
+        module = importlib.import_module(module_path)
+        return getattr(module, class_name, None)
+    except Exception as e:
+        print(f"  WARN: Failed to import {hook_class_name}: {e}")
+        return None
+
+
+def extract_conn_fields_from_widgets(widgets: dict, connection_type: str) -> 
dict:
+    """
+    Convert WTForms widgets from get_connection_form_widgets() to the
+    same schema as provider.yaml conn-fields.
+
+    Mirrors logic from scripts/tools/generate_yaml_format_for_hooks.py.
+    """
+    conn_fields: dict[str, dict] = {}
+    prefix = f"extra__{connection_type}__"
+
+    for field_key, field_widget in widgets.items():
+        field_name = field_key[len(prefix) :] if field_key.startswith(prefix) 
else field_key
+
+        if hasattr(field_widget, "param"):
+            field_data = field_widget.param.dump()
+            schema = field_data.get("schema", {}).copy()
+            label = schema.pop("title", field_name.replace("_", " ").title())
+
+            field_class = getattr(field_widget, "field_class", None)
+            if field_class and "Password" in field_class.__name__:
+                if schema.get("format") != "password":
+                    schema["format"] = "password"
+
+            if field_data.get("value") is not None:
+                schema["default"] = field_data["value"]
+
+            entry: dict = {"label": label, "schema": schema}
+            if field_data.get("description"):
+                entry["description"] = field_data["description"]
+        else:
+            field_class = getattr(field_widget, "field_class", None)
+            label_obj = (
+                getattr(field_widget, "args", [None])[0] if 
getattr(field_widget, "args", None) else None
+            )
+            label = str(label_obj) if label_obj else field_name.replace("_", " 
").title()
+            description = getattr(field_widget, "description", None)
+
+            schema = {"type": "string"}
+            if field_class:
+                cls_name = field_class.__name__
+                if "Integer" in cls_name:
+                    schema["type"] = "integer"
+                elif "Boolean" in cls_name:
+                    schema["type"] = "boolean"
+                elif "Password" in cls_name:
+                    schema["format"] = "password"
+
+            default = getattr(field_widget, "default", None)
+            if default is not None:
+                schema["default"] = default
+
+            entry = {"label": label, "schema": schema}
+            if description:
+                entry["description"] = str(description)
+
+        conn_fields[field_name] = entry
+
+    return conn_fields
+
+
+def extract_ui_behaviour_from_hook(hook_class: type) -> dict | None:
+    """
+    Call get_ui_field_behaviour() on a hook class and convert to YAML-style 
keys.
+    Only calls if the method is defined directly on this class (not just 
inherited).
+    """
+    if not hasattr(hook_class, "get_ui_field_behaviour"):
+        return None
+    if "get_ui_field_behaviour" not in hook_class.__dict__:
+        return None
+
+    try:
+        behaviour = hook_class.get_ui_field_behaviour()
+    except Exception as e:
+        print(f"  WARN: get_ui_field_behaviour() failed for 
{hook_class.__name__}: {e}")
+        return None
+
+    if not behaviour:
+        return None
+
+    result: dict = {}
+    if behaviour.get("hidden_fields"):
+        result["hidden-fields"] = behaviour["hidden_fields"]
+    if behaviour.get("relabeling"):
+        result["relabeling"] = behaviour["relabeling"]
+    if behaviour.get("placeholders"):
+        result["placeholders"] = behaviour["placeholders"]
+
+    return result or None
+
+
+def build_standard_fields(ui_behaviour: dict | None) -> dict:

Review Comment:
   I'd assume switching the rest of the providers to have fields in provider.yaml
   is just a matter of a few days. @amoghrajesh let me know if I underestimate
   the complexity and I could help.
   
   I would rather push the migration than add complexity here for a potential
   transitional problem.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to