jason810496 commented on code in PR #64127:
URL: https://github.com/apache/airflow/pull/64127#discussion_r3004824799


##########
scripts/in_container/run_provider_yaml_files_check.py:
##########
@@ -473,6 +473,233 @@ def 
check_hook_class_name_entries_in_connection_types(yaml_files: dict[str, dict
     return num_connection_types, num_errors
 
 
+@run_check("Checking that hook classes defining conn_type are registered in 
connection-types")
+def check_hook_classes_with_conn_type_are_registered(yaml_files: dict[str, 
dict]) -> tuple[int, int]:
+    """Find Hook subclasses that define conn_type but are not listed in 
connection-types."""
+    from airflow.sdk.bases.hook import BaseHook
+
+    num_checks = 0
+    num_errors = 0
+    for yaml_file_path, provider_data in yaml_files.items():
+        connection_types = provider_data.get("connection-types", [])
+        registered_hook_classes = {ct["hook-class-name"] for ct in 
connection_types}
+        # Collect conn_type values that are already covered by a registered 
hook class
+        registered_conn_types = {ct["connection-type"] for ct in 
connection_types}
+        hook_modules = [
+            mod for entry in provider_data.get("hooks", []) for mod in 
entry.get("python-modules", [])
+        ]
+        for module_name in hook_modules:
+            try:
+                with warnings.catch_warnings(record=True):
+                    module = importlib.import_module(module_name)
+            except (ImportError, AirflowOptionalProviderFeatureException):
+                continue  # Import errors are caught by other checks
+
+            for attr_name in dir(module):
+                if attr_name.startswith("_"):
+                    continue
+                obj = getattr(module, attr_name, None)
+                if not (inspect.isclass(obj) and issubclass(obj, BaseHook) and 
obj is not BaseHook):
+                    continue
+                # Only check classes defined in this module, not re-exported 
ones
+                if obj.__module__ != module_name:
+                    continue
+                # Skip abstract classes — they are base classes, not concrete 
hooks
+                if inspect.isabstract(obj):
+                    continue
+                num_checks += 1
+                # Check conn_type defined directly on the class, not inherited
+                if "conn_type" not in obj.__dict__:
+                    continue
+                conn_type = obj.__dict__["conn_type"]
+                if not conn_type:
+                    continue
+                full_class_name = f"{module_name}.{attr_name}"
+                if full_class_name in registered_hook_classes:
+                    continue
+                # If another hook class already registered the same conn_type, 
this is fine
+                # (e.g. async variants sharing conn_type with sync hooks)
+                if conn_type in registered_conn_types:
+                    continue
+                errors.append(
+                    f"Hook class `{full_class_name}` defines 
conn_type='{conn_type}' "
+                    f"but no hook class is registered for this connection type 
"
+                    f"in 'connection-types' in {yaml_file_path}.\n"
+                    f"[yellow]How to fix it[/]: Add an entry with "
+                    f"hook-class-name: {full_class_name} to the 
connection-types "
+                    f"section of {yaml_file_path}."
+                )
+                num_errors += 1
+    return num_checks, num_errors
+
+
+@run_check(
+    "Checking that all provider Hook/Operator/Sensor/Trigger/Executor/Notifier"
+    " classes are registered in provider.yaml"
+)
+def check_all_provider_classes_are_registered(yaml_files: dict[str, dict]) -> 
tuple[int, int]:
+    """
+    Walk all provider source files, find 
Hook/Operator/Sensor/Trigger/Executor/Notifier
+    subclasses, and verify they are registered in the appropriate 
provider.yaml section.
+
+    This catches classes placed in non-standard directories or modules that 
were missed
+    when updating provider.yaml.
+    """
+    from airflow.executors.base_executor import BaseExecutor
+    from airflow.models.baseoperator import BaseOperator
+    from airflow.sdk.bases.hook import BaseHook
+    from airflow.sdk.bases.notifier import BaseNotifier
+    from airflow.sensors.base import BaseSensorOperator
+    from airflow.triggers.base import BaseTrigger
+

Review Comment:
   Do we need to add validation of provider subclasses such as 
‎`BaseAuthManager`, ‎`BaseDagBundle`, ‎`BaseSecretsBackend`, or 
‎`FileTaskHandler`?
   
   These are the features I found that are not covered here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to