jason810496 commented on code in PR #64127:
URL: https://github.com/apache/airflow/pull/64127#discussion_r3004824799
##########
scripts/in_container/run_provider_yaml_files_check.py:
##########
@@ -473,6 +473,233 @@ def
check_hook_class_name_entries_in_connection_types(yaml_files: dict[str, dict
return num_connection_types, num_errors
+@run_check("Checking that hook classes defining conn_type are registered in
connection-types")
+def check_hook_classes_with_conn_type_are_registered(yaml_files: dict[str,
dict]) -> tuple[int, int]:
+ """Find Hook subclasses that define conn_type but are not listed in
connection-types."""
+ from airflow.sdk.bases.hook import BaseHook
+
+ num_checks = 0
+ num_errors = 0
+ for yaml_file_path, provider_data in yaml_files.items():
+ connection_types = provider_data.get("connection-types", [])
+ registered_hook_classes = {ct["hook-class-name"] for ct in
connection_types}
+ # Collect conn_type values that are already covered by a registered
hook class
+ registered_conn_types = {ct["connection-type"] for ct in
connection_types}
+ hook_modules = [
+ mod for entry in provider_data.get("hooks", []) for mod in
entry.get("python-modules", [])
+ ]
+ for module_name in hook_modules:
+ try:
+ with warnings.catch_warnings(record=True):
+ module = importlib.import_module(module_name)
+ except (ImportError, AirflowOptionalProviderFeatureException):
+ continue # Import errors are caught by other checks
+
+ for attr_name in dir(module):
+ if attr_name.startswith("_"):
+ continue
+ obj = getattr(module, attr_name, None)
+ if not (inspect.isclass(obj) and issubclass(obj, BaseHook) and
obj is not BaseHook):
+ continue
+ # Only check classes defined in this module, not re-exported
ones
+ if obj.__module__ != module_name:
+ continue
+ # Skip abstract classes — they are base classes, not concrete
hooks
+ if inspect.isabstract(obj):
+ continue
+ num_checks += 1
+ # Check conn_type defined directly on the class, not inherited
+ if "conn_type" not in obj.__dict__:
+ continue
+ conn_type = obj.__dict__["conn_type"]
+ if not conn_type:
+ continue
+ full_class_name = f"{module_name}.{attr_name}"
+ if full_class_name in registered_hook_classes:
+ continue
+ # If another hook class already registered the same conn_type,
this is fine
+ # (e.g. async variants sharing conn_type with sync hooks)
+ if conn_type in registered_conn_types:
+ continue
+ errors.append(
+ f"Hook class `{full_class_name}` defines
conn_type='{conn_type}' "
+ f"but no hook class is registered for this connection type
"
+ f"in 'connection-types' in {yaml_file_path}.\n"
+ f"[yellow]How to fix it[/]: Add an entry with "
+ f"hook-class-name: {full_class_name} to the
connection-types "
+ f"section of {yaml_file_path}."
+ )
+ num_errors += 1
+ return num_checks, num_errors
+
+
+@run_check(
+ "Checking that all provider Hook/Operator/Sensor/Trigger/Executor/Notifier"
+ " classes are registered in provider.yaml"
+)
+def check_all_provider_classes_are_registered(yaml_files: dict[str, dict]) ->
tuple[int, int]:
+ """
+ Walk all provider source files, find
Hook/Operator/Sensor/Trigger/Executor/Notifier
+ subclasses, and verify they are registered in the appropriate
provider.yaml section.
+
+ This catches classes placed in non-standard directories or modules that
were missed
+ when updating provider.yaml.
+ """
+ from airflow.executors.base_executor import BaseExecutor
+ from airflow.models.baseoperator import BaseOperator
+ from airflow.sdk.bases.hook import BaseHook
+ from airflow.sdk.bases.notifier import BaseNotifier
+ from airflow.sensors.base import BaseSensorOperator
+ from airflow.triggers.base import BaseTrigger
+
Review Comment:
Do we need to add validation of provider subclasses such as
`BaseAuthManager`, `BaseDagBundle`, `BaseSecretsBackend`, or
`FileTaskHandler`?
These are the features I found that are not covered here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]