This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new dd218dde8a Improve provider verification pre-commit (#33640)
dd218dde8a is described below
commit dd218dde8a57865a0af59f0e8e06ad3f30c5fd92
Author: Jarek Potiuk <[email protected]>
AuthorDate: Wed Aug 23 17:36:24 2023 +0200
Improve provider verification pre-commit (#33640)
There were a numer of problems with the provider verification pre-commit
scripts:
* It missed verification of "notifications"
* It did not check if the classes or modules specified in provider yaml
raised deprecation warnings
* The messages produced by the script when some discrepancies were found
were pretty cryptic and it was difficult to guess what kind of action
should be taken to fix the problem
This PR fixes all those problems:
* verification of notification is performed
* when importing all the classes and modules, check for the
AirflowProviderDeprecationWarnings is done and treated as error
* The messages produced provide clear actionable instructions on what
to do and explain what are the discrepancies of expected vs. current
list in a clear way
---
.../pre_commit_check_provider_yaml_files.py | 2 +
.../in_container/run_provider_yaml_files_check.py | 138 +++++++++++++++++----
2 files changed, 116 insertions(+), 24 deletions(-)
diff --git a/scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py
b/scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py
index 995ff18ba0..400b958065 100755
--- a/scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py
+++ b/scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py
@@ -51,6 +51,8 @@ if __name__ == "__main__":
*get_extra_docker_flags(MOUNT_SELECTED),
"-e",
"SKIP_ENVIRONMENT_INITIALIZATION=true",
+ "-e",
+ "PYTHONWARNINGS=default",
"--pull",
"never",
airflow_image,
diff --git a/scripts/in_container/run_provider_yaml_files_check.py
b/scripts/in_container/run_provider_yaml_files_check.py
index 9e873f90c3..61cf5842cc 100755
--- a/scripts/in_container/run_provider_yaml_files_check.py
+++ b/scripts/in_container/run_provider_yaml_files_check.py
@@ -26,6 +26,7 @@ import pathlib
import platform
import sys
import textwrap
+import warnings
from collections import Counter
from enum import Enum
from typing import Any, Iterable
@@ -37,18 +38,22 @@ from rich.console import Console
from tabulate import tabulate
from airflow.cli.commands.info_command import Architecture
+from airflow.exceptions import AirflowProviderDeprecationWarning
from airflow.providers_manager import ProvidersManager
# Those are deprecated modules that contain removed Hooks/Sensors/Operators
that we left in the code
# so that users can get a very specific error message when they try to use
them.
-EXCLUDED_MODULES = [
+DEPRECATED_MODULES = [
"airflow.providers.apache.hdfs.sensors.hdfs",
"airflow.providers.apache.hdfs.hooks.hdfs",
"airflow.providers.cncf.kubernetes.triggers.kubernetes_pod",
"airflow.providers.cncf.kubernetes.operators.kubernetes_pod",
]
+KNOWN_DEPRECATED_CLASSES = [
+ "airflow.providers.google.cloud.links.dataproc.DataprocLink",
+]
try:
from yaml import CSafeLoader as SafeLoader
@@ -71,6 +76,13 @@ CORE_INTEGRATIONS = ["SQL", "Local"]
errors: list[str] = []
console = Console(width=400, color_system="standard")
+# you need to enable warnings for all deprecations - needed by importlib
library to show deprecations
+if os.environ.get("PYTHONWARNINGS") != "default":
+ console.print(
+ "[red]Error: PYTHONWARNINGS not set[/]\n"
+ "You must set `PYTHONWARNINGS=default` environment variable to run
this script"
+ )
+ sys.exit(1)
suspended_providers: set[str] = set()
suspended_logos: set[str] = set()
@@ -136,7 +148,14 @@ def check_integration_duplicates(yaml_files: dict[str,
dict]):
sys.exit(3)
-def assert_sets_equal(set1, set2, allow_extra_in_set2=False):
+def assert_sets_equal(
+ set1: set[str],
+ set_name_1: str,
+ set2: set[str],
+ set_name_2: str,
+ allow_extra_in_set2=False,
+ extra_message: str = "",
+):
try:
difference1 = set1.difference(set2)
except TypeError as e:
@@ -153,6 +172,8 @@ def assert_sets_equal(set1, set2,
allow_extra_in_set2=False):
if difference1 or (difference2 and not allow_extra_in_set2):
lines = []
+ lines.append(f" Left set:{set_name_1}")
+ lines.append(f" Right set:{set_name_2}")
if difference1:
lines.append(" -- Items in the left set but not the right:")
for item in sorted(difference1):
@@ -163,6 +184,8 @@ def assert_sets_equal(set1, set2,
allow_extra_in_set2=False):
lines.append(f" {item!r}")
standard_msg = "\n".join(lines)
+ if extra_message:
+ standard_msg += f"\n{extra_message}"
raise AssertionError(standard_msg)
@@ -174,12 +197,37 @@ class ObjectType(Enum):
def check_if_object_exist(object_name: str, resource_type: str,
yaml_file_path: str, object_type: ObjectType):
try:
if object_type == ObjectType.CLASS:
- module_name, object_name = object_name.rsplit(".", maxsplit=1)
- the_class = getattr(importlib.import_module(module_name),
object_name)
+ module_name, class_name = object_name.rsplit(".", maxsplit=1)
+ with warnings.catch_warnings(record=True) as w:
+ the_class = getattr(importlib.import_module(module_name),
class_name)
+ for warn in w:
+ if warn.category == AirflowProviderDeprecationWarning:
+ if object_name in KNOWN_DEPRECATED_CLASSES:
+ console.print(
+ f"[yellow]The {object_name} class is deprecated
and we know about it. "
+ f"It should be removed in the future."
+ )
+ continue
+ errors.append(
+ f"The `{class_name}` class in {resource_type} list in
{yaml_file_path} "
+ f"is deprecated with this message: '{warn.message}'.\n"
+ f"[yellow]How to fix it[/]: Please remove it from
provider.yaml and replace with "
+ f"the new class."
+ )
if the_class and inspect.isclass(the_class):
return
elif object_type == ObjectType.MODULE:
- module = importlib.import_module(object_name)
+ with warnings.catch_warnings(record=True) as w:
+ module = importlib.import_module(object_name)
+ for warn in w:
+ if warn.category == AirflowProviderDeprecationWarning:
+ errors.append(
+ f"The `{object_name}` module in {resource_type} list
in {yaml_file_path} "
+ f"is deprecated with this message: '{warn.message}'.\n"
+ f"[yellow]How to fix it[/]: Please remove it from
provider.yaml and replace it "
+ f"with the new module. If you see warnings in classes
- fix the classes so that "
+ f"they are not raising Deprecation Warnings when
module is imported."
+ )
if inspect.ismodule(module):
return
else:
@@ -231,23 +279,32 @@ def parse_module_data(provider_data, resource_type,
yaml_file_path):
return expected_modules, provider_package, resource_data
-def check_correctness_of_list_of_sensors_operators_hook_modules(yaml_files:
dict[str, dict]):
- print("Checking completeness of list of {sensors, hooks, operators,
triggers}")
- print(" -- {sensors, hooks, operators, triggers} - Expected modules (left)
: Current modules (right)")
+def
check_correctness_of_list_of_sensors_operators_hook_trigger_modules(yaml_files:
dict[str, dict]):
+ print(" -- Checking completeness of list of {sensors, hooks, operators,
triggers}")
for (yaml_file_path, provider_data), resource_type in itertools.product(
yaml_files.items(), ["sensors", "operators", "hooks", "triggers"]
):
expected_modules, provider_package, resource_data = parse_module_data(
provider_data, resource_type, yaml_file_path
)
- expected_modules = {module for module in expected_modules if module
not in EXCLUDED_MODULES}
+ expected_modules = {module for module in expected_modules if module
not in DEPRECATED_MODULES}
current_modules = {str(i) for r in resource_data for i in
r.get("python-modules", [])}
check_if_objects_exist_and_belong_to_package(
current_modules, provider_package, yaml_file_path, resource_type,
ObjectType.MODULE
)
try:
- assert_sets_equal(set(expected_modules), set(current_modules))
+ package_name =
os.fspath(ROOT_DIR.joinpath(yaml_file_path).parent.relative_to(ROOT_DIR)).replace(
+ "/", "."
+ )
+ assert_sets_equal(
+ set(expected_modules),
+ f"Found list of {resource_type} modules in provider package:
{package_name}",
+ set(current_modules),
+ f"Currently configured list of {resource_type} modules in
{yaml_file_path}",
+ extra_message="[yellow]If there are deprecated modules in the
list, please add them to "
+ f"DEPRECATED_MODULES in
{pathlib.Path(__file__).relative_to(ROOT_DIR)}[/]",
+ )
except AssertionError as ex:
nested_error = textwrap.indent(str(ex), " ")
errors.append(
@@ -276,19 +333,27 @@ def check_completeness_of_list_of_transfers(yaml_files:
dict[str, dict]):
print("Checking completeness of list of transfers")
resource_type = "transfers"
- print(" -- Expected transfers modules(Left): Current transfers
Modules(Right)")
+ print(" -- Checking transfers modules")
for yaml_file_path, provider_data in yaml_files.items():
expected_modules, provider_package, resource_data = parse_module_data(
provider_data, resource_type, yaml_file_path
)
- expected_modules = {module for module in expected_modules if module
not in EXCLUDED_MODULES}
+ expected_modules = {module for module in expected_modules if module
not in DEPRECATED_MODULES}
current_modules = {r.get("python-module") for r in resource_data}
check_if_objects_exist_and_belong_to_package(
current_modules, provider_package, yaml_file_path, resource_type,
ObjectType.MODULE
)
try:
- assert_sets_equal(set(expected_modules), set(current_modules))
+ package_name =
os.fspath(ROOT_DIR.joinpath(yaml_file_path).parent.relative_to(ROOT_DIR)).replace(
+ "/", "."
+ )
+ assert_sets_equal(
+ set(expected_modules),
+ f"Found list of transfer modules in provider package:
{package_name}",
+ set(current_modules),
+ f"Currently configured list of transfer modules in
{yaml_file_path}",
+ )
except AssertionError as ex:
nested_error = textwrap.indent(str(ex), " ")
errors.append(
@@ -337,6 +402,18 @@ def check_extra_link_classes(yaml_files: dict[str, dict]):
)
+def check_notification_classes(yaml_files: dict[str, dict]):
+ print("Checking notifications belong to package, exist and are classes")
+ resource_type = "notifications"
+ for yaml_file_path, provider_data in yaml_files.items():
+ provider_package =
pathlib.Path(yaml_file_path).parent.as_posix().replace("/", ".")
+ notifications = provider_data.get(resource_type)
+ if notifications:
+ check_if_objects_exist_and_belong_to_package(
+ notifications, provider_package, yaml_file_path,
resource_type, ObjectType.CLASS
+ )
+
+
def check_duplicates_in_list_of_transfers(yaml_files: dict[str, dict]):
print("Checking for duplicates in list of transfers")
errors = []
@@ -435,11 +512,20 @@ def check_doc_files(yaml_files: dict[str, dict]):
}
try:
- print(" -- Checking document urls: expected (left), current (right)")
- assert_sets_equal(set(expected_doc_urls), set(current_doc_urls))
-
- print(" -- Checking logo urls: expected (left), current (right)")
- assert_sets_equal(set(expected_logo_urls), set(current_logo_urls))
+ print(" -- Checking document urls")
+ assert_sets_equal(
+ set(expected_doc_urls),
+ "Document urls found in airflow/docs",
+ set(current_doc_urls),
+ "Document urls configured in provider.yaml files",
+ )
+ print(" -- Checking logo urls")
+ assert_sets_equal(
+ set(expected_logo_urls),
+ "Logo urls found in airflow/docs/integration-logos",
+ set(current_logo_urls),
+ "Logo urls configured in provider.yaml files",
+ )
except AssertionError as ex:
print(ex)
sys.exit(1)
@@ -465,12 +551,15 @@ def
check_providers_are_mentioned_in_issue_template(yaml_files: dict[str, dict])
issue_template = yaml.safe_load(issue_file)
all_mentioned_providers = [match.value for match in
jsonpath_expr.find(issue_template)]
try:
- print(
- f" -- Checking providers: present in code (left), "
- f"mentioned in {PROVIDER_ISSUE_TEMPLATE_PATH} (right)"
- )
+ print(f" -- Checking providers are mentioned in
{PROVIDER_ISSUE_TEMPLATE_PATH}")
# in case of suspended providers, we still want to have them in the
issue template
- assert_sets_equal(set(short_provider_names),
set(all_mentioned_providers), allow_extra_in_set2=True)
+ assert_sets_equal(
+ set(short_provider_names),
+ "Provider names found in provider.yaml files",
+ set(all_mentioned_providers),
+ f"Provider names mentioned in {PROVIDER_ISSUE_TEMPLATE_PATH}",
+ allow_extra_in_set2=True,
+ )
except AssertionError as ex:
print(ex)
sys.exit(1)
@@ -512,7 +601,8 @@ if __name__ == "__main__":
check_hook_connection_classes(all_parsed_yaml_files)
check_plugin_classes(all_parsed_yaml_files)
check_extra_link_classes(all_parsed_yaml_files)
-
check_correctness_of_list_of_sensors_operators_hook_modules(all_parsed_yaml_files)
+
check_correctness_of_list_of_sensors_operators_hook_trigger_modules(all_parsed_yaml_files)
+ check_notification_classes(all_parsed_yaml_files)
check_unique_provider_name(all_parsed_yaml_files)
check_providers_have_all_documentation_files(all_parsed_yaml_files)