ashb commented on PR #44686:
URL: https://github.com/apache/airflow/pull/44686#issuecomment-2522904089
A quick POC knocked up using `ruff analyze graph` output:
```
python ./scripts/ci/pre_commit/check_provider_cross_deps.py
warning: `ruff analyze graph` is experimental and may change without warning
'providers/src/airflow/providers/airbyte/hooks/airbyte.py' imports from the
'common.compat' but doesn't list it as a dependency!
'providers/src/airflow/providers/celery/executors/celery_executor_utils.py'
imports from the 'standard' but doesn't list it as a dependency!
'providers/src/airflow/providers/edge/example_dags/integration_test.py'
imports from the 'standard' but doesn't list it as a dependency!
```
The first was me adding an import to test the check; the other two are guarded imports of this form:
```
try:
    from airflow.providers.standard.operators.bash import BashOperator
except ImportError:
    from airflow.operators.bash import BashOperator  # type: ignore[no-redef,attr-defined]
```
<details>
<summary>POC code in here:</summary>
```python
from __future__ import annotations
import json
import os
import subprocess
import sys
from functools import cache
from pathlib import Path
# Repository root. The script is run from scripts/ci/pre_commit/, so the root is
# three directories above the one that contains this file.
AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve()

# Generated JSON file describing every provider's declared dependencies.
GENERATED_PROVIDERS_DEPENDENCIES_FILE = (
    AIRFLOW_ROOT_PATH / "generated" / "provider_dependencies.json"
)

# Parsed once at import time. Keys are provider names such as "airbyte" or
# "common.compat"; each value carries a "cross-providers-deps" entry.
PROVIDER_DEPENDENCIES = json.loads(GENERATED_PROVIDERS_DEPENDENCIES_FILE.read_text())

# Repo-relative locations of the provider sources and of the
# ``airflow.providers`` namespace package inside them.
PROVIDERS_REL_SRC = Path("providers/src/")
PROVIDERS_NS_REL_SRC = Path("providers/src/airflow/providers/")
@cache
def rel_file_to_provider_name(rel_name: str):
    """Return the name of the provider that owns ``rel_name``.

    :param rel_name: File path relative to the repository root, e.g.
        ``providers/src/airflow/providers/airbyte/hooks/airbyte.py``.
    :return: The matching key of ``PROVIDER_DEPENDENCIES`` (e.g. ``"airbyte"``
        or ``"common.compat"``), or ``None`` when the path is not below the
        providers namespace directory or matches no known provider.

    Results are memoised because the same files appear many times in the
    import graph.
    """
    try:
        rel = Path(rel_name).relative_to(PROVIDERS_NS_REL_SRC)
    except ValueError:
        # Not a provider file
        return None

    if rel.suffix == ".py":
        rel = rel.parent

    # A provider name is either the first path component ("airbyte") or the
    # first two joined with a dot ("common.compat") -- if we ever have an
    # "airflow.providers.ns1.ns2.package" we will need to change this.
    # Slicing (rather than indexing) keeps this safe for a file that sits
    # directly in the namespace directory, where ``rel.parts`` is empty.
    for depth in (1, 2):
        if len(rel.parts) < depth:
            break
        name = ".".join(rel.parts[:depth])
        if name in PROVIDER_DEPENDENCIES:
            return name
    return None
def main():
    """Report provider files that import another provider without declaring it.

    Runs ``ruff analyze graph`` over the provider sources, maps every file and
    every file it imports to a provider name, and prints a message to stderr
    for each cross-provider import that is missing from the importing
    provider's ``"cross-providers-deps"`` list.

    :return: ``1`` if at least one undeclared cross-provider import was
        found, ``0`` otherwise (suitable as a process exit code).
    :raises subprocess.CalledProcessError: if ``ruff`` exits non-zero.
    """
    # Ask ruff to analyze the import graphs
    # NOTE(review): the paths in ruff's output are matched against
    # repo-relative prefixes, so this presumably must run from the repository
    # root -- confirm how ruff relativizes its output paths.
    import_tree_str = subprocess.check_output(
        ["ruff", "analyze", "graph", os.fspath(AIRFLOW_ROOT_PATH / PROVIDERS_REL_SRC)]
    )
    import_tree = json.loads(import_tree_str)

    invalid_imports_found = False
    for file, imports in import_tree.items():
        this_provider = rel_file_to_provider_name(file)
        if this_provider is None:
            # The importing file belongs to no known provider, so there is no
            # dependency list to check it against.
            continue

        # Providers imported by this file, excluding non-provider files (None)
        # and the file's own provider.
        imported_cross_providers = {
            rel_file_to_provider_name(imported_file) for imported_file in imports
        } - {None, this_provider}

        # Sorted so the report order is stable between runs.
        for cross_provider in sorted(imported_cross_providers):
            if cross_provider not in PROVIDER_DEPENDENCIES[this_provider]["cross-providers-deps"]:
                # TODO: rich
                print(
                    f"{file!r} imports from the {cross_provider!r} but doesn't list it as a dependency!",
                    file=sys.stderr,
                )
                invalid_imports_found = True

    return 1 if invalid_imports_found else 0


if __name__ == "__main__":
    # Propagate the result so pre-commit/CI fails when violations are found.
    sys.exit(main())
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]