Copilot commented on code in PR #65358:
URL: https://github.com/apache/airflow/pull/65358#discussion_r3235176265
##########
scripts/ci/prek/common_prek_utils.py:
##########
@@ -526,16 +526,111 @@ def get_all_provider_info_dicts() -> dict[str, dict]:
return providers
-def has_nocheck_marker(source_lines: list[str], node: ast.ImportFrom, marker:
str) -> bool:
- """Check if the import statement has the given nocheck marker comment on
any of its lines."""
+_NOQA_RE = re.compile(r"#\s*noqa\s*:\s*([^\n]*)", re.IGNORECASE)
+_NOQA_CODE_RE = re.compile(r"[A-Z]+\d+")
+
+
+def _parse_noqa_codes(line: str) -> set[str]:
+ """Extract codes from the leading comma-separated list in a ``# noqa:
<codes>`` comment.
+
+ Anything after the first non-code token is treated as explanatory text and
+ ignored, so ``# noqa: F401 - see SDK002 docs`` only yields ``{"F401"}``.
+ """
+ match = _NOQA_RE.search(line)
+ if not match:
+ return set()
+ codes: set[str] = set()
+ for raw in match.group(1).split(","):
+ code_match = _NOQA_CODE_RE.match(raw.strip())
+ if not code_match:
Review Comment:
`_NOQA_CODE_RE` uses `[A-Z]+\d+` with `.match()`, which will treat tokens
like `SDK002x`/`F401foo` as valid codes (partial match) and incorrectly
suppress violations. Consider requiring a word boundary/end after the digits
(e.g. `\b`/`(?=\W|$)`) so only real codes (optionally followed by
whitespace/explanation) are accepted.
##########
scripts/ci/prek/check_core_imports_in_sdk.py:
##########
@@ -25,44 +25,24 @@
from __future__ import annotations
import argparse
-import ast
import sys
from pathlib import Path
-from common_prek_utils import console
+from common_prek_utils import find_import_violations, report_import_violations
+
+NOCHECK_CODE = "SDK002"
def check_file_for_core_imports(file_path: Path) -> list[tuple[int, str]]:
"""Check file for airflow-core imports (anything except airflow.sdk).
Returns list of (line_num, import_statement)."""
- try:
- source = file_path.read_text(encoding="utf-8")
- tree = ast.parse(source, filename=str(file_path))
- except (OSError, UnicodeDecodeError, SyntaxError):
- return []
-
- mismatches = []
-
- for node in ast.walk(tree):
- # for `from airflow.x import y` statements
- if isinstance(node, ast.ImportFrom):
- if (
- node.module
- and node.module.startswith("airflow.")
- and not node.module.startswith("airflow.sdk")
- ):
- import_names = ", ".join(alias.name for alias in node.names)
- statement = f"from {node.module} import {import_names}"
- mismatches.append((node.lineno, statement))
- # for `import airflow.x` statements
- elif isinstance(node, ast.Import):
- for alias in node.names:
- if alias.name.startswith("airflow.") and not
alias.name.startswith("airflow.sdk"):
- statement = f"import {alias.name}"
- if alias.asname:
- statement += f" as {alias.asname}"
- mismatches.append((node.lineno, statement))
-
- return mismatches
+ return find_import_violations(
+ file_path,
+ is_violating_module=lambda module: (
+ module.startswith("airflow.") and not
module.startswith("airflow.sdk")
+ ),
+ nocheck_code=NOCHECK_CODE,
+ check_plain_imports=True,
+ )
Review Comment:
The core-import predicate only flags `from airflow.<something> import ...` /
`import airflow.<something>`, but it misses `from airflow import <name>`
imports (e.g. `from airflow import settings`), which are still airflow-core
usage and won’t be catchable via `# noqa: SDK002`. Consider explicitly handling
`ast.ImportFrom` with `module == "airflow"` and treating any imported name
other than `sdk` as a violation (and add a regression test).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]