Copilot commented on code in PR #65358:
URL: https://github.com/apache/airflow/pull/65358#discussion_r3235016652


##########
scripts/ci/prek/common_prek_utils.py:
##########
@@ -526,16 +526,111 @@ def get_all_provider_info_dicts() -> dict[str, dict]:
     return providers
 
 
-def has_nocheck_marker(source_lines: list[str], node: ast.ImportFrom, marker: str) -> bool:
-    """Check if the import statement has the given nocheck marker comment on any of its lines."""
+_NOQA_RE = re.compile(r"#\s*noqa\s*:\s*([^\n]*)", re.IGNORECASE)
+_NOQA_CODE_RE = re.compile(r"[A-Z]+\d+")
+
+
+def _parse_noqa_codes(line: str) -> set[str]:
+    """Extract codes from the leading comma-separated list in a ``# noqa: <codes>`` comment.
+
+    Anything after the first non-code token is treated as explanatory text and
+    ignored, so ``# noqa: F401 - see SDK002 docs`` only yields ``{"F401"}``.
+    """
+    match = _NOQA_RE.search(line)
+    if not match:
+        return set()
+    codes: set[str] = set()
+    for raw in match.group(1).split(","):
+        code_match = _NOQA_CODE_RE.match(raw.strip())
+        if not code_match:
+            break
+        codes.add(code_match.group(0))
+    return codes

Review Comment:
   `_parse_noqa_codes()` uses `_NOQA_CODE_RE.match()` with pattern `[A-Z]+\d+`, 
which is anchored only at the start of the token and therefore also accepts a 
code-like prefix of a longer token. This means tokens like `SDK002X` (or 
`F401foo`) will be parsed as the codes `SDK002` (or `F401`) and can incorrectly 
suppress violations. Consider tightening the parsing to require a word 
boundary/end-of-token after the code, e.g. `[A-Z]+\d+\b` (while still allowing 
trailing explanatory text like `SDK002 - reason`), and adjust the docstring to 
match the actual parsing behavior.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to