Lee-W commented on code in PR #55416:
URL: https://github.com/apache/airflow/pull/55416#discussion_r3044061702


##########
scripts/ci/prek/check_new_airflow_exception_usage.py:
##########
@@ -0,0 +1,227 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#   "rich>=13.0.0",
+# ]
+# ///
+"""Check that no new ``raise AirflowException`` usages are introduced.
+
+All *existing* usages are recorded in ``known_airflow_exceptions.txt`` next to
+this script (one ``relative/path::stripped_raise_line`` entry per line).  Any
+``raise AirflowException`` found in a checked file that is **not** present in
+that list is treated as a violation – use a dedicated exception class instead.
+
+Modes
+-----
+Default (files passed by prek/pre-commit):
+    Check only the supplied files; fail on any unlisted usage.
+
+``--all-files``:
+    Walk the whole repository and check every ``.py`` file.
+
+``--cleanup``:
+    Remove stale entries from the allowlist (entries whose exception no longer
+    exists in the corresponding source file). Safe to run at any time; does
+    not add new entries.
+
+``--generate``:
+    Scan the whole repository and *rebuild* the allowlist from scratch.
+    Intended for the initial setup or after a large-scale clean-up sprint.
+"""
+
+from __future__ import annotations
+
+import argparse
+import re
+from pathlib import Path
+
+from rich.console import Console
+from rich.panel import Panel
+
+console = Console()
+
+REPO_ROOT = Path(__file__).parents[3]
+
+# Match lines that actually raise AirflowException. Comment filtering is done
+# in _raise_lines() by skipping lines whose stripped form starts with "#".
+_RAISE_RE = re.compile(r"raise\s+AirflowException\b")
+
+
+class AllowlistManager:
+    def __init__(self, allowlist_file: Path) -> None:
+        self.allowlist_file = allowlist_file
+
+    def load(self) -> set[str]:
+        if not self.allowlist_file.exists():
+            return set()
+        return {line for line in self.allowlist_file.read_text().splitlines() if line.strip()}
+
+    def save(self, entries: set[str]) -> None:
+        self.allowlist_file.write_text("\n".join(sorted(entries)) + "\n")
+
+    def generate(self) -> int:
+        console.print(f"Scanning [cyan]{REPO_ROOT}[/cyan] for raise AirflowException …")
+        entries: set[str] = set()
+        for path in _iter_python_files():
+            for line in _raise_lines(path):
+                entries.add(_make_entry(path, line))
+
+        self.save(entries)
+        console.print(
+            f"[green]✓ Generated[/green] [cyan]{self.allowlist_file.relative_to(REPO_ROOT)}[/cyan] "
+            f"with [bold]{len(entries)}[/bold] entries."
+        )
+        return 0
+
+    def cleanup(self) -> int:
+        allowlist = self.load()
+        if not allowlist:
+            console.print("[yellow]Allowlist is empty – nothing to clean up.[/yellow]")
+            return 0
+
+        stale: set[str] = set()
+        for entry in allowlist:
+            rel_str, _, raise_line = entry.partition("::")
+            path = REPO_ROOT / rel_str
+            if not path.exists() or raise_line not in _raise_lines(path):
+                stale.add(entry)
+
+        if stale:
+            console.print(
+                f"[yellow]Removing {len(stale)} stale entr{'y' if len(stale) == 1 else 'ies'}:[/yellow]"
+            )
+            for s in sorted(stale):
+                console.print(f"  [dim]-[/dim] {s}")
+            self.save(allowlist - stale)
+            console.print(
+                f"\n[green]Updated[/green] [cyan]{self.allowlist_file.relative_to(REPO_ROOT)}[/cyan]"
+            )
+        else:
+            console.print("[green]✓ No stale entries found.[/green]")
+        return 0
+
+
+def _make_entry(path: Path, stripped_line: str) -> str:
+    """Generate entry like ``relative/path/to/file.py::raise AirflowException(...)``"""
+    return f"{path.relative_to(REPO_ROOT)}::{stripped_line}"
+
+
+def _raise_lines(path: Path) -> list[str]:
+    """Return stripped raise-lines from *path* that match the pattern."""
+    try:
+        text = path.read_text(encoding="utf-8", errors="replace")

Review Comment:
   I ended up doing a `general count of AirflowExceptions per specific file`. 
Thanks for the suggestion!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to