Copilot commented on code in PR #64972:
URL: https://github.com/apache/airflow/pull/64972#discussion_r3066503513


##########
scripts/ci/prek/check_migration_patterns.py:
##########
@@ -0,0 +1,383 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# /// script
+# requires-python = ">=3.10,<3.11"
+# dependencies = [
+#   "rich>=13.6.0",
+# ]
+# ///
+"""
+Static analysis checks for Alembic migration anti-patterns.
+
+These checks use AST analysis to detect common migration mistakes that can 
cause
+silent failures, particularly on SQLite where ``PRAGMA foreign_keys`` has 
specific
+requirements about transaction state.
+
+Rules
+=====
+
+MIG001 -- DML before ``disable_sqlite_fkeys``
+----------------------------------------------
+
+**What it does:**
+Detects ``op.execute()`` calls containing DML keywords (UPDATE, INSERT, 
DELETE) that
+appear before a ``with disable_sqlite_fkeys(op):`` block in the same function.
+
+**Why is this bad:**
+On SQLite, any DML statement triggers *autobegin*, starting a transaction.  
Once a
+transaction is active, ``PRAGMA foreign_keys=off`` (issued by 
``disable_sqlite_fkeys``)
+becomes a no-op.  This means foreign key checks remain enabled, and
+``batch_alter_table`` (which recreates tables) may fail with foreign key 
constraint
+violations.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.execute("UPDATE dag SET col = '' WHERE col IS NULL")  # triggers 
autobegin
+        with disable_sqlite_fkeys(op):  # PRAGMA is now a no-op!
+            with op.batch_alter_table("dag") as batch_op:
+                batch_op.alter_column("col", nullable=False)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        with disable_sqlite_fkeys(op):
+            op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+            with op.batch_alter_table("dag") as batch_op:
+                batch_op.alter_column("col", nullable=False)
+
+
+MIG002 -- DDL before ``disable_sqlite_fkeys``
+----------------------------------------------
+
+**What it does:**
+Detects any Alembic ``op.*`` call (DDL operations like ``op.add_column``,
+``op.drop_column``, ``op.create_table``, ``op.drop_table``, 
``op.create_index``,
+``op.batch_alter_table``, etc.) that appears before a
+``with disable_sqlite_fkeys(op):`` block in the same function.  Excludes DML 
calls
+covered by MIG001.
+
+**Why is this bad:**
+DDL operations also trigger *autobegin* on SQLite, making the subsequent
+``PRAGMA foreign_keys=off`` a no-op, exactly like DML.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.add_column("dag_run", sa.Column("created_at", ...))  # triggers 
autobegin
+        with disable_sqlite_fkeys(op):  # PRAGMA is now a no-op!
+            with op.batch_alter_table("backfill") as batch_op:
+                batch_op.alter_column("col", nullable=True)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        with disable_sqlite_fkeys(op):
+            op.add_column("dag_run", sa.Column("created_at", ...))
+            with op.batch_alter_table("backfill") as batch_op:
+                batch_op.alter_column("col", nullable=True)
+
+
+MIG003 -- DML without offline-mode guard
+------------------------------------------
+
+**What it does:**
+Detects ``op.execute()`` calls containing DML keywords (UPDATE, INSERT, 
DELETE) in
+``upgrade()`` or ``downgrade()`` functions that do not contain a
+``context.is_offline_mode()`` check anywhere in the same function.
+
+**Why is this bad:**
+Alembic's offline mode generates SQL scripts instead of executing against a 
live
+database.  DML statements like ``UPDATE ... WHERE col IS NULL`` depend on 
actual data
+and produce no useful output in offline mode.  Without an 
``is_offline_mode()`` guard,
+the generated script includes DML that may fail or silently do nothing when 
applied
+later.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+        with op.batch_alter_table("dag") as batch_op:
+            batch_op.alter_column("col", nullable=False)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        if not context.is_offline_mode():
+            op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+        with op.batch_alter_table("dag") as batch_op:
+            batch_op.alter_column("col", nullable=False)
+
+
+Known Limitation -- Future MIG004 Candidate
+---------------------------------------------
+
+Migrations that perform DML and use ``batch_alter_table`` but never call
+``disable_sqlite_fkeys`` at all are NOT detected by MIG001/MIG002.  These 
migrations
+may silently break SQLite foreign key handling.  Detecting this pattern 
requires
+understanding whether foreign key relationships exist on the affected tables, 
which
+is beyond AST-level analysis.  This is tracked as a future MIG004 candidate.
+
+
+Suppression
+===========
+
+Add ``# noqa: MIG0XX`` to a line to suppress the corresponding check for that 
line.
+Multiple codes can be comma-separated: ``# noqa: MIG001, MIG003``.
+
+Include a brief reason after ``--``::
+
+    # noqa: MIG003 -- simple UPDATE safe in offline mode
+"""
+
+from __future__ import annotations
+
+import ast
+import re
+import sys
+from pathlib import Path
+
+from rich.console import Console
+
+console = Console(color_system="standard", width=200)
+
+DML_KEYWORDS = ("UPDATE ", "INSERT ", "DELETE ")
+
+
+def _get_noqa_codes(line: str) -> set[str]:
+    """Extract noqa codes from a source line."""
+    match = re.search(r"#\s*noqa:\s*([\w,\s]+)", line)
+    if match:
+        return {code.strip() for code in match.group(1).split(",")}
+    return set()
+
+
+def _is_dml_string(node: ast.expr) -> bool:
+    """Check if an AST expression is a string literal starting with a DML 
keyword."""
+    if isinstance(node, ast.Constant) and isinstance(node.value, str):
+        stripped = node.value.strip().upper()
+        return any(stripped.startswith(kw) for kw in DML_KEYWORDS)

Review Comment:
   DML detection uses `startswith` with keywords that include a trailing space 
(e.g. `\"DELETE \"`). This will miss common multi-line SQL styles such as 
`\"DELETE\\nFROM ...\"` / `\"INSERT\\nINTO ...\"`, leading to false negatives 
for MIG001/MIG003. Consider switching to a regex like 
`r'^(UPDATE|INSERT|DELETE)\\b'` against the stripped/uppercased prefix so any 
whitespace formatting is handled.



##########
scripts/ci/prek/check_migration_patterns.py:
##########
@@ -0,0 +1,383 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# /// script
+# requires-python = ">=3.10,<3.11"
+# dependencies = [
+#   "rich>=13.6.0",
+# ]
+# ///
+"""
+Static analysis checks for Alembic migration anti-patterns.
+
+These checks use AST analysis to detect common migration mistakes that can 
cause
+silent failures, particularly on SQLite where ``PRAGMA foreign_keys`` has 
specific
+requirements about transaction state.
+
+Rules
+=====
+
+MIG001 -- DML before ``disable_sqlite_fkeys``
+----------------------------------------------
+
+**What it does:**
+Detects ``op.execute()`` calls containing DML keywords (UPDATE, INSERT, 
DELETE) that
+appear before a ``with disable_sqlite_fkeys(op):`` block in the same function.
+
+**Why is this bad:**
+On SQLite, any DML statement triggers *autobegin*, starting a transaction.  
Once a
+transaction is active, ``PRAGMA foreign_keys=off`` (issued by 
``disable_sqlite_fkeys``)
+becomes a no-op.  This means foreign key checks remain enabled, and
+``batch_alter_table`` (which recreates tables) may fail with foreign key 
constraint
+violations.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.execute("UPDATE dag SET col = '' WHERE col IS NULL")  # triggers 
autobegin
+        with disable_sqlite_fkeys(op):  # PRAGMA is now a no-op!
+            with op.batch_alter_table("dag") as batch_op:
+                batch_op.alter_column("col", nullable=False)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        with disable_sqlite_fkeys(op):
+            op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+            with op.batch_alter_table("dag") as batch_op:
+                batch_op.alter_column("col", nullable=False)
+
+
+MIG002 -- DDL before ``disable_sqlite_fkeys``
+----------------------------------------------
+
+**What it does:**
+Detects any Alembic ``op.*`` call (DDL operations like ``op.add_column``,
+``op.drop_column``, ``op.create_table``, ``op.drop_table``, 
``op.create_index``,
+``op.batch_alter_table``, etc.) that appears before a
+``with disable_sqlite_fkeys(op):`` block in the same function.  Excludes DML 
calls
+covered by MIG001.
+
+**Why is this bad:**
+DDL operations also trigger *autobegin* on SQLite, making the subsequent
+``PRAGMA foreign_keys=off`` a no-op, exactly like DML.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.add_column("dag_run", sa.Column("created_at", ...))  # triggers 
autobegin
+        with disable_sqlite_fkeys(op):  # PRAGMA is now a no-op!
+            with op.batch_alter_table("backfill") as batch_op:
+                batch_op.alter_column("col", nullable=True)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        with disable_sqlite_fkeys(op):
+            op.add_column("dag_run", sa.Column("created_at", ...))
+            with op.batch_alter_table("backfill") as batch_op:
+                batch_op.alter_column("col", nullable=True)
+
+
+MIG003 -- DML without offline-mode guard
+------------------------------------------
+
+**What it does:**
+Detects ``op.execute()`` calls containing DML keywords (UPDATE, INSERT, 
DELETE) in
+``upgrade()`` or ``downgrade()`` functions that do not contain a
+``context.is_offline_mode()`` check anywhere in the same function.
+
+**Why is this bad:**
+Alembic's offline mode generates SQL scripts instead of executing against a 
live
+database.  DML statements like ``UPDATE ... WHERE col IS NULL`` depend on 
actual data
+and produce no useful output in offline mode.  Without an 
``is_offline_mode()`` guard,
+the generated script includes DML that may fail or silently do nothing when 
applied
+later.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+        with op.batch_alter_table("dag") as batch_op:
+            batch_op.alter_column("col", nullable=False)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        if not context.is_offline_mode():
+            op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+        with op.batch_alter_table("dag") as batch_op:
+            batch_op.alter_column("col", nullable=False)
+
+
+Known Limitation -- Future MIG004 Candidate
+---------------------------------------------
+
+Migrations that perform DML and use ``batch_alter_table`` but never call
+``disable_sqlite_fkeys`` at all are NOT detected by MIG001/MIG002.  These 
migrations
+may silently break SQLite foreign key handling.  Detecting this pattern 
requires
+understanding whether foreign key relationships exist on the affected tables, 
which
+is beyond AST-level analysis.  This is tracked as a future MIG004 candidate.
+
+
+Suppression
+===========
+
+Add ``# noqa: MIG0XX`` to a line to suppress the corresponding check for that 
line.
+Multiple codes can be comma-separated: ``# noqa: MIG001, MIG003``.
+
+Include a brief reason after ``--``::
+
+    # noqa: MIG003 -- simple UPDATE safe in offline mode
+"""
+
+from __future__ import annotations
+
+import ast
+import re
+import sys
+from pathlib import Path
+
+from rich.console import Console
+
+console = Console(color_system="standard", width=200)
+
+DML_KEYWORDS = ("UPDATE ", "INSERT ", "DELETE ")
+
+
+def _get_noqa_codes(line: str) -> set[str]:
+    """Extract noqa codes from a source line."""
+    match = re.search(r"#\s*noqa:\s*([\w,\s]+)", line)
+    if match:
+        return {code.strip() for code in match.group(1).split(",")}
+    return set()
+
+
+def _is_dml_string(node: ast.expr) -> bool:
+    """Check if an AST expression is a string literal starting with a DML 
keyword."""
+    if isinstance(node, ast.Constant) and isinstance(node.value, str):
+        stripped = node.value.strip().upper()
+        return any(stripped.startswith(kw) for kw in DML_KEYWORDS)
+    # f-strings: check the first string fragment
+    if isinstance(node, ast.JoinedStr):
+        for value in node.values:
+            if isinstance(value, ast.Constant) and isinstance(value.value, 
str):
+                stripped = value.value.strip().upper()
+                if any(stripped.startswith(kw) for kw in DML_KEYWORDS):
+                    return True
+                break  # only the leading fragment matters
+    return False
+
+
+def _is_op_execute_with_dml(node: ast.Call) -> bool:
+    """Check if a Call node is ``op.execute(<DML string>)``."""
+    if not (
+        isinstance(node.func, ast.Attribute)
+        and isinstance(node.func.value, ast.Name)
+        and node.func.value.id == "op"
+        and node.func.attr == "execute"
+    ):
+        return False
+    if not node.args:
+        return False
+    arg = node.args[0]
+    # Direct string: op.execute("UPDATE ...")
+    if _is_dml_string(arg):
+        return True
+    # Wrapped: op.execute(text("UPDATE ...")) or op.execute(sa.text("UPDATE 
..."))
+    if isinstance(arg, ast.Call) and arg.args:
+        return _is_dml_string(arg.args[0])
+    return False
+
+
+def _is_op_call(node: ast.Call) -> bool:
+    """Check if a Call node is ``op.<something>(...)``."""
+    return (
+        isinstance(node.func, ast.Attribute)
+        and isinstance(node.func.value, ast.Name)
+        and node.func.value.id == "op"
+    )

Review Comment:
   MIG002 currently treats any `op.<anything>(...)` as DDL (via `_is_op_call`), 
which will also match non-DDL helpers like `op.get_bind()`, `op.get_context()`, 
etc. This will produce false positives and force noisy suppressions/refactors. 
Consider restricting MIG002 to a concrete allowlist of DDL-ish Alembic 
operations (e.g. `add_column`, `drop_column`, `create_table`, `drop_table`, 
`create_index`, `drop_index`, `batch_alter_table`, `rename_table`, 
`alter_column`, `create_foreign_key`, ...), and explicitly excluding known 
non-DDL methods such as `get_bind`.



##########
.pre-commit-config.yaml:
##########
@@ -1034,6 +1034,14 @@ repos:
         pass_filenames: false
         files: ^.*\.py$
         require_serial: true
+      - id: check-migration-patterns
+        name: Check migration files for anti-patterns (MIG001/MIG002/MIG003)
+        entry: ./scripts/ci/prek/check_migration_patterns.py
+        language: python
+        pass_filenames: true
+        files: ^airflow-core/src/airflow/migrations/versions/.*\.py$
+        additional_dependencies: [rich>=13.6.0]
+        require_serial: true

Review Comment:
   If `check_migration_patterns.py` truly requires a specific Python minor 
version (per its PEP 723 header), this prek hook should pin `language_version` 
accordingly; otherwise developers/CI may execute it under a different 
interpreter than intended. Either add an explicit `language_version` here to 
match the requirement, or (preferably) relax the script’s `requires-python` so 
the hook can run under the repo’s standard tooling Python.



##########
scripts/ci/prek/check_migration_patterns.py:
##########
@@ -0,0 +1,383 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# /// script
+# requires-python = ">=3.10,<3.11"
+# dependencies = [
+#   "rich>=13.6.0",
+# ]

Review Comment:
   The PEP 723 `requires-python = \">=3.10,<3.11\"` upper-bound is unusually 
restrictive for a repo that typically runs tooling on newer Python versions 
too, and it can prevent running this script via PEP-723-capable runners (e.g. 
`uv run`) on Python 3.11+. If there isn’t a hard incompatibility, widen/remove 
the `<3.11` cap (e.g. `>=3.10`) or align it with the project’s supported 
tooling Python versions.



##########
scripts/ci/prek/check_new_airflow_exception_usage.py:
##########
@@ -59,7 +59,7 @@
 from rich.console import Console
 from rich.panel import Panel
 
-console = Console()
+console = Console(color_system="standard", width=200)

Review Comment:
   This console configuration change is unrelated to migration-pattern static 
analysis (MIG001/2/3) and isn’t mentioned in the PR description. Consider 
either updating the PR description to include the motivation for this change, 
or moving it into a separate PR to keep scope focused.



##########
scripts/ci/prek/check_migration_patterns.py:
##########
@@ -0,0 +1,383 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# /// script
+# requires-python = ">=3.10,<3.11"
+# dependencies = [
+#   "rich>=13.6.0",
+# ]
+# ///
+"""
+Static analysis checks for Alembic migration anti-patterns.
+
+These checks use AST analysis to detect common migration mistakes that can 
cause
+silent failures, particularly on SQLite where ``PRAGMA foreign_keys`` has 
specific
+requirements about transaction state.
+
+Rules
+=====
+
+MIG001 -- DML before ``disable_sqlite_fkeys``
+----------------------------------------------
+
+**What it does:**
+Detects ``op.execute()`` calls containing DML keywords (UPDATE, INSERT, 
DELETE) that
+appear before a ``with disable_sqlite_fkeys(op):`` block in the same function.
+
+**Why is this bad:**
+On SQLite, any DML statement triggers *autobegin*, starting a transaction.  
Once a
+transaction is active, ``PRAGMA foreign_keys=off`` (issued by 
``disable_sqlite_fkeys``)
+becomes a no-op.  This means foreign key checks remain enabled, and
+``batch_alter_table`` (which recreates tables) may fail with foreign key 
constraint
+violations.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.execute("UPDATE dag SET col = '' WHERE col IS NULL")  # triggers 
autobegin
+        with disable_sqlite_fkeys(op):  # PRAGMA is now a no-op!
+            with op.batch_alter_table("dag") as batch_op:
+                batch_op.alter_column("col", nullable=False)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        with disable_sqlite_fkeys(op):
+            op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+            with op.batch_alter_table("dag") as batch_op:
+                batch_op.alter_column("col", nullable=False)
+
+
+MIG002 -- DDL before ``disable_sqlite_fkeys``
+----------------------------------------------
+
+**What it does:**
+Detects any Alembic ``op.*`` call (DDL operations like ``op.add_column``,
+``op.drop_column``, ``op.create_table``, ``op.drop_table``, 
``op.create_index``,
+``op.batch_alter_table``, etc.) that appears before a
+``with disable_sqlite_fkeys(op):`` block in the same function.  Excludes DML 
calls
+covered by MIG001.
+
+**Why is this bad:**
+DDL operations also trigger *autobegin* on SQLite, making the subsequent
+``PRAGMA foreign_keys=off`` a no-op, exactly like DML.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.add_column("dag_run", sa.Column("created_at", ...))  # triggers 
autobegin
+        with disable_sqlite_fkeys(op):  # PRAGMA is now a no-op!
+            with op.batch_alter_table("backfill") as batch_op:
+                batch_op.alter_column("col", nullable=True)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        with disable_sqlite_fkeys(op):
+            op.add_column("dag_run", sa.Column("created_at", ...))
+            with op.batch_alter_table("backfill") as batch_op:
+                batch_op.alter_column("col", nullable=True)
+
+
+MIG003 -- DML without offline-mode guard
+------------------------------------------
+
+**What it does:**
+Detects ``op.execute()`` calls containing DML keywords (UPDATE, INSERT, 
DELETE) in
+``upgrade()`` or ``downgrade()`` functions that do not contain a
+``context.is_offline_mode()`` check anywhere in the same function.
+
+**Why is this bad:**
+Alembic's offline mode generates SQL scripts instead of executing against a 
live
+database.  DML statements like ``UPDATE ... WHERE col IS NULL`` depend on 
actual data
+and produce no useful output in offline mode.  Without an 
``is_offline_mode()`` guard,
+the generated script includes DML that may fail or silently do nothing when 
applied
+later.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+        with op.batch_alter_table("dag") as batch_op:
+            batch_op.alter_column("col", nullable=False)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        if not context.is_offline_mode():
+            op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+        with op.batch_alter_table("dag") as batch_op:
+            batch_op.alter_column("col", nullable=False)
+
+
+Known Limitation -- Future MIG004 Candidate
+---------------------------------------------
+
+Migrations that perform DML and use ``batch_alter_table`` but never call
+``disable_sqlite_fkeys`` at all are NOT detected by MIG001/MIG002.  These 
migrations
+may silently break SQLite foreign key handling.  Detecting this pattern 
requires
+understanding whether foreign key relationships exist on the affected tables, 
which
+is beyond AST-level analysis.  This is tracked as a future MIG004 candidate.
+
+
+Suppression
+===========
+
+Add ``# noqa: MIG0XX`` to a line to suppress the corresponding check for that 
line.
+Multiple codes can be comma-separated: ``# noqa: MIG001, MIG003``.
+
+Include a brief reason after ``--``::
+
+    # noqa: MIG003 -- simple UPDATE safe in offline mode
+"""
+
+from __future__ import annotations
+
+import ast
+import re
+import sys
+from pathlib import Path
+
+from rich.console import Console
+
+console = Console(color_system="standard", width=200)
+
+DML_KEYWORDS = ("UPDATE ", "INSERT ", "DELETE ")
+
+
+def _get_noqa_codes(line: str) -> set[str]:
+    """Extract noqa codes from a source line."""
+    match = re.search(r"#\s*noqa:\s*([\w,\s]+)", line)
+    if match:
+        return {code.strip() for code in match.group(1).split(",")}
+    return set()
+
+
+def _is_dml_string(node: ast.expr) -> bool:
+    """Check if an AST expression is a string literal starting with a DML 
keyword."""
+    if isinstance(node, ast.Constant) and isinstance(node.value, str):
+        stripped = node.value.strip().upper()
+        return any(stripped.startswith(kw) for kw in DML_KEYWORDS)
+    # f-strings: check the first string fragment
+    if isinstance(node, ast.JoinedStr):
+        for value in node.values:
+            if isinstance(value, ast.Constant) and isinstance(value.value, 
str):
+                stripped = value.value.strip().upper()
+                if any(stripped.startswith(kw) for kw in DML_KEYWORDS):
+                    return True
+                break  # only the leading fragment matters
+    return False
+
+
+def _is_op_execute_with_dml(node: ast.Call) -> bool:
+    """Check if a Call node is ``op.execute(<DML string>)``."""
+    if not (
+        isinstance(node.func, ast.Attribute)
+        and isinstance(node.func.value, ast.Name)
+        and node.func.value.id == "op"
+        and node.func.attr == "execute"
+    ):
+        return False
+    if not node.args:
+        return False
+    arg = node.args[0]
+    # Direct string: op.execute("UPDATE ...")
+    if _is_dml_string(arg):
+        return True
+    # Wrapped: op.execute(text("UPDATE ...")) or op.execute(sa.text("UPDATE 
..."))
+    if isinstance(arg, ast.Call) and arg.args:
+        return _is_dml_string(arg.args[0])
+    return False
+
+
+def _is_op_call(node: ast.Call) -> bool:
+    """Check if a Call node is ``op.<something>(...)``."""
+    return (
+        isinstance(node.func, ast.Attribute)
+        and isinstance(node.func.value, ast.Name)
+        and node.func.value.id == "op"
+    )
+
+
+def _find_disable_sqlite_fkeys_line(func_node: ast.FunctionDef) -> int | None:
+    """Find the line number of the first ``with disable_sqlite_fkeys(op):`` in 
a function."""
+    # Collect all matches then take min() — ast.walk BFS order does not 
guarantee
+    # line-number order when guards appear at different nesting depths.
+    found: list[int] = []
+    for node in ast.walk(func_node):
+        if not isinstance(node, ast.With):
+            continue
+        for item in node.items:
+            ctx = item.context_expr
+            if isinstance(ctx, ast.Call):
+                func = ctx.func
+                if (isinstance(func, ast.Name) and func.id == 
"disable_sqlite_fkeys") or (
+                    isinstance(func, ast.Attribute) and func.attr == 
"disable_sqlite_fkeys"
+                ):
+                    found.append(node.lineno)
+    return min(found) if found else None

Review Comment:
   MIG001/MIG002 rely on comparing `node.lineno` with `guard_line`, which 
misses a key SQLite hazard: `with op.batch_alter_table(...), 
disable_sqlite_fkeys(op):` (i.e., `disable_sqlite_fkeys` not first in the same 
`with` statement). In that case, the first context expression is evaluated 
before the PRAGMA is applied, but both calls share the same `lineno`, so the 
current `>= guard_line` logic won’t flag it. Suggestion: track the guard 
location more precisely (e.g., detect `ast.With` nodes and ensure 
`disable_sqlite_fkeys(...)` is the first item), and report MIG002 (and/or a 
dedicated rule) when it is not.



##########
scripts/ci/prek/check_migration_patterns.py:
##########
@@ -0,0 +1,383 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# /// script
+# requires-python = ">=3.10,<3.11"
+# dependencies = [
+#   "rich>=13.6.0",
+# ]
+# ///
+"""
+Static analysis checks for Alembic migration anti-patterns.
+
+These checks use AST analysis to detect common migration mistakes that can 
cause
+silent failures, particularly on SQLite where ``PRAGMA foreign_keys`` has 
specific
+requirements about transaction state.
+
+Rules
+=====
+
+MIG001 -- DML before ``disable_sqlite_fkeys``
+----------------------------------------------
+
+**What it does:**
+Detects ``op.execute()`` calls containing DML keywords (UPDATE, INSERT, 
DELETE) that
+appear before a ``with disable_sqlite_fkeys(op):`` block in the same function.
+
+**Why is this bad:**
+On SQLite, any DML statement triggers *autobegin*, starting a transaction.  
Once a
+transaction is active, ``PRAGMA foreign_keys=off`` (issued by 
``disable_sqlite_fkeys``)
+becomes a no-op.  This means foreign key checks remain enabled, and
+``batch_alter_table`` (which recreates tables) may fail with foreign key 
constraint
+violations.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.execute("UPDATE dag SET col = '' WHERE col IS NULL")  # triggers 
autobegin
+        with disable_sqlite_fkeys(op):  # PRAGMA is now a no-op!
+            with op.batch_alter_table("dag") as batch_op:
+                batch_op.alter_column("col", nullable=False)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        with disable_sqlite_fkeys(op):
+            op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+            with op.batch_alter_table("dag") as batch_op:
+                batch_op.alter_column("col", nullable=False)
+
+
+MIG002 -- DDL before ``disable_sqlite_fkeys``
+----------------------------------------------
+
+**What it does:**
+Detects any Alembic ``op.*`` call (DDL operations like ``op.add_column``,
+``op.drop_column``, ``op.create_table``, ``op.drop_table``, 
``op.create_index``,
+``op.batch_alter_table``, etc.) that appears before a
+``with disable_sqlite_fkeys(op):`` block in the same function.  Excludes DML 
calls
+covered by MIG001.
+
+**Why is this bad:**
+DDL operations also trigger *autobegin* on SQLite, making the subsequent
+``PRAGMA foreign_keys=off`` a no-op, exactly like DML.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.add_column("dag_run", sa.Column("created_at", ...))  # triggers 
autobegin
+        with disable_sqlite_fkeys(op):  # PRAGMA is now a no-op!
+            with op.batch_alter_table("backfill") as batch_op:
+                batch_op.alter_column("col", nullable=True)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        with disable_sqlite_fkeys(op):
+            op.add_column("dag_run", sa.Column("created_at", ...))
+            with op.batch_alter_table("backfill") as batch_op:
+                batch_op.alter_column("col", nullable=True)
+
+
+MIG003 -- DML without offline-mode guard
+------------------------------------------
+
+**What it does:**
+Detects ``op.execute()`` calls containing DML keywords (UPDATE, INSERT, 
DELETE) in
+``upgrade()`` or ``downgrade()`` functions that do not contain a
+``context.is_offline_mode()`` check anywhere in the same function.
+
+**Why is this bad:**
+Alembic's offline mode generates SQL scripts instead of executing against a 
live
+database.  DML statements like ``UPDATE ... WHERE col IS NULL`` depend on 
actual data
+and produce no useful output in offline mode.  Without an 
``is_offline_mode()`` guard,
+the generated script includes DML that may fail or silently do nothing when 
applied
+later.
+
+**Example (bad):**
+
+.. code-block:: python
+
+    def upgrade():
+        op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+        with op.batch_alter_table("dag") as batch_op:
+            batch_op.alter_column("col", nullable=False)
+
+**Use instead:**
+
+.. code-block:: python
+
+    def upgrade():
+        if not context.is_offline_mode():
+            op.execute("UPDATE dag SET col = '' WHERE col IS NULL")
+        with op.batch_alter_table("dag") as batch_op:
+            batch_op.alter_column("col", nullable=False)
+
+
+Known Limitation -- Future MIG004 Candidate
+---------------------------------------------
+
+Migrations that perform DML and use ``batch_alter_table`` but never call
+``disable_sqlite_fkeys`` at all are NOT detected by MIG001/MIG002.  These 
migrations
+may silently break SQLite foreign key handling.  Detecting this pattern 
requires
+understanding whether foreign key relationships exist on the affected tables, 
which
+is beyond AST-level analysis.  This is tracked as a future MIG004 candidate.
+
+
+Suppression
+===========
+
+Add ``# noqa: MIG0XX`` to a line to suppress the corresponding check for that 
line.
+Multiple codes can be comma-separated: ``# noqa: MIG001, MIG003``.
+
+Include a brief reason after ``--``::
+
+    # noqa: MIG003 -- simple UPDATE safe in offline mode
+"""
+
+from __future__ import annotations
+
+import ast
+import re
+import sys
+from pathlib import Path
+
+from rich.console import Console
+
+console = Console(color_system="standard", width=200)
+
+DML_KEYWORDS = ("UPDATE ", "INSERT ", "DELETE ")
+
+
+def _get_noqa_codes(line: str) -> set[str]:
+    """Extract noqa codes from a source line."""
+    match = re.search(r"#\s*noqa:\s*([\w,\s]+)", line)
+    if match:
+        return {code.strip() for code in match.group(1).split(",")}
+    return set()
+
+
+def _is_dml_string(node: ast.expr) -> bool:
+    """Check if an AST expression is a string literal starting with a DML 
keyword."""
+    if isinstance(node, ast.Constant) and isinstance(node.value, str):
+        stripped = node.value.strip().upper()
+        return any(stripped.startswith(kw) for kw in DML_KEYWORDS)
+    # f-strings: check the first string fragment
+    if isinstance(node, ast.JoinedStr):
+        for value in node.values:
+            if isinstance(value, ast.Constant) and isinstance(value.value, 
str):
+                stripped = value.value.strip().upper()
+                if any(stripped.startswith(kw) for kw in DML_KEYWORDS):
+                    return True
+                break  # only the leading fragment matters
+    return False
+
+
+def _is_op_execute_with_dml(node: ast.Call) -> bool:
+    """Check if a Call node is ``op.execute(<DML string>)``."""
+    if not (
+        isinstance(node.func, ast.Attribute)
+        and isinstance(node.func.value, ast.Name)
+        and node.func.value.id == "op"
+        and node.func.attr == "execute"
+    ):
+        return False
+    if not node.args:
+        return False
+    arg = node.args[0]
+    # Direct string: op.execute("UPDATE ...")
+    if _is_dml_string(arg):
+        return True
+    # Wrapped: op.execute(text("UPDATE ...")) or op.execute(sa.text("UPDATE 
..."))
+    if isinstance(arg, ast.Call) and arg.args:
+        return _is_dml_string(arg.args[0])
+    return False
+
+
+def _is_op_call(node: ast.Call) -> bool:
+    """Check if a Call node is ``op.<something>(...)``."""
+    return (
+        isinstance(node.func, ast.Attribute)
+        and isinstance(node.func.value, ast.Name)
+        and node.func.value.id == "op"
+    )
+
+
+def _find_disable_sqlite_fkeys_line(func_node: ast.FunctionDef) -> int | None:
+    """Find the line number of the first ``with disable_sqlite_fkeys(op):`` in 
a function."""
+    # Collect all matches then take min() — ast.walk BFS order does not 
guarantee
+    # line-number order when guards appear at different nesting depths.
+    found: list[int] = []
+    for node in ast.walk(func_node):
+        if not isinstance(node, ast.With):
+            continue
+        for item in node.items:
+            ctx = item.context_expr
+            if isinstance(ctx, ast.Call):
+                func = ctx.func
+                if (isinstance(func, ast.Name) and func.id == 
"disable_sqlite_fkeys") or (
+                    isinstance(func, ast.Attribute) and func.attr == 
"disable_sqlite_fkeys"
+                ):
+                    found.append(node.lineno)
+    return min(found) if found else None
+
+
+def _has_offline_mode_check(func_node: ast.FunctionDef) -> bool:
+    """Check if function contains ``context.is_offline_mode()`` call."""
+    for node in ast.walk(func_node):
+        if isinstance(node, ast.Call):
+            func = node.func
+            if isinstance(func, ast.Attribute) and func.attr == 
"is_offline_mode":
+                return True
+    return False

Review Comment:
   `_has_offline_mode_check` currently returns true for any call to 
`*.is_offline_mode()`, not specifically `context.is_offline_mode()`. This can 
cause false negatives for MIG003 if some other object in the migration happens 
to have an `is_offline_mode()` method. Tighten the check to require 
`func.value` is `ast.Name` with `id == 'context'` (and optionally also support 
the known Alembic patterns used in this repo, if any).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to