mikebridge commented on code in PR #39859: URL: https://github.com/apache/superset/pull/39859#discussion_r3210498932
########## superset/migrations/versions/2026-05-01_23-36_2bee73611e32_composite_pk_association_tables.py: ########## @@ -0,0 +1,332 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""composite_pk_association_tables + +Replace the unused synthetic ``id INTEGER PRIMARY KEY`` on eight many-to-many +association tables with a composite primary key on the two FK columns. Drops +the now-redundant ``UniqueConstraint(fk1, fk2)`` on the two tables that +already carry one. Pre-flight: deletes rows with NULL FK values (six tables +allow them today) and any duplicate ``(fk1, fk2)`` rows. + +Motivated by SQLAlchemy-Continuum issue #129 (M2M restore against junction +tables with surrogate PKs); also closes the data-integrity hole where six +of the eight tables lacked DB-level uniqueness. + +Revision ID: 2bee73611e32 +Revises: ce6bd21901ab +Create Date: 2026-05-01 23:36:34.050058 + +""" + +import logging +from typing import NamedTuple + +import sqlalchemy as sa +from alembic import op +from sqlalchemy import inspect +from sqlalchemy.engine import Connection + +# revision identifiers, used by Alembic. +revision = "2bee73611e32" +down_revision = "ce6bd21901ab" + +logger = logging.getLogger("alembic.env") + + +class AssociationTable(NamedTuple): + """A junction table being converted from surrogate-id PK to composite-FK PK.""" + + name: str + fk1: str + fk2: str + + +# Order is alphabetical by table name; deterministic for review and bisection. +AFFECTED_TABLES: list[AssociationTable] = [ + AssociationTable("dashboard_roles", "dashboard_id", "role_id"), + AssociationTable("dashboard_slices", "dashboard_id", "slice_id"), + AssociationTable("dashboard_user", "user_id", "dashboard_id"), + AssociationTable("report_schedule_user", "user_id", "report_schedule_id"), + AssociationTable("rls_filter_roles", "role_id", "rls_filter_id"), + AssociationTable("rls_filter_tables", "table_id", "rls_filter_id"), + AssociationTable("slice_user", "user_id", "slice_id"), + AssociationTable("sqlatable_user", "user_id", "table_id"), +] + +# These two tables already declare ``UniqueConstraint(fk1, fk2)`` in the model; +# the composite PK subsumes it, so the migration drops the redundant constraint. +TABLES_WITH_PRE_EXISTING_UNIQUE: set[str] = { + "dashboard_slices", + "report_schedule_user", +} + +# Documentation set: tables whose FK columns are nullable in their original +# create_table migrations (``dashboard_roles.dashboard_id`` from revision +# e11ccdd12658 is the most recent addition). ``report_schedule_user`` is the +# only affected table created with both FK columns ``NOT NULL`` and is +# intentionally absent here. This set is no longer consulted at runtime — the +# upgrade now runs the NULL-FK cleanup on every affected table because the +# DELETE is a cheap no-op when the columns are already NOT NULL, and that +# eliminates the risk of bugs from this set going stale (the +# ``dashboard_roles`` omission caught in PR review was exactly that bug). +TABLES_WITH_NULLABLE_FKS: set[str] = { + "dashboard_roles", + "dashboard_slices", + "dashboard_user", + "rls_filter_roles", + "rls_filter_tables", + "slice_user", + "sqlatable_user", +} + + +def _check_no_external_fks_to_id(conn: Connection) -> None: + """Raise ``RuntimeError`` if any foreign key in the database references one + of the eight junction-table ``id`` columns. Uses SQLAlchemy's ``Inspector`` + for dialect-agnostic introspection across PostgreSQL, MySQL, and SQLite. + + Scope limitation: ``Inspector.get_table_names()`` returns tables in the + connection's default schema only. On PostgreSQL deployments where Superset + metadata lives in a non-default schema, or on multi-schema deployments + that allow cross-schema FKs, an external FK in another schema would not + be detected. This is acceptable for the standard single-schema + deployment that Superset documents; operators with multi-schema + metadata should run the equivalent inventory query against + ``information_schema.referential_constraints`` themselves before + applying. + """ + affected = {t.name for t in AFFECTED_TABLES} + insp = inspect(conn) + for table_name in insp.get_table_names(): + if table_name in affected: + continue + for fk in insp.get_foreign_keys(table_name): + if fk["referred_table"] in affected and "id" in fk["referred_columns"]: + raise RuntimeError( + f"Cannot drop synthetic id from {fk['referred_table']}: " + f"external FK {fk.get('name', '<unnamed>')} on {table_name} " + f"references {fk['referred_table']}({fk['referred_columns']}). " + f"Drop or migrate the referencing FK before applying this " + f"migration." Review Comment: ...I believe ruff doesn't flag it because the concatenation as a whole has interpolation. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
