Copilot commented on code in PR #66711:
URL: https://github.com/apache/airflow/pull/66711#discussion_r3222035545
##########
airflow-core/src/airflow/migrations/versions/0036_3_0_0_add_name_field_to_dataset_model.py:
##########
@@ -107,17 +107,31 @@ def downgrade():
# Keep only the datasets with min id if multiple orphaned datasets with
the same uri exist.
# This usually happens when all the dags are turned off.
- op.execute(
- """
- with unique_dataset as (select min(id) as min_id, uri as uri from
dataset group by id),
- duplicate_dataset_id as (
- select id from dataset join unique_dataset
- on dataset.uri = unique_dataset.uri
- where dataset.id > unique_dataset.min_id
+ if op.get_bind().dialect.name == "mysql":
+ # MySQL/MariaDB error 1093: can't specify target table in a FROM
clause of a subquery
+ # that modifies it. Wrapping in a derived table works around this.
+ op.execute(
+ "DELETE FROM dataset WHERE id IN ("
+ " SELECT id FROM ("
+ " SELECT d.id FROM dataset d"
+ " JOIN (SELECT uri, MIN(id) AS min_id FROM dataset GROUP BY
uri) m"
+ " ON d.uri = m.uri"
+ " WHERE d.id > m.min_id"
+ " ) AS sub"
+ ")"
+ )
+ else:
+ op.execute(
+ """
+ with unique_dataset as (select min(id) as min_id, uri as uri from
dataset group by id),
Review Comment:
In the non-MySQL downgrade path, the CTE `unique_dataset` groups by `id`,
which creates one row per dataset row and turns the subsequent join into an
unnecessary N×N self-join for each duplicated URI. This is much more expensive
than intended and is easy to fix by grouping by `uri` (so the CTE has one row
per URI with its MIN(id)).
##########
airflow-core/tests/unit/api_fastapi/common/test_exceptions.py:
##########
@@ -271,7 +284,20 @@ def
test_handle_single_column_unique_constraint_error_with_stacktrace(
exeinfo_response_error.value.detail.pop("message", None) # type:
ignore[attr-defined]
assert exeinfo_response_error.value.status_code ==
expected_exception.status_code
- assert exeinfo_response_error.value.detail == expected_exception.detail
+ # statement and orig_error exact formats vary between database servers
and connectors
+ # (e.g. mysqlclient uses %(name)s params while PyMySQL uses %s, and
MariaDB qualifies
+ # constraint names with a table prefix while MySQL does not), so when
the expected
+ # value is a list, verify the actual value matches one of the expected
values.
+ response_detail = exeinfo_response_error.value.detail
+ expected_detail = expected_exception.detail
+ for key in ("statement", "orig_error"):
+ actual_val = response_detail.pop(key, None) # type:
ignore[attr-defined]
+ expected_val = expected_detail.pop(key, None) # type:
ignore[attr-defined]
+ if isinstance(expected_val, list):
Review Comment:
This test mutates `expected_exception.detail` in-place via `expected_detail
= expected_exception.detail` followed by `expected_detail.pop(...)`. Since
`expected_exception` is the parametrized expected value, mutating it makes the
assertion logic harder to follow and can create order-dependent failures if the
object is ever reused. Consider copying the dict (e.g., `expected_detail =
expected_exception.detail.copy()` / deep copy) before popping keys for
comparison.
##########
airflow-core/tests/unit/api_fastapi/common/test_exceptions.py:
##########
@@ -379,13 +408,18 @@ def
test_handle_multiple_columns_unique_constraint_error_with_stacktrace(
response_detail = exeinfo_response_error.value.detail
expected_detail = expected_exception.detail
actual_statement = response_detail.pop("statement", None) # type:
ignore[attr-defined]
- expected_detail.pop("statement", None)
+ expected_detail.pop("statement", None) # type: ignore[attr-defined]
+ actual_orig = response_detail.pop("orig_error", None) # type:
ignore[attr-defined]
+ expected_orig = expected_detail.pop("orig_error", None) # type:
ignore[attr-defined]
Review Comment:
Same in-place mutation issue here: `expected_detail =
expected_exception.detail` is modified by popping keys. Copy the
expected/actual detail dicts before destructively removing keys so the
`expected_exception` object remains immutable within the test and future
refactors don’t introduce hidden coupling.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]