pierrejeambrun commented on code in PR #68869:
URL: https://github.com/apache/airflow/pull/68869#discussion_r3482318219
##########
airflow-core/newsfragments/68869.bugfix.rst:
##########
@@ -0,0 +1 @@
+Fix cursor (keyset) pagination silently dropping rows when sorting by a
nullable column. The keyset filter and the ``ORDER BY`` disagreed on where
NULLs fall, so rows on one side of the NULL boundary were skipped. Cursor
pagination now orders NULLs consistently across all database backends (matching
PostgreSQL: last when ascending, first when descending) and returns every row
exactly once.
Review Comment:
to remove
##########
airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_run.py:
##########
@@ -662,6 +662,41 @@ def test_cursor_pagination_invalid_token(self,
test_client):
)
assert response.status_code == 400
+ @pytest.mark.usefixtures("configure_git_connection_for_dag_bundle")
+ def test_cursor_pagination_nullable_sort_column_returns_all_rows(self,
test_client, session):
+ """Cursor pagination sorted by a nullable column must not silently
drop rows.
+
+ Regression for https://github.com/apache/airflow/issues/68858: with a
NULL in the
Review Comment:
```suggestion
NULL in the
```
##########
airflow-core/tests/unit/api_fastapi/common/test_cursors.py:
##########
@@ -198,3 +198,153 @@ def
test_apply_cursor_filter_null_value_does_not_raise(self):
sql = str(stmt)
assert "IS NULL" in sql
assert "IS NOT NULL" in sql
+
+
+class TestSortParamKeysetColumns:
+ """Unit coverage for the NULLs-last keyset expansion added for cursor
pagination."""
+
+ @pytest.mark.parametrize(
+ ("column", "expected"),
+ [
+ pytest.param(TaskInstance.start_date, True, id="mapped-nullable"),
+ pytest.param(TaskInstance.map_index, False, id="mapped-not-null"),
+ pytest.param(Column("x", String, nullable=True), True,
id="raw-column"),
+ pytest.param(SimpleNamespace(), True,
id="unknown-expression-defaults-nullable"),
+ ],
+ )
+ def test_is_nullable_sort_column(self, column, expected):
+ assert SortParam._is_nullable_sort_column(column) is expected
+
+ def test_get_keyset_columns_inserts_rank_before_nullable_only(self):
+ sp = SortParam(["id", "start_date", "map_index"], TaskInstance)
+ sp.set_value(["start_date"])
+ names = [name for name, _col, _desc in sp.get_keyset_columns()]
+ # start_date is nullable -> rank prepended; id (pk) is not ->
untouched.
+ assert names == ["start_date__null_rank", "start_date", "id"]
+
+ def test_get_keyset_columns_no_rank_for_non_nullable_sort(self):
+ sp = SortParam(["id", "map_index"], TaskInstance)
+ sp.set_value(["map_index"])
+ names = [name for name, _col, _desc in sp.get_keyset_columns()]
+ assert names == ["map_index", "id"]
+
+ @pytest.mark.parametrize(
+ ("values", "expected"),
+ [
+ pytest.param(["2024-01-01", "uuid"], [0, "2024-01-01", "uuid"],
id="non-null-rank-0"),
+ pytest.param([None, "uuid"], [1, None, "uuid"], id="null-rank-1"),
+ ],
+ )
+ def test_expand_keyset_values_aligns_with_columns(self, values, expected):
+ sp = SortParam(["id", "start_date"], TaskInstance)
+ sp.set_value(["start_date"])
+ assert sp.expand_keyset_values(values) == expected
+
+
+class TestKeysetPaginationNullableColumn:
+ """End-to-end: cursor pagination over a nullable sort column returns every
row exactly once.
+
+ Regression for https://github.com/apache/airflow/issues/68858. Before the
fix the
Review Comment:
To remove
##########
airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py:
##########
@@ -2154,6 +2154,107 @@ def
test_cursor_pagination_order_by_run_after_roundtrips(self, test_client, sess
assert ids1.isdisjoint(ids2), "Pages must not overlap"
assert len(ids1) + len(ids2) == total_tis
+ def test_cursor_pagination_nullable_sort_column_returns_all_rows(self,
test_client, session):
+ """Cursor pagination sorted by a nullable column must not silently
drop rows.
+
+ Regression for https://github.com/apache/airflow/issues/68858: with
NULLs present
Review Comment:
Same here, do not mention tickets in test docstring.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]