pierrejeambrun commented on code in PR #68869:
URL: https://github.com/apache/airflow/pull/68869#discussion_r3482318219


##########
airflow-core/newsfragments/68869.bugfix.rst:
##########
@@ -0,0 +1 @@
+Fix cursor (keyset) pagination silently dropping rows when sorting by a 
nullable column. The keyset filter and the ``ORDER BY`` disagreed on where 
NULLs fall, so rows on one side of the NULL boundary were skipped. Cursor 
pagination now orders NULLs consistently across all database backends (matching 
PostgreSQL: last when ascending, first when descending) and returns every row 
exactly once.

Review Comment:
   to remove



##########
airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_run.py:
##########
@@ -662,6 +662,41 @@ def test_cursor_pagination_invalid_token(self, 
test_client):
         )
         assert response.status_code == 400
 
+    @pytest.mark.usefixtures("configure_git_connection_for_dag_bundle")
+    def test_cursor_pagination_nullable_sort_column_returns_all_rows(self, 
test_client, session):
+        """Cursor pagination sorted by a nullable column must not silently 
drop rows.
+
+        Regression for https://github.com/apache/airflow/issues/68858: with a 
NULL in the

Review Comment:
   
   ```suggestion
           NULL in the
   ```



##########
airflow-core/tests/unit/api_fastapi/common/test_cursors.py:
##########
@@ -198,3 +198,153 @@ def 
test_apply_cursor_filter_null_value_does_not_raise(self):
         sql = str(stmt)
         assert "IS NULL" in sql
         assert "IS NOT NULL" in sql
+
+
+class TestSortParamKeysetColumns:
+    """Unit coverage for the NULLs-last keyset expansion added for cursor 
pagination."""
+
+    @pytest.mark.parametrize(
+        ("column", "expected"),
+        [
+            pytest.param(TaskInstance.start_date, True, id="mapped-nullable"),
+            pytest.param(TaskInstance.map_index, False, id="mapped-not-null"),
+            pytest.param(Column("x", String, nullable=True), True, 
id="raw-column"),
+            pytest.param(SimpleNamespace(), True, 
id="unknown-expression-defaults-nullable"),
+        ],
+    )
+    def test_is_nullable_sort_column(self, column, expected):
+        assert SortParam._is_nullable_sort_column(column) is expected
+
+    def test_get_keyset_columns_inserts_rank_before_nullable_only(self):
+        sp = SortParam(["id", "start_date", "map_index"], TaskInstance)
+        sp.set_value(["start_date"])
+        names = [name for name, _col, _desc in sp.get_keyset_columns()]
+        # start_date is nullable -> rank prepended; id (pk) is not -> 
untouched.
+        assert names == ["start_date__null_rank", "start_date", "id"]
+
+    def test_get_keyset_columns_no_rank_for_non_nullable_sort(self):
+        sp = SortParam(["id", "map_index"], TaskInstance)
+        sp.set_value(["map_index"])
+        names = [name for name, _col, _desc in sp.get_keyset_columns()]
+        assert names == ["map_index", "id"]
+
+    @pytest.mark.parametrize(
+        ("values", "expected"),
+        [
+            pytest.param(["2024-01-01", "uuid"], [0, "2024-01-01", "uuid"], 
id="non-null-rank-0"),
+            pytest.param([None, "uuid"], [1, None, "uuid"], id="null-rank-1"),
+        ],
+    )
+    def test_expand_keyset_values_aligns_with_columns(self, values, expected):
+        sp = SortParam(["id", "start_date"], TaskInstance)
+        sp.set_value(["start_date"])
+        assert sp.expand_keyset_values(values) == expected
+
+
+class TestKeysetPaginationNullableColumn:
+    """End-to-end: cursor pagination over a nullable sort column returns every 
row exactly once.
+
+    Regression for https://github.com/apache/airflow/issues/68858. Before the 
fix the

Review Comment:
   To remove



##########
airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py:
##########
@@ -2154,6 +2154,107 @@ def 
test_cursor_pagination_order_by_run_after_roundtrips(self, test_client, sess
         assert ids1.isdisjoint(ids2), "Pages must not overlap"
         assert len(ids1) + len(ids2) == total_tis
 
+    def test_cursor_pagination_nullable_sort_column_returns_all_rows(self, 
test_client, session):
+        """Cursor pagination sorted by a nullable column must not silently 
drop rows.
+
+        Regression for https://github.com/apache/airflow/issues/68858: with 
NULLs present

Review Comment:
   Same here, do not mention tickets in test docstring.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to