This is an automated email from the ASF dual-hosted git repository.

924060929 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3a2d9d55f1e [fix](nereids) clamp the merged limit of MERGE_TOP_N by 
the parent offset (#64306)
3a2d9d55f1e is described below

commit 3a2d9d55f1e8e2d74187179ef89c36c8562815fd
Author: 924060929 <[email protected]>
AuthorDate: Wed Jun 10 13:07:12 2026 +0800

    [fix](nereids) clamp the merged limit of MERGE_TOP_N by the parent offset 
(#64306)
    
    `MergeTopNs` (the `MERGE_TOP_N` rewrite rule) merges a parent `TopN`
    into its child `TopN`
    when their order keys are compatible. When the parent `TopN` carries a
    non-zero `OFFSET`, the
    merged limit was computed as `min(parent.limit, child.limit)`, which
    ignores that the parent
    offset consumes rows from the child's output. The merged `TopN`
    therefore keeps too many rows
    and the query returns a wrong result.
    
    Example:
    
    ```sql
    SELECT * FROM (SELECT k, v FROM t ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3 
OFFSET 4;
    ```
    
    The inner `ORDER BY k LIMIT 5` yields 5 rows; the outer `LIMIT 3 OFFSET
    4` skips 4 of them, so
    only 1 row should remain. Before this PR the rule merged the two `TopN`
    into `OFFSET 4 LIMIT 3`
    (instead of `OFFSET 4 LIMIT 1`), so it returned 3 rows.
    
    Fix: clamp the merged limit by `max(child.limit - parent.offset, 0)`,
    the same semantics
    already used by `MergeLimits.mergeLimit` for consecutive limits. The bug
    only triggers when the
    outer `TopN` has a non-zero offset (offset = 0 makes both formulas
    equal).
    
    The existing unit test `MergeTopNsTest.testOffset` asserted the buggy
    value (`limit == 10`,
    while the correct value is `9`); this PR corrects that assertion as
    well.
    
    ### Release note
    
    Fix the wrong result produced by the `MERGE_TOP_N` optimization when an
    outer
    `ORDER BY ... LIMIT` carries a non-zero `OFFSET` over an inner `ORDER BY
    ... LIMIT`.
---
 .../doris/nereids/rules/rewrite/MergeTopNs.java    | 11 +++--
 .../nereids/rules/rewrite/MergeTopNsTest.java      | 20 ++++++++-
 .../data/correctness_p0/test_merge_topn_offset.out | 10 +++++
 .../correctness_p0/test_merge_topn_offset.groovy   | 50 ++++++++++++++++++++++
 4 files changed, 87 insertions(+), 4 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java
index 39ea7a384df..3614434459d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java
@@ -30,7 +30,8 @@ import java.util.List;
  * <p>
  * topN - child topN
  * If child topN orderby list is prefix subList of topN =>
- * topN with limit = min(topN.limit, childTopN.limit), offset = topN.offset + 
childTopN.offset
+ * topN with limit = min(topN.limit, max(childTopN.limit - topN.offset, 0)),
+ *         offset = topN.offset + childTopN.offset
  */
 public class MergeTopNs extends OneRewriteRuleFactory {
     @Override
@@ -48,8 +49,12 @@ public class MergeTopNs extends OneRewriteRuleFactory {
                     long childOffset = childTopN.getOffset();
                     long childLimit = childTopN.getLimit();
                     long newOffset = offset + childOffset;
-                    // choose min limit
-                    long newLimit = Math.min(limit, childLimit);
+                    // The parent's offset is applied on top of the child's 
output, so only
+                    // (childLimit - offset) of the child's rows survive. 
Clamp the merged limit
+                    // by that remaining count; otherwise rows beyond the 
child's limit leak through
+                    // when the parent has a non-zero offset (DORIS-26301). 
Same semantics as
+                    // MergeLimits.mergeLimit for consecutive limits.
+                    long newLimit = Math.min(limit, Math.max(childLimit - 
offset, 0));
                     return topN.withLimitChild(newLimit, newOffset, 
childTopN.child());
                 }).toRule(RuleType.MERGE_TOP_N);
     }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java
index e16a38be6ba..63b2c97ac31 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java
@@ -70,7 +70,25 @@ class MergeTopNsTest implements MemoPatternMatchSupported {
         PlanChecker.from(MemoTestUtils.createConnectContext(), plan)
                 .applyTopDown(new MergeTopNs())
                 .matches(
-                        logicalTopN(logicalOlapScan()).when(topN -> 
topN.getLimit() == 10 && topN.getOffset() == 4)
+                        logicalTopN(logicalOlapScan()).when(topN -> 
topN.getLimit() == 9 && topN.getOffset() == 4)
+                );
+    }
+
+    @Test
+    void testParentOffsetReducesChildLimit() {
+        // DORIS-26301: when the parent (upper) TopN has a non-zero offset, 
the merged limit must be
+        // clamped by (childLimit - parentOffset); otherwise rows beyond the 
child's limit leak through.
+        // child : ORDER BY k LIMIT 5           (limit=5, offset=0) -> yields 
5 rows
+        // parent: ORDER BY k LIMIT 3 OFFSET 4  (limit=3, offset=4) -> skips 
4, only 1 row remains
+        // merged: offset = 4, limit = min(3, max(5 - 4, 0)) = 1
+        LogicalPlan plan = new LogicalPlanBuilder(score)
+                .topN(5, 0, ImmutableList.of(0))
+                .topN(3, 4, ImmutableList.of(0))
+                .build();
+        PlanChecker.from(MemoTestUtils.createConnectContext(), plan)
+                .applyTopDown(new MergeTopNs())
+                .matches(
+                        logicalTopN(logicalOlapScan()).when(topN -> 
topN.getLimit() == 1 && topN.getOffset() == 4)
                 );
     }
 
diff --git a/regression-test/data/correctness_p0/test_merge_topn_offset.out 
b/regression-test/data/correctness_p0/test_merge_topn_offset.out
new file mode 100644
index 00000000000..bf103b2fed2
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_merge_topn_offset.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !merge_topn_off4lim3 --
+5      50
+
+-- !merge_topn_off2lim3 --
+3      30
+4      40
+5      50
+
+-- !merge_topn_off6lim3 --
diff --git 
a/regression-test/suites/correctness_p0/test_merge_topn_offset.groovy 
b/regression-test/suites/correctness_p0/test_merge_topn_offset.groovy
new file mode 100644
index 00000000000..fec6162d1d6
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_merge_topn_offset.groovy
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_merge_topn_offset") {
+    // DORIS-26301: when MERGE_TOP_N merges a parent TopN that carries a 
non-zero OFFSET into its
+    // child TopN, the merged limit must be clamped by (childLimit - 
parentOffset). Otherwise rows
+    // beyond the child's limit leak through. Before the fix, an outer "LIMIT 
3 OFFSET 4" over an
+    // inner "LIMIT 5" wrongly returned 3 rows (k=5,6,7) instead of the single 
correct row (k=5).
+    sql "SET enable_nereids_planner=true"
+    sql "SET enable_fallback_to_original_planner=false"
+
+    def tableName = "test_merge_topn_offset"
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """
+        CREATE TABLE ${tableName} (
+            k INT NOT NULL,
+            v INT NOT NULL
+        )
+        DUPLICATE KEY(k)
+        DISTRIBUTED BY HASH(k) BUCKETS 1
+        PROPERTIES ("replication_num" = "1")
+    """
+    sql """ INSERT INTO ${tableName} VALUES
+        (1, 10), (2, 20), (3, 30), (4, 40), (5, 50),
+        (6, 60), (7, 70), (8, 80), (9, 90), (10, 100) """
+
+    // MERGE_TOP_N is enabled by default; stacking an outer TopN with an 
OFFSET over an inner TopN
+    // triggers it. The inner "ORDER BY k LIMIT 5" yields k = 1..5.
+
+    // outer offset (4) consumes 4 of the inner's 5 rows -> only k=5 remains
+    qt_merge_topn_off4lim3 """ SELECT * FROM (SELECT k, v FROM ${tableName} 
ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3 OFFSET 4 """
+    // outer offset (2) is within the inner limit -> k = 3,4,5
+    qt_merge_topn_off2lim3 """ SELECT * FROM (SELECT k, v FROM ${tableName} 
ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3 OFFSET 2 """
+    // outer offset (6) exceeds the inner limit (5) -> empty
+    qt_merge_topn_off6lim3 """ SELECT * FROM (SELECT k, v FROM ${tableName} 
ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3 OFFSET 6 """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to