This is an automated email from the ASF dual-hosted git repository.
924060929 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3a2d9d55f1e [fix](nereids) clamp the merged limit of MERGE_TOP_N by
the parent offset (#64306)
3a2d9d55f1e is described below
commit 3a2d9d55f1e8e2d74187179ef89c36c8562815fd
Author: 924060929 <[email protected]>
AuthorDate: Wed Jun 10 13:07:12 2026 +0800
[fix](nereids) clamp the merged limit of MERGE_TOP_N by the parent offset
(#64306)
`MergeTopNs` (the `MERGE_TOP_N` rewrite rule) merges a parent `TopN`
into its child `TopN`
when their order keys are compatible. When the parent `TopN` carries a
non-zero `OFFSET`, the
merged limit was computed as `min(parent.limit, child.limit)`, which
ignores that the parent
offset consumes rows from the child's output. The merged `TopN`
therefore keeps too many rows
and the query returns a wrong result.
Example:
```sql
SELECT * FROM (SELECT k, v FROM t ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3
OFFSET 4;
```
The inner `ORDER BY k LIMIT 5` yields 5 rows; the outer `LIMIT 3 OFFSET
4` skips 4 of them, so
only 1 row should remain. Before this PR the rule merged the two `TopN`
into `OFFSET 4 LIMIT 3`
(instead of `OFFSET 4 LIMIT 1`), so it returned 3 rows.
Fix: clamp the merged limit by `max(child.limit - parent.offset, 0)`,
the same semantics
already used by `MergeLimits.mergeLimit` for consecutive limits. The bug
only triggers when the
outer `TopN` has a non-zero offset (offset = 0 makes both formulas
equal).
The existing unit test `MergeTopNsTest.testOffset` asserted the buggy
value (`limit == 10`,
while the correct value is `9`); this PR corrects that assertion as
well.
### Release note
Fix the wrong result produced by the `MERGE_TOP_N` optimization when an
outer
`ORDER BY ... LIMIT` carries a non-zero `OFFSET` over an inner `ORDER BY
... LIMIT`.
---
.../doris/nereids/rules/rewrite/MergeTopNs.java | 11 +++--
.../nereids/rules/rewrite/MergeTopNsTest.java | 20 ++++++++-
.../data/correctness_p0/test_merge_topn_offset.out | 10 +++++
.../correctness_p0/test_merge_topn_offset.groovy | 50 ++++++++++++++++++++++
4 files changed, 87 insertions(+), 4 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java
index 39ea7a384df..3614434459d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeTopNs.java
@@ -30,7 +30,8 @@ import java.util.List;
* <p>
* topN - child topN
* If child topN orderby list is prefix subList of topN =>
- * topN with limit = min(topN.limit, childTopN.limit), offset = topN.offset +
childTopN.offset
+ * topN with limit = min(topN.limit, max(childTopN.limit - topN.offset, 0)),
+ * offset = topN.offset + childTopN.offset
*/
public class MergeTopNs extends OneRewriteRuleFactory {
@Override
@@ -48,8 +49,12 @@ public class MergeTopNs extends OneRewriteRuleFactory {
long childOffset = childTopN.getOffset();
long childLimit = childTopN.getLimit();
long newOffset = offset + childOffset;
- // choose min limit
- long newLimit = Math.min(limit, childLimit);
+ // The parent's offset is applied on top of the child's
output, so only
+ // (childLimit - offset) of the child's rows survive.
Clamp the merged limit
+ // by that remaining count; otherwise rows beyond the
child's limit leak through
+ // when the parent has a non-zero offset (DORIS-26301).
Same semantics as
+ // MergeLimits.mergeLimit for consecutive limits.
+ long newLimit = Math.min(limit, Math.max(childLimit -
offset, 0));
return topN.withLimitChild(newLimit, newOffset,
childTopN.child());
}).toRule(RuleType.MERGE_TOP_N);
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java
index e16a38be6ba..63b2c97ac31 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeTopNsTest.java
@@ -70,7 +70,25 @@ class MergeTopNsTest implements MemoPatternMatchSupported {
PlanChecker.from(MemoTestUtils.createConnectContext(), plan)
.applyTopDown(new MergeTopNs())
.matches(
- logicalTopN(logicalOlapScan()).when(topN ->
topN.getLimit() == 10 && topN.getOffset() == 4)
+ logicalTopN(logicalOlapScan()).when(topN ->
topN.getLimit() == 9 && topN.getOffset() == 4)
+ );
+ }
+
+ @Test
+ void testParentOffsetReducesChildLimit() {
+ // DORIS-26301: when the parent (upper) TopN has a non-zero offset,
the merged limit must be
+ // clamped by (childLimit - parentOffset); otherwise rows beyond the
child's limit leak through.
+ // child : ORDER BY k LIMIT 5 (limit=5, offset=0) -> yields
5 rows
+ // parent: ORDER BY k LIMIT 3 OFFSET 4 (limit=3, offset=4) -> skips
4, only 1 row remains
+ // merged: offset = 4, limit = min(3, max(5 - 4, 0)) = 1
+ LogicalPlan plan = new LogicalPlanBuilder(score)
+ .topN(5, 0, ImmutableList.of(0))
+ .topN(3, 4, ImmutableList.of(0))
+ .build();
+ PlanChecker.from(MemoTestUtils.createConnectContext(), plan)
+ .applyTopDown(new MergeTopNs())
+ .matches(
+ logicalTopN(logicalOlapScan()).when(topN ->
topN.getLimit() == 1 && topN.getOffset() == 4)
);
}
diff --git a/regression-test/data/correctness_p0/test_merge_topn_offset.out
b/regression-test/data/correctness_p0/test_merge_topn_offset.out
new file mode 100644
index 00000000000..bf103b2fed2
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_merge_topn_offset.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !merge_topn_off4lim3 --
+5 50
+
+-- !merge_topn_off2lim3 --
+3 30
+4 40
+5 50
+
+-- !merge_topn_off6lim3 --
diff --git
a/regression-test/suites/correctness_p0/test_merge_topn_offset.groovy
b/regression-test/suites/correctness_p0/test_merge_topn_offset.groovy
new file mode 100644
index 00000000000..fec6162d1d6
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_merge_topn_offset.groovy
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_merge_topn_offset") {
+ // DORIS-26301: when MERGE_TOP_N merges a parent TopN that carries a
non-zero OFFSET into its
+ // child TopN, the merged limit must be clamped by (childLimit -
parentOffset). Otherwise rows
+ // beyond the child's limit leak through. Before the fix, an outer "LIMIT
3 OFFSET 4" over an
+ // inner "LIMIT 5" wrongly returned 3 rows (k=5,6,7) instead of the single
correct row (k=5).
+ sql "SET enable_nereids_planner=true"
+ sql "SET enable_fallback_to_original_planner=false"
+
+ def tableName = "test_merge_topn_offset"
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE ${tableName} (
+ k INT NOT NULL,
+ v INT NOT NULL
+ )
+ DUPLICATE KEY(k)
+ DISTRIBUTED BY HASH(k) BUCKETS 1
+ PROPERTIES ("replication_num" = "1")
+ """
+ sql """ INSERT INTO ${tableName} VALUES
+ (1, 10), (2, 20), (3, 30), (4, 40), (5, 50),
+ (6, 60), (7, 70), (8, 80), (9, 90), (10, 100) """
+
+ // MERGE_TOP_N is enabled by default; stacking an outer TopN with an
OFFSET over an inner TopN
+ // triggers it. The inner "ORDER BY k LIMIT 5" yields k = 1..5.
+
+ // outer offset (4) consumes 4 of the inner's 5 rows -> only k=5 remains
+ qt_merge_topn_off4lim3 """ SELECT * FROM (SELECT k, v FROM ${tableName}
ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3 OFFSET 4 """
+ // outer offset (2) is within the inner limit -> k = 3,4,5
+ qt_merge_topn_off2lim3 """ SELECT * FROM (SELECT k, v FROM ${tableName}
ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3 OFFSET 2 """
+ // outer offset (6) exceeds the inner limit (5) -> empty
+ qt_merge_topn_off6lim3 """ SELECT * FROM (SELECT k, v FROM ${tableName}
ORDER BY k LIMIT 5) s ORDER BY k LIMIT 3 OFFSET 6 """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]