This is an automated email from the ASF dual-hosted git repository.

lide-reed pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5d82f600b20 [fix](nereids) Fix pre-aggregation context leakage across 
join branches (#63357)
5d82f600b20 is described below

commit 5d82f600b202c8760e3fb4a53ec374323b16dbf2
Author: lw112 <[email protected]>
AuthorDate: Wed May 20 14:11:31 2026 +0800

    [fix](nereids) Fix pre-aggregation context leakage across join branches 
(#63357)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    For agg_keys tables, Nereids may incorrectly set PREAGGREGATION: ON for
    a normal scan in a join when the other join branch contains an aggregate
    subquery.
    
    Example SQL:
    ```
    select cw.term_id, cw.class_id, cw.original_num, cw.abs_week
    from repro_preagg_join cw
    join (
        select term_id, class_id, max(abs_week) as abs_week
        from repro_preagg_join
        where class_id in (10192497, 10192366)
        group by term_id, class_id
    ) mw on cw.class_id = mw.class_id
        and cw.abs_week = mw.abs_week;
    ```
    Root cause:
    
    1. If repro_preagg_join is an AGGREGATE KEY table and the same key
    exists in multiple rowsets, the left scan cw should merge rows before
    returning data. However, before this fix, the plan could mark the left
    scan as "PREAGGREGATION: ON".
    2. This makes the BE read the underlying rows directly without merging,
    so duplicate physical rows from multiple rowsets can be returned and
    then joined, producing duplicate query results.
    3. The right aggregate subquery itself is not duplicated. The wrong
    result is caused by the left sibling scan being incorrectly affected by
    the aggregate context from the right branch.
    
    
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here, e.g.
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 .../nereids/rules/rewrite/SetPreAggStatus.java     |  1 +
 .../nereids_rules_p0/set_preagg/set_preagg.groovy  | 25 +++++++++++++++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SetPreAggStatus.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SetPreAggStatus.java
index 3720d851b07..68df1c21bd4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SetPreAggStatus.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SetPreAggStatus.java
@@ -212,6 +212,7 @@ public class SetPreAggStatus extends 
DefaultPlanRewriter<Stack<SetPreAggStatus.P
         Plan plan = super.visit(logicalAggregate, context);
         if (!context.isEmpty()) {
             PreAggInfoContext preAggInfoContext = context.pop();
+            
preAggInfoContext.olapScanIds.retainAll(logicalAggregate.child().getInputRelations());
             
preAggInfoContext.addAggregateFunctions(logicalAggregate.getAggregateFunctions());
             
preAggInfoContext.addGroupByExpresssions(logicalAggregate.getGroupByExpressions());
             for (RelationId id : preAggInfoContext.olapScanIds) {
diff --git 
a/regression-test/suites/nereids_rules_p0/set_preagg/set_preagg.groovy 
b/regression-test/suites/nereids_rules_p0/set_preagg/set_preagg.groovy
index 106f05b8f13..888892bc9c4 100644
--- a/regression-test/suites/nereids_rules_p0/set_preagg/set_preagg.groovy
+++ b/regression-test/suites/nereids_rules_p0/set_preagg/set_preagg.groovy
@@ -269,7 +269,9 @@ suite("set_preagg") {
             group by preagg_t3.k2, t12.k2
             order by 1, 2;
         """)
-        notContains "PREAGGREGATION: OFF"
+        contains "(preagg_t1), PREAGGREGATION: ON"
+        contains "(preagg_t2), PREAGGREGATION: ON"
+        contains "(preagg_t3), PREAGGREGATION: OFF"
     }
 
     explain {
@@ -287,7 +289,9 @@ suite("set_preagg") {
             group by preagg_t3.k2, t12.k2
             order by 1, 2;
         """)
-        notContains "PREAGGREGATION: OFF"
+        contains "(preagg_t1), PREAGGREGATION: ON"
+        contains "(preagg_t2), PREAGGREGATION: ON"
+        contains "(preagg_t3), PREAGGREGATION: OFF"
     }
 
     explain {
@@ -307,6 +311,21 @@ suite("set_preagg") {
         """)
         contains "(preagg_t1), PREAGGREGATION: ON"
         contains "(preagg_t2), PREAGGREGATION: OFF. Reason: can't turn preAgg 
on because aggregate function sum"
-        contains "(preagg_t3), PREAGGREGATION: OFF. Reason: can't turn preAgg 
on because aggregate function sum"
+        contains "(preagg_t3), PREAGGREGATION: OFF"
+    }
+
+    explain {
+        sql("""
+            select cw.k1, cw.k2, cw.v7, cw.v9
+            from preagg_t1 cw
+            inner join (
+                select k1, k2, max(v9) as v9
+                from preagg_t1
+                where k1 in (1, 2)
+                group by k1, k2
+            ) mw on cw.k1 = mw.k1 and cw.v9 = mw.v9;
+        """)
+        contains "(preagg_t1), PREAGGREGATION: OFF. Reason: No valid aggregate 
on scan."
+        contains "(preagg_t1), PREAGGREGATION: ON"
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to