This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch tpc_preview6
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/tpc_preview6 by this push:
     new f0bed3a403b support count pushdown. tpcds 14/ tpch13. ds14: added agg push, execution time 4.7 -> 4.8; h13: added agg push, which should restore p6 to p4's result, from 10 sec back to 7 sec
f0bed3a403b is described below
commit f0bed3a403b36d6271648a39287be089074b872c
Author: englefly <[email protected]>
AuthorDate: Thu Feb 5 20:39:17 2026 +0800
    support count pushdown. tpcds 14/ tpch13
    ds14: added agg push; execution time 4.7 -> 4.8
    h13: added agg push; should restore p6 to p4's result, from 10 sec back to 7 sec
---
.../eageraggregation/PushDownAggregation.java | 20 ++-
.../java/org/apache/doris/qe/SessionVariable.java | 4 +
.../java/org/apache/doris/qe/StmtExecutor.java | 4 +-
.../eageraggregation/EagerAggRewriterTest.java | 113 +++++++++++++++
.../data/nereids_p0/eager_agg/eager_agg.out | 161 ++++++++++++++++++++-
.../shape_check/tpcds_sf1000/shape/query14.out | 78 +++++-----
.../tpcds_sf1000_nopkfk/shape/query14.out | 78 +++++-----
.../data/shape_check/tpch_sf1000/shape/q13.out | 11 +-
.../suites/nereids_p0/eager_agg/eager_agg.groovy | 152 +++++++++++++++++--
.../suites/nereids_p0/eager_agg/load.groovy | 114 +++++++++++----
10 files changed, 612 insertions(+), 123 deletions(-)
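For context, the rewrite this patch enables can be sketched in SQL. This is a minimal, illustrative sketch using the t1/t2 tables from the new EagerAggRewriterTest; the rule itself operates on plan nodes rather than rewriting SQL text:

    -- original query: count over a join, grouped by the other side's key
    SELECT count(t1.name), t2.id2
    FROM t1 JOIN t2 ON t1.id1 = t2.id2
    GROUP BY t2.id2;

    -- with eager aggregation and count pushdown, logically equivalent to:
    SELECT sum(a.cnt), t2.id2
    FROM (SELECT id1, count(name) AS cnt FROM t1 GROUP BY id1) a
    JOIN t2 ON a.id1 = t2.id2
    GROUP BY t2.id2;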
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
index 5d21515aa56..db6de5562c3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/PushDownAggregation.java
@@ -41,9 +41,12 @@ import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.expressions.functions.Function;
import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
import org.apache.doris.nereids.trees.expressions.functions.agg.Max;
import org.apache.doris.nereids.trees.expressions.functions.agg.Min;
+import org.apache.doris.nereids.trees.expressions.functions.agg.RollUpTrait;
import org.apache.doris.nereids.trees.expressions.functions.agg.Sum;
import org.apache.doris.nereids.trees.expressions.functions.scalar.If;
import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
@@ -82,6 +85,7 @@ public class PushDownAggregation extends DefaultPlanRewriter<JobContext> impleme
public final EagerAggRewriter writer = new EagerAggRewriter();
private final Set<Class> pushDownAggFunctionSet = Sets.newHashSet(
+ Count.class,
Sum.class,
Max.class,
Min.class);
@@ -148,7 +152,7 @@ public class PushDownAggregation extends DefaultPlanRewriter<JobContext> impleme
AggregateFunction aggFunction = (AggregateFunction) obj;
if (pushDownAggFunctionSet.contains(aggFunction.getClass())
&& !aggFunction.isDistinct()) {
- if (aggFunction.child(0) instanceof If) {
+                    if (aggFunction.arity() > 0 && aggFunction.child(0) instanceof If) {
                        If body = (If) (aggFunction).child(0);
                        Set<Slot> valueSlots = Sets.newHashSet(body.getTrueValue().getInputSlots());
                        valueSlots.addAll(body.getFalseValue().getInputSlots());
@@ -226,10 +230,20 @@ public class PushDownAggregation extends DefaultPlanRewriter<JobContext> impleme
// -> T2 [...]
// for min(A), replaceMap: A->minA
// for sum(A), replaceMap: A->sumA
-            Map<Expression, Slot> replaceMap = new HashMap<>();
+            // for count(A), replaceMap: count(A)->sum(countA), because count needs rollup to sum
+            Map<Expression, Expression> replaceMap = new HashMap<>();
             List<AggregateFunction> relatedAggFunc = aggFunctionsForOutputExpressions.get(ne);
             for (AggregateFunction func : relatedAggFunc) {
-                replaceMap.put(func.child(0), pushDownContext.getAliasMap().get(func).toSlot());
+                Slot pushedDownSlot = pushDownContext.getAliasMap().get(func).toSlot();
+                if (func instanceof Count) {
+                    // For count(A), after pushdown we have count(A) as x,
+                    // and the top agg should use sum(x) instead of count(x)
+                    Function rollUpFunc = ((RollUpTrait) func).constructRollUp(pushedDownSlot);
+                    replaceMap.put(func, rollUpFunc);
+                } else if (func.arity() > 0) {
+                    // For sum/max/min, replace the child expression with the pushed down slot
+                    replaceMap.put(func.child(0), pushedDownSlot);
+                }
             }
             NamedExpression replaceAliasExpr = (NamedExpression) ExpressionUtils.replace(ne, replaceMap);
             replaceAliasExpr = (NamedExpression) ExpressionUtils.rebuildSignature(replaceAliasExpr);
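Why count needs constructRollUp while sum/max/min only remap their argument can be seen with a small worked example (hypothetical data, for illustration only):

    -- rows of t1 with id1 = 7: name values 'a', 'b', NULL
    -- pushed-down agg per id1:  count(name) AS cnt  ->  cnt = 2
    -- top agg above the join:   sum(cnt)            ->  2  (correct)
    -- keeping count on the pushed-down slot would instead give count(cnt) = 1,
    -- i.e. the number of partial-aggregate rows, not the number of non-null names.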
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 232ae772a27..122065f6d35 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -2233,6 +2233,10 @@ public class SessionVariable implements Serializable, Writable {
}
}
+ public void setEagerAggregationMode(int mode) {
+ this.eagerAggregationMode = mode;
+ }
+
    @VariableMgr.VarAttr(name = "eager_aggregation_on_join", needForward = true)
public boolean eagerAggregationOnJoin = false;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index 4284ddbfc4a..f2afbc47085 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -771,9 +771,7 @@ public class StmtExecutor {
new AnalysisException(e.getMessage(), e));
} catch (Exception | Error e) {
// Maybe our bug
- if (LOG.isDebugEnabled()) {
-                LOG.debug("Command({}) process failed.", originStmt.originStmt, e);
-            }
+            LOG.info("Command({}) process failed.", originStmt.originStmt, e);
             context.getState().setError(ErrorCode.ERR_UNKNOWN_ERROR, e.getMessage());
             throw new NereidsException("Command (" + originStmt.originStmt + ") process failed.",
                     new AnalysisException(e.getMessage() == null ? e.toString() : e.getMessage(), e));
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriterTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriterTest.java
new file mode 100644
index 00000000000..bf46829c4e0
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/eageraggregation/EagerAggRewriterTest.java
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite.eageraggregation;
+
+import org.apache.doris.nereids.util.MemoPatternMatchSupported;
+import org.apache.doris.nereids.util.PlanChecker;
+import org.apache.doris.utframe.TestWithFeService;
+
+import org.junit.jupiter.api.Test;
+
+class EagerAggRewriterTest extends TestWithFeService implements MemoPatternMatchSupported {
+ @Override
+ protected void runBeforeAll() throws Exception {
+ createDatabase("test");
+ connectContext.setDatabase("default_cluster:test");
+ createTables(
+ "CREATE TABLE IF NOT EXISTS t1 (\n"
+ + " id1 int not null,\n"
+ + " name varchar(20)\n"
+ + ")\n"
+ + "DUPLICATE KEY(id1)\n"
+ + "DISTRIBUTED BY HASH(id1) BUCKETS 10\n"
+ + "PROPERTIES (\"replication_num\" = \"1\")\n",
+ "CREATE TABLE IF NOT EXISTS t2 (\n"
+ + " id2 int not null,\n"
+ + " name varchar(20)\n"
+ + ")\n"
+ + "DUPLICATE KEY(id2)\n"
+ + "DISTRIBUTED BY HASH(id2) BUCKETS 10\n"
+ + "PROPERTIES (\"replication_num\" = \"1\")\n"
+ );
+
connectContext.getSessionVariable().setDisableNereidsRules("PRUNE_EMPTY_PARTITION");
+ }
+
+ @Test
+ void testNotPushAggCaseWhenToNullableSideOfOuterJoin() {
+ connectContext.getSessionVariable().setEagerAggregationMode(1);
+ connectContext.getSessionVariable().setDisableJoinReorder(true);
+ try {
+            // RIGHT JOIN: agg function (case-when) references left side columns,
+            // left side is nullable, should NOT be pushed below the join
+            String sql = "select max(case when t1.name is not null then 'aaa' end) from t1 right join t2 on t1.id1 = t2.id2"
+ + " group by t1.id1";
+ PlanChecker.from(connectContext)
+ .analyze(sql)
+ .rewrite()
+ .nonMatch(logicalJoin(logicalAggregate(), any()))
+ .printlnTree();
+
+            // LEFT JOIN: agg function(case-when) references right side columns,
+            // right side is nullable, should NOT be pushed below the join
+            sql = "select max(case when t2.name is null then 'xxx' end) from t1 left join t2"
+ + " on t1.id1 = t2.id2 group by t1.id1";
+ PlanChecker.from(connectContext)
+ .analyze(sql)
+ .rewrite()
+ .nonMatch(logicalJoin(any(), logicalAggregate()))
+ .printlnTree();
+            // LEFT JOIN: agg function (not case-when) references right side columns,
+            // right side is nullable, can be pushed below the join
+ sql = "select max(t2.name) from t1 left join t2"
+ + " on t1.id1 = t2.id2 group by t1.id1";
+ PlanChecker.from(connectContext)
+ .analyze(sql)
+ .rewrite()
+ .matches(logicalJoin(any(), logicalAggregate()))
+ .printlnTree();
+ } finally {
+ connectContext.getSessionVariable().setEagerAggregationMode(0);
+ }
+ }
+
+ @Test
+ void testPushDownCount() {
+ // Test count pushdown: count(a) should be pushed down and
+ // the top aggregation should use sum to aggregate the count results
+ // Before: agg(count(name), groupby(id2))
+ // -> join(t1.id1=t2.id2)
+ // -> t1(id1, name)
+ // -> t2(id2)
+ // After: agg(sum(x), groupby(id2))
+ // -> join(t1.id1=t2.id2)
+ // -> agg(count(name) as x, groupby(id1))
+ // -> t1(id1, name)
+ // -> t2(id2)
+ connectContext.getSessionVariable().setEagerAggregationMode(1);
+ try {
+            String sql = "select count(t1.name), t2.id2 from t1 join t2 on t1.id1 = t2.id2 group by t2.id2";
+ PlanChecker.from(connectContext)
+ .analyze(sql)
+ .rewrite()
+                    .matches(logicalAggregate(logicalProject(logicalJoin(logicalAggregate(), any()))))
+ .printlnTree();
+ } finally {
+ connectContext.getSessionVariable().setEagerAggregationMode(0);
+ }
+ }
+}
diff --git a/regression-test/data/nereids_p0/eager_agg/eager_agg.out b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
index cd2b3bc8f99..5a959fa2016 100644
--- a/regression-test/data/nereids_p0/eager_agg/eager_agg.out
+++ b/regression-test/data/nereids_p0/eager_agg/eager_agg.out
@@ -24,6 +24,8 @@ UnUsed:
SyntaxError:
-- !a_exe --
+2024 66.00 54.00
+2025 50.00 42.00
-- !a2 --
PhysicalResultSink
@@ -50,6 +52,8 @@ UnUsed:
SyntaxError:
-- !a2_exe --
+2024 120.00
+2025 92.00
-- !sum_min_max --
PhysicalResultSink
@@ -76,8 +80,10 @@ UnUsed:
SyntaxError:
-- !sum_min_max_exe --
+2024 66.00 11.00 16.00
+2025 50.00 20.00 22.00
--- !avg_count --
+-- !avg --
PhysicalResultSink
--hashAgg[GLOBAL]
----PhysicalDistribute[DistributionSpecHash]
@@ -98,7 +104,146 @@ Used: leading({ ss ws } dt )
UnUsed:
SyntaxError:
--- !avg_count_exe --
+-- !avg_exe --
+2025 25.0000
+2024 16.5000
+
+-- !count_column --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute[DistributionSpecHash]
+------hashAgg[LOCAL]
+--------PhysicalProject
+----------hashJoin[INNER_JOIN shuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+------------PhysicalProject
+--------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
+----------------PhysicalProject
+------------------PhysicalOlapScan[store_sales(ss)]
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_sales(ws)]
+------------PhysicalProject
+--------------PhysicalOlapScan[date_dim(dt)]
+
+Hint log:
+Used: leading({ ss ws } dt )
+UnUsed:
+SyntaxError:
+
+-- !count_column_exe --
+2024 4
+2025 2
+
+-- !count_star --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute[DistributionSpecHash]
+------hashAgg[LOCAL]
+--------PhysicalProject
+----------hashJoin[INNER_JOIN shuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+------------PhysicalProject
+--------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[store_sales(ss)]
+----------------PhysicalProject
+------------------PhysicalOlapScan[web_sales(ws)]
+------------PhysicalProject
+--------------PhysicalOlapScan[date_dim(dt)]
+
+Hint log:
+Used: leading({ ss ws } dt )
+UnUsed:
+SyntaxError:
+
+-- !count_star_exe --
+2025 2
+2024 4
+
+-- !count_distinct --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute[DistributionSpecHash]
+------hashAgg[LOCAL]
+--------PhysicalProject
+----------hashJoin[INNER_JOIN shuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+------------PhysicalProject
+--------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
+----------------PhysicalProject
+------------------PhysicalOlapScan[store_sales(ss)]
+----------------PhysicalProject
+------------------PhysicalOlapScan[web_sales(ws)]
+------------PhysicalProject
+--------------PhysicalOlapScan[date_dim(dt)]
+
+Hint log:
+Used: leading({ ss ws } dt )
+UnUsed:
+SyntaxError:
+
+-- !count_distinct_exe --
+2024 2
+2025 1
+
+-- !count_sum_mixed --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute[DistributionSpecHash]
+------hashAgg[LOCAL]
+--------PhysicalProject
+----------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+------------hashAgg[GLOBAL]
+--------------PhysicalDistribute[DistributionSpecHash]
+----------------hashAgg[LOCAL]
+------------------PhysicalProject
+--------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[store_sales(ss)]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_sales(ws)]
+------------PhysicalProject
+--------------PhysicalOlapScan[date_dim(dt)]
+
+Hint log:
+Used: leading({ ss ws } dt )
+UnUsed:
+SyntaxError:
+
+-- !count_sum_mixed_exe --
+2025 2 42.00
+2024 4 54.00
+
+-- !count_star_sum_mixed --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute[DistributionSpecHash]
+------hashAgg[LOCAL]
+--------PhysicalProject
+----------hashJoin[INNER_JOIN shuffle] hashCondition=((dt.d_date_sk = ss.ss_sold_date_sk)) otherCondition=()
+------------PhysicalProject
+--------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((ss.ss_item_sk = ws.ws_item_sk)) otherCondition=()
+----------------hashAgg[GLOBAL]
+------------------PhysicalDistribute[DistributionSpecHash]
+--------------------hashAgg[LOCAL]
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[store_sales(ss)]
+----------------PhysicalProject
+------------------PhysicalOlapScan[web_sales(ws)]
+------------PhysicalProject
+--------------PhysicalOlapScan[date_dim(dt)]
+
+Hint log:
+Used: leading({ ss ws } dt )
+UnUsed:
+SyntaxError:
+
+-- !count_star_sum_mixed_exe --
+2024 4 54.00
+2025 2 42.00
-- !groupkey_push_SS_JOIN_D --
PhysicalResultSink
@@ -126,6 +271,11 @@ UnUsed:
SyntaxError:
-- !groupkey_push_SS_JOIN_D_exe --
+2024 10.00 12.00
+2024 15.00 17.00
+2024 25.00 29.00
+2025 18.00 21.00
+2025 20.00 23.00
-- !groupkey_push --
PhysicalResultSink
@@ -153,6 +303,10 @@ UnUsed:
SyntaxError:
-- !groupkey_push_exe --
+2024 20.00 22.00
+2024 30.00 32.00
+2025 18.00 20.00
+2025 20.00 22.00
-- !sum_if_push --
PhysicalResultSink
@@ -178,7 +332,8 @@ UnUsed:
SyntaxError:
-- !sum_if_push_exe --
-1 \N \N \N \N \N \N
+1 30.50 \N \N \N \N \N
+2 \N 22.00 \N \N \N \N
-- !check_nullable --
PhysicalResultSink
diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query14.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query14.out
index a69d5958d67..f64ea0c891c 100644
--- a/regression-test/data/shape_check/tpcds_sf1000/shape/query14.out
+++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query14.out
@@ -82,18 +82,22 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF12 i_item_sk->[ss_item_sk,ss_item_sk]
---------------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF11 ss_item_sk->[ss_item_sk]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF10 d_date_sk->[ss_sold_date_sk]
---------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[store_sales] apply
RFs: RF10 RF11 RF12
---------------------------------------PhysicalProject
-----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
-------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply
RFs: RF12
+------------------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF12 ss_item_sk->[i_item_sk,ss_item_sk]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[item]
+----------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF11 d_date_sk->[ss_sold_date_sk]
+------------------------------------PhysicalProject
+--------------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF10 i_item_sk->[ss_item_sk]
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF10 RF11 RF12
+----------------------------------------PhysicalProject
+------------------------------------------PhysicalOlapScan[item] apply RFs:
RF12
+------------------------------------PhysicalProject
+--------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
+----------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------PhysicalAssertNumRows
----------------------PhysicalDistribute[DistributionSpecGather]
------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
@@ -104,18 +108,22 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF15 i_item_sk->[cs_item_sk,ss_item_sk]
---------------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF14 ss_item_sk->[cs_item_sk]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF13 d_date_sk->[cs_sold_date_sk]
---------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[catalog_sales] apply
RFs: RF13 RF14 RF15
---------------------------------------PhysicalProject
-----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
-------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply
RFs: RF15
+------------------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF15 ss_item_sk->[cs_item_sk,i_item_sk]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[item]
+----------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF14 d_date_sk->[cs_sold_date_sk]
+------------------------------------PhysicalProject
+--------------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF13 i_item_sk->[cs_item_sk]
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------PhysicalOlapScan[catalog_sales]
apply RFs: RF13 RF14 RF15
+----------------------------------------PhysicalProject
+------------------------------------------PhysicalOlapScan[item] apply RFs:
RF15
+------------------------------------PhysicalProject
+--------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
+----------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------PhysicalAssertNumRows
----------------------PhysicalDistribute[DistributionSpecGather]
------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
@@ -126,18 +134,22 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build
RFs:RF18 i_item_sk->[ss_item_sk,ws_item_sk]
---------------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF17 ss_item_sk->[ws_item_sk]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF16 d_date_sk->[ws_sold_date_sk]
---------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[web_sales] apply RFs:
RF16 RF17 RF18
---------------------------------------PhysicalProject
-----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
-------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply
RFs: RF18
+------------------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF18 ss_item_sk->[i_item_sk,ws_item_sk]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[item]
+----------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF17 d_date_sk->[ws_sold_date_sk]
+------------------------------------PhysicalProject
+--------------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build
RFs:RF16 i_item_sk->[ws_item_sk]
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------PhysicalOlapScan[web_sales]
apply RFs: RF16 RF17 RF18
+----------------------------------------PhysicalProject
+------------------------------------------PhysicalOlapScan[item] apply RFs:
RF18
+------------------------------------PhysicalProject
+--------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
+----------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------PhysicalAssertNumRows
----------------------PhysicalDistribute[DistributionSpecGather]
------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
diff --git a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query14.out b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query14.out
index a69d5958d67..f64ea0c891c 100644
--- a/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query14.out
+++ b/regression-test/data/shape_check/tpcds_sf1000_nopkfk/shape/query14.out
@@ -82,18 +82,22 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF12 i_item_sk->[ss_item_sk,ss_item_sk]
---------------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF11 ss_item_sk->[ss_item_sk]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF10 d_date_sk->[ss_sold_date_sk]
---------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[store_sales] apply
RFs: RF10 RF11 RF12
---------------------------------------PhysicalProject
-----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
-------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply
RFs: RF12
+------------------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((store_sales.ss_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF12 ss_item_sk->[i_item_sk,ss_item_sk]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[item]
+----------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF11 d_date_sk->[ss_sold_date_sk]
+------------------------------------PhysicalProject
+--------------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF10 i_item_sk->[ss_item_sk]
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF10 RF11 RF12
+----------------------------------------PhysicalProject
+------------------------------------------PhysicalOlapScan[item] apply RFs:
RF12
+------------------------------------PhysicalProject
+--------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
+----------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------PhysicalAssertNumRows
----------------------PhysicalDistribute[DistributionSpecGather]
------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
@@ -104,18 +108,22 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF15 i_item_sk->[cs_item_sk,ss_item_sk]
---------------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF14 ss_item_sk->[cs_item_sk]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF13 d_date_sk->[cs_sold_date_sk]
---------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[catalog_sales] apply
RFs: RF13 RF14 RF15
---------------------------------------PhysicalProject
-----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
-------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply
RFs: RF15
+------------------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((catalog_sales.cs_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF15 ss_item_sk->[cs_item_sk,i_item_sk]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[item]
+----------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF14 d_date_sk->[cs_sold_date_sk]
+------------------------------------PhysicalProject
+--------------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=()
build RFs:RF13 i_item_sk->[cs_item_sk]
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------PhysicalOlapScan[catalog_sales]
apply RFs: RF13 RF14 RF15
+----------------------------------------PhysicalProject
+------------------------------------------PhysicalOlapScan[item] apply RFs:
RF15
+------------------------------------PhysicalProject
+--------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
+----------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------PhysicalAssertNumRows
----------------------PhysicalDistribute[DistributionSpecGather]
------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
@@ -126,18 +134,22 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
-------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build
RFs:RF18 i_item_sk->[ss_item_sk,ws_item_sk]
---------------------------------hashJoin[LEFT_SEMI_JOIN broadcast]
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF17 ss_item_sk->[ws_item_sk]
-----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF16 d_date_sk->[ws_sold_date_sk]
---------------------------------------PhysicalProject
-----------------------------------------PhysicalOlapScan[web_sales] apply RFs:
RF16 RF17 RF18
---------------------------------------PhysicalProject
-----------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
-------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply
RFs: RF18
+------------------------------hashJoin[LEFT_SEMI_JOIN bucketShuffle]
hashCondition=((web_sales.ws_item_sk = cross_items.ss_item_sk))
otherCondition=() build RFs:RF18 ss_item_sk->[i_item_sk,ws_item_sk]
--------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[item]
+----------------------------------hashJoin[INNER_JOIN broadcast]
hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk))
otherCondition=() build RFs:RF17 d_date_sk->[ws_sold_date_sk]
+------------------------------------PhysicalProject
+--------------------------------------hashJoin[INNER_JOIN bucketShuffle]
hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build
RFs:RF16 i_item_sk->[ws_item_sk]
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------PhysicalOlapScan[web_sales]
apply RFs: RF16 RF17 RF18
+----------------------------------------PhysicalProject
+------------------------------------------PhysicalOlapScan[item] apply RFs:
RF18
+------------------------------------PhysicalProject
+--------------------------------------filter((date_dim.d_moy = 11) and
(date_dim.d_year = 2001))
+----------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------PhysicalAssertNumRows
----------------------PhysicalDistribute[DistributionSpecGather]
------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q13.out b/regression-test/data/shape_check/tpch_sf1000/shape/q13.out
index ab30427a698..a9c26e203f3 100644
--- a/regression-test/data/shape_check/tpch_sf1000/shape/q13.out
+++ b/regression-test/data/shape_check/tpch_sf1000/shape/q13.out
@@ -10,10 +10,13 @@ PhysicalResultSink
--------------PhysicalProject
----------------hashAgg[GLOBAL]
------------------PhysicalProject
---------------------hashJoin[RIGHT_OUTER_JOIN shuffle] hashCondition=((customer.c_custkey = orders.o_custkey)) otherCondition=() build RFs:RF0 c_custkey->[o_custkey]
-----------------------PhysicalProject
-------------------------filter(( not (o_comment like '%special%requests%')))
---------------------------PhysicalOlapScan[orders] apply RFs: RF0
+--------------------hashJoin[LEFT_OUTER_JOIN bucketShuffle] hashCondition=((customer.c_custkey = orders.o_custkey)) otherCondition=()
----------------------PhysicalProject
------------------------PhysicalOlapScan[customer]
+----------------------hashAgg[GLOBAL]
+------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------hashAgg[LOCAL]
+----------------------------PhysicalProject
+------------------------------filter(( not (o_comment like '%special%requests%')))
+--------------------------------PhysicalOlapScan[orders]
diff --git a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
index a401ac663af..8229ff1d028 100644
--- a/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
+++ b/regression-test/suites/nereids_p0/eager_agg/eager_agg.groovy
@@ -104,12 +104,12 @@ suite("eager_agg") {
group by dt.d_year
"""
- // do not push avg/count aggFunc
- qt_avg_count """
+ // do not push avg aggFunc
+ qt_avg """
explain shape plan
- select /*+leading({ss ws} dt)*/ dt.d_year
- ,avg(ws_list_price)
- from date_dim dt
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,avg(ws_list_price)
+ from date_dim dt
,store_sales ss
,web_sales ws
where dt.d_date_sk = ss_sold_date_sk
@@ -117,10 +117,134 @@ suite("eager_agg") {
group by dt.d_year
"""
- qt_avg_count_exe """
- select /*+leading({ss ws} dt)*/ dt.d_year
- ,avg(ws_list_price)
- from date_dim dt
+ qt_avg_exe """
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,avg(ws_list_price)
+ from date_dim dt
+ ,store_sales ss
+ ,web_sales ws
+ where dt.d_date_sk = ss_sold_date_sk
+ and ss_item_sk = ws_item_sk
+ group by dt.d_year
+ """
+
+    // push count(column) - count should be pushed down and converted to sum at top level
+ qt_count_column """
+ explain shape plan
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,count(ws_list_price) cnt
+ from date_dim dt
+ ,store_sales ss
+ ,web_sales ws
+ where dt.d_date_sk = ss_sold_date_sk
+ and ss_item_sk = ws_item_sk
+ group by dt.d_year
+ """
+
+ order_qt_count_column_exe """
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,count(ws_list_price) cnt
+ from date_dim dt
+ ,store_sales ss
+ ,web_sales ws
+ where dt.d_date_sk = ss_sold_date_sk
+ and ss_item_sk = ws_item_sk
+ group by dt.d_year
+ """
+
+    // push count(*) - count(*) should be pushed down and converted to sum at top level
+ qt_count_star """
+ explain shape plan
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,count(*) cnt
+ from date_dim dt
+ ,store_sales ss
+ ,web_sales ws
+ where dt.d_date_sk = ss_sold_date_sk
+ and ss_item_sk = ws_item_sk
+ group by dt.d_year
+ """
+
+ qt_count_star_exe """
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,count(*) cnt
+ from date_dim dt
+ ,store_sales ss
+ ,web_sales ws
+ where dt.d_date_sk = ss_sold_date_sk
+ and ss_item_sk = ws_item_sk
+ group by dt.d_year
+ """
+
+    // do not push count(distinct column) - distinct aggregation should not be pushed
+ qt_count_distinct """
+ explain shape plan
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,count(distinct ws_list_price) cnt
+ from date_dim dt
+ ,store_sales ss
+ ,web_sales ws
+ where dt.d_date_sk = ss_sold_date_sk
+ and ss_item_sk = ws_item_sk
+ group by dt.d_year
+ """
+
+ order_qt_count_distinct_exe """
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,count(distinct ws_list_price) cnt
+ from date_dim dt
+ ,store_sales ss
+ ,web_sales ws
+ where dt.d_date_sk = ss_sold_date_sk
+ and ss_item_sk = ws_item_sk
+ group by dt.d_year
+ """
+
+ // push count with sum - mixed aggregation
+ qt_count_sum_mixed """
+ explain shape plan
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,count(ws_list_price) cnt
+ ,sum(ss_sales_price) sum_agg
+ from date_dim dt
+ ,store_sales ss
+ ,web_sales ws
+ where dt.d_date_sk = ss_sold_date_sk
+ and ss_item_sk = ws_item_sk
+ group by dt.d_year
+ """
+
+ qt_count_sum_mixed_exe """
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,count(ws_list_price) cnt
+ ,sum(ss_sales_price) sum_agg
+ from date_dim dt
+ ,store_sales ss
+ ,web_sales ws
+ where dt.d_date_sk = ss_sold_date_sk
+ and ss_item_sk = ws_item_sk
+ group by dt.d_year
+ """
+
+ // push count(*) with sum - both should be pushed down
+ qt_count_star_sum_mixed """
+ explain shape plan
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,count(*) cnt
+ ,sum(ss_sales_price) sum_agg
+ from date_dim dt
+ ,store_sales ss
+ ,web_sales ws
+ where dt.d_date_sk = ss_sold_date_sk
+ and ss_item_sk = ws_item_sk
+ group by dt.d_year
+ """
+
+ order_qt_count_star_sum_mixed_exe """
+ select /*+leading({ss ws} dt)*/ dt.d_year
+ ,count(*) cnt
+ ,sum(ss_sales_price) sum_agg
+ from date_dim dt
,store_sales ss
,web_sales ws
where dt.d_date_sk = ss_sold_date_sk
@@ -142,7 +266,7 @@ suite("eager_agg") {
group by dt.d_year, ss_hdemo_sk + ws_quantity
"""
- qt_groupkey_push_SS_JOIN_D_exe """
+ order_qt_groupkey_push_SS_JOIN_D_exe """
select /*+leading({ss dt} ws)*/ dt.d_year
,sum(ss_wholesale_cost) brand
,sum(ss_sales_price + d_moy) sum_agg
@@ -168,7 +292,7 @@ suite("eager_agg") {
group by dt.d_year, ss_hdemo_sk + d_moy
"""
- qt_groupkey_push_exe """
+ order_qt_groupkey_push_exe """
select /*+leading({ss dt} ws)*/ dt.d_year
,sum(ss_wholesale_cost) brand
,sum(ss_sales_price) sum_agg
@@ -194,7 +318,7 @@ suite("eager_agg") {
group by d_week_seq, ws_item_sk;
"""
- qt_sum_if_push_exe """
+ order_qt_sum_if_push_exe """
select /*+leading({web_sales item} date_dim)*/ d_week_seq,
            sum(case when (d_day_name='Monday') then ws_sales_price else null end) mon_sales,
            sum(case when (d_day_name='Tuesday') then ws_sales_price else null end) tue_sales,
@@ -252,7 +376,7 @@ suite("eager_agg") {
having brand is null;
"""
- qt_min_sum_same_slot_exe """
+ order_qt_min_sum_same_slot_exe """
select /*+leading({ss dt} ws)*/ dt.d_moy
,min(d_year) brand
,sum(d_year) sum_agg
@@ -265,7 +389,7 @@ suite("eager_agg") {
having brand is null;
"""
- qt_sum_min_same_slot_exe """
+ order_qt_sum_min_same_slot_exe """
select /*+leading({ss dt} ws)*/ dt.d_moy
,sum(d_year) sum_agg
,min(d_year) brand
diff --git a/regression-test/suites/nereids_p0/eager_agg/load.groovy b/regression-test/suites/nereids_p0/eager_agg/load.groovy
index 42abc8d50a4..1c27b3ba36f 100644
--- a/regression-test/suites/nereids_p0/eager_agg/load.groovy
+++ b/regression-test/suites/nereids_p0/eager_agg/load.groovy
@@ -192,34 +192,69 @@ PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
+-- Insert store_sales data
+-- Join conditions: dt.d_date_sk = ss_sold_date_sk AND ss_item_sk = ws_item_sk
INSERT INTO store_sales (
ss_sold_date_sk, ss_sold_time_sk, ss_item_sk, ss_customer_sk, ss_cdemo_sk,
ss_hdemo_sk,
ss_addr_sk, ss_store_sk, ss_promo_sk, ss_ticket_number, ss_quantity,
ss_wholesale_cost, ss_list_price, ss_sales_price, ss_ext_discount_amt,
ss_ext_sales_price, ss_ext_wholesale_cost, ss_ext_list_price, ss_ext_tax,
ss_coupon_amt, ss_net_paid, ss_net_paid_inc_tax, ss_net_profit
-) VALUES (
- 20240101, 36000, 1001, 501, 601, 701,
- 801, 901, 10001, 55500001, 2,
- 10.00, 12.00, 11.00, 2.00,
- 22.00, 20.00, 24.00, 1.54,
- 0.00, 22.00, 23.54, 3.54
-);
+) VALUES
+ -- Row 1: d_year=2024, item_sk=1001
+ (1, 36000, 1001, 501, 601, 701,
+ 801, 901, 10001, 55500001, 2,
+ 10.00, 12.00, 11.00, 2.00,
+ 22.00, 20.00, 24.00, 1.54,
+ 0.00, 22.00, 23.54, 3.54),
+ -- Row 2: d_year=2024, item_sk=1001
+ (1, 36001, 1001, 502, 602, 702,
+ 802, 902, 10002, 55500002, 3,
+ 15.00, 18.00, 16.00, 3.00,
+ 48.00, 45.00, 54.00, 3.36,
+ 0.00, 48.00, 51.36, 6.36),
+ -- Row 3: d_year=2025, item_sk=1002
+ (2, 36002, 1002, 503, 603, 703,
+ 803, 903, 10003, 55500003, 5,
+ 20.00, 25.00, 22.00, 5.00,
+ 110.00, 100.00, 125.00, 7.70,
+ 0.00, 110.00, 117.70, 17.70),
+ -- Row 4: d_year=2025, item_sk=1002
+ (2, 36003, 1002, 504, 604, 704,
+ 804, 904, 10004, 55500004, 4,
+ 18.00, 22.00, 20.00, 4.00,
+ 80.00, 72.00, 88.00, 5.60,
+ 0.00, 80.00, 85.60, 13.60);
+-- Insert date_dim data with different d_year and d_day_name values
INSERT INTO date_dim (
d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq, d_quarter_seq, d_year,
d_dow, d_moy, d_dom, d_qoy, d_fy_year, d_fy_quarter_seq, d_fy_week_seq,
d_day_name, d_quarter_name, d_holiday, d_weekend, d_following_holiday,
d_first_dom, d_last_dom, d_same_day_ly, d_same_day_lq,
d_current_day, d_current_week, d_current_month, d_current_quarter,
d_current_year
-) VALUES (
- 20240101, '2024-01-01', '2024-01-01', 1, 1, 1, 2024,
- 1, 1, 1, 1, 2024, 1, 1,
- 'MON', 'Q1', 'N', 'N', 'N',
- 1, 31, 20230101, 20231001,
- 'Y', 'Y', 'Y', 'Y', 'Y'
-);
+) VALUES
+ -- d_date_sk=1, d_year=2024, Monday
+ (1, '2024-01-01', '2024-01-01', 1, 1, 1, 2024,
+ 1, 1, 1, 1, 2024, 1, 1,
+ 'Monday', 'Q1', 'N', 'N', 'N',
+ 1, 31, 20230101, 20231001,
+ 'Y', 'Y', 'Y', 'Y', 'Y'),
+ -- d_date_sk=2, d_year=2025, Tuesday
+ (2, '2025-01-07', '2025-01-07', 13, 2, 5, 2025,
+ 2, 1, 7, 1, 2025, 5, 2,
+ 'Tuesday', 'Q1', 'N', 'N', 'N',
+ 1, 31, 20240107, 20241007,
+ 'Y', 'Y', 'Y', 'Y', 'Y'),
+ -- d_date_sk=3, d_year=2024, Wednesday
+ (3, '2024-01-03', '2024-01-03', 1, 1, 1, 2024,
+ 3, 1, 3, 1, 2024, 1, 1,
+ 'Wednesday', 'Q1', 'N', 'N', 'N',
+ 1, 31, 20230103, 20231003,
+ 'Y', 'Y', 'Y', 'Y', 'Y');
+-- Insert web_sales data
+-- Join conditions: ss_item_sk = ws_item_sk AND d_date_sk = ws_sold_date_sk
INSERT INTO web_sales (
ws_sold_date_sk, ws_sold_time_sk, ws_ship_date_sk, ws_item_sk,
ws_bill_customer_sk, ws_bill_cdemo_sk, ws_bill_hdemo_sk, ws_bill_addr_sk,
@@ -229,17 +264,36 @@ INSERT INTO web_sales (
ws_ext_discount_amt, ws_ext_sales_price, ws_ext_wholesale_cost,
ws_ext_list_price,
ws_ext_tax, ws_coupon_amt, ws_ext_ship_cost, ws_net_paid,
ws_net_paid_inc_tax,
ws_net_paid_inc_ship, ws_net_paid_inc_ship_tax, ws_net_profit
-) VALUES (
- 20240101, 43200, 20240103, 2001,
- 601, 701, 801, 901,
- 602, 702, 802, 902,
- 3001, 4001, 5001, 6001, 7001,
- 8800001, 3, 15.00, 18.00, 16.50,
- 4.50, 49.50, 45.00, 54.00,
- 3.47, 0.00, 5.00, 49.50, 52.97,
- 54.50, 58.00, 7.97
-);
+) VALUES
+ -- Row 1: item_sk=1001 (matches store_sales), sold_date_sk=1
+ (1, 43200, 3, 1001,
+ 601, 701, 801, 901,
+ 602, 702, 802, 902,
+ 3001, 4001, 5001, 6001, 7001,
+ 8800001, 3, 15.00, 18.00, 16.50,
+ 4.50, 49.50, 45.00, 54.00,
+ 3.47, 0.00, 5.00, 49.50, 52.97,
+ 54.50, 58.00, 7.97),
+ -- Row 2: item_sk=1001 (matches store_sales), sold_date_sk=1
+ (1, 43201, 3, 1001,
+ 602, 702, 802, 902,
+ 603, 703, 803, 903,
+ 3002, 4002, 5002, 6002, 7002,
+ 8800002, 2, 12.00, 15.00, 14.00,
+ 2.00, 28.00, 24.00, 30.00,
+ 1.96, 0.00, 3.00, 28.00, 29.96,
+ 31.00, 33.00, 5.96),
+ -- Row 3: item_sk=1002 (matches store_sales row 3,4), sold_date_sk=2
+ (2, 43202, 4, 1002,
+ 603, 703, 803, 903,
+ 604, 704, 804, 904,
+ 3003, 4003, 5003, 6003, 7003,
+ 8800003, 4, 20.00, 25.00, 22.00,
+ 6.00, 88.00, 80.00, 100.00,
+ 6.16, 0.00, 8.00, 88.00, 94.16,
+ 96.00, 102.00, 14.16);
+-- Insert item data
INSERT INTO item (
i_item_sk, i_item_id, i_rec_start_date, i_rec_end_date,
i_item_desc, i_current_price, i_wholesale_cost,
@@ -254,12 +308,12 @@ INSERT INTO item (
201, 'CategoryA', 301, 'ManufactA',
'M', 'Std', 'Red', 'EA', 'BOX',
1, 'Product 1001'),
- (2001, 'ITEM-0002001', '2024-01-01', NULL,
- 'Sample item 2001', 18.00, 15.00,
- 11, 'BrandB', 102, 'ClassB',
- 202, 'CategoryB', 302, 'ManufactB',
- 'L', 'Std', 'Blue', 'EA', 'BOX',
- 2, 'Product 2001');
+ (1002, 'ITEM-0001002', '2024-01-01', NULL,
+ 'Sample item 1002', 25.00, 20.00,
+ 10, 'BrandA', 101, 'ClassA',
+ 201, 'CategoryA', 301, 'ManufactA',
+ 'L', 'Std', 'Green', 'EA', 'BOX',
+ 1, 'Product 1002');
"""
}