This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 12089cda086 [refactor](nereids) make NormalizeAggregate rule more
clear and readable (#28607)
12089cda086 is described below
commit 12089cda08605e60b7bea3edf81ae743901b8880
Author: starocean999 <[email protected]>
AuthorDate: Thu Dec 21 11:42:22 2023 +0800
[refactor](nereids) make NormalizeAggregate rule more clear and readable
(#28607)
---
.../nereids/rules/analysis/NormalizeAggregate.java | 189 ++++++++++++---------
.../shape/query16.out | 53 +++---
.../shape/query94.out | 53 +++---
.../noStatsRfPrune/query16.out | 57 +++----
.../noStatsRfPrune/query94.out | 57 +++----
.../no_stats_shape/query16.out | 57 +++----
.../no_stats_shape/query94.out | 57 +++----
.../rf_prune/query16.out | 61 ++++---
.../rf_prune/query94.out | 53 +++---
.../nereids_tpcds_shape_sf100_p0/shape/query16.out | 61 ++++---
.../nereids_tpcds_shape_sf100_p0/shape/query94.out | 53 +++---
.../aggregate/agg_distinct_case_when.groovy | 54 ++++++
.../window_functions/test_window_fn.groovy | 7 +-
13 files changed, 438 insertions(+), 374 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
index d265c3d8d40..a7eb7c7e5cc 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
@@ -20,6 +20,7 @@ package org.apache.doris.nereids.rules.analysis;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.rules.rewrite.NormalizeToSlot;
+import
org.apache.doris.nereids.rules.rewrite.NormalizeToSlot.NormalizeToSlotContext;
import org.apache.doris.nereids.rules.rewrite.OneRewriteRuleFactory;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.Expression;
@@ -40,9 +41,9 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -100,22 +101,94 @@ public class NormalizeAggregate extends
OneRewriteRuleFactory implements Normali
@Override
public Rule build() {
return
logicalAggregate().whenNot(LogicalAggregate::isNormalized).then(aggregate -> {
+ // The LogicalAggregate node may contain window agg functions and
usual agg functions
+ // we call window agg functions as window-agg and usual agg
functions as trival-agg for short
+ // This rule simplify LogicalAggregate node by:
+ // 1. Push down some exprs from old LogicalAggregate node to a new
child LogicalProject Node,
+ // 2. create a new LogicalAggregate with normalized group by exprs
and trival-aggs
+ // 3. Pull up normalized old LogicalAggregate's output exprs to a
new parent LogicalProject Node
+ // Push down exprs:
+ // 1. all group by exprs
+ // 2. child contains subquery expr in trival-agg
+ // 3. child contains window expr in trival-agg
+ // 4. all input slots of trival-agg
+ // 5. expr(including subquery) in distinct trival-agg
+ // Normalize LogicalAggregate's output.
+ // 1. normalize group by exprs by outputs of bottom LogicalProject
+ // 2. normalize trival-aggs by outputs of bottom LogicalProject
+ // 3. build normalized agg outputs
+ // Pull up exprs:
+ // normalize all output exprs in old LogicalAggregate to build a
parent project node, typically includes:
+ // 1. simple slots
+ // 2. aliases
+ // a. alias with no aggs child
+ // b. alias with trival-agg child
+ // c. alias with window-agg
- List<NamedExpression> aggregateOutput =
aggregate.getOutputExpressions();
- Set<Alias> existsAlias =
ExpressionUtils.mutableCollect(aggregateOutput, Alias.class::isInstance);
+ // Push down exprs:
+ // collect group by exprs
+ Set<Expression> groupingByExprs =
+ ImmutableSet.copyOf(aggregate.getGroupByExpressions());
+ // collect all trival-agg
+ List<NamedExpression> aggregateOutput =
aggregate.getOutputExpressions();
List<AggregateFunction> aggFuncs = Lists.newArrayList();
aggregateOutput.forEach(o ->
o.accept(CollectNonWindowedAggFuncs.INSTANCE, aggFuncs));
- // we need push down subquery exprs inside non-window and
non-distinct agg functions
- Set<SubqueryExpr> subqueryExprs =
ExpressionUtils.mutableCollect(aggFuncs.stream()
- .filter(aggFunc ->
!aggFunc.isDistinct()).collect(Collectors.toList()),
- SubqueryExpr.class::isInstance);
- Set<Expression> groupingByExprs =
ImmutableSet.copyOf(aggregate.getGroupByExpressions());
+ // split non-distinct agg child as two part
+ // TRUE part 1: need push down itself, if it contains subqury or
window expression
+ // FALSE part 2: need push down its input slots, if it DOES NOT
contain subqury or window expression
+ Map<Boolean, Set<Expression>> categorizedNoDistinctAggsChildren =
aggFuncs.stream()
+ .filter(aggFunc -> !aggFunc.isDistinct())
+ .flatMap(agg -> agg.children().stream())
+ .collect(Collectors.groupingBy(
+ child -> child.containsType(SubqueryExpr.class,
WindowExpression.class),
+ Collectors.toSet()));
+
+ // split distinct agg child as two parts
+ // TRUE part 1: need push down itself, if it is NOT SlotReference
or Literal
+ // FALSE part 2: need push down its input slots, if it is
SlotReference or Literal
+ Map<Boolean, Set<Expression>> categorizedDistinctAggsChildren =
aggFuncs.stream()
+ .filter(aggFunc -> aggFunc.isDistinct()).flatMap(agg ->
agg.children().stream())
+ .collect(Collectors.groupingBy(
+ child -> !(child instanceof SlotReference || child
instanceof Literal),
+ Collectors.toSet()));
+
+ Set<Expression> needPushSelf = Sets.union(
+ categorizedNoDistinctAggsChildren.getOrDefault(true, new
HashSet<>()),
+ categorizedDistinctAggsChildren.getOrDefault(true, new
HashSet<>()));
+ Set<Slot> needPushInputSlots =
ExpressionUtils.getInputSlotSet(Sets.union(
+ categorizedNoDistinctAggsChildren.getOrDefault(false, new
HashSet<>()),
+ categorizedDistinctAggsChildren.getOrDefault(false, new
HashSet<>())));
+
+ Set<Alias> existsAlias =
+ ExpressionUtils.mutableCollect(aggregateOutput,
Alias.class::isInstance);
+
+ // push down 3 kinds of exprs, these pushed exprs will be used to
normalize agg output later
+ // 1. group by exprs
+ // 2. trivalAgg children
+ // 3. trivalAgg input slots
+ Set<Expression> allPushDownExprs =
+ Sets.union(groupingByExprs, Sets.union(needPushSelf,
needPushInputSlots));
NormalizeToSlotContext bottomSlotContext =
- NormalizeToSlotContext.buildContext(existsAlias,
Sets.union(groupingByExprs, subqueryExprs));
- Set<NamedExpression> bottomOutputs =
-
bottomSlotContext.pushDownToNamedExpression(Sets.union(groupingByExprs,
subqueryExprs));
+ NormalizeToSlotContext.buildContext(existsAlias,
allPushDownExprs);
+ Set<NamedExpression> pushedGroupByExprs =
+
bottomSlotContext.pushDownToNamedExpression(groupingByExprs);
+ Set<NamedExpression> pushedTrivalAggChildren =
+ bottomSlotContext.pushDownToNamedExpression(needPushSelf);
+ Set<NamedExpression> pushedTrivalAggInputSlots =
+
bottomSlotContext.pushDownToNamedExpression(needPushInputSlots);
+ Set<NamedExpression> bottomProjects =
Sets.union(pushedGroupByExprs,
+ Sets.union(pushedTrivalAggChildren,
pushedTrivalAggInputSlots));
+
+ // create bottom project
+ Plan bottomPlan;
+ if (!bottomProjects.isEmpty()) {
+ bottomPlan = new
LogicalProject<>(ImmutableList.copyOf(bottomProjects),
+ aggregate.child());
+ } else {
+ bottomPlan = aggregate.child();
+ }
// use group by context to normalize agg functions to process
// sql like: select sum(a + 1) from t group by a + 1
@@ -127,89 +200,37 @@ public class NormalizeAggregate extends
OneRewriteRuleFactory implements Normali
// after normalize:
// agg(output: sum(alias(a + 1)[#1])[#2], group_by: alias(a +
1)[#1])
// +-- project((a[#0] + 1)[#1])
- List<AggregateFunction> normalizedAggFuncs =
bottomSlotContext.normalizeToUseSlotRef(aggFuncs);
- Set<NamedExpression> bottomProjects =
Sets.newHashSet(bottomOutputs);
- // TODO: if we have distinct agg, we must push down its children,
- // because need use it to generate distribution enforce
- // step 1: split agg functions into 2 parts: distinct and not
distinct
- List<AggregateFunction> distinctAggFuncs = Lists.newArrayList();
- List<AggregateFunction> nonDistinctAggFuncs = Lists.newArrayList();
- for (AggregateFunction aggregateFunction : normalizedAggFuncs) {
- if (aggregateFunction.isDistinct()) {
- distinctAggFuncs.add(aggregateFunction);
- } else {
- nonDistinctAggFuncs.add(aggregateFunction);
- }
- }
- // step 2: if we only have one distinct agg function, we do push
down for it
- if (!distinctAggFuncs.isEmpty()) {
- // process distinct normalize and put it back to
normalizedAggFuncs
- List<AggregateFunction> newDistinctAggFuncs =
Lists.newArrayList();
- Map<Expression, Expression> replaceMap = Maps.newHashMap();
- Map<Expression, NamedExpression> aliasCache =
Maps.newHashMap();
- for (AggregateFunction distinctAggFunc : distinctAggFuncs) {
- List<Expression> newChildren = Lists.newArrayList();
- for (Expression child : distinctAggFunc.children()) {
- if (child instanceof SlotReference || child instanceof
Literal) {
- newChildren.add(child);
- } else {
- NamedExpression alias;
- if (aliasCache.containsKey(child)) {
- alias = aliasCache.get(child);
- } else {
- alias = new Alias(child);
- aliasCache.put(child, alias);
- }
- bottomProjects.add(alias);
- newChildren.add(alias.toSlot());
- }
- }
- AggregateFunction newDistinctAggFunc =
distinctAggFunc.withChildren(newChildren);
- replaceMap.put(distinctAggFunc, newDistinctAggFunc);
- newDistinctAggFuncs.add(newDistinctAggFunc);
- }
- aggregateOutput = aggregateOutput.stream()
- .map(e -> ExpressionUtils.replace(e, replaceMap))
- .map(NamedExpression.class::cast)
- .collect(Collectors.toList());
- distinctAggFuncs = newDistinctAggFuncs;
- }
- normalizedAggFuncs = Lists.newArrayList(nonDistinctAggFuncs);
- normalizedAggFuncs.addAll(distinctAggFuncs);
- // TODO: process redundant expressions in aggregate functions
children
+
+ // normalize group by exprs by bottomProjects
+ List<Expression> normalizedGroupExprs =
+ bottomSlotContext.normalizeToUseSlotRef(groupingByExprs);
+
+ // normalize trival-aggs by bottomProjects
+ List<AggregateFunction> normalizedAggFuncs =
+ bottomSlotContext.normalizeToUseSlotRef(aggFuncs);
+
// build normalized agg output
NormalizeToSlotContext normalizedAggFuncsToSlotContext =
NormalizeToSlotContext.buildContext(existsAlias,
normalizedAggFuncs);
- // agg output include 2 part, normalized group by slots and
normalized agg functions
+
+ // agg output include 2 parts
+ // pushedGroupByExprs and normalized agg functions
List<NamedExpression> normalizedAggOutput =
ImmutableList.<NamedExpression>builder()
-
.addAll(bottomOutputs.stream().map(NamedExpression::toSlot).iterator())
-
.addAll(normalizedAggFuncsToSlotContext.pushDownToNamedExpression(normalizedAggFuncs))
+
.addAll(pushedGroupByExprs.stream().map(NamedExpression::toSlot).iterator())
+ .addAll(normalizedAggFuncsToSlotContext
+ .pushDownToNamedExpression(normalizedAggFuncs))
.build();
- // add normalized agg's input slots to bottom projects
- Set<Slot> bottomProjectSlots = bottomProjects.stream()
- .map(NamedExpression::toSlot)
- .collect(Collectors.toSet());
- Set<NamedExpression> aggInputSlots = normalizedAggFuncs.stream()
- .map(Expression::getInputSlots)
- .flatMap(Set::stream)
- .filter(e -> !bottomProjectSlots.contains(e))
- .collect(Collectors.toSet());
- bottomProjects.addAll(aggInputSlots);
- // build group by exprs
- List<Expression> normalizedGroupExprs =
bottomSlotContext.normalizeToUseSlotRef(groupingByExprs);
- Plan bottomPlan;
- if (!bottomProjects.isEmpty()) {
- bottomPlan = new
LogicalProject<>(ImmutableList.copyOf(bottomProjects), aggregate.child());
- } else {
- bottomPlan = aggregate.child();
- }
+ // create new agg node
+ LogicalAggregate newAggregate =
+ aggregate.withNormalized(normalizedGroupExprs,
normalizedAggOutput, bottomPlan);
+ // create upper projects by normalize all output exprs in old
LogicalAggregate
List<NamedExpression> upperProjects =
normalizeOutput(aggregateOutput,
bottomSlotContext, normalizedAggFuncsToSlotContext);
- return new LogicalProject<>(upperProjects,
- aggregate.withNormalized(normalizedGroupExprs,
normalizedAggOutput, bottomPlan));
+ // create a parent project node
+ return new LogicalProject<>(upperProjects, newAggregate);
}).toRule(RuleType.NORMALIZE_AGGREGATE);
}
diff --git
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out
index 1b116185aeb..3fe19ee10d9 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out
@@ -3,37 +3,36 @@
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalTopN[LOCAL_SORT]
-------PhysicalProject
---------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalProject
---------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=((
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4
cs_order_number->[cs_order_number]
+------hashAgg[DISTINCT_GLOBAL]
+--------PhysicalDistribute
+----------hashAgg[DISTINCT_LOCAL]
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=((
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4
cs_order_number->[cs_order_number]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
+--------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
build RFs:RF3 cs_order_number->[cr_order_number]
----------------------PhysicalDistribute
------------------------PhysicalProject
---------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
-----------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
build RFs:RF3 cs_order_number->[cr_order_number]
-------------------------PhysicalDistribute
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF3
-------------------------PhysicalDistribute
---------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk))
otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk]
-----------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
-------------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[catalog_sales] apply RFs:
RF0 RF1 RF2
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((customer_address.ca_state = 'PA'))
---------------------------------------PhysicalOlapScan[customer_address]
+--------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF3
+----------------------PhysicalDistribute
+------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk))
otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk]
+--------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
+----------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
RF1 RF2
------------------------------PhysicalDistribute
--------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_date <= '2002-05-31') and
(date_dim.d_date >= '2002-04-01'))
-------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------filter((customer_address.ca_state = 'PA'))
+------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
---------------------------------filter((call_center.cc_county = 'Williamson
County'))
-----------------------------------PhysicalOlapScan[call_center]
+--------------------------------filter((date_dim.d_date <= '2002-05-31') and
(date_dim.d_date >= '2002-04-01'))
+----------------------------------PhysicalOlapScan[date_dim]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------filter((call_center.cc_county = 'Williamson
County'))
+--------------------------------PhysicalOlapScan[call_center]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out
index b7cc3a9c809..4e6d82d7ab9 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out
@@ -3,37 +3,36 @@
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalTopN[LOCAL_SORT]
-------PhysicalProject
---------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalProject
---------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=((
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4
ws_order_number->[ws_order_number]
+------hashAgg[DISTINCT_GLOBAL]
+--------PhysicalDistribute
+----------hashAgg[DISTINCT_LOCAL]
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=((
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4
ws_order_number->[ws_order_number]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
+--------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=()
build RFs:RF3 ws_order_number->[wr_order_number]
----------------------PhysicalDistribute
------------------------PhysicalProject
---------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
-----------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=()
build RFs:RF3 ws_order_number->[wr_order_number]
-------------------------PhysicalDistribute
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[web_returns] apply RFs: RF3
-------------------------PhysicalDistribute
---------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=()
build RFs:RF2 web_site_sk->[ws_web_site_sk]
-----------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
-------------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
RF1 RF2
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((customer_address.ca_state = 'OK'))
---------------------------------------PhysicalOlapScan[customer_address]
+--------------------------PhysicalOlapScan[web_returns] apply RFs: RF3
+----------------------PhysicalDistribute
+------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=()
build RFs:RF2 web_site_sk->[ws_web_site_sk]
+--------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
+----------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
RF2
------------------------------PhysicalDistribute
--------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_date <= '2002-06-30') and
(date_dim.d_date >= '2002-05-01'))
-------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------filter((customer_address.ca_state = 'OK'))
+------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
---------------------------------filter((web_site.web_company_name = 'pri'))
-----------------------------------PhysicalOlapScan[web_site]
+--------------------------------filter((date_dim.d_date <= '2002-06-30') and
(date_dim.d_date >= '2002-05-01'))
+----------------------------------PhysicalOlapScan[date_dim]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------filter((web_site.web_company_name = 'pri'))
+--------------------------------PhysicalOlapScan[web_site]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out
index d7431a4cc90..b08f9d62730 100644
---
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out
+++
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out
@@ -3,37 +3,36 @@
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalTopN[LOCAL_SORT]
-------PhysicalProject
---------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalProject
---------------------hashJoin[LEFT_ANTI_JOIN]
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
-----------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk))
otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk]
-------------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=((
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2
cs_order_number->[cs_order_number]
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
---------------------------PhysicalDistribute
-----------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF1 ca_address_sk->[cs_ship_addr_sk]
-------------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF0 d_date_sk->[cs_ship_date_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[catalog_sales] apply RFs:
RF0 RF1 RF3
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_date <= '2002-05-31')
and (date_dim.d_date >= '2002-04-01'))
---------------------------------------PhysicalOlapScan[date_dim]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter((customer_address.ca_state = 'WV'))
-------------------------------------PhysicalOlapScan[customer_address]
+------hashAgg[DISTINCT_GLOBAL]
+--------PhysicalDistribute
+----------hashAgg[DISTINCT_LOCAL]
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalProject
+------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((cs1.cs_order_number
= cr1.cr_order_number)) otherCondition=()
+--------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk
= call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3
cc_call_center_sk->[cs_call_center_sk]
+----------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=((
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2
cs_order_number->[cs_order_number]
------------------------PhysicalDistribute
--------------------------PhysicalProject
-----------------------------filter(cc_county IN ('Barrow County', 'Daviess
County', 'Luce County', 'Richland County', 'Ziebach County'))
-------------------------------PhysicalOlapScan[call_center]
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
+------------------------PhysicalDistribute
+--------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF1 ca_address_sk->[cs_ship_addr_sk]
+----------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF0 d_date_sk->[cs_ship_date_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
RF1 RF3
+------------------------------PhysicalDistribute
+--------------------------------PhysicalProject
+----------------------------------filter((date_dim.d_date <= '2002-05-31') and
(date_dim.d_date >= '2002-04-01'))
+------------------------------------PhysicalOlapScan[date_dim]
+----------------------------PhysicalDistribute
+------------------------------PhysicalProject
+--------------------------------filter((customer_address.ca_state = 'WV'))
+----------------------------------PhysicalOlapScan[customer_address]
----------------------PhysicalDistribute
------------------------PhysicalProject
---------------------------PhysicalOlapScan[catalog_returns]
+--------------------------filter(cc_county IN ('Barrow County', 'Daviess
County', 'Luce County', 'Richland County', 'Ziebach County'))
+----------------------------PhysicalOlapScan[call_center]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[catalog_returns]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out
index 879e2acc496..a7f921c597b 100644
---
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out
+++
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out
@@ -3,37 +3,36 @@
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalTopN[LOCAL_SORT]
-------PhysicalProject
---------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalProject
---------------------hashJoin[LEFT_ANTI_JOIN]
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=()
-----------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk
= web_site.web_site_sk)) otherCondition=() build RFs:RF3
web_site_sk->[ws_web_site_sk]
-------------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=((
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF2
ws_order_number->[ws_order_number]
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2
---------------------------PhysicalDistribute
-----------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk]
-------------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF0 d_date_sk->[ws_ship_date_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
RF1 RF3
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_date <= '2000-04-01')
and (date_dim.d_date >= '2000-02-01'))
---------------------------------------PhysicalOlapScan[date_dim]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter((customer_address.ca_state = 'OK'))
-------------------------------------PhysicalOlapScan[customer_address]
+------hashAgg[DISTINCT_GLOBAL]
+--------PhysicalDistribute
+----------hashAgg[DISTINCT_LOCAL]
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalProject
+------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((ws1.ws_order_number
= wr1.wr_order_number)) otherCondition=()
+--------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk =
web_site.web_site_sk)) otherCondition=() build RFs:RF3
web_site_sk->[ws_web_site_sk]
+----------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=((
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF2
ws_order_number->[ws_order_number]
------------------------PhysicalDistribute
--------------------------PhysicalProject
-----------------------------filter((web_site.web_company_name = 'pri'))
-------------------------------PhysicalOlapScan[web_site]
+----------------------------PhysicalOlapScan[web_sales] apply RFs: RF2
+------------------------PhysicalDistribute
+--------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk]
+----------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF0 d_date_sk->[ws_ship_date_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
RF3
+------------------------------PhysicalDistribute
+--------------------------------PhysicalProject
+----------------------------------filter((date_dim.d_date <= '2000-04-01') and
(date_dim.d_date >= '2000-02-01'))
+------------------------------------PhysicalOlapScan[date_dim]
+----------------------------PhysicalDistribute
+------------------------------PhysicalProject
+--------------------------------filter((customer_address.ca_state = 'OK'))
+----------------------------------PhysicalOlapScan[customer_address]
----------------------PhysicalDistribute
------------------------PhysicalProject
---------------------------PhysicalOlapScan[web_returns]
+--------------------------filter((web_site.web_company_name = 'pri'))
+----------------------------PhysicalOlapScan[web_site]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_returns]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out
index d7431a4cc90..b08f9d62730 100644
---
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out
+++
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out
@@ -3,37 +3,36 @@
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalTopN[LOCAL_SORT]
-------PhysicalProject
---------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalProject
---------------------hashJoin[LEFT_ANTI_JOIN]
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
-----------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk))
otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk]
-------------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=((
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2
cs_order_number->[cs_order_number]
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
---------------------------PhysicalDistribute
-----------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF1 ca_address_sk->[cs_ship_addr_sk]
-------------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF0 d_date_sk->[cs_ship_date_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[catalog_sales] apply RFs:
RF0 RF1 RF3
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_date <= '2002-05-31')
and (date_dim.d_date >= '2002-04-01'))
---------------------------------------PhysicalOlapScan[date_dim]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter((customer_address.ca_state = 'WV'))
-------------------------------------PhysicalOlapScan[customer_address]
+------hashAgg[DISTINCT_GLOBAL]
+--------PhysicalDistribute
+----------hashAgg[DISTINCT_LOCAL]
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalProject
+------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((cs1.cs_order_number
= cr1.cr_order_number)) otherCondition=()
+--------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk
= call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3
cc_call_center_sk->[cs_call_center_sk]
+----------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=((
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2
cs_order_number->[cs_order_number]
------------------------PhysicalDistribute
--------------------------PhysicalProject
-----------------------------filter(cc_county IN ('Barrow County', 'Daviess
County', 'Luce County', 'Richland County', 'Ziebach County'))
-------------------------------PhysicalOlapScan[call_center]
+----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
+------------------------PhysicalDistribute
+--------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF1 ca_address_sk->[cs_ship_addr_sk]
+----------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF0 d_date_sk->[cs_ship_date_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
RF1 RF3
+------------------------------PhysicalDistribute
+--------------------------------PhysicalProject
+----------------------------------filter((date_dim.d_date <= '2002-05-31') and
(date_dim.d_date >= '2002-04-01'))
+------------------------------------PhysicalOlapScan[date_dim]
+----------------------------PhysicalDistribute
+------------------------------PhysicalProject
+--------------------------------filter((customer_address.ca_state = 'WV'))
+----------------------------------PhysicalOlapScan[customer_address]
----------------------PhysicalDistribute
------------------------PhysicalProject
---------------------------PhysicalOlapScan[catalog_returns]
+--------------------------filter(cc_county IN ('Barrow County', 'Daviess
County', 'Luce County', 'Richland County', 'Ziebach County'))
+----------------------------PhysicalOlapScan[call_center]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[catalog_returns]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out
index 879e2acc496..a7f921c597b 100644
---
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out
+++
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out
@@ -3,37 +3,36 @@
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalTopN[LOCAL_SORT]
-------PhysicalProject
---------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalProject
---------------------hashJoin[LEFT_ANTI_JOIN]
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=()
-----------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk
= web_site.web_site_sk)) otherCondition=() build RFs:RF3
web_site_sk->[ws_web_site_sk]
-------------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=((
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF2
ws_order_number->[ws_order_number]
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2
---------------------------PhysicalDistribute
-----------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk]
-------------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF0 d_date_sk->[ws_ship_date_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
RF1 RF3
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((date_dim.d_date <= '2000-04-01')
and (date_dim.d_date >= '2000-02-01'))
---------------------------------------PhysicalOlapScan[date_dim]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter((customer_address.ca_state = 'OK'))
-------------------------------------PhysicalOlapScan[customer_address]
+------hashAgg[DISTINCT_GLOBAL]
+--------PhysicalDistribute
+----------hashAgg[DISTINCT_LOCAL]
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalProject
+------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((ws1.ws_order_number
= wr1.wr_order_number)) otherCondition=()
+--------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk =
web_site.web_site_sk)) otherCondition=() build RFs:RF3
web_site_sk->[ws_web_site_sk]
+----------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=((
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF2
ws_order_number->[ws_order_number]
------------------------PhysicalDistribute
--------------------------PhysicalProject
-----------------------------filter((web_site.web_company_name = 'pri'))
-------------------------------PhysicalOlapScan[web_site]
+----------------------------PhysicalOlapScan[web_sales] apply RFs: RF2
+------------------------PhysicalDistribute
+--------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk]
+----------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF0 d_date_sk->[ws_ship_date_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
RF3
+------------------------------PhysicalDistribute
+--------------------------------PhysicalProject
+----------------------------------filter((date_dim.d_date <= '2000-04-01') and
(date_dim.d_date >= '2000-02-01'))
+------------------------------------PhysicalOlapScan[date_dim]
+----------------------------PhysicalDistribute
+------------------------------PhysicalProject
+--------------------------------filter((customer_address.ca_state = 'OK'))
+----------------------------------PhysicalOlapScan[customer_address]
----------------------PhysicalDistribute
------------------------PhysicalProject
---------------------------PhysicalOlapScan[web_returns]
+--------------------------filter((web_site.web_company_name = 'pri'))
+----------------------------PhysicalOlapScan[web_site]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_returns]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out
index b0921bb9f0c..f715eb2136d 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out
@@ -3,37 +3,36 @@
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalTopN[LOCAL_SORT]
-------PhysicalProject
---------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalProject
---------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=((
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4
cs_order_number->[cs_order_number]
-----------------------PhysicalDistribute
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
-----------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk))
otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk]
-------------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
build RFs:RF2 cs_order_number->[cr_order_number]
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2
---------------------------PhysicalDistribute
-----------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
-------------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[catalog_sales] apply RFs:
RF0 RF1 RF3
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((customer_address.ca_state = 'WV'))
---------------------------------------PhysicalOlapScan[customer_address]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_date <= '2002-05-31') and
(date_dim.d_date >= '2002-04-01'))
-------------------------------------PhysicalOlapScan[date_dim]
+------hashAgg[DISTINCT_GLOBAL]
+--------PhysicalDistribute
+----------hashAgg[DISTINCT_LOCAL]
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=((
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4
cs_order_number->[cs_order_number]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
+--------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk
= call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3
cc_call_center_sk->[cs_call_center_sk]
+----------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
build RFs:RF2 cs_order_number->[cr_order_number]
------------------------PhysicalDistribute
--------------------------PhysicalProject
-----------------------------filter(cc_county IN ('Barrow County', 'Daviess
County', 'Luce County', 'Richland County', 'Ziebach County'))
-------------------------------PhysicalOlapScan[call_center]
+----------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2
+------------------------PhysicalDistribute
+--------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
+----------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
RF1 RF3
+------------------------------PhysicalDistribute
+--------------------------------PhysicalProject
+----------------------------------filter((customer_address.ca_state = 'WV'))
+------------------------------------PhysicalOlapScan[customer_address]
+----------------------------PhysicalDistribute
+------------------------------PhysicalProject
+--------------------------------filter((date_dim.d_date <= '2002-05-31') and
(date_dim.d_date >= '2002-04-01'))
+----------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------filter(cc_county IN ('Barrow County', 'Daviess
County', 'Luce County', 'Richland County', 'Ziebach County'))
+----------------------------PhysicalOlapScan[call_center]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out
index 4fb347a2c78..4c4f311c281 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out
@@ -3,37 +3,36 @@
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalTopN[LOCAL_SORT]
-------PhysicalProject
---------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalProject
---------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=((
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4
ws_order_number->[ws_order_number]
+------hashAgg[DISTINCT_GLOBAL]
+--------PhysicalDistribute
+----------hashAgg[DISTINCT_LOCAL]
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=((
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4
ws_order_number->[ws_order_number]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
+--------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=()
build RFs:RF3 ws_order_number->[wr_order_number]
----------------------PhysicalDistribute
------------------------PhysicalProject
---------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
-----------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=()
build RFs:RF3 ws_order_number->[wr_order_number]
-------------------------PhysicalDistribute
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[web_returns] apply RFs: RF3
-------------------------PhysicalDistribute
---------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=()
build RFs:RF2 web_site_sk->[ws_web_site_sk]
-----------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
-------------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
RF1 RF2
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((customer_address.ca_state = 'OK'))
---------------------------------------PhysicalOlapScan[customer_address]
+--------------------------PhysicalOlapScan[web_returns] apply RFs: RF3
+----------------------PhysicalDistribute
+------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=()
build RFs:RF2 web_site_sk->[ws_web_site_sk]
+--------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
+----------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
RF2
------------------------------PhysicalDistribute
--------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_date <= '2000-04-01') and
(date_dim.d_date >= '2000-02-01'))
-------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------filter((customer_address.ca_state = 'OK'))
+------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
---------------------------------filter((web_site.web_company_name = 'pri'))
-----------------------------------PhysicalOlapScan[web_site]
+--------------------------------filter((date_dim.d_date <= '2000-04-01') and
(date_dim.d_date >= '2000-02-01'))
+----------------------------------PhysicalOlapScan[date_dim]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------filter((web_site.web_company_name = 'pri'))
+--------------------------------PhysicalOlapScan[web_site]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out
index b0921bb9f0c..f715eb2136d 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out
@@ -3,37 +3,36 @@
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalTopN[LOCAL_SORT]
-------PhysicalProject
---------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalProject
---------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=((
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4
cs_order_number->[cs_order_number]
-----------------------PhysicalDistribute
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
-----------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk))
otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk]
-------------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
build RFs:RF2 cs_order_number->[cr_order_number]
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2
---------------------------PhysicalDistribute
-----------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
-------------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[catalog_sales] apply RFs:
RF0 RF1 RF3
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((customer_address.ca_state = 'WV'))
---------------------------------------PhysicalOlapScan[customer_address]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_date <= '2002-05-31') and
(date_dim.d_date >= '2002-04-01'))
-------------------------------------PhysicalOlapScan[date_dim]
+------hashAgg[DISTINCT_GLOBAL]
+--------PhysicalDistribute
+----------hashAgg[DISTINCT_LOCAL]
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=((
not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4
cs_order_number->[cs_order_number]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
+--------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk
= call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3
cc_call_center_sk->[cs_call_center_sk]
+----------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
build RFs:RF2 cs_order_number->[cr_order_number]
------------------------PhysicalDistribute
--------------------------PhysicalProject
-----------------------------filter(cc_county IN ('Barrow County', 'Daviess
County', 'Luce County', 'Richland County', 'Ziebach County'))
-------------------------------PhysicalOlapScan[call_center]
+----------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2
+------------------------PhysicalDistribute
+--------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[cs_ship_date_sk]
+----------------------------hashJoin[INNER_JOIN]
hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
RF1 RF3
+------------------------------PhysicalDistribute
+--------------------------------PhysicalProject
+----------------------------------filter((customer_address.ca_state = 'WV'))
+------------------------------------PhysicalOlapScan[customer_address]
+----------------------------PhysicalDistribute
+------------------------------PhysicalProject
+--------------------------------filter((date_dim.d_date <= '2002-05-31') and
(date_dim.d_date >= '2002-04-01'))
+----------------------------------PhysicalOlapScan[date_dim]
+----------------------PhysicalDistribute
+------------------------PhysicalProject
+--------------------------filter(cc_county IN ('Barrow County', 'Daviess
County', 'Luce County', 'Richland County', 'Ziebach County'))
+----------------------------PhysicalOlapScan[call_center]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out
index 4fb347a2c78..4c4f311c281 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out
@@ -3,37 +3,36 @@
PhysicalResultSink
--PhysicalTopN[MERGE_SORT]
----PhysicalTopN[LOCAL_SORT]
-------PhysicalProject
---------hashAgg[DISTINCT_GLOBAL]
-----------PhysicalDistribute
-------------hashAgg[DISTINCT_LOCAL]
---------------hashAgg[GLOBAL]
-----------------hashAgg[LOCAL]
-------------------PhysicalProject
---------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=((
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4
ws_order_number->[ws_order_number]
+------hashAgg[DISTINCT_GLOBAL]
+--------PhysicalDistribute
+----------hashAgg[DISTINCT_LOCAL]
+------------hashAgg[GLOBAL]
+--------------hashAgg[LOCAL]
+----------------PhysicalProject
+------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=((
not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4
ws_order_number->[ws_order_number]
+--------------------PhysicalDistribute
+----------------------PhysicalProject
+------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
+--------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=()
build RFs:RF3 ws_order_number->[wr_order_number]
----------------------PhysicalDistribute
------------------------PhysicalProject
---------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
-----------------------hashJoin[RIGHT_ANTI_JOIN]
hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=()
build RFs:RF3 ws_order_number->[wr_order_number]
-------------------------PhysicalDistribute
---------------------------PhysicalProject
-----------------------------PhysicalOlapScan[web_returns] apply RFs: RF3
-------------------------PhysicalDistribute
---------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=()
build RFs:RF2 web_site_sk->[ws_web_site_sk]
-----------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
-------------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
RF1 RF2
---------------------------------PhysicalDistribute
-----------------------------------PhysicalProject
-------------------------------------filter((customer_address.ca_state = 'OK'))
---------------------------------------PhysicalOlapScan[customer_address]
+--------------------------PhysicalOlapScan[web_returns] apply RFs: RF3
+----------------------PhysicalDistribute
+------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=()
build RFs:RF2 web_site_sk->[ws_web_site_sk]
+--------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=()
build RFs:RF1 d_date_sk->[ws_ship_date_sk]
+----------------------------hashJoin[INNER_JOIN]
hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk))
otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk]
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1
RF2
------------------------------PhysicalDistribute
--------------------------------PhysicalProject
-----------------------------------filter((date_dim.d_date <= '2000-04-01') and
(date_dim.d_date >= '2000-02-01'))
-------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------filter((customer_address.ca_state = 'OK'))
+------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
---------------------------------filter((web_site.web_company_name = 'pri'))
-----------------------------------PhysicalOlapScan[web_site]
+--------------------------------filter((date_dim.d_date <= '2000-04-01') and
(date_dim.d_date >= '2000-02-01'))
+----------------------------------PhysicalOlapScan[date_dim]
+--------------------------PhysicalDistribute
+----------------------------PhysicalProject
+------------------------------filter((web_site.web_company_name = 'pri'))
+--------------------------------PhysicalOlapScan[web_site]
diff --git
a/regression-test/suites/nereids_p0/aggregate/agg_distinct_case_when.groovy
b/regression-test/suites/nereids_p0/aggregate/agg_distinct_case_when.groovy
new file mode 100644
index 00000000000..546586702e5
--- /dev/null
+++ b/regression-test/suites/nereids_p0/aggregate/agg_distinct_case_when.groovy
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+suite("agg_distinct_case_when") {
+ sql "SET enable_nereids_planner=true"
+ sql "SET enable_fallback_to_original_planner=false"
+ sql "DROP TABLE IF EXISTS agg_test_table_t;"
+ sql """
+ CREATE TABLE `agg_test_table_t` (
+ `k1` varchar(65533) NULL,
+ `k2` text NULL,
+ `k3` text null,
+ `k4` text null
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k1`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "is_being_synced" = "false",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false"
+ );
+ """
+
+ sql """insert into agg_test_table_t(`k1`,`k2`,`k3`)
values('20231026221524','PA','adigu1bububud');"""
+ sql """
+ select
+ count(distinct case when t.k2='PA' and
loan_date=to_date(substr(t.k1,1,8)) then t.k2 end )
+ from (
+ select substr(k1,1,8) loan_date,k3,k2,k1 from agg_test_table_t) t
+ group by
+ substr(t.k1,1,8);"""
+
+ sql "DROP TABLE IF EXISTS agg_test_table_t;"
+}
diff --git
a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy
b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy
index f4b62846bca..17f1b6aa87d 100644
---
a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy
+++
b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy
@@ -383,10 +383,9 @@ suite("test_window_fn", "arrow_flight_sql") {
"storage_format" = "V2"
);
"""
- test {
- sql """SELECT SUM(MAX(c1) OVER (PARTITION BY c2, c3)) FROM
test_window_in_agg;"""
- exception "errCode = 2, detailMessage = AGGREGATE clause must not
contain analytic expressions"
- }
+ sql """set enable_nereids_planner=true;"""
+ sql """SELECT SUM(MAX(c1) OVER (PARTITION BY c2, c3)) FROM
test_window_in_agg;"""
+
sql "DROP TABLE IF EXISTS test_window_in_agg;"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]