This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2e8a2a69411 [Feat](nereids) add transform rule
SimplifyWindowExpression (#33647)
2e8a2a69411 is described below
commit 2e8a2a6941153acd9711e3976f88aefc3c4d7c3d
Author: feiniaofeiafei <[email protected]>
AuthorDate: Wed Apr 24 16:11:13 2024 +0800
[Feat](nereids) add transform rule SimplifyWindowExpression (#33647)
rewrite func(para) over (partition by unique_keys)
1. func() is count(non-null) or rank/dense_rank/row_number -> 1
2. func(para) is min/max/sum/avg/first_value/last_value -> para
e.g.
select max(c1) over(partition by pk) from t1;
-> select c1 from t1;
---
.../doris/nereids/jobs/executor/Rewriter.java | 4 +-
.../org/apache/doris/nereids/rules/RuleType.java | 1 +
.../rules/rewrite/SimplifyWindowExpression.java | 123 ++++++++++
.../doris/nereids/properties/UniqueTest.java | 2 +-
.../simplify_window_expression.out | 254 +++++++++++++++++++++
.../shape/query70.out | 36 ++-
.../noStatsRfPrune/query70.out | 36 ++-
.../no_stats_shape/query70.out | 35 ++-
.../rf_prune/query70.out | 36 ++-
.../nereids_tpcds_shape_sf100_p0/shape/query70.out | 35 ++-
.../simplify_window_expression.groovy | 110 +++++++++
11 files changed, 567 insertions(+), 105 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index 2361c276372..24669f40ff8 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -121,6 +121,7 @@ import
org.apache.doris.nereids.rules.rewrite.PushProjectThroughUnion;
import org.apache.doris.nereids.rules.rewrite.ReduceAggregateChildOutputRows;
import org.apache.doris.nereids.rules.rewrite.ReorderJoin;
import org.apache.doris.nereids.rules.rewrite.RewriteCteChildren;
+import org.apache.doris.nereids.rules.rewrite.SimplifyWindowExpression;
import org.apache.doris.nereids.rules.rewrite.SplitLimit;
import org.apache.doris.nereids.rules.rewrite.SumLiteralRewrite;
import org.apache.doris.nereids.rules.rewrite.TransposeSemiJoinAgg;
@@ -226,7 +227,8 @@ public class Rewriter extends AbstractBatchJobExecutor {
topic("Window analysis",
topDown(
new ExtractAndNormalizeWindowExpression(),
- new CheckAndStandardizeWindowFunctionAndFrame()
+ new CheckAndStandardizeWindowFunctionAndFrame(),
+ new SimplifyWindowExpression()
)
),
topic("Rewrite join",
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
index 696463523f6..4da8c36f89e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
@@ -96,6 +96,7 @@ public enum RuleType {
NORMALIZE_SORT(RuleTypeClass.REWRITE),
NORMALIZE_REPEAT(RuleTypeClass.REWRITE),
EXTRACT_AND_NORMALIZE_WINDOW_EXPRESSIONS(RuleTypeClass.REWRITE),
+ SIMPLIFY_WINDOW_EXPRESSION(RuleTypeClass.REWRITE),
CHECK_AND_STANDARDIZE_WINDOW_FUNCTION_AND_FRAME(RuleTypeClass.REWRITE),
CHECK_MATCH_EXPRESSION(RuleTypeClass.REWRITE),
CREATE_PARTITION_TOPN_FOR_WINDOW(RuleTypeClass.REWRITE),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java
new file mode 100644
index 00000000000..872ca789818
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite;
+
+import org.apache.doris.nereids.annotation.DependsRules;
+import org.apache.doris.nereids.pattern.MatchingContext;
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Alias;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.WindowExpression;
+import org.apache.doris.nereids.trees.expressions.functions.BoundFunction;
+import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
+import org.apache.doris.nereids.trees.plans.logical.LogicalWindow;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Rewrite {@code func(para) over (partition by unique_keys)}.
+ *
+ * <p>The rewrite only fires when the functional dependencies of the window node report the
+ * partition keys as unique:
+ * <ol>
+ *   <li>count(non-null), argument-less count and rank/dense_rank/row_number are replaced by 1;</li>
+ *   <li>min/max/sum/avg/first_value/last_value are replaced by their first argument.</li>
+ * </ol>
+ *
+ * <p>e.g.
+ * <pre>
+ * select max(c1) over(partition by pk) from t1;
+ * -> select c1 from t1;
+ * </pre>
+ */
+@DependsRules({
+    ExtractAndNormalizeWindowExpression.class
+})
+public class SimplifyWindowExpression extends OneRewriteRuleFactory {
+    private static final String COUNT = "count";
+    /** Window functions that are replaced by the constant 1. */
+    private static final ImmutableSet<String> REWRITE_TO_CONST_WINDOW_FUNCTIONS =
+            ImmutableSet.of("rank", "dense_rank", "row_number");
+    /** Window functions that are replaced by their first argument. */
+    private static final ImmutableSet<String> REWRITE_TO_SLOT_WINDOW_FUNCTIONS =
+            ImmutableSet.of("min", "max", "sum", "avg", "first_value", "last_value");
+
+    @Override
+    public Rule build() {
+        return logicalWindow(any()).thenApply(this::simplify)
+                .toRule(RuleType.SIMPLIFY_WINDOW_EXPRESSION);
+    }
+
+    /**
+     * Splits the window expressions of the matched node into those that can be replaced by a
+     * plain projection and those that must stay in the window, then rebuilds the plan.
+     *
+     * @param ctx matching context whose root is the window node to simplify
+     * @return the original window when nothing can be rewritten; otherwise a project placed on
+     *         top of either the window child (everything rewritten) or a window that keeps only
+     *         the remaining expressions
+     */
+    private Plan simplify(MatchingContext<LogicalWindow<Plan>> ctx) {
+        LogicalWindow<Plan> window = ctx.root;
+        ImmutableList.Builder<NamedExpression> projectionsBuilder = ImmutableList.builder();
+        ImmutableList.Builder<NamedExpression> remainWindowExpression = ImmutableList.builder();
+        for (NamedExpression expr : window.getWindowExpressions()) {
+            Alias alias = (Alias) expr;
+            WindowExpression windowExpression = (WindowExpression) alias.child();
+            Expression function = windowExpression.getFunction();
+            // Every expression that is not rewritten must be kept in the window; silently
+            // skipping one would remove its output column from the plan.
+            if (!(function instanceof BoundFunction) || !isPartitionedByUniqueKeys(window, windowExpression)) {
+                remainWindowExpression.add(expr);
+                continue;
+            }
+            BoundFunction boundFunction = (BoundFunction) function;
+            // NOTE(review): the replacement below reuses the alias' expr id, but its data type
+            // may differ from the window function's result type (e.g. a TinyIntLiteral replacing
+            // a ranking function, or sum's argument replacing sum) -- confirm whether a cast to
+            // function.getDataType() is needed here.
+            if (isRewritableToConstant(boundFunction)) {
+                projectionsBuilder.add(new Alias(alias.getExprId(), new TinyIntLiteral((byte) 1), alias.getName()));
+            } else if (REWRITE_TO_SLOT_WINDOW_FUNCTIONS.contains(boundFunction.getName())) {
+                projectionsBuilder.add(new Alias(alias.getExprId(), boundFunction.child(0), alias.getName()));
+            } else {
+                remainWindowExpression.add(expr);
+            }
+        }
+        List<NamedExpression> projections = projectionsBuilder.build();
+        List<NamedExpression> remainWindows = remainWindowExpression.build();
+        if (projections.isEmpty()) {
+            return window;
+        }
+        Plan windowChild = window.child(0);
+        List<NamedExpression> finalProjections = Lists.newArrayList(projections);
+        // Pass the child's slots through in both branches so the new project exposes the same
+        // slots the original window did.
+        finalProjections.addAll(windowChild.getOutput());
+        if (remainWindows.isEmpty()) {
+            return new LogicalProject<>(finalProjections, windowChild);
+        }
+        for (NamedExpression remainWindow : remainWindows) {
+            finalProjections.add(remainWindow.toSlot());
+        }
+        return new LogicalProject<>(finalProjections, window.withExpression(remainWindows, windowChild));
+    }
+
+    /**
+     * Returns true when no partition key has a metric-only data type and the functional
+     * dependencies of {@code window} report the set of partition keys as unique.
+     */
+    private boolean isPartitionedByUniqueKeys(LogicalWindow<Plan> window, WindowExpression windowExpression) {
+        List<Expression> partitionKeys = windowExpression.getPartitionKeys();
+        for (Expression partitionKey : partitionKeys) {
+            if (partitionKey.getDataType().isOnlyMetricType()) {
+                return false;
+            }
+        }
+        Set<Slot> partitionSlotSet = new HashSet<>();
+        for (Expression partitionKey : partitionKeys) {
+            // after normalize window, partition key must be slot
+            partitionSlotSet.add((Slot) partitionKey);
+        }
+        return window.getLogicalProperties().getFunctionalDependencies().isUnique(partitionSlotSet);
+    }
+
+    /**
+     * Returns true for rank/dense_rank/row_number, and for count when it either has no argument
+     * or its first argument is not nullable.
+     */
+    private boolean isRewritableToConstant(BoundFunction function) {
+        String name = function.getName();
+        if (REWRITE_TO_CONST_WINDOW_FUNCTIONS.contains(name)) {
+            return true;
+        }
+        // Guard the child access: a count without arguments (presumably count(*)) has no child(0).
+        return name.equals(COUNT) && (function.children().isEmpty() || function.child(0).notNullable());
+    }
+}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java
index 27d64ad186c..fb57098421b 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java
@@ -363,7 +363,7 @@ class UniqueTest extends TestWithFeService {
void testWindow() {
// partition by uniform
Plan plan = PlanChecker.from(connectContext)
- .analyze("select id, row_number() over(partition by id) from
agg where id =1")
+ .analyze("select id, row_number() over(partition by name) from
agg where name ='d'")
.rewrite()
.getPlan();
Assertions.assertTrue(plan.getLogicalProperties()
diff --git
a/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out
b/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out
new file mode 100644
index 00000000000..3befc3dcbb2
--- /dev/null
+++
b/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out
@@ -0,0 +1,254 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select_count_col --
+\N 0 0
+1 1 1
+1 1 1
+2 1 1
+2 1 1
+2 1 1
+3 1 1
+3 1 1
+4 1 1
+6 1 1
+6 1 1
+
+-- !select_rank --
+\N 1 1
+1 1 1
+1 1 1
+2 1 1
+2 1 1
+2 1 1
+3 1 1
+3 1 1
+4 1 1
+6 1 1
+6 1 1
+
+-- !select_dense_rank --
+\N 1 1
+1 1 1
+1 1 1
+2 1 1
+2 1 1
+2 1 1
+3 1 1
+3 1 1
+4 1 1
+6 1 1
+6 1 1
+
+-- !select_row_number --
+\N 1 1
+1 1 1
+1 1 1
+2 1 1
+2 1 1
+2 1 1
+3 1 1
+3 1 1
+4 1 1
+6 1 1
+6 1 1
+
+-- !select_first_value --
+\N \N \N
+1 1 1
+1 1 1
+2 2 2
+2 2 2
+2 2 2
+3 3 3
+3 3 3
+4 4 4
+6 6 6
+6 6 6
+
+-- !select_last_value --
+\N \N \N
+1 1 1
+1 1 1
+2 2 2
+2 2 2
+2 2 2
+3 3 3
+3 3 3
+4 4 4
+6 6 6
+6 6 6
+
+-- !select_min --
+\N \N \N
+\N \N \N
+1 1 1
+1 1 1
+2 2 2
+3 3 3
+3 3 3
+4 4 4
+5 5 5
+5 5 5
+7 7 7
+
+-- !select_max --
+\N \N \N
+\N \N \N
+1 1 1
+1 1 1
+2 2 2
+3 3 3
+3 3 3
+4 4 4
+5 5 5
+5 5 5
+7 7 7
+
+-- !select_sum --
+\N \N \N
+1 1 1
+1 1 1
+2 2 2
+2 2 2
+2 2 2
+3 3 3
+3 3 3
+4 4 4
+6 6 6
+6 6 6
+
+-- !select_avg --
+\N \N \N
+\N \N \N
+1 1 1
+1 1 1
+2 2 2
+3 3 3
+3 3 3
+4 4 4
+5 5 5
+5 5 5
+7 7 7
+
+-- !more_than_pk --
+\N \N \N
+\N \N \N
+1 1 1
+1 1 1
+2 2 2
+3 3 3
+3 3 3
+4 4 4
+5 5 5
+5 5 5
+7 7 7
+
+-- !select_last_value_shape --
+PhysicalResultSink
+--PhysicalDistribute[DistributionSpecGather]
+----PhysicalProject
+------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !select_min_shape --
+PhysicalResultSink
+--PhysicalDistribute[DistributionSpecGather]
+----PhysicalProject
+------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !more_than_pk_shape --
+PhysicalResultSink
+--PhysicalDistribute[DistributionSpecGather]
+----PhysicalProject
+------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+--------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !select_upper_plan_use_all_rewrite --
+\N \N
+\N \N
+1 1
+1 1
+2 2
+3 3
+3 3
+4 4
+5 5
+5 5
+7 7
+
+-- !select_upper_plan_use_rewrite_and_not_rewrite --
+\N \N \N
+\N \N 7
+1 1 3
+1 1 4
+2 2 5
+3 3 3
+3 3 4
+4 4 4
+5 5 5
+5 5 5
+7 7 7
+
+-- !select_upper_plan_use_all_not_rewrite --
+\N \N
+\N 7
+1 3
+1 4
+2 5
+3 3
+3 4
+4 4
+5 5
+5 5
+7 7
+
+-- !select_upper_plan_use_all_rewrite_shape --
+PhysicalResultSink
+--PhysicalQuickSort[MERGE_SORT]
+----PhysicalDistribute[DistributionSpecGather]
+------PhysicalQuickSort[LOCAL_SORT]
+--------PhysicalProject
+----------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !select_upper_plan_use_rewrite_and_not_rewrite_shape --
+PhysicalResultSink
+--PhysicalQuickSort[MERGE_SORT]
+----PhysicalDistribute[DistributionSpecGather]
+------PhysicalQuickSort[LOCAL_SORT]
+--------PhysicalProject
+----------PhysicalWindow
+------------PhysicalQuickSort[LOCAL_SORT]
+--------------PhysicalProject
+----------------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+------------------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !select_upper_plan_use_all_not_rewrite_shape --
+PhysicalResultSink
+--PhysicalQuickSort[MERGE_SORT]
+----PhysicalDistribute[DistributionSpecGather]
+------PhysicalQuickSort[LOCAL_SORT]
+--------PhysicalProject
+----------PhysicalWindow
+------------PhysicalQuickSort[LOCAL_SORT]
+--------------PhysicalProject
+----------------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+------------------PhysicalOlapScan[mal_test_simplify_window]
+
+-- !window_agg --
+\N 1
+1 1
+2 1
+3 1
+4 1
+6 1
+
+-- !window_agg_shape --
+PhysicalResultSink
+--PhysicalDistribute[DistributionSpecGather]
+----PhysicalProject
+------hashAgg[LOCAL]
+--------PhysicalProject
+----------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
+------------PhysicalOlapScan[mal_test_simplify_window]
+
diff --git
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out
index 35ca9710459..fdbfa12a4b7 100644
--- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out
+++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out
@@ -30,24 +30,18 @@ PhysicalResultSink
--------------------------------------PhysicalOlapScan[store] apply RFs: RF2
------------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------------PhysicalProject
-----------------------------------------filter((ranking <= 5))
-------------------------------------------PhysicalWindow
---------------------------------------------PhysicalQuickSort[LOCAL_SORT]
-----------------------------------------------PhysicalPartitionTopN
-------------------------------------------------PhysicalProject
---------------------------------------------------hashAgg[GLOBAL]
-----------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------------------hashAgg[LOCAL]
---------------------------------------------------------PhysicalProject
-----------------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
build RFs:RF1 s_store_sk->[ss_store_sk]
-------------------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0 RF1
---------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-----------------------------------------------------------------PhysicalProject
-------------------------------------------------------------------filter((date_dim.d_month_seq
<= 1231) and (date_dim.d_month_seq >= 1220))
---------------------------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store]
-
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
build RFs:RF1 s_store_sk->[ss_store_sk]
+--------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0 RF1
+----------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+------------------------------------------------------PhysicalProject
+--------------------------------------------------------filter((date_dim.d_month_seq
<= 1231) and (date_dim.d_month_seq >= 1220))
+----------------------------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out
index d919942139d..abdcd1b0149 100644
---
a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out
+++
b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out
@@ -27,27 +27,21 @@ PhysicalResultSink
--------------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((store.s_state = tmp1.s_state)) otherCondition=()
------------------------------------PhysicalProject
---------------------------------------filter((ranking <= 5))
-----------------------------------------PhysicalWindow
-------------------------------------------PhysicalQuickSort[LOCAL_SORT]
---------------------------------------------PhysicalPartitionTopN
-----------------------------------------------PhysicalProject
-------------------------------------------------hashAgg[GLOBAL]
---------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------------------------hashAgg[LOCAL]
-------------------------------------------------------PhysicalProject
---------------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
-----------------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-------------------------------------------------------------PhysicalProject
---------------------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0
-------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------filter((date_dim.d_month_seq
<= 1224) and (date_dim.d_month_seq >= 1213))
-------------------------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-------------------------------------------------------------PhysicalProject
---------------------------------------------------------------PhysicalOlapScan[store]
+--------------------------------------hashAgg[GLOBAL]
+----------------------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------------------hashAgg[LOCAL]
+--------------------------------------------PhysicalProject
+----------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
+------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------------------------------PhysicalProject
+----------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0
+--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------filter((date_dim.d_month_seq
<= 1224) and (date_dim.d_month_seq >= 1213))
+--------------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------------------PhysicalProject
+----------------------------------------------------PhysicalOlapScan[store]
------------------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store]
-
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out
index c0383fa20db..a5776a3d5e1 100644
---
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out
+++
b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query70.out
@@ -27,26 +27,21 @@ PhysicalResultSink
--------------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------------hashJoin[RIGHT_SEMI_JOIN]
hashCondition=((store.s_state = tmp1.s_state)) otherCondition=() build RFs:RF2
s_state->[s_state]
------------------------------------PhysicalProject
---------------------------------------filter((ranking <= 5))
-----------------------------------------PhysicalWindow
-------------------------------------------PhysicalQuickSort[LOCAL_SORT]
---------------------------------------------PhysicalPartitionTopN
-----------------------------------------------PhysicalProject
-------------------------------------------------hashAgg[GLOBAL]
---------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-----------------------------------------------------hashAgg[LOCAL]
-------------------------------------------------------PhysicalProject
---------------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
build RFs:RF1 s_store_sk->[ss_store_sk]
-----------------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
-------------------------------------------------------------PhysicalProject
---------------------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0 RF1
-------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------filter((date_dim.d_month_seq
<= 1224) and (date_dim.d_month_seq >= 1213))
-------------------------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-------------------------------------------------------------PhysicalProject
---------------------------------------------------------------PhysicalOlapScan[store]
apply RFs: RF2
+--------------------------------------hashAgg[GLOBAL]
+----------------------------------------PhysicalDistribute[DistributionSpecHash]
+------------------------------------------hashAgg[LOCAL]
+--------------------------------------------PhysicalProject
+----------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
build RFs:RF1 s_store_sk->[ss_store_sk]
+------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+--------------------------------------------------PhysicalProject
+----------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0 RF1
+--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------filter((date_dim.d_month_seq
<= 1224) and (date_dim.d_month_seq >= 1213))
+--------------------------------------------------------PhysicalOlapScan[date_dim]
+------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+--------------------------------------------------PhysicalProject
+----------------------------------------------------PhysicalOlapScan[store]
apply RFs: RF2
------------------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out
index 93610210333..f3e524aabcf 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out
@@ -30,24 +30,18 @@ PhysicalResultSink
--------------------------------------PhysicalOlapScan[store] apply RFs: RF2
------------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------------PhysicalProject
-----------------------------------------filter((ranking <= 5))
-------------------------------------------PhysicalWindow
---------------------------------------------PhysicalQuickSort[LOCAL_SORT]
-----------------------------------------------PhysicalPartitionTopN
-------------------------------------------------PhysicalProject
---------------------------------------------------hashAgg[GLOBAL]
-----------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------------------hashAgg[LOCAL]
---------------------------------------------------------PhysicalProject
-----------------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
-------------------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0
---------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-----------------------------------------------------------------PhysicalProject
-------------------------------------------------------------------filter((date_dim.d_month_seq
<= 1224) and (date_dim.d_month_seq >= 1213))
---------------------------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store]
-
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
+--------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0
+----------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+------------------------------------------------------PhysicalProject
+--------------------------------------------------------filter((date_dim.d_month_seq
<= 1224) and (date_dim.d_month_seq >= 1213))
+----------------------------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store]
diff --git
a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out
b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out
index f3e8b1c3a72..38de6f0c4ed 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query70.out
@@ -30,24 +30,19 @@ PhysicalResultSink
--------------------------------------PhysicalOlapScan[store] apply RFs: RF2
------------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------------PhysicalProject
-----------------------------------------filter((ranking <= 5))
-------------------------------------------PhysicalWindow
---------------------------------------------PhysicalQuickSort[LOCAL_SORT]
-----------------------------------------------PhysicalPartitionTopN
-------------------------------------------------PhysicalProject
---------------------------------------------------hashAgg[GLOBAL]
-----------------------------------------------------PhysicalDistribute[DistributionSpecHash]
-------------------------------------------------------hashAgg[LOCAL]
---------------------------------------------------------PhysicalProject
-----------------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
build RFs:RF1 s_store_sk->[ss_store_sk]
-------------------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0 RF1
---------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
-----------------------------------------------------------------PhysicalProject
-------------------------------------------------------------------filter((date_dim.d_month_seq
<= 1224) and (date_dim.d_month_seq >= 1213))
---------------------------------------------------------------------PhysicalOlapScan[date_dim]
-------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
---------------------------------------------------------------PhysicalProject
-----------------------------------------------------------------PhysicalOlapScan[store]
+----------------------------------------hashAgg[GLOBAL]
+------------------------------------------PhysicalDistribute[DistributionSpecHash]
+--------------------------------------------hashAgg[LOCAL]
+----------------------------------------------PhysicalProject
+------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=()
build RFs:RF1 s_store_sk->[ss_store_sk]
+--------------------------------------------------hashJoin[INNER_JOIN]
hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk))
otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store_sales]
apply RFs: RF0 RF1
+----------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+------------------------------------------------------PhysicalProject
+--------------------------------------------------------filter((date_dim.d_month_seq
<= 1224) and (date_dim.d_month_seq >= 1213))
+----------------------------------------------------------PhysicalOlapScan[date_dim]
+--------------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
+----------------------------------------------------PhysicalProject
+------------------------------------------------------PhysicalOlapScan[store]
diff --git a/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy b/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy
new file mode 100644
index 00000000000..11ad672c74f
--- /dev/null
+++ b/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("simplify_window_expression") {
+ sql "SET enable_nereids_planner=true"
+ sql "SET enable_fallback_to_original_planner=false"
+ sql """
+ DROP TABLE IF EXISTS mal_test_simplify_window
+ """
+
+ sql """
+ create table mal_test_simplify_window(a int, b int, c int) unique key(a,b) distributed by hash(a) buckets 10
+ properties('replication_num' = '1');
+ """
+
+ sql """
+ insert into mal_test_simplify_window values(6,null,6),(4,5,6),(1,1,4)
+ ,(6,7,1),(2,1,7),(2,4,2),(2,3,9),(1,3,6),(3,5,8),(3,2,8),(null,null,3);
+ """
+
+ sql "sync"
+
+ qt_select_count_col """
+ select a,count(a) over (partition by a,b) c1, count(a) over (partition
by a,b order by a) c2
+ from mal_test_simplify_window order by 1,2,3;"""
+ qt_select_rank """
+ select a,rank() over (partition by a,b) c1, rank() over (partition by
a,b order by b) c2
+ from mal_test_simplify_window order by 1,2,3;"""
+ qt_select_dense_rank """
+ select a,dense_rank() over (partition by a,b) c1, dense_rank() over
(partition by a,b order by a,b) c2
+ from mal_test_simplify_window order by 1,2,3;"""
+ qt_select_row_number """
+ select a,row_number() over (partition by a,b) c1, row_number() over
(partition by a,b order by 1) c2
+ from mal_test_simplify_window order by 1,2,3;"""
+ qt_select_first_value """
+ select a,first_value(a) over (partition by a,b) c1, first_value(a)
over (partition by a,b order by a) c2
+ from mal_test_simplify_window order by 1,2,3;"""
+ qt_select_last_value """
+ select a,last_value(a) over (partition by a,b) c1,last_value(a) over
(partition by a,b order by b) c2
+ from mal_test_simplify_window order by 1,2,3;"""
+ qt_select_min """
+ select b,min(b) over (partition by a,b) c1, min(b) over (partition by
a,b order by a,b) c2
+ from mal_test_simplify_window order by 1,2,3;"""
+ qt_select_max """
+ select b,max(b) over (partition by a,b) c1,max(b) over (partition by
a,b order by a,b) c2
+ from mal_test_simplify_window order by 1,2,3;"""
+ qt_select_sum """
+ select a,sum(a) over (partition by a,b) c1, sum(a) over (partition by
a,b order by a) c2
+ from mal_test_simplify_window order by 1,2,3;"""
+ qt_select_avg """
+ select b, avg(b) over (partition by a,b) c1, avg(b) over (partition by
a,b order by b) c2
+ from mal_test_simplify_window order by 1,2,3;"""
+ qt_more_than_pk """
+ select b, avg(b) over (partition by a,b,c) c1, avg(b) over (partition
by a,b,c order by b) c2
+ from mal_test_simplify_window order by 1,2,3;"""
+
+ qt_select_last_value_shape """explain shape plan
+ select a,last_value(a) over (partition by a,b) c1,last_value(a) over
(partition by a,b order by b) c2
+ from mal_test_simplify_window"""
+ qt_select_min_shape """explain shape plan
+ select b,min(b) over (partition by a,b) c1, min(b) over (partition by
a,b order by a,b) c2
+ from mal_test_simplify_window"""
+ qt_more_than_pk_shape """
+ explain shape plan
+ select b, avg(b) over (partition by a,b,c) c1, avg(b) over (partition
by a,b,c order by b) c2
+ from mal_test_simplify_window"""
+
+ qt_select_upper_plan_use_all_rewrite """
+ select b, c1 from (select b,avg(b) over (partition by a,b) c1
+ from mal_test_simplify_window) t order by 1,2"""
+ qt_select_upper_plan_use_rewrite_and_not_rewrite """
+ select b, c1, c2 from (select b,sum(b) over (partition by a,b) c1,
max(b) over (partition by a order by a) c2
+ from mal_test_simplify_window) t order by 1,2,3 """
+ qt_select_upper_plan_use_all_not_rewrite """
+ select b, c2 from (select b, max(b) over (partition by a order by a) c2
+ from mal_test_simplify_window) t order by 1,2 """
+ qt_select_upper_plan_use_all_rewrite_shape """
+ explain shape plan select b, c1 from (select b,avg(b) over (partition
by a,b) c1
+ from mal_test_simplify_window) t order by 1,2"""
+ qt_select_upper_plan_use_rewrite_and_not_rewrite_shape """
+ explain shape plan select b, c1, c2 from (select b,sum(b) over
(partition by a,b) c1, max(b) over (partition by a order by a) c2
+ from mal_test_simplify_window) t order by 1,2,3 """
+ qt_select_upper_plan_use_all_not_rewrite_shape """
+ explain shape plan select b, c2 from (select b, max(b) over (partition
by a order by a) c2
+ from mal_test_simplify_window) t order by 1,2 """
+
+ qt_window_agg """
+ select a, rank() over (partition by a order by sum(b) desc) as ranking
+ from mal_test_simplify_window group by a order by 1,2;
+ """
+ qt_window_agg_shape """
+ explain shape plan
+ select a, rank() over (partition by a order by sum(b) desc) as ranking
+ from mal_test_simplify_window group by a;
+ """
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]