This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch 2.1-tmp
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 09db427eed125312f0a5330e8d8a4dc5ba28b255
Author: Pxl <pxl...@qq.com>
AuthorDate: Mon Apr 1 14:35:08 2024 +0800

    [Feature](materialized-view) support ignore not slot is null when 
count(slot) not has key in mv (#32912)
    
    Support ignoring a `slot is not null` predicate when the query computes
    count(slot) without a group-by key, so that the materialized view can
    still be selected.
---
 .../mv/AbstractSelectMaterializedIndexRule.java    | 95 +++++++++++++++++++---
 .../mv/SelectMaterializedIndexWithAggregate.java   | 24 ++++--
 .../mv_ignore_predicate/mv_ignore_predicate.out    | 15 ++++
 .../mv_ignore_predicate/mv_ignore_predicate.groovy | 59 ++++++++++++++
 .../test_dup_mv_repeat/test_dup_mv_repeat.groovy   |  2 +-
 5 files changed, 178 insertions(+), 17 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java
index c77cda4c8f8..7ca697726ee 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java
@@ -18,6 +18,7 @@
 package org.apache.doris.nereids.rules.rewrite.mv;
 
 import org.apache.doris.analysis.CreateMaterializedViewStmt;
+import org.apache.doris.catalog.AggregateType;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.MaterializedIndex;
 import org.apache.doris.catalog.MaterializedIndexMeta;
@@ -33,12 +34,14 @@ import 
org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.InPredicate;
 import org.apache.doris.nereids.trees.expressions.IsNull;
 import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.expressions.Not;
 import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.VirtualSlotReference;
 import org.apache.doris.nereids.trees.expressions.WhenClause;
 import org.apache.doris.nereids.trees.expressions.functions.ExpressionTrait;
 import 
org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Sum;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.ScalarFunction;
 import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
@@ -62,6 +65,7 @@ import com.google.common.collect.ImmutableSortedMap;
 import com.google.common.collect.Lists;
 import org.apache.commons.collections.CollectionUtils;
 
+import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
@@ -72,6 +76,7 @@ import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.function.Function;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 /**
  * Base class for selecting materialized index rules.
@@ -109,6 +114,45 @@ public abstract class AbstractSelectMaterializedIndexRule {
         }
     }
 
+    // get the predicates that can be ignored when all aggregate functions are sum
+    protected static List<Expression> 
getPrunedPredicatesWithAllSumAgg(List<Expression> aggExpressions,
+            Set<Expression> predicateExpr) {
+        List<Expression> prunedExpr = new ArrayList<>();
+
+        Set<String> sumSlots = aggExpressions.stream().map(e -> 
e.child(0).toSql())
+                .collect(Collectors.toCollection(() -> new 
TreeSet<String>(String.CASE_INSENSITIVE_ORDER)));
+        for (Expression expr : predicateExpr) {
+            if (expr instanceof Not && expr.child(0) instanceof IsNull) {
+                Expression slot = expr.child(0).child(0);
+                String countColumn = 
normalizeName(CreateMaterializedViewStmt.mvColumnBuilder(AggregateType.SUM,
+                        
CreateMaterializedViewStmt.mvColumnBuilder(slotToCaseWhen(slot).toSql())));
+                if (sumSlots.contains(countColumn)) {
+                    prunedExpr.add(expr);
+                }
+            }
+        }
+        return prunedExpr;
+    }
+
+    // we can prune some predicates when there is no group-by column
+    protected static List<Expression> getPrunedPredicates(List<Expression> 
aggExpressions,
+            Set<Expression> predicateExpr) {
+        List<Expression> prunedExpr = new ArrayList<>();
+
+        boolean isAllSumAgg = true;
+        for (Expression expr : aggExpressions) {
+            if (!(expr instanceof Sum)) {
+                isAllSumAgg = false;
+                break;
+            }
+        }
+        if (isAllSumAgg) {
+            prunedExpr.addAll(getPrunedPredicatesWithAllSumAgg(aggExpressions, 
predicateExpr));
+        }
+
+        return prunedExpr;
+    }
+
     protected static boolean containAllRequiredColumns(MaterializedIndex 
index, LogicalOlapScan scan,
             Set<Slot> requiredScanOutput, Set<? extends Expression> 
requiredExpr, Set<Expression> predicateExpr) {
         OlapTable table = scan.getTable();
@@ -121,12 +165,14 @@ public abstract class AbstractSelectMaterializedIndexRule 
{
                 .map(e -> {
                     e.setDisableTableName(true);
                     return e;
-                })
-                .map(e -> new 
NereidsParser().parseExpression(e.toSql()).toSql()).collect(Collectors.toSet());
-        Set<String> commonConjuncts = 
indexConjuncts.stream().filter(predicateExprSql::contains)
-                .collect(Collectors.toSet());
-        if (commonConjuncts.size() != indexConjuncts.size()) {
-            return false;
+                }).map(e -> new 
NereidsParser().parseExpression(e.toSql()).toSql()).collect(Collectors.toSet());
+
+        for (String indexConjunct : indexConjuncts) {
+            if (predicateExprSql.contains(indexConjunct)) {
+                predicateExprSql.remove(indexConjunct);
+            } else {
+                return false;
+            }
         }
 
         Set<String> requiredMvColumnNames = requiredScanOutput.stream()
@@ -138,10 +184,24 @@ public abstract class AbstractSelectMaterializedIndexRule 
{
                 .collect(Collectors.toCollection(() -> new 
TreeSet<String>(String.CASE_INSENSITIVE_ORDER)));
         mvColNames.addAll(indexConjuncts);
 
-        return mvColNames.containsAll(requiredMvColumnNames)
-                && (indexConjuncts.isEmpty() || commonConjuncts.size() == 
predicateExprSql.size())
-                || requiredExpr.stream().filter(e -> !containsAllColumn(e, 
mvColNames)).collect(Collectors.toSet())
-                        .isEmpty();
+        if (mvColNames.containsAll(requiredMvColumnNames) && 
predicateExprSql.isEmpty()) {
+            return true;
+        }
+
+        Set<Expression> remained = requiredExpr.stream().filter(e -> 
!containsAllColumn(e, mvColNames))
+                .collect(Collectors.toSet());
+        if (remained.isEmpty()) {
+            return true;
+        }
+
+        if (!scan.getGroupExpression().isPresent()) {
+            Set<Expression> prunedExpr = getPrunedPredicates(
+                    requiredExpr.stream().filter(e -> e instanceof 
AggregateFunction).collect(Collectors.toList()),
+                    predicateExpr).stream().collect(Collectors.toSet());
+            remained = remained.stream().filter(e -> 
!prunedExpr.contains(e)).collect(Collectors.toSet());
+        }
+
+        return remained.isEmpty();
     }
 
     public static String parseMvColumnToSql(String mvName) {
@@ -428,6 +488,21 @@ public abstract class AbstractSelectMaterializedIndexRule {
                         .collect(Collectors.toSet()));
     }
 
+    // Call this generateBaseScanExprToMvExpr only when we have both agg and filter
+    protected SlotContext generateBaseScanExprToMvExpr(LogicalOlapScan mvPlan, 
Set<Expression> requiredExpr,
+            Set<Expression> predicateExpr) {
+        SlotContext context = generateBaseScanExprToMvExpr(mvPlan);
+        if (mvPlan.getGroupExpression().isPresent()) {
+            return context;
+        }
+        Set<Expression> pruned = getPrunedPredicates(
+                requiredExpr.stream().filter(e -> e instanceof 
AggregateFunction).collect(Collectors.toList()),
+                predicateExpr).stream().collect(Collectors.toSet());
+
+        return new SlotContext(context.baseSlotToMvSlot, 
context.mvNameToMvSlot,
+                Stream.concat(pruned.stream(), 
context.trueExprs.stream()).collect(Collectors.toSet()));
+    }
+
     /** SlotContext */
     protected static class SlotContext {
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java
index 0a01c1b5679..bd3494378ae 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java
@@ -164,7 +164,9 @@ public class SelectMaterializedIndexWithAggregate extends 
AbstractSelectMaterial
                             );
 
                             LogicalOlapScan mvPlan = 
createLogicalOlapScan(scan, result);
-                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan);
+                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream()
+                                    .map(e -> 
result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()),
+                                    filter.getConjuncts());
 
                             return new LogicalProject<>(
                                 generateProjectsAlias(agg.getOutputs(), 
slotContext),
@@ -248,7 +250,9 @@ public class SelectMaterializedIndexWithAggregate extends 
AbstractSelectMaterial
                             );
 
                             LogicalOlapScan mvPlan = 
createLogicalOlapScan(scan, result);
-                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan);
+                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream()
+                                    .map(e -> 
result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()),
+                                    filter.getConjuncts());
                             if (result.indexId == 
scan.getTable().getBaseIndexId()) {
                                 LogicalOlapScan mvPlanWithoutAgg = 
SelectMaterializedIndexWithoutAggregate.select(scan,
                                         project::getInputSlots, 
filter::getConjuncts,
@@ -308,7 +312,9 @@ public class SelectMaterializedIndexWithAggregate extends 
AbstractSelectMaterial
                             );
 
                             LogicalOlapScan mvPlan = 
createLogicalOlapScan(scan, result);
-                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan);
+                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream()
+                                    .map(e -> 
result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()),
+                                    filter.getConjuncts());
 
                             List<NamedExpression> newProjectList = 
replaceProjectList(project,
                                     result.exprRewriteMap.projectExprMap);
@@ -387,7 +393,9 @@ public class SelectMaterializedIndexWithAggregate extends 
AbstractSelectMaterial
                             );
 
                             LogicalOlapScan mvPlan = 
createLogicalOlapScan(scan, result);
-                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan);
+                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream()
+                                    .map(e -> 
result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()),
+                                    filter.getConjuncts());
 
                             return new LogicalProject<>(
                                 generateProjectsAlias(agg.getOutputs(), 
slotContext),
@@ -478,7 +486,9 @@ public class SelectMaterializedIndexWithAggregate extends 
AbstractSelectMaterial
                             );
 
                             LogicalOlapScan mvPlan = 
createLogicalOlapScan(scan, result);
-                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan);
+                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream()
+                                    .map(e -> 
result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()),
+                                    filter.getConjuncts());
 
                             List<NamedExpression> newProjectList = 
replaceProjectList(project,
                                     result.exprRewriteMap.projectExprMap);
@@ -528,7 +538,9 @@ public class SelectMaterializedIndexWithAggregate extends 
AbstractSelectMaterial
                             );
 
                             LogicalOlapScan mvPlan = 
createLogicalOlapScan(scan, result);
-                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan);
+                            SlotContext slotContext = 
generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream()
+                                    .map(e -> 
result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()),
+                                    filter.getConjuncts());
 
                             List<NamedExpression> newProjectList = 
replaceProjectList(project,
                                     result.exprRewriteMap.projectExprMap);
diff --git 
a/regression-test/data/mv_p0/mv_ignore_predicate/mv_ignore_predicate.out 
b/regression-test/data/mv_p0/mv_ignore_predicate/mv_ignore_predicate.out
new file mode 100644
index 00000000000..e35122c75d0
--- /dev/null
+++ b/regression-test/data/mv_p0/mv_ignore_predicate/mv_ignore_predicate.out
@@ -0,0 +1,15 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select_star --
+\N     4       \N      d
+-4     -4      -4      d
+1      1       1       a
+2      2       2       b
+3      -3      \N      c
+5      \N      \N      \N
+
+-- !select_mv --
+5
+
+-- !select_mv --
+5
+
diff --git 
a/regression-test/suites/mv_p0/mv_ignore_predicate/mv_ignore_predicate.groovy 
b/regression-test/suites/mv_p0/mv_ignore_predicate/mv_ignore_predicate.groovy
new file mode 100644
index 00000000000..974dabda466
--- /dev/null
+++ 
b/regression-test/suites/mv_p0/mv_ignore_predicate/mv_ignore_predicate.groovy
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite ("mv_ignore_predicate") {
+
+    sql """ DROP TABLE IF EXISTS d_table; """
+
+    sql """
+            create table d_table(
+                k1 int null,
+                k2 int null,
+                k3 bigint null,
+                k4 varchar(100) null
+            )
+            duplicate key (k1,k2,k3)
+            distributed BY hash(k1) buckets 3
+            properties("replication_num" = "1");
+        """
+
+    sql "insert into d_table select 1,1,1,'a';"
+    sql "insert into d_table select 2,2,2,'b';"
+    sql "insert into d_table select 3,-3,null,'c';"
+
+    createMV("create materialized view kign as select k1,count(k2) from 
d_table group by k1;")
+
+    sql "insert into d_table select -4,-4,-4,'d';"
+    sql "insert into d_table(k4,k2) values('d',4);"
+    sql "insert into d_table select 5,null,null,null;"
+
+    qt_select_star "select * from d_table order by k1;"
+
+    explain {
+        sql("select count(k2) from d_table;")
+        contains "(kign)"
+    }
+    qt_select_mv "select count(k2) from d_table;"
+
+    explain {
+        sql("select count(k2) from d_table where k2 is not null;")
+        contains "(kign)"
+    }
+    qt_select_mv "select count(k2) from d_table where k2 is not null;"
+}
diff --git 
a/regression-test/suites/mv_p0/test_dup_mv_repeat/test_dup_mv_repeat.groovy 
b/regression-test/suites/mv_p0/test_dup_mv_repeat/test_dup_mv_repeat.groovy
index 0a40c3cb050..f8672eaa259 100644
--- a/regression-test/suites/mv_p0/test_dup_mv_repeat/test_dup_mv_repeat.groovy
+++ b/regression-test/suites/mv_p0/test_dup_mv_repeat/test_dup_mv_repeat.groovy
@@ -19,7 +19,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods
 
 suite ("test_dup_mv_repeat") {
 
-    sql """ DROP TABLE IF EXISTS d_table; """
+    sql """ DROP TABLE IF EXISTS db1; """
 
     sql """
             CREATE TABLE `db1` (


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to