This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch 2.1-tmp in repository https://gitbox.apache.org/repos/asf/doris.git
commit 09db427eed125312f0a5330e8d8a4dc5ba28b255 Author: Pxl <pxl...@qq.com> AuthorDate: Mon Apr 1 14:35:08 2024 +0800 [Feature](materialized-view) support ignore not slot is null when count(slot) not has key in mv (#32912) support ignore not slot is null when count(slot) not has key in mv --- .../mv/AbstractSelectMaterializedIndexRule.java | 95 +++++++++++++++++++--- .../mv/SelectMaterializedIndexWithAggregate.java | 24 ++++-- .../mv_ignore_predicate/mv_ignore_predicate.out | 15 ++++ .../mv_ignore_predicate/mv_ignore_predicate.groovy | 59 ++++++++++++++ .../test_dup_mv_repeat/test_dup_mv_repeat.groovy | 2 +- 5 files changed, 178 insertions(+), 17 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java index c77cda4c8f8..7ca697726ee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/AbstractSelectMaterializedIndexRule.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.rules.rewrite.mv; import org.apache.doris.analysis.CreateMaterializedViewStmt; +import org.apache.doris.catalog.AggregateType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.MaterializedIndexMeta; @@ -33,12 +34,14 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.InPredicate; import org.apache.doris.nereids.trees.expressions.IsNull; import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import 
org.apache.doris.nereids.trees.expressions.VirtualSlotReference; import org.apache.doris.nereids.trees.expressions.WhenClause; import org.apache.doris.nereids.trees.expressions.functions.ExpressionTrait; import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; import org.apache.doris.nereids.trees.expressions.functions.scalar.ScalarFunction; import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral; import org.apache.doris.nereids.trees.expressions.literal.Literal; @@ -62,6 +65,7 @@ import com.google.common.collect.ImmutableSortedMap; import com.google.common.collect.Lists; import org.apache.commons.collections.CollectionUtils; +import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; import java.util.List; @@ -72,6 +76,7 @@ import java.util.TreeMap; import java.util.TreeSet; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Base class for selecting materialized index rules. 
@@ -109,6 +114,45 @@ public abstract class AbstractSelectMaterializedIndexRule { } } + // get the predicates that can be ignored when all aggregate functions are sum + protected static List<Expression> getPrunedPredicatesWithAllSumAgg(List<Expression> aggExpressions, + Set<Expression> predicateExpr) { + List<Expression> prunedExpr = new ArrayList<>(); + + Set<String> sumSlots = aggExpressions.stream().map(e -> e.child(0).toSql()) + .collect(Collectors.toCollection(() -> new TreeSet<String>(String.CASE_INSENSITIVE_ORDER))); + for (Expression expr : predicateExpr) { + if (expr instanceof Not && expr.child(0) instanceof IsNull) { + Expression slot = expr.child(0).child(0); + String countColumn = normalizeName(CreateMaterializedViewStmt.mvColumnBuilder(AggregateType.SUM, + CreateMaterializedViewStmt.mvColumnBuilder(slotToCaseWhen(slot).toSql()))); + if (sumSlots.contains(countColumn)) { + prunedExpr.add(expr); + } + } + } + return prunedExpr; + } + + // we can prune some predicates when there is no group-by column + protected static List<Expression> getPrunedPredicates(List<Expression> aggExpressions, + Set<Expression> predicateExpr) { + List<Expression> prunedExpr = new ArrayList<>(); + + boolean isAllSumAgg = true; + for (Expression expr : aggExpressions) { + if (!(expr instanceof Sum)) { + isAllSumAgg = false; + break; + } + } + if (isAllSumAgg) { + prunedExpr.addAll(getPrunedPredicatesWithAllSumAgg(aggExpressions, predicateExpr)); + } + + return prunedExpr; + } + protected static boolean containAllRequiredColumns(MaterializedIndex index, LogicalOlapScan scan, Set<Slot> requiredScanOutput, Set<? 
extends Expression> requiredExpr, Set<Expression> predicateExpr) { OlapTable table = scan.getTable(); @@ -121,12 +165,14 @@ public abstract class AbstractSelectMaterializedIndexRule { .map(e -> { e.setDisableTableName(true); return e; - }) - .map(e -> new NereidsParser().parseExpression(e.toSql()).toSql()).collect(Collectors.toSet()); - Set<String> commonConjuncts = indexConjuncts.stream().filter(predicateExprSql::contains) - .collect(Collectors.toSet()); - if (commonConjuncts.size() != indexConjuncts.size()) { - return false; + }).map(e -> new NereidsParser().parseExpression(e.toSql()).toSql()).collect(Collectors.toSet()); + + for (String indexConjunct : indexConjuncts) { + if (predicateExprSql.contains(indexConjunct)) { + predicateExprSql.remove(indexConjunct); + } else { + return false; + } } Set<String> requiredMvColumnNames = requiredScanOutput.stream() @@ -138,10 +184,24 @@ public abstract class AbstractSelectMaterializedIndexRule { .collect(Collectors.toCollection(() -> new TreeSet<String>(String.CASE_INSENSITIVE_ORDER))); mvColNames.addAll(indexConjuncts); - return mvColNames.containsAll(requiredMvColumnNames) - && (indexConjuncts.isEmpty() || commonConjuncts.size() == predicateExprSql.size()) - || requiredExpr.stream().filter(e -> !containsAllColumn(e, mvColNames)).collect(Collectors.toSet()) - .isEmpty(); + if (mvColNames.containsAll(requiredMvColumnNames) && predicateExprSql.isEmpty()) { + return true; + } + + Set<Expression> remained = requiredExpr.stream().filter(e -> !containsAllColumn(e, mvColNames)) + .collect(Collectors.toSet()); + if (remained.isEmpty()) { + return true; + } + + if (!scan.getGroupExpression().isPresent()) { + Set<Expression> prunedExpr = getPrunedPredicates( + requiredExpr.stream().filter(e -> e instanceof AggregateFunction).collect(Collectors.toList()), + predicateExpr).stream().collect(Collectors.toSet()); + remained = remained.stream().filter(e -> !prunedExpr.contains(e)).collect(Collectors.toSet()); + } + + return 
remained.isEmpty(); } public static String parseMvColumnToSql(String mvName) { @@ -428,6 +488,21 @@ public abstract class AbstractSelectMaterializedIndexRule { .collect(Collectors.toSet())); } + // Call this generateBaseScanExprToMvExpr only when we have both agg and filter + protected SlotContext generateBaseScanExprToMvExpr(LogicalOlapScan mvPlan, Set<Expression> requiredExpr, + Set<Expression> predicateExpr) { + SlotContext context = generateBaseScanExprToMvExpr(mvPlan); + if (mvPlan.getGroupExpression().isPresent()) { + return context; + } + Set<Expression> pruned = getPrunedPredicates( + requiredExpr.stream().filter(e -> e instanceof AggregateFunction).collect(Collectors.toList()), + predicateExpr).stream().collect(Collectors.toSet()); + + return new SlotContext(context.baseSlotToMvSlot, context.mvNameToMvSlot, + Stream.concat(pruned.stream(), context.trueExprs.stream()).collect(Collectors.toSet())); + } + /** SlotContext */ protected static class SlotContext { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java index 0a01c1b5679..bd3494378ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMaterializedIndexWithAggregate.java @@ -164,7 +164,9 @@ public class SelectMaterializedIndexWithAggregate extends AbstractSelectMaterial ); LogicalOlapScan mvPlan = createLogicalOlapScan(scan, result); - SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan); + SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream() + .map(e -> result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()), + filter.getConjuncts()); return new LogicalProject<>( generateProjectsAlias(agg.getOutputs(), slotContext), @@ -248,7 
+250,9 @@ public class SelectMaterializedIndexWithAggregate extends AbstractSelectMaterial ); LogicalOlapScan mvPlan = createLogicalOlapScan(scan, result); - SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan); + SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream() + .map(e -> result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()), + filter.getConjuncts()); if (result.indexId == scan.getTable().getBaseIndexId()) { LogicalOlapScan mvPlanWithoutAgg = SelectMaterializedIndexWithoutAggregate.select(scan, project::getInputSlots, filter::getConjuncts, @@ -308,7 +312,9 @@ public class SelectMaterializedIndexWithAggregate extends AbstractSelectMaterial ); LogicalOlapScan mvPlan = createLogicalOlapScan(scan, result); - SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan); + SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream() + .map(e -> result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()), + filter.getConjuncts()); List<NamedExpression> newProjectList = replaceProjectList(project, result.exprRewriteMap.projectExprMap); @@ -387,7 +393,9 @@ public class SelectMaterializedIndexWithAggregate extends AbstractSelectMaterial ); LogicalOlapScan mvPlan = createLogicalOlapScan(scan, result); - SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan); + SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream() + .map(e -> result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()), + filter.getConjuncts()); return new LogicalProject<>( generateProjectsAlias(agg.getOutputs(), slotContext), @@ -478,7 +486,9 @@ public class SelectMaterializedIndexWithAggregate extends AbstractSelectMaterial ); LogicalOlapScan mvPlan = createLogicalOlapScan(scan, result); - SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan); + SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream() + .map(e -> 
result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()), + filter.getConjuncts()); List<NamedExpression> newProjectList = replaceProjectList(project, result.exprRewriteMap.projectExprMap); @@ -528,7 +538,9 @@ public class SelectMaterializedIndexWithAggregate extends AbstractSelectMaterial ); LogicalOlapScan mvPlan = createLogicalOlapScan(scan, result); - SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan); + SlotContext slotContext = generateBaseScanExprToMvExpr(mvPlan, requiredExpr.stream() + .map(e -> result.exprRewriteMap.replaceAgg(e)).collect(Collectors.toSet()), + filter.getConjuncts()); List<NamedExpression> newProjectList = replaceProjectList(project, result.exprRewriteMap.projectExprMap); diff --git a/regression-test/data/mv_p0/mv_ignore_predicate/mv_ignore_predicate.out b/regression-test/data/mv_p0/mv_ignore_predicate/mv_ignore_predicate.out new file mode 100644 index 00000000000..e35122c75d0 --- /dev/null +++ b/regression-test/data/mv_p0/mv_ignore_predicate/mv_ignore_predicate.out @@ -0,0 +1,15 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select_star -- +\N 4 \N d +-4 -4 -4 d +1 1 1 a +2 2 2 b +3 -3 \N c +5 \N \N \N + +-- !select_mv -- +5 + +-- !select_mv -- +5 + diff --git a/regression-test/suites/mv_p0/mv_ignore_predicate/mv_ignore_predicate.groovy b/regression-test/suites/mv_p0/mv_ignore_predicate/mv_ignore_predicate.groovy new file mode 100644 index 00000000000..974dabda466 --- /dev/null +++ b/regression-test/suites/mv_p0/mv_ignore_predicate/mv_ignore_predicate.groovy @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite ("mv_ignore_predicate") { + + sql """ DROP TABLE IF EXISTS d_table; """ + + sql """ + create table d_table( + k1 int null, + k2 int null, + k3 bigint null, + k4 varchar(100) null + ) + duplicate key (k1,k2,k3) + distributed BY hash(k1) buckets 3 + properties("replication_num" = "1"); + """ + + sql "insert into d_table select 1,1,1,'a';" + sql "insert into d_table select 2,2,2,'b';" + sql "insert into d_table select 3,-3,null,'c';" + + createMV("create materialized view kign as select k1,count(k2) from d_table group by k1;") + + sql "insert into d_table select -4,-4,-4,'d';" + sql "insert into d_table(k4,k2) values('d',4);" + sql "insert into d_table select 5,null,null,null;" + + qt_select_star "select * from d_table order by k1;" + + explain { + sql("select count(k2) from d_table;") + contains "(kign)" + } + qt_select_mv "select count(k2) from d_table;" + + explain { + sql("select count(k2) from d_table where k2 is not null;") + contains "(kign)" + } + qt_select_mv "select count(k2) from d_table where k2 is not null;" +} diff --git a/regression-test/suites/mv_p0/test_dup_mv_repeat/test_dup_mv_repeat.groovy b/regression-test/suites/mv_p0/test_dup_mv_repeat/test_dup_mv_repeat.groovy index 0a40c3cb050..f8672eaa259 100644 --- a/regression-test/suites/mv_p0/test_dup_mv_repeat/test_dup_mv_repeat.groovy +++ b/regression-test/suites/mv_p0/test_dup_mv_repeat/test_dup_mv_repeat.groovy @@ -19,7 +19,7 @@ import org.codehaus.groovy.runtime.IOGroovyMethods suite ("test_dup_mv_repeat") { - sql """ 
DROP TABLE IF EXISTS d_table; """ + sql """ DROP TABLE IF EXISTS db1; """ sql """ CREATE TABLE `db1` ( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org