This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch revert-24243-2.0-forbid-unknown-stats in repository https://gitbox.apache.org/repos/asf/doris.git
commit 008cd1524953462bdd73cc87dd17c72fb95ab5ff Author: Kang <[email protected]> AuthorDate: Wed Sep 13 15:18:28 2023 +0800 Revert "[refactor](nereids)forbid unknown stats for branch2.0 #24061 (#24243)" This reverts commit 108e91f2a37d08119e4a611a51fdd0341d9330b3. --- .../glue/translator/PhysicalPlanTranslator.java | 35 ---------------- .../glue/translator/PlanTranslatorContext.java | 31 -------------- .../rules/rewrite/PredicatePropagation.java | 39 +++++++---------- .../doris/nereids/stats/StatsCalculator.java | 49 +++++++++++++++++----- .../nereids/rules/rewrite/InferPredicatesTest.java | 30 ------------- .../infer_predicate/infer_predicate.groovy | 18 -------- 6 files changed, 54 insertions(+), 148 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 359373ddbf0..406dda54859 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -126,11 +126,6 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalUnion; import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow; import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor; -import org.apache.doris.nereids.types.ArrayType; -import org.apache.doris.nereids.types.DataType; -import org.apache.doris.nereids.types.JsonType; -import org.apache.doris.nereids.types.MapType; -import org.apache.doris.nereids.types.StructType; import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.JoinUtils; import org.apache.doris.nereids.util.Utils; @@ -240,14 +235,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla Collections.reverse(context.getPlanFragments()); // TODO: maybe we need to trans nullable directly? and then we could remove call computeMemLayout context.getDescTable().computeMemLayout(); - if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) { - Set<ScanNode> scans = context.getScanNodeWithUnknownColumnStats(); - if (!scans.isEmpty()) { - StringBuilder builder = new StringBuilder(); - scans.forEach(scanNode -> builder.append(scanNode)); - throw new AnalysisException("tables with unknown column stats: " + builder); - } - } return rootFragment; } @@ -543,15 +530,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla // TODO: move all node set cardinality into one place if (olapScan.getStats() != null) { olapScanNode.setCardinality((long) olapScan.getStats().getRowCount()); - if (ConnectContext.get().getSessionVariable().forbidUnknownColStats) { - for (int i = 0; i < slots.size(); i++) { - Slot slot = slots.get(i); - if (olapScan.getStats().findColumnStatistics(slot).isUnKnown() - && !isComplexDataType(slot.getDataType())) { - context.addUnknownStatsColumn(olapScanNode, tupleDescriptor.getSlots().get(i).getId()); - } - } - } } // TODO: Do we really need tableName here? TableName tableName = new TableName(null, "", ""); @@ -2000,14 +1978,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla scanNode.getTupleDesc().getSlots().add(smallest); } try { - if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) { - for (SlotId slotId : requiredByProjectSlotIdSet) { - if (context.isColumnStatsUnknown(scanNode, slotId)) { - throw new AnalysisException("meet unknown column stats on table " + scanNode); - } - } - context.removeScanFromStatsUnknownColumnsMap(scanNode); - } scanNode.updateRequiredSlots(context, requiredByProjectSlotIdSet); } catch (UserException e) { Util.logAndThrowRuntimeException(LOG, @@ -2270,9 +2240,4 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla } return outputExprs; } - - private boolean isComplexDataType(DataType dataType) { - return dataType instanceof ArrayType || dataType instanceof MapType || dataType instanceof JsonType - || dataType instanceof StructType; - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java index e69b5ee8ef3..256b37d7057 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java @@ -45,13 +45,11 @@ import org.apache.doris.thrift.TPushAggOp; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import com.google.common.collect.Sets; import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -92,7 +90,6 @@ public class PlanTranslatorContext { private final Map<CTEId, PhysicalCTEProducer> cteProducerMap = Maps.newHashMap(); private final Map<RelationId, TPushAggOp> tablePushAggOp = Maps.newHashMap(); - private final Map<ScanNode, Set<SlotId>> statsUnknownColumnsMap = Maps.newHashMap(); public PlanTranslatorContext(CascadesContext ctx) { this.translator = new RuntimeFilterTranslator(ctx.getRuntimeFilterContext()); @@ -103,34 +100,6 @@ public class PlanTranslatorContext { translator = null; } - /** - * remember the unknown-stats column and its scan, used for forbid_unknown_col_stats check - */ - public void addUnknownStatsColumn(ScanNode scan, SlotId slotId) { - Set<SlotId> slots = statsUnknownColumnsMap.get(scan); - if (slots == null) { - statsUnknownColumnsMap.put(scan, Sets.newHashSet(slotId)); - } else { - statsUnknownColumnsMap.get(scan).add(slotId); - } - } - - public boolean isColumnStatsUnknown(ScanNode scan, SlotId slotId) { - Set<SlotId> unknownSlots = statsUnknownColumnsMap.get(scan); - if (unknownSlots == null) { - return false; - } - return unknownSlots.contains(slotId); - } - - public void removeScanFromStatsUnknownColumnsMap(ScanNode scan) { - statsUnknownColumnsMap.remove(scan); - } - - public Set<ScanNode> getScanNodeWithUnknownColumnStats() { - return statsUnknownColumnsMap.keySet(); - } - public List<PlanFragment> getPlanFragments() { return planFragments; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java index 71818966696..cc45952817a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java @@ -59,12 +59,12 @@ public class PredicatePropagation { } /** - * Use the left or right child of `equalExpr` to replace the left or right child of `expression` + * Use the left or right child of `leftSlotEqualToRightSlot` to replace the left or right child of `expression` * Now only support infer `ComparisonPredicate`. * TODO: We should determine whether `expression` satisfies the condition for replacement * eg: Satisfy `expression` is non-deterministic */ - private Expression doInfer(Expression equalExpr, Expression expression) { + private Expression doInfer(Expression leftSlotEqualToRightSlot, Expression expression) { return expression.accept(new DefaultExpressionRewriter<Void>() { @Override @@ -76,43 +76,36 @@ public class PredicatePropagation { public Expression visitComparisonPredicate(ComparisonPredicate cp, Void context) { // we need to get expression covered by cast, because we want to infer different datatype if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.left()) && (cp.right().isConstant())) { - return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left()), equalExpr); + return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left())); } else if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.right()) && cp.left().isConstant()) { - return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right()), equalExpr); + return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right())); } return super.visit(cp, context); } private boolean isDataTypeValid(DataType originDataType, Expression expr) { - if ((expr.child(0).getDataType() instanceof IntegralType) - && (expr.child(1).getDataType() instanceof IntegralType) + if ((leftSlotEqualToRightSlot.child(0).getDataType() instanceof IntegralType) + && (leftSlotEqualToRightSlot.child(1).getDataType() instanceof IntegralType) && (originDataType instanceof IntegralType)) { // infer filter can not be lower than original datatype, or dataset would be wrong if (!((IntegralType) originDataType).widerThan( - (IntegralType) expr.child(0).getDataType()) + (IntegralType) leftSlotEqualToRightSlot.child(0).getDataType()) && !((IntegralType) originDataType).widerThan( - (IntegralType) expr.child(1).getDataType())) { + (IntegralType) leftSlotEqualToRightSlot.child(1).getDataType())) { return true; } - } else if (expr.child(0).getDataType().equals(expr.child(1).getDataType())) { - return true; } return false; } - private Expression replaceSlot(Expression sourcePredicate, DataType originDataType, Expression equal) { - if (!isDataTypeValid(originDataType, equal)) { - return sourcePredicate; - } - return sourcePredicate.rewriteUp(e -> { - // we can not replace Cast expression to slot because when rewrite up, we have replace child of cast - if (e instanceof Cast) { - return e; - } - if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(0))) { - return equal.child(1); - } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(1))) { - return equal.child(0); + private Expression replaceSlot(Expression expr, DataType originDataType) { + return expr.rewriteUp(e -> { + if (isDataTypeValid(originDataType, leftSlotEqualToRightSlot)) { + if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(0))) { + return leftSlotEqualToRightSlot.child(1); + } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(1))) { + return leftSlotEqualToRightSlot.child(0); + } } return e; }); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 24ec929e820..45aeae54fde 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -26,6 +26,7 @@ import org.apache.doris.common.Config; import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.memo.Group; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.trees.expressions.Alias; @@ -122,6 +123,7 @@ import org.apache.doris.statistics.StatisticConstants; import org.apache.doris.statistics.StatisticRange; import org.apache.doris.statistics.Statistics; import org.apache.doris.statistics.StatisticsBuilder; +import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; @@ -621,21 +623,46 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { .setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize()) .build(); } - if (!cache.isUnKnown) { - rowCount = Math.max(rowCount, cache.count); - Histogram histogram = getColumnHistogram(table, colName); - if (histogram != null) { - ColumnStatisticBuilder columnStatisticBuilder = - new ColumnStatisticBuilder(cache).setHistogram(histogram); - cache = columnStatisticBuilder.build(); - if (ConnectContext.get().getSessionVariable().isEnableMinidump() - && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) { - totalColumnStatisticMap.put(table.getName() + ":" + colName, cache); - totalHistogramMap.put(table.getName() + colName, histogram); + if (cache.isUnKnown) { + if (forbidUnknownColStats && !shouldIgnoreThisCol) { + if (StatisticsUtil.statsTblAvailable()) { + throw new AnalysisException(String.format("Found unknown stats for column:%s.%s.\n" + + "It may caused by:\n" + + "\n" + + "1. This column never got analyzed\n" + + "2. This table is empty\n" + + "3. Stats load failed caused by unstable of backends," + + "and FE cached the unknown stats by default in this scenario\n" + + "4. There is a bug, please report it to Doris community\n" + + "\n" + + "If an unknown stats for this column is tolerable," + + "you could set session variable `forbid_unknown_col_stats` to false to make planner" + + " ignore this error and keep planning.", table.getName(), colName)); + } else { + throw new AnalysisException("BE is not available!"); } } + columnStatisticMap.put(slotReference, cache); + continue; + } + rowCount = Math.max(rowCount, cache.count); + Histogram histogram = getColumnHistogram(table, colName); + if (histogram != null) { + ColumnStatisticBuilder columnStatisticBuilder = + new ColumnStatisticBuilder(cache).setHistogram(histogram); + columnStatisticMap.put(slotReference, columnStatisticBuilder.build()); + cache = columnStatisticBuilder.build(); + if (ConnectContext.get().getSessionVariable().isEnableMinidump() + && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) { + totalHistogramMap.put(table.getName() + ":" + colName, histogram); + } } columnStatisticMap.put(slotReference, cache); + if (ConnectContext.get().getSessionVariable().isEnableMinidump() + && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) { + totalColumnStatisticMap.put(table.getName() + ":" + colName, cache); + totalHistogramMap.put(table.getName() + colName, histogram); + } } return new Statistics(rowCount, columnStatisticMap); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java index b7b235d2b43..adc67ca835f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java @@ -17,33 +17,15 @@ package org.apache.doris.nereids.rules.rewrite; -import org.apache.doris.nereids.trees.expressions.Cast; -import org.apache.doris.nereids.trees.expressions.EqualTo; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.plans.JoinType; -import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; -import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.util.MemoPatternMatchSupported; import org.apache.doris.nereids.util.PlanChecker; -import org.apache.doris.nereids.util.PlanConstructor; import org.apache.doris.utframe.TestWithFeService; -import com.google.common.collect.Sets; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.util.Optional; -import java.util.Set; - public class InferPredicatesTest extends TestWithFeService implements MemoPatternMatchSupported { - private final LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "t1", 0); - - private final LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "t2", 0); - - private final PredicatePropagation propagation = new PredicatePropagation(); - @Override protected void runBeforeAll() throws Exception { createDatabase("test"); @@ -646,16 +628,4 @@ public class InferPredicatesTest extends TestWithFeService implements MemoPatter ).when(join -> join.getJoinType() == JoinType.LEFT_OUTER_JOIN) ); } - - @Test - void testInfer() { - EqualTo equalTo = new EqualTo(new Cast(scan1.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1)); - EqualTo equalTo2 = new EqualTo(scan2.getOutput().get(0), scan1.getOutput().get(0)); - Set<Expression> predicates = Sets.newHashSet(); - predicates.add(equalTo2); - predicates.add(equalTo); - Set<Expression> newPredicates = propagation.infer(predicates); - Optional<Expression> newPredicate = newPredicates.stream().findFirst(); - Assertions.assertTrue(newPredicate.get().equals(new EqualTo(new Cast(scan2.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1)))); - } } diff --git a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy index 120c9a8f674..a1621f1c239 100644 --- a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy +++ b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy @@ -22,8 +22,6 @@ suite("test_infer_predicate") { sql 'drop table if exists infer_tb1;' sql 'drop table if exists infer_tb2;' sql 'drop table if exists infer_tb3;' - sql 'drop table if exists infer_tb4;' - sql 'drop table if exists infer_tb5;' sql '''create table infer_tb1 (k1 int, k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' @@ -31,10 +29,6 @@ suite("test_infer_predicate") { sql '''create table infer_tb3 (k1 varchar(100), k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' - sql '''create table infer_tb4 (k1 varchar(100), k2 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' - - sql '''create table infer_tb5 (k1 varchar(100), k3 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' - explain { sql "select * from infer_tb1 inner join infer_tb2 where infer_tb2.k1 = infer_tb1.k2 and infer_tb2.k1 = 1;" contains "PREDICATES: k2" @@ -61,16 +55,4 @@ suite("test_infer_predicate") { contains "PREDICATES: k3" contains "PREDICATES: k2" } - - explain { - sql "select * from infer_tb4 left join infer_tb5 on infer_tb4.k2 = infer_tb5.k3 where infer_tb4.k2 = '20230901';" - contains "PREDICATES: k3" - contains "PREDICATES: k2" - } - - sql 'drop table if exists infer_tb1;' - sql 'drop table if exists infer_tb2;' - sql 'drop table if exists infer_tb3;' - sql 'drop table if exists infer_tb4;' - sql 'drop table if exists infer_tb5;' } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
