This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 687a91872a2 Revert "[refactor](nereids)forbid unknown stats for
branch2.0 #24061 (#24243)" (#24294)
687a91872a2 is described below
commit 687a91872a296bd60d38a2866a0ed21138a2e5d8
Author: Kang <[email protected]>
AuthorDate: Wed Sep 13 15:18:48 2023 +0800
Revert "[refactor](nereids)forbid unknown stats for branch2.0 #24061
(#24243)" (#24294)
This reverts commit 108e91f2a37d08119e4a611a51fdd0341d9330b3.
---
.../glue/translator/PhysicalPlanTranslator.java | 35 ----------------
.../glue/translator/PlanTranslatorContext.java | 31 --------------
.../rules/rewrite/PredicatePropagation.java | 39 +++++++----------
.../doris/nereids/stats/StatsCalculator.java | 49 +++++++++++++++++-----
.../nereids/rules/rewrite/InferPredicatesTest.java | 30 -------------
.../infer_predicate/infer_predicate.groovy | 18 --------
6 files changed, 54 insertions(+), 148 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 359373ddbf0..406dda54859 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -126,11 +126,6 @@ import
org.apache.doris.nereids.trees.plans.physical.PhysicalUnion;
import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow;
import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter;
import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor;
-import org.apache.doris.nereids.types.ArrayType;
-import org.apache.doris.nereids.types.DataType;
-import org.apache.doris.nereids.types.JsonType;
-import org.apache.doris.nereids.types.MapType;
-import org.apache.doris.nereids.types.StructType;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.JoinUtils;
import org.apache.doris.nereids.util.Utils;
@@ -240,14 +235,6 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
Collections.reverse(context.getPlanFragments());
// TODO: maybe we need to trans nullable directly? and then we could
remove call computeMemLayout
context.getDescTable().computeMemLayout();
- if (ConnectContext.get() != null &&
ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
- Set<ScanNode> scans = context.getScanNodeWithUnknownColumnStats();
- if (!scans.isEmpty()) {
- StringBuilder builder = new StringBuilder();
- scans.forEach(scanNode -> builder.append(scanNode));
- throw new AnalysisException("tables with unknown column stats:
" + builder);
- }
- }
return rootFragment;
}
@@ -543,15 +530,6 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
// TODO: move all node set cardinality into one place
if (olapScan.getStats() != null) {
olapScanNode.setCardinality((long)
olapScan.getStats().getRowCount());
- if
(ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
- for (int i = 0; i < slots.size(); i++) {
- Slot slot = slots.get(i);
- if
(olapScan.getStats().findColumnStatistics(slot).isUnKnown()
- && !isComplexDataType(slot.getDataType())) {
- context.addUnknownStatsColumn(olapScanNode,
tupleDescriptor.getSlots().get(i).getId());
- }
- }
- }
}
// TODO: Do we really need tableName here?
TableName tableName = new TableName(null, "", "");
@@ -2000,14 +1978,6 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
scanNode.getTupleDesc().getSlots().add(smallest);
}
try {
- if (ConnectContext.get() != null &&
ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
- for (SlotId slotId : requiredByProjectSlotIdSet) {
- if (context.isColumnStatsUnknown(scanNode, slotId)) {
- throw new AnalysisException("meet unknown column stats
on table " + scanNode);
- }
- }
- context.removeScanFromStatsUnknownColumnsMap(scanNode);
- }
scanNode.updateRequiredSlots(context, requiredByProjectSlotIdSet);
} catch (UserException e) {
Util.logAndThrowRuntimeException(LOG,
@@ -2270,9 +2240,4 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
}
return outputExprs;
}
-
- private boolean isComplexDataType(DataType dataType) {
- return dataType instanceof ArrayType || dataType instanceof MapType ||
dataType instanceof JsonType
- || dataType instanceof StructType;
- }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
index e69b5ee8ef3..256b37d7057 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
@@ -45,13 +45,11 @@ import org.apache.doris.thrift.TPushAggOp;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
-import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
@@ -92,7 +90,6 @@ public class PlanTranslatorContext {
private final Map<CTEId, PhysicalCTEProducer> cteProducerMap =
Maps.newHashMap();
private final Map<RelationId, TPushAggOp> tablePushAggOp =
Maps.newHashMap();
- private final Map<ScanNode, Set<SlotId>> statsUnknownColumnsMap =
Maps.newHashMap();
public PlanTranslatorContext(CascadesContext ctx) {
this.translator = new
RuntimeFilterTranslator(ctx.getRuntimeFilterContext());
@@ -103,34 +100,6 @@ public class PlanTranslatorContext {
translator = null;
}
- /**
- * remember the unknown-stats column and its scan, used for
forbid_unknown_col_stats check
- */
- public void addUnknownStatsColumn(ScanNode scan, SlotId slotId) {
- Set<SlotId> slots = statsUnknownColumnsMap.get(scan);
- if (slots == null) {
- statsUnknownColumnsMap.put(scan, Sets.newHashSet(slotId));
- } else {
- statsUnknownColumnsMap.get(scan).add(slotId);
- }
- }
-
- public boolean isColumnStatsUnknown(ScanNode scan, SlotId slotId) {
- Set<SlotId> unknownSlots = statsUnknownColumnsMap.get(scan);
- if (unknownSlots == null) {
- return false;
- }
- return unknownSlots.contains(slotId);
- }
-
- public void removeScanFromStatsUnknownColumnsMap(ScanNode scan) {
- statsUnknownColumnsMap.remove(scan);
- }
-
- public Set<ScanNode> getScanNodeWithUnknownColumnStats() {
- return statsUnknownColumnsMap.keySet();
- }
-
public List<PlanFragment> getPlanFragments() {
return planFragments;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
index 71818966696..cc45952817a 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
@@ -59,12 +59,12 @@ public class PredicatePropagation {
}
/**
- * Use the left or right child of `equalExpr` to replace the left or right
child of `expression`
+ * Use the left or right child of `leftSlotEqualToRightSlot` to replace
the left or right child of `expression`
* Now only support infer `ComparisonPredicate`.
* TODO: We should determine whether `expression` satisfies the condition
for replacement
* eg: Satisfy `expression` is non-deterministic
*/
- private Expression doInfer(Expression equalExpr, Expression expression) {
+ private Expression doInfer(Expression leftSlotEqualToRightSlot, Expression
expression) {
return expression.accept(new DefaultExpressionRewriter<Void>() {
@Override
@@ -76,43 +76,36 @@ public class PredicatePropagation {
public Expression visitComparisonPredicate(ComparisonPredicate cp,
Void context) {
// we need to get expression covered by cast, because we want
to infer different datatype
if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.left())
&& (cp.right().isConstant())) {
- return replaceSlot(cp,
ExpressionUtils.getDatatypeCoveredByCast(cp.left()), equalExpr);
+ return replaceSlot(cp,
ExpressionUtils.getDatatypeCoveredByCast(cp.left()));
} else if
(ExpressionUtils.isExpressionSlotCoveredByCast(cp.right()) &&
cp.left().isConstant()) {
- return replaceSlot(cp,
ExpressionUtils.getDatatypeCoveredByCast(cp.right()), equalExpr);
+ return replaceSlot(cp,
ExpressionUtils.getDatatypeCoveredByCast(cp.right()));
}
return super.visit(cp, context);
}
private boolean isDataTypeValid(DataType originDataType,
Expression expr) {
- if ((expr.child(0).getDataType() instanceof IntegralType)
- && (expr.child(1).getDataType() instanceof
IntegralType)
+ if ((leftSlotEqualToRightSlot.child(0).getDataType()
instanceof IntegralType)
+ && (leftSlotEqualToRightSlot.child(1).getDataType()
instanceof IntegralType)
&& (originDataType instanceof IntegralType)) {
// infer filter can not be lower than original datatype,
or dataset would be wrong
if (!((IntegralType) originDataType).widerThan(
- (IntegralType) expr.child(0).getDataType())
+ (IntegralType)
leftSlotEqualToRightSlot.child(0).getDataType())
&& !((IntegralType)
originDataType).widerThan(
- (IntegralType)
expr.child(1).getDataType())) {
+ (IntegralType)
leftSlotEqualToRightSlot.child(1).getDataType())) {
return true;
}
- } else if
(expr.child(0).getDataType().equals(expr.child(1).getDataType())) {
- return true;
}
return false;
}
- private Expression replaceSlot(Expression sourcePredicate,
DataType originDataType, Expression equal) {
- if (!isDataTypeValid(originDataType, equal)) {
- return sourcePredicate;
- }
- return sourcePredicate.rewriteUp(e -> {
- // we can not replace Cast expression to slot because when
rewrite up, we have replace child of cast
- if (e instanceof Cast) {
- return e;
- }
- if (ExpressionUtils.isTwoExpressionEqualWithCast(e,
equal.child(0))) {
- return equal.child(1);
- } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e,
equal.child(1))) {
- return equal.child(0);
+ private Expression replaceSlot(Expression expr, DataType
originDataType) {
+ return expr.rewriteUp(e -> {
+ if (isDataTypeValid(originDataType,
leftSlotEqualToRightSlot)) {
+ if (ExpressionUtils.isTwoExpressionEqualWithCast(e,
leftSlotEqualToRightSlot.child(0))) {
+ return leftSlotEqualToRightSlot.child(1);
+ } else if
(ExpressionUtils.isTwoExpressionEqualWithCast(e,
leftSlotEqualToRightSlot.child(1))) {
+ return leftSlotEqualToRightSlot.child(0);
+ }
}
return e;
});
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 24ec929e820..45aeae54fde 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -26,6 +26,7 @@ import org.apache.doris.common.Config;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.CascadesContext;
+import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.memo.Group;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.trees.expressions.Alias;
@@ -122,6 +123,7 @@ import org.apache.doris.statistics.StatisticConstants;
import org.apache.doris.statistics.StatisticRange;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
+import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
@@ -621,21 +623,46 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize())
.build();
}
- if (!cache.isUnKnown) {
- rowCount = Math.max(rowCount, cache.count);
- Histogram histogram = getColumnHistogram(table, colName);
- if (histogram != null) {
- ColumnStatisticBuilder columnStatisticBuilder =
- new
ColumnStatisticBuilder(cache).setHistogram(histogram);
- cache = columnStatisticBuilder.build();
- if
(ConnectContext.get().getSessionVariable().isEnableMinidump()
- &&
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
- totalColumnStatisticMap.put(table.getName() + ":" +
colName, cache);
- totalHistogramMap.put(table.getName() + colName,
histogram);
+ if (cache.isUnKnown) {
+ if (forbidUnknownColStats && !shouldIgnoreThisCol) {
+ if (StatisticsUtil.statsTblAvailable()) {
+ throw new AnalysisException(String.format("Found
unknown stats for column:%s.%s.\n"
+ + "It may caused by:\n"
+ + "\n"
+ + "1. This column never got analyzed\n"
+ + "2. This table is empty\n"
+ + "3. Stats load failed caused by unstable of
backends,"
+ + "and FE cached the unknown stats by default
in this scenario\n"
+ + "4. There is a bug, please report it to
Doris community\n"
+ + "\n"
+ + "If an unknown stats for this column is
tolerable,"
+ + "you could set session variable
`forbid_unknown_col_stats` to false to make planner"
+ + " ignore this error and keep planning.",
table.getName(), colName));
+ } else {
+ throw new AnalysisException("BE is not available!");
}
}
+ columnStatisticMap.put(slotReference, cache);
+ continue;
+ }
+ rowCount = Math.max(rowCount, cache.count);
+ Histogram histogram = getColumnHistogram(table, colName);
+ if (histogram != null) {
+ ColumnStatisticBuilder columnStatisticBuilder =
+ new
ColumnStatisticBuilder(cache).setHistogram(histogram);
+ columnStatisticMap.put(slotReference,
columnStatisticBuilder.build());
+ cache = columnStatisticBuilder.build();
+ if
(ConnectContext.get().getSessionVariable().isEnableMinidump()
+ &&
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
+ totalHistogramMap.put(table.getName() + ":" + colName,
histogram);
+ }
}
columnStatisticMap.put(slotReference, cache);
+ if (ConnectContext.get().getSessionVariable().isEnableMinidump()
+ &&
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
+ totalColumnStatisticMap.put(table.getName() + ":" + colName,
cache);
+ totalHistogramMap.put(table.getName() + colName, histogram);
+ }
}
return new Statistics(rowCount, columnStatisticMap);
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java
index b7b235d2b43..adc67ca835f 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java
@@ -17,33 +17,15 @@
package org.apache.doris.nereids.rules.rewrite;
-import org.apache.doris.nereids.trees.expressions.Cast;
-import org.apache.doris.nereids.trees.expressions.EqualTo;
-import org.apache.doris.nereids.trees.expressions.Expression;
-import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.plans.JoinType;
-import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
-import org.apache.doris.nereids.types.BigIntType;
import org.apache.doris.nereids.util.MemoPatternMatchSupported;
import org.apache.doris.nereids.util.PlanChecker;
-import org.apache.doris.nereids.util.PlanConstructor;
import org.apache.doris.utframe.TestWithFeService;
-import com.google.common.collect.Sets;
-import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import java.util.Optional;
-import java.util.Set;
-
public class InferPredicatesTest extends TestWithFeService implements
MemoPatternMatchSupported {
- private final LogicalOlapScan scan1 =
PlanConstructor.newLogicalOlapScan(0, "t1", 0);
-
- private final LogicalOlapScan scan2 =
PlanConstructor.newLogicalOlapScan(1, "t2", 0);
-
- private final PredicatePropagation propagation = new
PredicatePropagation();
-
@Override
protected void runBeforeAll() throws Exception {
createDatabase("test");
@@ -646,16 +628,4 @@ public class InferPredicatesTest extends TestWithFeService
implements MemoPatter
).when(join -> join.getJoinType() ==
JoinType.LEFT_OUTER_JOIN)
);
}
-
- @Test
- void testInfer() {
- EqualTo equalTo = new EqualTo(new Cast(scan1.getOutput().get(0),
BigIntType.INSTANCE), Literal.of(1));
- EqualTo equalTo2 = new EqualTo(scan2.getOutput().get(0),
scan1.getOutput().get(0));
- Set<Expression> predicates = Sets.newHashSet();
- predicates.add(equalTo2);
- predicates.add(equalTo);
- Set<Expression> newPredicates = propagation.infer(predicates);
- Optional<Expression> newPredicate = newPredicates.stream().findFirst();
- Assertions.assertTrue(newPredicate.get().equals(new EqualTo(new
Cast(scan2.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1))));
- }
}
diff --git
a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
index 120c9a8f674..a1621f1c239 100644
--- a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
+++ b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
@@ -22,8 +22,6 @@ suite("test_infer_predicate") {
sql 'drop table if exists infer_tb1;'
sql 'drop table if exists infer_tb2;'
sql 'drop table if exists infer_tb3;'
- sql 'drop table if exists infer_tb4;'
- sql 'drop table if exists infer_tb5;'
sql '''create table infer_tb1 (k1 int, k2 int) distributed by hash(k1)
buckets 3 properties('replication_num' = '1');'''
@@ -31,10 +29,6 @@ suite("test_infer_predicate") {
sql '''create table infer_tb3 (k1 varchar(100), k2 int) distributed by
hash(k1) buckets 3 properties('replication_num' = '1');'''
- sql '''create table infer_tb4 (k1 varchar(100), k2 date) distributed by
hash(k1) buckets 3 properties('replication_num' = '1');'''
-
- sql '''create table infer_tb5 (k1 varchar(100), k3 date) distributed by
hash(k1) buckets 3 properties('replication_num' = '1');'''
-
explain {
sql "select * from infer_tb1 inner join infer_tb2 where infer_tb2.k1 =
infer_tb1.k2 and infer_tb2.k1 = 1;"
contains "PREDICATES: k2"
@@ -61,16 +55,4 @@ suite("test_infer_predicate") {
contains "PREDICATES: k3"
contains "PREDICATES: k2"
}
-
- explain {
- sql "select * from infer_tb4 left join infer_tb5 on infer_tb4.k2 =
infer_tb5.k3 where infer_tb4.k2 = '20230901';"
- contains "PREDICATES: k3"
- contains "PREDICATES: k2"
- }
-
- sql 'drop table if exists infer_tb1;'
- sql 'drop table if exists infer_tb2;'
- sql 'drop table if exists infer_tb3;'
- sql 'drop table if exists infer_tb4;'
- sql 'drop table if exists infer_tb5;'
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]