This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 37b062fd65 [refactor](nereids) forbid unknown stats for branch 2.0
#24061 (#24323)
37b062fd65 is described below
commit 37b062fd65a8ca37538b7e4e9dc95a8c7321920a
Author: minghong <[email protected]>
AuthorDate: Wed Sep 13 21:02:20 2023 +0800
[refactor](nereids) forbid unknown stats for branch 2.0 #24061 (#24323)
---
.../glue/translator/PhysicalPlanTranslator.java | 35 ++++++++++++++++
.../glue/translator/PlanTranslatorContext.java | 31 ++++++++++++++
.../doris/nereids/stats/StatsCalculator.java | 49 +++++-----------------
3 files changed, 77 insertions(+), 38 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 406dda5485..359373ddbf 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -126,6 +126,11 @@ import
org.apache.doris.nereids.trees.plans.physical.PhysicalUnion;
import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow;
import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter;
import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.types.JsonType;
+import org.apache.doris.nereids.types.MapType;
+import org.apache.doris.nereids.types.StructType;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.JoinUtils;
import org.apache.doris.nereids.util.Utils;
@@ -235,6 +240,14 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
Collections.reverse(context.getPlanFragments());
// TODO: maybe we need to trans nullable directly? and then we could
remove call computeMemLayout
context.getDescTable().computeMemLayout();
+ if (ConnectContext.get() != null &&
ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
+ Set<ScanNode> scans = context.getScanNodeWithUnknownColumnStats();
+ if (!scans.isEmpty()) {
+ StringBuilder builder = new StringBuilder();
+ scans.forEach(scanNode -> builder.append(scanNode));
+ throw new AnalysisException("tables with unknown column stats:
" + builder);
+ }
+ }
return rootFragment;
}
@@ -530,6 +543,15 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
// TODO: move all node set cardinality into one place
if (olapScan.getStats() != null) {
olapScanNode.setCardinality((long)
olapScan.getStats().getRowCount());
+ if
(ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
+ for (int i = 0; i < slots.size(); i++) {
+ Slot slot = slots.get(i);
+ if
(olapScan.getStats().findColumnStatistics(slot).isUnKnown()
+ && !isComplexDataType(slot.getDataType())) {
+ context.addUnknownStatsColumn(olapScanNode,
tupleDescriptor.getSlots().get(i).getId());
+ }
+ }
+ }
}
// TODO: Do we really need tableName here?
TableName tableName = new TableName(null, "", "");
@@ -1978,6 +2000,14 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
scanNode.getTupleDesc().getSlots().add(smallest);
}
try {
+ if (ConnectContext.get() != null &&
ConnectContext.get().getSessionVariable().forbidUnknownColStats) {
+ for (SlotId slotId : requiredByProjectSlotIdSet) {
+ if (context.isColumnStatsUnknown(scanNode, slotId)) {
+ throw new AnalysisException("meet unknown column stats
on table " + scanNode);
+ }
+ }
+ context.removeScanFromStatsUnknownColumnsMap(scanNode);
+ }
scanNode.updateRequiredSlots(context, requiredByProjectSlotIdSet);
} catch (UserException e) {
Util.logAndThrowRuntimeException(LOG,
@@ -2240,4 +2270,9 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
}
return outputExprs;
}
+
+    /**
+     * Tells whether the given type is an ArrayType, MapType, JsonType or StructType.
+     *
+     * @param dataType the type to classify
+     * @return true if {@code dataType} is an instance of one of those four types
+     */
+    private boolean isComplexDataType(DataType dataType) {
+        if (dataType instanceof ArrayType || dataType instanceof MapType) {
+            return true;
+        }
+        return dataType instanceof JsonType || dataType instanceof StructType;
+    }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
index 256b37d705..e69b5ee8ef 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
@@ -45,11 +45,13 @@ import org.apache.doris.thrift.TPushAggOp;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
@@ -90,6 +92,7 @@ public class PlanTranslatorContext {
private final Map<CTEId, PhysicalCTEProducer> cteProducerMap =
Maps.newHashMap();
private final Map<RelationId, TPushAggOp> tablePushAggOp =
Maps.newHashMap();
+ private final Map<ScanNode, Set<SlotId>> statsUnknownColumnsMap =
Maps.newHashMap();
public PlanTranslatorContext(CascadesContext ctx) {
this.translator = new
RuntimeFilterTranslator(ctx.getRuntimeFilterContext());
@@ -100,6 +103,34 @@ public class PlanTranslatorContext {
translator = null;
}
+    /**
+     * Records that the column identified by {@code slotId}, produced by {@code scan}, has
+     * unknown statistics. The per-scan slot set is created the first time a scan is seen.
+     * The recorded pairs are consulted by {@link #isColumnStatsUnknown(ScanNode, SlotId)}
+     * for the forbid_unknown_col_stats check.
+     *
+     * @param scan the scan node that outputs the column
+     * @param slotId the slot id of the column whose stats are unknown
+     */
+    public void addUnknownStatsColumn(ScanNode scan, SlotId slotId) {
+        // Single lookup: fetch the existing set or create and register a new one, then add.
+        statsUnknownColumnsMap.computeIfAbsent(scan, key -> Sets.newHashSet()).add(slotId);
+    }
+
+    /**
+     * Tells whether {@code slotId} has been recorded for {@code scan} as a column with
+     * unknown statistics.
+     *
+     * @param scan the scan node to look up
+     * @param slotId the slot id to test
+     * @return true only if the scan has recorded slots and they contain {@code slotId}
+     */
+    public boolean isColumnStatsUnknown(ScanNode scan, SlotId slotId) {
+        Set<SlotId> recorded = statsUnknownColumnsMap.get(scan);
+        return recorded != null && recorded.contains(slotId);
+    }
+
+ /**
+ * Drops the entry for {@code scan} from statsUnknownColumnsMap, if one exists.
+ *
+ * @param scan the scan node whose recorded unknown-stats slots are discarded
+ */
+ public void removeScanFromStatsUnknownColumnsMap(ScanNode scan) {
+ statsUnknownColumnsMap.remove(scan);
+ }
+
+ /**
+ * Returns the scan nodes that currently have an entry in statsUnknownColumnsMap.
+ * The result is the map's own keySet(), not a copy.
+ *
+ * @return the key set of statsUnknownColumnsMap
+ */
+ public Set<ScanNode> getScanNodeWithUnknownColumnStats() {
+ return statsUnknownColumnsMap.keySet();
+ }
+
/** Returns the planFragments list held by this context. */
public List<PlanFragment> getPlanFragments() {
return planFragments;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 45aeae54fd..24ec929e82 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -26,7 +26,6 @@ import org.apache.doris.common.Config;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.CascadesContext;
-import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.memo.Group;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.trees.expressions.Alias;
@@ -123,7 +122,6 @@ import org.apache.doris.statistics.StatisticConstants;
import org.apache.doris.statistics.StatisticRange;
import org.apache.doris.statistics.Statistics;
import org.apache.doris.statistics.StatisticsBuilder;
-import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
@@ -623,46 +621,21 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize())
.build();
}
- if (cache.isUnKnown) {
- if (forbidUnknownColStats && !shouldIgnoreThisCol) {
- if (StatisticsUtil.statsTblAvailable()) {
- throw new AnalysisException(String.format("Found
unknown stats for column:%s.%s.\n"
- + "It may caused by:\n"
- + "\n"
- + "1. This column never got analyzed\n"
- + "2. This table is empty\n"
- + "3. Stats load failed caused by unstable of
backends,"
- + "and FE cached the unknown stats by default
in this scenario\n"
- + "4. There is a bug, please report it to
Doris community\n"
- + "\n"
- + "If an unknown stats for this column is
tolerable,"
- + "you could set session variable
`forbid_unknown_col_stats` to false to make planner"
- + " ignore this error and keep planning.",
table.getName(), colName));
- } else {
- throw new AnalysisException("BE is not available!");
+ if (!cache.isUnKnown) {
+ rowCount = Math.max(rowCount, cache.count);
+ Histogram histogram = getColumnHistogram(table, colName);
+ if (histogram != null) {
+ ColumnStatisticBuilder columnStatisticBuilder =
+ new
ColumnStatisticBuilder(cache).setHistogram(histogram);
+ cache = columnStatisticBuilder.build();
+ if
(ConnectContext.get().getSessionVariable().isEnableMinidump()
+ &&
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
+ totalColumnStatisticMap.put(table.getName() + ":" +
colName, cache);
+ totalHistogramMap.put(table.getName() + colName,
histogram);
}
}
- columnStatisticMap.put(slotReference, cache);
- continue;
- }
- rowCount = Math.max(rowCount, cache.count);
- Histogram histogram = getColumnHistogram(table, colName);
- if (histogram != null) {
- ColumnStatisticBuilder columnStatisticBuilder =
- new
ColumnStatisticBuilder(cache).setHistogram(histogram);
- columnStatisticMap.put(slotReference,
columnStatisticBuilder.build());
- cache = columnStatisticBuilder.build();
- if
(ConnectContext.get().getSessionVariable().isEnableMinidump()
- &&
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
- totalHistogramMap.put(table.getName() + ":" + colName,
histogram);
- }
}
columnStatisticMap.put(slotReference, cache);
- if (ConnectContext.get().getSessionVariable().isEnableMinidump()
- &&
!ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
- totalColumnStatisticMap.put(table.getName() + ":" + colName,
cache);
- totalHistogramMap.put(table.getName() + colName, histogram);
- }
}
return new Statistics(rowCount, columnStatisticMap);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]