This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch pick_4.0_57912 in repository https://gitbox.apache.org/repos/asf/doris.git
commit fcc4be6fe85d9ce3286a87c87b9464e668ab3d7b Author: morrySnow <[email protected]> AuthorDate: Wed Nov 12 23:42:18 2025 +0800 branch-4.0: [chore](plan) remove ColumnStats and mem layout on tuple #57912 picked from #57912 --- fe/.idea/vcs.xml | 24 +- .../org/apache/doris/catalog/PrimitiveType.java | 5 - .../java/org/apache/doris/alter/RollupJobV2.java | 1 - .../org/apache/doris/alter/SchemaChangeJobV2.java | 1 - .../org/apache/doris/analysis/AggregateInfo.java | 4 +- .../org/apache/doris/analysis/DescriptorTable.java | 150 +----------- .../main/java/org/apache/doris/analysis/Expr.java | 12 - .../org/apache/doris/analysis/SlotDescriptor.java | 140 +---------- .../java/org/apache/doris/analysis/TableRef.java | 9 - .../org/apache/doris/analysis/TupleDescriptor.java | 261 +-------------------- .../java/org/apache/doris/catalog/ColumnStats.java | 176 -------------- .../apache/doris/datasource/FileQueryScanNode.java | 7 - .../doris/source/RemoteDorisScanNode.java | 3 - .../doris/datasource/jdbc/source/JdbcScanNode.java | 7 +- .../doris/datasource/odbc/source/OdbcScanNode.java | 7 +- .../source/TrinoConnectorScanNode.java | 3 - .../datasource/tvf/source/MetadataScanNode.java | 1 - .../glue/translator/PhysicalPlanTranslator.java | 52 +--- .../glue/translator/PlanTranslatorContext.java | 1 - .../org/apache/doris/planner/HashJoinNode.java | 76 ------ .../org/apache/doris/planner/OlapScanNode.java | 7 - .../org/apache/doris/planner/ResultFileSink.java | 3 - .../java/org/apache/doris/planner/SortNode.java | 16 -- .../apache/doris/statistics/AggStatsDerive.java | 87 ------- .../doris/statistics/AnalyticEvalStatsDerive.java | 41 ---- .../doris/statistics/AssertNumRowsStatsDerive.java | 30 --- .../apache/doris/statistics/BaseStatsDerive.java | 152 ------------ .../org/apache/doris/statistics/DeriveFactory.java | 66 ------ .../doris/statistics/EmptySetStatsDerive.java | 31 --- .../doris/statistics/ExchangeStatsDerive.java | 34 --- .../doris/statistics/HashJoinStatsDerive.java | 254 -------------------- .../apache/doris/statistics/MysqlStatsDerive.java | 33 --- .../statistics/NestedLoopJoinStatsDerive.java | 48 ---- .../doris/statistics/OlapScanStatsDerive.java | 93 -------- .../apache/doris/statistics/SelectStatsDerive.java | 35 --- .../doris/statistics/StatsRecursiveDerive.java | 60 ----- .../doris/statistics/TableFunctionStatsDerive.java | 34 --- .../org/apache/doris/catalog/ColumnStatTest.java | 87 ------- .../translator/PhysicalPlanTranslatorTest.java | 2 +- 39 files changed, 34 insertions(+), 2019 deletions(-) diff --git a/fe/.idea/vcs.xml b/fe/.idea/vcs.xml index 7b2cdb1cbbd..e5158c553f8 100644 --- a/fe/.idea/vcs.xml +++ b/fe/.idea/vcs.xml @@ -1,20 +1,4 @@ <?xml version="1.0" encoding="UTF-8"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --> <project version="4"> <component name="IssueNavigationConfiguration"> <option name="links"> @@ -27,6 +11,10 @@ </option> </component> <component name="VcsDirectoryMappings"> - <mapping directory="$PROJECT_DIR$/.." vcs="Git" /> + <mapping directory="$PROJECT_DIR$/.." vcs="Git" /> + <mapping directory="$PROJECT_DIR$/../contrib/apache-orc" vcs="Git" /> + <mapping directory="$PROJECT_DIR$/../contrib/clucene" vcs="Git" /> + <mapping directory="$PROJECT_DIR$/../contrib/faiss" vcs="Git" /> + <mapping directory="$PROJECT_DIR$/../contrib/openblas" vcs="Git" /> </component> -</project> +</project> \ No newline at end of file diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/PrimitiveType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/PrimitiveType.java index 71966b8c7ac..4b6c47a2587 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/PrimitiveType.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/PrimitiveType.java @@ -374,11 +374,6 @@ public enum PrimitiveType { return (this == VARCHAR || this == CHAR || this == STRING); } - public boolean isIntegerType() { - return (this == TINYINT || this == SMALLINT - || this == INT || this == BIGINT); - } - public boolean isIPType() { return (this == IPV4 || this == IPV6); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java index 7701d04753d..2cbb21489b3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java @@ -452,7 +452,6 @@ public class RollupJobV2 extends AlterJobV2 implements GsonPostProcessable { for (Column column : rollupColumns) { SlotDescriptor destSlotDesc = descTable.addSlotDescriptor(destTupleDesc); - destSlotDesc.setIsMaterialized(true); destSlotDesc.setColumn(column); destSlotDesc.setIsNullable(column.isAllowNull()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java index fcc7e25093d..0e9c3fe8049 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java @@ -526,7 +526,6 @@ public class SchemaChangeJobV2 extends AlterJobV2 implements GsonPostProcessable TupleDescriptor destTupleDesc = descTable.createTupleDescriptor(); for (Column column : fullSchema) { SlotDescriptor destSlotDesc = descTable.addSlotDescriptor(destTupleDesc); - destSlotDesc.setIsMaterialized(true); destSlotDesc.setColumn(column); destSlotDesc.setIsNullable(column.isAllowNull()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java index 934a109ef64..09c9b705974 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java @@ -105,9 +105,7 @@ public final class AggregateInfo extends AggregateInfoBase { int groupingExprNum = groupingExprs != null ? groupingExprs.size() : 0; Preconditions.checkState(groupingExprNum <= outputSlots.size()); for (int i = groupingExprNum; i < outputSlots.size(); ++i) { - if (outputSlots.get(i).isMaterialized()) { - materializedSlots.add(i - groupingExprNum); - } + materializedSlots.add(i - groupingExprNum); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DescriptorTable.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DescriptorTable.java index fb6cc7df0a8..254edea5fbb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DescriptorTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DescriptorTable.java @@ -21,19 +21,12 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.TableIf; -import org.apache.doris.common.AnalysisException; import org.apache.doris.common.IdGenerator; import org.apache.doris.thrift.TDescriptorTable; -import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import java.util.ArrayList; -import java.util.Collection; import java.util.HashMap; -import java.util.List; import java.util.Map; /** @@ -42,18 +35,10 @@ import java.util.Map; * them unique ids.. */ public class DescriptorTable { - private static final Logger LOG = LogManager.getLogger(DescriptorTable.class); - private final HashMap<TupleId, TupleDescriptor> tupleDescs = new HashMap<TupleId, TupleDescriptor>(); - // List of referenced tables with no associated TupleDescriptor to ship to the BE. - // For example, the output table of an insert query. - private final List<TableIf> referencedTables = new ArrayList<TableIf>(); private final IdGenerator<TupleId> tupleIdGenerator = TupleId.createGenerator(); private final IdGenerator<SlotId> slotIdGenerator = SlotId.createGenerator(); private final HashMap<SlotId, SlotDescriptor> slotDescs = Maps.newHashMap(); - - private final HashMap<SlotDescriptor, SlotDescriptor> outToIntermediateSlots = new HashMap<>(); - private TDescriptorTable thriftDescTable = null; // serialized version of this public DescriptorTable() { @@ -78,32 +63,6 @@ public class DescriptorTable { return result; } - /** - * Used by new optimizer. - */ - public SlotDescriptor addSlotDescriptor(TupleDescriptor d, int id) { - SlotDescriptor result = new SlotDescriptor(new SlotId(id), d); - d.addSlot(result); - slotDescs.put(result.getId(), result); - return result; - } - - /** - * Create copy of src with new id. The returned descriptor has its mem layout - * computed. - */ - public TupleDescriptor copyTupleDescriptor(TupleId srcId, String debugName) { - TupleDescriptor d = new TupleDescriptor(tupleIdGenerator.getNextId(), debugName); - tupleDescs.put(d.getId(), d); - // create copies of slots - TupleDescriptor src = tupleDescs.get(srcId); - for (SlotDescriptor slot : src.getSlots()) { - copySlotDescriptor(d, slot); - } - d.computeStatAndMemLayout(); - return d; - } - /** * Append copy of src to dest. */ @@ -118,71 +77,6 @@ public class DescriptorTable { return tupleDescs.get(id); } - public HashMap<SlotId, SlotDescriptor> getSlotDescs() { - return slotDescs; - } - - /** - * Return all tuple desc by idList. - */ - public List<TupleDescriptor> getTupleDesc(List<TupleId> idList) throws AnalysisException { - List<TupleDescriptor> result = Lists.newArrayList(); - for (TupleId tupleId : idList) { - TupleDescriptor tupleDescriptor = getTupleDesc(tupleId); - if (tupleDescriptor == null) { - throw new AnalysisException("Invalid tuple id:" + tupleId.toString()); - } - result.add(tupleDescriptor); - } - return result; - } - - public SlotDescriptor getSlotDesc(SlotId id) { - return slotDescs.get(id); - } - - public Collection<TupleDescriptor> getTupleDescs() { - return tupleDescs.values(); - } - - public void addReferencedTable(TableIf table) { - referencedTables.add(table); - } - - /** - * Marks all slots in list as materialized. - */ - public void markSlotsMaterialized(List<SlotId> ids) { - for (SlotId id : ids) { - getSlotDesc(id).setIsMaterialized(true); - } - } - - @Deprecated - public void computeMemLayout() { - for (TupleDescriptor d : tupleDescs.values()) { - d.computeMemLayout(); - } - } - - // Computes physical layout parameters of all descriptors and calculate the statistics of the tuple. - // Call this only after the last descriptor was added. - public void computeStatAndMemLayout() { - for (TupleDescriptor d : tupleDescs.values()) { - d.computeStatAndMemLayout(); - } - } - - public void addSlotMappingInfo(Map<SlotDescriptor, SlotDescriptor> mapping) { - outToIntermediateSlots.putAll(mapping); - } - - public void materializeIntermediateSlots() { - for (Map.Entry<SlotDescriptor, SlotDescriptor> entry : outToIntermediateSlots.entrySet()) { - entry.getValue().setIsMaterialized(entry.getKey().isMaterialized()); - } - } - public TDescriptorTable toThrift() { if (thriftDescTable != null) { return thriftDescTable; @@ -193,22 +87,16 @@ public class DescriptorTable { for (TupleDescriptor tupleD : tupleDescs.values()) { // inline view of a non-constant select has a non-materialized tuple descriptor // in the descriptor table just for type checking, which we need to skip - if (tupleD.isMaterialized()) { - result.addToTupleDescriptors(tupleD.toThrift()); - // an inline view of a constant select has a materialized tuple - // but its table has no id - if (tupleD.getTable() != null - && tupleD.getTable().getId() >= 0) { - referencedTbls.put(tupleD.getTable().getId(), tupleD.getTable()); - } - for (SlotDescriptor slotD : tupleD.getMaterializedSlots()) { - result.addToSlotDescriptors(slotD.toThrift()); - } + result.addToTupleDescriptors(tupleD.toThrift()); + // an inline view of a constant select has a materialized tuple + // but its table has no id + if (tupleD.getTable() != null + && tupleD.getTable().getId() >= 0) { + referencedTbls.put(tupleD.getTable().getId(), tupleD.getTable()); + } + for (SlotDescriptor slotD : tupleD.getSlots()) { + result.addToSlotDescriptors(slotD.toThrift()); } - } - - for (TableIf tbl : referencedTables) { - referencedTbls.put(tbl.getId(), tbl); } for (TableIf tbl : referencedTbls.values()) { @@ -218,29 +106,11 @@ public class DescriptorTable { return result; } - public String debugString() { - StringBuilder out = new StringBuilder(); - out.append("tuples:\n"); - for (TupleDescriptor desc : tupleDescs.values()) { - out.append(desc).append("\n"); - } - out.append("\n "); - out.append("slotDesc size: ").append(slotDescs.size()).append("\n"); - for (SlotDescriptor desc : slotDescs.values()) { - out.append(desc.debugString()); - out.append("\n"); - } - out.append("\n "); - return out.toString(); - } - public String getExplainString() { StringBuilder out = new StringBuilder(); out.append("\nTuples:\n"); for (TupleDescriptor desc : tupleDescs.values()) { - if (desc.isMaterialized()) { - out.append(desc.getExplainString()).append("\n"); - } + out.append(desc.getExplainString()).append("\n"); } return out.toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index f0e3e026b93..286c512ad64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -1157,18 +1157,6 @@ public abstract class Expr extends TreeNode<Expr> implements Cloneable, ExprStat return getMockedExprs(type.getSubTypes(), type.getSubTypeNullables()); } - public void materializeSrcExpr() { - if (this instanceof SlotRef) { - SlotRef thisRef = (SlotRef) this; - SlotDescriptor slotDesc = thisRef.getDesc(); - slotDesc.setIsMaterialized(true); - slotDesc.getSourceExprs().forEach(Expr::materializeSrcExpr); - } - for (Expr child : children) { - child.materializeSrcExpr(); - } - } - // This is only for transactional insert operation, // to check it the given value in insert stmt is LiteralExpr. // And if we write "1" to a boolean column, there will be a cast(1 as boolean) expr, diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java index 085ee629ee2..9c3596a5b65 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java @@ -21,13 +21,11 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.ColumnStats; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Type; import org.apache.doris.thrift.TSlotDescriptor; import com.google.common.base.MoreObjects; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -55,21 +53,9 @@ public class SlotDescriptor { // path_ is set. private List<Expr> sourceExprs = Lists.newArrayList(); - // if false, this slot doesn't need to be materialized in parent tuple - // (and physical layout parameters are invalid) - private boolean isMaterialized; - // if false, this slot cannot be NULL private boolean isNullable; - // physical layout parameters - private int byteSize; - private int byteOffset = 0; // within tuple - private int slotIdx; // index within tuple struct - private int slotOffset; // index within slot array list - - private ColumnStats stats; // only set if 'column' isn't set - private boolean isAgg; // If set to false, then such slots will be ignored during // materialize them.Used to optimize to read less data and less memory usage private boolean needMaterialize = true; @@ -80,31 +66,18 @@ public class SlotDescriptor { this.id = id; this.parent = parent; - this.byteOffset = -1; // invalid - this.isMaterialized = false; this.isNullable = true; - this.isAgg = false; } public SlotDescriptor(SlotId id, TupleDescriptor parent, SlotDescriptor src) { this.id = id; this.parent = parent; - this.byteOffset = src.byteOffset; - this.slotIdx = src.slotIdx; - this.isMaterialized = src.isMaterialized; this.column = src.column; this.isNullable = src.isNullable; - this.byteSize = src.byteSize; - this.isAgg = false; - this.stats = src.stats; this.type = src.type; this.sourceExprs.add(new SlotRef(src)); } - public boolean getIsAgg() { - return isAgg; - } - public void setNeedMaterialize(boolean needMaterialize) { this.needMaterialize = needMaterialize; } @@ -113,10 +86,6 @@ public class SlotDescriptor { return !this.needMaterialize; } - public void setIsAgg(boolean agg) { - isAgg = agg; - } - public SlotId getId() { return id; } @@ -154,14 +123,6 @@ public class SlotDescriptor { this.column = column; } - public boolean isMaterialized() { - return isMaterialized; - } - - public void setIsMaterialized(boolean value) { - isMaterialized = value; - } - public boolean isAutoInc() { return isAutoInc; } @@ -170,22 +131,6 @@ public class SlotDescriptor { this.isAutoInc = isAutoInc; } - public void materializeSrcExpr() { - if (sourceExprs == null) { - return; - } - for (Expr expr : sourceExprs) { - if (!(expr instanceof SlotRef)) { - expr.materializeSrcExpr(); - continue; - } - SlotRef slotRef = (SlotRef) expr; - SlotDescriptor slotDesc = slotRef.getDesc(); - slotDesc.setIsMaterialized(true); - slotDesc.materializeSrcExpr(); - } - } - public boolean getIsNullable() { return isNullable; } @@ -194,51 +139,10 @@ public class SlotDescriptor { isNullable = value; } - public int getByteSize() { - return byteSize; - } - - public void setByteSize(int byteSize) { - this.byteSize = byteSize; - } - - public int getByteOffset() { - return byteOffset; - } - - public void setByteOffset(int byteOffset) { - this.byteOffset = byteOffset; - } - - public void setSlotIdx(int slotIdx) { - this.slotIdx = slotIdx; - } - - public void setStats(ColumnStats stats) { - this.stats = stats; - } - public void setMaterializedColumnName(String name) { this.materializedColumnName = name; } - public ColumnStats getStats() { - if (stats == null) { - stats = new ColumnStats(); - } - // FIXME(dhc): mock ndv - stats.setNumDistinctValues((long) parent.getCardinality()); - return stats; - } - - public void setSlotOffset(int slotOffset) { - this.slotOffset = slotOffset; - } - - public int getSlotOffset() { - return slotOffset; - } - public String getLabel() { return label; } @@ -251,10 +155,6 @@ public class SlotDescriptor { sourceExprs = Collections.singletonList(expr); } - public void addSourceExpr(Expr expr) { - sourceExprs.add(expr); - } - public List<Expr> getSourceExprs() { return sourceExprs; } @@ -274,46 +174,13 @@ public class SlotDescriptor { this.virtualColumn = virtualColumn; } - /** - * Initializes a slot by setting its source expression information - */ - public void initFromExpr(Expr expr) { - setIsNullable(expr.isNullable()); - setLabel(expr.toSql()); - Preconditions.checkState(sourceExprs.isEmpty()); - setSourceExpr(expr); - setStats(ColumnStats.fromExpr(expr)); - Preconditions.checkState(expr.getType().isValid()); - setType(expr.getType()); - } - - /** - * Return true if the physical layout of this descriptor matches the physical layout - * of the other descriptor, but not necessarily ids. - */ - public boolean layoutEquals(SlotDescriptor other) { - if (!getType().equals(other.getType())) { - return false; - } - if (isNullable != other.isNullable) { - return false; - } - if (getByteSize() != other.getByteSize()) { - return false; - } - if (getByteOffset() != other.getByteOffset()) { - return false; - } - return true; - } - public TSlotDescriptor toThrift() { // Non-nullable slots will have 0 for the byte offset and -1 for the bit mask String colName = materializedColumnName != null ? materializedColumnName : ((column != null) ? column.getNonShadowName() : ""); TSlotDescriptor tSlotDescriptor = new TSlotDescriptor(id.asInt(), parent.getId().asInt(), type.toThrift(), -1, - byteOffset, 0, getIsNullable() ? 0 : -1, colName, slotIdx, - isMaterialized); + 0, 0, getIsNullable() ? 0 : -1, colName, -1, + true); tSlotDescriptor.setNeedMaterialize(needMaterialize); tSlotDescriptor.setIsAutoIncrement(isAutoInc); if (column != null) { @@ -339,8 +206,7 @@ public class SlotDescriptor { String typeStr = (type == null ? "null" : type.toString()); String parentTupleId = (parent == null) ? "null" : parent.getId().toString(); return MoreObjects.toStringHelper(this).add("id", id.asInt()).add("parent", parentTupleId).add("col", colStr) - .add("type", typeStr).add("materialized", isMaterialized).add("byteSize", byteSize) - .add("byteOffset", byteOffset).add("slotIdx", slotIdx).add("nullable", getIsNullable()) + .add("type", typeStr).add("nullable", getIsNullable()) .add("isAutoIncrement", isAutoInc).add("subColPath", subColPath) .add("virtualColumn", virtualColumn == null ? null : virtualColumn.toSql()).toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java index 5b9a1f61a16..5b1530c6b86 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java @@ -290,15 +290,6 @@ public class TableRef implements ParseNode { return markTupleName; } - public void setMark(TupleDescriptor markTuple) { - this.isMark = markTuple != null; - if (isMark) { - this.markTupleName = markTuple.getAlias(); - } else { - this.markTupleName = null; - } - } - public Expr getOnClause() { return onClause; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java index 980cf3c4cfa..2c9f2b65ff2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleDescriptor.java @@ -20,28 +20,17 @@ package org.apache.doris.analysis; -import org.apache.doris.catalog.ColumnStats; -import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.TableIf; import org.apache.doris.thrift.TTupleDescriptor; import com.google.common.base.Joiner; import com.google.common.base.MoreObjects; -import com.google.common.base.Preconditions; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import java.util.ArrayList; import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; public class TupleDescriptor { - private static final Logger LOG = LogManager.getLogger(TupleDescriptor.class); private final TupleId id; private final String debugName; // debug only private final ArrayList<SlotDescriptor> slots; @@ -51,43 +40,21 @@ public class TupleDescriptor { // underlying table, if there is one private TableRef ref; - // All legal aliases of this tuple. - private String[] aliases; - - // If true, requires that aliases_.length() == 1. However, aliases_.length() == 1 - // does not imply an explicit alias because nested collection refs have only a - // single implicit alias. - private boolean hasExplicitAlias; - - // if false, this tuple doesn't need to be materialized - private boolean isMaterialized = true; - - private int byteSize; // of all slots plus null indicators - - // This cardinality is only used to mock slot ndv. - // Only tuple of olap scan node has this value. - private long cardinality; - - private float avgSerializedSize; // in bytes; includes serialization overhead - private int tableId = -1; public TupleDescriptor(TupleId id) { this.id = id; this.slots = new ArrayList<SlotDescriptor>(); this.debugName = ""; - this.cardinality = -1; } public TupleDescriptor(TupleId id, String debugName) { this.id = id; this.slots = new ArrayList<SlotDescriptor>(); this.debugName = debugName; - this.cardinality = -1; } public void addSlot(SlotDescriptor desc) { - desc.setSlotOffset(slots.size()); slots.add(desc); } @@ -107,10 +74,6 @@ public class TupleDescriptor { return slots; } - public void setTableId(int id) { - tableId = id; - } - /** * get slot desc by slot id. * @@ -126,34 +89,6 @@ public class TupleDescriptor { return null; } - public long getCardinality() { - return cardinality; - } - - public void setCardinality(long cardinality) { - this.cardinality = cardinality; - } - - public ArrayList<SlotDescriptor> getMaterializedSlots() { - ArrayList<SlotDescriptor> result = Lists.newArrayList(); - for (SlotDescriptor slot : slots) { - if (slot.isMaterialized()) { - result.add(slot); - } - } - return result; - } - - public ArrayList<SlotId> getMaterializedSlotIds() { - ArrayList<SlotId> result = Lists.newArrayList(); - for (SlotDescriptor slot : slots) { - if (slot.isMaterialized()) { - result.add(slot.getId()); - } - } - return result; - } - public ArrayList<SlotId> getAllSlotIds() { ArrayList<SlotId> result = Lists.newArrayList(); for (SlotDescriptor slot : slots) { @@ -175,15 +110,6 @@ public class TupleDescriptor { return null; } - public boolean hasVariantCol() { - for (SlotDescriptor slotDesc : slots) { - if (slotDesc.getColumn() != null && slotDesc.getColumn().getType().isVariantType()) { - return true; - } - } - return false; - } - public TableIf getTable() { return table; } @@ -192,43 +118,6 @@ public class TupleDescriptor { table = tbl; } - public int getByteSize() { - return byteSize; - } - - public void setIsMaterialized(boolean value) { - isMaterialized = value; - } - - public boolean isMaterialized() { - return isMaterialized; - } - - public float getAvgSerializedSize() { - return avgSerializedSize; - } - - public void setAliases(String[] aliases, boolean hasExplicitAlias) { - this.aliases = aliases; - this.hasExplicitAlias = hasExplicitAlias; - } - - public boolean hasExplicitAlias() { - return hasExplicitAlias; - } - - public String getAlias() { - return (aliases != null) ? aliases[0] : null; - } - - public String getLastAlias() { - return (aliases != null) ? aliases[aliases.length - 1] : null; - } - - public TableName getAliasAsName() { - return (aliases != null) ? new TableName(aliases[0]) : null; - } - public TTupleDescriptor toThrift() { TTupleDescriptor ttupleDesc = new TTupleDescriptor(id.asInt(), 0, 0); if (table != null && table.getId() >= 0) { @@ -240,149 +129,6 @@ public class TupleDescriptor { return ttupleDesc; } - /** - * This function is mainly used to calculate the statistics of the tuple and the layout information. - * Generally, it occurs after the plan node materializes the slot and before calculating the plan node statistics. - * PlanNode.init() { - * materializedSlot(); - * tupleDesc.computeStatAndMemLayout(); - * computeStat(); - * } - */ - public void computeStatAndMemLayout() { - computeStat(); - computeMemLayout(); - } - - /** - * This function is mainly used to evaluate the statistics of the tuple, - * such as the average size of each row. - * This function will be used before the computeStat() of the plan node - * and is the pre-work for evaluating the statistics of the plan node. - * - * This function is theoretically only called once when the plan node is init. - * However, the current code structure is relatively confusing - * In order to ensure that even if it is wrongly called a second time, no error will occur, - * so it will be initialized again at the beginning of the function. - * - * @deprecated In the future this function will be changed to a private function. - */ - @Deprecated - public void computeStat() { - // init stat - avgSerializedSize = 0; - - // compute stat - for (SlotDescriptor d : slots) { - if (!d.isMaterialized()) { - continue; - } - ColumnStats stats = d.getStats(); - if (stats.hasAvgSerializedSize()) { - avgSerializedSize += d.getStats().getAvgSerializedSize(); - } else { - // TODO: for computed slots, try to come up with stats estimates - avgSerializedSize += d.getType().getSlotSize(); - } - } - } - - /** - * @deprecated In the future this function will be changed to a private function. - */ - @Deprecated - public void computeMemLayout() { - // sort slots by size - List<List<SlotDescriptor>> slotsBySize = Lists.newArrayListWithCapacity(PrimitiveType.getMaxSlotSize()); - for (int i = 0; i <= PrimitiveType.getMaxSlotSize(); ++i) { - slotsBySize.add(new ArrayList<SlotDescriptor>()); - } - - // populate slotsBySize; also compute avgSerializedSize - for (SlotDescriptor d : slots) { - if (d.isMaterialized()) { - slotsBySize.get(d.getType().getSlotSize()).add(d); - } - } - // we shouldn't have anything of size 0 - Preconditions.checkState(slotsBySize.get(0).isEmpty()); - - // slotIdx is the index into the resulting tuple struct. The first (smallest) field - // is 0, next is 1, etc. - int slotIdx = 0; - for (int slotSize = 1; slotSize <= PrimitiveType.getMaxSlotSize(); ++slotSize) { - if (slotsBySize.get(slotSize).isEmpty()) { - continue; - } - - for (SlotDescriptor d : slotsBySize.get(slotSize)) { - d.setByteSize(slotSize); - d.setSlotIdx(slotIdx++); - byteSize += slotSize; - } - } - } - - /** - * Returns true if tuples of type 'this' can be assigned to tuples of type 'desc' - * (checks that both have the same number of slots and that slots are of the same type) - */ - public boolean isCompatible(TupleDescriptor desc) { - if (slots.size() != desc.slots.size()) { - return false; - } - for (int i = 0; i < slots.size(); ++i) { - if (slots.get(i).getType() != desc.slots.get(i).getType()) { - return false; - } - } - return true; - } - - /** - * Materialize all slots. - */ - public void materializeSlots() { - for (SlotDescriptor slot : slots) { - slot.setIsMaterialized(true); - } - } - - public void getTableIdToColumnNames(Map<Long, Set<String>> tableIdToColumnNames) { - for (SlotDescriptor slotDescriptor : slots) { - if (!slotDescriptor.isMaterialized()) { - continue; - } - if (slotDescriptor.getColumn() != null) { - TupleDescriptor parent = slotDescriptor.getParent(); - Preconditions.checkState(parent != null); - TableIf table = parent.getTable(); - Preconditions.checkState(table != null); - Long tableId = table.getId(); - Set<String> columnNames = tableIdToColumnNames.get(tableId); - if (columnNames == null) { - columnNames = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); - tableIdToColumnNames.put(tableId, columnNames); - } - columnNames.add(slotDescriptor.getColumn().getName()); - } else { - for (Expr expr : slotDescriptor.getSourceExprs()) { - expr.getTableIdToColumnNames(tableIdToColumnNames); - } - } - } - } - - public Set<String> getColumnNames() { - Map<Long, Set<String>> columnNamesInQueryOutput = Maps.newHashMap(); - getTableIdToColumnNames(columnNamesInQueryOutput); - Set<String> columnNames = Sets.newHashSet(); - for (Set<String> names : columnNamesInQueryOutput.values()) { - columnNames.addAll(names); - } - return columnNames; - } - @Override public String toString() { String tblStr = (table == null ? "null" : table.getName()); @@ -391,7 +137,7 @@ public class TupleDescriptor { slotStrings.add(slot.debugString()); } return MoreObjects.toStringHelper(this).add("id", id.asInt()).add("tbl", tblStr) - .add("is_materialized", isMaterialized).add("slots", "[" + Joiner.on(", ").join(slotStrings) + "]") + .add("slots", "[" + Joiner.on(", ").join(slotStrings) + "]") .toString(); } @@ -407,7 +153,6 @@ public class TupleDescriptor { .add("id", id.asInt()) .add("name", debugName) .add("tbl", tblStr) - .add("is_materialized", isMaterialized) .add("slots", "[" + Joiner.on(", ").join(slotStrings) + "]") .toString(); } @@ -422,9 +167,7 @@ public class TupleDescriptor { .add("tbl", tblStr)); builder.append("\n"); for (SlotDescriptor slot : slots) { - if (slot.isMaterialized()) { - builder.append(slot.getExplainString(prefix)).append("\n"); - } + builder.append(slot.getExplainString(prefix)).append("\n"); } return builder.toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnStats.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnStats.java deleted file mode 100644 index 198ca02f51b..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnStats.java +++ /dev/null @@ -1,176 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.catalog; - -import org.apache.doris.analysis.Expr; -import org.apache.doris.analysis.SlotRef; - -import com.google.common.base.MoreObjects; -import com.google.common.base.Preconditions; -import com.google.gson.annotations.SerializedName; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.Objects; - -/** - * Statistics for a single column. - */ -public class ColumnStats { - private static final Logger LOG = LogManager.getLogger(ColumnStats.class); - - @SerializedName(value = "avgSerializedSize") - private float avgSerializedSize; // in bytes; includes serialization overhead - @SerializedName(value = "maxSize") - private long maxSize; // in bytes - @SerializedName(value = "numDistinctValues") - private long numDistinctValues; - @SerializedName(value = "numNulls") - private long numNulls; - - /** - * For fixed-length type (those which don't need additional storage besides - * the slot they occupy), sets avgSerializedSize and maxSize to their slot size. - */ - public ColumnStats() { - avgSerializedSize = -1; - maxSize = -1; - numDistinctValues = -1; - numNulls = -1; - } - - public ColumnStats(ColumnStats other) { - avgSerializedSize = other.avgSerializedSize; - maxSize = other.maxSize; - numDistinctValues = other.numDistinctValues; - numNulls = other.numNulls; - } - - public long getNumDistinctValues() { - return numDistinctValues; - } - - public void setNumDistinctValues(long numDistinctValues) { - this.numDistinctValues = numDistinctValues; - } - - public float getAvgSerializedSize() { - return avgSerializedSize; - } - - public void setAvgSerializedSize(float avgSize) { - this.avgSerializedSize = avgSize; - } - - public long getMaxSize() { - return maxSize; - } - - public void setMaxSize(long maxSize) { - this.maxSize = maxSize; - } - - public boolean hasNulls() { - return numNulls > 0; - } - - public long getNumNulls() { - return numNulls; - } - - public void setNumNulls(long numNulls) { - this.numNulls = numNulls; - } - - public boolean hasAvgSerializedSize() { - return avgSerializedSize >= 0; - } - - public boolean hasMaxSize() { - return maxSize >= 0; - } - - public boolean hasNumDistinctValues() { - return numDistinctValues >= 0; - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this.getClass()).add("avgSerializedSize", - avgSerializedSize).add("maxSize", maxSize).add("numDistinct", numDistinctValues).add( - "numNulls", numNulls).toString(); - } - - @Override - public int hashCode() { - return Objects.hash(avgSerializedSize, maxSize, numDistinctValues, numNulls); - } - - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (!(obj instanceof ColumnStats)) { - return false; - } - - ColumnStats stats = (ColumnStats) obj; - return (numDistinctValues == stats.numDistinctValues) - && (avgSerializedSize == stats.avgSerializedSize) - && (maxSize == stats.maxSize) - && (numNulls == stats.numNulls); - } - - /** - * For fixed-length type (those which don't need additional storage besides - * the slot they occupy), sets avgSerializedSize and maxSize to their slot size. - */ - public ColumnStats(PrimitiveType colType) { - avgSerializedSize = -1; - maxSize = -1; - numDistinctValues = -1; - numNulls = -1; - if (colType.isNumericType() || colType.isDateType()) { - avgSerializedSize = colType.getSlotSize(); - maxSize = colType.getSlotSize(); - } - } - - /** - * Creates ColumnStats from the given expr. Sets numDistinctValues and if the expr - * is a SlotRef also numNulls. - */ - public static ColumnStats fromExpr(Expr expr) { - Preconditions.checkNotNull(expr); - Preconditions.checkState(expr.getType().isValid()); - ColumnStats stats = new ColumnStats(expr.getType().getPrimitiveType()); - stats.setNumDistinctValues(expr.getNumDistinctValues()); - SlotRef slotRef = expr.unwrapSlotRef(); - if (slotRef == null) { - return stats; - } - ColumnStats slotStats = slotRef.getDesc().getStats(); - if (slotStats == null) { - return stats; - } - stats.numNulls = slotStats.getNumNulls(); - stats.avgSerializedSize = slotStats.getAvgSerializedSize(); - stats.maxSize = slotStats.getMaxSize(); - return stats; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java index 4448a68d0ed..4672fdf8ec3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java @@ -150,9 +150,6 @@ public abstract class FileQueryScanNode extends FileScanNode { List<Column> columns = desc.getTable().getBaseSchema(false); params.setNumOfColumnsFromFile(columns.size() - partitionKeys.size()); for (SlotDescriptor slot : desc.getSlots()) { - if (!slot.isMaterialized()) { - continue; - } TFileScanSlotInfo slotInfo = new TFileScanSlotInfo(); slotInfo.setSlotId(slot.getId().asInt()); slotInfo.setIsFileSlot(!partitionKeys.contains(slot.getColumn().getName())); @@ -167,10 +164,6 @@ public abstract class FileQueryScanNode extends FileScanNode { private void updateRequiredSlots() throws UserException { params.unsetRequiredSlots(); for (SlotDescriptor slot : desc.getSlots()) { - if (!slot.isMaterialized()) { - continue; - } - TFileScanSlotInfo slotInfo = new TFileScanSlotInfo(); slotInfo.setSlotId(slot.getId().asInt()); slotInfo.setIsFileSlot(!getPathPartitionKeys().contains(slot.getColumn().getName())); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/doris/source/RemoteDorisScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/doris/source/RemoteDorisScanNode.java index b35656ae8b3..520f135baaa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/doris/source/RemoteDorisScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/doris/source/RemoteDorisScanNode.java @@ -202,9 +202,6 @@ public class RemoteDorisScanNode extends FileQueryScanNode { private void createColumns() { columns.clear(); for (SlotDescriptor slot : desc.getSlots()) { - if (!slot.isMaterialized()) { - continue; - } Column col = slot.getColumn(); columns.add("`" + col.getName() + "`"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java index db1a6a7b974..50f19821d03 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java @@ -42,7 +42,6 @@ import org.apache.doris.datasource.jdbc.JdbcExternalTable; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.StatisticalType; -import org.apache.doris.statistics.StatsRecursiveDerive; import org.apache.doris.thrift.TExplainLevel; import org.apache.doris.thrift.TJdbcScanNode; import org.apache.doris.thrift.TOdbcTableType; @@ -102,8 +101,7 @@ public class JdbcScanNode extends ExternalScanNode { public void init() throws UserException { super.init(); numNodes = numNodes <= 0 ? 1 : numNodes; - StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this); - cardinality = (long) statsDeriveResult.getRowCount(); + cardinality = -1; } private void createJdbcFilters() { @@ -156,9 +154,6 @@ public class JdbcScanNode extends ExternalScanNode { private void createJdbcColumns() { columns.clear(); for (SlotDescriptor slot : desc.getSlots()) { - if (!slot.isMaterialized()) { - continue; - } Column col = slot.getColumn(); columns.add(tbl.getProperRemoteColumnName(jdbcType, col.getName())); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/odbc/source/OdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/odbc/source/OdbcScanNode.java index 2f1ff969b5e..da4c96c7a59 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/odbc/source/OdbcScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/odbc/source/OdbcScanNode.java @@ -32,7 +32,6 @@ import org.apache.doris.datasource.jdbc.source.JdbcScanNode; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.StatisticalType; -import org.apache.doris.statistics.StatsRecursiveDerive; import org.apache.doris.thrift.TExplainLevel; import org.apache.doris.thrift.TOdbcScanNode; import org.apache.doris.thrift.TOdbcTableType; @@ -82,8 +81,7 @@ public class OdbcScanNode extends ExternalScanNode { public void init() throws UserException { super.init(); numNodes = numNodes <= 0 ? 1 : numNodes; - StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this); - cardinality = (long) statsDeriveResult.getRowCount(); + cardinality = -1; } @Override @@ -168,9 +166,6 @@ public class OdbcScanNode extends ExternalScanNode { private void createOdbcColumns() { columns.clear(); for (SlotDescriptor slot : desc.getSlots()) { - if (!slot.isMaterialized()) { - continue; - } Column col = slot.getColumn(); columns.add(JdbcTable.databaseProperName(odbcType, col.getName())); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorScanNode.java index 50c1d5752a1..64cba7f8a4e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorScanNode.java @@ -254,9 +254,6 @@ public class TrinoConnectorScanNode extends FileQueryScanNode { List<ColumnHandle> columnHandles = new ArrayList<>(); List<ColumnMetadata> columnMetadataList = new ArrayList<>(); for (SlotDescriptor slotDescriptor : source.getDesc().getSlots()) { - if (!slotDescriptor.isMaterialized()) { - continue; - } String colName = slotDescriptor.getColumn().getName(); if (columnMetadataMap.containsKey(colName)) { columnMetadataList.add(columnMetadataMap.get(colName)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/MetadataScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/MetadataScanNode.java index 99646d8284c..77dbd29e5a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/MetadataScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/MetadataScanNode.java @@ -70,7 +70,6 @@ public class MetadataScanNode extends ExternalScanNode { @Override protected void createScanRangeLocations() { List<String> requiredFields = desc.getSlots().stream() - .filter(slot -> slot.isMaterialized()) .map(slot -> slot.getColumn().getName()) .collect(java.util.stream.Collectors.toList()); TMetaScanRange metaScanRange = tvf.getMetaScanRange(requiredFields); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 8a8a342ccf0..e2da4a05ed4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -246,6 +246,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -299,8 +300,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla rootFragment.setOutputExprs(outputExprs); } Collections.reverse(context.getPlanFragments()); - // TODO: maybe we need to trans nullable directly? and then we could remove call computeMemLayout - context.getDescTable().computeMemLayout(); if (context.getSessionVariable() != null && context.getSessionVariable().forbidUnknownColStats) { Set<ScanNode> scans = context.getScanNodeWithUnknownColumnStats(); if (!scans.isEmpty()) { @@ -547,7 +546,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla List<Column> targetTableColumns = hiveTableSink.getTargetTable().getFullSchema(); for (Column column : targetTableColumns) { SlotDescriptor slotDesc = context.addSlotDesc(hiveTuple); - slotDesc.setIsMaterialized(true); slotDesc.setType(column.getType()); slotDesc.setColumn(column); slotDesc.setIsNullable(column.isAllowNull()); @@ -568,7 +566,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla List<Column> targetTableColumns = icebergTableSink.getTargetTable().getFullSchema(); for (Column column : targetTableColumns) { SlotDescriptor slotDesc = context.addSlotDesc(hiveTuple); - slotDesc.setIsMaterialized(true); slotDesc.setType(column.getType()); slotDesc.setColumn(column); slotDesc.setIsNullable(column.isAllowNull()); @@ -1684,9 +1681,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla || joinType == JoinType.LEFT_SEMI_JOIN || joinType == JoinType.NULL_AWARE_LEFT_ANTI_JOIN)) { for (SlotDescriptor leftSlotDescriptor : leftSlotDescriptors) { - if (!leftSlotDescriptor.isMaterialized()) { - continue; - } SlotReference sf = leftChildOutputMap.get(context.findExprId(leftSlotDescriptor.getId())); SlotDescriptor sd; if (sf == null && leftSlotDescriptor.getColumn().getName().equals(Column.ROWID_COL)) { @@ -1704,9 +1698,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla } else if (hashJoin.getOtherJoinConjuncts().isEmpty() && (isHashJoinConjunctsEmpty != isMarkJoinConjunctsEmpty) && (joinType == JoinType.RIGHT_ANTI_JOIN || joinType == JoinType.RIGHT_SEMI_JOIN)) { for (SlotDescriptor rightSlotDescriptor : rightSlotDescriptors) { - if (!rightSlotDescriptor.isMaterialized()) { - continue; - } SlotReference sf = rightChildOutputMap.get(context.findExprId(rightSlotDescriptor.getId())); SlotDescriptor sd; if (sf == null && rightSlotDescriptor.getColumn().getName().equals(Column.ROWID_COL)) { @@ -1723,9 +1714,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla } } else { for (SlotDescriptor leftSlotDescriptor : leftSlotDescriptors) { - if (!leftSlotDescriptor.isMaterialized()) { - continue; - } SlotReference sf = leftChildOutputMap.get(context.findExprId(leftSlotDescriptor.getId())); SlotDescriptor sd; if (sf == null && leftSlotDescriptor.getColumn().getName().equals(Column.ROWID_COL)) { @@ -1742,9 +1730,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla leftIntermediateSlotDescriptor.add(sd); } for (SlotDescriptor rightSlotDescriptor : rightSlotDescriptors) { - if (!rightSlotDescriptor.isMaterialized()) { - continue; - } SlotReference sf = rightChildOutputMap.get(context.findExprId(rightSlotDescriptor.getId())); SlotDescriptor sd; if (sf == null && rightSlotDescriptor.getColumn().getName().equals(Column.ROWID_COL)) { @@ -1921,9 +1906,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla // TODO: because of the limitation of be, the VNestedLoopJoinNode will output column from both children // in the intermediate tuple, so fe have to do the same, if be fix the problem, we can change it back. for (SlotDescriptor leftSlotDescriptor : leftSlotDescriptors) { - if (!leftSlotDescriptor.isMaterialized()) { - continue; - } SlotReference sf = leftChildOutputMap.get(context.findExprId(leftSlotDescriptor.getId())); SlotDescriptor sd; if (sf == null && leftSlotDescriptor.getColumn().getName().equals(Column.ROWID_COL)) { @@ -1935,9 +1917,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla leftIntermediateSlotDescriptor.add(sd); } for (SlotDescriptor rightSlotDescriptor : rightSlotDescriptors) { - if (!rightSlotDescriptor.isMaterialized()) { - continue; - } SlotReference sf = rightChildOutputMap.get(context.findExprId(rightSlotDescriptor.getId())); SlotDescriptor sd; if (sf == null && rightSlotDescriptor.getColumn().getName().equals(Column.ROWID_COL)) { @@ -2743,9 +2722,11 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla .map(context::findSlotRef).filter(Objects::nonNull).map(SlotRef::getSlotId) .collect(Collectors.toSet()); - for (SlotDescriptor slot : olapScanNode.getTupleDesc().getSlots()) { + Iterator<SlotDescriptor> it = olapScanNode.getTupleDesc().getSlots().iterator(); + while (it.hasNext()) { + SlotDescriptor slot = it.next(); if (!scanIds.contains(slot.getId())) { - slot.setIsMaterialized(false); + it.remove(); } } context.createSlotDesc(olapScanNode.getTupleDesc(), lazyScan.getRowId(), lazyScan.getTable()); @@ -3107,19 +3088,6 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla } } - private void injectRowIdColumnSlot(TupleDescriptor tupleDesc) { - SlotDescriptor slotDesc = context.addSlotDesc(tupleDesc); - if (LOG.isDebugEnabled()) { - LOG.debug("inject slot {}", slotDesc); - } - String name = Column.ROWID_COL; - Column col = new Column(name, Type.STRING, false, null, false, "", "rowid column"); - slotDesc.setType(Type.STRING); - slotDesc.setColumn(col); - slotDesc.setIsNullable(false); - slotDesc.setIsMaterialized(true); - } - /** * topN opt: using storage data ordering to accelerate topn operation. * refer pr: optimize topn query if order by columns is prefix of sort keys of table (#10694) @@ -3280,12 +3248,10 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla List<Expr> exprList = Lists.newArrayList(); Preconditions.checkState(resultExpressionList.size() == resultExprSlots.size()); for (int j = 0; j < resultExpressionList.size(); ++j) { - if (resultExprSlots.get(j).isMaterialized()) { - exprList.add(ExpressionTranslator.translate(resultExpressionList.get(j), context)); - // TODO: reconsider this, we may change nullable info in previous nereids rules not here. - resultExprSlots.get(j) - .setIsNullable(resultExprSlots.get(j).getIsNullable() || exprList.get(j).isNullable()); - } + exprList.add(ExpressionTranslator.translate(resultExpressionList.get(j), context)); + // TODO: reconsider this, we may change nullable info in previous nereids rules not here. + resultExprSlots.get(j) + .setIsNullable(resultExprSlots.get(j).getIsNullable() || exprList.get(j).isNullable()); } materializedResultExprLists.add(exprList); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java index 89e1da82950..3a28faba94a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java @@ -300,7 +300,6 @@ public class PlanTranslatorContext { Optional<Column> column = slotReference.getOriginalColumn(); column.ifPresent(slotDescriptor::setColumn); slotDescriptor.setType(slotReference.getDataType().toCatalogDataType()); - slotDescriptor.setIsMaterialized(true); SlotRef slotRef; if (slotReference instanceof VirtualSlotReference) { slotRef = new VirtualSlotRef(slotDescriptor); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java index 0d6aced5548..81f8e436d46 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java @@ -24,12 +24,9 @@ import org.apache.doris.analysis.BinaryPredicate; import org.apache.doris.analysis.Expr; import org.apache.doris.analysis.ExprSubstitutionMap; import org.apache.doris.analysis.JoinOperator; -import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.SlotId; import org.apache.doris.analysis.TableRef; import org.apache.doris.analysis.TupleDescriptor; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.TableIf; import org.apache.doris.nereids.trees.expressions.ExprId; import org.apache.doris.statistics.StatisticalType; import org.apache.doris.thrift.TEqJoinCondition; @@ -184,79 +181,6 @@ public class HashJoinNode extends JoinNodeBase { return hashOutputSlotIds; } - /** - * Holds the source scan slots of a <SlotRef> = <SlotRef> join predicate. - * The underlying table and column on both sides have stats. - */ - public static final class EqJoinConjunctScanSlots { - private final Expr eqJoinConjunct; - private final SlotDescriptor lhs; - private final SlotDescriptor rhs; - - private EqJoinConjunctScanSlots(Expr eqJoinConjunct, SlotDescriptor lhs, SlotDescriptor rhs) { - this.eqJoinConjunct = eqJoinConjunct; - this.lhs = lhs; - this.rhs = rhs; - } - - // Convenience functions. They return double to avoid excessive casts in callers. - public double lhsNdv() { - // return the estimated number of rows in this partition (-1 if unknown) - return Math.min(lhs.getStats().getNumDistinctValues(), lhsNumRows()); - } - - public double rhsNdv() { - return Math.min(rhs.getStats().getNumDistinctValues(), rhsNumRows()); - } - - public double lhsNumRows() { - TableIf table = lhs.getParent().getTable(); - Preconditions.checkState(table instanceof OlapTable); - return table.getRowCount(); - } - - public double rhsNumRows() { - TableIf table = rhs.getParent().getTable(); - Preconditions.checkState(table instanceof OlapTable); - return table.getRowCount(); - } - - /** - * Returns a new EqJoinConjunctScanSlots for the given equi-join conjunct or null if - * the given conjunct is not of the form <SlotRef> = <SlotRef> or if the underlying - * table/column of at least one side is missing stats. - */ - public static EqJoinConjunctScanSlots create(Expr eqJoinConjunct) { - if (!Expr.IS_EQ_BINARY_PREDICATE.apply(eqJoinConjunct)) { - return null; - } - SlotDescriptor lhsScanSlot = eqJoinConjunct.getChild(0).findSrcScanSlot(); - if (lhsScanSlot == null || !hasNumRowsAndNdvStats(lhsScanSlot)) { - return null; - } - SlotDescriptor rhsScanSlot = eqJoinConjunct.getChild(1).findSrcScanSlot(); - if (rhsScanSlot == null || !hasNumRowsAndNdvStats(rhsScanSlot)) { - return null; - } - return new EqJoinConjunctScanSlots(eqJoinConjunct, lhsScanSlot, rhsScanSlot); - } - - private static boolean hasNumRowsAndNdvStats(SlotDescriptor slotDesc) { - if (slotDesc.getColumn() == null) { - return false; - } - if (!slotDesc.getStats().hasNumDistinctValues()) { - return false; - } - return true; - } - - @Override - public String toString() { - return eqJoinConjunct.toSql(); - } - } - @Override protected String debugString() { return MoreObjects.toStringHelper(this).add("eqJoinConjuncts", eqJoinConjunctsDebugString()) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index ef4a1932003..2bc03096bbb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -699,12 +699,6 @@ public class OlapScanNode extends ScanNode { bucketSeq2Bytes.merge(bucketSeq, oneReplicaBytes, Long::sum); scanRangeLocations.add(locations); } - - if (tablets.isEmpty()) { - desc.setCardinality(0); - } else { - desc.setCardinality(cardinality); - } } private String fastToString(long version) { @@ -758,7 +752,6 @@ public class OlapScanNode extends ScanNode { protected void createScanRangeLocations() throws UserException { scanRangeLocations = Lists.newArrayList(); if (selectedPartitionIds.isEmpty()) { - desc.setCardinality(0); return; } Preconditions.checkState(selectedIndexId != -1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ResultFileSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ResultFileSink.java index 2f8f9ae8ab1..9059d62e1d5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/ResultFileSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ResultFileSink.java @@ -156,17 +156,14 @@ public class ResultFileSink extends DataSink { public static TupleDescriptor constructFileStatusTupleDesc(DescriptorTable descriptorTable) { TupleDescriptor resultFileStatusTupleDesc = descriptorTable.createTupleDescriptor("result_file_status"); - resultFileStatusTupleDesc.setIsMaterialized(true); for (int i = 0; i < OutFileClause.RESULT_COL_NAMES.size(); ++i) { SlotDescriptor slotDescriptor = descriptorTable.addSlotDescriptor(resultFileStatusTupleDesc); slotDescriptor.setLabel(OutFileClause.RESULT_COL_NAMES.get(i)); slotDescriptor.setType(OutFileClause.RESULT_COL_TYPES.get(i)); slotDescriptor.setColumn(new Column(OutFileClause.RESULT_COL_NAMES.get(i), OutFileClause.RESULT_COL_TYPES.get(i))); - slotDescriptor.setIsMaterialized(true); slotDescriptor.setIsNullable(false); } - resultFileStatusTupleDesc.computeStatAndMemLayout(); return resultFileStatusTupleDesc; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java index 477898abf30..7e8b1039986 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java @@ -21,7 +21,6 @@ package org.apache.doris.planner; import org.apache.doris.analysis.Expr; -import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.SortInfo; import org.apache.doris.common.Pair; import org.apache.doris.qe.ConnectContext; @@ -217,27 +216,12 @@ public class SortNode extends PlanNode { return output.toString(); } - private void removeUnusedExprs() { - if (!isUnusedExprRemoved) { - if (resolvedTupleExprs != null) { - List<SlotDescriptor> slotDescriptorList = this.info.getSortTupleDescriptor().getSlots(); - for (int i = slotDescriptorList.size() - 1; i >= 0; i--) { - if (!slotDescriptorList.get(i).isMaterialized()) { - resolvedTupleExprs.remove(i); - } - } - } - isUnusedExprRemoved = true; - } - } - @Override protected void toThrift(TPlanNode msg) { msg.node_type = TPlanNodeType.SORT_NODE; TSortInfo sortInfo = info.toThrift(); Preconditions.checkState(tupleIds.size() == 1, "Incorrect size for tupleIds in SortNode"); - removeUnusedExprs(); if (resolvedTupleExprs != null) { sortInfo.setSortTupleSlotExprs(Expr.treesToThrift(resolvedTupleExprs)); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AggStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AggStatsDerive.java deleted file mode 100644 index 9f27860acf9..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AggStatsDerive.java +++ /dev/null @@ -1,87 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.analysis.Expr; -import org.apache.doris.common.UserException; -import org.apache.doris.planner.AggregationNode; - -import com.google.common.base.Preconditions; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.ArrayList; -import java.util.List; - -/** - * Derive AggNode statistics. - */ -public class AggStatsDerive extends BaseStatsDerive { - private static final Logger LOG = LogManager.getLogger(AggStatsDerive.class); - List<Expr> groupingExprs = new ArrayList<>(); - - @Override - public void init(PlanStats node) throws UserException { - Preconditions.checkState(node instanceof AggregationNode); - super.init(node); - groupingExprs.addAll(((AggregationNode) node).getAggInfo().getGroupingExprs()); - } - - @Override - protected long deriveRowCount() { - rowCount = 1; - // rowCount: product of # of distinct values produced by grouping exprs - for (Expr groupingExpr : groupingExprs) { - long numDistinct = groupingExpr.getNumDistinctValues(); - if (LOG.isDebugEnabled()) { - LOG.debug("grouping expr: " + groupingExpr.toSql() + " #distinct=" + Long.toString( - numDistinct)); - } - if (numDistinct == -1) { - rowCount = -1; - break; - } - // This is prone to overflow, because we keep multiplying cardinalities, - // even if the grouping exprs are functionally dependent (example: - // group by the primary key of a table plus a number of other columns from that - // same table) - // TODO: try to recognize functional dependencies - // TODO: as a shortcut, instead of recognizing functional dependencies, - // limit the contribution of a single table to the number of rows - // of that table (so that when we're grouping by the primary key col plus - // some others, the estimate doesn't overshoot dramatically) - rowCount *= numDistinct; - } - if (rowCount > 0) { - if (LOG.isDebugEnabled()) { - LOG.debug("sel=" + Double.toString(computeSelectivity())); - } - applyConjunctsSelectivity(); - } - // if we ended up with an overflow, the estimate is certain to be wrong - if (rowCount < 0) { - rowCount = -1; - } - - capRowCountAtLimit(); - if (LOG.isDebugEnabled()) { - LOG.debug("stats Agg: rowCount={}", rowCount); - } - return rowCount; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalyticEvalStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalyticEvalStatsDerive.java deleted file mode 100644 index a940cbf2b93..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalyticEvalStatsDerive.java +++ /dev/null @@ -1,41 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import com.google.common.base.Preconditions; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -/** - * Derive AnalyticEvalNode statistics. - */ -public class AnalyticEvalStatsDerive extends BaseStatsDerive { - private static final Logger LOG = LogManager.getLogger(AggStatsDerive.class); - - @Override - protected long deriveRowCount() { - Preconditions.checkState(!childrenStatsResult.isEmpty()); - rowCount = (long) (rowCount == -1 ? childrenStatsResult.get(0).getRowCount() : rowCount); - applyConjunctsSelectivity(); - capRowCountAtLimit(); - if (LOG.isDebugEnabled()) { - LOG.debug("stats AnalyticEval: rowCount={}", rowCount); - } - return rowCount; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AssertNumRowsStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AssertNumRowsStatsDerive.java deleted file mode 100644 index 5cbd99901e6..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AssertNumRowsStatsDerive.java +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -/** - * Derive AssertNumRowsNode statistics. - */ -public class AssertNumRowsStatsDerive extends BaseStatsDerive { - - @Override - protected long deriveRowCount() { - rowCount = 1; - return rowCount; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java deleted file mode 100644 index 584dcf514d3..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java +++ /dev/null @@ -1,152 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.analysis.Expr; -import org.apache.doris.common.UserException; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * Base class for statistics derive. - */ -public class BaseStatsDerive { - private static final Logger LOG = LogManager.getLogger(BaseStatsDerive.class); - // estimate of the output rowCount of this node; - // invalid: -1 - protected long rowCount = -1; - protected long limit = -1; - - protected List<ExprStats> conjuncts = Lists.newArrayList(); - protected List<StatsDeriveResult> childrenStatsResult = Lists.newArrayList(); - - protected void init(PlanStats node) throws UserException { - limit = node.getLimit(); - conjuncts.addAll(node.getConjuncts()); - - for (StatsDeriveResult result : node.getChildrenStats()) { - if (result == null) { - throw new UserException( - "childNode statsDeriveResult is null."); - } - childrenStatsResult.add(result); - } - } - - public StatsDeriveResult deriveStats() { - return new StatsDeriveResult(deriveRowCount()); - } - - public boolean hasLimit() { - return limit > -1; - } - - protected void applyConjunctsSelectivity() { - if (rowCount == -1) { - return; - } - applySelectivity(); - } - - private void applySelectivity() { - double selectivity = computeSelectivity(); - Preconditions.checkState(rowCount >= 0); - double preConjunctrowCount = rowCount; - rowCount = Math.round(rowCount * selectivity); - // don't round rowCount down to zero for safety. - if (rowCount == 0 && preConjunctrowCount > 0) { - rowCount = 1; - } - } - - protected double computeSelectivity() { - for (ExprStats expr : conjuncts) { - expr.setSelectivity(); - } - return computeCombinedSelectivity(conjuncts); - } - - /** - * Returns the estimated combined selectivity of all conjuncts. Uses heuristics to - * address the following estimation challenges: - * - * <p> - * * 1. The individual selectivities of conjuncts may be unknown. - * * 2. Two selectivities, whether known or unknown, could be correlated. Assuming - * * independence can lead to significant underestimation. - * </p> - * - * <p> - * * The first issue is addressed by using a single default selectivity that is - * * representative of all conjuncts with unknown selectivities. - * * The second issue is addressed by an exponential backoff when multiplying each - * * additional selectivity into the final result. - * </p> - */ - protected double computeCombinedSelectivity(List<ExprStats> conjuncts) { - // Collect all estimated selectivities. - List<Double> selectivities = new ArrayList<>(); - for (ExprStats e : conjuncts) { - if (e.hasSelectivity()) { - selectivities.add(e.getSelectivity()); - } - } - if (selectivities.size() != conjuncts.size()) { - // Some conjuncts have no estimated selectivity. Use a single default - // representative selectivity for all those conjuncts. - selectivities.add(Expr.DEFAULT_SELECTIVITY); - } - // Sort the selectivities to get a consistent estimate, regardless of the original - // conjunct order. Sort in ascending order such that the most selective conjunct - // is fully applied. - Collections.sort(selectivities); - double result = 1.0; - // selectivity = 1 * (s1)^(1/1) * (s2)^(1/2) * ... * (sn-1)^(1/(n-1)) * (sn)^(1/n) - for (int i = 0; i < selectivities.size(); ++i) { - // Exponential backoff for each selectivity multiplied into the final result. - result *= Math.pow(selectivities.get(i), 1.0 / (double) (i + 1)); - } - // Bound result in [0, 1] - return Math.max(0.0, Math.min(1.0, result)); - } - - protected void capRowCountAtLimit() { - if (hasLimit()) { - rowCount = rowCount == -1 ? limit : Math.min(rowCount, limit); - } - } - - - // Currently it simply adds the number of rows of children - protected long deriveRowCount() { - for (StatsDeriveResult statsDeriveResult : childrenStatsResult) { - rowCount = (long) Math.max(rowCount, statsDeriveResult.getRowCount()); - } - applyConjunctsSelectivity(); - capRowCountAtLimit(); - return rowCount; - } - -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java deleted file mode 100644 index f7dd33e1fc2..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -public class DeriveFactory { - - public BaseStatsDerive getStatsDerive(StatisticalType statisticalType) { - switch (statisticalType) { - case AGG_NODE: - return new AggStatsDerive(); - case ANALYTIC_EVAL_NODE: - return new AnalyticEvalStatsDerive(); - case ASSERT_NUM_ROWS_NODE: - return new AssertNumRowsStatsDerive(); - case NESTED_LOOP_JOIN_NODE: - return new NestedLoopJoinStatsDerive(); - case EMPTY_SET_NODE: - case REPEAT_NODE: - return new EmptySetStatsDerive(); - case EXCHANGE_NODE: - return new ExchangeStatsDerive(); - case HASH_JOIN_NODE: - return new HashJoinStatsDerive(); - case OLAP_SCAN_NODE: - return new OlapScanStatsDerive(); - case MYSQL_SCAN_NODE: - case ODBC_SCAN_NODE: - return new MysqlStatsDerive(); - case SELECT_NODE: - case SORT_NODE: - return new SelectStatsDerive(); - case TABLE_FUNCTION_NODE: - return new TableFunctionStatsDerive(); - case BROKER_SCAN_NODE: - case EXCEPT_NODE: - case ES_SCAN_NODE: - case HIVE_SCAN_NODE: - case ICEBERG_SCAN_NODE: - case LAKESOUL_SCAN_NODE: - case REMOTE_DORIS_SCAN_NODE: - case PAIMON_SCAN_NODE: - case INTERSECT_NODE: - case SCHEMA_SCAN_NODE: - case STREAM_LOAD_SCAN_NODE: - case UNION_NODE: - case DEFAULT: - default: - return new BaseStatsDerive(); - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/EmptySetStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/EmptySetStatsDerive.java deleted file mode 100644 index d7bc3a992a7..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/EmptySetStatsDerive.java +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -/** - * Derive EmptySetNode statistics. - */ -public class EmptySetStatsDerive extends BaseStatsDerive { - // Current REPEAT_NODE also uses this derivation method - - @Override - protected long deriveRowCount() { - rowCount = 0; - return rowCount; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExchangeStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExchangeStatsDerive.java deleted file mode 100644 index 1448c066fef..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExchangeStatsDerive.java +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import com.google.common.base.Preconditions; - -/** - * Derive ExchangeNode statistics. - */ -public class ExchangeStatsDerive extends BaseStatsDerive { - - @Override - protected long deriveRowCount() { - Preconditions.checkState(!childrenStatsResult.isEmpty()); - rowCount = (long) childrenStatsResult.get(0).getRowCount(); - capRowCountAtLimit(); - return rowCount; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HashJoinStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HashJoinStatsDerive.java deleted file mode 100644 index ad3b704a293..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HashJoinStatsDerive.java +++ /dev/null @@ -1,254 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.analysis.BinaryPredicate; -import org.apache.doris.analysis.Expr; -import org.apache.doris.analysis.JoinOperator; -import org.apache.doris.analysis.SlotDescriptor; -import org.apache.doris.analysis.SlotRef; -import org.apache.doris.catalog.ColumnStats; -import org.apache.doris.common.CheckedMath; -import org.apache.doris.common.UserException; -import org.apache.doris.planner.HashJoinNode; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.ArrayList; -import java.util.List; - -/** - * Derive HashJoinNode statistics. - */ -public class HashJoinStatsDerive extends BaseStatsDerive { - - private static final Logger LOG = LogManager.getLogger(HashJoinStatsDerive.class); - - private JoinOperator joinOp; - private List<BinaryPredicate> eqJoinConjuncts = Lists.newArrayList(); - - @Override - public void init(PlanStats node) throws UserException { - Preconditions.checkState(node instanceof HashJoinNode); - super.init(node); - joinOp = ((HashJoinNode) node).getJoinOp(); - eqJoinConjuncts.addAll(((HashJoinNode) node).getEqJoinConjuncts()); - } - - @Override - protected long deriveRowCount() { - if (joinOp.isSemiAntiJoin()) { - rowCount = getSemiJoinrowCount(); - } else if (joinOp.isInnerJoin() || joinOp.isOuterJoin()) { - rowCount = getJoinrowCount(); - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("joinOp:{} is not supported for HashJoinStatsDerive", joinOp); - } - } - capRowCountAtLimit(); - return rowCount; - } - - /** - * Returns the estimated rowCount of a semi join node. - * For a left semi join between child(0) and child(1), we look for equality join - * conditions "L.c = R.d" (with L being from child(0) and R from child(1)) and use as - * the rowCount estimate the minimum of - * |child(0)| * Min(NDV(L.c), NDV(R.d)) / NDV(L.c) - * over all suitable join conditions. The reasoning is that: - * -each row in child(0) is returned at most once - * -the probability of a row in child(0) having a match in R is - * Min(NDV(L.c), NDV(R.d)) / NDV(L.c) - * - *<p> - * For a left anti join we estimate the rowCount as the minimum of: - * |L| * Max(NDV(L.c) - NDV(R.d), NDV(L.c)) / NDV(L.c) - * over all suitable join conditions. The reasoning is that: - * - each row in child(0) is returned at most once - * - if NDV(L.c) > NDV(R.d) then the probability of row in L having a match - * in child(1) is (NDV(L.c) - NDV(R.d)) / NDV(L.c) - * - otherwise, we conservatively use |L| to avoid underestimation - *</p> - * - *<p> - * We analogously estimate the rowCount for right semi/anti joins, and treat the - * null-aware anti join like a regular anti join - *</p> - */ - private long getSemiJoinrowCount() { - Preconditions.checkState(joinOp.isSemiJoin()); - - // Return -1 if the rowCount of the returned side is unknown. - double rowCount; - if (joinOp == JoinOperator.RIGHT_SEMI_JOIN - || joinOp == JoinOperator.RIGHT_ANTI_JOIN) { - if (childrenStatsResult.get(1).getRowCount() == -1) { - return -1; - } - rowCount = childrenStatsResult.get(1).getRowCount(); - } else { - if (childrenStatsResult.get(0).getRowCount() == -1) { - return -1; - } - rowCount = childrenStatsResult.get(0).getRowCount(); - } - double minSelectivity = 1.0; - for (Expr eqJoinPredicate : eqJoinConjuncts) { - double lhsNdv = getNdv(eqJoinPredicate.getChild(0)); - lhsNdv = Math.min(lhsNdv, childrenStatsResult.get(0).getRowCount()); - double rhsNdv = getNdv(eqJoinPredicate.getChild(1)); - rhsNdv = Math.min(rhsNdv, childrenStatsResult.get(1).getRowCount()); - - // Skip conjuncts with unknown NDV on either side. - if (lhsNdv == -1 || rhsNdv == -1) { - continue; - } - - double selectivity = 1.0; - switch (joinOp) { - case LEFT_SEMI_JOIN: { - selectivity = (double) Math.min(lhsNdv, rhsNdv) / (double) (lhsNdv); - break; - } - case RIGHT_SEMI_JOIN: { - selectivity = (double) Math.min(lhsNdv, rhsNdv) / (double) (rhsNdv); - break; - } - case LEFT_ANTI_JOIN: - case NULL_AWARE_LEFT_ANTI_JOIN: { - selectivity = (double) (lhsNdv > rhsNdv ? (lhsNdv - rhsNdv) : lhsNdv) / (double) lhsNdv; - break; - } - case RIGHT_ANTI_JOIN: { - selectivity = (double) (rhsNdv > lhsNdv ? (rhsNdv - lhsNdv) : rhsNdv) / (double) rhsNdv; - break; - } - default: - Preconditions.checkState(false); - } - minSelectivity = Math.min(minSelectivity, selectivity); - } - - Preconditions.checkState(rowCount != -1); - return Math.round(rowCount * minSelectivity); - } - - /** - * Unwraps the SlotRef in expr and returns the NDVs of it. - * Returns -1 if the NDVs are unknown or if expr is not a SlotRef. - */ - private long getNdv(Expr expr) { - SlotRef slotRef = expr.unwrapSlotRef(false); - if (slotRef == null) { - return -1; - } - SlotDescriptor slotDesc = slotRef.getDesc(); - if (slotDesc == null) { - return -1; - } - ColumnStats stats = slotDesc.getStats(); - if (!stats.hasNumDistinctValues()) { - return -1; - } - return stats.getNumDistinctValues(); - } - - private long getJoinrowCount() { - Preconditions.checkState(joinOp.isInnerJoin() || joinOp.isOuterJoin()); - Preconditions.checkState(childrenStatsResult.size() == 2); - - long lhsCard = (long) childrenStatsResult.get(0).getRowCount(); - long rhsCard = (long) childrenStatsResult.get(1).getRowCount(); - if (lhsCard == -1 || rhsCard == -1) { - return lhsCard; - } - - // Collect join conjuncts that are eligible to participate in rowCount estimation. - List<HashJoinNode.EqJoinConjunctScanSlots> eqJoinConjunctSlots = new ArrayList<>(); - for (Expr eqJoinConjunct : eqJoinConjuncts) { - HashJoinNode.EqJoinConjunctScanSlots slots = HashJoinNode.EqJoinConjunctScanSlots.create(eqJoinConjunct); - if (slots != null) { - eqJoinConjunctSlots.add(slots); - } - } - - if (eqJoinConjunctSlots.isEmpty()) { - // There are no eligible equi-join conjuncts. - return lhsCard; - } - - return getGenericJoinrowCount(eqJoinConjunctSlots, lhsCard, rhsCard); - } - - /** - * Returns the estimated join rowCount of a generic N:M inner or outer join based - * on the given list of equi-join conjunct slots and the join input cardinalities. - * The returned result is >= 0. - * The list of join conjuncts must be non-empty and the cardinalities must be >= 0. - * - * <p> - * Generic estimation: - * rowCount = |child(0)| * |child(1)| / max(NDV(L.c), NDV(R.d)) - * - case A: NDV(L.c) <= NDV(R.d) - * every row from child(0) joins with |child(1)| / NDV(R.d) rows - * - case B: NDV(L.c) > NDV(R.d) - * every row from child(1) joins with |child(0)| / NDV(L.c) rows - * - we adjust the NDVs from both sides to account for predicates that may - * might have reduce the rowCount and NDVs - *</p> - */ - private long getGenericJoinrowCount(List<HashJoinNode.EqJoinConjunctScanSlots> eqJoinConjunctSlots, - long lhsCard, - long rhsCard) { - Preconditions.checkState(joinOp.isInnerJoin() || joinOp.isOuterJoin()); - Preconditions.checkState(!eqJoinConjunctSlots.isEmpty()); - Preconditions.checkState(lhsCard >= 0 && rhsCard >= 0); - - long result = -1; - for (HashJoinNode.EqJoinConjunctScanSlots slots : eqJoinConjunctSlots) { - // Adjust the NDVs on both sides to account for predicates. Intuitively, the NDVs - // should only decrease. We ignore adjustments that would lead to an increase. - double lhsAdjNdv = slots.lhsNdv(); - if (slots.lhsNumRows() > lhsCard) { - lhsAdjNdv *= lhsCard / slots.lhsNumRows(); - } - double rhsAdjNdv = slots.rhsNdv(); - if (slots.rhsNumRows() > rhsCard) { - rhsAdjNdv *= rhsCard / slots.rhsNumRows(); - } - // A lower limit of 1 on the max Adjusted Ndv ensures we don't estimate - // rowCount more than the max possible. - long tmpNdv = Double.doubleToLongBits(Math.max(1, Math.max(lhsAdjNdv, rhsAdjNdv))); - long joinCard = tmpNdv == rhsCard - ? lhsCard - : CheckedMath.checkedMultiply( - Math.round((lhsCard / Math.max(1, Math.max(lhsAdjNdv, rhsAdjNdv)))), rhsCard); - if (result == -1) { - result = joinCard; - } else { - result = Math.min(result, joinCard); - } - } - Preconditions.checkState(result >= 0); - return result; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/MysqlStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/MysqlStatsDerive.java deleted file mode 100644 index 46adab483e3..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/MysqlStatsDerive.java +++ /dev/null @@ -1,33 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -/** - * Derive MysqlScanNode statistics. - */ -public class MysqlStatsDerive extends BaseStatsDerive { - - @Override - protected long deriveRowCount() { - // this is just to avoid mysql scan node's rowCount being -1. So that we can calculate the join cost - // normally. - // We assume that the data volume of all mysql tables is very small, so set rowCount directly to 1. - rowCount = rowCount == -1 ? 1 : rowCount; - return rowCount; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/NestedLoopJoinStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/NestedLoopJoinStatsDerive.java deleted file mode 100644 index c4cb221f214..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/NestedLoopJoinStatsDerive.java +++ /dev/null @@ -1,48 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.common.CheckedMath; - -import com.google.common.base.Preconditions; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -/** - * Derive NestedLoopJoinNode statistics. - */ -public class NestedLoopJoinStatsDerive extends BaseStatsDerive { - private static final Logger LOG = LogManager.getLogger(NestedLoopJoinStatsDerive.class); - - @Override - protected long deriveRowCount() { - Preconditions.checkState(childrenStatsResult.size() == 2); - if (childrenStatsResult.get(0).getRowCount() == -1 || childrenStatsResult.get(1).getRowCount() == -1) { - rowCount = -1; - } else { - rowCount = CheckedMath.checkedMultiply((long) childrenStatsResult.get(0).getRowCount(), - (long) childrenStatsResult.get(1).getRowCount()); - applyConjunctsSelectivity(); - capRowCountAtLimit(); - } - if (LOG.isDebugEnabled()) { - LOG.debug("stats CrossJoin: rowCount={}", Double.toString(rowCount)); - } - return rowCount; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java deleted file mode 100644 index 1eea9d8dc51..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java +++ /dev/null @@ -1,93 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.analysis.SlotDescriptor; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.Table; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.Id; -import org.apache.doris.common.UserException; -import org.apache.doris.planner.OlapScanNode; -import org.apache.doris.qe.ConnectContext; - -import com.google.common.base.Preconditions; - -import java.util.HashMap; -import java.util.Map; - -/** - * Derive OlapScanNode Statistics. - */ -public class OlapScanStatsDerive extends BaseStatsDerive { - - private OlapScanNode scanNode; - - private Map<Id, String> slotIdToTableIdAndColumnName; - - @Override - public void init(PlanStats node) throws UserException { - Preconditions.checkState(node instanceof OlapScanNode); - super.init(node); - scanNode = (OlapScanNode) node; - buildStructure(scanNode); - } - - @Override - public StatsDeriveResult deriveStats() { - /* - * Compute InAccurate cardinality before mv selector and tablet pruning. - * - Accurate statistical information relies on the selector of materialized views and bucket reduction. - * - However, Those both processes occur after the reorder algorithm is completed. - * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm. - * - So only an inaccurate cardinality can be calculated here. - */ - - Map<Id, ColumnStatistic> columnStatisticMap = new HashMap<>(); - Table table = scanNode.getOlapTable(); - double rowCount = table.getRowCountForNereids(); - ConnectContext connectContext = ConnectContext.get(); - for (Map.Entry<Id, String> entry : slotIdToTableIdAndColumnName.entrySet()) { - String colName = entry.getValue(); - // TODO. Get index id for materialized view. - ColumnStatistic statistic = - Env.getCurrentEnv().getStatisticsCache().getColumnStatistics( - table.getDatabase().getCatalog().getId(), - table.getDatabase().getId(), table.getId(), -1, colName, connectContext); - columnStatisticMap.put(entry.getKey(), statistic); - } - return new StatsDeriveResult(rowCount, columnStatisticMap); - } - - /** - * Desc: Build OlapScaNode infrastructure. - * - * @param: node - * @return: void - */ - public void buildStructure(OlapScanNode node) throws AnalysisException { - slotIdToTableIdAndColumnName = new HashMap<>(); - for (SlotDescriptor slot : node.getTupleDesc().getSlots()) { - if (!slot.isMaterialized()) { - continue; - } - String columnName = slot.getColumn().getName(); - slotIdToTableIdAndColumnName.put(slot.getId(), columnName); - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/SelectStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/SelectStatsDerive.java deleted file mode 100644 index ce5ef37240f..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/SelectStatsDerive.java +++ /dev/null @@ -1,35 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import com.google.common.base.Preconditions; - -/** - * Derive SelectNode statistics. - */ -public class SelectStatsDerive extends BaseStatsDerive { - - @Override - protected long deriveRowCount() { - Preconditions.checkState(!childrenStatsResult.isEmpty()); - rowCount = (long) childrenStatsResult.get(0).getRowCount(); - applyConjunctsSelectivity(); - capRowCountAtLimit(); - return rowCount; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java deleted file mode 100644 index bdeeaf610dd..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java +++ /dev/null @@ -1,60 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.common.UserException; -import org.apache.doris.planner.PlanNode; -import org.apache.doris.qe.ConnectContext; - -public class StatsRecursiveDerive { - private StatsRecursiveDerive() {} - - public static StatsRecursiveDerive getStatsRecursiveDerive() { - return Inner.INSTANCE; - } - - private static class Inner { - private static final StatsRecursiveDerive INSTANCE = new StatsRecursiveDerive(); - } - - /** - * Recursively complete the derivation of statistics for this node and all its children - * @param node - * This parameter is an input and output parameter, - * which will store the derivation result of statistical information in the corresponding node - */ - public void statsRecursiveDerive(PlanNode node) throws UserException { - if (ConnectContext.get().getState().isInternal()) { - node.setStatsDeriveResult(new StatsDeriveResult(0)); - return; - } - if (node.getStatsDeriveResult() != null) { - return; - } - for (PlanNode childNode : node.getChildren()) { - if (childNode.getStatsDeriveResult() == null) { - statsRecursiveDerive(childNode); - } - } - DeriveFactory deriveFactory = new DeriveFactory(); - BaseStatsDerive deriveStats = deriveFactory.getStatsDerive(node.getStatisticalType()); - deriveStats.init(node); - StatsDeriveResult result = deriveStats.deriveStats(); - node.setStatsDeriveResult(result); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableFunctionStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableFunctionStatsDerive.java deleted file mode 100644 index ee670c00846..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableFunctionStatsDerive.java +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import com.google.common.base.Preconditions; - -/** - * Derive TableFunctionNode statistics. - */ -public class TableFunctionStatsDerive extends BaseStatsDerive { - - @Override - protected long deriveRowCount() { - Preconditions.checkState(!childrenStatsResult.isEmpty()); - // TODO the rowCount = child rowCount * rowCount of list column - rowCount = (long) childrenStatsResult.get(0).getRowCount(); - return rowCount; - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/ColumnStatTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/ColumnStatTest.java deleted file mode 100644 index 5e48f91069e..00000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/ColumnStatTest.java +++ /dev/null @@ -1,87 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.catalog; - -import org.apache.doris.common.io.Text; -import org.apache.doris.persist.gson.GsonUtils; - -import org.junit.Assert; -import org.junit.Test; - -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; - -public class ColumnStatTest { - - @Test - public void testSerialization() throws Exception { - // 1. Write objects to file - Path path = Files.createFile(Paths.get("./columnStats")); - DataOutputStream dos = new DataOutputStream(Files.newOutputStream(path)); - - ColumnStats stats1 = new ColumnStats(); - Text.writeString(dos, GsonUtils.GSON.toJson(stats1)); - - ColumnStats stats2 = new ColumnStats(); - stats2.setAvgSerializedSize(1.1f); - stats2.setNumDistinctValues(100L); - stats2.setMaxSize(1000L); - stats2.setNumNulls(10000L); - Text.writeString(dos, GsonUtils.GSON.toJson(stats2)); - - ColumnStats stats3 = new ColumnStats(); - stats3.setAvgSerializedSize(3.3f); - stats3.setNumDistinctValues(200L); - stats3.setMaxSize(2000L); - stats3.setNumNulls(20000L); - Text.writeString(dos, GsonUtils.GSON.toJson(stats3)); - - ColumnStats stats4 = new ColumnStats(stats3); - Text.writeString(dos, GsonUtils.GSON.toJson(stats4)); - - dos.flush(); - dos.close(); - - // 2. Read objects from file - DataInputStream dis = new DataInputStream(Files.newInputStream(path)); - ColumnStats rStats1 = GsonUtils.GSON.fromJson(Text.readString(dis), ColumnStats.class); - Assert.assertEquals(rStats1, stats1); - - ColumnStats rStats2 = GsonUtils.GSON.fromJson(Text.readString(dis), ColumnStats.class); - Assert.assertEquals(rStats2, stats2); - - ColumnStats rStats3 = GsonUtils.GSON.fromJson(Text.readString(dis), ColumnStats.class); - Assert.assertEquals(rStats3, stats3); - - ColumnStats rStats4 = GsonUtils.GSON.fromJson(Text.readString(dis), ColumnStats.class); - Assert.assertEquals(rStats4, stats4); - Assert.assertEquals(rStats4, stats3); - - Assert.assertEquals(rStats3, rStats3); - Assert.assertNotEquals(rStats3, this); - Assert.assertNotEquals(rStats2, rStats3); - - // 3. delete files - dis.close(); - Files.deleteIfExists(path); - } - -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java index a31b1abdaa1..e3ac7aff29c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslatorTest.java @@ -84,6 +84,6 @@ public class PhysicalPlanTranslatorTest { PlanNode planNode = fragment.getPlanRoot(); List<OlapScanNode> scanNodeList = new ArrayList<>(); planNode.collect(OlapScanNode.class::isInstance, scanNodeList); - Assertions.assertEquals(2, scanNodeList.get(0).getTupleDesc().getMaterializedSlots().size()); + Assertions.assertEquals(2, scanNodeList.get(0).getTupleDesc().getSlots().size()); } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
