This is an automated email from the ASF dual-hosted git repository. huajianlan pushed a commit to branch nested_column_prune in repository https://gitbox.apache.org/repos/asf/doris.git
commit a2d41ad43ca2e63ca735cd28cd1cb45173a0f488 Author: 924060929 <[email protected]> AuthorDate: Thu Oct 23 13:53:32 2025 +0800 support iceberg's id access path --- .../org/apache/doris/analysis/SlotDescriptor.java | 18 +++++ .../glue/translator/PlanTranslatorContext.java | 2 + .../nereids/rules/rewrite/SlotTypeReplacer.java | 87 ++++++++++++++++++++- .../nereids/trees/expressions/SlotReference.java | 31 +++++++- .../java/org/apache/doris/planner/ScanNode.java | 89 ++++++++++++++++------ 5 files changed, 200 insertions(+), 27 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java index 3c6a2c851a1..4160842d208 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java @@ -78,6 +78,8 @@ public class SlotDescriptor { private Expr virtualColumn = null; private TColumnAccessPaths allAccessPaths; private TColumnAccessPaths predicateAccessPaths; + private TColumnAccessPaths displayAllAccessPaths; + private TColumnAccessPaths displayPredicateAccessPaths; public SlotDescriptor(SlotId id, TupleDescriptor parent) { @@ -148,6 +150,22 @@ public class SlotDescriptor { this.predicateAccessPaths = predicateAccessPaths; } + public TColumnAccessPaths getDisplayAllAccessPaths() { + return displayAllAccessPaths; + } + + public void setDisplayAllAccessPaths(TColumnAccessPaths displayAllAccessPaths) { + this.displayAllAccessPaths = displayAllAccessPaths; + } + + public TColumnAccessPaths getDisplayPredicateAccessPaths() { + return displayPredicateAccessPaths; + } + + public void setDisplayPredicateAccessPaths(TColumnAccessPaths displayPredicateAccessPaths) { + this.displayPredicateAccessPaths = displayPredicateAccessPaths; + } + public TupleDescriptor getParent() { return parent; } diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java index 0f303b88b55..101b8c5c2b5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java @@ -340,6 +340,8 @@ public class PlanTranslatorContext { if (slotReference.getAllAccessPaths().isPresent()) { slotDescriptor.setAllAccessPaths(slotReference.getAllAccessPaths().get()); slotDescriptor.setPredicateAccessPaths(slotReference.getPredicateAccessPaths().get()); + slotDescriptor.setDisplayAllAccessPaths(slotReference.getDisplayAllAccessPaths().get()); + slotDescriptor.setDisplayPredicateAccessPaths(slotReference.getDisplayPredicateAccessPaths().get()); } return slotDescriptor; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SlotTypeReplacer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SlotTypeReplacer.java index ae61878f036..ba4a82ac851 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SlotTypeReplacer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SlotTypeReplacer.java @@ -18,7 +18,9 @@ package org.apache.doris.nereids.rules.rewrite; import org.apache.doris.analysis.AccessPathInfo; +import org.apache.doris.catalog.Column; import org.apache.doris.common.Pair; +import org.apache.doris.datasource.iceberg.IcebergExternalTable; import org.apache.doris.nereids.properties.OrderKey; import org.apache.doris.nereids.trees.expressions.ArrayItemReference; import org.apache.doris.nereids.trees.expressions.Expression; @@ -53,8 +55,14 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalTopN; import org.apache.doris.nereids.trees.plans.logical.LogicalUnion; import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; import 
org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.MapType; +import org.apache.doris.nereids.types.StructType; import org.apache.doris.nereids.util.MoreFieldsThread; +import org.apache.doris.thrift.TAccessPathType; +import org.apache.doris.thrift.TColumnAccessPaths; +import org.apache.doris.thrift.TColumnNameAccessPath; import com.google.common.collect.ImmutableCollection; import com.google.common.collect.ImmutableList; @@ -64,11 +72,13 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; +import java.util.ArrayList; import java.util.Collection; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Optional; import java.util.Set; /** SlotTypeReplacer */ @@ -362,7 +372,31 @@ public class SlotTypeReplacer extends DefaultPlanRewriter<Void> { public Plan visitLogicalFileScan(LogicalFileScan fileScan, Void context) { Pair<Boolean, List<Slot>> replaced = replaceExpressions(fileScan.getOutput(), false, true); if (replaced.first) { - return fileScan.withCachedOutput(replaced.second); + List<Slot> replaceSlots = new ArrayList<>(replaced.second); + if (fileScan.getTable() instanceof IcebergExternalTable) { + for (int i = 0; i < replaceSlots.size(); i++) { + Slot slot = replaceSlots.get(i); + if (!(slot instanceof SlotReference)) { + continue; + } + SlotReference slotReference = (SlotReference) slot; + Optional<TColumnAccessPaths> allAccessPaths = slotReference.getAllAccessPaths(); + if (!allAccessPaths.isPresent() || !slotReference.getOriginalColumn().isPresent()) { + continue; + } + TColumnAccessPaths allAccessPathsWithId + = replaceIcebergAccessPathToId(allAccessPaths.get(), slotReference); + TColumnAccessPaths predicateAccessPathsWithId = replaceIcebergAccessPathToId( + 
slotReference.getPredicateAccessPaths().get(), slotReference); + replaceSlots.set(i, ((SlotReference) slot).withAccessPaths( + allAccessPathsWithId, + predicateAccessPathsWithId, + allAccessPaths.get(), + slotReference.getPredicateAccessPaths().get() + )); + } + } + return fileScan.withCachedOutput(replaceSlots); } return fileScan; } @@ -538,4 +572,55 @@ public class SlotTypeReplacer extends DefaultPlanRewriter<Void> { return e.withChildren(newChildren); } + + private TColumnAccessPaths replaceIcebergAccessPathToId( + TColumnAccessPaths originAccessPaths, SlotReference slotReference) { + Column column = slotReference.getOriginalColumn().get(); + List<TColumnNameAccessPath> replacedAllAccessPaths = new ArrayList<>(); + for (TColumnNameAccessPath nameAccessPath : originAccessPaths.name_access_paths) { + List<String> icebergColumnAccessPath = new ArrayList<>(nameAccessPath.path); + replaceIcebergAccessPathToId( + icebergColumnAccessPath, 0, slotReference.getDataType(), column + ); + replacedAllAccessPaths.add(new TColumnNameAccessPath(icebergColumnAccessPath)); + } + TColumnAccessPaths accessPathWithId = new TColumnAccessPaths(TAccessPathType.NAME); + accessPathWithId.name_access_paths = replacedAllAccessPaths; + return accessPathWithId; + } + + private void replaceIcebergAccessPathToId(List<String> originPath, int index, DataType type, Column column) { + if (index >= originPath.size()) { + return; + } + if (index == 0) { + originPath.set(index, String.valueOf(column.getUniqueId())); + replaceIcebergAccessPathToId(originPath, index + 1, type, column); + } else { + String fieldName = originPath.get(index); + if (type instanceof ArrayType) { + // skip replace * + replaceIcebergAccessPathToId( + originPath, index + 1, ((ArrayType) type).getItemType(), column.getChildren().get(0) + ); + } else if (type instanceof MapType) { + if (fieldName.equals("*") || fieldName.equals("VALUES")) { + replaceIcebergAccessPathToId( + originPath, index + 1, ((MapType) type).getValueType(), 
column.getChildren().get(1) + ); + } + } else if (type instanceof StructType) { + for (Column child : column.getChildren()) { + if (child.getName().equals(fieldName)) { + originPath.set(index, String.valueOf(child.getUniqueId())); + DataType childType = ((StructType) type).getNameToFields().get(fieldName).getDataType(); + replaceIcebergAccessPathToId(originPath, index + 1, childType, child); + break; + } + } + } else { + originPath.set(index, String.valueOf(column.getUniqueId())); + } + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java index ad09b4f870a..cb1a45e9170 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SlotReference.java @@ -59,6 +59,8 @@ public class SlotReference extends Slot { private final Column oneLevelColumn; private final Optional<TColumnAccessPaths> allAccessPaths; private final Optional<TColumnAccessPaths> predicateAccessPaths; + private final Optional<TColumnAccessPaths> displayAllAccessPaths; + private final Optional<TColumnAccessPaths> displayPredicateAccessPaths; public SlotReference(String name, DataType dataType) { this(StatementScopeIdGenerator.newExprId(), name, dataType, true, ImmutableList.of(), @@ -100,7 +102,8 @@ public class SlotReference extends Slot { @Nullable TableIf oneLevelTable, Column oneLevelColumn, List<String> subPath, Optional<Pair<Integer, Integer>> indexInSql) { this(exprId, name, dataType, nullable, qualifier, originalTable, originalColumn, oneLevelTable, - oneLevelColumn, subPath, indexInSql, Optional.empty(), Optional.empty()); + oneLevelColumn, subPath, indexInSql, Optional.empty(), Optional.empty(), + Optional.empty(), Optional.empty()); } /** @@ -118,7 +121,9 @@ public class SlotReference extends Slot { List<String> qualifier, @Nullable TableIf 
originalTable, @Nullable Column originalColumn, @Nullable TableIf oneLevelTable, Column oneLevelColumn, List<String> subPath, Optional<Pair<Integer, Integer>> indexInSql, - Optional<TColumnAccessPaths> allAccessPaths, Optional<TColumnAccessPaths> predicateAccessPaths) { + Optional<TColumnAccessPaths> allAccessPaths, Optional<TColumnAccessPaths> predicateAccessPaths, + Optional<TColumnAccessPaths> displayAllAccessPaths, + Optional<TColumnAccessPaths> displayPredicateAccessPaths) { super(indexInSql); this.exprId = exprId; this.name = name; @@ -133,6 +138,8 @@ public class SlotReference extends Slot { this.subPath = Objects.requireNonNull(subPath, "subPath can not be null"); this.allAccessPaths = allAccessPaths; this.predicateAccessPaths = predicateAccessPaths; + this.displayAllAccessPaths = displayAllAccessPaths; + this.displayPredicateAccessPaths = displayPredicateAccessPaths; } public static SlotReference of(String name, DataType type) { @@ -360,7 +367,17 @@ public class SlotReference extends Slot { public SlotReference withAccessPaths(TColumnAccessPaths allAccessPaths, TColumnAccessPaths predicateAccessPaths) { return new SlotReference(exprId, name, dataType, nullable, qualifier, originalTable, originalColumn, oneLevelTable, oneLevelColumn, - subPath, indexInSqlString, Optional.of(allAccessPaths), Optional.of(predicateAccessPaths)); + subPath, indexInSqlString, Optional.of(allAccessPaths), Optional.of(predicateAccessPaths), + Optional.of(allAccessPaths), Optional.of(predicateAccessPaths)); + } + + public SlotReference withAccessPaths( + TColumnAccessPaths allAccessPaths, TColumnAccessPaths predicateAccessPaths, + TColumnAccessPaths displayAllAccessPaths, TColumnAccessPaths displayPredicateAccessPaths) { + return new SlotReference(exprId, name, dataType, nullable, qualifier, + originalTable, originalColumn, oneLevelTable, oneLevelColumn, + subPath, indexInSqlString, Optional.of(allAccessPaths), Optional.of(predicateAccessPaths), + 
Optional.of(displayAllAccessPaths), Optional.of(displayPredicateAccessPaths)); } public Optional<TColumnAccessPaths> getAllAccessPaths() { @@ -370,4 +387,12 @@ public class SlotReference extends Slot { public Optional<TColumnAccessPaths> getPredicateAccessPaths() { return predicateAccessPaths; } + + public Optional<TColumnAccessPaths> getDisplayAllAccessPaths() { + return displayAllAccessPaths; + } + + public Optional<TColumnAccessPaths> getDisplayPredicateAccessPaths() { + return displayPredicateAccessPaths; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java index e438b0984d1..44715603903 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java @@ -48,10 +48,13 @@ import org.apache.doris.datasource.FederationBackendPolicy; import org.apache.doris.datasource.SplitAssignment; import org.apache.doris.datasource.SplitGenerator; import org.apache.doris.datasource.SplitSource; +import org.apache.doris.datasource.iceberg.source.IcebergScanNode; import org.apache.doris.qe.ConnectContext; import org.apache.doris.rpc.RpcException; import org.apache.doris.statistics.StatisticalType; import org.apache.doris.system.Backend; +import org.apache.doris.thrift.TColumnAccessPaths; +import org.apache.doris.thrift.TColumnNameAccessPath; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TPlanNode; import org.apache.doris.thrift.TScanRange; @@ -565,26 +568,43 @@ public abstract class ScanNode extends PlanNode implements SplitGenerator { if (!slot.getType().equals(slot.getColumn().getType())) { prunedType = slot.getType().toString(); } + String displayAllAccessPathsString = null; + if (slot.getDisplayAllAccessPaths() != null + && slot.getDisplayAllAccessPaths().name_access_paths != null + && !slot.getDisplayAllAccessPaths().name_access_paths.isEmpty()) { + if (this 
instanceof IcebergScanNode) { + displayAllAccessPathsString = mergeIcebergAccessPathsWithId( + slot.getAllAccessPaths(), + slot.getDisplayAllAccessPaths() + ); + } else { + displayAllAccessPathsString = slot.getDisplayAllAccessPaths().name_access_paths + .stream() + .map(a -> StringUtils.join(a.path, ".")) + .collect(Collectors.joining(", ")); + } + } + String displayPredicateAccessPathsString = null; + if (slot.getDisplayPredicateAccessPaths() != null + && slot.getDisplayPredicateAccessPaths().name_access_paths != null + && !slot.getDisplayPredicateAccessPaths().name_access_paths.isEmpty()) { + if (this instanceof IcebergScanNode) { + displayPredicateAccessPathsString = mergeIcebergAccessPathsWithId( + slot.getPredicateAccessPaths(), + slot.getDisplayPredicateAccessPaths() + ); + } else { + displayPredicateAccessPathsString = slot.getPredicateAccessPaths().name_access_paths + .stream() + .map(a -> StringUtils.join(a.path, ".")) + .collect(Collectors.joining(", ")); + } + } + - String allAccessPathsString = null; - if (slot.getAllAccessPaths() != null - && slot.getAllAccessPaths().name_access_paths != null - && !slot.getAllAccessPaths().name_access_paths.isEmpty()) { - allAccessPathsString = slot.getAllAccessPaths().name_access_paths - .stream() - .map(a -> StringUtils.join(a.path, ".")) - .collect(Collectors.joining(", ")); - } - String predicateAccessPathsString = null; - if (slot.getPredicateAccessPaths() != null - && slot.getPredicateAccessPaths().name_access_paths != null - && !slot.getPredicateAccessPaths().name_access_paths.isEmpty()) { - predicateAccessPathsString = slot.getPredicateAccessPaths().name_access_paths - .stream() - .map(a -> StringUtils.join(a.path, ".")) - .collect(Collectors.joining(", ")); - } - if (prunedType == null && allAccessPathsString == null && predicateAccessPathsString == null) { + if (prunedType == null + && displayAllAccessPathsString == null + && displayPredicateAccessPathsString == null) { continue; } @@ -597,14 +617,37 @@ 
public abstract class ScanNode extends PlanNode implements SplitGenerator { if (prunedType != null) { output.append(prefix).append(" pruned type: ").append(prunedType).append("\n"); } - if (allAccessPathsString != null) { - output.append(prefix).append(" all access paths: [").append(allAccessPathsString).append("]\n"); + if (displayAllAccessPathsString != null) { + output.append(prefix).append(" all access paths: [") + .append(displayAllAccessPathsString).append("]\n"); } - if (predicateAccessPathsString != null) { + if (displayPredicateAccessPathsString != null) { output.append(prefix).append(" predicate access paths: [") - .append(predicateAccessPathsString).append("]\n"); + .append(displayPredicateAccessPathsString).append("]\n"); + } + } + } + + private String mergeIcebergAccessPathsWithId( + TColumnAccessPaths accessPaths, TColumnAccessPaths displayAccessPaths) { + List<String> mergeDisplayAccessPaths = Lists.newArrayList(); + for (int i = 0; i < displayAccessPaths.name_access_paths.size(); i++) { + TColumnNameAccessPath nameAccessPath = displayAccessPaths.name_access_paths.get(i); + TColumnNameAccessPath idAccessPath = accessPaths.name_access_paths.get(i); + + List<String> mergedPath = new ArrayList<>(); + for (int j = 0; j < idAccessPath.path.size(); j++) { + String name = nameAccessPath.path.get(j); + String id = idAccessPath.path.get(j); + if (name.equals(id)) { + mergedPath.add(name); + } else { + mergedPath.add(name + "(" + id + ")"); + } } + mergeDisplayAccessPaths.add(StringUtils.join(mergedPath, ".")); } + return StringUtils.join(mergeDisplayAccessPaths, ", "); } public List<TupleId> getOutputTupleIds() { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org
