This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 87d949800b0 [fix](fe) Avoid OFFSET path and NULL path for complex
datatype when its children are accessed. (#63229)
87d949800b0 is described below
commit 87d949800b07337d3a7d128e609903db8ca4b6ff
Author: minghong <[email protected]>
AuthorDate: Tue May 19 10:15:31 2026 +0800
[fix](fe) Avoid OFFSET path and NULL path for complex datatype when its
children are accessed. (#63229)
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #62205
Problem Summary: cardinality/map_size on element_at(map, key) was
collected as an OFFSET-only access path. However, element_at(map, key)
still needs the map keys for lookup, and pushing a nested *.OFFSET
predicate path can make the BE route OFFSET to an array item child and
fail with an invalid access path. Fall back to normal element access for
these expressions while preserving the OFFSET-only optimization for
direct array/map cardinality.
### Release note
None
### Check List (For Author)
- Test: Unit Test
- tools/fast-compile-fe.sh
- FE_UT_PARALLEL=0 ./run-fe-ut.sh --run
org.apache.doris.nereids.rules.rewrite.PruneNestedColumnTest#testCardinalityMapElementDoesNotUseOffsetPath
- FE_UT_PARALLEL=0 ./run-fe-ut.sh --run
org.apache.doris.nereids.rules.rewrite.PruneNestedColumnTest#testStructRootMapMixedAccessKeepsKeysPath+testCardinalityMapElementDoesNotUseOffsetPath
- cd fe && mvn checkstyle:check -pl fe-core -q
- ./build.sh --fe
- Behavior changed: No
- Does this need documentation: No
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---------
Co-authored-by: Copilot <[email protected]>
---
.../rewrite/AccessPathExpressionCollector.java | 19 ++
.../nereids/rules/rewrite/NestedColumnPruning.java | 294 ++++++++++++++++++---
.../rules/rewrite/PruneNestedColumnTest.java | 114 +++++++-
.../column_pruning/null_column_pruning.out | 6 +
.../string_length_column_pruning.out | 9 +
.../column_pruning/null_column_pruning.groovy | 67 ++++-
.../string_length_column_pruning.groovy | 72 ++++-
7 files changed, 523 insertions(+), 58 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
index 47d83e93ca9..680eab7796c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
@@ -45,6 +45,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.ArraySortBy;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ArraySplit;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Cardinality;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.If;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Lambda;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Length;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsEntry;
@@ -581,6 +582,24 @@ public class AccessPathExpressionCollector extends
DefaultExpressionVisitor<Void
return visit(isNull, context);
}
+ @Override
+ public Void visitIf(If ifExpr, CollectorContext context) {
+ if (isFunctionNullCheckPath(context.accessPathBuilder.accessPath)) {
+ ifExpr.getCondition().accept(this, new
CollectorContext(context.statementContext, context.bottomFilter));
+ ifExpr.getTrueValue().accept(this, copyContext(context));
+ ifExpr.getFalseValue().accept(this, copyContext(context));
+ return null;
+ }
+ return visit(ifExpr, context);
+ }
+
+ private static CollectorContext copyContext(CollectorContext context) {
+ CollectorContext copy = new CollectorContext(context.statementContext,
context.bottomFilter);
+
copy.accessPathBuilder.addSuffix(context.accessPathBuilder.getPathList());
+ copy.type = context.type;
+ return copy;
+ }
+
@Override
public Void visitNot(Not not, CollectorContext context) {
// NOT(IS NULL) == IS NOT NULL: same null-only access pattern
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
index f9a7d4def10..c7507deb32e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
@@ -283,6 +283,7 @@ public class NestedColumnPruning implements CustomRewriter {
}
// Offset-only access (e.g. length(str_col)): type stays
varchar,
// but we must still send the access path to BE so it
skips the char data.
+ stripExactCoveredDataSkippingSuffixPaths(slot,
allAccessPaths, allAccessPaths);
stripNullSuffixPaths(slot, allAccessPaths);
List<ColumnAccessPath> allPaths =
buildColumnAccessPaths(slot, allAccessPaths);
result.put(slot.getExprId().asInt(),
@@ -348,35 +349,15 @@ public class NestedColumnPruning implements
CustomRewriter {
continue;
}
+ // If a field is read in full, its metadata-only NULL/OFFSET
access is redundant
+ // for any data type: e.g. [s] covers both [s.NULL] and [s.OFFSET].
+ stripExactCoveredDataSkippingSuffixPaths(slot, allAccessPaths,
allAccessPaths);
+
// Strip OFFSET-suffix paths when a non-OFFSET path covers the
same nested field or
// container. The overlapping array/map container may live under
the root slot itself
// or under a nested struct field, so compare against the actual
nested prefix instead
// of gating this logic on the root slot type.
- int slotId = slot.getExprId().asInt();
- Collection<Pair<ColumnAccessPathType, List<String>>> paths =
allAccessPaths.get(slotId);
- List<List<String>> nonOffsetPaths = new ArrayList<>();
- for (Pair<ColumnAccessPathType, List<String>> p : paths) {
- List<String> path = p.second;
- if (path.isEmpty()
- ||
!AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) {
- nonOffsetPaths.add(path);
- }
- }
- List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new
ArrayList<>();
- List<Pair<ColumnAccessPathType, List<String>>> pathsToAdd = new
ArrayList<>();
- for (Pair<ColumnAccessPathType, List<String>> p : new
ArrayList<>(paths)) {
- OffsetPathRewrite rewrite = analyzeOffsetPathRewrite(
- slot.getDataType(), p.second, nonOffsetPaths);
- if (!rewrite.shouldRemoveOffsetPath()) {
- continue;
- }
- pathsToRemove.add(p);
- for (List<String> supplementalPath :
rewrite.getSupplementalPaths()) {
- pathsToAdd.add(Pair.of(p.first, supplementalPath));
- }
- }
- paths.removeAll(pathsToRemove);
- paths.addAll(pathsToAdd);
+ stripCoveredOffsetSuffixPaths(slot, allAccessPaths,
allAccessPaths);
// Strip NULL-suffix paths when a non-NULL path also exists for
the same slot.
// E.g. `SELECT col FROM t WHERE col IS NULL` — full data is
needed, NULL path is redundant.
@@ -399,11 +380,16 @@ public class NestedColumnPruning implements
CustomRewriter {
// third: build predicate access path
for (Entry<Slot, DataTypeAccessTree> kv :
slotIdToPredicateAccessTree.entrySet()) {
Slot slot = kv.getKey();
+ stripExactCoveredDataSkippingSuffixPaths(slot,
predicateAccessPaths, allAccessPaths);
+ stripCoveredOffsetSuffixPaths(slot, predicateAccessPaths,
allAccessPaths);
+ stripCoveredArrayNullSuffixPaths(slot, predicateAccessPaths,
allAccessPaths);
stripNullSuffixPaths(slot, predicateAccessPaths);
List<ColumnAccessPath> predicatePaths =
buildColumnAccessPaths(slot, predicateAccessPaths);
AccessPathInfo accessPathInfo =
result.get(slot.getExprId().asInt());
if (accessPathInfo != null) {
+ retainPredicatePathsInFinalAllAccessPaths(
+ predicatePaths, accessPathInfo.getAllAccessPaths());
accessPathInfo.getPredicateAccessPaths().addAll(predicatePaths);
}
}
@@ -414,6 +400,8 @@ public class NestedColumnPruning implements CustomRewriter {
buildColumnAccessPaths(slot, predicateAccessPaths);
AccessPathInfo accessPathInfo =
result.get(slot.getExprId().asInt());
if (accessPathInfo != null) {
+ retainPredicatePathsInFinalAllAccessPaths(
+ predicatePaths, accessPathInfo.getAllAccessPaths());
accessPathInfo.getPredicateAccessPaths().addAll(predicatePaths);
}
}
@@ -456,6 +444,11 @@ public class NestedColumnPruning implements CustomRewriter
{
return OffsetPathRewrite.keep();
}
List<String> prefix = path.subList(0, path.size() - 1);
+ return analyzePrefixCoverage(slotType, prefix, nonOffsetPaths);
+ }
+
+ private static OffsetPathRewrite analyzePrefixCoverage(
+ DataType slotType, List<String> prefix, List<List<String>>
nonOffsetPaths) {
List<List<String>> supplementalPaths = new ArrayList<>();
for (List<String> nonOffset : nonOffsetPaths) {
OffsetPathRewrite candidate =
compareOffsetPrefixCoverage(slotType, prefix, nonOffset);
@@ -473,6 +466,199 @@ public class NestedColumnPruning implements
CustomRewriter {
return
OffsetPathRewrite.rewriteWithSupplementalPaths(supplementalPaths);
}
+ /**
+ * Remove OFFSET-only paths from {@code targetAccessPaths} when data paths
in
+ * {@code coveringAccessPaths} already read the same array/map/string
container or a child
+ * under it.
+ *
+ * <p>Examples:
+ * <ul>
+ * <li>{@code [arr.OFFSET, arr.*.field]} becomes {@code [arr.*.field]}
because the array
+ * child read must keep BE on the normal data iterator path.</li>
+ * <li>{@code [map.*.OFFSET, map.VALUES]} becomes {@code [map.KEYS,
map.VALUES]} because
+ * {@code map['k']} still needs full keys for lookup, while values
cover the offset.</li>
+ * </ul>
+ */
+ private static void stripCoveredOffsetSuffixPaths(
+ Slot slot, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> targetAccessPaths,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
coveringAccessPaths) {
+ int slotId = slot.getExprId().asInt();
+ Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
targetAccessPaths.get(slotId);
+ if (targetPaths.isEmpty()) {
+ return;
+ }
+
+ List<List<String>> nonOffsetPaths = new ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p :
coveringAccessPaths.get(slotId)) {
+ List<String> path = p.second;
+ if (path.isEmpty()
+ ||
!AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) {
+ nonOffsetPaths.add(path);
+ }
+ }
+ for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
+ List<String> path = p.second;
+ if (path.isEmpty()
+ ||
!AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) {
+ nonOffsetPaths.add(path);
+ }
+ }
+
+ List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new
ArrayList<>();
+ List<Pair<ColumnAccessPathType, List<String>>> pathsToAdd = new
ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p : new
ArrayList<>(targetPaths)) {
+ OffsetPathRewrite rewrite = analyzeOffsetPathRewrite(
+ slot.getDataType(), p.second, nonOffsetPaths);
+ if (!rewrite.shouldRemoveOffsetPath()) {
+ continue;
+ }
+ pathsToRemove.add(p);
+ for (List<String> supplementalPath :
rewrite.getSupplementalPaths()) {
+ pathsToAdd.add(Pair.of(p.first, supplementalPath));
+ }
+ }
+ targetPaths.removeAll(pathsToRemove);
+ targetPaths.addAll(pathsToAdd);
+ }
+
+ /**
+ * Remove array NULL-only paths from {@code targetAccessPaths} when
another path already reads
+ * the same array container or data under it. This mirrors OFFSET coverage
because an array
+ * element/data read must not be combined with an array NULL_MAP_ONLY read
for the same prefix.
+ *
+ * <p>Examples:
+ * <ul>
+ * <li>{@code [map.VALUES.NULL, map.VALUES.*.field]} becomes
+ * {@code [map.VALUES.*.field]}.</li>
+ * <li>{@code [map.*.NULL, map.VALUES.*.field]} becomes
+ * {@code [map.KEYS, map.VALUES.*.field]} so map lookup keys are
still available.</li>
+ * </ul>
+ */
+ private static void stripCoveredArrayNullSuffixPaths(
+ Slot slot, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> targetAccessPaths,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
coveringAccessPaths) {
+ int slotId = slot.getExprId().asInt();
+ Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
targetAccessPaths.get(slotId);
+ if (targetPaths.isEmpty()) {
+ return;
+ }
+
+ List<List<String>> nonNullPaths = new ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p :
coveringAccessPaths.get(slotId)) {
+ List<String> path = p.second;
+ if (path.isEmpty() ||
!AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) {
+ nonNullPaths.add(path);
+ }
+ }
+ for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
+ List<String> path = p.second;
+ if (path.isEmpty() ||
!AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) {
+ nonNullPaths.add(path);
+ }
+ }
+
+ List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new
ArrayList<>();
+ List<Pair<ColumnAccessPathType, List<String>>> pathsToAdd = new
ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p : new
ArrayList<>(targetPaths)) {
+ List<String> path = p.second;
+ if (path.isEmpty() ||
!AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) {
+ continue;
+ }
+ List<String> prefix = path.subList(0, path.size() - 1);
+ Optional<DataType> prefixType = dataTypeAtPath(slot.getDataType(),
prefix);
+ if (!prefixType.isPresent() || !prefixType.get().isArrayType()) {
+ continue;
+ }
+ OffsetPathRewrite rewrite =
analyzePrefixCoverage(slot.getDataType(), prefix, nonNullPaths);
+ if (!rewrite.shouldRemoveOffsetPath()) {
+ continue;
+ }
+ pathsToRemove.add(p);
+ for (List<String> supplementalPath :
rewrite.getSupplementalPaths()) {
+ pathsToAdd.add(Pair.of(p.first, supplementalPath));
+ }
+ }
+ targetPaths.removeAll(pathsToRemove);
+ targetPaths.addAll(pathsToAdd);
+ }
+
+ /**
+ * Remove exact metadata-only NULL/OFFSET paths when the same field is
read in full.
+ * This rule is type-agnostic: once {@code s} itself is accessed, {@code
s.NULL} and
+ * {@code s.OFFSET} are redundant and unsafe to keep with the full data
path.
+ *
+ * <p>Examples:
+ * <ul>
+ * <li>{@code [str_col, str_col.NULL]} becomes {@code [str_col]}.</li>
+ * <li>{@code [arr, arr.OFFSET]} becomes {@code [arr]}.</li>
+ * <li>{@code [map.*, map.*.OFFSET]} becomes {@code [map.*]}.</li>
+ * </ul>
+ */
+ private static void stripExactCoveredDataSkippingSuffixPaths(
+ Slot slot, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> targetAccessPaths,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
coveringAccessPaths) {
+ int slotId = slot.getExprId().asInt();
+ Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
targetAccessPaths.get(slotId);
+ if (targetPaths.isEmpty()) {
+ return;
+ }
+
+ List<List<String>> fullAccessPaths = new ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p :
coveringAccessPaths.get(slotId)) {
+ if (!isDataSkippingOnlyAccessPath(p.second)) {
+ fullAccessPaths.add(p.second);
+ }
+ }
+ for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
+ if (!isDataSkippingOnlyAccessPath(p.second)) {
+ fullAccessPaths.add(p.second);
+ }
+ }
+
+ List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new
ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
+ List<String> path = p.second;
+ if (!isDataSkippingOnlyAccessPath(path)) {
+ continue;
+ }
+ List<String> prefix = path.subList(0, path.size() - 1);
+ for (List<String> fullAccessPath : fullAccessPaths) {
+ if (pathCoversPrefix(fullAccessPath, prefix)) {
+ pathsToRemove.add(p);
+ break;
+ }
+ }
+ }
+ targetPaths.removeAll(pathsToRemove);
+ }
+
+ private static Optional<DataType> dataTypeAtPath(DataType slotType,
List<String> path) {
+ if (path.isEmpty()) {
+ return Optional.empty();
+ }
+ DataType currentType = slotType;
+ for (int i = 1; i < path.size(); i++) {
+ String component = path.get(i);
+ if (currentType.isStructType()) {
+ StructField field = ((StructType)
currentType).getField(component);
+ if (field == null) {
+ return Optional.empty();
+ }
+ currentType = field.getDataType();
+ } else if (currentType.isArrayType()) {
+ if (!AccessPathInfo.ACCESS_ALL.equals(component)) {
+ return Optional.empty();
+ }
+ currentType = ((ArrayType) currentType).getItemType();
+ } else if (currentType.isMapType()) {
+ currentType = descendMapType((MapType) currentType, component);
+ } else {
+ return Optional.empty();
+ }
+ }
+ return Optional.of(currentType);
+ }
+
private static OffsetPathRewrite compareOffsetPrefixCoverage(
DataType slotType, List<String> prefix, List<String> nonOffset) {
if (nonOffset.isEmpty()) {
@@ -590,21 +776,27 @@ public class NestedColumnPruning implements
CustomRewriter {
}
/**
- * Strip NULL-suffix paths that are redundant because a non-NULL path
reads the same
- * column/subcolumn in full (its data inherently includes the null flag).
+ * Strip NULL-suffix paths that are redundant because a non-NULL path
reads child
+ * data below the same prefix or reads an OFFSET path over the same prefix.
*
- * For example, [int_col, NULL] is removed when [int_col] exists — reading
the full
- * column includes its null flag.
+ * <p>Examples:
+ * <ul>
+ * <li>{@code [struct_col.NULL, struct_col.city]} becomes {@code
[struct_col.city]}.</li>
+ * <li>{@code [str_col.NULL, str_col.OFFSET]} becomes {@code
[str_col.OFFSET]} because
+ * the offset read can provide nullness for variable-length
columns.</li>
+ * </ul>
*
- * A parent NULL path must also be removed when any child path is required
under the
+ * <p>A parent NULL path must also be removed when any child path is
required under the
* same prefix, e.g. [struct_col, NULL] with [struct_col, city]. This
looks like the
- * parent null map is still useful for predicates, but it cannot be kept in
+ * parent null map may still be useful for predicates, but it cannot be
kept in
* allAccessPaths with the current BE iterator contract: Struct/Array/Map
iterators
* treat a leading NULL sub-path as NULL_MAP_ONLY and skip all children.
If FE kept
* [struct_col.NULL, struct_col.city] in allAccessPaths, BE would read
only the
* struct null map and default-fill city instead of routing the city child
iterator.
- * predicateAccessPaths still retains the NULL path, while the normal
nullable
- * container read materializes the parent null map together with required
children.
+ * When the NULL path is removed from allAccessPaths, it must also be
removed from
+ * predicateAccessPaths so the BE can rely on predicate paths being a
subset of all
+ * paths. The normal nullable container read materializes the parent null
map
+ * together with required children.
*/
private static void stripNullSuffixPaths(
Slot slot, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> allAccessPaths) {
@@ -655,10 +847,43 @@ public class NestedColumnPruning implements
CustomRewriter {
}
}
+ /**
+ * Keep predicate access paths as a subset of final all access paths after
NULL/OFFSET cleanup.
+ * Predicate paths are built from filter expressions first, but later
all-path rewrites may drop
+ * metadata-only paths or collapse paths to whole-column access. Any
predicate path not present
+ * in final all paths must be removed before sending access info to BE.
+ *
+ * <p>Examples:
+ * <ul>
+ * <li>All paths {@code [s]}, predicate paths {@code [s.city.NULL]}
becomes no predicate
+ * paths after parent NULL removal.</li>
+ * <li>All paths {@code [s.city.NULL, s.zip]}, predicate paths
+ * {@code [s.NULL, s.city.NULL]} becomes {@code [s.city.NULL]}.</li>
+ * </ul>
+ */
+ private static void retainPredicatePathsInFinalAllAccessPaths(
+ List<ColumnAccessPath> predicatePaths, List<ColumnAccessPath>
allPaths) {
+ if (predicatePaths.isEmpty()) {
+ return;
+ }
+
+ List<ColumnAccessPath> toRemove = new ArrayList<>();
+ for (ColumnAccessPath predicatePath : predicatePaths) {
+ if (!allPaths.contains(predicatePath)) {
+ toRemove.add(predicatePath);
+ }
+ }
+ predicatePaths.removeAll(toRemove);
+ }
+
private static boolean hasStrictPrefix(List<String> path, List<String>
prefix) {
return path.size() > prefix.size() && path.subList(0,
prefix.size()).equals(prefix);
}
+ private static boolean pathCoversPrefix(List<String> path, List<String>
prefix) {
+ return prefix.size() >= path.size() && prefix.subList(0,
path.size()).equals(path);
+ }
+
private static List<ColumnAccessPath> buildColumnAccessPaths(
Slot slot, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> accessPaths) {
List<ColumnAccessPath> paths = new ArrayList<>();
@@ -682,7 +907,8 @@ public class NestedColumnPruning implements CustomRewriter {
if (accessWholeColumn) {
SlotReference slotReference = (SlotReference) slot;
String wholeColumnName =
slotReference.getOriginalColumn().get().getName();
- return ImmutableList.of(new
ColumnAccessPath(accessWholeColumnType, ImmutableList.of(wholeColumnName)));
+ return new ArrayList<>(
+ ImmutableList.of(new
ColumnAccessPath(accessWholeColumnType, ImmutableList.of(wholeColumnName))));
}
return paths;
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java
index 8d8d3441fa6..4c200629ac0 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java
@@ -117,6 +117,23 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
+ " >\n"
+ ") properties ('replication_num'='1')");
+ createTable("create table nested_array_tbl(\n"
+ + " id int,\n"
+ + " a array<array<int>>\n"
+ + ") properties ('replication_num'='1')");
+
+ createTable("create table map_array_tbl(\n"
+ + " id int,\n"
+ + " map_arr_col map<string, array<int>>\n"
+ + ") properties ('replication_num'='1')");
+
+ createTable("create table map_array_value_tbl(\n"
+ + " id int,\n"
+ + " s struct<\n"
+ + " m: map<string, array<struct<verified: boolean, value:
int>>>\n"
+ + " >\n"
+ + ") properties ('replication_num'='1')");
+
connectContext.getSessionVariable().setDisableNereidsRules(RuleType.PRUNE_EMPTY_PARTITION.name());
connectContext.getSessionVariable().enableNereidsTimeout = false;
}
@@ -174,6 +191,80 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
ImmutableList.of(path("s", "m", "*", "OFFSET"), path("s", "m",
"VALUES", "OFFSET")));
}
+ @Test
+ public void testCardinalityArrayElementKeepsOffsetPath() throws Exception {
+ assertAllAccessPathsContain(
+ "select cardinality(element_at(a, 1)) from nested_array_tbl",
+ ImmutableList.of(path("a", "*", "OFFSET")),
+ ImmutableList.of(path("a", "*")));
+ }
+
+ @Test
+ public void testCardinalityMapElementKeepsValueOffsetPath() throws
Exception {
+ assertColumn("select cardinality(map_arr_col['a']) from map_array_tbl",
+ "map<text,array<int>>",
+ ImmutableList.of(path("map_arr_col", "KEYS"),
path("map_arr_col", "VALUES", "OFFSET")),
+ ImmutableList.of());
+ }
+
+ @Test
+ public void testFullFieldAccessStripsExactDataSkippingPath() throws
Exception {
+ assertColumn("select struct_element(s, 'city') from tbl "
+ + "where struct_element(s, 'city') is null",
+ "struct<city:text>",
+ ImmutableList.of(path("s", "city")),
+ ImmutableList.of());
+
+ assertColumn("select cardinality(struct_element(s, 'data')),
struct_element(s, 'data') from tbl",
+ "struct<data:array<map<int,struct<a:int,b:double>>>>",
+ ImmutableList.of(path("s", "data")),
+ ImmutableList.of());
+
+ assertColumn("select cardinality(a), a from nested_array_tbl",
+ "array<array<int>>",
+ ImmutableList.of(path("a")),
+ ImmutableList.of());
+
+ assertColumn("select cardinality(map_arr_col['a']), map_arr_col['a']
from map_array_tbl",
+ "map<text,array<int>>",
+ ImmutableList.of(path("map_arr_col", "*")),
+ ImmutableList.of());
+ }
+
+ @Test
+ public void testCardinalityMapElementOffsetCoveredByValueFieldAccess()
throws Exception {
+ Pair<PhysicalPlan, List<SlotDescriptor>> result = collectComplexSlots(
+ "select struct_element(element_at(element_at(struct_element(s,
'm'), 'null'), 1), 'verified') "
+ + "from map_array_value_tbl "
+ + "where cardinality(element_at(struct_element(s,
'm'), 'null')) > 0");
+ TreeSet<ColumnAccessPath> allAccessPaths = new TreeSet<>();
+ TreeSet<ColumnAccessPath> predicateAccessPaths = new TreeSet<>();
+ for (SlotDescriptor slotDescriptor : result.second) {
+ allAccessPaths.addAll(slotDescriptor.getAllAccessPaths());
+
predicateAccessPaths.addAll(slotDescriptor.getPredicateAccessPaths());
+ }
+ Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "*", "*",
"verified")));
+ Assertions.assertFalse(allAccessPaths.contains(path("s", "m", "*",
"OFFSET")));
+ Assertions.assertFalse(predicateAccessPaths.contains(path("s", "m",
"*", "OFFSET")));
+ }
+
+ @Test
+ public void testMapElementArrayNullPathCoveredByValueFieldAccess() throws
Exception {
+ Pair<PhysicalPlan, List<SlotDescriptor>> result = collectComplexSlots(
+ "select struct_element(element_at(element_at(struct_element(s,
'm'), 'null'), 1), 'verified') "
+ + "from map_array_value_tbl "
+ + "where element_at(struct_element(s, 'm'), 'null') is
null");
+ TreeSet<ColumnAccessPath> allAccessPaths = new TreeSet<>();
+ TreeSet<ColumnAccessPath> predicateAccessPaths = new TreeSet<>();
+ for (SlotDescriptor slotDescriptor : result.second) {
+ allAccessPaths.addAll(slotDescriptor.getAllAccessPaths());
+
predicateAccessPaths.addAll(slotDescriptor.getPredicateAccessPaths());
+ }
+ Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "*", "*",
"verified")));
+ Assertions.assertFalse(allAccessPaths.contains(path("s", "m", "*",
"NULL")));
+ Assertions.assertFalse(predicateAccessPaths.contains(path("s", "m",
"*", "NULL")));
+ }
+
@Test
public void testVariantAccessPath() throws Exception {
assertColumn("select v['a']['B'] from variant_tbl",
@@ -554,12 +645,12 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
assertColumn("select map_keys(map_col) from str_tbl where
map_keys(map_col) is null",
"map<text,text>",
ImmutableList.of(path("map_col", "KEYS")),
- ImmutableList.of(path("map_col", "NULL"))
+ ImmutableList.of()
);
assertColumn("select map_values(map_col) from str_tbl where
map_values(map_col) is null",
"map<text,text>",
ImmutableList.of(path("map_col", "VALUES")),
- ImmutableList.of(path("map_col", "NULL"))
+ ImmutableList.of()
);
}
@@ -568,7 +659,12 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
assertColumn("select s from tbl where struct_element(s, 'city') is not
null",
"struct<city:text,data:array<map<int,struct<a:int,b:double>>>>",
ImmutableList.of(path("s")),
- ImmutableList.of(path("s", "city", "NULL"))
+ ImmutableList.of()
+ );
+ assertColumn("select s from tbl where struct_element(s, 'city') is
null",
+
"struct<city:text,data:array<map<int,struct<a:int,b:double>>>>",
+ ImmutableList.of(path("s")),
+ ImmutableList.of()
);
assertColumn("select struct_element(s, 'data') from tbl where
struct_element(s, 'city') is not null",
@@ -580,7 +676,7 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
assertColumn("select struct_element(s, 'data') from tbl where
struct_element(s, 'city') is not null and struct_element(s, 'data') is not
null",
"struct<city:text,data:array<map<int,struct<a:int,b:double>>>>",
ImmutableList.of(path("s", "city", "NULL"), path("s", "data")),
- ImmutableList.of(path("s", "city", "NULL"), path("s", "data",
"NULL"))
+ ImmutableList.of(path("s", "city", "NULL"))
);
}
@@ -1204,6 +1300,7 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
TreeSet<ColumnAccessPath> actualPredicateAccessPaths
= new TreeSet<>(slotDescriptor.getPredicateAccessPaths());
Assertions.assertEquals(expectPredicateAccessPathSet,
actualPredicateAccessPaths);
+
Assertions.assertTrue(actualAllAccessPaths.containsAll(actualPredicateAccessPaths));
Map<Integer, DataType> slotIdToDataTypes = new LinkedHashMap<>();
Consumer<Expression> assertHasSameType = e -> {
@@ -1278,21 +1375,22 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
// Parent NULL path must be stripped from allPaths when a child path
is also required.
// Otherwise BE StructFileColumnIterator sees the parent NULL sub-path
first, switches
// the whole struct iterator to NULL_MAP_ONLY, and skips the child
iterator.
- // predicateAccessPaths keeps [s, NULL] because the predicate itself
still uses it.
+ // predicateAccessPaths drops [s, NULL] too, keeping it a subset of
allAccessPaths.
assertColumn("select struct_element(s, 'city') from tbl where s is
null",
"struct<city:text>",
ImmutableList.of(path("s", "city")),
- ImmutableList.of(path("s", "NULL")));
+ ImmutableList.of());
// This shape is closer to the production bug: one predicate needs the
parent
// null map, another predicate needs a child null map, and the
projection needs
// a different child data path. The parent [s.NULL] cannot remain in
allPaths
- // with [s.data], but both predicate NULL paths must remain in
predicate paths.
+ // with [s.data], so it is also removed from predicate paths;
[s.city.NULL] stays
+ // because it is still present in allPaths.
assertColumn("select struct_element(s, 'data') from tbl "
+ "where s is null or struct_element(s, 'city') is
null",
"struct<city:text,data:array<map<int,struct<a:int,b:double>>>>",
ImmutableList.of(path("s", "city", "NULL"), path("s", "data")),
- ImmutableList.of(path("s", "NULL"), path("s", "city",
"NULL")));
+ ImmutableList.of(path("s", "city", "NULL")));
}
@Test
diff --git
a/regression-test/data/nereids_rules_p0/column_pruning/null_column_pruning.out
b/regression-test/data/nereids_rules_p0/column_pruning/null_column_pruning.out
index 440b393001c..5e18f989b1a 100644
---
a/regression-test/data/nereids_rules_p0/column_pruning/null_column_pruning.out
+++
b/regression-test/data/nereids_rules_p0/column_pruning/null_column_pruning.out
@@ -3,6 +3,8 @@
-- !2 --
+-- !string_full_access_strips_null --
+
-- !3 --
1
@@ -14,8 +16,12 @@
-- !6 --
+-- !array_full_access_strips_null --
+
-- !7 --
+-- !map_full_access_strips_null --
+
-- !8 --
-- !9 --
diff --git
a/regression-test/data/nereids_rules_p0/column_pruning/string_length_column_pruning.out
b/regression-test/data/nereids_rules_p0/column_pruning/string_length_column_pruning.out
index 658e8ebbee8..36b77d17167 100644
---
a/regression-test/data/nereids_rules_p0/column_pruning/string_length_column_pruning.out
+++
b/regression-test/data/nereids_rules_p0/column_pruning/string_length_column_pruning.out
@@ -1,4 +1,13 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !array_full_access_strips_offset --
+1 3 [1, 2, 3]
+
+-- !map_element_full_access_strips_offset --
+1 2 [1, 2]
+
+-- !map_value_array_predicate_offset_covered --
+true
+
-- !arr_struct_mixed --
1 true 10
2 true 30
diff --git
a/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy
b/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy
index 2707670f7d8..cbbeac29357 100644
---
a/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy
+++
b/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy
@@ -24,8 +24,8 @@
// nested columns: <col>: all access paths: [<col>.NULL]
//
// When the same column is also accessed for data (e.g., projected or used in
-// struct_element), the NULL-only path must be stripped from allAccessPaths but
-// preserved in predicateAccessPaths.
+// struct_element), the NULL-only path must be stripped from allAccessPaths and
+// predicateAccessPaths unless the same path is still present in allAccessPaths.
suite("null_column_pruning") {
sql """ DROP TABLE IF EXISTS ncp_tbl """
@@ -69,6 +69,19 @@ suite("null_column_pruning") {
order_qt_2 "select 1 from ncp_tbl where str_col is null";
+ // Direct full access to the same field covers its null flag for any data type.
+ // The exact [str_col.NULL] metadata path must be removed.
+ explain {
+ sql "select id, str_col from ncp_tbl where str_col is null"
+ notContains "str_col.NULL"
+ notContains "predicate access paths:"
+ }
+
+ order_qt_string_full_access_strips_null """
+ select id, str_col from ncp_tbl where str_col is null
+ order by id
+ """
+
// ─── String IS NOT NULL only
────────────────────────────────────────────────
explain {
sql "select 1 from ncp_tbl where str_col is not null"
@@ -106,6 +119,19 @@ suite("null_column_pruning") {
order_qt_6 "select 1 from ncp_tbl where arr_col is null";
+ explain {
+ sql "select id, arr_col from ncp_tbl where arr_col is null"
+ contains "nested columns"
+ contains "all access paths: [arr_col]"
+ notContains "arr_col.NULL"
+ notContains "predicate access paths:"
+ }
+
+ order_qt_array_full_access_strips_null """
+ select id, arr_col from ncp_tbl where arr_col is null
+ order by id
+ """
+
// ─── Map IS NULL only
───────────────────────────────────────────────────────
explain {
sql "select 1 from ncp_tbl where map_col is null"
@@ -115,6 +141,19 @@ suite("null_column_pruning") {
order_qt_7 "select 1 from ncp_tbl where map_col is null";
+ explain {
+ sql "select id, map_col from ncp_tbl where map_col is null"
+ contains "nested columns"
+ contains "all access paths: [map_col]"
+ notContains "map_col.NULL"
+ notContains "predicate access paths:"
+ }
+
+ order_qt_map_full_access_strips_null """
+ select id, map_col from ncp_tbl where map_col is null
+ order by id
+ """
+
// ─── Int IS NULL only
───────────────────────────────────────────────────────
// Nullable primitive type (INT) accessed only via IS NULL → emit
[int_col, NULL]
// access path so BE only reads the null flag.
@@ -143,7 +182,7 @@ suite("null_column_pruning") {
sql "select int_col from ncp_tbl where int_col is null"
contains "nested columns"
contains "all access paths: [int_col]"
- contains "predicate access paths: [int_col.NULL]"
+ notContains "predicate access paths:"
}
order_qt_10 "select int_col from ncp_tbl where int_col is null";
@@ -153,14 +192,14 @@ suite("null_column_pruning") {
// The parent struct_col.NULL path must NOT stay in allAccessPaths with child paths.
// BE StructFileColumnIterator treats a leading NULL sub-path as NULL_MAP_ONLY; if
// allAccessPaths were [struct_col.NULL, struct_col.city], BE would skip the city
- // child iterator and default-fill the projected value. predicateAccessPaths still
- // keeps struct_col.NULL so the predicate requirement is visible, while the normal
- // nullable struct read materializes the parent null map together with child data.
+ // child iterator and default-fill the projected value. The normal nullable struct
+ // read materializes the parent null map together with child data, and
+ // predicateAccessPaths is filtered so it remains a subset of allAccessPaths.
explain {
sql "select struct_element(struct_col, 'city') from ncp_tbl where
struct_col is null"
contains "nested columns"
contains "all access paths: [struct_col.city]"
- contains "predicate access paths: [struct_col.NULL]"
+ notContains "predicate access paths:"
}
order_qt_11 "select struct_element(struct_col, 'city') from ncp_tbl where
struct_col is null";
@@ -174,7 +213,7 @@ suite("null_column_pruning") {
sql "select struct_element(struct_col, 'zip') from ncp_tbl where
struct_col is null or struct_element(struct_col, 'city') is null"
contains "nested columns"
contains "all access paths: [struct_col.city.NULL, struct_col.zip]"
- contains "predicate access paths: [struct_col.NULL,
struct_col.city.NULL]"
+ contains "predicate access paths: [struct_col.city.NULL]"
}
order_qt_parent_null_with_child_data "select struct_element(struct_col,
'zip') from ncp_tbl where struct_col is null or struct_element(struct_col,
'city') is null";
@@ -186,7 +225,7 @@ suite("null_column_pruning") {
sql "select struct_col from ncp_tbl where struct_col is null"
contains "nested columns"
contains "all access paths: [struct_col]"
- contains "predicate access paths: [struct_col.NULL]"
+ notContains "predicate access paths:"
}
order_qt_12 "select struct_col from ncp_tbl where struct_col is null";
@@ -200,7 +239,7 @@ suite("null_column_pruning") {
sql "select struct_element(struct_col, 'city') from ncp_tbl where
struct_element(struct_col, 'city') is null"
contains "nested columns"
contains "all access paths: [struct_col.city]"
- contains "predicate access paths: [struct_col.city.NULL]"
+ notContains "predicate access paths:"
}
order_qt_13 "select struct_element(struct_col, 'city') from ncp_tbl where
struct_element(struct_col, 'city') is null";
@@ -355,13 +394,13 @@ suite("null_column_pruning") {
// ─── Mixed: map_keys IS NULL + map_keys projected
──────────────────────────
// Projection needs key data, while the predicate checks whether the parent map
- // is NULL. The parent NULL path is kept only in predicateAccessPaths so BE does
- // not switch the whole map iterator to NULL_MAP_ONLY and skip the keys child.
+ // is NULL. The parent NULL path must not stay in either access path list, so BE
+ // does not switch the whole map iterator to NULL_MAP_ONLY and skip the keys child.
explain {
sql "select map_keys(map_col) from ncp_tbl where map_keys(map_col) is
null"
contains "nested columns"
contains "all access paths: [map_col.KEYS]"
- contains "predicate access paths: [map_col.NULL]"
+ notContains "predicate access paths:"
}
order_qt_25 "select map_keys(map_col) from ncp_tbl where map_keys(map_col)
is null";
@@ -373,7 +412,7 @@ suite("null_column_pruning") {
sql "select map_values(map_col) from ncp_tbl where map_values(map_col)
is null"
contains "nested columns"
contains "all access paths: [map_col.VALUES]"
- contains "predicate access paths: [map_col.NULL]"
+ notContains "predicate access paths:"
}
order_qt_26 "select map_values(map_col) from ncp_tbl where
map_values(map_col) is null";
diff --git
a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
index a0da286867c..2a4d56b570b 100644
---
a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
+++
b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
@@ -45,7 +45,8 @@ suite("string_length_column_pruning") {
struct_col STRUCT<f1: INT, f3: STRING>,
arr_col ARRAY<INT>,
map_col MAP<STRING, STRING>,
- map_arr_col MAP<STRING, ARRAY<INT>>
+ map_arr_col MAP<STRING, ARRAY<INT>>,
+ map_arr_struct_col MAP<STRING, ARRAY<STRUCT<verified: BOOLEAN,
value: INT>>>
) ENGINE = OLAP
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 1
@@ -53,7 +54,9 @@ suite("string_length_column_pruning") {
"""
sql """
INSERT INTO slcp_str_tbl VALUES
- (1, 'hello', named_struct('f1', 10, 'f3', 'world'), [1, 2, 3],
{'a': 'x', 'b': 'y'}, {'a': [1, 2], 'b': [3]})
+ (1, 'hello', named_struct('f1', 10, 'f3', 'world'), [1, 2, 3],
{'a': 'x', 'b': 'y'},
+ {'a': [1, 2], 'b': [3]},
+ map('a', array(named_struct('verified', true, 'value', 10))))
"""
// ─── Optimizable cases
──────────────────────────────────────────────────────
@@ -151,6 +154,21 @@ suite("string_length_column_pruning") {
notContains "type=bigint"
}
+ // Full access to the same array field covers its OFFSET metadata for any data type.
+ explain {
+ sql "select id, cardinality(arr_col), arr_col from slcp_str_tbl"
+ contains "nested columns"
+ contains "all access paths: [arr_col]"
+ notContains "arr_col.OFFSET"
+ notContains "predicate access paths:"
+ notContains "type=bigint"
+ }
+
+ order_qt_array_full_access_strips_offset """
+ select id, cardinality(arr_col), arr_col from slcp_str_tbl
+ order by id
+ """
+
// ─── Map column cases
────────────────────────────────────────────────────────
// cardinality(map_col): only the offset array is needed → OFFSET access
path emitted,
@@ -254,6 +272,56 @@ suite("string_length_column_pruning") {
notContains "type=bigint"
}
+ // value array item also accessed directly → full VALUES item path covers value OFFSET.
+ explain {
+ sql "select cardinality(map_arr_struct_col['a']),
map_arr_struct_col['a'][1].verified from slcp_str_tbl"
+ contains "nested columns"
+ contains "map_arr_struct_col.*.*.verified"
+ notContains "map_arr_struct_col.*.OFFSET"
+ notContains "type=bigint"
+ }
+
+ explain {
+ sql "select id, cardinality(map_arr_col['a']), map_arr_col['a'] from
slcp_str_tbl"
+ contains "nested columns"
+ contains "all access paths: [map_arr_col.*]"
+ notContains "map_arr_col.*.OFFSET"
+ notContains "predicate access paths:"
+ notContains "type=bigint"
+ }
+
+ order_qt_map_element_full_access_strips_offset """
+ select id, cardinality(map_arr_col['a']), map_arr_col['a'] from
slcp_str_tbl
+ order by id
+ """
+
+ // Predicate OFFSET path must also be removed when the projected value field already
+ // makes the corresponding array data path available. predicateAccessPaths remains a
+ // subset of allAccessPaths.
+ explain {
+ sql "select map_arr_struct_col['a'][1].verified from slcp_str_tbl
where cardinality(map_arr_struct_col['a']) > 0"
+ contains "nested columns"
+ contains "all access paths: [map_arr_struct_col.*.*.verified]"
+ notContains "map_arr_struct_col.*.OFFSET"
+ notContains "predicate access paths:"
+ notContains "type=bigint"
+ }
+
+ order_qt_map_value_array_predicate_offset_covered """
+ select map_arr_struct_col['a'][1].verified from slcp_str_tbl
+ where cardinality(map_arr_struct_col['a']) > 0
+ order by 1
+ """
+
+ // value array item also accessed directly → full VALUES item path covers value NULL.
+ explain {
+ sql "select map_arr_struct_col['a'][1].verified from slcp_str_tbl
where map_arr_struct_col['a'] is null"
+ contains "nested columns"
+ contains "map_arr_struct_col.*.*.verified"
+ notContains "map_arr_struct_col.*.NULL"
+ notContains "predicate access paths:"
+ }
+
// ─── Non-optimizable cases
──────────────────────────────────────────────────
// str_col also projected directly → full chars data needed, OFFSET path
suppressed.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]