This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6cd402ba448 [fix](NestedColumnPruning) Rewrite the
NestedColumnPruning meta-path handling to properly eliminate redundant
NULL/OFFSET access paths and prevent unsafe BE reader mode combinations.
(#64535)
6cd402ba448 is described below
commit 6cd402ba448e335f1a4d1cbe0397d6eb7f5c383f
Author: minghong <[email protected]>
AuthorDate: Fri Jun 19 23:05:53 2026 +0800
[fix](NestedColumnPruning) Rewrite the NestedColumnPruning meta-path
handling to properly eliminate redundant NULL/OFFSET access paths and prevent
unsafe BE reader mode combinations. (#64535)
### What problem does this PR solve?
1. Map-star path normalization (normalizeMapValueMetaOnlyAccessPaths)
- Rewrites [m, *, OFFSET] → [m, KEYS] + [m, VALUES, OFFSET], and [m, *,
NULL] → [m, KEYS] + [m, VALUES, NULL]
- Unified OFFSET and NULL normalization via
collectMapValueMetaOnlyAccessPaths(String metaSuffix)
2. Meta path stripping (MetaPathStriper)
- Two-level approach:
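- Level 1 (same-prefix): priority is [prefix] > [prefix, OFFSET] > [prefix, NULL];
a higher-priority path strips the lower-priority ones with the same prefix
- Level 2 (deeper-prefix): a deeper path covers a shallower meta path; the
OFFSET and NULL cases are merged into a single
stripMetaPathsByDeeperPrefix(metaSuffix) function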
- Type-aware */VALUES/KEYS equivalence for map columns via
compareMetaPathPrefixCoverage
- Depth guard: in Level 2 a covering meta path must be strictly deeper than
the target, so a same-depth meta path of the other type (OFFSET vs. NULL)
is not removed
- Supplemental KEYS path handling when removing OFFSET paths
3. MV fragment meta path handling
- skipMetaPath flag in AccessPathExpressionCollector — MV fragments skip
meta path collection at the source, falling through to default visitor
for data-only access
4. Lambda variable tracking fix
- Replaced the getInputSlots()-based check with a scan of ArrayItemSlot by
name to detect unreferenced lambda variables (fixes scope pollution in
nested lambdas)
5. Phase restructure
- Added Phase 1.5: expand+strip runs for both allAccessPaths and
predicateAccessPaths before either is built, ensuring predicate paths
use the complete covering set
6. String-like type handling
- Removed the special isStringLikeType() skip and replaced it with
shouldSkipAccessInfo, which correctly handles full-access paths [col]
7. visitMapContainsEntry three-argument fix
- Visits all non-map arguments (arg1+arg2) instead of only arg1
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---------
Co-authored-by: Claude <[email protected]>
---
.../rewrite/AccessPathExpressionCollector.java | 26 +-
.../nereids/rules/rewrite/AccessPathInfo.java | 2 +-
.../rules/rewrite/AccessPathPlanCollector.java | 13 +-
.../nereids/rules/rewrite/MetaPathStriper.java | 300 ++++++++
.../nereids/rules/rewrite/NestedColumnPruning.java | 812 +++++----------------
.../rules/rewrite/PruneNestedColumnTest.java | 202 ++++-
.../column_pruning/null_column_pruning.groovy | 17 +-
.../string_length_column_pruning.groovy | 53 +-
8 files changed, 717 insertions(+), 708 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
index 75dce5e3618..0633637b337 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
@@ -85,15 +85,17 @@ import java.util.Stack;
public class AccessPathExpressionCollector extends
DefaultExpressionVisitor<Void, CollectorContext> {
private StatementContext statementContext;
private boolean bottomPredicate;
+ private boolean skipMetaPath;
private Multimap<Integer, CollectAccessPathResult> slotToAccessPaths;
private Stack<Map<String, Expression>> nameToLambdaArguments = new
Stack<>();
public AccessPathExpressionCollector(
StatementContext statementContext, Multimap<Integer,
CollectAccessPathResult> slotToAccessPaths,
- boolean bottomPredicate) {
+ boolean bottomPredicate, boolean skipMetaPath) {
this.statementContext = statementContext;
this.slotToAccessPaths = slotToAccessPaths;
this.bottomPredicate = bottomPredicate;
+ this.skipMetaPath = skipMetaPath;
}
public void collect(Expression expression) {
@@ -205,9 +207,13 @@ public class AccessPathExpressionCollector extends
DefaultExpressionVisitor<Void
// single check covers nested CHAR cases too.
if (arg.getDataType().isStringLikeType() &&
!arg.getDataType().isCharType()
&& context.accessPathBuilder.isEmpty()) {
+ if (skipMetaPath) {
+ return arg.accept(this,
+ new CollectorContext(context.statementContext, false));
+ }
CollectorContext offsetContext =
new CollectorContext(context.statementContext,
context.bottomFilter);
-
offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_STRING_OFFSET);
+
offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_OFFSET);
return arg.accept(this, offsetContext);
}
// fall through to default (recurse into children with fresh contexts)
@@ -219,9 +225,13 @@ public class AccessPathExpressionCollector extends
DefaultExpressionVisitor<Void
Expression arg = mapSize.child();
DataType argType = arg.getDataType();
if (argType.isMapType() && context.accessPathBuilder.isEmpty()) {
+ if (skipMetaPath) {
+ return arg.accept(this,
+ new CollectorContext(context.statementContext, false));
+ }
CollectorContext offsetContext =
new CollectorContext(context.statementContext,
context.bottomFilter);
-
offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_STRING_OFFSET);
+
offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_OFFSET);
return arg.accept(this, offsetContext);
}
return visit(mapSize, context);
@@ -234,9 +244,13 @@ public class AccessPathExpressionCollector extends
DefaultExpressionVisitor<Void
// Arrays and maps share the same offset-array + data storage layout
as strings on the BE.
DataType argType = arg.getDataType();
if ((argType.isArrayType() || argType.isMapType()) &&
context.accessPathBuilder.isEmpty()) {
+ if (skipMetaPath) {
+ return arg.accept(this,
+ new CollectorContext(context.statementContext, false));
+ }
CollectorContext offsetContext =
new CollectorContext(context.statementContext,
context.bottomFilter);
-
offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_STRING_OFFSET);
+
offsetContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_OFFSET);
// cardinality(map_keys(m)) == cardinality(m) ==
cardinality(map_values(m)):
// all three count map entries, so emit the same [map_col, OFFSET]
path.
Expression effectiveArg = (arg instanceof MapKeys || arg
instanceof MapValues)
@@ -621,6 +635,10 @@ public class AccessPathExpressionCollector extends
DefaultExpressionVisitor<Void
// and nested access (element_at(s, 'city') IS NULL → [s, city, NULL]).
// For unrecognized expressions, the default visitor resets context,
safely discarding NULL.
if (arg.nullable() && context.accessPathBuilder.isEmpty()) {
+ if (skipMetaPath) {
+ return arg.accept(this,
+ new CollectorContext(context.statementContext, false));
+ }
CollectorContext nullContext =
new CollectorContext(context.statementContext,
context.bottomFilter);
nullContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_NULL);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java
index 0b361c22760..8656aaacef7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathInfo.java
@@ -29,7 +29,7 @@ public class AccessPathInfo {
public static final String ACCESS_MAP_VALUES = "VALUES";
// Suffix appended to a string-column path to indicate that only the
offset array
// (not the char data) is needed — agreed with BE as the special path
component name.
- public static final String ACCESS_STRING_OFFSET = "OFFSET";
+ public static final String ACCESS_OFFSET = "OFFSET";
// Suffix appended to a column path to indicate that only the null flag
// (not the actual data) is needed — used when the column is only accessed
via IS NULL / IS NOT NULL.
public static final String ACCESS_NULL = "NULL";
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java
index 6d592f584d2..7e7674d2058 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java
@@ -64,6 +64,11 @@ import java.util.TreeSet;
public class AccessPathPlanCollector extends DefaultPlanVisitor<Void,
StatementContext> {
private Multimap<Integer, CollectAccessPathResult> allSlotToAccessPaths =
LinkedHashMultimap.create();
private Map<Slot, List<CollectAccessPathResult>> scanSlotToAccessPaths =
new LinkedHashMap<>();
+ private boolean skipMetaPath;
+
+ public void setSkipMetaPath(boolean skipMetaPath) {
+ this.skipMetaPath = skipMetaPath;
+ }
public Map<Slot, List<CollectAccessPathResult>> collect(Plan root,
StatementContext context) {
root.accept(this, context);
@@ -83,7 +88,7 @@ public class AccessPathPlanCollector extends
DefaultPlanVisitor<Void, StatementC
List<Slot> output = generate.getGeneratorOutput();
AccessPathExpressionCollector exprCollector
- = new AccessPathExpressionCollector(context,
allSlotToAccessPaths, false);
+ = new AccessPathExpressionCollector(context,
allSlotToAccessPaths, false, skipMetaPath);
for (int i = 0; i < output.size(); i++) {
Slot generatorOutput = output.get(i);
Function function = generators.get(i);
@@ -229,7 +234,7 @@ public class AccessPathPlanCollector extends
DefaultPlanVisitor<Void, StatementC
@Override
public Void visitLogicalProject(LogicalProject<? extends Plan> project,
StatementContext context) {
AccessPathExpressionCollector exprCollector
- = new AccessPathExpressionCollector(context,
allSlotToAccessPaths, false);
+ = new AccessPathExpressionCollector(context,
allSlotToAccessPaths, false, skipMetaPath);
for (NamedExpression output : project.getProjects()) {
// e.g. select element_at(s, 'city') from (select s from tbl)a;
// we will not treat the inner `s` access all path
@@ -385,7 +390,7 @@ public class AccessPathPlanCollector extends
DefaultPlanVisitor<Void, StatementC
private void collectByExpressions(Plan plan, StatementContext context,
boolean bottomPredicate) {
AccessPathExpressionCollector exprCollector
- = new AccessPathExpressionCollector(context,
allSlotToAccessPaths, bottomPredicate);
+ = new AccessPathExpressionCollector(context,
allSlotToAccessPaths, bottomPredicate, skipMetaPath);
for (Expression expression : plan.getExpressions()) {
exprCollector.collect(expression);
}
@@ -417,6 +422,6 @@ public class AccessPathPlanCollector extends
DefaultPlanVisitor<Void, StatementC
}
String lastComponent = path.get(path.size() - 1);
return AccessPathInfo.ACCESS_NULL.equals(lastComponent)
- || AccessPathInfo.ACCESS_STRING_OFFSET.equals(lastComponent);
+ || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MetaPathStriper.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MetaPathStriper.java
new file mode 100644
index 00000000000..8f08699ec23
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MetaPathStriper.java
@@ -0,0 +1,300 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite;
+
+import org.apache.doris.analysis.ColumnAccessPathType;
+import org.apache.doris.common.Pair;
+
+import com.google.common.collect.Multimap;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * Strips redundant metadata-only (NULL/OFFSET) access paths using pure
+ * string-prefix comparison. Map-level wildcards must already have been
+ * expanded into {@code KEYS} + {@code VALUES} by the caller.
+ *
+ * <p>Single entry point: {@link #strip(int, Multimap, Multimap)}.
+ */
+public final class MetaPathStriper {
+
+ private MetaPathStriper() {}
+
+ /**
+ * Strip redundant metadata-only NULL/OFFSET paths using pure string-prefix
+ * comparison, keeping enough real paths for BE readers to avoid
+ * OFFSET_ONLY / NULL_MAP_ONLY modes that skip required child data.
+ *
+ * <p>Stripping is organised in two levels:
+ *
+ * <p><b>Level 1 — Same-prefix priority:</b> when two paths share the same
+ * prefix and differ only in the final meta suffix, the higher-priority one
+ * eliminates the lower.
+ * <pre>{@code
+ * Data > OFFSET > NULL
+ * }</pre>
+ * <ul>
+ * <li>{@code Data} strips {@code OFFSET}: {@code [a]} strips {@code [a,
OFFSET]}.</li>
+ * <li>{@code Data} strips {@code NULL}: {@code [a]} strips {@code [a,
NULL]}.</li>
+ * <li>{@code OFFSET} strips {@code NULL}: {@code [a, OFFSET]} strips
+ * {@code [a, NULL]}.</li>
+ * </ul>
+ *
+ * <p><b>Level 2 — Deeper-prefix coverage:</b> when a covering path goes
+ * deeper into the type tree, its data reader already materialises the
+ * container, making a shallower meta-only path redundant.
+ * <ul>
+ * <li>Target suffix {@code OFFSET}, covered by deeper:
+ * <ul>
+ * <li>{@code Data}: {@code [a, *, field]} strips {@code [a,
OFFSET]}.</li>
+ * <li>{@code OFFSET}: {@code [a, *, OFFSET]} strips {@code [a,
OFFSET]}.</li>
+ * <li>{@code NULL}: {@code [a, *, NULL]} strips {@code [a,
OFFSET]}.</li>
+ * </ul>
+ * </li>
+ * <li>Target suffix {@code NULL}, covered by deeper:
+ * <ul>
+ * <li>{@code Data}: {@code [a, b, c]} strips {@code [a, b,
NULL]}.</li>
+ * <li>{@code OFFSET}: {@code [a, *, OFFSET]} strips {@code [a,
NULL]}.</li>
+ * <li>{@code NULL}: {@code [a, *, NULL]} strips {@code [a,
NULL]}.</li>
+ * </ul>
+ * </li>
+ * </ul>
+ *
+ * <p>Pre-condition: map-level {@code *} wildcards must already have been
+ * expanded into {@code KEYS} + {@code VALUES} by the caller. This class
+ * uses pure string-prefix comparison and is type-unaware.
+ */
+ public static void strip(
+ int slotId,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
targetAccessPaths,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
coveringAccessPaths) {
+ stripExactPrefixCoveredMetaPaths(slotId, targetAccessPaths,
coveringAccessPaths);
+ stripNullBySamePrefixOffset(slotId, targetAccessPaths);
+
+ stripMetaPathsByDeeperPrefix(slotId, AccessPathInfo.ACCESS_OFFSET,
+ targetAccessPaths, coveringAccessPaths);
+ stripMetaPathsByDeeperPrefix(slotId, AccessPathInfo.ACCESS_NULL,
+ targetAccessPaths, coveringAccessPaths);
+ }
+
+ // ========================================================================
+ // Path helpers
+ // ========================================================================
+
+ private static boolean isMetaPath(List<String> path) {
+ if (path.isEmpty()) {
+ return false;
+ }
+ String lastComponent = path.get(path.size() - 1);
+ return AccessPathInfo.ACCESS_NULL.equals(lastComponent)
+ || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent);
+ }
+
+ private static List<List<String>> collectPaths(
+ Collection<Pair<ColumnAccessPathType, List<String>>> a,
+ Collection<Pair<ColumnAccessPathType, List<String>>> b, boolean
meta) {
+ List<List<String>> result = new ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p : a) {
+ if (!p.second.isEmpty() && isMetaPath(p.second) == meta) {
+ result.add(p.second);
+ }
+ }
+ for (Pair<ColumnAccessPathType, List<String>> p : b) {
+ if (!p.second.isEmpty() && isMetaPath(p.second) == meta) {
+ result.add(p.second);
+ }
+ }
+ return result;
+ }
+
+ private static boolean isPrefixCovered(List<String> prefix, List<String>
coveringPath) {
+ if (coveringPath.isEmpty()) {
+ return true;
+ }
+ int minLen = Math.min(prefix.size(), coveringPath.size());
+ for (int i = 0; i < minLen; i++) {
+ if (!prefix.get(i).equals(coveringPath.get(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // ========================================================================
+ // Level 1 — same-prefix priority
+ // ========================================================================
+
+ /**
+ * {@code [prefix]} strips {@code [prefix, OFFSET]} and {@code [prefix,
NULL]}.
+ */
+ private static void stripExactPrefixCoveredMetaPaths(
+ int slotId,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
targetAccessPaths,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
coveringAccessPaths) {
+ Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
targetAccessPaths.get(slotId);
+ if (targetPaths.isEmpty()) {
+ return;
+ }
+
+ List<List<String>> fullAccessPaths = collectPaths(
+ coveringAccessPaths.get(slotId), targetPaths, false);
+
+ List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new
ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
+ List<String> path = p.second;
+ if (!isMetaPath(path)) {
+ continue;
+ }
+ List<String> prefix = path.subList(0, path.size() - 1);
+ for (List<String> fullAccessPath : fullAccessPaths) {
+ if (isPrefixCovered(prefix, fullAccessPath)
+ && prefix.size() >= fullAccessPath.size()) {
+ pathsToRemove.add(p);
+ break;
+ }
+ }
+ }
+ targetPaths.removeAll(pathsToRemove);
+ }
+
+ /**
+ * {@code [prefix, OFFSET]} strips {@code [prefix, NULL]}.
+ */
+ private static void stripNullBySamePrefixOffset(
+ int slotId, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> allAccessPaths) {
+ Collection<Pair<ColumnAccessPathType, List<String>>> slotPaths =
allAccessPaths.get(slotId);
+ if (slotPaths.isEmpty()) {
+ return;
+ }
+
+ List<Pair<ColumnAccessPathType, List<String>>> toRemove = new
ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p : slotPaths) {
+ List<String> path = p.second;
+ if (path.isEmpty() ||
!AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) {
+ continue;
+ }
+ List<String> prefix = path.subList(0, path.size() - 1);
+ for (Pair<ColumnAccessPathType, List<String>> q : slotPaths) {
+ List<String> other = q.second;
+ if (other == path || other.isEmpty()) {
+ continue;
+ }
+ if (other.size() != path.size()
+ ||
!AccessPathInfo.ACCESS_OFFSET.equals(other.get(other.size() - 1))) {
+ continue;
+ }
+ List<String> otherPrefix = other.subList(0, other.size() - 1);
+ if (isPrefixCovered(prefix, otherPrefix)) {
+ toRemove.add(p);
+ break;
+ }
+ }
+ }
+ slotPaths.removeAll(toRemove);
+ }
+
+ // ========================================================================
+ // Level 2 — deeper-prefix coverage
+ // ========================================================================
+
+ private static void stripMetaPathsByDeeperPrefix(
+ int slotId, String metaSuffix,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
targetAccessPaths,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
coveringAccessPaths) {
+ Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
targetAccessPaths.get(slotId);
+ if (targetPaths.isEmpty()) {
+ return;
+ }
+ Collection<Pair<ColumnAccessPathType, List<String>>> coveringPaths =
+ coveringAccessPaths.get(slotId);
+
+ List<List<String>> dataPaths = collectPaths(coveringPaths,
targetPaths, false);
+ stripMetaByDeeperDataPaths(slotId, targetAccessPaths, dataPaths,
metaSuffix);
+
+ List<List<String>> metaPaths = collectPaths(coveringPaths,
targetPaths, true);
+ stripMetaByDeeperMetaPaths(slotId, metaSuffix, metaPaths,
targetAccessPaths);
+ }
+
+ /**
+ * Strip target meta paths covered by a data path.
+ */
+ private static void stripMetaByDeeperDataPaths(
+ int slotId,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
targetAccessPaths,
+ List<List<String>> dataPaths, String metaSuffix) {
+ Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
+ targetAccessPaths.get(slotId);
+ if (targetPaths.isEmpty() || dataPaths.isEmpty()) {
+ return;
+ }
+
+ List<Pair<ColumnAccessPathType, List<String>>> toRemove = new
ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p : new
ArrayList<>(targetPaths)) {
+ List<String> path = p.second;
+ if (path.isEmpty() || !metaSuffix.equals(path.get(path.size() -
1))) {
+ continue;
+ }
+ List<String> prefix = path.subList(0, path.size() - 1);
+ for (List<String> dataPath : dataPaths) {
+ if (isPrefixCovered(prefix, dataPath)) {
+ toRemove.add(p);
+ break;
+ }
+ }
+ }
+ targetPaths.removeAll(toRemove);
+ }
+
+ /**
+ * Strip target meta paths covered by a strictly deeper meta path.
+ */
+ private static void stripMetaByDeeperMetaPaths(
+ int slotId, String metaSuffix,
+ List<List<String>> coveringPaths,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
targetAccessPaths) {
+ Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
+ targetAccessPaths.get(slotId);
+ if (targetPaths.isEmpty() || coveringPaths.isEmpty()) {
+ return;
+ }
+
+ List<Pair<ColumnAccessPathType, List<String>>> toRemove = new
ArrayList<>();
+ for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
+ List<String> targetPath = p.second;
+ if (targetPath.isEmpty() ||
!metaSuffix.equals(targetPath.get(targetPath.size() - 1))) {
+ continue;
+ }
+ List<String> targetPrefix = targetPath.subList(0,
targetPath.size() - 1);
+ for (List<String> coveringPath : coveringPaths) {
+ if (coveringPath == targetPath || coveringPath.isEmpty()) {
+ continue;
+ }
+ if (coveringPath.size() - 1 <= targetPath.size() - 1) {
+ continue;
+ }
+ if (isPrefixCovered(targetPrefix, coveringPath)) {
+ toRemove.add(p);
+ break;
+ }
+ }
+ }
+ targetPaths.removeAll(toRemove);
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
index 50416a29408..5a3a55d3782 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
@@ -91,9 +91,13 @@ public class NestedColumnPruning implements CustomRewriter {
return plan;
}
AccessPathPlanCollector collector = new AccessPathPlanCollector();
- Map<Slot, List<CollectAccessPathResult>> slotToAccessPaths =
collector.collect(plan, statementContext);
- Map<Integer, AccessPathInfo> slotToResult =
pruneDataType(slotToAccessPaths,
+ // MV rewrite fragments must not be perturbed by metadata-only
paths
+ // (NULL/OFFSET) that would otherwise prune the slot to a narrower
type.
+ // Skip collecting them at the source instead of working around
them downstream.
+ collector.setSkipMetaPath(
jobContext.getCascadesContext().isMaterializedViewRewritePlanFragment());
+ Map<Slot, List<CollectAccessPathResult>> slotToAccessPaths =
collector.collect(plan, statementContext);
+ Map<Integer, AccessPathInfo> slotToResult =
pruneDataType(slotToAccessPaths);
if (!slotToResult.isEmpty()) {
Map<Integer, AccessPathInfo> slotIdToPruneType =
Maps.newLinkedHashMap();
@@ -203,8 +207,7 @@ public class NestedColumnPruning implements CustomRewriter {
}
private static Map<Integer, AccessPathInfo> pruneDataType(
- Map<Slot, List<CollectAccessPathResult>> slotToAccessPaths,
- boolean skipDataSkippingOnlyAccessPath) {
+ Map<Slot, List<CollectAccessPathResult>> slotToAccessPaths) {
Map<Integer, AccessPathInfo> result = new LinkedHashMap<>();
Map<Slot, DataTypeAccessTree> slotIdToAllAccessTree = new
LinkedHashMap<>();
Map<Slot, DataTypeAccessTree> slotIdToPredicateAccessTree = new
LinkedHashMap<>();
@@ -236,19 +239,6 @@ public class NestedColumnPruning implements CustomRewriter
{
}
continue;
}
- if (skipDataSkippingOnlyAccessPath
- &&
containsDataSkippingOnlyAccessPath(collectAccessPathResults)) {
- // An MV rewrite child context optimizes a temporary plan
fragment rather
- // than the final plan. A nested metadata-only path such as
- // [s, city, NULL] or [s, city, OFFSET] would otherwise prune
the scan slot
- // to only that nested field, while the final MV rewritten
plan may still
- // reuse the same slot as a full complex value or need another
child. Drop
- // access-info for the whole slot instead of just removing
that path:
- // predicate expressions inside this fragment still reference
the original
- // slot shape, so partial pruning after deleting the
predicate-only path
- // could make the fragment itself inconsistent.
- continue;
- }
for (CollectAccessPathResult collectAccessPathResult :
collectAccessPathResults) {
List<String> path = collectAccessPathResult.getPath();
ColumnAccessPathType pathType =
collectAccessPathResult.getType();
@@ -270,100 +260,30 @@ public class NestedColumnPruning implements
CustomRewriter {
}
}
- // second: build non-predicate access paths
+ // phase 1.5: for slots with meta paths, expand map-star paths and
strip
+ // redundant meta paths. Strip predicate first using the COMPLETE
+ // allAccessPaths as covering, then strip allAccessPaths self-covering.
for (Entry<Slot, DataTypeAccessTree> kv :
slotIdToAllAccessTree.entrySet()) {
Slot slot = kv.getKey();
DataTypeAccessTree accessTree = kv.getValue();
- DataType prunedDataType =
accessTree.pruneDataType().orElse(slot.getDataType());
-
- if (slot.getDataType().isStringLikeType()) {
- if (accessTree.hasStringOffsetOnlyAccess()) {
- if (skipDataSkippingOnlyAccessPath) {
- continue;
- }
- // Offset-only access (e.g. length(str_col)): type stays
varchar,
- // but we must still send the access path to BE so it
skips the char data.
- stripExactCoveredDataSkippingSuffixPaths(slot,
allAccessPaths, allAccessPaths);
- stripNullSuffixPaths(slot, allAccessPaths);
- List<ColumnAccessPath> allPaths =
buildColumnAccessPaths(slot, allAccessPaths);
- result.put(slot.getExprId().asInt(),
- new AccessPathInfo(slot.getDataType(), allPaths,
new ArrayList<>()));
- } else if (accessTree.hasNullCheckOnlyAccess()) {
- if (skipDataSkippingOnlyAccessPath) {
- continue;
- }
- // Null-check-only access (e.g. str_col IS NULL): type
stays varchar,
- // but we send [col, NULL] access path so BE only reads
the null flag.
- List<ColumnAccessPath> allPaths =
buildColumnAccessPaths(slot, allAccessPaths);
- result.put(slot.getExprId().asInt(),
- new AccessPathInfo(slot.getDataType(), allPaths,
new ArrayList<>()));
- }
- // direct access (accessAll=true) or other: skip — no type
change, no access paths needed.
- continue;
- }
-
- if ((slot.getDataType().isArrayType() ||
slot.getDataType().isMapType())
- && accessTree.hasStringOffsetOnlyAccess()) {
- if (skipDataSkippingOnlyAccessPath) {
- continue;
- }
- // Offset-only access (e.g. length(arr_col) /
length(map_col)): type stays unchanged,
- // but we must send the OFFSET access path to BE so it skips
element/key-value data.
- List<ColumnAccessPath> allPaths = buildColumnAccessPaths(slot,
allAccessPaths);
- result.put(slot.getExprId().asInt(),
- new AccessPathInfo(slot.getDataType(), allPaths, new
ArrayList<>()));
- continue;
- }
-
- // Null-check-only access (e.g. col IS NULL / col IS NOT NULL):
type stays unchanged,
- // but we must send the [col, NULL] access path to BE so it only
reads the null flag.
- if (accessTree.hasNullCheckOnlyAccess()) {
- if (skipDataSkippingOnlyAccessPath) {
- continue;
- }
- List<ColumnAccessPath> allPaths = buildColumnAccessPaths(slot,
allAccessPaths);
- result.put(slot.getExprId().asInt(),
- new AccessPathInfo(slot.getDataType(), allPaths, new
ArrayList<>()));
+ if (!accessTree.hasOffsetPath() && !accessTree.hasNullPath()) {
continue;
}
+ int slotId = slot.getExprId().asInt();
+ // Expand both sets before stripping so covering is complete.
+ expandMapStarPaths(slot, allAccessPaths);
+ expandMapStarPaths(slot, predicateAccessPaths);
+ MetaPathStriper.strip(slotId, predicateAccessPaths,
allAccessPaths);
+ MetaPathStriper.strip(slotId, allAccessPaths, allAccessPaths);
+ }
- if (slot.getDataType().isMapType() &&
accessTree.hasMapValueOffsetOnlyAccess()) {
- if (skipDataSkippingOnlyAccessPath) {
- continue;
- }
- // length(map_col['key']): keys read in full (element lookup)
+ values offset-only.
- // Emit [col, KEYS] and [col, VALUES, OFFSET] directly instead
of the collected
- // [col, *, OFFSET] path which the BE cannot interpret for
split key/value access.
- String colName = slot.getName().toLowerCase();
- ColumnAccessPath keysColumnPath = ColumnAccessPath.data(
- new ArrayList<>(ImmutableList.of(colName,
AccessPathInfo.ACCESS_MAP_KEYS)));
-
- ColumnAccessPath valsOffsetColumnPath = ColumnAccessPath.data(
- new ArrayList<>(ImmutableList.of(colName,
AccessPathInfo.ACCESS_MAP_VALUES,
- AccessPathInfo.ACCESS_STRING_OFFSET)));
-
- result.put(slot.getExprId().asInt(), new AccessPathInfo(
- slot.getDataType(),
- ImmutableList.of(keysColumnPath, valsOffsetColumnPath),
- new ArrayList<>()));
- continue;
- }
-
- // If a field is read in full, its metadata-only NULL/OFFSET
access is redundant
- // for any data type: e.g. [s] covers both [s.NULL] and [s.OFFSET].
- stripExactCoveredDataSkippingSuffixPaths(slot, allAccessPaths,
allAccessPaths);
-
- // Strip OFFSET-suffix paths when a non-OFFSET path covers the
same nested field or
- // container. The overlapping array/map container may live under
the root slot itself
- // or under a nested struct field, so compare against the actual
nested prefix instead
- // of gating this logic on the root slot type.
- stripCoveredOffsetSuffixPaths(slot, allAccessPaths,
allAccessPaths);
-
- // Strip NULL-suffix paths when a non-NULL path also exists for
the same slot.
- // E.g. `SELECT col FROM t WHERE col IS NULL` — full data is
needed, NULL path is redundant.
- stripNullSuffixPaths(slot, allAccessPaths);
+ // second: build non-predicate access paths
+ for (Entry<Slot, DataTypeAccessTree> kv :
slotIdToAllAccessTree.entrySet()) {
+ Slot slot = kv.getKey();
+ DataTypeAccessTree accessTree = kv.getValue();
+ DataType prunedDataType =
accessTree.pruneDataType().orElse(slot.getDataType());
List<ColumnAccessPath> allPaths = buildColumnAccessPaths(slot,
allAccessPaths);
- if (shouldSkipAccessInfo(slot, prunedDataType, allPaths,
predicateAccessPaths)) {
+ if (shouldSkipAccessInfo(slot, prunedDataType, allPaths)) {
continue;
}
result.put(slot.getExprId().asInt(),
@@ -377,13 +297,9 @@ public class NestedColumnPruning implements CustomRewriter {
new AccessPathInfo(slot.getDataType(), allPaths, new
ArrayList<>()));
}
- // third: build predicate access path
+ // third: build predicate access path (strip already done in phase 1.5)
for (Entry<Slot, DataTypeAccessTree> kv :
slotIdToPredicateAccessTree.entrySet()) {
Slot slot = kv.getKey();
- stripExactCoveredDataSkippingSuffixPaths(slot,
predicateAccessPaths, allAccessPaths);
- stripCoveredOffsetSuffixPaths(slot, predicateAccessPaths,
allAccessPaths);
- stripCoveredArrayNullSuffixPaths(slot, predicateAccessPaths,
allAccessPaths);
- stripNullSuffixPaths(slot, predicateAccessPaths);
List<ColumnAccessPath> predicatePaths =
buildColumnAccessPaths(slot, predicateAccessPaths);
AccessPathInfo accessPathInfo =
result.get(slot.getExprId().asInt());
@@ -409,444 +325,6 @@ public class NestedColumnPruning implements CustomRewriter {
return result;
}
- private static boolean containsDataSkippingOnlyAccessPath(
- List<CollectAccessPathResult> collectAccessPathResults) {
- for (CollectAccessPathResult collectAccessPathResult :
collectAccessPathResults) {
- if
(isDataSkippingOnlyAccessPath(collectAccessPathResult.getPath())) {
- return true;
- }
- }
- return false;
- }
-
- private static boolean isDataSkippingOnlyAccessPath(List<String> path) {
- if (path.isEmpty()) {
- return false;
- }
- String lastComponent = path.get(path.size() - 1);
- return AccessPathInfo.ACCESS_NULL.equals(lastComponent)
- || AccessPathInfo.ACCESS_STRING_OFFSET.equals(lastComponent);
- }
-
- /**
- * Decide whether an OFFSET-suffix path can be removed because another
non-OFFSET path
- * already covers the same container.
- *
- * <p>For map element_at paths, {@code *} means "read keys fully, then
follow the rest of
- * the path on the value side". So a VALUES path can cover the value-side
OFFSET access,
- * but it does NOT cover the key lookup requirement. In that case we
remove the OFFSET path
- * and add a KEYS-only path instead.
- */
- private static OffsetPathRewrite analyzeOffsetPathRewrite(
- DataType slotType, List<String> path, List<List<String>>
nonOffsetPaths) {
- if (path.isEmpty()
- ||
!AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) {
- return OffsetPathRewrite.keep();
- }
- List<String> prefix = path.subList(0, path.size() - 1);
- return analyzePrefixCoverage(slotType, prefix, nonOffsetPaths);
- }
-
- private static OffsetPathRewrite analyzePrefixCoverage(
- DataType slotType, List<String> prefix, List<List<String>>
nonOffsetPaths) {
- List<List<String>> supplementalPaths = new ArrayList<>();
- for (List<String> nonOffset : nonOffsetPaths) {
- OffsetPathRewrite candidate =
compareOffsetPrefixCoverage(slotType, prefix, nonOffset);
- if (!candidate.shouldRemoveOffsetPath()) {
- continue;
- }
- if (candidate.getSupplementalPaths().isEmpty()) {
- return OffsetPathRewrite.remove();
- }
- supplementalPaths.addAll(candidate.getSupplementalPaths());
- }
- if (supplementalPaths.isEmpty()) {
- return OffsetPathRewrite.keep();
- }
- return
OffsetPathRewrite.rewriteWithSupplementalPaths(supplementalPaths);
- }
-
- /**
- * Remove OFFSET-only paths from {@code targetAccessPaths} when data paths
in
- * {@code coveringAccessPaths} already read the same array/map/string
container or a child
- * under it.
- *
- * <p>Examples:
- * <ul>
- * <li>{@code [arr.OFFSET, arr.*.field]} becomes {@code [arr.*.field]}
because the array
- * child read must keep BE on the normal data iterator path.</li>
- * <li>{@code [map.*.OFFSET, map.VALUES]} becomes {@code [map.KEYS,
map.VALUES]} because
- * {@code map['k']} still needs full keys for lookup, while values
cover the offset.</li>
- * </ul>
- */
- private static void stripCoveredOffsetSuffixPaths(
- Slot slot, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> targetAccessPaths,
- Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
coveringAccessPaths) {
- int slotId = slot.getExprId().asInt();
- Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
targetAccessPaths.get(slotId);
- if (targetPaths.isEmpty()) {
- return;
- }
-
- List<List<String>> nonOffsetPaths = new ArrayList<>();
- for (Pair<ColumnAccessPathType, List<String>> p :
coveringAccessPaths.get(slotId)) {
- List<String> path = p.second;
- if (path.isEmpty()
- ||
!AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) {
- nonOffsetPaths.add(path);
- }
- }
- for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
- List<String> path = p.second;
- if (path.isEmpty()
- ||
!AccessPathInfo.ACCESS_STRING_OFFSET.equals(path.get(path.size() - 1))) {
- nonOffsetPaths.add(path);
- }
- }
-
- List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new
ArrayList<>();
- List<Pair<ColumnAccessPathType, List<String>>> pathsToAdd = new
ArrayList<>();
- for (Pair<ColumnAccessPathType, List<String>> p : new
ArrayList<>(targetPaths)) {
- OffsetPathRewrite rewrite = analyzeOffsetPathRewrite(
- slot.getDataType(), p.second, nonOffsetPaths);
- if (!rewrite.shouldRemoveOffsetPath()) {
- continue;
- }
- pathsToRemove.add(p);
- for (List<String> supplementalPath :
rewrite.getSupplementalPaths()) {
- pathsToAdd.add(Pair.of(p.first, supplementalPath));
- }
- }
- targetPaths.removeAll(pathsToRemove);
- targetPaths.addAll(pathsToAdd);
- }
-
- /**
- * Remove array NULL-only paths from {@code targetAccessPaths} when
another path already reads
- * the same array container or data under it. This mirrors OFFSET coverage
because an array
- * element/data read must not be combined with an array NULL_MAP_ONLY read
for the same prefix.
- *
- * <p>Examples:
- * <ul>
- * <li>{@code [map.VALUES.NULL, map.VALUES.*.field]} becomes
- * {@code [map.VALUES.*.field]}.</li>
- * <li>{@code [map.*.NULL, map.VALUES.*.field]} becomes
- * {@code [map.KEYS, map.VALUES.*.field]} so map lookup keys are
still available.</li>
- * </ul>
- */
- private static void stripCoveredArrayNullSuffixPaths(
- Slot slot, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> targetAccessPaths,
- Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
coveringAccessPaths) {
- int slotId = slot.getExprId().asInt();
- Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
targetAccessPaths.get(slotId);
- if (targetPaths.isEmpty()) {
- return;
- }
-
- List<List<String>> nonNullPaths = new ArrayList<>();
- for (Pair<ColumnAccessPathType, List<String>> p :
coveringAccessPaths.get(slotId)) {
- List<String> path = p.second;
- if (path.isEmpty() ||
!AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) {
- nonNullPaths.add(path);
- }
- }
- for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
- List<String> path = p.second;
- if (path.isEmpty() ||
!AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) {
- nonNullPaths.add(path);
- }
- }
-
- List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new
ArrayList<>();
- List<Pair<ColumnAccessPathType, List<String>>> pathsToAdd = new
ArrayList<>();
- for (Pair<ColumnAccessPathType, List<String>> p : new
ArrayList<>(targetPaths)) {
- List<String> path = p.second;
- if (path.isEmpty() ||
!AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) {
- continue;
- }
- List<String> prefix = path.subList(0, path.size() - 1);
- Optional<DataType> prefixType = dataTypeAtPath(slot.getDataType(),
prefix);
- if (!prefixType.isPresent() || !prefixType.get().isArrayType()) {
- continue;
- }
- OffsetPathRewrite rewrite =
analyzePrefixCoverage(slot.getDataType(), prefix, nonNullPaths);
- if (!rewrite.shouldRemoveOffsetPath()) {
- continue;
- }
- pathsToRemove.add(p);
- for (List<String> supplementalPath :
rewrite.getSupplementalPaths()) {
- pathsToAdd.add(Pair.of(p.first, supplementalPath));
- }
- }
- targetPaths.removeAll(pathsToRemove);
- targetPaths.addAll(pathsToAdd);
- }
-
- /**
- * Remove exact metadata-only NULL/OFFSET paths when the same field is
read in full.
- * This rule is type-agnostic: once {@code s} itself is accessed, {@code
s.NULL} and
- * {@code s.OFFSET} are redundant and unsafe to keep with the full data
path.
- *
- * <p>Examples:
- * <ul>
- * <li>{@code [str_col, str_col.NULL]} becomes {@code [str_col]}.</li>
- * <li>{@code [arr, arr.OFFSET]} becomes {@code [arr]}.</li>
- * <li>{@code [map.*, map.*.OFFSET]} becomes {@code [map.*]}.</li>
- * </ul>
- */
- private static void stripExactCoveredDataSkippingSuffixPaths(
- Slot slot, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> targetAccessPaths,
- Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
coveringAccessPaths) {
- int slotId = slot.getExprId().asInt();
- Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
targetAccessPaths.get(slotId);
- if (targetPaths.isEmpty()) {
- return;
- }
-
- List<List<String>> fullAccessPaths = new ArrayList<>();
- for (Pair<ColumnAccessPathType, List<String>> p :
coveringAccessPaths.get(slotId)) {
- if (!isDataSkippingOnlyAccessPath(p.second)) {
- fullAccessPaths.add(p.second);
- }
- }
- for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
- if (!isDataSkippingOnlyAccessPath(p.second)) {
- fullAccessPaths.add(p.second);
- }
- }
-
- List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new
ArrayList<>();
- for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
- List<String> path = p.second;
- if (!isDataSkippingOnlyAccessPath(path)) {
- continue;
- }
- List<String> prefix = path.subList(0, path.size() - 1);
- for (List<String> fullAccessPath : fullAccessPaths) {
- if (pathCoversPrefix(fullAccessPath, prefix)) {
- pathsToRemove.add(p);
- break;
- }
- }
- }
- targetPaths.removeAll(pathsToRemove);
- }
-
- private static Optional<DataType> dataTypeAtPath(DataType slotType,
List<String> path) {
- if (path.isEmpty()) {
- return Optional.empty();
- }
- DataType currentType = slotType;
- for (int i = 1; i < path.size(); i++) {
- String component = path.get(i);
- if (currentType.isStructType()) {
- StructField field = ((StructType)
currentType).getField(component);
- if (field == null) {
- return Optional.empty();
- }
- currentType = field.getDataType();
- } else if (currentType.isArrayType()) {
- if (!AccessPathInfo.ACCESS_ALL.equals(component)) {
- return Optional.empty();
- }
- currentType = ((ArrayType) currentType).getItemType();
- } else if (currentType.isMapType()) {
- currentType = descendMapType((MapType) currentType, component);
- } else {
- return Optional.empty();
- }
- }
- return Optional.of(currentType);
- }
-
- private static OffsetPathRewrite compareOffsetPrefixCoverage(
- DataType slotType, List<String> prefix, List<String> nonOffset) {
- if (nonOffset.isEmpty()) {
- return OffsetPathRewrite.remove();
- }
- int minLen = Math.min(prefix.size(), nonOffset.size());
- List<List<String>> supplementalPaths = new ArrayList<>();
- DataType currentType = slotType;
- for (int i = 0; i < minLen; i++) {
- String prefixComponent = prefix.get(i);
- String nonOffsetComponent = nonOffset.get(i);
- if (i == 0) {
- if (!prefixComponent.equals(nonOffsetComponent)) {
- return OffsetPathRewrite.keep();
- }
- continue;
- }
- if (currentType.isStructType()) {
- if (!prefixComponent.equals(nonOffsetComponent)) {
- return OffsetPathRewrite.keep();
- }
- StructField field = ((StructType)
currentType).getField(prefixComponent);
- if (field == null) {
- return OffsetPathRewrite.keep();
- }
- currentType = field.getDataType();
- continue;
- }
- if (currentType.isArrayType()) {
- if (!prefixComponent.equals(nonOffsetComponent)
- || !AccessPathInfo.ACCESS_ALL.equals(prefixComponent))
{
- return OffsetPathRewrite.keep();
- }
- currentType = ((ArrayType) currentType).getItemType();
- continue;
- }
- if (currentType.isMapType()) {
- MapType mapType = (MapType) currentType;
- if (prefixComponent.equals(nonOffsetComponent)) {
- currentType = descendMapType(mapType, prefixComponent);
- continue;
- }
- if (AccessPathInfo.ACCESS_ALL.equals(prefixComponent)
- &&
AccessPathInfo.ACCESS_MAP_VALUES.equals(nonOffsetComponent)) {
- supplementalPaths.add(buildMapKeysOnlyPath(prefix, i));
- currentType = mapType.getValueType();
- continue;
- }
- if (AccessPathInfo.ACCESS_MAP_VALUES.equals(prefixComponent)
- &&
AccessPathInfo.ACCESS_ALL.equals(nonOffsetComponent)) {
- currentType = mapType.getValueType();
- continue;
- }
- if (AccessPathInfo.ACCESS_MAP_KEYS.equals(prefixComponent)
- &&
AccessPathInfo.ACCESS_ALL.equals(nonOffsetComponent)) {
- currentType = mapType.getKeyType();
- continue;
- }
- return OffsetPathRewrite.keep();
- }
- if (!prefixComponent.equals(nonOffsetComponent)) {
- return OffsetPathRewrite.keep();
- }
- }
- if (supplementalPaths.isEmpty()) {
- return OffsetPathRewrite.remove();
- }
- return
OffsetPathRewrite.rewriteWithSupplementalPaths(supplementalPaths);
- }
-
- private static DataType descendMapType(MapType mapType, String component) {
- if (AccessPathInfo.ACCESS_MAP_KEYS.equals(component)) {
- return mapType.getKeyType();
- }
- return mapType.getValueType();
- }
-
- private static List<String> buildMapKeysOnlyPath(List<String> prefix, int
mapTokenIndex) {
- List<String> keyPath = new ArrayList<>(prefix.subList(0,
mapTokenIndex));
- keyPath.add(AccessPathInfo.ACCESS_MAP_KEYS);
- return keyPath;
- }
-
- private static final class OffsetPathRewrite {
- private static final OffsetPathRewrite KEEP = new
OffsetPathRewrite(false, ImmutableList.of());
- private static final OffsetPathRewrite REMOVE = new
OffsetPathRewrite(true, ImmutableList.of());
-
- private final boolean removeOffsetPath;
- private final List<List<String>> supplementalPaths;
-
- private OffsetPathRewrite(boolean removeOffsetPath, List<List<String>>
supplementalPaths) {
- this.removeOffsetPath = removeOffsetPath;
- this.supplementalPaths = supplementalPaths;
- }
-
- private static OffsetPathRewrite keep() {
- return KEEP;
- }
-
- private static OffsetPathRewrite remove() {
- return REMOVE;
- }
-
- private static OffsetPathRewrite
rewriteWithSupplementalPaths(List<List<String>> supplementalPaths) {
- return new OffsetPathRewrite(true,
ImmutableList.copyOf(supplementalPaths));
- }
-
- private boolean shouldRemoveOffsetPath() {
- return removeOffsetPath;
- }
-
- private List<List<String>> getSupplementalPaths() {
- return supplementalPaths;
- }
- }
-
- /**
- * Strip NULL-suffix paths that are redundant because a non-NULL path
reads child
- * data below the same prefix or reads an OFFSET path over the same prefix.
- *
- * <p>Examples:
- * <ul>
- * <li>{@code [struct_col.NULL, struct_col.city]} becomes {@code
[struct_col.city]}.</li>
- * <li>{@code [str_col.NULL, str_col.OFFSET]} becomes {@code
[str_col.OFFSET]} because
- * the offset read can provide nullness for variable-length
columns.</li>
- * </ul>
- *
- * <p>A parent NULL path must also be removed when any child path is
required under the
- * same prefix, e.g. [struct_col, NULL] with [struct_col, city]. This
looks like the
- * parent null map may still be useful for predicates, but it cannot be
kept in
- * allAccessPaths with the current BE iterator contract: Struct/Array/Map
iterators
- * treat a leading NULL sub-path as NULL_MAP_ONLY and skip all children.
If FE kept
- * [struct_col.NULL, struct_col.city] in allAccessPaths, BE would read
only the
- * struct null map and default-fill city instead of routing the city child
iterator.
- * When the NULL path is removed from allAccessPaths, it must also be
removed from
- * predicateAccessPaths so the BE can rely on predicate paths being a
subset of all
- * paths. The normal nullable container read materializes the parent null
map
- * together with required children.
- */
- private static void stripNullSuffixPaths(
- Slot slot, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> allAccessPaths) {
- int slotId = slot.getExprId().asInt();
- Collection<Pair<ColumnAccessPathType, List<String>>> slotPaths =
allAccessPaths.get(slotId);
-
- List<Pair<ColumnAccessPathType, List<String>>> toRemove = new
ArrayList<>();
- for (Pair<ColumnAccessPathType, List<String>> p : slotPaths) {
- List<String> path = p.second;
- if (path.isEmpty() ||
!AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) {
- continue;
- }
- // Prefix is the column/subcolumn path without the trailing NULL
suffix.
- // A non-NULL path that equals this prefix means the same
column/subcolumn
- // is read in full, making the NULL-only path redundant.
- // An OFFSET-suffix path over the same prefix is also enough for
the BE to
- // derive null-ness for variable-length columns, so [col.NULL] is
redundant
- // when [col.OFFSET] already exists.
- List<String> prefix = path.subList(0, path.size() - 1);
- boolean covered = false;
- for (Pair<ColumnAccessPathType, List<String>> q : slotPaths) {
- List<String> other = q.second;
- if (other.isEmpty()
- ||
AccessPathInfo.ACCESS_NULL.equals(other.get(other.size() - 1))) {
- continue;
- }
- if (other.equals(prefix)) {
- covered = true;
- break;
- }
- if (hasStrictPrefix(other, prefix)) {
- covered = true;
- break;
- }
- if (other.size() == prefix.size() + 1
- &&
AccessPathInfo.ACCESS_STRING_OFFSET.equals(other.get(other.size() - 1))
- && other.subList(0, prefix.size()).equals(prefix)) {
- covered = true;
- break;
- }
- }
- if (covered) {
- toRemove.add(p);
- }
- }
- for (Pair<ColumnAccessPathType, List<String>> r : toRemove) {
- allAccessPaths.remove(slotId, r);
- }
- }
-
/**
* Keep predicate access paths as a subset of final all access paths after
NULL/OFFSET cleanup.
* Predicate paths are built from filter expressions first, but later
all-path rewrites may drop
@@ -876,14 +354,6 @@ public class NestedColumnPruning implements CustomRewriter {
predicatePaths.removeAll(toRemove);
}
- private static boolean hasStrictPrefix(List<String> path, List<String>
prefix) {
- return path.size() > prefix.size() && path.subList(0,
prefix.size()).equals(prefix);
- }
-
- private static boolean pathCoversPrefix(List<String> path, List<String>
prefix) {
- return prefix.size() >= path.size() && prefix.subList(0,
path.size()).equals(path);
- }
-
private static List<ColumnAccessPath> buildColumnAccessPaths(
Slot slot, Multimap<Integer, Pair<ColumnAccessPathType,
List<String>>> accessPaths) {
List<ColumnAccessPath> paths = new ArrayList<>();
@@ -914,17 +384,17 @@ public class NestedColumnPruning implements CustomRewriter {
}
private static boolean shouldSkipAccessInfo(
- Slot slot, DataType prunedDataType, List<ColumnAccessPath>
allPaths,
- Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
predicateAccessPaths) {
+ Slot slot, DataType prunedDataType, List<ColumnAccessPath>
allPaths) {
if (!prunedDataType.equals(slot.getDataType())) {
return false;
}
if (slot.getDataType() instanceof NestedColumnPrunable ||
slot.getDataType().isVariantType()) {
return false;
}
- if (!predicateAccessPaths.get(slot.getExprId().asInt()).isEmpty()) {
- return false;
- }
+
+ // Only scalar / string-like types reach here (NestedColumnPrunable
and Variant
+ // returned false above). A single [col] path means the entire column
is read;
+ // no access info needs to be sent to BE.
if (allPaths.size() != 1) {
return false;
}
@@ -941,15 +411,18 @@ public class NestedColumnPruning implements CustomRewriter {
// if access 's.a.b' the node 's' and 'a' has accessPartialChild, and
node 'b' has accessAll
private boolean accessPartialChild;
private boolean accessAll;
- // True when this string-typed node is accessed ONLY via the offset
array
- // (e.g. length(str_col) or length(element_at(c_struct,'f3'))).
- // When this flag is set and accessAll is NOT set, pruneDataType()
returns BigIntType
- // to signal that the BE only needs to read the offset array, not the
chars data.
- private boolean isStringOffsetOnly;
- // True when this column node is accessed ONLY via IS NULL / IS NOT
NULL.
- // When this flag is set and accessAll is NOT set, the BE only needs
to read the null flag,
- // not the actual column data.
- private boolean isNullCheckOnly;
+ // Cached marker set by setAccessByPath() when a path component is
OFFSET.
+ // Avoids scanning the multimap: hasStringOffsetOnlyAccess() reads
this flag in
+ // O(1) instead of checking every path for an OFFSET suffix. Used for
array, map,
+ // and string-like types (they share offset-based storage in BE).
+ // When set without accessAll, pruneDataType() keeps the node's type
so that BE
+ // reads only the offset structure, skipping element / key-value /
chars data.
+ private boolean hasOffsetPath;
+ // Cached marker set by setAccessByPath() when a path component is
NULL.
+ // Same purpose as hasOffsetPath — O(1) flag read instead of multimap
scan
+ // in hasNullCheckOnlyAccess(). When set without accessAll, BE reads
only the
+ // null bitmap, skipping actual column data.
+ private boolean hasNullPath;
// for the future, only access the meta of the column,
// e.g. `is not null` can only access the column's offset, not need to
read the data
private ColumnAccessPathType pathType;
@@ -992,80 +465,33 @@ public class NestedColumnPruning implements CustomRewriter {
}
/**
- * True when a MAP column is accessed as {@code
length(map_col['key'])}: the keys must
- * be read in full (for the element lookup) while the values only need
the offset array
- * (since only their length, not their content, is used).
- * Expected access paths: [col, KEYS] and [col, VALUES, OFFSET].
+ * recursively search in the tree, if any node hasOffsetPath
*/
- public boolean hasMapValueOffsetOnlyAccess() {
- if (!isRoot) {
- return false;
- }
- DataTypeAccessTree child = children.values().iterator().next();
- if (!child.type.isMapType() || child.accessAll) {
- return false;
- }
- DataTypeAccessTree keysChild =
child.children.get(AccessPathInfo.ACCESS_MAP_KEYS);
- DataTypeAccessTree valsChild =
child.children.get(AccessPathInfo.ACCESS_MAP_VALUES);
- // Keys must be fully accessed (element-at lookup).
- if (!keysChild.accessAll) {
- return false;
- }
- // Values must be accessed offset-only (no deeper element reads).
- if (!valsChild.isStringOffsetOnly || valsChild.accessAll) {
- return false;
- }
- if (valsChild.type.isStringLikeType()) {
- // String value: accessAll check above is sufficient.
+ public boolean hasOffsetPath() {
+ if (hasOffsetPath) {
return true;
}
- if (valsChild.type.isArrayType()) {
- // Array value (e.g. MAP<STRING, ARRAY<INT>>): verify no
element was read directly
- // (e.g. map_col['k'][0] would set allChild.accessAll=true).
- DataTypeAccessTree allChild =
valsChild.children.get(AccessPathInfo.ACCESS_ALL);
- return !allChild.accessAll && !allChild.accessPartialChild;
- }
- return true;
- }
-
- /** True when the column is accessed ONLY via the offset array (e.g.
length(str_col),
- * length(arr_col), length(map_col)), meaning the type must not
change but an access
- * path still needs to be sent to BE so it can skip the char/element
data. */
- public boolean hasStringOffsetOnlyAccess() {
- if (isRoot) {
- DataTypeAccessTree child = children.values().iterator().next();
- if (!child.isStringOffsetOnly || child.accessAll) {
- return false;
- }
- if (child.type.isStringLikeType()) {
+ for (DataTypeAccessTree child : children.values()) {
+ if (child.hasOffsetPath()) {
return true;
}
- if (child.type.isArrayType()) {
- // True only if no element was accessed (element_at /
explode etc.)
- DataTypeAccessTree allChild =
child.children.get(AccessPathInfo.ACCESS_ALL);
- return !allChild.accessAll && !allChild.accessPartialChild;
- }
- if (child.type.isMapType()) {
- // True only if neither keys nor values were accessed
directly
- DataTypeAccessTree keysChild =
child.children.get(AccessPathInfo.ACCESS_MAP_KEYS);
- DataTypeAccessTree valsChild =
child.children.get(AccessPathInfo.ACCESS_MAP_VALUES);
- return !keysChild.accessAll &&
!keysChild.accessPartialChild
- && !valsChild.accessAll &&
!valsChild.accessPartialChild;
- }
- return false;
}
- return type.isStringLikeType() && isStringOffsetOnly && !accessAll;
+ return false;
}
- /** True when the column is accessed ONLY via IS NULL / IS NOT NULL,
- * meaning the BE only needs to read the null flag, not the actual
data. */
- public boolean hasNullCheckOnlyAccess() {
- if (isRoot) {
- DataTypeAccessTree child = children.values().iterator().next();
- return child.isNullCheckOnly && !child.accessAll
- && !child.isStringOffsetOnly &&
!child.accessPartialChild;
+ /**
+ * recursively search in the tree, if any node hasNullPath
+ */
+ public boolean hasNullPath() {
+ if (hasNullPath) {
+ return true;
+ }
+ for (DataTypeAccessTree child : children.values()) {
+ if (child.hasNullPath()) {
+ return true;
+ }
}
- return isNullCheckOnly && !accessAll && !isStringOffsetOnly &&
!accessPartialChild;
+ return false;
}
/** pruneCastType */
@@ -1163,7 +589,7 @@ public class NestedColumnPruning implements CustomRewriter {
// Mark null-check-only and return without setting accessAll or
accessPartialChild,
// so that parent nodes can distinguish "null-only leaf" from "has
real sub-access".
if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_NULL)) {
- isNullCheckOnly = true;
+ hasNullPath = true;
return;
}
@@ -1180,9 +606,9 @@ public class NestedColumnPruning implements CustomRewriter {
}
return;
} else if (this.type.isArrayType()) {
- if
(path.get(accessIndex).equals(AccessPathInfo.ACCESS_STRING_OFFSET)) {
+ if
(path.get(accessIndex).equals(AccessPathInfo.ACCESS_OFFSET)) {
// length(array_col) — only the offset array is needed,
not element data.
- isStringOffsetOnly = true;
+ hasOffsetPath = true;
return;
}
DataTypeAccessTree child =
children.get(AccessPathInfo.ACCESS_ALL);
@@ -1193,9 +619,9 @@ public class NestedColumnPruning implements CustomRewriter {
return;
} else if (this.type.isMapType()) {
String fieldName = path.get(accessIndex);
- if (fieldName.equals(AccessPathInfo.ACCESS_STRING_OFFSET)) {
+ if (fieldName.equals(AccessPathInfo.ACCESS_OFFSET)) {
// length(map_col) — only the offset array is needed, not
key/value data.
- isStringOffsetOnly = true;
+ hasOffsetPath = true;
return;
}
if (fieldName.equals(AccessPathInfo.ACCESS_ALL)) {
@@ -1225,8 +651,8 @@ public class NestedColumnPruning implements CustomRewriter {
} else if (type.isStringLikeType()) {
// String leaf accessed via the offset array (e.g. path ends
in "offset").
// Mark offset-only so pruneDataType() can return BigIntType
instead of full data.
- if
(path.get(accessIndex).equals(AccessPathInfo.ACCESS_STRING_OFFSET)) {
- isStringOffsetOnly = true;
+ if
(path.get(accessIndex).equals(AccessPathInfo.ACCESS_OFFSET)) {
+ hasOffsetPath = true;
return; // do NOT set accessAll — offset-only is
distinguishable from full access
}
// Any other sub-path on a string column means full data is
needed.
@@ -1269,10 +695,10 @@ public class NestedColumnPruning implements CustomRewriter {
return children.values().iterator().next().pruneDataType();
} else if (accessAll) {
return Optional.of(type);
- } else if (isStringOffsetOnly && !accessPartialChild) {
+ } else if (hasOffsetPath && !accessPartialChild) {
// Only the offset array is accessed (e.g. length(str_col)).
return Optional.of(type);
- } else if (isNullCheckOnly && !accessPartialChild) {
+ } else if (hasNullPath && !accessPartialChild) {
// Only the null flag is accessed (e.g. col IS NULL /
element_at(s,'f') IS NULL).
// Return the node's type so that parent nodes include this
child in their pruned type,
// while the access path (ending in NULL) tells BE to skip
actual data reading.
@@ -1335,4 +761,104 @@ public class NestedColumnPruning implements CustomRewriter {
}
}
}
+
+ /**
+ * Expand map-level {@code *} wildcards into {@code KEYS} + {@code VALUES}
+ * variants. For n map-level stars in a single path, n+1 paths are
+ * emitted: one all-VALUES path plus one KEYS-terminating path per star
+ * position. Array-level stars are left unchanged.
+ *
+ * <p>Paths with no map-level star are kept as-is.
+ */
+ private static void expandMapStarPaths(
+ Slot slot,
+ Multimap<Integer, Pair<ColumnAccessPathType, List<String>>>
accessPaths) {
+ int slotId = slot.getExprId().asInt();
+ Collection<Pair<ColumnAccessPathType, List<String>>> slotPaths =
accessPaths.get(slotId);
+ if (slotPaths.isEmpty()) {
+ return;
+ }
+ DataType slotType = slot.getDataType();
+
+ List<Pair<ColumnAccessPathType, List<String>>> toAdd = new
ArrayList<>();
+ List<Pair<ColumnAccessPathType, List<String>>> toRemove = new
ArrayList<>();
+
+ for (Pair<ColumnAccessPathType, List<String>> p : slotPaths) {
+ List<String> path = p.second;
+ List<Integer> positions = new ArrayList<>();
+ findMapStarPositions(path, slotType, positions);
+ if (positions.isEmpty()) {
+ continue;
+ }
+ toRemove.add(p);
+ toAdd.addAll(expandOnePath(p.first, path, positions));
+ }
+
+ slotPaths.removeAll(toRemove);
+ slotPaths.addAll(toAdd);
+ }
+
+ private static void findMapStarPositions(
+ List<String> path, DataType slotType, List<Integer> positions) {
+ DataType current = slotType;
+ for (int i = 1; i < path.size(); i++) {
+ String component = path.get(i);
+ if (current.isStructType()) {
+ StructField field = ((StructType) current).getField(component);
+ if (field == null) {
+ break;
+ }
+ current = field.getDataType();
+ } else if (current.isArrayType()) {
+ if (!AccessPathInfo.ACCESS_ALL.equals(component)) {
+ break;
+ }
+ current = ((ArrayType) current).getItemType();
+ } else if (current.isMapType()) {
+ MapType mapType = (MapType) current;
+ if (AccessPathInfo.ACCESS_ALL.equals(component)) {
+ positions.add(i);
+ current = mapType.getValueType();
+ } else if (AccessPathInfo.ACCESS_MAP_KEYS.equals(component)) {
+ current = mapType.getKeyType();
+ } else if (AccessPathInfo.ACCESS_MAP_VALUES.equals(component))
{
+ current = mapType.getValueType();
+ } else {
+ current = mapType.getValueType();
+ }
+ } else {
+ break;
+ }
+ }
+ }
+
+ private static List<Pair<ColumnAccessPathType, List<String>>>
expandOnePath(
+ ColumnAccessPathType type, List<String> path, List<Integer>
positions) {
+ int n = positions.size();
+ List<Pair<ColumnAccessPathType, List<String>>> result = new
ArrayList<>(n + 1);
+
+ // All-VALUES path: replace every map * with VALUES
+ List<String> allValues = new ArrayList<>(path);
+ for (int pos : positions) {
+ allValues.set(pos, AccessPathInfo.ACCESS_MAP_VALUES);
+ }
+ result.add(Pair.of(type, allValues));
+
+ // KEYS-terminating path for each position
+ for (int i = 0; i < n; i++) {
+ int keysPos = positions.get(i);
+ List<String> keysPath = new ArrayList<>();
+ for (int j = 0; j < keysPos; j++) {
+ String component = path.get(j);
+ if (positions.contains(j)) {
+ component = AccessPathInfo.ACCESS_MAP_VALUES;
+ }
+ keysPath.add(component);
+ }
+ keysPath.add(AccessPathInfo.ACCESS_MAP_KEYS);
+ result.add(Pair.of(type, keysPath));
+ }
+
+ return result;
+ }
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java
index a50d4cdddc2..58411582aac 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java
@@ -199,6 +199,53 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
ImmutableList.of(path("a", "*")));
}
+ @Test
+ public void testDeeperOffsetPathCoversShallowerOffsetPath() throws Exception {
+     // On ARRAY<ARRAY<INT>>:
+     //   cardinality(a)                 generates [a, OFFSET]
+     //   cardinality(element_at(a, 1))  generates [a, *, OFFSET]
+     // [a, *, OFFSET] traverses into the outer array's items, so it must read through
+     // the outer array structure. The shallower [a, OFFSET] is therefore redundant and
+     // is expected to be stripped.
+     String sql = "select cardinality(a), cardinality(element_at(a, 1)) from nested_array_tbl";
+     ImmutableList<ColumnAccessPath> mustContain = ImmutableList.of(path("a", "*", "OFFSET"));
+     ImmutableList<ColumnAccessPath> mustNotContain = ImmutableList.of(path("a", "OFFSET"));
+     assertAllAccessPathsContain(sql, mustContain, mustNotContain);
+ }
+
+ @Test
+ public void testOffsetPathCoversNullPathWithSamePrefix() throws Exception {
+     // cardinality(a) and "a is null" touch the same prefix [a]; the test expects
+     // [a, OFFSET] to be present and [a, NULL] to be absent from the access paths.
+     String sql = "select cardinality(a), a is null from nested_array_tbl";
+     ImmutableList<ColumnAccessPath> mustContain = ImmutableList.of(path("a", "OFFSET"));
+     ImmutableList<ColumnAccessPath> mustNotContain = ImmutableList.of(path("a", "NULL"));
+     assertAllAccessPathsContain(sql, mustContain, mustNotContain);
+ }
+
+ @Test
+ public void testDeeperNullPathCoversShallowerNullPath() throws Exception {
+     // On ARRAY<ARRAY<INT>>:
+     //   a IS NULL                 generates [a, NULL]
+     //   element_at(a, 1) IS NULL  generates [a, *, NULL]
+     // [a, *, NULL] traverses into the outer array's items, which requires reading the
+     // outer array's null bitmap. Therefore [a, NULL] is redundant and is expected to
+     // be stripped.
+     String sql = "select a is null, element_at(a, 1) is null from nested_array_tbl";
+     ImmutableList<ColumnAccessPath> mustContain = ImmutableList.of(path("a", "*", "NULL"));
+     ImmutableList<ColumnAccessPath> mustNotContain = ImmutableList.of(path("a", "NULL"));
+     assertAllAccessPathsContain(sql, mustContain, mustNotContain);
+ }
+
+ @Test
+ public void testDeeperOffsetPathCoversShallowerNullPathForArray() throws Exception {
+     // On ARRAY<ARRAY<INT>>:
+     //   cardinality(element_at(a, 1))  generates [a, *, OFFSET]
+     //   a IS NULL                      generates [a, NULL]
+     // [a, *, OFFSET] reads the offset of inner-array elements, which requires reading
+     // through the outer array's null bitmap. Therefore [a, NULL] is redundant and is
+     // expected to be stripped.
+     String sql = "select cardinality(element_at(a, 1)), a is null from nested_array_tbl";
+     ImmutableList<ColumnAccessPath> mustContain = ImmutableList.of(path("a", "*", "OFFSET"));
+     ImmutableList<ColumnAccessPath> mustNotContain = ImmutableList.of(path("a", "NULL"));
+     assertAllAccessPathsContain(sql, mustContain, mustNotContain);
+ }
+
@Test
public void testCardinalityMapElementKeepsValueOffsetPath() throws
Exception {
assertColumn("select cardinality(map_arr_col['a']) from map_array_tbl",
@@ -207,6 +254,103 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
ImmutableList.of());
}
+ @Test
+ public void testDeeperNullCoversValueOffsetForMapArray() throws Exception {
+     // cardinality(map_arr_col['a'])             -> normalized to KEYS + VALUES.OFFSET
+     // element_at(map_arr_col['a'], 1) IS NULL   -> *.*.NULL
+     // *.*.NULL goes deeper into the value-side array, so VALUES.OFFSET at the map
+     // value level is redundant. Without type-aware comparison, * is not lexically
+     // equal to VALUES, so VALUES.OFFSET would survive and cause BE to skip the item
+     // iterator.
+     String sql = "select cardinality(map_arr_col['a']), "
+             + "element_at(map_arr_col['a'], 1) is null from map_array_tbl";
+     ImmutableList<ColumnAccessPath> mustContain = ImmutableList.of(
+             path("map_arr_col", "KEYS"),
+             path("map_arr_col", "VALUES", "*", "NULL"));
+     ImmutableList<ColumnAccessPath> mustNotContain = ImmutableList.of(
+             path("map_arr_col", "VALUES", "OFFSET"));
+     assertAllAccessPathsContain(sql, mustContain, mustNotContain);
+ }
+
+ @Test
+ public void testDataPathCoversNullPathWithMapAwareComparison() throws Exception {
+     // element_at(map_col, 'a')                      -> [map_col, KEYS] + [map_col, VALUES]
+     // element_at(map_values(map_col), 1) IS NULL    -> [map_col, VALUES, NULL]
+     // Level 1 strips [map_col, VALUES, NULL] because [map_col, VALUES] covers it.
+     //
+     // "map_values(map_col) IS NULL" is deliberately not used as the predicate:
+     // visitMapValues special-cases a lone NULL suffix as a parent-map null check
+     // (isFunctionNullCheckPath), producing [map_col, NULL] instead of
+     // [map_col, VALUES, NULL].
+     String sql = "select element_at(map_col, 'a') from str_tbl"
+             + " where element_at(map_values(map_col), 1) is null";
+     ImmutableList<ColumnAccessPath> mustContain = ImmutableList.of(
+             path("map_col", "KEYS"),
+             path("map_col", "VALUES"));
+     ImmutableList<ColumnAccessPath> mustNotContain = ImmutableList.of(
+             path("map_col", "VALUES", "NULL"));
+     assertAllAccessPathsContain(sql, mustContain, mustNotContain);
+ }
+
+ @Test
+ public void testDataPathCoversOffsetPathWithMapAwareComparison() throws Exception {
+     // element_at(map_col, 'a')                        -> [map_col, KEYS] + [map_col, VALUES]
+     // length(element_at(map_values(map_col), 1)) > 0  -> [map_col, VALUES, OFFSET]
+     // Level 2 strips [map_col, VALUES, OFFSET] because [map_col, VALUES] covers it.
+     // NOTE(review): both paths share the prefix [map_col, VALUES], which the commit
+     // description lists under Level 1 (same-prefix) -- confirm which level applies.
+     //
+     // "cardinality(map_values(map_col))" is deliberately not used as the predicate:
+     // visitCardinality unwraps MapValues and produces [map_col, OFFSET] instead of
+     // [map_col, VALUES, OFFSET].
+     String sql = "select element_at(map_col, 'a') from str_tbl"
+             + " where length(element_at(map_values(map_col), 1)) > 0";
+     ImmutableList<ColumnAccessPath> mustContain = ImmutableList.of(
+             path("map_col", "KEYS"),
+             path("map_col", "VALUES"));
+     ImmutableList<ColumnAccessPath> mustNotContain = ImmutableList.of(
+             path("map_col", "VALUES", "OFFSET"));
+     assertAllAccessPathsContain(sql, mustContain, mustNotContain);
+ }
+
+ @Test
+ public void testMapValuesCoversStarNullPreservesKeysPath() throws Exception {
+     // After map-* normalization:
+     //   element_at(map_col, 'a')            -> [map_col, KEYS] + [map_col, VALUES]
+     //   element_at(map_col, 'a') IS NULL    -> [map_col, VALUES, NULL] + [map_col, KEYS]
+     //   element_at(map_values(map_col), 1)  -> [map_col, VALUES]
+     // Level 1 strips [map_col, VALUES, NULL] because [map_col, VALUES] covers it.
+     // The KEYS path is preserved for key lookup.
+     String sql = "select element_at(map_values(map_col), 1) from str_tbl"
+             + " where element_at(map_col, 'a') is null";
+     ImmutableList<ColumnAccessPath> mustContain = ImmutableList.of(
+             path("map_col", "KEYS"),
+             path("map_col", "VALUES"));
+     ImmutableList<ColumnAccessPath> mustNotContain = ImmutableList.of(
+             path("map_col", "VALUES", "NULL"));
+     assertAllAccessPathsContain(sql, mustContain, mustNotContain);
+ }
+
+ @Test
+ public void testSupplementalKeyPathShouldStripExistingKeyNullPath() throws Exception {
+     // After map-* normalization:
+     //   projection: [map_arr_col, VALUES]
+     //   filter:     [map_arr_col, KEYS, NULL]
+     //            OR [map_arr_col, *, *, NULL] -> [map_arr_col, VALUES, *, NULL]
+     //                                          + [map_arr_col, KEYS]
+     // Level 1 strips [map_arr_col, KEYS, NULL] because [map_arr_col, KEYS] now exists
+     // before the strip phase (produced by normalization, not supplemental).
+     // Level 2 strips [map_arr_col, VALUES, *, NULL] because [map_arr_col, VALUES]
+     // covers the value-side prefix.
+     String sql = "select element_at(map_values(map_arr_col), 1) from map_array_tbl"
+             + " where element_at(map_keys(map_arr_col), 1) is null"
+             + " or element_at(element_at(map_arr_col, 'a'), 1) is null";
+     ImmutableList<ColumnAccessPath> mustContain = ImmutableList.of(
+             path("map_arr_col", "KEYS"),
+             path("map_arr_col", "VALUES"));
+     ImmutableList<ColumnAccessPath> mustNotContain = ImmutableList.of(
+             path("map_arr_col", "VALUES", "*", "NULL"),
+             path("map_arr_col", "KEYS", "NULL"));
+     assertAllAccessPathsContain(sql, mustContain, mustNotContain);
+ }
+
+ @Test
+ public void testNestedMapElementLengthKeepsValueOffsetPath() throws Exception {
+     // length() over a map element nested in a struct: the expected all-access paths are
+     // the map KEYS plus the value-side OFFSET; no predicate access paths are expected.
+     String sql = "select length(element_at(element_at(s, 'm'), 'a')) from nested_container_tbl";
+     String expectedType = "struct<m:map<text,text>>";
+     assertColumn(
+             sql,
+             expectedType,
+             ImmutableList.of(path("s", "m", "KEYS"), path("s", "m", "VALUES", "OFFSET")),
+             ImmutableList.of());
+ }
+
+ @Test
+ public void testNestedMapElementIsNullKeepsValueIsNullPath() throws Exception {
+     // IS NULL over a map element nested in a struct: the expected all-access paths are
+     // the map KEYS plus the value-side NULL; no predicate access paths are expected.
+     String sql = "select (element_at(element_at(s, 'm'), 'a')) is null from nested_container_tbl";
+     String expectedType = "struct<m:map<text,text>>";
+     assertColumn(
+             sql,
+             expectedType,
+             ImmutableList.of(path("s", "m", "KEYS"), path("s", "m", "VALUES", "NULL")),
+             ImmutableList.of());
+ }
+
@Test
public void testFullFieldAccessStripsExactDataSkippingPath() throws
Exception {
assertColumn("select element_at(s, 'city') from tbl "
@@ -227,12 +371,13 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
assertColumn("select cardinality(map_arr_col['a']), map_arr_col['a']
from map_array_tbl",
"map<text,array<int>>",
- ImmutableList.of(path("map_arr_col", "*")),
+ ImmutableList.of(path("map_arr_col", "KEYS"),
path("map_arr_col", "VALUES")),
ImmutableList.of());
}
@Test
public void testCardinalityMapElementOffsetCoveredByValueFieldAccess()
throws Exception {
+ // [s, m, *, *, verified] strips [s, m, *, OFFSET] (pure string
prefix).
Pair<PhysicalPlan, List<SlotDescriptor>> result = collectComplexSlots(
"select element_at(element_at(element_at(element_at(s, 'm'),
'null'), 1), 'verified') "
+ "from map_array_value_tbl "
@@ -243,13 +388,14 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
allAccessPaths.addAll(slotDescriptor.getAllAccessPaths());
predicateAccessPaths.addAll(slotDescriptor.getPredicateAccessPaths());
}
- Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "*", "*",
"verified")));
- Assertions.assertFalse(allAccessPaths.contains(path("s", "m", "*",
"OFFSET")));
- Assertions.assertFalse(predicateAccessPaths.contains(path("s", "m",
"*", "OFFSET")));
+ Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "VALUES",
"*", "verified")));
+ Assertions.assertFalse(allAccessPaths.contains(path("s", "m",
"VALUES", "OFFSET")));
+ Assertions.assertFalse(predicateAccessPaths.contains(path("s", "m",
"VALUES", "OFFSET")));
}
@Test
public void testMapElementArrayNullPathCoveredByValueFieldAccess() throws
Exception {
+ // [s, m, *, *, verified] strips [s, m, *, NULL] (pure string prefix).
Pair<PhysicalPlan, List<SlotDescriptor>> result = collectComplexSlots(
"select element_at(element_at(element_at(element_at(s, 'm'),
'null'), 1), 'verified') "
+ "from map_array_value_tbl "
@@ -260,9 +406,9 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
allAccessPaths.addAll(slotDescriptor.getAllAccessPaths());
predicateAccessPaths.addAll(slotDescriptor.getPredicateAccessPaths());
}
- Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "*", "*",
"verified")));
- Assertions.assertFalse(allAccessPaths.contains(path("s", "m", "*",
"NULL")));
- Assertions.assertFalse(predicateAccessPaths.contains(path("s", "m",
"*", "NULL")));
+ Assertions.assertTrue(allAccessPaths.contains(path("s", "m", "VALUES",
"*", "verified")));
+ Assertions.assertFalse(allAccessPaths.contains(path("s", "m",
"VALUES", "NULL")));
+ Assertions.assertFalse(predicateAccessPaths.contains(path("s", "m",
"VALUES", "NULL")));
}
@Test
@@ -610,18 +756,18 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
);
assertColumn("select 100 from tbl where element_at(s, 'data')[1][1] is
not null",
"struct<data:array<map<int,struct<a:int,b:double>>>>",
- ImmutableList.of(path("s", "data", "*", "*", "NULL")),
- ImmutableList.of(path("s", "data", "*", "*", "NULL"))
+ ImmutableList.of(path("s", "data", "*", "KEYS"), path("s",
"data", "*", "VALUES", "NULL")),
+ ImmutableList.of(path("s", "data", "*", "KEYS"), path("s",
"data", "*", "VALUES", "NULL"))
);
assertColumn("select 100 from tbl where element_at(element_at(s,
'data')[1][1], 'a') is not null",
"struct<data:array<map<int,struct<a:int>>>>",
- ImmutableList.of(path("s", "data", "*", "*", "a", "NULL")),
- ImmutableList.of(path("s", "data", "*", "*", "a", "NULL"))
+ ImmutableList.of(path("s", "data", "*", "KEYS"), path("s",
"data", "*", "VALUES", "a", "NULL")),
+ ImmutableList.of(path("s", "data", "*", "KEYS"), path("s",
"data", "*", "VALUES", "a", "NULL"))
);
assertColumn("select 100 from tbl where element_at(element_at(s,
'data')[1][1], 'b') is not null",
"struct<data:array<map<int,struct<b:double>>>>",
- ImmutableList.of(path("s", "data", "*", "*", "b", "NULL")),
- ImmutableList.of(path("s", "data", "*", "*", "b", "NULL"))
+ ImmutableList.of(path("s", "data", "*", "KEYS"), path("s",
"data", "*", "VALUES", "b", "NULL")),
+ ImmutableList.of(path("s", "data", "*", "KEYS"), path("s",
"data", "*", "VALUES", "b", "NULL"))
);
}
@@ -1264,10 +1410,12 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
allAccessPaths.addAll(slotDescriptor.getAllAccessPaths());
}
for (ColumnAccessPath accessPath : expectContainAllAccessPaths) {
- Assertions.assertTrue(allAccessPaths.contains(accessPath));
+ Assertions.assertTrue(allAccessPaths.contains(accessPath),
+ "expected " + accessPath + " but allAccessPaths=" +
allAccessPaths);
}
for (ColumnAccessPath accessPath : expectNotContainAllAccessPaths) {
- Assertions.assertFalse(allAccessPaths.contains(accessPath));
+ Assertions.assertFalse(allAccessPaths.contains(accessPath),
+ "expected NOT " + accessPath + " but allAccessPaths=" +
allAccessPaths);
}
}
@@ -1466,6 +1614,8 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
new TreeSet<>(ImmutableList.of(path("str_col", "NULL"))),
new TreeSet<>(normalSlot.getPredicateAccessPaths().get()));
+ // MV fragment: IS NULL degrades to full column read via default
visitor.
+ // [str_col] full-access path passes shouldSkipAccessInfo → no pruning.
SlotReference fragmentSlot = rewriteAndFindScanSlot(
"select 1 from str_tbl where str_col is not null", "str_col",
true);
assertNoAccessPaths(fragmentSlot);
@@ -1479,14 +1629,16 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
new TreeSet<>(ImmutableList.of(path("s", "city", "NULL"))),
new
TreeSet<>(nestedNormalSlot.getPredicateAccessPaths().get()));
- // MV rewrite optimizes temporary fragments whose later consumers are
not visible.
- // If the fragment only needs nested null metadata, e.g.
[s.city.NULL], pruning the
- // scan slot to struct<city:...> can break the final rewritten MV plan
when it still
- // needs the full struct or another child. The fragment marker
therefore suppresses
- // nested null-only access info too, not just top-level [col.NULL].
+ // MV fragment: IS NULL degrades to element_at via default visitor,
+ // producing [s, city] data path. struct is NestedColumnPrunable so
+ // pruning to struct<city:text> is safe — no meta suffix remains.
SlotReference nestedFragmentSlot = rewriteAndFindScanSlot(
"select 1 from tbl where element_at(s, 'city') is not null",
"s", true);
- assertNoAccessPaths(nestedFragmentSlot);
+ Assertions.assertEquals(
+ new TreeSet<>(ImmutableList.of(path("s", "city"))),
+ new TreeSet<>(nestedFragmentSlot.getAllAccessPaths().get()));
+
Assertions.assertTrue(!nestedFragmentSlot.getPredicateAccessPaths().isPresent()
+ ||
nestedFragmentSlot.getPredicateAccessPaths().get().isEmpty());
}
@Test
@@ -1514,10 +1666,16 @@ public class PruneNestedColumnTest extends
TestWithFeService implements MemoPatt
new TreeSet<>(ImmutableList.of(path("c_struct", "f3",
"OFFSET"))),
new
TreeSet<>(nestedNormalSlot.getPredicateAccessPaths().get()));
+ // MV fragment: length() degrades to element_at via default visitor,
+ // producing [c_struct, f3] data path without OFFSET suffix.
SlotReference nestedFragmentSlot = rewriteAndFindScanSlot(
"select 1 from str_tbl where length(element_at(c_struct,
'f3')) > 0",
"c_struct", true);
- assertNoAccessPaths(nestedFragmentSlot);
+ Assertions.assertEquals(
+ new TreeSet<>(ImmutableList.of(path("c_struct", "f3"))),
+ new TreeSet<>(nestedFragmentSlot.getAllAccessPaths().get()));
+
Assertions.assertTrue(!nestedFragmentSlot.getPredicateAccessPaths().isPresent()
+ ||
nestedFragmentSlot.getPredicateAccessPaths().get().isEmpty());
}
/**
diff --git
a/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy
b/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy
index 5039d3f4b90..69ba137d4af 100644
---
a/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy
+++
b/regression-test/suites/nereids_rules_p0/column_pruning/null_column_pruning.groovy
@@ -180,9 +180,7 @@ suite("null_column_pruning") {
// covers the same prefix and inherently includes the null flag.
explain {
sql "select int_col from ncp_tbl where int_col is null"
- contains "nested columns"
- contains "all access paths: [int_col]"
- notContains "predicate access paths:"
+ notContains "nested columns"
}
order_qt_10 "select int_col from ncp_tbl where int_col is null";
@@ -351,7 +349,15 @@ suite("null_column_pruning") {
explain {
sql "select count(1) from ncp_tbl where map_col['a'] is null"
contains "nested columns"
- contains "map_col.*.NULL"
+ contains "map_col.KEYS"
+ contains "map_col.VALUES.NULL"
+ // expectedPlan
+ // nested columns:
+ // map_col:
+ // origin type: map<text,int>
+ // all access paths: [map_col.KEYS, map_col.VALUES.NULL]
+ // predicate access paths: [map_col.KEYS, map_col.VALUES.NULL]
+
}
order_qt_20 "select count(1) from ncp_tbl where map_col['a'] is null";
@@ -360,7 +366,8 @@ suite("null_column_pruning") {
explain {
sql "select count(1) from ncp_tbl where map_col['a'] is not null"
contains "nested columns"
- contains "map_col.*.NULL"
+ contains "map_col.KEYS"
+ contains "map_col.VALUES.NULL"
}
order_qt_21 "select count(1) from ncp_tbl where map_col['a'] is not null";
diff --git
a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
index 48c0cb37a49..e0159f749f2 100644
---
a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
+++
b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
@@ -71,8 +71,7 @@ suite("string_length_column_pruning") {
}
sql "select length(str_col) from slcp_str_tbl"
- // length(str_col) in IF plus ORDER BY on a plain primitive column:
- // only str_col should appear in nested columns, and NULL is redundant
when OFFSET exists.
+ // [str_col, OFFSET] strips [str_col, NULL].
explain {
sql "select if(length(str_col) >= 5, true, false) a from slcp_str_tbl
order by id"
contains "nested columns"
@@ -154,7 +153,7 @@ suite("string_length_column_pruning") {
notContains "type=bigint"
}
- // Full access to the same array field covers its OFFSET metadata for any
data type.
+ // [arr_col] strips [arr_col, OFFSET].
explain {
sql "select id, cardinality(arr_col), arr_col from slcp_str_tbl"
contains "nested columns"
@@ -243,9 +242,7 @@ suite("string_length_column_pruning") {
// ─── Map with complex value cases
────────────────────────────────────────────
- // cardinality(map_arr_col['a']): value is ARRAY<INT>.
- // Keys read in full (element lookup); values need only the OFFSET array
(array size).
- // Expected paths: map_arr_col.KEYS + map_arr_col.VALUES.OFFSET
+ // Expected paths: [map_arr_col, KEYS] + [map_arr_col, VALUES, OFFSET]
explain {
sql "select cardinality(map_arr_col['a']) from slcp_str_tbl"
contains "nested columns"
@@ -272,20 +269,22 @@ suite("string_length_column_pruning") {
notContains "type=bigint"
}
- // value array item also accessed directly → full VALUES item path covers
value OFFSET.
+ // [map_arr_struct_col, VALUES, *, verified] strips [map_arr_struct_col,
VALUES, OFFSET].
explain {
sql "select cardinality(map_arr_struct_col['a']),
map_arr_struct_col['a'][1].verified from slcp_str_tbl"
contains "nested columns"
- contains "map_arr_struct_col.*.*.verified"
- notContains "map_arr_struct_col.*.OFFSET"
+ contains "map_arr_struct_col.KEYS"
+ contains "map_arr_struct_col.VALUES.*.verified"
+ notContains "map_arr_struct_col.VALUES.OFFSET"
notContains "type=bigint"
}
+ // [map_arr_col, VALUES] strips [map_arr_col, VALUES, OFFSET].
explain {
sql "select id, cardinality(map_arr_col['a']), map_arr_col['a'] from
slcp_str_tbl"
contains "nested columns"
- contains "all access paths: [map_arr_col.*]"
- notContains "map_arr_col.*.OFFSET"
+ contains "all access paths: [map_arr_col.KEYS, map_arr_col.VALUES]"
+ notContains "map_arr_col.VALUES.OFFSET"
notContains "predicate access paths:"
notContains "type=bigint"
}
@@ -295,15 +294,14 @@ suite("string_length_column_pruning") {
order by id
"""
- // Predicate OFFSET path must also be removed when the projected value
field already
- // makes the corresponding array data path available. predicateAccessPaths
remains a
- // subset of allAccessPaths.
+ // [map_arr_struct_col, VALUES, *, verified] strips [map_arr_struct_col,
VALUES, OFFSET].
+ // KEYS (data path) remains in predicateAccessPaths.
explain {
sql "select map_arr_struct_col['a'][1].verified from slcp_str_tbl
where cardinality(map_arr_struct_col['a']) > 0"
contains "nested columns"
- contains "all access paths: [map_arr_struct_col.*.*.verified]"
- notContains "map_arr_struct_col.*.OFFSET"
- notContains "predicate access paths:"
+ contains "all access paths: [map_arr_struct_col.KEYS,
map_arr_struct_col.VALUES.*.verified]"
+ contains "predicate access paths: [map_arr_struct_col.KEYS]"
+ notContains "map_arr_struct_col.VALUES.OFFSET"
notContains "type=bigint"
}
@@ -313,13 +311,15 @@ suite("string_length_column_pruning") {
order by 1
"""
- // value array item also accessed directly → full VALUES item path covers
value NULL.
+ // [map_arr_struct_col, VALUES, *, verified] strips [map_arr_struct_col,
VALUES, NULL].
+ // KEYS (data path) remains in predicateAccessPaths.
explain {
sql "select map_arr_struct_col['a'][1].verified from slcp_str_tbl
where map_arr_struct_col['a'] is null"
contains "nested columns"
- contains "map_arr_struct_col.*.*.verified"
- notContains "map_arr_struct_col.*.NULL"
- notContains "predicate access paths:"
+ contains "map_arr_struct_col.KEYS"
+ contains "map_arr_struct_col.VALUES.*.verified"
+ contains "predicate access paths: [map_arr_struct_col.KEYS]"
+ notContains "map_arr_struct_col.VALUES.NULL"
}
// ─── Non-optimizable cases
──────────────────────────────────────────────────
@@ -414,8 +414,7 @@ suite("string_length_column_pruning") {
notContains "bigint"
}
- // length(map_col['a']): keys read fully for element lookup, values
accessed offset-only.
- // Expect access paths: map_col.KEYS (full) + map_col.VALUES.OFFSET
+ // Expected paths: [map_col, KEYS] + [map_col, VALUES, OFFSET]
explain {
sql "select length(map_col['a']) from slcp_str_tbl"
contains "nested columns"
@@ -538,9 +537,7 @@ suite("string_length_column_pruning") {
// ─── Map element_at + map_values mixed access
─────────────────────────────────
- // length(map_col['a']) needs keys for the element_at lookup and value
offsets for length().
- // map_values(map_col)[1] needs full value data. The mixed query must
therefore keep a KEYS
- // path for element_at lookup while dropping the redundant value-side
OFFSET path.
+ // [map_col, VALUES] strips [map_col, VALUES, OFFSET]. KEYS is kept for
element_at lookup.
order_qt_map_element_with_map_values """
select length(map_col['a']), map_values(map_col)[1] from slcp_str_tbl
"""
@@ -554,9 +551,7 @@ suite("string_length_column_pruning") {
notContains "bigint"
}
- // Reverse direction: length(map_values(map_col)[1]) produces [map_col,
VALUES, OFFSET]
- // while map_col['a'] produces [map_col, *]. The * path reads full values,
so OFFSET
- // must be suppressed here as well.
+ // [map_col, VALUES] strips [map_col, VALUES, OFFSET].
explain {
sql "select length(map_values(map_col)[1]), map_col['a'] from
slcp_str_tbl"
notContains "OFFSET"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]