This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a2eb4d6eed6 [fix](NestedColumnPruning) collect full-access paths for
array columns unreferenced in lambda body (#64436)
a2eb4d6eed6 is described below
commit a2eb4d6eed6d4131c8e044b741dd31558a3db31b
Author: minghong <[email protected]>
AuthorDate: Wed Jun 17 15:10:40 2026 +0800
[fix](NestedColumnPruning) collect full-access paths for array columns
unreferenced in lambda body (#64436)
### What problem does this PR solve?
When a lambda body is a constant expression (e.g. x -> true) that never
references the array item variable, visitArrayItemSlot is not called, so
the bound array column's full-access path is never registered in
slotToAccessPaths. This gap is exposed when IS NULL or cardinality() has
already registered a data-skipping path ([col.NULL] or [col.OFFSET]) for
the same slot — NestedColumnPruning then incorrectly prunes the complex
column to null-only or offset-only.
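Fix: in collectArrayPathInLambda, after visiting the body, check whether
each lambda argument is referenced in the body by scanning it for
ArrayItemSlots whose ExprId matches the argument's ExprId (getInputSlots()
is not used because it deliberately excludes ArrayItemSlot). If an
argument is unreferenced, visit its bound array with an ACCESS_ALL prefix
in a fresh CollectorContext to register the full-access path.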
Regression test lambda_null_pruning covers:
- Constant lambda body + IS NULL (the original bug)
- Multi-variable lambda where body references some but not all vars
- Array functions: array_count, array_map, array_filter
- Data-skipping triggers: IS NULL, IS NOT NULL, cardinality
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---------
Co-authored-by: Claude Opus 4.8 <[email protected]>
---
.../rewrite/AccessPathExpressionCollector.java | 40 ++++++
.../column_pruning/lambda_null_pruning.out | 31 +++++
.../column_pruning/lambda_null_pruning.groovy | 150 +++++++++++++++++++++
3 files changed, 221 insertions(+)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
index fac90248986..e7dbc5ca05d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
@@ -25,6 +25,7 @@ import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.ArrayItemReference;
import
org.apache.doris.nereids.trees.expressions.ArrayItemReference.ArrayItemSlot;
import org.apache.doris.nereids.trees.expressions.Cast;
+import org.apache.doris.nereids.trees.expressions.ExprId;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.IsNull;
import org.apache.doris.nereids.trees.expressions.Not;
@@ -75,6 +76,7 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
+import java.util.Set;
import java.util.Stack;
/**
@@ -618,6 +620,44 @@ public class AccessPathExpressionCollector extends DefaultExpressionVisitor<Void
} finally {
nameToLambdaArguments.pop();
}
+
+ // After visiting the lambda body, for any bound array whose lambda variable
+ // was NOT referenced in the body (e.g. x -> true where x never appears),
+ // visitArrayItemSlot was never called and the array column's access path is
+ // missing. This gap is exposed when an is-null or offset-only path has been
+ // registered for the same slot — NestedColumnPruning then incorrectly prunes
+ // the complex column to null-only / offset-only instead of reading full data.
+ //
+ // Detect usage by scanning the lambda body for ArrayItemSlots whose ExprId matches
+ // the argument's ExprId, which is more reliable than getInputSlots() that deliberately
+ // excludes ArrayItemSlot and may falsely match outer slots.
+ //
+ // Must use a fresh context: when the body DOES reference some variables
+ // (e.g. (x,y) -> x > 0), visitArrayItemSlot mutates context.accessPathBuilder
+ // in-place (addPrefix without cleanup). A fresh context isolates the fallback
+ // path for unreferenced variables from pollution by referenced ones.
+ for (Expression argument : arguments) {
+ if (argument instanceof ArrayItemReference) {
+ ExprId argExprId = ((ArrayItemReference) argument).getExprId();
+ Set<ArrayItemSlot> arrayItemSlots = arguments.get(0)
+ .<ArrayItemSlot>collect(e -> e instanceof
ArrayItemSlot);
+ boolean isReferenced = false;
+ for (ArrayItemSlot slot : arrayItemSlots) {
+ if (slot.getExprId().equals(argExprId)) {
+ isReferenced = true;
+ break;
+ }
+ }
+ if (!isReferenced) {
+ Expression boundArray = argument.child(0);
+ CollectorContext fullAccessCtx = new CollectorContext(
+ context.statementContext, context.bottomFilter);
+
fullAccessCtx.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_ALL);
+ continueCollectAccessPath(boundArray, fullAccessCtx);
+ }
+ }
+ }
+
return null;
}
diff --git a/regression-test/data/nereids_rules_p0/column_pruning/lambda_null_pruning.out b/regression-test/data/nereids_rules_p0/column_pruning/lambda_null_pruning.out
new file mode 100644
index 00000000000..1fadb109ebf
--- /dev/null
+++ b/regression-test/data/nereids_rules_p0/column_pruning/lambda_null_pruning.out
@@ -0,0 +1,31 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !case1 --
+1 false 3
+2 true 0
+3 false 0
+4 false 1
+
+-- !case2 --
+1 false false [1, 2, 3]
+2 true true \N
+3 false false []
+4 false false [null]
+
+-- !case3 --
+1 false false [11, 22, 33]
+2 true true \N
+3 false false []
+4 false false [null]
+
+-- !case4 --
+1 false [1, 2, 3]
+2 true \N
+3 false []
+4 false [null]
+
+-- !case6 --
+1 3 3
+2 \N 0
+3 0 0
+4 1 1
+
diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/lambda_null_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/lambda_null_pruning.groovy
new file mode 100644
index 00000000000..2d55528f184
--- /dev/null
+++ b/regression-test/suites/nereids_rules_p0/column_pruning/lambda_null_pruning.groovy
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("lambda_null_pruning") { // lambdas that ignore an array argument vs. null-only / offset-only pruning
+ sql """ DROP TABLE IF EXISTS lambda_null_pruning_tbl """
+ sql """
+ CREATE TABLE lambda_null_pruning_tbl (
+ id INT,
+ a ARRAY<INT> NULL,
+ b ARRAY<INT> NULL
+ ) ENGINE = OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+ """
+ sql """
+ INSERT INTO lambda_null_pruning_tbl VALUES
+ (1, [1, 2, 3], [10, 20, 30]),
+ (2, NULL, NULL),
+ (3, [], []),
+ (4, [null], [1])
+ """
+
+ // ================================================================
+ // Case 1: single-variable constant lambda body + IS NULL
+ // body = Literal(true), array item variable unreferenced
+ // Before the fix, collectArrayPathInLambda registered no full-access path,
+ // so once IS NULL had registered [a.NULL], the column was pruned to null-only.
+ // ================================================================
+ explain {
+ sql """
+ SELECT id, a IS NULL, array_count(x -> true, a)
+ FROM lambda_null_pruning_tbl ORDER BY id
+ """
+ contains "nested columns" // explain output must mention nested columns
+ notContains "a.NULL" // ...but must not show the null-only path for column a
+ }
+
+ order_qt_case1 """
+ SELECT id, a IS NULL, array_count(x -> true, a)
+ FROM lambda_null_pruning_tbl ORDER BY id
+ """
+
+ // ================================================================
+ // Case 2: two-variable lambda, body references x but not y
+ // array_map((x, y) -> x, a, b):
+ // x -> body references -> visitArrayItemSlot fires -> [a, *] OK
+ // y -> body does NOT reference -> visitArrayItemSlot never fires
+ // b IS NULL -> [b, NULL] registered -> bug triggered
+ // After fix: fallback adds [b, *] for unreferenced y
+ // ================================================================
+ explain {
+ sql """
+ SELECT id, a IS NULL, b IS NULL,
+ array_map((x, y) -> x, a, b)
+ FROM lambda_null_pruning_tbl ORDER BY id
+ """
+ contains "nested columns"
+ notContains "a.NULL"
+ notContains "b.NULL"
+ }
+
+ order_qt_case2 """
+ SELECT id, a IS NULL, b IS NULL,
+ array_map((x, y) -> x, a, b)
+ FROM lambda_null_pruning_tbl ORDER BY id
+ """
+
+ // ================================================================
+ // Case 3: two-variable lambda, body references both (control case)
+ // array_map((x, y) -> x + y, a, b):
+ // both x and y referenced -> visitArrayItemSlot fires for both, no fallback
+ // ================================================================
+ explain {
+ sql """
+ SELECT id, a IS NULL, b IS NULL,
+ array_map((x, y) -> x + y, a, b)
+ FROM lambda_null_pruning_tbl ORDER BY id
+ """
+ contains "nested columns"
+ notContains "a.NULL"
+ notContains "b.NULL"
+ }
+
+ order_qt_case3 """
+ SELECT id, a IS NULL, b IS NULL,
+ array_map((x, y) -> x + y, a, b)
+ FROM lambda_null_pruning_tbl ORDER BY id
+ """
+
+ // ================================================================
+ // Case 4: array_filter constant lambda + IS NULL (Case 1 via array_filter)
+ // ================================================================
+ explain {
+ sql """
+ SELECT id, a IS NULL, array_filter(x -> true, a)
+ FROM lambda_null_pruning_tbl ORDER BY id
+ """
+ contains "nested columns"
+ notContains "a.NULL"
+ }
+
+ order_qt_case4 """
+ SELECT id, a IS NULL, array_filter(x -> true, a)
+ FROM lambda_null_pruning_tbl ORDER BY id
+ """
+
+ // ================================================================
+ // Case 5: IS NOT NULL also registers [a.NULL] (plan check only, no result query)
+ // ================================================================
+ explain {
+ sql """
+ SELECT id, a IS NOT NULL, array_count(x -> true, a)
+ FROM lambda_null_pruning_tbl ORDER BY id
+ """
+ contains "nested columns"
+ notContains "a.NULL"
+ }
+
+ // ================================================================
+ // Case 6: cardinality (offset-only path, [a.OFFSET]) + array_count constant lambda
+ // ================================================================
+ explain {
+ sql """
+ SELECT id, cardinality(a), array_count(x -> true, a)
+ FROM lambda_null_pruning_tbl ORDER BY id
+ """
+ contains "nested columns"
+ notContains "a.OFFSET" // column a must not be reduced to its offset-only path
+ }
+
+ order_qt_case6 """
+ SELECT id, cardinality(a), array_count(x -> TRUE, a)
+ FROM lambda_null_pruning_tbl ORDER BY id
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]