This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a2eb4d6eed6 [fix](NestedColumnPruning) collect full-access paths for 
array columns unreferenced in lambda body (#64436)
a2eb4d6eed6 is described below

commit a2eb4d6eed6d4131c8e044b741dd31558a3db31b
Author: minghong <[email protected]>
AuthorDate: Wed Jun 17 15:10:40 2026 +0800

    [fix](NestedColumnPruning) collect full-access paths for array columns 
unreferenced in lambda body (#64436)
    
    ### What problem does this PR solve?
    
    When a lambda body is a constant expression (e.g. x -> true) that never
    references the array item variable, visitArrayItemSlot is not called, so
    the bound array column's full-access path is never registered in
    slotToAccessPaths. This gap is exposed when IS NULL or cardinality() has
    already registered a data-skipping path ([col.NULL] or [col.OFFSET]) for
    the same slot — NestedColumnPruning then incorrectly prunes the complex
    column to null-only or offset-only.
    
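    Fix: in collectArrayPathInLambda, after visiting the body, check for each
    ArrayItemReference argument whether an ArrayItemSlot with the same ExprId
    occurs in the lambda body. getInputSlots() is not used for this check
    because it deliberately excludes ArrayItemSlot. If the argument is
    unreferenced, visit its bound array in a fresh CollectorContext with an
    ACCESS_ALL prefix to register the full-access path.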
    
    Regression test lambda_null_pruning covers:
    - Constant lambda body + IS NULL (the original bug)
    - Multi-variable lambda where body references some but not all vars
    - Array functions: array_count, array_map, array_filter
    - Data-skipping triggers: IS NULL, IS NOT NULL, cardinality
    
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here, e.g.
          https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
    
    ---------
    
    Co-authored-by: Claude Opus 4.8 <[email protected]>
---
 .../rewrite/AccessPathExpressionCollector.java     |  40 ++++++
 .../column_pruning/lambda_null_pruning.out         |  31 +++++
 .../column_pruning/lambda_null_pruning.groovy      | 150 +++++++++++++++++++++
 3 files changed, 221 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
index fac90248986..e7dbc5ca05d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java
@@ -25,6 +25,7 @@ import org.apache.doris.nereids.trees.expressions.Alias;
 import org.apache.doris.nereids.trees.expressions.ArrayItemReference;
 import 
org.apache.doris.nereids.trees.expressions.ArrayItemReference.ArrayItemSlot;
 import org.apache.doris.nereids.trees.expressions.Cast;
+import org.apache.doris.nereids.trees.expressions.ExprId;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.IsNull;
 import org.apache.doris.nereids.trees.expressions.Not;
@@ -75,6 +76,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Objects;
+import java.util.Set;
 import java.util.Stack;
 
 /**
@@ -618,6 +620,44 @@ public class AccessPathExpressionCollector extends 
DefaultExpressionVisitor<Void
         } finally {
             nameToLambdaArguments.pop();
         }
+
+        // After visiting the lambda body, for any bound array whose lambda 
variable
+        // was NOT referenced in the body (e.g. x -> true where x never 
appears),
+        // visitArrayItemSlot was never called and the array column's access 
path is
+        // missing. This gap is exposed when an is-null or offset-only path 
has been
+        // registered for the same slot — NestedColumnPruning then incorrectly 
prunes
+        // the complex column to null-only / offset-only instead of reading 
full data.
+        //
+        // Detect usage by scanning the lambda body for ArrayItemSlots matching the
+        // argument's ExprId, which is more reliable than getInputSlots() that deliberately
+        // excludes ArrayItemSlot and may falsely match outer slots.
+        //
+        // Must use a fresh context: when the body DOES reference some 
variables
+        // (e.g. (x,y) -> x > 0), visitArrayItemSlot mutates 
context.accessPathBuilder
+        // in-place (addPrefix without cleanup). A fresh context isolates the 
fallback
+        // path for unreferenced variables from pollution by referenced ones.
+        for (Expression argument : arguments) {
+            if (argument instanceof ArrayItemReference) {
+                ExprId argExprId = ((ArrayItemReference) argument).getExprId();
+                Set<ArrayItemSlot> arrayItemSlots = arguments.get(0)
+                        .<ArrayItemSlot>collect(e -> e instanceof 
ArrayItemSlot);
+                boolean isReferenced = false;
+                for (ArrayItemSlot slot : arrayItemSlots) {
+                    if (slot.getExprId().equals(argExprId)) {
+                        isReferenced = true;
+                        break;
+                    }
+                }
+                if (!isReferenced) {
+                    Expression boundArray = argument.child(0);
+                    CollectorContext fullAccessCtx = new CollectorContext(
+                            context.statementContext, context.bottomFilter);
+                    
fullAccessCtx.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_ALL);
+                    continueCollectAccessPath(boundArray, fullAccessCtx);
+                }
+            }
+        }
+
         return null;
     }
 
diff --git 
a/regression-test/data/nereids_rules_p0/column_pruning/lambda_null_pruning.out 
b/regression-test/data/nereids_rules_p0/column_pruning/lambda_null_pruning.out
new file mode 100644
index 00000000000..1fadb109ebf
--- /dev/null
+++ 
b/regression-test/data/nereids_rules_p0/column_pruning/lambda_null_pruning.out
@@ -0,0 +1,31 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !case1 --
+1      false   3
+2      true    0
+3      false   0
+4      false   1
+
+-- !case2 --
+1      false   false   [1, 2, 3]
+2      true    true    \N
+3      false   false   []
+4      false   false   [null]
+
+-- !case3 --
+1      false   false   [11, 22, 33]
+2      true    true    \N
+3      false   false   []
+4      false   false   [null]
+
+-- !case4 --
+1      false   [1, 2, 3]
+2      true    \N
+3      false   []
+4      false   [null]
+
+-- !case6 --
+1      3       3
+2      \N      0
+3      0       0
+4      1       1
+
diff --git 
a/regression-test/suites/nereids_rules_p0/column_pruning/lambda_null_pruning.groovy
 
b/regression-test/suites/nereids_rules_p0/column_pruning/lambda_null_pruning.groovy
new file mode 100644
index 00000000000..2d55528f184
--- /dev/null
+++ 
b/regression-test/suites/nereids_rules_p0/column_pruning/lambda_null_pruning.groovy
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("lambda_null_pruning") {
+    sql """ DROP TABLE IF EXISTS lambda_null_pruning_tbl """
+    sql """
+        CREATE TABLE lambda_null_pruning_tbl (
+            id  INT,
+            a   ARRAY<INT> NULL,
+            b   ARRAY<INT> NULL
+        ) ENGINE = OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+    """
+    sql """
+        INSERT INTO lambda_null_pruning_tbl VALUES
+            (1, [1, 2, 3],    [10, 20, 30]),
+            (2, NULL,          NULL),
+            (3, [],            []),
+            (4, [null],        [1])
+    """
+
+    // ================================================================
+    // Case 1: single-variable constant lambda body + IS NULL
+    // body = Literal(true), array item variable unreferenced
+    // Before the fix, collectArrayPathInLambda did not register the full-access path,
+    // so once IS NULL had registered [a.NULL] the column was pruned to null-only.
+    // ================================================================
+    explain {
+        sql """
+            SELECT id, a IS NULL, array_count(x -> true, a)
+            FROM lambda_null_pruning_tbl ORDER BY id
+        """
+        contains "nested columns"
+        notContains "a.NULL"
+    }
+
+    order_qt_case1 """
+        SELECT id, a IS NULL, array_count(x -> true, a)
+        FROM lambda_null_pruning_tbl ORDER BY id
+    """
+
+    // ================================================================
+    // Case 2: two-variable lambda, body references x but not y
+    // array_map((x, y) -> x, a, b):
+    //   x -> body references -> visitArrayItemSlot fires -> [a, *] OK
+    //   y -> body does NOT reference -> visitArrayItemSlot missing
+    //   b IS NULL -> [b, NULL] registered -> bug triggered
+    // After fix: fallback adds [b, *] for unreferenced y
+    // ================================================================
+    explain {
+        sql """
+            SELECT id, a IS NULL, b IS NULL,
+                   array_map((x, y) -> x, a, b)
+            FROM lambda_null_pruning_tbl ORDER BY id
+        """
+        contains "nested columns"
+        notContains "a.NULL"
+        notContains "b.NULL"
+    }
+
+    order_qt_case2 """
+        SELECT id, a IS NULL, b IS NULL,
+               array_map((x, y) -> x, a, b)
+        FROM lambda_null_pruning_tbl ORDER BY id
+    """
+
+    // ================================================================
+    // Case 3: two-variable lambda, body references both
+    // array_map((x, y) -> x + y, a, b):
+    //   both x and y referenced -> visitArrayItemSlot fires for both
+    // ================================================================
+    explain {
+        sql """
+            SELECT id, a IS NULL, b IS NULL,
+                   array_map((x, y) -> x + y, a, b)
+            FROM lambda_null_pruning_tbl ORDER BY id
+        """
+        contains "nested columns"
+        notContains "a.NULL"
+        notContains "b.NULL"
+    }
+
+    order_qt_case3 """
+        SELECT id, a IS NULL, b IS NULL,
+               array_map((x, y) -> x + y, a, b)
+        FROM lambda_null_pruning_tbl ORDER BY id
+    """
+
+    // ================================================================
+    // Case 4: array_filter constant lambda + IS NULL
+    // ================================================================
+    explain {
+        sql """
+            SELECT id, a IS NULL, array_filter(x -> true, a)
+            FROM lambda_null_pruning_tbl ORDER BY id
+        """
+        contains "nested columns"
+        notContains "a.NULL"
+    }
+
+    order_qt_case4 """
+        SELECT id, a IS NULL, array_filter(x -> true, a)
+        FROM lambda_null_pruning_tbl ORDER BY id
+    """
+
+    // ================================================================
+    // Case 5: IS NOT NULL also registers [a.NULL]
+    // ================================================================
+    explain {
+        sql """
+            SELECT id, a IS NOT NULL, array_count(x -> true, a)
+            FROM lambda_null_pruning_tbl ORDER BY id
+        """
+        contains "nested columns"
+        notContains "a.NULL"
+    }
+
+    // ================================================================
+    // Case 6: cardinality (OFFSET path) + array_count constant lambda
+    // ================================================================
+    explain {
+        sql """
+            SELECT id, cardinality(a), array_count(x -> true, a)
+            FROM lambda_null_pruning_tbl ORDER BY id
+        """
+        contains "nested columns"
+        notContains "a.OFFSET"
+    }
+
+    order_qt_case6 """
+        SELECT id, cardinality(a), array_count(x -> TRUE, a)
+        FROM lambda_null_pruning_tbl ORDER BY id
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to