This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch fix-null-offset-array
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 6d70a7f98195d711f7fa5fe88881e7bb8e097fd5
Author: englefly <[email protected]>
AuthorDate: Tue Jun 9 11:57:42 2026 +0800

    [fix](nereids) strip NULL access path when OFFSET path exists for the same field in NestedColumnPruning
    
    stripNullSuffixPaths() already contains the logic to remove [col.NULL] when
    [col.OFFSET] covers the same prefix (lines 834-839), but it was only called
    in the string-like and general branches, not in the array/map early-continue
    branch. This caused queries like `SELECT cardinality(arr), arr IS NULL` to
    emit redundant [arr.NULL] alongside [arr.OFFSET] in the access paths.
    
    Fix: move stripNullSuffixPaths() call to line 278 (before all type-specific
    early-continue branches) so it applies uniformly to all complex data types.
    
    Add regression tests covering array root, map root, struct string subfield,
    struct array subfield, and struct map subfield.
---
 .../nereids/rules/rewrite/NestedColumnPruning.java |  6 +-
 .../string_length_column_pruning.groovy            | 79 +++++++++++++++++++++-
 2 files changed, 79 insertions(+), 6 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
index 50416a29408..096b38cc0bf 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/NestedColumnPruning.java
@@ -275,7 +275,7 @@ public class NestedColumnPruning implements CustomRewriter {
             Slot slot = kv.getKey();
             DataTypeAccessTree accessTree = kv.getValue();
             DataType prunedDataType = 
accessTree.pruneDataType().orElse(slot.getDataType());
-
+            stripNullSuffixPaths(slot, allAccessPaths);
             if (slot.getDataType().isStringLikeType()) {
                 if (accessTree.hasStringOffsetOnlyAccess()) {
                     if (skipDataSkippingOnlyAccessPath) {
@@ -284,7 +284,6 @@ public class NestedColumnPruning implements CustomRewriter {
                     // Offset-only access (e.g. length(str_col)): type stays 
varchar,
                     // but we must still send the access path to BE so it 
skips the char data.
                     stripExactCoveredDataSkippingSuffixPaths(slot, 
allAccessPaths, allAccessPaths);
-                    stripNullSuffixPaths(slot, allAccessPaths);
                     List<ColumnAccessPath> allPaths = 
buildColumnAccessPaths(slot, allAccessPaths);
                     result.put(slot.getExprId().asInt(),
                             new AccessPathInfo(slot.getDataType(), allPaths, 
new ArrayList<>()));
@@ -359,9 +358,6 @@ public class NestedColumnPruning implements CustomRewriter {
             // of gating this logic on the root slot type.
             stripCoveredOffsetSuffixPaths(slot, allAccessPaths, 
allAccessPaths);
 
-            // Strip NULL-suffix paths when a non-NULL path also exists for 
the same slot.
-            // E.g. `SELECT col FROM t WHERE col IS NULL` — full data is 
needed, NULL path is redundant.
-            stripNullSuffixPaths(slot, allAccessPaths);
             List<ColumnAccessPath> allPaths = buildColumnAccessPaths(slot, 
allAccessPaths);
             if (shouldSkipAccessInfo(slot, prunedDataType, allPaths, 
predicateAccessPaths)) {
                 continue;
diff --git 
a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
 
b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
index 48c0cb37a49..8ee676d0315 100644
--- 
a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
+++ 
b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy
@@ -12,7 +12,7 @@
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
+// specific language governing permissions and limitations
 // under the License.
 
 // Regression tests for the string-length OFFSET-only optimization.
@@ -662,4 +662,81 @@ suite("string_length_column_pruning") {
         contains "OFFSET"
     }
     order_qt_length_varchar "select length(v) from slcp_varchar_tbl"
+
+    // ─── OFFSET covers NULL across all complex data types 
──────────────────────
+    //
+    // When both OFFSET and NULL access paths exist for the same 
field/subfield,
+    // the NULL path is redundant because the OFFSET data already provides
+    // nullness information for variable-length columns. stripNullSuffixPaths()
+    // removes [col.NULL] when [col.OFFSET] exists for the same prefix.
+    //
+    // This applies uniformly to all complex data types: array and map root
+    // columns, as well as struct subfields of string, array, or map type.
+
+    // Array root: cardinality(arr_col) -> [arr_col.OFFSET]
+    //             arr_col IS NULL      -> [arr_col.NULL]
+    // OFFSET covers NULL → [arr_col.NULL] must not appear.
+    explain {
+        sql "select cardinality(arr_col), arr_col is null from slcp_str_tbl"
+        contains "nested columns"
+        contains "arr_col.OFFSET"
+        notContains "arr_col.NULL"
+    }
+    sql "select cardinality(arr_col), arr_col is null from slcp_str_tbl"
+
+    // Map root: cardinality(map_col) -> [map_col.OFFSET]
+    //           map_col IS NULL      -> [map_col.NULL]
+    // OFFSET covers NULL → [map_col.NULL] must not appear.
+    explain {
+        sql "select cardinality(map_col), map_col is null from slcp_str_tbl"
+        contains "nested columns"
+        contains "map_col.OFFSET"
+        notContains "map_col.NULL"
+    }
+    sql "select cardinality(map_col), map_col is null from slcp_str_tbl"
+
+    // Struct string subfield: length(element_at(struct_col, 'f3')) -> 
[struct_col.f3.OFFSET]
+    //                         element_at(struct_col, 'f3') IS NULL  -> 
[struct_col.f3.NULL]
+    // OFFSET covers NULL → [struct_col.f3.NULL] must not appear.
+    explain {
+        sql """select length(element_at(struct_col, 'f3')),
+                     element_at(struct_col, 'f3') is null
+              from slcp_str_tbl"""
+        contains "nested columns"
+        contains "OFFSET"
+        notContains "struct_col.f3.NULL"
+    }
+    sql """select length(element_at(struct_col, 'f3')),
+                 element_at(struct_col, 'f3') is null
+          from slcp_str_tbl"""
+
+    // Struct array subfield: cardinality(element_at(s, 'arr')) -> 
[s.arr.OFFSET]
+    //                        element_at(s, 'arr') IS NULL      -> [s.arr.NULL]
+    // OFFSET covers NULL → [s.arr.NULL] must not appear.
+    explain {
+        sql """select cardinality(element_at(s, 'arr')),
+                     element_at(s, 'arr') is null
+              from slcp_struct_root_tbl"""
+        contains "nested columns"
+        contains "OFFSET"
+        notContains "s.arr.NULL"
+    }
+    sql """select cardinality(element_at(s, 'arr')),
+                 element_at(s, 'arr') is null
+          from slcp_struct_root_tbl"""
+
+    // Struct map subfield: cardinality(element_at(s, 'm')) -> [s.m.OFFSET]
+    //                      element_at(s, 'm') IS NULL      -> [s.m.NULL]
+    // OFFSET covers NULL → [s.m.NULL] must not appear.
+    explain {
+        sql """select cardinality(element_at(s, 'm')),
+                     element_at(s, 'm') is null
+              from slcp_struct_root_tbl"""
+        contains "nested columns"
+        contains "OFFSET"
+        notContains "s.m.NULL"
+    }
+    sql """select cardinality(element_at(s, 'm')),
+                 element_at(s, 'm') is null
+          from slcp_struct_root_tbl"""
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to