github-actions[bot] commented on code in PR #63736:
URL: https://github.com/apache/doris/pull/63736#discussion_r3317497710


##########
regression-test/suites/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.groovy:
##########
@@ -0,0 +1,342 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("topn_lazy_nested_column_pruning") {
+    sql """ set topn_lazy_materialization_threshold=1024; """
+    sql """ DROP TABLE IF EXISTS ncp_tbl """
+    sql """
+        CREATE TABLE ncp_tbl (
+            id          INT,
+            str_col     STRING NULL,
+            struct_col  STRUCT<city: STRING, zip: INT> NULL,
+            arr_col     ARRAY<INT> NULL,
+            map_col     MAP<STRING, INT> NULL,
+            int_col     INT NULL
+        ) ENGINE = OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES ("replication_allocation" = "tag.location.default: 1")
+    """
+
+    sql """
+        INSERT INTO ncp_tbl VALUES
+            (1, 'hello', named_struct('city', null, 'zip', 10001), [1, 2, 3], 
{'a': 1, 'b': 2 }, 1)
+    """
+
+        sql """
+    drop table if exists vt;
+    CREATE TABLE IF NOT EXISTS vt (
+        id BIGINT NOT NULL,
+        s varchar(100) null,
+        payload VARIANT<
+            'name' : STRING,
+            'age' : INT
+        > NULL
+    )
+    ENGINE = OLAP
+    DUPLICATE KEY(`id`)
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES (
+        "replication_num" = "1",
+        "storage_format" = "V3"
+    );
+
+
+    INSERT INTO vt VALUES
+        (1, 'aaa', '{"name": "张三", "age": 25}'),
+        (2, 'bbb', '{"name": "李四", "age": 30}'),
+        (3, 'ccc', '{"name": "王五", "age": 28, "city": "北京"}');
+    """
+
+    // =============================================
+    // Test 1: STRUCT type - lazy mat + nested column pruning
+    // =============================================
+    explain {
+        sql """
+            select id, substring(struct_element(struct_col, 'city'), 1) as city
+            from ncp_tbl
+            order by id
+            limit 3
+        """
+        contains("VMaterializeNode")
+        // struct_col lazy, scan only outputs id + rowId
+        contains("final projections: id[#0], 
__DORIS_GLOBAL_ROWID_COL__ncp_tbl[#6]")
+        // nested column pruning: struct_col pruned to city only
+        contains("nested columns:")
+        contains("pruned type: struct<city:text>")
+        contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__ncp_tbl]")
+    }
+
+    // =============================================
+    // Test 2: STRUCT with select * - struct_col explicit in output
+    // =============================================
+    explain {
+        sql """
+            select *, substring(struct_element(struct_col, 'city'), 1) as city
+            from ncp_tbl
+            order by id
+            limit 3
+        """
+        contains("VMaterializeNode")
+        contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__ncp_tbl]")
+    }
+
+    // =============================================
+    // Test 3: VARIANT type - lazy mat + sub path pruning
+    // =============================================
+    explain {
+        sql """
+            select id, s, substring(element_at(payload, 'name'), 1) as name
+            from vt
+            order by id
+            limit 3
+        """
+        contains("VMaterializeNode")
+        // payload lazy, scan only outputs id + rowId
+        contains("final projections: id[#0], __DORIS_GLOBAL_ROWID_COL__vt[#4]")
+        // sub path pruning for variant
+        contains("nested columns:")
+        contains("sub path: [name]")
+        contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__vt]")
+    }
+
+    // =============================================
+    // Test 4: STRUCT - verify actual query results
+    // =============================================
+    qt_struct_result """
+        select id, substring(struct_element(struct_col, 'city'), 1) as city
+        from ncp_tbl
+        order by id
+        limit 3
+    """
+
+    // =============================================
+    // Test 5: VARIANT - verify actual query results
+    // =============================================
+    qt_variant_result """
+        select id, s, substring(element_at(payload, 'name'), 1) as name
+        from vt
+        order by id
+        limit 3
+    """
+
+    // =============================================
+    // Test 6: MAP subscript - lazy mat + nested column pruning
+    // =============================================
+    explain {
+        sql """
+            select id, element_at(map_col, 'a') as val
+            from ncp_tbl
+            order by id
+            limit 3
+        """
+        contains("VMaterializeNode")
+        // map_col lazy, scan only outputs id + rowId
+        contains("final projections: id[#0], 
__DORIS_GLOBAL_ROWID_COL__ncp_tbl[#6]")
+        contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__ncp_tbl]")
+    }
+
+    // =============================================
+    // Test 7: MAP subscript - verify actual query results
+    // =============================================
+    qt_map_result """
+        select id, element_at(map_col, 'a') as val
+        from ncp_tbl
+        order by id
+        limit 3
+    """
+
+    // =============================================
+    // Test 8: ARRAY subscript - lazy mat + nested column pruning
+    // =============================================
+    explain {
+        sql """
+            select id, element_at(arr_col, 1) as val
+            from ncp_tbl
+            order by id
+            limit 3
+        """
+        contains("VMaterializeNode")
+        // arr_col lazy, scan only outputs id + rowId
+        contains("final projections: id[#0], 
__DORIS_GLOBAL_ROWID_COL__ncp_tbl[#6]")
+        contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__ncp_tbl]")
+    }
+
+    // =============================================
+    // Test 9: ARRAY subscript - verify actual query results
+    // =============================================
+    qt_array_result """
+        select id, element_at(arr_col, 1) as val
+        from ncp_tbl
+        order by id
+        limit 3
+    """
+
+    // =============================================
+    // Test 10: Multi-level VARIANT nested - insert nested data
+    // =============================================
+    sql """
+        INSERT INTO vt VALUES
+            (4, 'ddd', '{"address": {"city": "上海", "zip": "200000"}}'),
+            (5, 'eee', '{"address": {"city": "北京", "zip": "100000"}}')
+    """
+
+    // =============================================
+    // Test 11: Multi-level VARIANT nested - explain
+    //   Access payload['address']['city'] via two levels:
+    //   inner payload['address'] → variant with subColPath [address]
+    //   outer element_at(..., 'city') → final value
+    // =============================================
+    explain {
+        sql """
+            select id, element_at(payload['address'], 'city') as city
+            from vt
+            where id >= 4
+            order by id
+            limit 3
+        """
+        contains("VMaterializeNode")
+        // payload lazy, scan only outputs id + rowId
+        contains("final projections: id[#0], __DORIS_GLOBAL_ROWID_COL__vt[#4]")
+        // sub path pruning for variant: only read address sub-path during 
materialization
+        contains("nested columns:")
+        contains("sub path: [address.city]")
+        contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__vt]")
+    }
+
+    // =============================================
+    // Test 12: Multi-level VARIANT nested - verify query results
+    // =============================================
+    qt_variant_nested_result """
+        select id, element_at(payload['address'], 'city') as city
+        from vt
+        where id >= 4
+        order by id
+        limit 3
+    """
+
+    // =============================================
+    // Test 13: using_index=true with MAP subscript
+    //   Verify that map/array lazy mat still works when
+    //   topn_lazy_materialization_using_index is enabled.
+    //   Regression test for the risk that MaterializeProbeVisitor
+    //   .visitPhysicalProject skips alias→child slot tracing
+    //   when using_index=true, which could prevent base columns
+    //   from being probed as lazy candidates.
+    // =============================================
+    sql """ set topn_lazy_materialization_using_index = true; """
+    explain {
+        sql """
+            select id, element_at(map_col, 'a') as val
+            from ncp_tbl
+            order by id
+            limit 3
+        """
+        contains("VMaterializeNode")
+        contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__ncp_tbl]")
+    }
+    qt_map_using_index_result """
+        select id, element_at(map_col, 'a') as val
+        from ncp_tbl
+        order by id
+        limit 3
+    """
+    sql """ set topn_lazy_materialization_using_index = false; """
+
+    // =============================================
+    // Test 14: using_index=true with ARRAY subscript
+    // =============================================
+    sql """ set topn_lazy_materialization_using_index = true; """
+    explain {
+        sql """
+            select id, element_at(arr_col, 1) as val
+            from ncp_tbl
+            order by id
+            limit 3
+        """
+        contains("VMaterializeNode")
+        contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__ncp_tbl]")
+    }
+    qt_array_using_index_result """
+        select id, element_at(arr_col, 1) as val
+        from ncp_tbl
+        order by id
+        limit 3
+    """
+    sql """ set topn_lazy_materialization_using_index = false; """
+
+    // =============================================
+    // Test 15: STRUCT nested expr BEFORE id — verify column order preserved
+    //   SELECT city_expr, id should produce [city, id] not [id, city]
+    // =============================================
+    explain {
+        sql """
+            select substring(struct_element(struct_col, 'city'), 1) as city, id
+            from ncp_tbl
+            order by id
+            limit 3
+        """
+        contains("VMaterializeNode")
+        contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__ncp_tbl]")
+    }
+    qt_struct_col_order_result """

Review Comment:
   These three new `qt_` blocks (`qt_struct_col_order_result`,
`qt_variant_col_order_result`, and `qt_map_col_order_result`) do not have
corresponding sections in
`regression-test/data/nereids_rules_p0/column_pruning/topn_lazy_nested_column_pruning.out`.
The regression runner compares every `qt_` label against the `.out` file, so
this suite will fail before it can validate the intended column-order behavior.
Please regenerate/update the `.out` file to include
`-- !struct_col_order_result --`, `-- !variant_col_order_result --`, and
`-- !map_col_order_result --` with the actual ordered results.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to