(doris) branch branch-4.0 updated: branch-4.0: [fix](parquet) Fix struct column reading error when all queried fields are missing after schema evolution #59586 (#59839)

yiguolei Tue, 13 Jan 2026 17:59:16 -0800

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new d53be9fef84 branch-4.0: [fix](parquet) Fix struct column reading error 
when all queried fields are missing after schema evolution #59586 (#59839)
d53be9fef84 is described below

commit d53be9fef84db469e255a7890b2c6c3cc17f7a21
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Jan 14 09:58:51 2026 +0800

    branch-4.0: [fix](parquet) Fix struct column reading error when all queried 
fields are missing after schema evolution #59586 (#59839)
    
    Cherry-picked from #59586
    
    Co-authored-by: Socrates <[email protected]>
---
 .../exec/format/parquet/vparquet_column_reader.cpp |  12 +-
 .../create_preinstalled_scripts/iceberg/run24.sql  | 151 ++++++++++++++++
 .../test_iceberg_struct_schema_evolution.out       | 161 +++++++++++++++++
 .../test_iceberg_struct_schema_evolution.groovy    | 194 +++++++++++++++++++++
 4 files changed, 514 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index e24ec85b2e1..00bdc51844e 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -920,10 +920,14 @@ Status StructColumnReader::read_column_data(
             size_t field_rows = 0;
             bool field_eof = false;
 
-            // Use root_node to get the correct child node for the reference column
-            // reference_file_column_name is the file column name, use get_children_node_by_file_column_name
-            auto ref_child_node =
-                    root_node->get_children_node_by_file_column_name(reference_file_column_name);
+            // Use ConstNode for the reference column instead of looking up from root_node.
+            // The reference column is only used to get RL/DL information for determining the number
+            // of elements in the struct. It may be a column that has been dropped from the table
+            // schema (e.g., 'removed' field), but still exists in older parquet files.
+            // Since we don't need schema mapping for this column (we just need its RL/DL levels),
+            // using ConstNode is safe and avoids the issue where the reference column doesn't exist
+            // in root_node (because it was dropped from table schema).
+            auto ref_child_node = TableSchemaChangeHelper::ConstNode::get_instance();
             not_missing_orig_column_size = temp_column->size();
 
             RETURN_IF_ERROR((*reference_reader)
diff --git 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql
 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql
new file mode 100644
index 00000000000..b5b19b1f15c
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql
@@ -0,0 +1,151 @@
+use demo.test_db;
+
+DROP TABLE IF EXISTS test_struct_evolution;
+
+-- Test case for struct schema evolution bug
+-- Bug scenario: When querying a struct field after schema evolution, if all 
queried fields are missing
+-- in old Parquet files, the code tries to find a reference column from file 
schema. However, if the
+-- reference column (e.g., 'removed') was dropped from table schema, accessing 
it via root_node will fail.
+--
+-- Steps to reproduce:
+-- 1. Create table with struct containing: removed, rename, keep, drop_and_add
+-- 2. Insert data (creates Parquet file with these fields)
+-- 3. DROP a_struct.removed - removes field from table schema
+-- 4. DROP a_struct.drop_and_add then ADD a_struct.drop_and_add - gets new 
field ID
+-- 5. ADD a_struct.added - adds new field
+-- 6. Query struct_element(a_struct, 'drop_and_add') or 
struct_element(a_struct, 'added')
+--    -> This will fail because all queried fields are missing in old file, 
and the reference
+--       column 'removed' doesn't exist in root_node (it was dropped from 
table schema)
+
+-- Step 1: Create table
+CREATE TABLE test_struct_evolution (
+    id BIGINT,
+    a_struct STRUCT<removed: BIGINT, rename: BIGINT, keep: BIGINT, 
drop_and_add: BIGINT>
+) USING ICEBERG
+TBLPROPERTIES ('write.format.default' = 'parquet', 'format-version' = 2);
+
+-- Step 2: Insert data (creates Parquet file with original schema)
+INSERT INTO test_struct_evolution 
+SELECT 1, named_struct('removed', 10, 'rename', 11, 'keep', 12, 
'drop_and_add', 13);
+
+-- Step 3: Schema evolution - drop removed field
+ALTER TABLE test_struct_evolution DROP COLUMN a_struct.removed;
+
+-- Step 4: Rename field (field ID stays the same)
+ALTER TABLE test_struct_evolution RENAME COLUMN a_struct.rename TO renamed;
+
+-- Step 5: Drop and add drop_and_add (new field ID)
+ALTER TABLE test_struct_evolution DROP COLUMN a_struct.drop_and_add;
+ALTER TABLE test_struct_evolution ADD COLUMN a_struct.drop_and_add BIGINT;
+
+-- Step 6: Add new field
+ALTER TABLE test_struct_evolution ADD COLUMN a_struct.added BIGINT;
+
+-- Step 7: Insert new data after schema evolution (creates new Parquet file)
+INSERT INTO test_struct_evolution 
+SELECT 2, named_struct('renamed', 21, 'keep', 22, 'drop_and_add', 23, 'added', 
24);
+
+-- Now the table contains two Parquet files:
+-- - Old file: contains removed, rename, keep, drop_and_add (old field ID)
+-- - New file: contains renamed, keep, drop_and_add (new field ID), added
+--
+-- Querying struct_element(a_struct, 'drop_and_add') or 
struct_element(a_struct, 'added')
+-- on the old file will trigger the bug
+
+-- ============================================================
+-- ORC format test table (for completeness, though ORC doesn't have the same 
bug)
+-- ============================================================
+DROP TABLE IF EXISTS test_struct_evolution_orc;
+
+-- Create ORC format table with same schema evolution scenario
+CREATE TABLE test_struct_evolution_orc (
+    id BIGINT,
+    a_struct STRUCT<removed: BIGINT, rename: BIGINT, keep: BIGINT, 
drop_and_add: BIGINT>
+) USING ICEBERG
+TBLPROPERTIES ('write.format.default' = 'orc', 'format-version' = 2);
+
+-- Insert initial data (creates ORC file with original schema)
+INSERT INTO test_struct_evolution_orc 
+SELECT 1, named_struct('removed', 10, 'rename', 11, 'keep', 12, 
'drop_and_add', 13);
+
+-- Schema evolution - same operations as Parquet table
+ALTER TABLE test_struct_evolution_orc DROP COLUMN a_struct.removed;
+ALTER TABLE test_struct_evolution_orc RENAME COLUMN a_struct.rename TO renamed;
+ALTER TABLE test_struct_evolution_orc DROP COLUMN a_struct.drop_and_add;
+ALTER TABLE test_struct_evolution_orc ADD COLUMN a_struct.drop_and_add BIGINT;
+ALTER TABLE test_struct_evolution_orc ADD COLUMN a_struct.added BIGINT;
+
+-- Insert new data after schema evolution (creates new ORC file)
+INSERT INTO test_struct_evolution_orc 
+SELECT 2, named_struct('renamed', 21, 'keep', 22, 'drop_and_add', 23, 'added', 
24);
+
+-- ============================================================
+-- Case sensitivity test table (mixed case field names)
+-- ============================================================
+DROP TABLE IF EXISTS test_struct_evolution_case;
+
+-- Test case for struct schema evolution with mixed case field names
+-- This tests that case sensitivity is handled correctly when:
+-- - Field names have mixed case (e.g., REMOVED, rename, keep, drop_and_add)
+-- - Schema evolution operations are performed
+-- - Querying struct fields with different case patterns
+
+-- Step 1: Create table with mixed case field names
+CREATE TABLE test_struct_evolution_case (
+    id BIGINT,
+    a_struct STRUCT<REMOVED: BIGINT, rename: BIGINT, keep: BIGINT, 
drop_and_add: BIGINT>
+) USING ICEBERG
+TBLPROPERTIES ('write.format.default' = 'parquet', 'format-version' = 2);
+
+-- Step 2: Insert data (creates Parquet file with original schema)
+INSERT INTO test_struct_evolution_case 
+SELECT 1, named_struct('REMOVED', 10, 'rename', 11, 'keep', 12, 
'drop_and_add', 13);
+
+-- Step 3: Schema evolution - drop REMOVED field (uppercase)
+ALTER TABLE test_struct_evolution_case DROP COLUMN a_struct.REMOVED;
+
+-- Step 4: Rename field (field ID stays the same)
+ALTER TABLE test_struct_evolution_case RENAME COLUMN a_struct.rename TO 
renamed;
+
+-- Step 5: Drop and add drop_and_add with case change (new field ID)
+-- Initial: drop_and_add (lowercase), after re-add: DROP_AND_ADD (uppercase)
+ALTER TABLE test_struct_evolution_case DROP COLUMN a_struct.drop_and_add;
+ALTER TABLE test_struct_evolution_case ADD COLUMN a_struct.DROP_AND_ADD BIGINT;
+
+-- Step 6: Add new field
+ALTER TABLE test_struct_evolution_case ADD COLUMN a_struct.added BIGINT;
+
+-- Step 7: Insert new data after schema evolution (creates new Parquet file)
+-- Note: Use DROP_AND_ADD (uppercase) in the new data
+INSERT INTO test_struct_evolution_case 
+SELECT 2, named_struct('renamed', 21, 'keep', 22, 'DROP_AND_ADD', 23, 'added', 
24);
+
+-- ============================================================
+-- ORC format test table with mixed case (for completeness)
+-- ============================================================
+DROP TABLE IF EXISTS test_struct_evolution_case_orc;
+
+-- Create ORC format table with same schema evolution scenario and mixed case
+CREATE TABLE test_struct_evolution_case_orc (
+    id BIGINT,
+    a_struct STRUCT<REMOVED: BIGINT, rename: BIGINT, keep: BIGINT, 
drop_and_add: BIGINT>
+) USING ICEBERG
+TBLPROPERTIES ('write.format.default' = 'orc', 'format-version' = 2);
+
+-- Insert initial data (creates ORC file with original schema)
+INSERT INTO test_struct_evolution_case_orc 
+SELECT 1, named_struct('REMOVED', 10, 'rename', 11, 'keep', 12, 
'drop_and_add', 13);
+
+-- Schema evolution - same operations as Parquet table
+ALTER TABLE test_struct_evolution_case_orc DROP COLUMN a_struct.REMOVED;
+ALTER TABLE test_struct_evolution_case_orc RENAME COLUMN a_struct.rename TO 
renamed;
+-- Drop and add with case change: drop_and_add (lowercase) -> DROP_AND_ADD 
(uppercase)
+ALTER TABLE test_struct_evolution_case_orc DROP COLUMN a_struct.drop_and_add;
+ALTER TABLE test_struct_evolution_case_orc ADD COLUMN a_struct.DROP_AND_ADD 
BIGINT;
+ALTER TABLE test_struct_evolution_case_orc ADD COLUMN a_struct.added BIGINT;
+
+-- Insert new data after schema evolution (creates new ORC file)
+-- Note: Use DROP_AND_ADD (uppercase) in the new data
+INSERT INTO test_struct_evolution_case_orc 
+SELECT 2, named_struct('renamed', 21, 'keep', 22, 'DROP_AND_ADD', 23, 'added', 
24);
+
diff --git 
a/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out
 
b/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out
new file mode 100644
index 00000000000..a364316df42
--- /dev/null
+++ 
b/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out
@@ -0,0 +1,161 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !desc --
+id     bigint  Yes     true    \N      
+a_struct       
struct<renamed:bigint,keep:bigint,drop_and_add:bigint,added:bigint>     Yes     
true    \N      
+
+-- !select_all --
+1      {"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+2      {"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !struct_keep --
+12
+22
+
+-- !struct_renamed --
+11
+21
+
+-- !struct_drop_and_add --
+\N
+23
+
+-- !struct_added --
+\N
+24
+
+-- !struct_full --
+{"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+{"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !struct_predicate_1 --
+1
+
+-- !struct_predicate_2 --
+1
+
+-- !struct_predicate_3 --
+1
+
+-- !struct_predicate_4 --
+2
+
+-- !struct_multi --
+11     12      \N      \N
+21     22      23      24
+
+-- !struct_distinct --
+11     \N      12
+21     24      22
+
+-- !orc_desc --
+id     bigint  Yes     true    \N      
+a_struct       
struct<renamed:bigint,keep:bigint,drop_and_add:bigint,added:bigint>     Yes     
true    \N      
+
+-- !orc_select_all --
+1      {"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+2      {"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !orc_struct_keep --
+12
+22
+
+-- !orc_struct_renamed --
+11
+21
+
+-- !orc_struct_drop_and_add --
+\N
+23
+
+-- !orc_struct_added --
+\N
+24
+
+-- !orc_struct_full --
+{"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+{"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !orc_struct_multi --
+11     12      \N      \N
+21     22      23      24
+
+-- !case_desc --
+id     bigint  Yes     true    \N      
+a_struct       
struct<renamed:bigint,keep:bigint,drop_and_add:bigint,added:bigint>     Yes     
true    \N      
+
+-- !case_select_all --
+1      {"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+2      {"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !case_struct_keep --
+12
+22
+
+-- !case_struct_renamed --
+11
+21
+
+-- !case_struct_drop_and_add --
+\N
+23
+
+-- !case_struct_added --
+\N
+24
+
+-- !case_struct_full --
+{"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+{"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !case_struct_predicate_1 --
+1
+
+-- !case_struct_predicate_2 --
+1
+
+-- !case_struct_predicate_3 --
+1
+
+-- !case_struct_predicate_4 --
+2
+
+-- !case_struct_multi --
+11     12      \N      \N
+21     22      23      24
+
+-- !case_struct_distinct --
+11     \N      12
+21     24      22
+
+-- !case_orc_desc --
+id     bigint  Yes     true    \N      
+a_struct       
struct<renamed:bigint,keep:bigint,drop_and_add:bigint,added:bigint>     Yes     
true    \N      
+
+-- !case_orc_select_all --
+1      {"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+2      {"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !case_orc_struct_keep --
+12
+22
+
+-- !case_orc_struct_renamed --
+11
+21
+
+-- !case_orc_struct_drop_and_add --
+\N
+23
+
+-- !case_orc_struct_added --
+\N
+24
+
+-- !case_orc_struct_full --
+{"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+{"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !case_orc_struct_multi --
+11     12      \N      \N
+21     22      23      24
+
diff --git 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy
 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy
new file mode 100644
index 00000000000..f4e95fa4fbd
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy
@@ -0,0 +1,194 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Test for struct field schema evolution in Iceberg tables.
+// This test case verifies the fix for the bug where querying a struct field 
+// that was added after schema evolution fails when all queried columns are 
+// missing in the original file, and the reference column used for RL/DL 
+// was dropped from the table schema.
+//
+// Bug: "File column name 'removed' not found in struct children"
+// Fix: Use ConstNode for reference column when reading RL/DL information
+//
+// Prerequisites: 
+// - Tables created by run24.sql in docker iceberg scripts
+
+suite("test_iceberg_struct_schema_evolution", 
"p0,external,doris,external_docker,external_docker_doris") {
+
+    String enabled = context.config.otherConfigs.get("enableIcebergTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("disable iceberg test.")
+        return
+    }
+
+    String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port")
+    String minio_port = context.config.otherConfigs.get("iceberg_minio_port")
+    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+    String catalog_name = "test_iceberg_struct_schema_evolution"
+
+    sql """drop catalog if exists ${catalog_name}"""
+    sql """
+    CREATE CATALOG ${catalog_name} PROPERTIES (
+        'type'='iceberg',
+        'iceberg.catalog.type'='rest',
+        'uri' = 'http://${externalEnvIp}:${rest_port}',
+        "s3.access_key" = "admin",
+        "s3.secret_key" = "password",
+        "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+        "s3.region" = "us-east-1"
+    );"""
+
+    logger.info("catalog " + catalog_name + " created")
+    sql """switch ${catalog_name};"""
+    logger.info("switched to catalog " + catalog_name)
+    sql """use test_db;"""
+
+    sql """set enable_fallback_to_original_planner=false;"""
+
+    def table_name = "test_struct_evolution"
+
+    // Verify table schema after evolution
+    qt_desc """DESC ${table_name}"""
+
+    // Test 1: Query all columns - should work
+    qt_select_all """SELECT * FROM ${table_name} ORDER BY id"""
+
+    // Test 2: Query struct field that exists in both old and new files
+    qt_struct_keep """SELECT struct_element(a_struct, 'keep') FROM 
${table_name} ORDER BY id"""
+    qt_struct_renamed """SELECT struct_element(a_struct, 'renamed') FROM 
${table_name} ORDER BY id"""
+
+    // Test 3: Query struct field that was dropped and re-added (BUG FIX TEST)
+    // This query would crash before the fix with:
+    // "Not support read struct 'a_struct' which columns are all missing"
+    // or "File column name 'removed' not found in struct children"
+    qt_struct_drop_and_add """SELECT struct_element(a_struct, 'drop_and_add') 
FROM ${table_name} ORDER BY id"""
+
+    // Test 4: Query struct field that was newly added (BUG FIX TEST)
+    qt_struct_added """SELECT struct_element(a_struct, 'added') FROM 
${table_name} ORDER BY id"""
+
+    // Test 5: Query entire struct column
+    qt_struct_full """SELECT a_struct FROM ${table_name} ORDER BY id"""
+
+    // Test 6: Query with predicate on struct field
+    qt_struct_predicate_1 """SELECT id FROM ${table_name} WHERE 
struct_element(a_struct, 'renamed') = 11 ORDER BY id"""
+    qt_struct_predicate_2 """SELECT id FROM ${table_name} WHERE 
struct_element(a_struct, 'drop_and_add') IS NULL ORDER BY id"""
+    qt_struct_predicate_3 """SELECT id FROM ${table_name} WHERE 
struct_element(a_struct, 'added') IS NULL ORDER BY id"""
+    qt_struct_predicate_4 """SELECT id FROM ${table_name} WHERE 
struct_element(a_struct, 'added') IS NOT NULL ORDER BY id"""
+
+    // Test 7: Multiple struct fields in one query
+    qt_struct_multi """SELECT struct_element(a_struct, 'renamed'), 
struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'), 
struct_element(a_struct, 'added') FROM ${table_name} ORDER BY id"""
+
+    // Test 8: DISTINCT query on struct fields
+    qt_struct_distinct """SELECT DISTINCT struct_element(a_struct, 'renamed'), 
struct_element(a_struct, 'added'), struct_element(a_struct, 'keep') FROM 
${table_name} ORDER BY 1, 2, 3"""
+
+    // ============================================================
+    // Test with ORC format (for completeness)
+    // ============================================================
+    def orc_table_name = "test_struct_evolution_orc"
+
+    // Verify ORC table schema after evolution
+    qt_orc_desc """DESC ${orc_table_name}"""
+
+    // Test 1: Query all columns - should work
+    qt_orc_select_all """SELECT * FROM ${orc_table_name} ORDER BY id"""
+
+    // Test 2: Query struct field that exists in both old and new files
+    qt_orc_struct_keep """SELECT struct_element(a_struct, 'keep') FROM 
${orc_table_name} ORDER BY id"""
+    qt_orc_struct_renamed """SELECT struct_element(a_struct, 'renamed') FROM 
${orc_table_name} ORDER BY id"""
+
+    // Test 3: Query struct field that was dropped and re-added
+    qt_orc_struct_drop_and_add """SELECT struct_element(a_struct, 
'drop_and_add') FROM ${orc_table_name} ORDER BY id"""
+
+    // Test 4: Query struct field that was newly added
+    qt_orc_struct_added """SELECT struct_element(a_struct, 'added') FROM 
${orc_table_name} ORDER BY id"""
+
+    // Test 5: Query entire struct column
+    qt_orc_struct_full """SELECT a_struct FROM ${orc_table_name} ORDER BY id"""
+
+    // Test 6: Multiple struct fields in one query
+    qt_orc_struct_multi """SELECT struct_element(a_struct, 'renamed'), 
struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'), 
struct_element(a_struct, 'added') FROM ${orc_table_name} ORDER BY id"""
+
+    // ============================================================
+    // Test with mixed case field names (case sensitivity test)
+    // ============================================================
+    def case_table_name = "test_struct_evolution_case"
+
+    // Verify case-sensitive table schema after evolution
+    qt_case_desc """DESC ${case_table_name}"""
+
+    // Test 1: Query all columns - should work
+    qt_case_select_all """SELECT * FROM ${case_table_name} ORDER BY id"""
+
+    // Test 2: Query struct field that exists in both old and new files
+    qt_case_struct_keep """SELECT struct_element(a_struct, 'keep') FROM 
${case_table_name} ORDER BY id"""
+    qt_case_struct_renamed """SELECT struct_element(a_struct, 'renamed') FROM 
${case_table_name} ORDER BY id"""
+
+    // Test 3: Query struct field that was dropped and re-added with case 
change
+    // Note: Even though we use DROP_AND_ADD (uppercase) in SQL, the system 
normalizes
+    // field names to lowercase, so we query with 'drop_and_add' (lowercase)
+    qt_case_struct_drop_and_add """SELECT struct_element(a_struct, 
'drop_and_add') FROM ${case_table_name} ORDER BY id"""
+
+    // Test 4: Query struct field that was newly added
+    qt_case_struct_added """SELECT struct_element(a_struct, 'added') FROM 
${case_table_name} ORDER BY id"""
+
+    // Test 5: Query entire struct column
+    qt_case_struct_full """SELECT a_struct FROM ${case_table_name} ORDER BY 
id"""
+
+    // Test 6: Query with predicate on struct field
+    qt_case_struct_predicate_1 """SELECT id FROM ${case_table_name} WHERE 
struct_element(a_struct, 'renamed') = 11 ORDER BY id"""
+    qt_case_struct_predicate_2 """SELECT id FROM ${case_table_name} WHERE 
struct_element(a_struct, 'drop_and_add') IS NULL ORDER BY id"""
+    qt_case_struct_predicate_3 """SELECT id FROM ${case_table_name} WHERE 
struct_element(a_struct, 'added') IS NULL ORDER BY id"""
+    qt_case_struct_predicate_4 """SELECT id FROM ${case_table_name} WHERE 
struct_element(a_struct, 'added') IS NOT NULL ORDER BY id"""
+
+    // Test 7: Multiple struct fields in one query
+    qt_case_struct_multi """SELECT struct_element(a_struct, 'renamed'), 
struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'), 
struct_element(a_struct, 'added') FROM ${case_table_name} ORDER BY id"""
+
+    // Test 8: DISTINCT query on struct fields
+    qt_case_struct_distinct """SELECT DISTINCT struct_element(a_struct, 
'renamed'), struct_element(a_struct, 'added'), struct_element(a_struct, 'keep') 
FROM ${case_table_name} ORDER BY 1, 2, 3"""
+
+    // ============================================================
+    // Test with ORC format and mixed case field names
+    // ============================================================
+    def case_orc_table_name = "test_struct_evolution_case_orc"
+
+    // Verify ORC case-sensitive table schema after evolution
+    qt_case_orc_desc """DESC ${case_orc_table_name}"""
+
+    // Test 1: Query all columns - should work
+    qt_case_orc_select_all """SELECT * FROM ${case_orc_table_name} ORDER BY 
id"""
+
+    // Test 2: Query struct field that exists in both old and new files
+    qt_case_orc_struct_keep """SELECT struct_element(a_struct, 'keep') FROM 
${case_orc_table_name} ORDER BY id"""
+    qt_case_orc_struct_renamed """SELECT struct_element(a_struct, 'renamed') 
FROM ${case_orc_table_name} ORDER BY id"""
+
+    // Test 3: Query struct field that was dropped and re-added with case 
change
+    // Note: Even though we use DROP_AND_ADD (uppercase) in SQL, the system 
normalizes
+    // field names to lowercase, so we query with 'drop_and_add' (lowercase)
+    qt_case_orc_struct_drop_and_add """SELECT struct_element(a_struct, 
'drop_and_add') FROM ${case_orc_table_name} ORDER BY id"""
+
+    // Test 4: Query struct field that was newly added
+    qt_case_orc_struct_added """SELECT struct_element(a_struct, 'added') FROM 
${case_orc_table_name} ORDER BY id"""
+
+    // Test 5: Query entire struct column
+    qt_case_orc_struct_full """SELECT a_struct FROM ${case_orc_table_name} 
ORDER BY id"""
+
+    // Test 6: Multiple struct fields in one query
+    qt_case_orc_struct_multi """SELECT struct_element(a_struct, 'renamed'), 
struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'), 
struct_element(a_struct, 'added') FROM ${case_orc_table_name} ORDER BY id"""
+
+    // Clean up
+    sql """drop catalog if exists ${catalog_name}"""
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch branch-4.0 updated: branch-4.0: [fix](parquet) Fix struct column reading error when all queried fields are missing after schema evolution #59586 (#59839)

Reply via email to