This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new d53be9fef84 branch-4.0: [fix](parquet) Fix struct column reading error
when all queried fields are missing after schema evolution #59586 (#59839)
d53be9fef84 is described below
commit d53be9fef84db469e255a7890b2c6c3cc17f7a21
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Jan 14 09:58:51 2026 +0800
branch-4.0: [fix](parquet) Fix struct column reading error when all queried
fields are missing after schema evolution #59586 (#59839)
Cherry-picked from #59586
Co-authored-by: Socrates <[email protected]>
---
.../exec/format/parquet/vparquet_column_reader.cpp | 12 +-
.../create_preinstalled_scripts/iceberg/run24.sql | 151 ++++++++++++++++
.../test_iceberg_struct_schema_evolution.out | 161 +++++++++++++++++
.../test_iceberg_struct_schema_evolution.groovy | 194 +++++++++++++++++++++
4 files changed, 514 insertions(+), 4 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index e24ec85b2e1..00bdc51844e 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -920,10 +920,14 @@ Status StructColumnReader::read_column_data(
size_t field_rows = 0;
bool field_eof = false;
- // Use root_node to get the correct child node for the reference column
- // reference_file_column_name is the file column name, use get_children_node_by_file_column_name
- auto ref_child_node =
-
root_node->get_children_node_by_file_column_name(reference_file_column_name);
+ // Use ConstNode for the reference column instead of looking up from root_node.
+ // The reference column is only used to get RL/DL information for determining the number
+ // of elements in the struct. It may be a column that has been dropped from the table
+ // schema (e.g., 'removed' field), but still exists in older parquet files.
+ // Since we don't need schema mapping for this column (we just need its RL/DL levels),
+ // using ConstNode is safe and avoids the issue where the reference column doesn't exist
+ // in root_node (because it was dropped from table schema).
+ auto ref_child_node =
TableSchemaChangeHelper::ConstNode::get_instance();
not_missing_orig_column_size = temp_column->size();
RETURN_IF_ERROR((*reference_reader)
diff --git
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql
new file mode 100644
index 00000000000..b5b19b1f15c
--- /dev/null
+++
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run24.sql
@@ -0,0 +1,151 @@
+use demo.test_db;
+
+DROP TABLE IF EXISTS test_struct_evolution;
+
+-- Test case for struct schema evolution bug
+-- Bug scenario: When querying a struct field after schema evolution, if all queried fields are missing
+-- in old Parquet files, the code tries to find a reference column from file schema. However, if the
+-- reference column (e.g., 'removed') was dropped from table schema, accessing it via root_node will fail.
+--
+-- Steps to reproduce:
+-- 1. Create table with struct containing: removed, rename, keep, drop_and_add
+-- 2. Insert data (creates Parquet file with these fields)
+-- 3. DROP a_struct.removed - removes field from table schema
+-- 4. DROP a_struct.drop_and_add then ADD a_struct.drop_and_add - gets new field ID
+-- 5. ADD a_struct.added - adds new field
+-- 6. Query struct_element(a_struct, 'drop_and_add') or struct_element(a_struct, 'added')
+-- -> This will fail because all queried fields are missing in old file, and the reference
+-- column 'removed' doesn't exist in root_node (it was dropped from table schema)
+
+-- Step 1: Create table
+CREATE TABLE test_struct_evolution (
+ id BIGINT,
+ a_struct STRUCT<removed: BIGINT, rename: BIGINT, keep: BIGINT,
drop_and_add: BIGINT>
+) USING ICEBERG
+TBLPROPERTIES ('write.format.default' = 'parquet', 'format-version' = 2);
+
+-- Step 2: Insert data (creates Parquet file with original schema)
+INSERT INTO test_struct_evolution
+SELECT 1, named_struct('removed', 10, 'rename', 11, 'keep', 12,
'drop_and_add', 13);
+
+-- Step 3: Schema evolution - drop removed field
+ALTER TABLE test_struct_evolution DROP COLUMN a_struct.removed;
+
+-- Step 4: Rename field (field ID stays the same)
+ALTER TABLE test_struct_evolution RENAME COLUMN a_struct.rename TO renamed;
+
+-- Step 5: Drop and add drop_and_add (new field ID)
+ALTER TABLE test_struct_evolution DROP COLUMN a_struct.drop_and_add;
+ALTER TABLE test_struct_evolution ADD COLUMN a_struct.drop_and_add BIGINT;
+
+-- Step 6: Add new field
+ALTER TABLE test_struct_evolution ADD COLUMN a_struct.added BIGINT;
+
+-- Step 7: Insert new data after schema evolution (creates new Parquet file)
+INSERT INTO test_struct_evolution
+SELECT 2, named_struct('renamed', 21, 'keep', 22, 'drop_and_add', 23, 'added',
24);
+
+-- Now the table contains two Parquet files:
+-- - Old file: contains removed, rename, keep, drop_and_add (old field ID)
+-- - New file: contains renamed, keep, drop_and_add (new field ID), added
+--
+-- Querying struct_element(a_struct, 'drop_and_add') or struct_element(a_struct, 'added')
+-- on the old file will trigger the bug
+
+-- ============================================================
+-- ORC format test table (for completeness, though ORC doesn't have the same bug)
+-- ============================================================
+DROP TABLE IF EXISTS test_struct_evolution_orc;
+
+-- Create ORC format table with same schema evolution scenario
+CREATE TABLE test_struct_evolution_orc (
+ id BIGINT,
+ a_struct STRUCT<removed: BIGINT, rename: BIGINT, keep: BIGINT,
drop_and_add: BIGINT>
+) USING ICEBERG
+TBLPROPERTIES ('write.format.default' = 'orc', 'format-version' = 2);
+
+-- Insert initial data (creates ORC file with original schema)
+INSERT INTO test_struct_evolution_orc
+SELECT 1, named_struct('removed', 10, 'rename', 11, 'keep', 12,
'drop_and_add', 13);
+
+-- Schema evolution - same operations as Parquet table
+ALTER TABLE test_struct_evolution_orc DROP COLUMN a_struct.removed;
+ALTER TABLE test_struct_evolution_orc RENAME COLUMN a_struct.rename TO renamed;
+ALTER TABLE test_struct_evolution_orc DROP COLUMN a_struct.drop_and_add;
+ALTER TABLE test_struct_evolution_orc ADD COLUMN a_struct.drop_and_add BIGINT;
+ALTER TABLE test_struct_evolution_orc ADD COLUMN a_struct.added BIGINT;
+
+-- Insert new data after schema evolution (creates new ORC file)
+INSERT INTO test_struct_evolution_orc
+SELECT 2, named_struct('renamed', 21, 'keep', 22, 'drop_and_add', 23, 'added',
24);
+
+-- ============================================================
+-- Case sensitivity test table (mixed case field names)
+-- ============================================================
+DROP TABLE IF EXISTS test_struct_evolution_case;
+
+-- Test case for struct schema evolution with mixed case field names
+-- This tests that case sensitivity is handled correctly when:
+-- - Field names have mixed case (e.g., REMOVED, rename, keep, drop_and_add)
+-- - Schema evolution operations are performed
+-- - Querying struct fields with different case patterns
+
+-- Step 1: Create table with mixed case field names
+CREATE TABLE test_struct_evolution_case (
+ id BIGINT,
+ a_struct STRUCT<REMOVED: BIGINT, rename: BIGINT, keep: BIGINT,
drop_and_add: BIGINT>
+) USING ICEBERG
+TBLPROPERTIES ('write.format.default' = 'parquet', 'format-version' = 2);
+
+-- Step 2: Insert data (creates Parquet file with original schema)
+INSERT INTO test_struct_evolution_case
+SELECT 1, named_struct('REMOVED', 10, 'rename', 11, 'keep', 12,
'drop_and_add', 13);
+
+-- Step 3: Schema evolution - drop REMOVED field (uppercase)
+ALTER TABLE test_struct_evolution_case DROP COLUMN a_struct.REMOVED;
+
+-- Step 4: Rename field (field ID stays the same)
+ALTER TABLE test_struct_evolution_case RENAME COLUMN a_struct.rename TO
renamed;
+
+-- Step 5: Drop and add drop_and_add with case change (new field ID)
+-- Initial: drop_and_add (lowercase), after re-add: DROP_AND_ADD (uppercase)
+ALTER TABLE test_struct_evolution_case DROP COLUMN a_struct.drop_and_add;
+ALTER TABLE test_struct_evolution_case ADD COLUMN a_struct.DROP_AND_ADD BIGINT;
+
+-- Step 6: Add new field
+ALTER TABLE test_struct_evolution_case ADD COLUMN a_struct.added BIGINT;
+
+-- Step 7: Insert new data after schema evolution (creates new Parquet file)
+-- Note: Use DROP_AND_ADD (uppercase) in the new data
+INSERT INTO test_struct_evolution_case
+SELECT 2, named_struct('renamed', 21, 'keep', 22, 'DROP_AND_ADD', 23, 'added',
24);
+
+-- ============================================================
+-- ORC format test table with mixed case (for completeness)
+-- ============================================================
+DROP TABLE IF EXISTS test_struct_evolution_case_orc;
+
+-- Create ORC format table with same schema evolution scenario and mixed case
+CREATE TABLE test_struct_evolution_case_orc (
+ id BIGINT,
+ a_struct STRUCT<REMOVED: BIGINT, rename: BIGINT, keep: BIGINT,
drop_and_add: BIGINT>
+) USING ICEBERG
+TBLPROPERTIES ('write.format.default' = 'orc', 'format-version' = 2);
+
+-- Insert initial data (creates ORC file with original schema)
+INSERT INTO test_struct_evolution_case_orc
+SELECT 1, named_struct('REMOVED', 10, 'rename', 11, 'keep', 12,
'drop_and_add', 13);
+
+-- Schema evolution - same operations as Parquet table
+ALTER TABLE test_struct_evolution_case_orc DROP COLUMN a_struct.REMOVED;
+ALTER TABLE test_struct_evolution_case_orc RENAME COLUMN a_struct.rename TO
renamed;
+-- Drop and add with case change: drop_and_add (lowercase) -> DROP_AND_ADD (uppercase)
+ALTER TABLE test_struct_evolution_case_orc DROP COLUMN a_struct.drop_and_add;
+ALTER TABLE test_struct_evolution_case_orc ADD COLUMN a_struct.DROP_AND_ADD
BIGINT;
+ALTER TABLE test_struct_evolution_case_orc ADD COLUMN a_struct.added BIGINT;
+
+-- Insert new data after schema evolution (creates new ORC file)
+-- Note: Use DROP_AND_ADD (uppercase) in the new data
+INSERT INTO test_struct_evolution_case_orc
+SELECT 2, named_struct('renamed', 21, 'keep', 22, 'DROP_AND_ADD', 23, 'added',
24);
+
diff --git
a/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out
b/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out
new file mode 100644
index 00000000000..a364316df42
--- /dev/null
+++
b/regression-test/data/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.out
@@ -0,0 +1,161 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !desc --
+id bigint Yes true \N
+a_struct
struct<renamed:bigint,keep:bigint,drop_and_add:bigint,added:bigint> Yes
true \N
+
+-- !select_all --
+1 {"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+2 {"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !struct_keep --
+12
+22
+
+-- !struct_renamed --
+11
+21
+
+-- !struct_drop_and_add --
+\N
+23
+
+-- !struct_added --
+\N
+24
+
+-- !struct_full --
+{"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+{"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !struct_predicate_1 --
+1
+
+-- !struct_predicate_2 --
+1
+
+-- !struct_predicate_3 --
+1
+
+-- !struct_predicate_4 --
+2
+
+-- !struct_multi --
+11 12 \N \N
+21 22 23 24
+
+-- !struct_distinct --
+11 \N 12
+21 24 22
+
+-- !orc_desc --
+id bigint Yes true \N
+a_struct
struct<renamed:bigint,keep:bigint,drop_and_add:bigint,added:bigint> Yes
true \N
+
+-- !orc_select_all --
+1 {"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+2 {"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !orc_struct_keep --
+12
+22
+
+-- !orc_struct_renamed --
+11
+21
+
+-- !orc_struct_drop_and_add --
+\N
+23
+
+-- !orc_struct_added --
+\N
+24
+
+-- !orc_struct_full --
+{"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+{"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !orc_struct_multi --
+11 12 \N \N
+21 22 23 24
+
+-- !case_desc --
+id bigint Yes true \N
+a_struct
struct<renamed:bigint,keep:bigint,drop_and_add:bigint,added:bigint> Yes
true \N
+
+-- !case_select_all --
+1 {"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+2 {"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !case_struct_keep --
+12
+22
+
+-- !case_struct_renamed --
+11
+21
+
+-- !case_struct_drop_and_add --
+\N
+23
+
+-- !case_struct_added --
+\N
+24
+
+-- !case_struct_full --
+{"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+{"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !case_struct_predicate_1 --
+1
+
+-- !case_struct_predicate_2 --
+1
+
+-- !case_struct_predicate_3 --
+1
+
+-- !case_struct_predicate_4 --
+2
+
+-- !case_struct_multi --
+11 12 \N \N
+21 22 23 24
+
+-- !case_struct_distinct --
+11 \N 12
+21 24 22
+
+-- !case_orc_desc --
+id bigint Yes true \N
+a_struct
struct<renamed:bigint,keep:bigint,drop_and_add:bigint,added:bigint> Yes
true \N
+
+-- !case_orc_select_all --
+1 {"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+2 {"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !case_orc_struct_keep --
+12
+22
+
+-- !case_orc_struct_renamed --
+11
+21
+
+-- !case_orc_struct_drop_and_add --
+\N
+23
+
+-- !case_orc_struct_added --
+\N
+24
+
+-- !case_orc_struct_full --
+{"renamed":11, "keep":12, "drop_and_add":null, "added":null}
+{"renamed":21, "keep":22, "drop_and_add":23, "added":24}
+
+-- !case_orc_struct_multi --
+11 12 \N \N
+21 22 23 24
+
diff --git
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy
new file mode 100644
index 00000000000..f4e95fa4fbd
--- /dev/null
+++
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_struct_schema_evolution.groovy
@@ -0,0 +1,194 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Test for struct field schema evolution in Iceberg tables.
+// This test case verifies the fix for the bug where querying a struct field
+// that was added after schema evolution fails when all queried columns are
+// missing in the original file, and the reference column used for RL/DL
+// was dropped from the table schema.
+//
+// Bug: "File column name 'removed' not found in struct children"
+// Fix: Use ConstNode for reference column when reading RL/DL information
+//
+// Prerequisites:
+// - Tables created by run24.sql in docker iceberg scripts
+
+suite("test_iceberg_struct_schema_evolution",
"p0,external,doris,external_docker,external_docker_doris") {
+
+ String enabled = context.config.otherConfigs.get("enableIcebergTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disable iceberg test.")
+ return
+ }
+
+ String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port")
+ String minio_port = context.config.otherConfigs.get("iceberg_minio_port")
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ String catalog_name = "test_iceberg_struct_schema_evolution"
+
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """
+ CREATE CATALOG ${catalog_name} PROPERTIES (
+ 'type'='iceberg',
+ 'iceberg.catalog.type'='rest',
+ 'uri' = 'http://${externalEnvIp}:${rest_port}',
+ "s3.access_key" = "admin",
+ "s3.secret_key" = "password",
+ "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+ "s3.region" = "us-east-1"
+ );"""
+
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+ logger.info("switched to catalog " + catalog_name)
+ sql """use test_db;"""
+
+ sql """set enable_fallback_to_original_planner=false;"""
+
+ def table_name = "test_struct_evolution"
+
+ // Verify table schema after evolution
+ qt_desc """DESC ${table_name}"""
+
+ // Test 1: Query all columns - should work
+ qt_select_all """SELECT * FROM ${table_name} ORDER BY id"""
+
+ // Test 2: Query struct field that exists in both old and new files
+ qt_struct_keep """SELECT struct_element(a_struct, 'keep') FROM
${table_name} ORDER BY id"""
+ qt_struct_renamed """SELECT struct_element(a_struct, 'renamed') FROM
${table_name} ORDER BY id"""
+
+ // Test 3: Query struct field that was dropped and re-added (BUG FIX TEST)
+ // This query would crash before the fix with:
+ // "Not support read struct 'a_struct' which columns are all missing"
+ // or "File column name 'removed' not found in struct children"
+ qt_struct_drop_and_add """SELECT struct_element(a_struct, 'drop_and_add')
FROM ${table_name} ORDER BY id"""
+
+ // Test 4: Query struct field that was newly added (BUG FIX TEST)
+ qt_struct_added """SELECT struct_element(a_struct, 'added') FROM
${table_name} ORDER BY id"""
+
+ // Test 5: Query entire struct column
+ qt_struct_full """SELECT a_struct FROM ${table_name} ORDER BY id"""
+
+ // Test 6: Query with predicate on struct field
+ qt_struct_predicate_1 """SELECT id FROM ${table_name} WHERE
struct_element(a_struct, 'renamed') = 11 ORDER BY id"""
+ qt_struct_predicate_2 """SELECT id FROM ${table_name} WHERE
struct_element(a_struct, 'drop_and_add') IS NULL ORDER BY id"""
+ qt_struct_predicate_3 """SELECT id FROM ${table_name} WHERE
struct_element(a_struct, 'added') IS NULL ORDER BY id"""
+ qt_struct_predicate_4 """SELECT id FROM ${table_name} WHERE
struct_element(a_struct, 'added') IS NOT NULL ORDER BY id"""
+
+ // Test 7: Multiple struct fields in one query
+ qt_struct_multi """SELECT struct_element(a_struct, 'renamed'),
struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'),
struct_element(a_struct, 'added') FROM ${table_name} ORDER BY id"""
+
+ // Test 8: DISTINCT query on struct fields
+ qt_struct_distinct """SELECT DISTINCT struct_element(a_struct, 'renamed'),
struct_element(a_struct, 'added'), struct_element(a_struct, 'keep') FROM
${table_name} ORDER BY 1, 2, 3"""
+
+ // ============================================================
+ // Test with ORC format (for completeness)
+ // ============================================================
+ def orc_table_name = "test_struct_evolution_orc"
+
+ // Verify ORC table schema after evolution
+ qt_orc_desc """DESC ${orc_table_name}"""
+
+ // Test 1: Query all columns - should work
+ qt_orc_select_all """SELECT * FROM ${orc_table_name} ORDER BY id"""
+
+ // Test 2: Query struct field that exists in both old and new files
+ qt_orc_struct_keep """SELECT struct_element(a_struct, 'keep') FROM
${orc_table_name} ORDER BY id"""
+ qt_orc_struct_renamed """SELECT struct_element(a_struct, 'renamed') FROM
${orc_table_name} ORDER BY id"""
+
+ // Test 3: Query struct field that was dropped and re-added
+ qt_orc_struct_drop_and_add """SELECT struct_element(a_struct,
'drop_and_add') FROM ${orc_table_name} ORDER BY id"""
+
+ // Test 4: Query struct field that was newly added
+ qt_orc_struct_added """SELECT struct_element(a_struct, 'added') FROM
${orc_table_name} ORDER BY id"""
+
+ // Test 5: Query entire struct column
+ qt_orc_struct_full """SELECT a_struct FROM ${orc_table_name} ORDER BY id"""
+
+ // Test 6: Multiple struct fields in one query
+ qt_orc_struct_multi """SELECT struct_element(a_struct, 'renamed'),
struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'),
struct_element(a_struct, 'added') FROM ${orc_table_name} ORDER BY id"""
+
+ // ============================================================
+ // Test with mixed case field names (case sensitivity test)
+ // ============================================================
+ def case_table_name = "test_struct_evolution_case"
+
+ // Verify case-sensitive table schema after evolution
+ qt_case_desc """DESC ${case_table_name}"""
+
+ // Test 1: Query all columns - should work
+ qt_case_select_all """SELECT * FROM ${case_table_name} ORDER BY id"""
+
+ // Test 2: Query struct field that exists in both old and new files
+ qt_case_struct_keep """SELECT struct_element(a_struct, 'keep') FROM
${case_table_name} ORDER BY id"""
+ qt_case_struct_renamed """SELECT struct_element(a_struct, 'renamed') FROM
${case_table_name} ORDER BY id"""
+
+ // Test 3: Query struct field that was dropped and re-added with case change
+ // Note: Even though we use DROP_AND_ADD (uppercase) in SQL, the system normalizes
+ // field names to lowercase, so we query with 'drop_and_add' (lowercase)
+ qt_case_struct_drop_and_add """SELECT struct_element(a_struct,
'drop_and_add') FROM ${case_table_name} ORDER BY id"""
+
+ // Test 4: Query struct field that was newly added
+ qt_case_struct_added """SELECT struct_element(a_struct, 'added') FROM
${case_table_name} ORDER BY id"""
+
+ // Test 5: Query entire struct column
+ qt_case_struct_full """SELECT a_struct FROM ${case_table_name} ORDER BY
id"""
+
+ // Test 6: Query with predicate on struct field
+ qt_case_struct_predicate_1 """SELECT id FROM ${case_table_name} WHERE
struct_element(a_struct, 'renamed') = 11 ORDER BY id"""
+ qt_case_struct_predicate_2 """SELECT id FROM ${case_table_name} WHERE
struct_element(a_struct, 'drop_and_add') IS NULL ORDER BY id"""
+ qt_case_struct_predicate_3 """SELECT id FROM ${case_table_name} WHERE
struct_element(a_struct, 'added') IS NULL ORDER BY id"""
+ qt_case_struct_predicate_4 """SELECT id FROM ${case_table_name} WHERE
struct_element(a_struct, 'added') IS NOT NULL ORDER BY id"""
+
+ // Test 7: Multiple struct fields in one query
+ qt_case_struct_multi """SELECT struct_element(a_struct, 'renamed'),
struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'),
struct_element(a_struct, 'added') FROM ${case_table_name} ORDER BY id"""
+
+ // Test 8: DISTINCT query on struct fields
+ qt_case_struct_distinct """SELECT DISTINCT struct_element(a_struct,
'renamed'), struct_element(a_struct, 'added'), struct_element(a_struct, 'keep')
FROM ${case_table_name} ORDER BY 1, 2, 3"""
+
+ // ============================================================
+ // Test with ORC format and mixed case field names
+ // ============================================================
+ def case_orc_table_name = "test_struct_evolution_case_orc"
+
+ // Verify ORC case-sensitive table schema after evolution
+ qt_case_orc_desc """DESC ${case_orc_table_name}"""
+
+ // Test 1: Query all columns - should work
+ qt_case_orc_select_all """SELECT * FROM ${case_orc_table_name} ORDER BY
id"""
+
+ // Test 2: Query struct field that exists in both old and new files
+ qt_case_orc_struct_keep """SELECT struct_element(a_struct, 'keep') FROM
${case_orc_table_name} ORDER BY id"""
+ qt_case_orc_struct_renamed """SELECT struct_element(a_struct, 'renamed')
FROM ${case_orc_table_name} ORDER BY id"""
+
+ // Test 3: Query struct field that was dropped and re-added with case change
+ // Note: Even though we use DROP_AND_ADD (uppercase) in SQL, the system normalizes
+ // field names to lowercase, so we query with 'drop_and_add' (lowercase)
+ qt_case_orc_struct_drop_and_add """SELECT struct_element(a_struct,
'drop_and_add') FROM ${case_orc_table_name} ORDER BY id"""
+
+ // Test 4: Query struct field that was newly added
+ qt_case_orc_struct_added """SELECT struct_element(a_struct, 'added') FROM
${case_orc_table_name} ORDER BY id"""
+
+ // Test 5: Query entire struct column
+ qt_case_orc_struct_full """SELECT a_struct FROM ${case_orc_table_name}
ORDER BY id"""
+
+ // Test 6: Multiple struct fields in one query
+ qt_case_orc_struct_multi """SELECT struct_element(a_struct, 'renamed'),
struct_element(a_struct, 'keep'), struct_element(a_struct, 'drop_and_add'),
struct_element(a_struct, 'added') FROM ${case_orc_table_name} ORDER BY id"""
+
+ // Clean up
+ sql """drop catalog if exists ${catalog_name}"""
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]