This is an automated email from the ASF dual-hosted git repository.

huajianlan pushed a commit to branch nested_column_prune
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 0279d9c575853e06e59d84eb57a7b317414e12f2
Author: kakachen <[email protected]>
AuthorDate: Thu Oct 30 19:15:40 2025 +0800

    Fix some test bugs for external tables.
---
 be/src/vec/exec/format/orc/vorc_reader.cpp         |  17 +-
 .../table/hive/hive_orc_nested_column_utils.cpp    |  21 +++
 be/src/vec/exec/format/table/hive_reader.cpp       |  40 ++---
 .../iceberg/iceberg_orc_nested_column_utils.cpp    |  20 +++
 .../iceberg_parquet_nested_column_utils.cpp        |  16 +-
 be/src/vec/exec/format/table/iceberg_reader.cpp    |  22 +--
 .../hive/hive_reader_create_column_ids_test.cpp    | 184 ++++++++++++---------
 .../iceberg_reader_create_column_ids_test.cpp      | 122 +++++++++-----
 8 files changed, 274 insertions(+), 168 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index cfbad191772..11b75b820ca 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -1984,12 +1984,9 @@ Status OrcReader::_fill_doris_data_column(const 
std::string& col_name,
             }
         } else {
             // Normal processing: convert ORC column to Doris column
-            auto status = _orc_column_to_doris_column<false>(
+            RETURN_IF_ERROR(_orc_column_to_doris_column<false>(
                     key_col_name, doris_key_column, doris_key_type, 
root_node->get_key_node(),
-                    orc_key_type, orc_map->keys.get(), element_size);
-            if (!status.ok()) {
-                return status;
-            }
+                    orc_key_type, orc_map->keys.get(), element_size));
         }
 
         // Handle value column: if still missing, fill with default values
@@ -2005,14 +2002,14 @@ Status OrcReader::_fill_doris_data_column(const 
std::string& col_name,
             } else {
                 mutable_value_column->insert_many_defaults(element_size);
             }
-            return Status::OK();
         } else {
             // Normal processing: convert ORC column to Doris column
-            return _orc_column_to_doris_column<false>(value_col_name, 
doris_value_column,
-                                                      doris_value_type, 
root_node->get_value_node(),
-                                                      orc_value_type, 
orc_map->elements.get(),
-                                                      element_size);
+            RETURN_IF_ERROR(_orc_column_to_doris_column<false>(
+                    value_col_name, doris_value_column, doris_value_type,
+                    root_node->get_value_node(), orc_value_type, 
orc_map->elements.get(),
+                    element_size));
         }
+        return doris_map.deduplicate_keys();
     }
     case PrimitiveType::TYPE_STRUCT: {
         if (orc_column_type->getKind() != orc::TypeKind::STRUCT) {
diff --git a/be/src/vec/exec/format/table/hive/hive_orc_nested_column_utils.cpp b/be/src/vec/exec/format/table/hive/hive_orc_nested_column_utils.cpp
index 6e9fa381fc7..1cd02967f0d 100644
--- a/be/src/vec/exec/format/table/hive/hive_orc_nested_column_utils.cpp
+++ b/be/src/vec/exec/format/table/hive/hive_orc_nested_column_utils.cpp
@@ -95,6 +95,27 @@ void HiveOrcNestedColumnUtils::extract_nested_column_ids(
             } else if (i == 1) {
                 child_field_name = "VALUES";
             }
+
+            // Special handling for Orc MAP structure:
+            // When accessing only VALUES, we still need KEY structure for levels
+            // Check if we're at key child (i==0) and only VALUES is requested (no KEYS)
+            if (i == 0) {
+                bool has_keys_access = 
child_paths_by_table_col_name.find("KEYS") !=
+                                       child_paths_by_table_col_name.end();
+                bool has_values_access = 
child_paths_by_table_col_name.find("VALUES") !=
+                                         child_paths_by_table_col_name.end();
+
+                // If only VALUES is accessed (not KEYS), still include key structure for deduplicate_keys
+                if (!has_keys_access && has_values_access) {
+                    uint64_t key_start_id = child->getColumnId();
+                    uint64_t key_max_id = child->getMaximumColumnId();
+                    for (uint64_t id = key_start_id; id <= key_max_id; ++id) {
+                        column_ids.insert(id);
+                    }
+                    has_child_columns = true;
+                    continue; // Skip further processing of key child
+                }
+            }
             break;
         default:
             child_field_name = "";
diff --git a/be/src/vec/exec/format/table/hive_reader.cpp b/be/src/vec/exec/format/table/hive_reader.cpp
index 81c8cf47f42..b9bc01fe7b6 100644
--- a/be/src/vec/exec/format/table/hive_reader.cpp
+++ b/be/src/vec/exec/format/table/hive_reader.cpp
@@ -166,8 +166,6 @@ ColumnIdResult HiveOrcReader::_create_column_ids(const 
orc::Type* orc_type,
         }
         const orc::Type* orc_field = it->second;
 
-        const auto& all_access_paths = slot->all_access_paths();
-
         // primitive (non-nested) types: direct mapping by name
         if ((slot->col_type() != TYPE_STRUCT && slot->col_type() != TYPE_ARRAY 
&&
              slot->col_type() != TYPE_MAP)) {
@@ -179,13 +177,13 @@ ColumnIdResult HiveOrcReader::_create_column_ids(const 
orc::Type* orc_type,
         }
 
         // complex types:
-
-        // collect and process all_access_paths -> column_ids
+        const auto& all_access_paths = slot->all_access_paths();
         process_access_paths(orc_field, all_access_paths, column_ids);
 
-        // collect and process predicate_access_paths -> filter_column_ids
         const auto& predicate_access_paths = slot->predicate_access_paths();
-        process_access_paths(orc_field, predicate_access_paths, 
filter_column_ids);
+        if (!predicate_access_paths.empty()) {
+            process_access_paths(orc_field, predicate_access_paths, 
filter_column_ids);
+        }
     }
 
     return ColumnIdResult(std::move(column_ids), std::move(filter_column_ids));
@@ -261,8 +259,6 @@ ColumnIdResult 
HiveOrcReader::_create_column_ids_by_top_level_col_index(
         }
         const orc::Type* orc_field = it->second;
 
-        const auto& all_access_paths = slot->all_access_paths();
-
         // primitive (non-nested) types: direct mapping by pos
         if ((slot->col_type() != TYPE_STRUCT && slot->col_type() != TYPE_ARRAY 
&&
              slot->col_type() != TYPE_MAP)) {
@@ -273,14 +269,14 @@ ColumnIdResult 
HiveOrcReader::_create_column_ids_by_top_level_col_index(
             continue;
         }
 
+        const auto& all_access_paths = slot->all_access_paths();
         // complex types
-
-        // collect and process all_access_paths -> column_ids
         process_access_paths(orc_field, all_access_paths, column_ids);
 
-        // collect and process predicate_access_paths -> filter_column_ids
         const auto& predicate_access_paths = slot->predicate_access_paths();
-        process_access_paths(orc_field, predicate_access_paths, 
filter_column_ids);
+        if (!predicate_access_paths.empty()) {
+            process_access_paths(orc_field, predicate_access_paths, 
filter_column_ids);
+        }
     }
 
     return ColumnIdResult(std::move(column_ids), std::move(filter_column_ids));
@@ -438,8 +434,6 @@ ColumnIdResult HiveParquetReader::_create_column_ids(const 
FieldDescriptor* fiel
         }
         auto field_schema = it->second;
 
-        const auto& all_access_paths = slot->all_access_paths();
-
         // primitive (non-nested) types: direct mapping by name
         if ((slot->col_type() != TYPE_STRUCT && slot->col_type() != TYPE_ARRAY 
&&
              slot->col_type() != TYPE_MAP)) {
@@ -452,13 +446,13 @@ ColumnIdResult 
HiveParquetReader::_create_column_ids(const FieldDescriptor* fiel
         }
 
         // complex types:
-
-        // collect and process all_access_paths -> column_ids
+        const auto& all_access_paths = slot->all_access_paths();
         process_access_paths(field_schema, all_access_paths, column_ids);
 
-        // collect and process predicate_access_paths -> filter_column_ids
         const auto& predicate_access_paths = slot->predicate_access_paths();
-        process_access_paths(field_schema, predicate_access_paths, 
filter_column_ids);
+        if (!predicate_access_paths.empty()) {
+            process_access_paths(field_schema, predicate_access_paths, 
filter_column_ids);
+        }
     }
 
     return ColumnIdResult(std::move(column_ids), std::move(filter_column_ids));
@@ -538,8 +532,6 @@ ColumnIdResult 
HiveParquetReader::_create_column_ids_by_top_level_col_index(
         }
         auto field_schema = it->second;
 
-        const auto& all_access_paths = slot->all_access_paths();
-
         // primitive (non-nested) types: direct mapping by position
         if ((slot->col_type() != TYPE_STRUCT && slot->col_type() != TYPE_ARRAY 
&&
              slot->col_type() != TYPE_MAP)) {
@@ -551,12 +543,14 @@ ColumnIdResult 
HiveParquetReader::_create_column_ids_by_top_level_col_index(
             continue;
         }
 
-        // collect and process all_access_paths -> column_ids
+        // complex types:
+        const auto& all_access_paths = slot->all_access_paths();
         process_access_paths(field_schema, all_access_paths, column_ids);
 
-        // collect and process predicate_access_paths -> filter_column_ids
         const auto& predicate_access_paths = slot->predicate_access_paths();
-        process_access_paths(field_schema, predicate_access_paths, 
filter_column_ids);
+        if (!predicate_access_paths.empty()) {
+            process_access_paths(field_schema, predicate_access_paths, 
filter_column_ids);
+        }
     }
 
     return ColumnIdResult(std::move(column_ids), std::move(filter_column_ids));
diff --git a/be/src/vec/exec/format/table/iceberg/iceberg_orc_nested_column_utils.cpp b/be/src/vec/exec/format/table/iceberg/iceberg_orc_nested_column_utils.cpp
index 5dbbbfb2af2..e3aacded188 100644
--- a/be/src/vec/exec/format/table/iceberg/iceberg_orc_nested_column_utils.cpp
+++ b/be/src/vec/exec/format/table/iceberg/iceberg_orc_nested_column_utils.cpp
@@ -97,6 +97,26 @@ void IcebergOrcNestedColumnUtils::extract_nested_column_ids(
             } else if (i == 1) {
                 child_field_id = "VALUES";
             }
+            // Special handling for Orc MAP structure:
+            // When accessing only VALUES, we still need KEY structure for levels
+            // Check if we're at key child (i==0) and only VALUES is requested (no KEYS)
+            if (i == 0) {
+                bool has_keys_access =
+                        child_paths_by_field_id.find("KEYS") != 
child_paths_by_field_id.end();
+                bool has_values_access =
+                        child_paths_by_field_id.find("VALUES") != 
child_paths_by_field_id.end();
+
+                // If only VALUES is accessed (not KEYS), still include key structure for deduplicate_keys
+                if (!has_keys_access && has_values_access) {
+                    uint64_t key_start_id = child->getColumnId();
+                    uint64_t key_max_id = child->getMaximumColumnId();
+                    for (uint64_t id = key_start_id; id <= key_max_id; ++id) {
+                        column_ids.insert(id);
+                    }
+                    has_child_columns = true;
+                    continue; // Skip further processing of key child
+                }
+            }
             break;
         default:
             child_field_id = "";
diff --git a/be/src/vec/exec/format/table/iceberg/iceberg_parquet_nested_column_utils.cpp b/be/src/vec/exec/format/table/iceberg/iceberg_parquet_nested_column_utils.cpp
index cb2b2e6b8c7..7741d498f99 100644
--- a/be/src/vec/exec/format/table/iceberg/iceberg_parquet_nested_column_utils.cpp
+++ b/be/src/vec/exec/format/table/iceberg/iceberg_parquet_nested_column_utils.cpp
@@ -114,7 +114,10 @@ void 
IcebergParquetNestedColumnUtils::extract_nested_column_ids(
                     uint64_t key_start_id = child.get_column_id();
                     uint64_t key_max_id = child.get_max_column_id();
                     for (uint64_t id = key_start_id; id <= key_max_id; ++id) {
-                        column_ids.insert(id);
+                        auto inserted = column_ids.insert(id);
+                        if (inserted.second) {
+                            std::cout << "[IcebergNested] added column id: " 
<< id << std::endl;
+                        }
                     }
                     has_child_columns = true;
                     continue; // Skip further processing of key child
@@ -144,7 +147,10 @@ void 
IcebergParquetNestedColumnUtils::extract_nested_column_ids(
                 uint64_t start_id = child.get_column_id();
                 uint64_t max_column_id = child.get_max_column_id();
                 for (uint64_t id = start_id; id <= max_column_id; ++id) {
-                    column_ids.insert(id);
+                    auto inserted = column_ids.insert(id);
+                    if (inserted.second) {
+                        std::cout << "[IcebergNested] added column id: " << id 
<< std::endl;
+                    }
                 }
                 has_child_columns = true;
             } else {
@@ -166,7 +172,11 @@ void 
IcebergParquetNestedColumnUtils::extract_nested_column_ids(
     // This ensures parent struct/container nodes are included when their 
children are needed
     if (has_child_columns) {
         // Set automatically handles deduplication, so no need to check if it 
already exists
-        column_ids.insert(field_schema.get_column_id());
+        auto inserted = column_ids.insert(field_schema.get_column_id());
+        if (inserted.second) {
+            std::cout << "[IcebergNested] added parent column id: " << 
field_schema.get_column_id()
+                      << std::endl;
+        }
     }
 }
 
diff --git a/be/src/vec/exec/format/table/iceberg_reader.cpp b/be/src/vec/exec/format/table/iceberg_reader.cpp
index b08229165ea..1a010dc4454 100644
--- a/be/src/vec/exec/format/table/iceberg_reader.cpp
+++ b/be/src/vec/exec/format/table/iceberg_reader.cpp
@@ -544,8 +544,6 @@ ColumnIdResult 
IcebergParquetReader::_create_column_ids(const FieldDescriptor* f
         }
         auto field_schema = it->second;
 
-        const auto& all_access_paths = slot->all_access_paths();
-
         // primitive (non-nested) types: direct mapping by name
         if ((slot->col_type() != TYPE_STRUCT && slot->col_type() != TYPE_ARRAY 
&&
              slot->col_type() != TYPE_MAP)) {
@@ -558,13 +556,13 @@ ColumnIdResult 
IcebergParquetReader::_create_column_ids(const FieldDescriptor* f
         }
 
         // complex types:
-
-        // collect and process all_access_paths -> column_ids
+        const auto& all_access_paths = slot->all_access_paths();
         process_access_paths(field_schema, all_access_paths, column_ids);
 
-        // collect and process predicate_access_paths -> filter_column_ids
         const auto& predicate_access_paths = slot->predicate_access_paths();
-        process_access_paths(field_schema, predicate_access_paths, 
filter_column_ids);
+        if (!predicate_access_paths.empty()) {
+            process_access_paths(field_schema, predicate_access_paths, 
filter_column_ids);
+        }
     }
     return ColumnIdResult(std::move(column_ids), std::move(filter_column_ids));
 }
@@ -737,8 +735,6 @@ ColumnIdResult IcebergOrcReader::_create_column_ids(const 
orc::Type* orc_type,
         }
         const orc::Type* orc_field = it->second;
 
-        const auto& all_access_paths = slot->all_access_paths();
-
         // primitive (non-nested) types: direct mapping by name
         if ((slot->col_type() != TYPE_STRUCT && slot->col_type() != TYPE_ARRAY 
&&
              slot->col_type() != TYPE_MAP)) {
@@ -749,14 +745,14 @@ ColumnIdResult IcebergOrcReader::_create_column_ids(const 
orc::Type* orc_type,
             continue;
         }
 
-        // nested types:
-
-        // collect and process all_access_paths -> column_ids
+        // complex types:
+        const auto& all_access_paths = slot->all_access_paths();
         process_access_paths(orc_field, all_access_paths, column_ids);
 
-        // collect and process predicate_access_paths -> filter_column_ids
         const auto& predicate_access_paths = slot->predicate_access_paths();
-        process_access_paths(orc_field, predicate_access_paths, 
filter_column_ids);
+        if (!predicate_access_paths.empty()) {
+            process_access_paths(orc_field, predicate_access_paths, 
filter_column_ids);
+        }
     }
 
     return ColumnIdResult(std::move(column_ids), std::move(filter_column_ids));
diff --git a/be/test/vec/exec/format/table/hive/hive_reader_create_column_ids_test.cpp b/be/test/vec/exec/format/table/hive/hive_reader_create_column_ids_test.cpp
index 7efca721fcd..d46acbdc134 100644
--- a/be/test/vec/exec/format/table/hive/hive_reader_create_column_ids_test.cpp
+++ b/be/test/vec/exec/format/table/hive/hive_reader_create_column_ids_test.cpp
@@ -738,7 +738,8 @@ protected:
     }
 
     // Helper function: Run Parquet test with different column ID extraction 
methods
-    void run_parquet_test_with_method(const 
std::vector<ColumnAccessPathConfig>& access_configs,
+    void run_parquet_test_with_method(const std::vector<std::string>& 
table_column_names,
+                                      const 
std::vector<ColumnAccessPathConfig>& access_configs,
                                       const std::set<uint64_t>& 
expected_column_ids,
                                       const std::set<uint64_t>& 
expected_filter_column_ids,
                                       bool use_top_level_method = false,
@@ -760,12 +761,13 @@ protected:
         TTableDescriptor t_table_desc;
 
         // Define all columns according to the schema
-        std::vector<std::string> table_column_names = {
-                "name",       "profile",           "tags", "friends", 
"recent_activity",
-                "attributes", "complex_attributes"};
-        std::vector<int> table_column_positions = {1, 2, 3, 4, 5, 6, 7};
-        std::vector<TPrimitiveType::type> table_column_types = {
-                // TPrimitiveType::BIGINT,    // id
+        std::vector<std::string> all_table_column_names = {"id",         
"name",
+                                                           "profile",    
"tags",
+                                                           "friends",    
"recent_activity",
+                                                           "attributes", 
"complex_attributes"};
+        std::vector<int> all_table_column_positions = {0, 1, 2, 3, 4, 5, 6, 7};
+        std::vector<TPrimitiveType::type> all_table_column_types = {
+                TPrimitiveType::BIGINT, // id
                 TPrimitiveType::STRING, // name
                 TPrimitiveType::STRUCT, // profile
                 TPrimitiveType::ARRAY,  // tags
@@ -775,6 +777,18 @@ protected:
                 TPrimitiveType::MAP     // complex_attributes
         };
 
+        std::vector<int> table_column_positions;
+        std::vector<TPrimitiveType::type> table_column_types;
+        for (const auto& col_name : table_column_names) {
+            auto it = std::find(all_table_column_names.begin(), 
all_table_column_names.end(),
+                                col_name);
+            if (it != all_table_column_names.end()) {
+                int idx = std::distance(all_table_column_names.begin(), it);
+                table_column_positions.push_back(idx);
+                table_column_types.push_back(all_table_column_types[idx]);
+            }
+        }
+
         const TupleDescriptor* tuple_descriptor = create_tuple_descriptor(
                 &desc_tbl, obj_pool, t_desc_table, t_table_desc, 
table_column_names,
                 table_column_positions, table_column_types, access_configs);
@@ -795,7 +809,8 @@ protected:
     }
 
     // Helper function: Run ORC test with different column ID extraction 
methods
-    void run_orc_test_with_method(const std::vector<ColumnAccessPathConfig>& 
access_configs,
+    void run_orc_test_with_method(const std::vector<std::string>& 
table_column_names,
+                                  const std::vector<ColumnAccessPathConfig>& 
access_configs,
                                   const std::set<uint64_t>& 
expected_column_ids,
                                   const std::set<uint64_t>& 
expected_filter_column_ids,
                                   bool use_top_level_method = false,
@@ -817,12 +832,13 @@ protected:
         TTableDescriptor t_table_desc;
 
         // Define all columns according to the schema
-        std::vector<std::string> table_column_names = {
-                "name",       "profile",           "tags", "friends", 
"recent_activity",
-                "attributes", "complex_attributes"};
-        std::vector<int> table_column_positions = {1, 2, 3, 4, 5, 6, 7};
-        std::vector<TPrimitiveType::type> table_column_types = {
-                // TPrimitiveType::BIGINT,    // id
+        std::vector<std::string> all_table_column_names = {"id",         
"name",
+                                                           "profile",    
"tags",
+                                                           "friends",    
"recent_activity",
+                                                           "attributes", 
"complex_attributes"};
+        std::vector<int> all_table_column_positions = {0, 1, 2, 3, 4, 5, 6, 7};
+        std::vector<TPrimitiveType::type> all_table_column_types = {
+                TPrimitiveType::BIGINT, // id
                 TPrimitiveType::STRING, // name
                 TPrimitiveType::STRUCT, // profile
                 TPrimitiveType::ARRAY,  // tags
@@ -832,6 +848,18 @@ protected:
                 TPrimitiveType::MAP     // complex_attributes
         };
 
+        std::vector<int> table_column_positions;
+        std::vector<TPrimitiveType::type> table_column_types;
+        for (const auto& col_name : table_column_names) {
+            auto it = std::find(all_table_column_names.begin(), 
all_table_column_names.end(),
+                                col_name);
+            if (it != all_table_column_names.end()) {
+                int idx = std::distance(all_table_column_names.begin(), it);
+                table_column_positions.push_back(idx);
+                table_column_types.push_back(all_table_column_types[idx]);
+            }
+        }
+
         const TupleDescriptor* tuple_descriptor = create_tuple_descriptor(
                 &desc_tbl, obj_pool, t_desc_table, t_table_desc, 
table_column_names,
                 table_column_positions, table_column_types, access_configs);
@@ -868,18 +896,19 @@ TEST_F(HiveReaderCreateColumnIdsTest, 
test_create_column_ids_1) {
                                      {"profile", "contact", "email"}};
 
     // column_ids should contain all necessary column IDs (set automatically 
deduplicates)
-    // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
+    std::vector<std::string> table_column_names = {"name", "profile"};
     std::set<uint64_t> expected_column_ids = {2, 3, 4, 7, 8, 9, 10, 11, 15, 
16, 18};
-    // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
     std::set<uint64_t> expected_filter_column_ids = {3, 4, 7, 8, 10, 11};
 
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids,
-                                 true);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids, true);
 
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids,
-                             true);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids, true);
 }
 
 TEST_F(HiveReaderCreateColumnIdsTest, test_create_column_ids_2) {
@@ -915,20 +944,21 @@ TEST_F(HiveReaderCreateColumnIdsTest, 
test_create_column_ids_2) {
     access_config.predicate_paths = {{"profile", "address", "coordinates", 
"lat"},
                                      {"profile", "contact", "email"}};
 
+    std::vector<std::string> table_column_names = {"name", "profile"};
     // column_ids should contain all necessary column IDs (set automatically 
deduplicates)
-    // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
     std::set<uint64_t> expected_column_ids = {2,  3,  4,  5,  6,  7,  8,  9, 
10,
                                               11, 12, 13, 14, 15, 16, 17, 18};
-    // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
     std::set<uint64_t> expected_filter_column_ids = {3, 4, 7, 8, 10, 11};
 
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids,
-                                 true);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids, true);
 
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids,
-                             true);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids, true);
 }
 
 TEST_F(HiveReaderCreateColumnIdsTest, test_create_column_ids_3) {
@@ -964,19 +994,20 @@ TEST_F(HiveReaderCreateColumnIdsTest, 
test_create_column_ids_3) {
     access_config.predicate_paths = {{"profile", "address", "coordinates"},
                                      {"profile", "contact", "email"}};
 
+    std::vector<std::string> table_column_names = {"name", "profile"};
     // column_ids should contain all necessary column IDs (set automatically 
deduplicates)
-    // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
     std::set<uint64_t> expected_column_ids = {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14};
-    // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
     std::set<uint64_t> expected_filter_column_ids = {3, 4, 7, 8, 9, 10, 11};
 
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids,
-                                 true);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids, true);
 
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids,
-                             true);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids, true);
 }
 
 TEST_F(HiveReaderCreateColumnIdsTest, test_create_column_ids_4) {
@@ -988,19 +1019,21 @@ TEST_F(HiveReaderCreateColumnIdsTest, 
test_create_column_ids_4) {
     access_config.all_column_paths = {};
     access_config.predicate_paths = {};
 
+    std::vector<std::string> table_column_names = {"name", "profile"};
     // column_ids should contain all necessary column IDs (set automatically 
deduplicates)
-    // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
-    std::set<uint64_t> expected_column_ids = {2};
-    // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
+    std::set<uint64_t> expected_column_ids = {2,  3,  4,  5,  6,  7,  8,  9, 
10,
+                                              11, 12, 13, 14, 15, 16, 17, 18};
     std::set<uint64_t> expected_filter_column_ids = {};
 
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids,
-                                 true);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids, true);
 
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids,
-                             true);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids, true);
 }
 
 TEST_F(HiveReaderCreateColumnIdsTest, test_create_column_ids_5) {
@@ -1049,18 +1082,20 @@ TEST_F(HiveReaderCreateColumnIdsTest, 
test_create_column_ids_5) {
         access_configs.push_back(access_config);
     }
 
+    std::vector<std::string> table_column_names = {"name", "friends", 
"recent_activity"};
     // column_ids should contain all necessary column IDs (set automatically 
deduplicates)
-    // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
     std::set<uint64_t> expected_column_ids = {2, 21, 22, 24, 25, 26, 27, 28, 
29, 30, 32};
-    // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
     std::set<uint64_t> expected_filter_column_ids = {21, 22, 24, 26, 27, 28};
 
-    run_parquet_test_with_method(access_configs, expected_column_ids, 
expected_filter_column_ids);
-    run_parquet_test_with_method(access_configs, expected_column_ids, 
expected_filter_column_ids,
-                                 true);
+    run_parquet_test_with_method(table_column_names, access_configs, 
expected_column_ids,
+                                 expected_filter_column_ids);
+    run_parquet_test_with_method(table_column_names, access_configs, 
expected_column_ids,
+                                 expected_filter_column_ids, true);
 
-    run_orc_test_with_method(access_configs, expected_column_ids, 
expected_filter_column_ids);
-    run_orc_test_with_method(access_configs, expected_column_ids, 
expected_filter_column_ids, true);
+    run_orc_test_with_method(table_column_names, access_configs, 
expected_column_ids,
+                             expected_filter_column_ids);
+    run_orc_test_with_method(table_column_names, access_configs, 
expected_column_ids,
+                             expected_filter_column_ids, true);
 }
 
 TEST_F(HiveReaderCreateColumnIdsTest, test_create_column_ids_6) {
@@ -1146,33 +1181,32 @@ TEST_F(HiveReaderCreateColumnIdsTest, 
test_create_column_ids_6) {
     }
 
     {
+        std::vector<std::string> table_column_names = {"name", 
"complex_attributes"};
         // parquet values should access keys
         // column_ids should contain all necessary column IDs (set 
automatically deduplicates)
-        // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
-        std::set<uint64_t> parquet_expected_column_ids = {
-                2,  36, 37, 38, 39, 40, 44, 45, 48, 49, 52, 53, 54, 61, 62, 
63, 64,
-                65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 79, 80, 
82, 83};
-        // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
-        std::set<uint64_t> parquet_expected_filter_column_ids = {36, 37, 38, 
39, 40};
-
-        run_parquet_test_with_method(access_configs, 
parquet_expected_column_ids,
-                                     parquet_expected_filter_column_ids);
-        run_parquet_test_with_method(access_configs, 
parquet_expected_column_ids,
-                                     parquet_expected_filter_column_ids, true);
+        std::set<uint64_t> expected_column_ids = {2,  36, 37, 38, 39, 40, 44, 
45, 48, 49, 52, 53,
+                                                  54, 61, 62, 63, 64, 65, 66, 
67, 68, 69, 70, 71,
+                                                  72, 73, 74, 75, 76, 77, 79, 
80, 82, 83};
+        std::set<uint64_t> expected_filter_column_ids = {36, 37, 38, 39, 40};
+
+        run_parquet_test_with_method(table_column_names, access_configs, 
expected_column_ids,
+                                     expected_filter_column_ids);
+        run_parquet_test_with_method(table_column_names, access_configs, 
expected_column_ids,
+                                     expected_filter_column_ids, true);
     }
 
     {
+        std::vector<std::string> table_column_names = {"name", 
"complex_attributes"};
+        // orc values should access keys because we need to deduplicate by keys
         // column_ids should contain all necessary column IDs (set 
automatically deduplicates)
-        // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
-        std::set<uint64_t> orc_expected_column_ids = {2,  36, 37, 38, 39, 40, 
44, 45, 48, 49, 52,
-                                                      53, 54, 61, 63, 64, 65, 
66, 67, 68, 69, 70,
-                                                      71, 72, 73, 74, 75, 76, 
77, 79, 80, 82, 83};
-        // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
-        std::set<uint64_t> orc_expected_filter_column_ids = {36, 37, 38, 39, 
40};
-        run_orc_test_with_method(access_configs, orc_expected_column_ids,
-                                 orc_expected_filter_column_ids);
-        run_orc_test_with_method(access_configs, orc_expected_column_ids,
-                                 orc_expected_filter_column_ids, true);
+        std::set<uint64_t> expected_column_ids = {2,  36, 37, 38, 39, 40, 44, 
45, 48, 49, 52, 53,
+                                                  54, 61, 62, 63, 64, 65, 66, 
67, 68, 69, 70, 71,
+                                                  72, 73, 74, 75, 76, 77, 79, 
80, 82, 83};
+        std::set<uint64_t> expected_filter_column_ids = {36, 37, 38, 39, 40};
+        run_orc_test_with_method(table_column_names, access_configs, 
expected_column_ids,
+                                 expected_filter_column_ids);
+        run_orc_test_with_method(table_column_names, access_configs, 
expected_column_ids,
+                                 expected_filter_column_ids, true);
     }
 }
 
diff --git 
a/be/test/vec/exec/format/table/iceberg/iceberg_reader_create_column_ids_test.cpp
 
b/be/test/vec/exec/format/table/iceberg/iceberg_reader_create_column_ids_test.cpp
index 47e0ad87a4a..b1152f159bd 100644
--- 
a/be/test/vec/exec/format/table/iceberg/iceberg_reader_create_column_ids_test.cpp
+++ 
b/be/test/vec/exec/format/table/iceberg/iceberg_reader_create_column_ids_test.cpp
@@ -770,7 +770,8 @@ protected:
     }
 
     // Helper function: run Parquet test with different column ID extraction 
methods
-    void run_parquet_test_with_method(const 
std::vector<ColumnAccessPathConfig>& access_configs,
+    void run_parquet_test_with_method(const std::vector<std::string>& 
table_column_names,
+                                      const 
std::vector<ColumnAccessPathConfig>& access_configs,
                                       const std::set<uint64_t>& 
expected_column_ids,
                                       const std::set<uint64_t>& 
expected_filter_column_ids,
                                       bool use_top_level_method = false,
@@ -792,12 +793,13 @@ protected:
         TTableDescriptor t_table_desc;
 
         // Define all columns according to the schema
-        std::vector<std::string> table_column_names = {
-                "name",       "profile",           "tags", "friends", 
"recent_activity",
-                "attributes", "complex_attributes"};
-        std::vector<int> table_column_positions = {1, 2, 3, 4, 5, 6, 7};
-        std::vector<TPrimitiveType::type> table_column_types = {
-                // TPrimitiveType::BIGINT,    // id
+        std::vector<std::string> all_table_column_names = {"id",         
"name",
+                                                           "profile",    
"tags",
+                                                           "friends",    
"recent_activity",
+                                                           "attributes", 
"complex_attributes"};
+        std::vector<int> all_table_column_positions = {0, 1, 2, 3, 4, 5, 6, 7};
+        std::vector<TPrimitiveType::type> all_table_column_types = {
+                TPrimitiveType::BIGINT, // id
                 TPrimitiveType::STRING, // name
                 TPrimitiveType::STRUCT, // profile
                 TPrimitiveType::ARRAY,  // tags
@@ -807,6 +809,18 @@ protected:
                 TPrimitiveType::MAP     // complex_attributes
         };
 
+        std::vector<int> table_column_positions;
+        std::vector<TPrimitiveType::type> table_column_types;
+        for (const auto& col_name : table_column_names) {
+            auto it = std::find(all_table_column_names.begin(), 
all_table_column_names.end(),
+                                col_name);
+            if (it != all_table_column_names.end()) {
+                int idx = std::distance(all_table_column_names.begin(), it);
+                table_column_positions.push_back(idx);
+                table_column_types.push_back(all_table_column_types[idx]);
+            }
+        }
+
         const TupleDescriptor* tuple_descriptor = create_tuple_descriptor(
                 &desc_tbl, obj_pool, t_desc_table, t_table_desc, 
table_column_names,
                 table_column_positions, table_column_types, access_configs);
@@ -827,7 +841,8 @@ protected:
     }
 
     // Helper function: run Orc test with different column ID extraction 
methods
-    void run_orc_test_with_method(const std::vector<ColumnAccessPathConfig>& 
access_configs,
+    void run_orc_test_with_method(const std::vector<std::string>& 
table_column_names,
+                                  const std::vector<ColumnAccessPathConfig>& 
access_configs,
                                   const std::set<uint64_t>& 
expected_column_ids,
                                   const std::set<uint64_t>& 
expected_filter_column_ids,
                                   bool use_top_level_method = false,
@@ -849,12 +864,13 @@ protected:
         TTableDescriptor t_table_desc;
 
         // Define all columns according to the schema
-        std::vector<std::string> table_column_names = {
-                "name",       "profile",           "tags", "friends", 
"recent_activity",
-                "attributes", "complex_attributes"};
-        std::vector<int> table_column_positions = {1, 2, 3, 4, 5, 6, 7};
-        std::vector<TPrimitiveType::type> table_column_types = {
-                // TPrimitiveType::BIGINT,    // id
+        std::vector<std::string> all_table_column_names = {"id",         
"name",
+                                                           "profile",    
"tags",
+                                                           "friends",    
"recent_activity",
+                                                           "attributes", 
"complex_attributes"};
+        std::vector<int> all_table_column_positions = {0, 1, 2, 3, 4, 5, 6, 7};
+        std::vector<TPrimitiveType::type> all_table_column_types = {
+                TPrimitiveType::BIGINT, // id
                 TPrimitiveType::STRING, // name
                 TPrimitiveType::STRUCT, // profile
                 TPrimitiveType::ARRAY,  // tags
@@ -864,6 +880,18 @@ protected:
                 TPrimitiveType::MAP     // complex_attributes
         };
 
+        std::vector<int> table_column_positions;
+        std::vector<TPrimitiveType::type> table_column_types;
+        for (const auto& col_name : table_column_names) {
+            auto it = std::find(all_table_column_names.begin(), 
all_table_column_names.end(),
+                                col_name);
+            if (it != all_table_column_names.end()) {
+                int idx = std::distance(all_table_column_names.begin(), it);
+                table_column_positions.push_back(idx);
+                table_column_types.push_back(all_table_column_types[idx]);
+            }
+        }
+
         const TupleDescriptor* tuple_descriptor = create_tuple_descriptor(
                 &desc_tbl, obj_pool, t_desc_table, t_table_desc, 
table_column_names,
                 table_column_positions, table_column_types, access_configs);
@@ -898,14 +926,15 @@ TEST_F(IcebergReaderCreateColumnIdsTest, 
test_create_column_ids_1) {
                                       {"3", "11", "*", "23"}};
     access_config.predicate_paths = {{"3", "9", "14", "15"}, {"3", "10", 
"17"}};
 
+    std::vector<std::string> table_column_names = {"name", "profile"};
     // column_ids should contain all necessary column IDs (set automatically 
deduplicates)
-    // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
     std::set<uint64_t> expected_column_ids = {2, 3, 4, 7, 8, 9, 10, 11, 15, 
16, 18};
-    // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
     std::set<uint64_t> expected_filter_column_ids = {3, 4, 7, 8, 10, 11};
 
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids);
 }
 
 TEST_F(IcebergReaderCreateColumnIdsTest, test_create_column_ids_2) {
@@ -940,15 +969,16 @@ TEST_F(IcebergReaderCreateColumnIdsTest, 
test_create_column_ids_2) {
     access_config.all_column_paths = {{"3"}};
     access_config.predicate_paths = {{"3", "9", "14", "15"}, {"3", "10", 
"17"}};
 
+    std::vector<std::string> table_column_names = {"name", "profile"};
     // column_ids should contain all necessary column IDs (set automatically 
deduplicates)
-    // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
     std::set<uint64_t> expected_column_ids = {2,  3,  4,  5,  6,  7,  8,  9, 
10,
                                               11, 12, 13, 14, 15, 16, 17, 18};
-    // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
     std::set<uint64_t> expected_filter_column_ids = {3, 4, 7, 8, 10, 11};
 
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids);
 }
 
 TEST_F(IcebergReaderCreateColumnIdsTest, test_create_column_ids_3) {
@@ -987,14 +1017,15 @@ TEST_F(IcebergReaderCreateColumnIdsTest, 
test_create_column_ids_3) {
     // access_config.predicate_paths = {{"profile", "address", "coordinates"},
     //                                  {"profile", "contact", "email"}};
 
+    std::vector<std::string> table_column_names = {"name", "profile"};
     // column_ids should contain all necessary column IDs (set automatically 
deduplicates)
-    // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
     std::set<uint64_t> expected_column_ids = {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14};
-    // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
     std::set<uint64_t> expected_filter_column_ids = {3, 4, 7, 8, 9, 10, 11};
 
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids);
 }
 
 TEST_F(IcebergReaderCreateColumnIdsTest, test_create_column_ids_4) {
@@ -1006,14 +1037,16 @@ TEST_F(IcebergReaderCreateColumnIdsTest, 
test_create_column_ids_4) {
     access_config.all_column_paths = {};
     access_config.predicate_paths = {};
 
+    std::vector<std::string> table_column_names = {"name", "profile"};
     // column_ids should contain all necessary column IDs (set automatically 
deduplicates)
-    // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
-    std::set<uint64_t> expected_column_ids = {2};
-    // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
+    std::set<uint64_t> expected_column_ids = {2,  3,  4,  5,  6,  7,  8,  9, 
10,
+                                              11, 12, 13, 14, 15, 16, 17, 18};
     std::set<uint64_t> expected_filter_column_ids = {};
 
-    run_parquet_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
-    run_orc_test_with_method({access_config}, expected_column_ids, 
expected_filter_column_ids);
+    run_parquet_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                                 expected_filter_column_ids);
+    run_orc_test_with_method(table_column_names, {access_config}, 
expected_column_ids,
+                             expected_filter_column_ids);
 }
 
 TEST_F(IcebergReaderCreateColumnIdsTest, test_create_column_ids_5) {
@@ -1060,14 +1093,15 @@ TEST_F(IcebergReaderCreateColumnIdsTest, 
test_create_column_ids_5) {
         access_configs.push_back(access_config);
     }
 
+    std::vector<std::string> table_column_names = {"name", "friends", 
"recent_activity"};
     // column_ids should contain all necessary column IDs (set automatically 
deduplicates)
-    // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
     std::set<uint64_t> expected_column_ids = {2, 21, 22, 24, 25, 26, 27, 28, 
29, 30, 32};
-    // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
     std::set<uint64_t> expected_filter_column_ids = {21, 22, 24, 26, 27, 28};
 
-    run_parquet_test_with_method(access_configs, expected_column_ids, 
expected_filter_column_ids);
-    run_orc_test_with_method(access_configs, expected_column_ids, 
expected_filter_column_ids);
+    run_parquet_test_with_method(table_column_names, access_configs, 
expected_column_ids,
+                                 expected_filter_column_ids);
+    run_orc_test_with_method(table_column_names, access_configs, 
expected_column_ids,
+                             expected_filter_column_ids);
 }
 
 TEST_F(IcebergReaderCreateColumnIdsTest, test_create_column_ids_6) {
@@ -1148,29 +1182,29 @@ TEST_F(IcebergReaderCreateColumnIdsTest, 
test_create_column_ids_6) {
     }
 
     {
+        std::vector<std::string> table_column_names = {"name", 
"complex_attributes"};
         // parquet values should access keys
         // column_ids should contain all necessary column IDs (set 
automatically deduplicates)
-        // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
         std::set<uint64_t> expected_column_ids = {2,  36, 37, 38, 39, 40, 44, 
45, 48, 49, 52, 53,
                                                   54, 61, 62, 63, 64, 65, 66, 
67, 68, 69, 70, 71,
                                                   72, 73, 74, 75, 76, 77, 79, 
80, 82, 83};
-        // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
         std::set<uint64_t> expected_filter_column_ids = {36, 37, 38, 39, 40};
 
-        run_parquet_test_with_method(access_configs, expected_column_ids,
+        run_parquet_test_with_method(table_column_names, access_configs, 
expected_column_ids,
                                      expected_filter_column_ids);
     }
 
     {
+        std::vector<std::string> table_column_names = {"name", 
"complex_attributes"};
+        // orc values should access keys because we need to deduplicate by keys
         // column_ids should contain all necessary column IDs (set 
automatically deduplicates)
-        // Expected IDs based on the schema: name(2), profile(3), address(4), 
coordinates(7), lat(8), lng(9), contact(10), email(11), hobbies(15), 
element(16), level(18)
-        std::set<uint64_t> expected_column_ids = {2,  36, 37, 38, 39, 40, 44, 
45, 48, 49, 52,
-                                                  53, 54, 61, 63, 64, 65, 66, 
67, 68, 69, 70,
-                                                  71, 72, 73, 74, 75, 76, 77, 
79, 80, 82, 83};
-        // Expected IDs based on the schema: profile(3), address(4), 
coordinates(7), lat(8), contact(10), email(11)
+        std::set<uint64_t> expected_column_ids = {2,  36, 37, 38, 39, 40, 44, 
45, 48, 49, 52, 53,
+                                                  54, 61, 62, 63, 64, 65, 66, 
67, 68, 69, 70, 71,
+                                                  72, 73, 74, 75, 76, 77, 79, 
80, 82, 83};
         std::set<uint64_t> expected_filter_column_ids = {36, 37, 38, 39, 40};
 
-        run_orc_test_with_method(access_configs, expected_column_ids, 
expected_filter_column_ids);
+        run_orc_test_with_method(table_column_names, access_configs, 
expected_column_ids,
+                                 expected_filter_column_ids);
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to