This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 07de4121563 [fix](parquet) complex type in parquet is case sensitive #29245 (#29249)
07de4121563 is described below

commit 07de412156322cc14e458049b61671bf0dc3e151
Author: Ashin Gau <[email protected]>
AuthorDate: Fri Dec 29 12:36:20 2023 +0800

    [fix](parquet) complex type in parquet is case sensitive #29245 (#29249)
---
 be/src/vec/exec/format/parquet/schema_desc.cpp | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp
index c9283c62889..08692de8743 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.cpp
+++ b/be/src/vec/exec/format/parquet/schema_desc.cpp
@@ -167,9 +167,7 @@ Status FieldDescriptor::parse_node_field(const 
std::vector<tparquet::SchemaEleme
         auto child = &node_field->children[0];
         parse_physical_field(t_schema, false, child);
 
-        std::string lower_case_name;
-        transform(t_schema.name.begin(), t_schema.name.end(), 
lower_case_name.begin(), ::tolower);
-        node_field->name = lower_case_name;
+        node_field->name = to_lower(t_schema.name);
         node_field->type.type = TYPE_ARRAY;
         node_field->type.add_sub_type(child->type);
         node_field->is_nullable = false;
@@ -187,9 +185,7 @@ Status FieldDescriptor::parse_node_field(const 
std::vector<tparquet::SchemaEleme
 
 void FieldDescriptor::parse_physical_field(const tparquet::SchemaElement& 
physical_schema,
                                            bool is_nullable, FieldSchema* 
physical_field) {
-    std::string lower_case_name = physical_schema.name;
-    transform(lower_case_name.begin(), lower_case_name.end(), 
lower_case_name.begin(), ::tolower);
-    physical_field->name = lower_case_name;
+    physical_field->name = to_lower(physical_schema.name);
     physical_field->parquet_schema = physical_schema;
     physical_field->is_nullable = is_nullable;
     physical_field->physical_type = physical_schema.type;
@@ -443,7 +439,7 @@ Status FieldDescriptor::parse_group_field(const 
std::vector<tparquet::SchemaElem
         // produce a non-null list<struct>
         RETURN_IF_ERROR(parse_struct_field(t_schemas, curr_pos, struct_field));
 
-        group_field->name = group_schema.name;
+        group_field->name = to_lower(group_schema.name);
         group_field->type.type = TYPE_ARRAY;
         group_field->type.add_sub_type(struct_field->type);
         group_field->is_nullable = false;
@@ -511,7 +507,7 @@ Status FieldDescriptor::parse_list_field(const 
std::vector<tparquet::SchemaEleme
         _next_schema_pos = curr_pos + 2;
     }
 
-    list_field->name = first_level.name;
+    list_field->name = to_lower(first_level.name);
     list_field->type.type = TYPE_ARRAY;
     list_field->type.add_sub_type(list_field->children[0].type);
     list_field->is_nullable = is_optional;
@@ -574,7 +570,7 @@ Status FieldDescriptor::parse_map_field(const 
std::vector<tparquet::SchemaElemen
     // produce MAP<STRUCT<KEY, VALUE>>
     RETURN_IF_ERROR(parse_struct_field(t_schemas, curr_pos + 1, map_kv_field));
 
-    map_field->name = map_schema.name;
+    map_field->name = to_lower(map_schema.name);
     map_field->type.type = TYPE_MAP;
     map_field->type.add_sub_type(map_kv_field->type.children[0]);
     map_field->type.add_sub_type(map_kv_field->type.children[1]);
@@ -598,7 +594,7 @@ Status FieldDescriptor::parse_struct_field(const 
std::vector<tparquet::SchemaEle
     for (int i = 0; i < num_children; ++i) {
         RETURN_IF_ERROR(parse_node_field(t_schemas, _next_schema_pos, 
&struct_field->children[i]));
     }
-    struct_field->name = struct_schema.name;
+    struct_field->name = to_lower(struct_schema.name);
     struct_field->is_nullable = is_optional;
     struct_field->type.type = TYPE_STRUCT;
     for (int i = 0; i < num_children; ++i) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to