This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new dca16796ad [fix](ParquetReader) definition level of repeated parent is
wrong (#17337)
dca16796ad is described below
commit dca16796adfac43f5bbcf74df33d212c2bcf8510
Author: Ashin Gau <[email protected]>
AuthorDate: Mon Mar 6 18:15:57 2023 +0800
[fix](ParquetReader) definition level of repeated parent is wrong (#17337)
Fix three bugs:
1. `repeated_parent_def_level` should be the definition level of its repeated
parent.
2. Parsing failed for schemas containing types like `decimal(p, s)`.
3. Wrong offsets were filled for array types.
---
be/src/vec/exec/format/parquet/schema_desc.cpp | 22 +++++++++++-----------
.../exec/format/parquet/vparquet_column_reader.cpp | 8 ++++----
.../doris/catalog/HiveMetaStoreClientHelper.java | 7 ++++++-
3 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp
b/be/src/vec/exec/format/parquet/schema_desc.cpp
index c7ff2353d4..ebd9a25352 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.cpp
+++ b/be/src/vec/exec/format/parquet/schema_desc.cpp
@@ -55,11 +55,11 @@ static int num_children_node(const tparquet::SchemaElement&
schema) {
return schema.__isset.num_children ? schema.num_children : 0;
}
-static void set_child_node_level(FieldSchema* parent, size_t rep_inc = 0,
size_t def_inc = 0) {
+static void set_child_node_level(FieldSchema* parent, int16_t
repeated_parent_def_level) {
for (auto& child : parent->children) {
- child.repetition_level = parent->repetition_level + rep_inc;
- child.definition_level = parent->definition_level + def_inc;
- child.repeated_parent_def_level = parent->definition_level;
+ child.repetition_level = parent->repetition_level;
+ child.definition_level = parent->definition_level;
+ child.repeated_parent_def_level = repeated_parent_def_level;
}
}
@@ -129,7 +129,7 @@ Status FieldDescriptor::parse_node_field(const
std::vector<tparquet::SchemaEleme
node_field->repetition_level++;
node_field->definition_level++;
node_field->children.resize(1);
- set_child_node_level(node_field);
+ set_child_node_level(node_field, node_field->definition_level);
auto child = &node_field->children[0];
parse_physical_field(t_schema, false, child);
@@ -315,7 +315,7 @@ Status FieldDescriptor::parse_group_field(const
std::vector<tparquet::SchemaElem
group_field->repetition_level++;
group_field->definition_level++;
group_field->children.resize(1);
- set_child_node_level(group_field);
+ set_child_node_level(group_field, group_field->definition_level);
auto struct_field = &group_field->children[0];
// the list of struct:
// repeated group <name> (LIST) {
@@ -379,16 +379,16 @@ Status FieldDescriptor::parse_list_field(const
std::vector<tparquet::SchemaEleme
// optional field, and the third level element is the nested
structure in list
// produce nested structure like: LIST<INT>, LIST<MAP>,
LIST<LIST<...>>
// skip bag/list, it's a repeated element.
- set_child_node_level(list_field);
+ set_child_node_level(list_field, list_field->definition_level);
RETURN_IF_ERROR(parse_node_field(t_schemas, curr_pos + 2,
list_child));
} else {
// required field, produce the list of struct
- set_child_node_level(list_field);
+ set_child_node_level(list_field, list_field->definition_level);
RETURN_IF_ERROR(parse_struct_field(t_schemas, curr_pos + 1,
list_child));
}
} else if (num_children == 0) {
// required two level list, for compatibility reason.
- set_child_node_level(list_field);
+ set_child_node_level(list_field, list_field->definition_level);
parse_physical_field(second_level, false, list_child);
_next_schema_pos = curr_pos + 2;
}
@@ -450,7 +450,7 @@ Status FieldDescriptor::parse_map_field(const
std::vector<tparquet::SchemaElemen
map_field->definition_level++;
map_field->children.resize(1);
- set_child_node_level(map_field);
+ set_child_node_level(map_field, map_field->repeated_parent_def_level);
auto map_kv_field = &map_field->children[0];
// produce MAP<STRUCT<KEY, VALUE>>
RETURN_IF_ERROR(parse_struct_field(t_schemas, curr_pos + 1, map_kv_field));
@@ -473,7 +473,7 @@ Status FieldDescriptor::parse_struct_field(const
std::vector<tparquet::SchemaEle
}
auto num_children = struct_schema.num_children;
struct_field->children.resize(num_children);
- set_child_node_level(struct_field);
+ set_child_node_level(struct_field,
struct_field->repeated_parent_def_level);
_next_schema_pos = curr_pos + 1;
for (int i = 0; i < num_children; ++i) {
RETURN_IF_ERROR(parse_node_field(t_schemas, _next_schema_pos,
&struct_field->children[i]));
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index 4e270c30cf..3e94a3082f 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -40,7 +40,7 @@ static void fill_struct_null_map(FieldSchema* field, NullMap&
null_map,
DCHECK_EQ(num_levels, rep_levels.size());
size_t origin_size = null_map.size();
null_map.resize(origin_size + num_levels);
- size_t pos = 0;
+ size_t pos = origin_size;
for (size_t i = 0; i < num_levels; ++i) {
// skip the levels affect its ancestor or its descendants
if (def_levels[i] < field->repeated_parent_def_level ||
@@ -53,7 +53,7 @@ static void fill_struct_null_map(FieldSchema* field, NullMap&
null_map,
null_map[pos++] = 1;
}
}
- null_map.resize(origin_size + pos);
+ null_map.resize(pos + 1);
}
static void fill_array_offset(FieldSchema* field, ColumnArray::Offsets64&
offsets_data,
@@ -88,9 +88,9 @@ static void fill_array_offset(FieldSchema* field,
ColumnArray::Offsets64& offset
(*null_map_ptr)[offset_pos] = 1;
}
}
- offsets_data.resize(origin_size + offset_pos + 1);
+ offsets_data.resize(offset_pos + 1);
if (null_map_ptr != nullptr) {
- null_map_ptr->resize(origin_size + offset_pos + 1);
+ null_map_ptr->resize(offset_pos + 1);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java
index 94370f3129..53b492082f 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/HiveMetaStoreClientHelper.java
@@ -676,13 +676,18 @@ public class HiveMetaStoreClientHelper {
*/
private static int findNextNestedField(String commaSplitFields) {
int numLess = 0;
+ int numBracket = 0;
for (int i = 0; i < commaSplitFields.length(); i++) {
char c = commaSplitFields.charAt(i);
if (c == '<') {
numLess++;
} else if (c == '>') {
numLess--;
- } else if (c == ',' && numLess == 0) {
+ } else if (c == '(') {
+ numBracket++;
+ } else if (c == ')') {
+ numBracket--;
+ } else if (c == ',' && numLess == 0 && numBracket == 0) {
return i;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]