This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 0b3a6f50b8809a91bd5039214f755bb8611ae103 Author: Ashin Gau <[email protected]> AuthorDate: Sun Aug 20 22:59:18 2023 +0800 [fix](parquet) the key colum of map type in parquet may be nullable (#23180) Fix errors when reading a map type with a nullable key column in a parquet file. `ParquetReader` supports reading nullable key columns, but a check was added that rejects them. Unfortunately, the error returned by this check was not propagated correctly, causing the BE to crash and write meaningless error logs to be.out: ``` ... 11# doris::vectorized::ParquetReader::get_columns(std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, doris::TypeDescriptor, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, doris::TypeDes [...] 12# doris::vectorized::VFileScanner::_get_next_reader() in /root/yun_you_external/output/be/lib/doris_be 13# doris::vectorized::VFileScanner::_get_block_impl(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /root/doris/be/src/vec/exec/scan/vfile_scanner.cpp:241 ... 
``` --- be/src/vec/exec/format/parquet/schema_desc.cpp | 2 +- be/src/vec/exec/format/parquet/vparquet_file_metadata.cpp | 3 +-- regression-test/data/external_table_p2/hive/test_complex_types.out | 3 +++ .../suites/external_table_p2/hive/test_complex_types.groovy | 4 ++++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp index dcc5140e89..05eaf30a80 100644 --- a/be/src/vec/exec/format/parquet/schema_desc.cpp +++ b/be/src/vec/exec/format/parquet/schema_desc.cpp @@ -480,7 +480,7 @@ Status FieldDescriptor::parse_map_field(const std::vector<tparquet::SchemaElemen } auto& map_key = t_schemas[curr_pos + 2]; if (!is_required_node(map_key)) { - return Status::InvalidArgument("the third level(map key) in map group must be required"); + LOG(WARNING) << "Filed " << map_schema.name << " is map type, but with nullable key column"; } if (map_key_value.num_children == 1) { diff --git a/be/src/vec/exec/format/parquet/vparquet_file_metadata.cpp b/be/src/vec/exec/format/parquet/vparquet_file_metadata.cpp index c199e72da7..95960f489e 100644 --- a/be/src/vec/exec/format/parquet/vparquet_file_metadata.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_file_metadata.cpp @@ -32,8 +32,7 @@ Status FileMetaData::init_schema() { if (_metadata.schema[0].num_children <= 0) { Status::Corruption("Invalid parquet schema"); } - _schema.parse_from_thrift(_metadata.schema); - return Status(); + return _schema.parse_from_thrift(_metadata.schema); } const tparquet::FileMetaData& FileMetaData::to_thrift() { diff --git a/regression-test/data/external_table_p2/hive/test_complex_types.out b/regression-test/data/external_table_p2/hive/test_complex_types.out index 8c4f9d04f4..c414a60a99 100644 --- a/regression-test/data/external_table_p2/hive/test_complex_types.out +++ b/regression-test/data/external_table_p2/hive/test_complex_types.out @@ -29,3 +29,6 @@ 2 \N \N \N {"h", 10} 3 [5, NULL] [[6, 7], [8, 
NULL], NULL] {"f":1, "g":NULL} {NULL, 9} +-- !map_with_nullable_key -- +\N \N \N \N \N \N \N \N \N test test aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...] + diff --git a/regression-test/suites/external_table_p2/hive/test_complex_types.groovy b/regression-test/suites/external_table_p2/hive/test_complex_types.groovy index c86c8c2562..5422e1d9a4 100644 --- a/regression-test/suites/external_table_p2/hive/test_complex_types.groovy +++ b/regression-test/suites/external_table_p2/hive/test_complex_types.groovy @@ -52,5 +52,9 @@ suite("test_complex_types", "p2") { qt_array_last """select max(array_last(i -> i > 0, capacity)) from byd where array_last(i -> i > 0, capacity) < 0.99""" qt_offsets_check """select * from complex_offsets_check order by id""" + + qt_map_with_nullable_key """select * from parquet_all_types limit 1""" + + sql """drop catalog ${catalog_name};""" } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
