This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit fbb6a0463daf993ac46308aad8cb51a59684261e Author: amory <[email protected]> AuthorDate: Wed Jul 19 12:09:34 2023 +0800 [FIX](map)fix arrow serde with map null key #21955 --- .../vec/data_types/serde/data_type_map_serde.cpp | 19 ++++++++++-- .../serde/data_type_serde_arrow_test.cpp | 36 ++++++++++++++++++++-- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index edb21a60ef..fcf67a8f53 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -67,9 +67,22 @@ void DataTypeMapSerDe::write_column_to_arrow(const IColumn& column, const NullMa array_builder->type()->name()); } else if (simd::contain_byte(keys_nullmap_data + offsets[r - 1], offsets[r] - offsets[r - 1], 1)) { - // arrow do not support key is null so we just put null with this row - checkArrowStatus(builder.AppendNull(), column.get_name(), - array_builder->type()->name()); + // arrow do not support key is null, so we ignore the null key-value + MutableColumnPtr key_mutable_data = nested_keys_column.clone_empty(); + MutableColumnPtr value_mutable_data = nested_values_column.clone_empty(); + for (size_t i = offsets[r - 1]; i < offsets[r]; ++i) { + if (keys_nullmap_data[i] == 1) { + continue; + } + key_mutable_data->insert_from(nested_keys_column, i); + value_mutable_data->insert_from(nested_values_column, i); + } + checkArrowStatus(builder.Append(), column.get_name(), array_builder->type()->name()); + + key_serde->write_column_to_arrow(*key_mutable_data, nullptr, key_builder, 0, + key_mutable_data->size()); + value_serde->write_column_to_arrow(*value_mutable_data, nullptr, value_builder, 0, + value_mutable_data->size()); } else { checkArrowStatus(builder.Append(), column.get_name(), array_builder->type()->name()); key_serde->write_column_to_arrow(nested_keys_column, nullptr, key_builder, diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp index c1913e6d86..92fbcc97c0 100644 --- a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp @@ -56,6 +56,7 @@ #include "vec/columns/column_array.h" #include "vec/columns/column_complex.h" #include "vec/columns/column_decimal.h" +#include "vec/columns/column_map.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" @@ -76,6 +77,7 @@ #include "vec/data_types/data_type_string.h" #include "vec/data_types/data_type_struct.h" #include "vec/data_types/data_type_time_v2.h" +#include "vec/io/io_helper.h" #include "vec/runtime/vdatetime_value.h" #include "vec/utils/arrow_column_to_doris_column.h" @@ -95,6 +97,7 @@ void serialize_and_deserialize_arrow_test() { {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}, {"k5", FieldType::OLAP_FIELD_TYPE_DECIMAL32, 5, TYPE_DECIMAL32, false}, {"k6", FieldType::OLAP_FIELD_TYPE_DECIMAL64, 6, TYPE_DECIMAL64, false}, + {"k12", FieldType::OLAP_FIELD_TYPE_DATETIMEV2, 12, TYPE_DATETIMEV2, false}, }; } else { cols = {{"a", FieldType::OLAP_FIELD_TYPE_ARRAY, 6, TYPE_ARRAY, true}, @@ -327,6 +330,28 @@ void serialize_and_deserialize_arrow_test() { block.insert(test_datetime); } break; + case TYPE_DATETIMEV2: // uint64 + tslot.__set_slotType(type_desc.to_thrift()); + { + // 2022-01-01 11:11:11.111 + auto column_vector_datetimev2 = + vectorized::ColumnVector<vectorized::UInt64>::create(); + // auto& datetimev2_data = column_vector_datetimev2->get_data(); + DateV2Value<DateTimeV2ValueType> value; + string date_literal = "2022-01-01 11:11:11.111"; + value.from_date_str(date_literal.c_str(), date_literal.size()); + char to[64] = {}; + std::cout << "value: " << value.to_string(to) << std::endl; + for (int i = 0; i < row_num; ++i) { + column_vector_datetimev2->insert(value.to_date_int_val()); + } + vectorized::DataTypePtr datetimev2_type( + std::make_shared<vectorized::DataTypeDateTimeV2>()); + vectorized::ColumnWithTypeAndName test_datetimev2( + column_vector_datetimev2->get_ptr(), datetimev2_type, col_name); + block.insert(test_datetimev2); + } + break; case TYPE_ARRAY: // array type_desc.add_sub_type(TYPE_STRING, true); tslot.__set_slotType(type_desc.to_thrift()); @@ -487,6 +512,11 @@ void serialize_and_deserialize_arrow_test() { } } continue; + } else if (std::get<3>(t) == PrimitiveType::TYPE_DATETIMEV2) { + // now we only support read doris datetimev2 to arrow + block.erase(real_column_name); + new_block.erase(real_column_name); + continue; } arrow_column_to_doris_column(array, 0, column_with_type_and_name.column, column_with_type_and_name.type, block.rows(), "UTC"); @@ -579,9 +609,9 @@ TEST(DataTypeSerDeArrowTest, DataTypeMapNullKeySerDeTest) { column_with_type_and_name.type, block.rows(), "UTC"); std::cout << block.dump_data() << std::endl; std::cout << new_block.dump_data() << std::endl; - // new block row_index 0, 2 is should be empty - EXPECT_EQ(new_block.dump_one_line(0, 1), "{}"); - EXPECT_EQ(new_block.dump_one_line(2, 1), "{}"); + // new block row_index 0, 2 which row has key null will be filter + EXPECT_EQ(new_block.dump_one_line(0, 1), "{\"doris\":null, \"clever amory\":30}"); + EXPECT_EQ(new_block.dump_one_line(2, 1), "{\"test\":11}"); EXPECT_EQ(block.dump_data(1, 1), new_block.dump_data(1, 1)); } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
