This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 00c9455f16 [fix](array-type) fix arrow column to doris array column
(#10855)
00c9455f16 is described below
commit 00c9455f16bb6998a399c977f9bfc5d42de32276
Author: camby <[email protected]>
AuthorDate: Sat Jul 16 11:49:42 2022 +0800
[fix](array-type) fix arrow column to doris array column (#10855)
* support merging array columns while converting from an arrow column to a
doris array column
* fix typo
Co-authored-by: cambyzju <[email protected]>
---
be/src/vec/utils/arrow_column_to_doris_column.cpp | 8 +++++---
.../vec/utils/arrow_column_to_doris_column_test.cpp | 18 ++++++++++++------
2 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/be/src/vec/utils/arrow_column_to_doris_column.cpp
b/be/src/vec/utils/arrow_column_to_doris_column.cpp
index 3d851d14e1..9f2f7ddb26 100644
--- a/be/src/vec/utils/arrow_column_to_doris_column.cpp
+++ b/be/src/vec/utils/arrow_column_to_doris_column.cpp
@@ -274,12 +274,14 @@ static Status convert_offset_from_list_column(const
arrow::Array* array, size_t
auto concrete_array = down_cast<const arrow::ListArray*>(array);
auto arrow_offsets_array = concrete_array->offsets();
auto arrow_offsets =
down_cast<arrow::Int32Array*>(arrow_offsets_array.get());
+ auto prev_size = offsets_data.back();
for (int64_t i = array_idx + 1; i < array_idx + num_elements + 1; ++i) {
- // convert to doris offset, start from 0
- offsets_data.emplace_back(arrow_offsets->Value(i) -
arrow_offsets->Value(array_idx));
+ // convert to doris offset, start from offsets.back()
+ offsets_data.emplace_back(prev_size + arrow_offsets->Value(i) -
+ arrow_offsets->Value(array_idx));
}
*start_idx_for_data = arrow_offsets->Value(array_idx);
- *num_for_data = offsets_data.back();
+ *num_for_data = offsets_data.back() - prev_size;
return Status::OK();
}
diff --git a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
index 606132ca9d..4eec72ae65 100644
--- a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
+++ b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp
@@ -652,13 +652,13 @@ void test_arrow_to_array_column(ColumnWithTypeAndName&
column,
std::shared_ptr<arrow::DataType> value_type,
std::shared_ptr<arrow::Array> values, const
std::string& value,
size_t& counter) {
- ASSERT_EQ(column.column->size(), counter);
auto array = create_array_array<ArrowType, is_nullable>(vec_offsets,
null_map, value_type,
values, counter);
+ auto old_size = column.column->size();
auto ret = arrow_column_to_doris_column(array.get(), 0, column.column,
column.type,
vec_offsets.size() - 1, "UTC");
ASSERT_EQ(ret.ok(), true);
- ASSERT_EQ(column.column->size(), counter);
+ ASSERT_EQ(column.column->size() - old_size, counter);
MutableColumnPtr data_column = nullptr;
vectorized::ColumnNullable* nullable_column = nullptr;
if (column.column->is_nullable()) {
@@ -669,14 +669,16 @@ void test_arrow_to_array_column(ColumnWithTypeAndName&
column,
data_column = (*std::move(column.column)).mutate();
}
auto& array_column = static_cast<ColumnArray&>(*data_column);
- EXPECT_EQ(array_column.size(), vec_offsets.size() - 1);
- for (size_t i = 0; i < array_column.size(); ++i) {
- auto v = get<Array>(array_column[i]);
+ EXPECT_EQ(array_column.size() - old_size, vec_offsets.size() - 1);
+ for (size_t i = 0; i < array_column.size() - old_size; ++i) {
+ auto v = get<Array>(array_column[old_size + i]);
EXPECT_EQ(v.size(), vec_offsets[i + 1] - vec_offsets[i]);
+ EXPECT_EQ(v.size(), array_column.get_offsets()[old_size + i] -
+ array_column.get_offsets()[old_size + i -
1]);
if (is_nullable) {
ASSERT_NE(nullable_column, nullptr);
NullMap& map_data = nullable_column->get_null_map_data();
- ASSERT_EQ(map_data[i], null_map[i]);
+ ASSERT_EQ(map_data[old_size + i], null_map[i]);
if (!null_map[i]) {
// check value
for (size_t j = 0; j < v.size(); ++j) {
@@ -713,6 +715,10 @@ void test_array(const std::vector<std::string>&
test_cases, size_t num_elements,
size_t counter = 0;
test_arrow_to_array_column<ArrowType, is_nullable>(column,
vec_offsets, null_map,
value_type, array,
value, counter);
+ // multi arrow array can merge into one array column, here test again
with non empty array column
+ counter = 0;
+ test_arrow_to_array_column<ArrowType, is_nullable>(column,
vec_offsets, null_map,
+ value_type, array,
value, counter);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]