This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 15e5a02e4af branch-4.0: [fix](variant) fix deserialize array from
sparse column #57561 (#57630)
15e5a02e4af is described below
commit 15e5a02e4af89f7d800bfb13d5be408d36c2ad88
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 4 17:41:52 2025 +0800
branch-4.0: [fix](variant) fix deserialize array from sparse column #57561
(#57630)
Cherry-picked from #57561
Co-authored-by: Sun Chenyang <[email protected]>
---
be/src/vec/columns/column_variant.cpp | 4 +-
be/test/vec/columns/column_variant_test.cpp | 94 +++++++++++++++++++++++++++++
2 files changed, 97 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/columns/column_variant.cpp b/be/src/vec/columns/column_variant.cpp
index 84dd8807f88..cc240a8682f 100644
--- a/be/src/vec/columns/column_variant.cpp
+++ b/be/src/vec/columns/column_variant.cpp
@@ -2316,7 +2316,9 @@ void ColumnVariant::Subcolumn::deserialize_from_sparse_column(const ColumnString
// array needs to check nested type is same as least common type's nested type
if (!same_as_least_common_type && type == PrimitiveType::TYPE_ARRAY) {
- const auto* nested_start_data = start_data + 1;
+ // |PrimitiveType::TYPE_ARRAY| + |size_t| + |nested_type|
+ // skip the first 1 byte for PrimitiveType::TYPE_ARRAY and the next sizeof(size_t) bytes for the size of the array
+ const auto* nested_start_data = start_data + 1 + sizeof(size_t);
const PrimitiveType nested_type = TabletColumn::get_primitive_type_by_field_type(
static_cast<FieldType>(*nested_start_data));
same_as_least_common_type = (nested_type != least_common_type.get_base_type_id());
diff --git a/be/test/vec/columns/column_variant_test.cpp b/be/test/vec/columns/column_variant_test.cpp
index 06408f3bc1a..010dbd28014 100644
--- a/be/test/vec/columns/column_variant_test.cpp
+++ b/be/test/vec/columns/column_variant_test.cpp
@@ -3692,4 +3692,98 @@ TEST_F(ColumnVariantTest, test_variant_no_data_insert) {
EXPECT_TRUE(variant->only_have_default_values());
}
+TEST_F(ColumnVariantTest, test_variant_deserialize_from_sparse_column) {
+ auto sparse_column = ColumnVariant::create_sparse_column_fn();
+ auto& column_map = assert_cast<ColumnMap&>(*sparse_column);
+ auto& key = assert_cast<ColumnString&>(column_map.get_keys());
+ auto& value = assert_cast<ColumnString&>(column_map.get_values());
+ auto& offsets = column_map.get_offsets();
+
+ {
+ Field int_field = Field::create_field<TYPE_INT>(123);
+ Field array_field = Field::create_field<TYPE_ARRAY>(Array(1));
+ array_field.get<Array&>()[0] = int_field;
+ FieldInfo info = {PrimitiveType::TYPE_TINYINT, false, false, 1};
+ ColumnVariant::Subcolumn int_subcolumn(0, true, false);
+ int_subcolumn.insert(array_field, info);
+ int_subcolumn.serialize_to_sparse_column(&key, "b", &value, 0);
+
+ info = {PrimitiveType::TYPE_INT, false, false, 1};
+ int_subcolumn.insert(array_field, info);
+ int_subcolumn.serialize_to_sparse_column(&key, "b", &value, 1);
+
+ offsets.push_back(key.size());
+
+ ColumnVariant::Subcolumn subcolumn(0, true, false);
+ subcolumn.deserialize_from_sparse_column(&value, 0);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_ARRAY);
+ EXPECT_EQ(subcolumn.get_dimensions(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_base_type_id(), PrimitiveType::TYPE_TINYINT);
+ auto v = subcolumn.get_last_field();
+ auto& arr = v.get<Array>();
+ EXPECT_EQ(arr.size(), 1);
+ EXPECT_EQ(arr[0].get<Int32>(), 123);
+
+ subcolumn.deserialize_from_sparse_column(&value, 1);
+ EXPECT_EQ(subcolumn.data.size(), 2);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_ARRAY);
+ EXPECT_EQ(subcolumn.get_dimensions(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_base_type_id(), PrimitiveType::TYPE_INT);
+ auto v2 = subcolumn.get_last_field();
+ auto& arr2 = v2.get<Array>();
+ EXPECT_EQ(arr2.size(), 1);
+ EXPECT_EQ(arr2[0].get<Int32>(), 123);
+ }
+
+ column_map.clear();
+ offsets.clear();
+ key.clear();
+ value.clear();
+
+ {
+ Field int_field = Field::create_field<TYPE_INT>(123);
+ Field array_field = Field::create_field<TYPE_ARRAY>(Array(1));
+ array_field.get<Array&>()[0] = Field();
+ FieldInfo info = {PrimitiveType::TYPE_NULL, false, false, 1};
+ ColumnVariant::Subcolumn int_subcolumn(0, true, false);
+ int_subcolumn.insert(array_field, info);
+ int_subcolumn.serialize_to_sparse_column(&key, "b", &value, 0);
+
+ array_field = Field::create_field<TYPE_ARRAY>(Array(2));
+ array_field.get<Array&>()[0] = Field();
+ array_field.get<Array&>()[1] = int_field;
+ info = {PrimitiveType::TYPE_INT, false, false, 1};
+ int_subcolumn.insert(array_field, info);
+ int_subcolumn.serialize_to_sparse_column(&key, "b", &value, 1);
+
+ offsets.push_back(key.size());
+
+ ColumnVariant::Subcolumn subcolumn(0, true, false);
+ subcolumn.deserialize_from_sparse_column(&value, 0);
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_ARRAY);
+ EXPECT_EQ(subcolumn.get_dimensions(), 1);
+ auto v = subcolumn.get_last_field();
+ auto& arr = v.get<Array>();
+ EXPECT_EQ(arr.size(), 1);
+ EXPECT_TRUE(arr[0].is_null());
+
+ subcolumn.deserialize_from_sparse_column(&value, 1);
+ EXPECT_EQ(subcolumn.data.size(), 2);
+ EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+ PrimitiveType::TYPE_ARRAY);
+ EXPECT_EQ(subcolumn.get_dimensions(), 1);
+ EXPECT_EQ(subcolumn.get_least_common_base_type_id(), PrimitiveType::TYPE_INT);
+ auto v2 = subcolumn.get_last_field();
+ auto& arr2 = v2.get<Array>();
+ EXPECT_EQ(arr2.size(), 2);
+ EXPECT_TRUE(arr2[0].is_null());
+ EXPECT_EQ(arr2[1].get<Int32>(), 123);
+ }
+}
+
} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]