This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 15e5a02e4af branch-4.0: [fix](variant) fix deserialize array from sparse column #57561 (#57630)
15e5a02e4af is described below

commit 15e5a02e4af89f7d800bfb13d5be408d36c2ad88
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 4 17:41:52 2025 +0800

    branch-4.0: [fix](variant) fix deserialize array from sparse column #57561 (#57630)
    
    Cherry-picked from #57561
    
    Co-authored-by: Sun Chenyang <[email protected]>
---
 be/src/vec/columns/column_variant.cpp       |  4 +-
 be/test/vec/columns/column_variant_test.cpp | 94 +++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/columns/column_variant.cpp b/be/src/vec/columns/column_variant.cpp
index 84dd8807f88..cc240a8682f 100644
--- a/be/src/vec/columns/column_variant.cpp
+++ b/be/src/vec/columns/column_variant.cpp
@@ -2316,7 +2316,9 @@ void ColumnVariant::Subcolumn::deserialize_from_sparse_column(const ColumnString
 
     // array needs to check nested type is same as least common type's nested type
     if (!same_as_least_common_type && type == PrimitiveType::TYPE_ARRAY) {
-        const auto* nested_start_data = start_data + 1;
+        // |PrimitiveType::TYPE_ARRAY| + |size_t| + |nested_type|
+        // skip the first 1 byte for PrimitiveType::TYPE_ARRAY and the next sizeof(size_t) bytes for the size of the array
+        const auto* nested_start_data = start_data + 1 + sizeof(size_t);
         const PrimitiveType nested_type = TabletColumn::get_primitive_type_by_field_type(
                 static_cast<FieldType>(*nested_start_data));
         same_as_least_common_type = (nested_type != least_common_type.get_base_type_id());
diff --git a/be/test/vec/columns/column_variant_test.cpp b/be/test/vec/columns/column_variant_test.cpp
index 06408f3bc1a..010dbd28014 100644
--- a/be/test/vec/columns/column_variant_test.cpp
+++ b/be/test/vec/columns/column_variant_test.cpp
@@ -3692,4 +3692,98 @@ TEST_F(ColumnVariantTest, test_variant_no_data_insert) {
     EXPECT_TRUE(variant->only_have_default_values());
 }
 
+TEST_F(ColumnVariantTest, test_variant_deserialize_from_sparse_column) {
+    auto sparse_column = ColumnVariant::create_sparse_column_fn();
+    auto& column_map = assert_cast<ColumnMap&>(*sparse_column);
+    auto& key = assert_cast<ColumnString&>(column_map.get_keys());
+    auto& value = assert_cast<ColumnString&>(column_map.get_values());
+    auto& offsets = column_map.get_offsets();
+
+    {
+        Field int_field = Field::create_field<TYPE_INT>(123);
+        Field array_field = Field::create_field<TYPE_ARRAY>(Array(1));
+        array_field.get<Array&>()[0] = int_field;
+        FieldInfo info = {PrimitiveType::TYPE_TINYINT, false, false, 1};
+        ColumnVariant::Subcolumn int_subcolumn(0, true, false);
+        int_subcolumn.insert(array_field, info);
+        int_subcolumn.serialize_to_sparse_column(&key, "b", &value, 0);
+
+        info = {PrimitiveType::TYPE_INT, false, false, 1};
+        int_subcolumn.insert(array_field, info);
+        int_subcolumn.serialize_to_sparse_column(&key, "b", &value, 1);
+
+        offsets.push_back(key.size());
+
+        ColumnVariant::Subcolumn subcolumn(0, true, false);
+        subcolumn.deserialize_from_sparse_column(&value, 0);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_ARRAY);
+        EXPECT_EQ(subcolumn.get_dimensions(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_base_type_id(), PrimitiveType::TYPE_TINYINT);
+        auto v = subcolumn.get_last_field();
+        auto& arr = v.get<Array>();
+        EXPECT_EQ(arr.size(), 1);
+        EXPECT_EQ(arr[0].get<Int32>(), 123);
+
+        subcolumn.deserialize_from_sparse_column(&value, 1);
+        EXPECT_EQ(subcolumn.data.size(), 2);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_ARRAY);
+        EXPECT_EQ(subcolumn.get_dimensions(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_base_type_id(), PrimitiveType::TYPE_INT);
+        auto v2 = subcolumn.get_last_field();
+        auto& arr2 = v2.get<Array>();
+        EXPECT_EQ(arr2.size(), 1);
+        EXPECT_EQ(arr2[0].get<Int32>(), 123);
+    }
+
+    column_map.clear();
+    offsets.clear();
+    key.clear();
+    value.clear();
+
+    {
+        Field int_field = Field::create_field<TYPE_INT>(123);
+        Field array_field = Field::create_field<TYPE_ARRAY>(Array(1));
+        array_field.get<Array&>()[0] = Field();
+        FieldInfo info = {PrimitiveType::TYPE_NULL, false, false, 1};
+        ColumnVariant::Subcolumn int_subcolumn(0, true, false);
+        int_subcolumn.insert(array_field, info);
+        int_subcolumn.serialize_to_sparse_column(&key, "b", &value, 0);
+
+        array_field = Field::create_field<TYPE_ARRAY>(Array(2));
+        array_field.get<Array&>()[0] = Field();
+        array_field.get<Array&>()[1] = int_field;
+        info = {PrimitiveType::TYPE_INT, false, false, 1};
+        int_subcolumn.insert(array_field, info);
+        int_subcolumn.serialize_to_sparse_column(&key, "b", &value, 1);
+
+        offsets.push_back(key.size());
+
+        ColumnVariant::Subcolumn subcolumn(0, true, false);
+        subcolumn.deserialize_from_sparse_column(&value, 0);
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_ARRAY);
+        EXPECT_EQ(subcolumn.get_dimensions(), 1);
+        auto v = subcolumn.get_last_field();
+        auto& arr = v.get<Array>();
+        EXPECT_EQ(arr.size(), 1);
+        EXPECT_TRUE(arr[0].is_null());
+
+        subcolumn.deserialize_from_sparse_column(&value, 1);
+        EXPECT_EQ(subcolumn.data.size(), 2);
+        EXPECT_EQ(subcolumn.get_least_common_type()->get_primitive_type(),
+                  PrimitiveType::TYPE_ARRAY);
+        EXPECT_EQ(subcolumn.get_dimensions(), 1);
+        EXPECT_EQ(subcolumn.get_least_common_base_type_id(), PrimitiveType::TYPE_INT);
+        auto v2 = subcolumn.get_last_field();
+        auto& arr2 = v2.get<Array>();
+        EXPECT_EQ(arr2.size(), 2);
+        EXPECT_TRUE(arr2[0].is_null());
+        EXPECT_EQ(arr2[1].get<Int32>(), 123);
+    }
+}
+
 } // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to