This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 31f9d3241ec branch-3.1: [fix](variant) fix insert array<json> and json (#55044)
31f9d3241ec is described below

commit 31f9d3241eca21448f43c1c91ad7322b25298f51
Author: Sun Chenyang <[email protected]>
AuthorDate: Wed Aug 20 16:35:30 2025 +0800

    branch-3.1: [fix](variant) fix insert array<json> and json (#55044)
    
    In #51307, `subcolumn::insert` was refactored, and a small-scale fix was
    applied on the 3.1 branch.
---
 be/src/vec/columns/column_object.cpp        |   7 +-
 be/test/vec/columns/column_variant_test.cpp | 236 ++++++++++++++++++++++++++++
 2 files changed, 241 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index f0390d422c8..361e108029d 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -251,7 +251,8 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo 
info) {
         // so we should set specified info to create correct types, and those 
predefined types are static and
         // no conflict, so we can set them directly.
         add_new_column_part(base_data_type);
-    } else if (least_common_type.get_base_type_id() != base_type.idx && 
!base_type.is_nothing()) {
+    } else if ((least_common_type.get_base_type_id() != base_type.idx || 
value_dim != column_dim) &&
+               !base_type.is_nothing()) {
         if (schema_util::is_conversion_required_between_integers(
                     base_type.idx, least_common_type.get_base_type_id())) {
             VLOG_DEBUG << "Conversion between " << getTypeName(base_type.idx) 
<< " and "
@@ -262,7 +263,9 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo 
info) {
             if (!least_type->equals(*base_data_type)) {
                 type_changed = true;
             }
-            add_new_column_part(least_type);
+            if (!least_type->equals(*least_common_type.get())) {
+                add_new_column_part(least_type);
+            }
         }
     }
     // 1. type changed means encounter different type, we need to convert it 
to the least common type
diff --git a/be/test/vec/columns/column_variant_test.cpp b/be/test/vec/columns/column_variant_test.cpp
index d6f35a50bb1..0f39c9d2817 100644
--- a/be/test/vec/columns/column_variant_test.cpp
+++ b/be/test/vec/columns/column_variant_test.cpp
@@ -2349,4 +2349,240 @@ TEST_F(ColumnObjectTest, subcolumn_operations_coverage) 
{
         }
     }
 }
+
+TEST_F(ColumnObjectTest, subcolumn_insert_range_from_test) { // Copies rows from three separately built array subcolumns into one subcolumn via insert_range_from and checks the merged result.
+    ColumnObject::Subcolumn subcolumn(0, true /* is_nullable */, false /* 
is_root */); // destination subcolumn that receives the ranges below
+    Field int_field(200000); // element value for array_int
+    Field string_field("hello"); // element value for array_string
+
+    Array array_int(2);
+    array_int[0] = int_field;
+    array_int[1] = int_field;
+    Field array_int_field(array_int);
+    ColumnObject::Subcolumn subcolumn2(0, true /* is_nullable */, false /* 
is_root */);
+    subcolumn2.insert(array_int_field);
+    subcolumn2.finalize(); // source is finalized before it is copied from
+
+    Array array_tiny_int(2);
+    Field tiny_int(100); // smaller literal than int_field (100 vs 200000); presumably maps to a narrower integer type — confirm
+    array_tiny_int[0] = tiny_int;
+    array_tiny_int[1] = tiny_int;
+    Field array_tiny_int_field(array_tiny_int);
+    ColumnObject::Subcolumn subcolumn1(0, true /* is_nullable */, false /* 
is_root */);
+    subcolumn1.insert(array_tiny_int_field);
+    subcolumn1.finalize();
+
+    Array array_string(2);
+    array_string[0] = string_field;
+    array_string[1] = string_field;
+    Field array_string_field(array_string);
+    ColumnObject::Subcolumn subcolumn3(0, true /* is_nullable */, false /* 
is_root */);
+    subcolumn3.insert(array_string_field);
+    subcolumn3.finalize();
+
+    subcolumn.insert_range_from(subcolumn1, 0, 1); // (0, 1) is presumably (start row, length) — confirm against Subcolumn::insert_range_from
+    subcolumn.insert_range_from(subcolumn2, 0, 1); // order: array of 100s, then array of 200000s, then array of strings
+    subcolumn.insert_range_from(subcolumn3, 0, 1);
+    subcolumn.finalize();
+    EXPECT_EQ(subcolumn.data.size(), 1); // after finalize, subcolumn.data must hold exactly one entry
+    
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(), 
TypeIndex::Array); // type id is compared after remove_nullable(); merged type is expected to remain an Array
+}
+
+TEST_F(ColumnObjectTest, subcolumn_insert_test) { // Inserts array fields and then scalar fields directly into one subcolumn and checks the resulting least common type at each stage.
+    ColumnObject::Subcolumn subcolumn(0, true /* is_nullable */, false /* 
is_root */);
+    Field int_field(200000); // reused below both as an array element and as a scalar row
+    Field string_field("hello"); // reused below both as an array element and as a scalar row
+    Array array_int(2);
+    array_int[0] = int_field;
+    array_int[1] = int_field;
+    Field array_int_field(array_int);
+
+    Array array_int2(2);
+    Field tiny_int(100); // smaller literal than int_field (100 vs 200000)
+    array_int2[0] = tiny_int;
+    array_int2[1] = tiny_int;
+    Field array_int2_field(array_int2);
+
+    Array array_string(2);
+    array_string[0] = string_field;
+    array_string[1] = string_field;
+    Field array_string_field(array_string);
+
+    subcolumn.insert(array_int2_field); // stage 1: three array rows — 100s, 200000s, strings
+    subcolumn.insert(array_int_field);
+    subcolumn.insert(array_string_field);
+    subcolumn.finalize();
+    EXPECT_EQ(subcolumn.data.size(), 1); // after finalize, subcolumn.data must hold exactly one entry
+    
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(), 
TypeIndex::Array); // with only array rows the type is expected to still be an Array
+
+    subcolumn.insert(string_field); // stage 2: scalar rows added on top of the finalized array data
+    subcolumn.insert(int_field);
+    EXPECT_EQ(subcolumn.data.size(), 2); // checked before finalize: two entries are expected in subcolumn.data
+    
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(), 
TypeIndex::JSONB); // mixing scalars with arrays is expected to yield JSONB
+    subcolumn.finalize();
+    EXPECT_EQ(subcolumn.data.size(), 1); // finalize is expected to bring subcolumn.data back to one entry
+    
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(), 
TypeIndex::JSONB); // type is expected to stay JSONB after finalize
+}
+
+TEST_F(ColumnObjectTest, subcolumn_insert_test_advanced) { // Inserts a mixed set of ten fields in many random orders and expects the subcolumn to end up as JSONB every time.
+    std::vector<Field> fields; // the pool of values that is shuffled and inserted on every iteration
+
+    fields.emplace_back(Field(Null())); // null
+
+    fields.emplace_back(Field(true)); // boolean scalar
+
+    fields.emplace_back(Field(922337203685477588)); // large integer scalar
+
+    fields.emplace_back(Field(-3.14159265359)); // floating-point scalar
+
+    fields.emplace_back(Field("hello world")); // string scalar
+
+    Array arr_boolean(2); // array of booleans
+    arr_boolean[0] = Field(true);
+    arr_boolean[1] = Field(false);
+    Field arr_boolean_field(arr_boolean);
+    fields.emplace_back(arr_boolean_field);
+
+    Array arr_int64(2); // array of large integers
+    arr_int64[0] = Field(1232323232323232323);
+    arr_int64[1] = Field(2232323223232323232);
+    Field arr_int64_field(arr_int64);
+    fields.emplace_back(arr_int64_field);
+
+    Array arr_double(2); // array of floating-point values
+    arr_double[0] = Field(1.1);
+    arr_double[1] = Field(2.2);
+    Field arr_double_field(arr_double);
+    fields.emplace_back(arr_double_field);
+
+    Array arr_string(2); // array of strings
+    arr_string[0] = Field("one");
+    arr_string[1] = Field("two");
+    Field arr_string_field(arr_string);
+    fields.emplace_back(arr_string_field);
+
+    Array arr_jsonb(5); // array whose elements have different kinds: string, double, bool, integers
+    arr_jsonb[0] = Field("one");
+    arr_jsonb[1] = Field(1.1);
+    arr_jsonb[2] = Field(true);
+    arr_jsonb[3] = Field(1232323232323232323);
+    arr_jsonb[4] = Field(1232323232323232323);
+    Field arr_jsonb_field(arr_jsonb);
+    fields.emplace_back(arr_jsonb_field);
+
+    std::random_device rd; // NOTE(review): the seed comes from random_device and is not logged, so a failing insertion order cannot be replayed
+    std::mt19937 g(rd());
+
+    for (int i = 0; i < (1 << fields.size()); i++) { // ten fields were added above, so this runs 1 << 10 = 1024 iterations
+        std::shuffle(fields.begin(), fields.end(), g); // new random insertion order for this iteration
+        auto subcolumn = ColumnObject::Subcolumn(0, true, false); // fresh subcolumn per iteration
+
+        for (const auto& field : fields) {
+            subcolumn.insert(field); // first pass: insert every field in the shuffled order
+        }
+
+        subcolumn.finalize();
+        EXPECT_EQ(subcolumn.data.size(), 1); // after finalize, subcolumn.data must hold exactly one entry
+        // std::cout << "least common type: " << 
subcolumn.get_least_common_type()->get_name() << std::endl;
+        EXPECT_EQ(subcolumn.least_common_type.get_base_type_id(), 
TypeIndex::JSONB); // the mixed pool is expected to resolve to JSONB regardless of order
+
+        for (const auto& field : fields) {
+            subcolumn.insert(field); // second pass: insert the same fields again into the already finalized subcolumn
+        }
+        EXPECT_EQ(subcolumn.least_common_type.get_base_type_id(), 
TypeIndex::JSONB); // checked before the second finalize
+        subcolumn.finalize();
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(),
+                  TypeIndex::JSONB); // type id compared after remove_nullable()
+
+        if (i % 1000 == 0) { // progress output, printed at i = 0 and i = 1000
+            std::cout << "insert count " << i << std::endl;
+        }
+    }
+}
+
+TEST_F(ColumnObjectTest, subcolumn_insert_range_from_test_advanced) { // Same randomized check as the direct-insert variant, but the first pass goes through per-field temporary subcolumns and insert_range_from.
+    std::vector<Field> fields; // the pool of values that is shuffled and inserted on every iteration
+
+    fields.emplace_back(Field(Null())); // null
+
+    fields.emplace_back(Field(true)); // boolean scalar
+
+    fields.emplace_back(Field(922337203685477588)); // large integer scalar
+
+    fields.emplace_back(Field(-3.14159265359)); // floating-point scalar
+
+    fields.emplace_back(Field("hello world")); // string scalar
+
+    Array arr_boolean(2); // array of booleans
+    arr_boolean[0] = Field(true);
+    arr_boolean[1] = Field(false);
+    Field arr_boolean_field(arr_boolean);
+    fields.emplace_back(arr_boolean_field);
+
+    Array arr_int64(2); // array of large integers
+    arr_int64[0] = Field(1232323232323232323);
+    arr_int64[1] = Field(2232323223232323232);
+    Field arr_int64_field(arr_int64);
+    fields.emplace_back(arr_int64_field);
+
+    Array arr_largeint(2); // NOTE(review): holds the same two literals as arr_int64 above; whether a wider value was intended is not visible here
+    arr_largeint[0] = Field(1232323232323232323);
+    arr_largeint[1] = Field(2232323223232323232);
+    Field arr_largeint_field(arr_largeint);
+    fields.emplace_back(arr_largeint_field);
+
+    Array arr_double(2); // array of floating-point values
+    arr_double[0] = Field(1.1);
+    arr_double[1] = Field(2.2);
+    Field arr_double_field(arr_double);
+    fields.emplace_back(arr_double_field);
+
+    Array arr_string(2); // array of strings
+    arr_string[0] = Field("one");
+    arr_string[1] = Field("two");
+    Field arr_string_field(arr_string);
+    fields.emplace_back(arr_string_field);
+
+    Array arr_jsonb(5); // array whose elements have different kinds: string, double, bool, integers
+    arr_jsonb[0] = Field("one");
+    arr_jsonb[1] = Field(1.1);
+    arr_jsonb[2] = Field(true);
+    arr_jsonb[3] = Field(1232323232323232323);
+    arr_jsonb[4] = Field(1232323232323232323);
+    Field arr_jsonb_field(arr_jsonb);
+    fields.emplace_back(arr_jsonb_field);
+
+    std::random_device rd; // NOTE(review): the seed comes from random_device and is not logged, so a failing insertion order cannot be replayed
+    std::mt19937 g(rd());
+
+    for (int i = 0; i < (1 << fields.size()); i++) { // eleven fields were added above, so this runs 1 << 11 = 2048 iterations
+        std::shuffle(fields.begin(), fields.end(), g); // new random insertion order for this iteration
+        auto subcolumn = ColumnObject::Subcolumn(0, true, false); // fresh destination subcolumn per iteration
+
+        for (const auto& field : fields) {
+            auto subcolumn_tmp = ColumnObject::Subcolumn(0, true, false); // single-row source holding just this field
+            subcolumn_tmp.insert(field);
+            subcolumn.insert_range_from(subcolumn_tmp, 0, 1); // source is not finalized here; (0, 1) is presumably (start row, length) — confirm
+        }
+
+        subcolumn.finalize();
+        EXPECT_EQ(subcolumn.data.size(), 1); // after finalize, subcolumn.data must hold exactly one entry
+        // std::cout << "least common type: " << 
subcolumn.get_least_common_type()->get_name() << std::endl;
+        EXPECT_EQ(subcolumn.least_common_type.get_base_type_id(), 
TypeIndex::JSONB); // the mixed pool is expected to resolve to JSONB regardless of order
+
+        for (const auto& field : fields) {
+            subcolumn.insert(field); // second pass uses direct insert on the already finalized subcolumn
+        }
+        EXPECT_EQ(subcolumn.least_common_type.get_base_type_id(), 
TypeIndex::JSONB); // checked before the second finalize
+        subcolumn.finalize();
+        EXPECT_EQ(subcolumn.data.size(), 1);
+        
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(),
+                  TypeIndex::JSONB); // type id compared after remove_nullable()
+
+        if (i % 1000 == 0) { // progress output, printed at i = 0, 1000 and 2000
+            std::cout << "insert count " << i << std::endl;
+        }
+    }
+}
 } // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to