This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 31f9d3241ec branch-3.1: [fix](variant) fix insert array<json> and json (#55044)
31f9d3241ec is described below
commit 31f9d3241eca21448f43c1c91ad7322b25298f51
Author: Sun Chenyang <[email protected]>
AuthorDate: Wed Aug 20 16:35:30 2025 +0800
branch-3.1: [fix](variant) fix insert array<json> and json (#55044)
In #51307, `ColumnObject::Subcolumn::insert` was refactored; this commit
applies a small-scale fix on the 3.1 branch.
---
be/src/vec/columns/column_object.cpp | 7 +-
be/test/vec/columns/column_variant_test.cpp | 236 ++++++++++++++++++++++++++++
2 files changed, 241 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index f0390d422c8..361e108029d 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -251,7 +251,8 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) {
// so we should set specified info to create correct types, and those
predefined types are static and
// no conflict, so we can set them directly.
add_new_column_part(base_data_type);
- } else if (least_common_type.get_base_type_id() != base_type.idx &&
!base_type.is_nothing()) {
+ } else if ((least_common_type.get_base_type_id() != base_type.idx ||
value_dim != column_dim) &&
+ !base_type.is_nothing()) {
if (schema_util::is_conversion_required_between_integers(
base_type.idx, least_common_type.get_base_type_id())) {
VLOG_DEBUG << "Conversion between " << getTypeName(base_type.idx)
<< " and "
@@ -262,7 +263,9 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) {
if (!least_type->equals(*base_data_type)) {
type_changed = true;
}
- add_new_column_part(least_type);
+ if (!least_type->equals(*least_common_type.get())) {
+ add_new_column_part(least_type);
+ }
}
}
// 1. type changed means encounter different type, we need to convert it
to the least common type
diff --git a/be/test/vec/columns/column_variant_test.cpp b/be/test/vec/columns/column_variant_test.cpp
index d6f35a50bb1..0f39c9d2817 100644
--- a/be/test/vec/columns/column_variant_test.cpp
+++ b/be/test/vec/columns/column_variant_test.cpp
@@ -2349,4 +2349,240 @@ TEST_F(ColumnObjectTest, subcolumn_operations_coverage) {
}
}
}
+
+TEST_F(ColumnObjectTest, subcolumn_insert_range_from_test) {
+ ColumnObject::Subcolumn subcolumn(0, true /* is_nullable */, false /*
is_root */);
+ Field int_field(200000);
+ Field string_field("hello");
+
+ Array array_int(2);
+ array_int[0] = int_field;
+ array_int[1] = int_field;
+ Field array_int_field(array_int);
+ ColumnObject::Subcolumn subcolumn2(0, true /* is_nullable */, false /*
is_root */);
+ subcolumn2.insert(array_int_field);
+ subcolumn2.finalize();
+
+ Array array_tiny_int(2);
+ Field tiny_int(100);
+ array_tiny_int[0] = tiny_int;
+ array_tiny_int[1] = tiny_int;
+ Field array_tiny_int_field(array_tiny_int);
+ ColumnObject::Subcolumn subcolumn1(0, true /* is_nullable */, false /*
is_root */);
+ subcolumn1.insert(array_tiny_int_field);
+ subcolumn1.finalize();
+
+ Array array_string(2);
+ array_string[0] = string_field;
+ array_string[1] = string_field;
+ Field array_string_field(array_string);
+ ColumnObject::Subcolumn subcolumn3(0, true /* is_nullable */, false /*
is_root */);
+ subcolumn3.insert(array_string_field);
+ subcolumn3.finalize();
+
+ subcolumn.insert_range_from(subcolumn1, 0, 1);
+ subcolumn.insert_range_from(subcolumn2, 0, 1);
+ subcolumn.insert_range_from(subcolumn3, 0, 1);
+ subcolumn.finalize();
+ EXPECT_EQ(subcolumn.data.size(), 1);
+
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(),
TypeIndex::Array);
+}
+
+TEST_F(ColumnObjectTest, subcolumn_insert_test) {
+ ColumnObject::Subcolumn subcolumn(0, true /* is_nullable */, false /*
is_root */);
+ Field int_field(200000);
+ Field string_field("hello");
+ Array array_int(2);
+ array_int[0] = int_field;
+ array_int[1] = int_field;
+ Field array_int_field(array_int);
+
+ Array array_int2(2);
+ Field tiny_int(100);
+ array_int2[0] = tiny_int;
+ array_int2[1] = tiny_int;
+ Field array_int2_field(array_int2);
+
+ Array array_string(2);
+ array_string[0] = string_field;
+ array_string[1] = string_field;
+ Field array_string_field(array_string);
+
+ subcolumn.insert(array_int2_field);
+ subcolumn.insert(array_int_field);
+ subcolumn.insert(array_string_field);
+ subcolumn.finalize();
+ EXPECT_EQ(subcolumn.data.size(), 1);
+
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(),
TypeIndex::Array);
+
+ subcolumn.insert(string_field);
+ subcolumn.insert(int_field);
+ EXPECT_EQ(subcolumn.data.size(), 2);
+
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(),
TypeIndex::JSONB);
+ subcolumn.finalize();
+ EXPECT_EQ(subcolumn.data.size(), 1);
+
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(),
TypeIndex::JSONB);
+}
+
+TEST_F(ColumnObjectTest, subcolumn_insert_test_advanced) {
+ std::vector<Field> fields;
+
+ fields.emplace_back(Field(Null()));
+
+ fields.emplace_back(Field(true));
+
+ fields.emplace_back(Field(922337203685477588));
+
+ fields.emplace_back(Field(-3.14159265359));
+
+ fields.emplace_back(Field("hello world"));
+
+ Array arr_boolean(2);
+ arr_boolean[0] = Field(true);
+ arr_boolean[1] = Field(false);
+ Field arr_boolean_field(arr_boolean);
+ fields.emplace_back(arr_boolean_field);
+
+ Array arr_int64(2);
+ arr_int64[0] = Field(1232323232323232323);
+ arr_int64[1] = Field(2232323223232323232);
+ Field arr_int64_field(arr_int64);
+ fields.emplace_back(arr_int64_field);
+
+ Array arr_double(2);
+ arr_double[0] = Field(1.1);
+ arr_double[1] = Field(2.2);
+ Field arr_double_field(arr_double);
+ fields.emplace_back(arr_double_field);
+
+ Array arr_string(2);
+ arr_string[0] = Field("one");
+ arr_string[1] = Field("two");
+ Field arr_string_field(arr_string);
+ fields.emplace_back(arr_string_field);
+
+ Array arr_jsonb(5);
+ arr_jsonb[0] = Field("one");
+ arr_jsonb[1] = Field(1.1);
+ arr_jsonb[2] = Field(true);
+ arr_jsonb[3] = Field(1232323232323232323);
+ arr_jsonb[4] = Field(1232323232323232323);
+ Field arr_jsonb_field(arr_jsonb);
+ fields.emplace_back(arr_jsonb_field);
+
+ std::random_device rd;
+ std::mt19937 g(rd());
+
+ for (int i = 0; i < (1 << fields.size()); i++) {
+ std::shuffle(fields.begin(), fields.end(), g);
+ auto subcolumn = ColumnObject::Subcolumn(0, true, false);
+
+ for (const auto& field : fields) {
+ subcolumn.insert(field);
+ }
+
+ subcolumn.finalize();
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ // std::cout << "least common type: " <<
subcolumn.get_least_common_type()->get_name() << std::endl;
+ EXPECT_EQ(subcolumn.least_common_type.get_base_type_id(),
TypeIndex::JSONB);
+
+ for (const auto& field : fields) {
+ subcolumn.insert(field);
+ }
+ EXPECT_EQ(subcolumn.least_common_type.get_base_type_id(),
TypeIndex::JSONB);
+ subcolumn.finalize();
+ EXPECT_EQ(subcolumn.data.size(), 1);
+
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(),
+ TypeIndex::JSONB);
+
+ if (i % 1000 == 0) {
+ std::cout << "insert count " << i << std::endl;
+ }
+ }
+}
+
+TEST_F(ColumnObjectTest, subcolumn_insert_range_from_test_advanced) {
+ std::vector<Field> fields;
+
+ fields.emplace_back(Field(Null()));
+
+ fields.emplace_back(Field(true));
+
+ fields.emplace_back(Field(922337203685477588));
+
+ fields.emplace_back(Field(-3.14159265359));
+
+ fields.emplace_back(Field("hello world"));
+
+ Array arr_boolean(2);
+ arr_boolean[0] = Field(true);
+ arr_boolean[1] = Field(false);
+ Field arr_boolean_field(arr_boolean);
+ fields.emplace_back(arr_boolean_field);
+
+ Array arr_int64(2);
+ arr_int64[0] = Field(1232323232323232323);
+ arr_int64[1] = Field(2232323223232323232);
+ Field arr_int64_field(arr_int64);
+ fields.emplace_back(arr_int64_field);
+
+ Array arr_largeint(2);
+ arr_largeint[0] = Field(1232323232323232323);
+ arr_largeint[1] = Field(2232323223232323232);
+ Field arr_largeint_field(arr_largeint);
+ fields.emplace_back(arr_largeint_field);
+
+ Array arr_double(2);
+ arr_double[0] = Field(1.1);
+ arr_double[1] = Field(2.2);
+ Field arr_double_field(arr_double);
+ fields.emplace_back(arr_double_field);
+
+ Array arr_string(2);
+ arr_string[0] = Field("one");
+ arr_string[1] = Field("two");
+ Field arr_string_field(arr_string);
+ fields.emplace_back(arr_string_field);
+
+ Array arr_jsonb(5);
+ arr_jsonb[0] = Field("one");
+ arr_jsonb[1] = Field(1.1);
+ arr_jsonb[2] = Field(true);
+ arr_jsonb[3] = Field(1232323232323232323);
+ arr_jsonb[4] = Field(1232323232323232323);
+ Field arr_jsonb_field(arr_jsonb);
+ fields.emplace_back(arr_jsonb_field);
+
+ std::random_device rd;
+ std::mt19937 g(rd());
+
+ for (int i = 0; i < (1 << fields.size()); i++) {
+ std::shuffle(fields.begin(), fields.end(), g);
+ auto subcolumn = ColumnObject::Subcolumn(0, true, false);
+
+ for (const auto& field : fields) {
+ auto subcolumn_tmp = ColumnObject::Subcolumn(0, true, false);
+ subcolumn_tmp.insert(field);
+ subcolumn.insert_range_from(subcolumn_tmp, 0, 1);
+ }
+
+ subcolumn.finalize();
+ EXPECT_EQ(subcolumn.data.size(), 1);
+ // std::cout << "least common type: " <<
subcolumn.get_least_common_type()->get_name() << std::endl;
+ EXPECT_EQ(subcolumn.least_common_type.get_base_type_id(),
TypeIndex::JSONB);
+
+ for (const auto& field : fields) {
+ subcolumn.insert(field);
+ }
+ EXPECT_EQ(subcolumn.least_common_type.get_base_type_id(),
TypeIndex::JSONB);
+ subcolumn.finalize();
+ EXPECT_EQ(subcolumn.data.size(), 1);
+
EXPECT_EQ(remove_nullable(subcolumn.get_least_common_type())->get_type_id(),
+ TypeIndex::JSONB);
+
+ if (i % 1000 == 0) {
+ std::cout << "insert count " << i << std::endl;
+ }
+ }
+}
} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]