This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch feat-nested in repository https://gitbox.apache.org/repos/asf/doris.git
commit 9dbfe29ffa0869bd916bc63d12d00db59ab23bab Author: eldenmoon <[email protected]> AuthorDate: Fri Jan 9 20:13:02 2026 +0800 Add unit tests for NestedGroupBuilder. Cover basic array<object> extraction from JSONB and the conflict rule that prefers nested arrays over scalar values on the same path. --- .../segment_v2/nested_group_builder_test.cpp | 90 ++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/be/test/olap/rowset/segment_v2/nested_group_builder_test.cpp b/be/test/olap/rowset/segment_v2/nested_group_builder_test.cpp new file mode 100644 index 00000000000..64f550f5001 --- /dev/null +++ b/be/test/olap/rowset/segment_v2/nested_group_builder_test.cpp @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest.h> + +#include <string> +#include <vector> + +#include "olap/rowset/segment_v2/variant/nested_group_builder.h" +#include "runtime/jsonb_value.h" +#include "vec/columns/column_string.h" + +namespace doris::segment_v2 { + +static vectorized::ColumnPtr make_jsonb_column(const std::vector<std::string>& json_strings) { + auto col = vectorized::ColumnString::create(); + doris::JsonBinaryValue jsonb_value; + for (const auto& s : json_strings) { + EXPECT_TRUE(jsonb_value.from_json_string(s).ok()); + col->insert_data(jsonb_value.value(), jsonb_value.size()); + } + return col->get_ptr(); +} + +TEST(NestedGroupBuilderTest, BuildSimpleArrayObject) { + NestedGroupBuilder builder; + builder.set_max_depth(8); + + auto jsonb_col = make_jsonb_column({R"({"items":[{"a":1},{"a":2}]})"}); + NestedGroupsMap groups; + ASSERT_TRUE(builder.build_from_jsonb(jsonb_col, groups, jsonb_col->size()).ok()); + + auto it = groups.find(vectorized::PathInData("items")); + ASSERT_TRUE(it != groups.end()); + ASSERT_TRUE(it->second); + + const auto& group = *it->second; + ASSERT_TRUE(group.offsets); + const auto& offsets = assert_cast<const vectorized::ColumnOffset64&>(*group.offsets).get_data(); + ASSERT_EQ(offsets.size(), 1); + EXPECT_EQ(offsets[0], 2); + + ASSERT_EQ(group.children.size(), 1); + auto it_child = group.children.find(vectorized::PathInData("a")); + ASSERT_TRUE(it_child != group.children.end()); + EXPECT_EQ(it_child->second.size(), 2); +} + +TEST(NestedGroupBuilderTest, ConflictArrayObjectWinsOverScalar) { + NestedGroupBuilder builder; + builder.set_max_depth(8); + + // First element: a is scalar. Second element: a becomes array<object>. + auto jsonb_col = make_jsonb_column({R"({"items":[{"a":1},{"a":[{"b":2}]}]})"}); + NestedGroupsMap groups; + ASSERT_TRUE(builder.build_from_jsonb(jsonb_col, groups, jsonb_col->size()).ok()); + + auto it = groups.find(vectorized::PathInData("items")); + ASSERT_TRUE(it != groups.end()); + ASSERT_TRUE(it->second); + + const auto& group = *it->second; + // Scalar child "a" should be discarded once we see array<object> on the same path. + EXPECT_TRUE(group.children.find(vectorized::PathInData("a")) == group.children.end()); + ASSERT_TRUE(group.nested_groups.contains(vectorized::PathInData("a"))); + + const auto& nested = *group.nested_groups.at(vectorized::PathInData("a")); + ASSERT_TRUE(nested.offsets); + const auto& offsets = assert_cast<const vectorized::ColumnOffset64&>(*nested.offsets).get_data(); + ASSERT_EQ(offsets.size(), 2); + EXPECT_EQ(offsets[0], 0); // first element has empty nested array + EXPECT_EQ(offsets[1], 1); // second element has 1 nested object +} + +} // namespace doris::segment_v2 + --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
