This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch feat-nested
in repository https://gitbox.apache.org/repos/asf/doris.git

commit fbe54a0cbab7f773223f326292be4faf1ca797ce
Author: Claude Code <[email protected]>
AuthorDate: Mon Jan 12 10:37:01 2026 +0800

    Refactor variant nested group handling and improve iterator management
    
    - Enhanced HierarchicalDataIterator with shared root support and improved error handling
    - Refactored NestedGroupBuilder to consolidate offset padding logic via OffsetManager
    - Improved variant column reader/writer implementation for nested structures
    - Updated variant nested builder with better memory management
    
    Co-Authored-By: Claude Sonnet 4.5 <[email protected]>
---
 .../variant/hierarchical_data_iterator.cpp         |  16 +-
 .../segment_v2/variant/nested_group_builder.cpp    | 160 ++++---
 .../segment_v2/variant/nested_group_builder.h      |  21 +
 .../segment_v2/variant/variant_column_reader.cpp   | 533 ++-------------------
 .../variant/variant_column_writer_impl.cpp         |  11 +-
 .../variant/variant_column_writer_impl.h           |   4 -
 be/src/vec/json/variant_nested_builder.cpp         |  16 -
 be/src/vec/json/variant_nested_builder.h           |   5 -
 8 files changed, 172 insertions(+), 594 deletions(-)

diff --git 
a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp 
b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
index dd375cb1dcc..7087d413929 100644
--- a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
@@ -59,6 +59,14 @@ Status HierarchicalDataIterator::create(ColumnIteratorUPtr* 
reader, int32_t col_
                 // use set_root to share instead
                 continue;
             }
+            // Skip NestedGroup subcolumns (columns with __ng. prefix in path).
+            // NestedGroup columns only contain rows that have the nested array, not all rows.
+            // They need special handling via NestedGroupWholeIterator, not regular hierarchical merge.
+            const auto& leaf_path = leaves_paths[i].get_path();
+            if (leaf_path.find("__ng.") != std::string::npos) {
+                VLOG_DEBUG << "Skipping NestedGroup subcolumn: " << leaf_path;
+                continue;
+            }
             RETURN_IF_ERROR(
                     stream_iter->add_stream(col_uid, leaves[i], 
column_reader_cache, stats));
         }
@@ -274,7 +282,13 @@ Status 
HierarchicalDataIterator::_init_container(vectorized::MutableColumnPtr& c
     RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
         MutableColumnPtr column = node.data.column->get_ptr();
         PathInData relative_path = 
node.path.copy_pop_nfront(_path.get_parts().size());
-        DCHECK(column->size() == nrows);
+        VLOG_DEBUG << "HierarchicalDataIterator::_init_container: node.path=" 
<< node.path.get_path()
+                  << ", relative_path=" << relative_path.get_path()
+                  << ", column_size=" << column->size() << ", nrows=" << nrows
+                  << ", has_nested_part=" << node.path.has_nested_part()
+                  << ", type=" << (node.data.type ? node.data.type->get_name() 
: "null");
+        CHECK(column->size() == nrows) << "column->size()=" << column->size() 
<< ", nrows=" << nrows
+                                       << ", path=" << node.path.get_path();
         if (node.path.has_nested_part()) {
             if (node.data.type->get_primitive_type() != 
PrimitiveType::TYPE_ARRAY) {
                 return Status::InternalError(
diff --git a/be/src/olap/rowset/segment_v2/variant/nested_group_builder.cpp 
b/be/src/olap/rowset/segment_v2/variant/nested_group_builder.cpp
index d4e50154387..1dcc5fef16f 100755
--- a/be/src/olap/rowset/segment_v2/variant/nested_group_builder.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/nested_group_builder.cpp
@@ -217,7 +217,6 @@ Status NestedGroupBuilder::_process_array_of_objects(const 
doris::JsonbValue* ar
     return Status::OK();
 }
 
-// 
NOLINTNEXTLINE(readability-function-cognitive-complexity,readability-function-size)
 Status NestedGroupBuilder::_process_object_as_paths(
         const doris::JsonbValue* obj_value, const vectorized::PathInData& 
current_prefix,
         NestedGroup& group, size_t element_flat_idx,
@@ -244,68 +243,92 @@ Status NestedGroupBuilder::_process_object_as_paths(
         }
 
         if (v->isObject()) {
-            // flatten object fields into dotted paths.
-            RETURN_IF_ERROR(_process_object_as_paths(v, next_prefix, group, 
element_flat_idx,
-                                                    seen_child_paths, 
seen_nested_paths,
-                                                    depth + 1));
-            continue;
+            RETURN_IF_ERROR(_process_object_field(v, next_prefix, group, 
element_flat_idx,
+                                                 seen_child_paths, 
seen_nested_paths, depth));
+        } else if (v->isArray() && _is_array_of_objects(v)) {
+            RETURN_IF_ERROR(_process_nested_array_field(v, next_prefix, group, 
element_flat_idx,
+                                                       seen_nested_paths, 
depth));
+        } else {
+            RETURN_IF_ERROR(_process_scalar_field(v, next_prefix, group, 
element_flat_idx,
+                                                 seen_child_paths));
         }
+    }
 
-        if (v->isArray() && _is_array_of_objects(v)) {
-            // Nested array<object> inside this group.
-            // array<object> has the highest priority. If the same path was
-            // previously treated as a scalar child, discard it.
-            if (auto it_child = group.children.find(next_prefix); it_child != 
group.children.end()) {
-                group.children.erase(it_child);
-            }
-            std::shared_ptr<NestedGroup>& ng = 
group.nested_groups[next_prefix];
-            if (!ng) {
-                ng = std::make_shared<NestedGroup>();
-                ng->path = next_prefix;
-            }
+    return Status::OK();
+}
 
-            if (_handle_conflict(*ng, /*is_array_object=*/true)) {
-                continue;
-            }
+Status NestedGroupBuilder::_process_object_field(
+        const doris::JsonbValue* obj_value, const vectorized::PathInData& 
next_prefix,
+        NestedGroup& group, size_t element_flat_idx,
+        std::unordered_set<std::string>& seen_child_paths,
+        std::unordered_set<std::string>& seen_nested_paths, size_t depth) {
+    // Recursively flatten object fields into dotted paths.
+    return _process_object_as_paths(obj_value, next_prefix, group, 
element_flat_idx,
+                                   seen_child_paths, seen_nested_paths, depth 
+ 1);
+}
 
-            // Ensure offsets size up to current parent element.
-            // Fill missing parent elements with empty arrays.
-            OffsetManager::backfill_to_element(*ng, element_flat_idx);
+Status NestedGroupBuilder::_process_nested_array_field(
+        const doris::JsonbValue* arr_value, const vectorized::PathInData& 
next_prefix,
+        NestedGroup& group, size_t element_flat_idx,
+        std::unordered_set<std::string>& seen_nested_paths, size_t depth) {
+    // Nested array<object> inside this group.
+    // array<object> has the highest priority. If the same path was
+    // previously treated as a scalar child, discard it.
+    if (auto it_child = group.children.find(next_prefix); it_child != 
group.children.end()) {
+        group.children.erase(it_child);
+    }
 
-            // Process nested group for this parent element (one offsets entry 
appended inside).
-            RETURN_IF_ERROR(_process_array_of_objects(v, *ng, 
element_flat_idx, depth + 1));
-            seen_nested_paths.insert(ng->path.get_path());
-            continue;
-        }
+    std::shared_ptr<NestedGroup>& ng = group.nested_groups[next_prefix];
+    if (!ng) {
+        ng = std::make_shared<NestedGroup>();
+        ng->path = next_prefix;
+    }
 
-        // Scalar / non-array value becomes a child subcolumn.
-        // if this path is already a nested array<object>, discard scalars.
-        if (group.nested_groups.contains(next_prefix)) {
-            continue;
-        }
-        vectorized::Field f;
-        RETURN_IF_ERROR(_jsonb_to_field(v, f));
-
-        // Ensure subcolumn exists and is nullable (for NestedGroup children, 
we need nullable
-        // to support NULL values when a field is missing in some rows)
-        if (group.children.find(next_prefix) == group.children.end()) {
-            // Create a new nullable subcolumn for this path
-            group.children[next_prefix] = 
vectorized::ColumnVariant::Subcolumn(0, true, false);
-        }
+    if (_handle_conflict(*ng, /*is_array_object=*/true)) {
+        return Status::OK();
+    }
 
-        auto& sub = group.children[next_prefix];
-        if (sub.size() < element_flat_idx) {
-            sub.insert_many_defaults(element_flat_idx - sub.size());
-        }
-        try {
-            sub.insert(f);
-        } catch (const doris::Exception& e) {
-            return Status::InternalError("NestedGroupBuilder insert failed at 
{}: {}",
-                                         next_prefix.get_path(), 
e.to_string());
-        }
-        seen_child_paths.insert(next_prefix.get_path());
+    // Ensure offsets size up to current parent element.
+    // Fill missing parent elements with empty arrays.
+    OffsetManager::backfill_to_element(*ng, element_flat_idx);
+
+    // Process nested group for this parent element (one offsets entry 
appended inside).
+    RETURN_IF_ERROR(_process_array_of_objects(arr_value, *ng, 
element_flat_idx, depth + 1));
+    seen_nested_paths.insert(ng->path.get_path());
+    return Status::OK();
+}
+
+Status NestedGroupBuilder::_process_scalar_field(
+        const doris::JsonbValue* value, const vectorized::PathInData& 
next_prefix,
+        NestedGroup& group, size_t element_flat_idx,
+        std::unordered_set<std::string>& seen_child_paths) {
+    // Scalar / non-array value becomes a child subcolumn.
+    // If this path is already a nested array<object>, discard scalars.
+    if (group.nested_groups.contains(next_prefix)) {
+        return Status::OK();
     }
 
+    vectorized::Field f;
+    RETURN_IF_ERROR(_jsonb_to_field(value, f));
+
+    // Ensure subcolumn exists and is nullable (for NestedGroup children, we 
need nullable
+    // to support NULL values when a field is missing in some rows)
+    if (group.children.find(next_prefix) == group.children.end()) {
+        // Create a new nullable subcolumn for this path
+        group.children[next_prefix] = vectorized::ColumnVariant::Subcolumn(0, 
true, false);
+    }
+
+    auto& sub = group.children[next_prefix];
+    if (sub.size() < element_flat_idx) {
+        sub.insert_many_defaults(element_flat_idx - sub.size());
+    }
+    try {
+        sub.insert(f);
+    } catch (const doris::Exception& e) {
+        return Status::InternalError("NestedGroupBuilder insert failed at {}: 
{}",
+                                     next_prefix.get_path(), e.to_string());
+    }
+    seen_child_paths.insert(next_prefix.get_path());
     return Status::OK();
 }
 
@@ -350,7 +373,34 @@ Status NestedGroupBuilder::_jsonb_to_field(const 
doris::JsonbValue* value,
         return Status::OK();
     }
 
-    return Status::InvalidArgument("NestedGroupBuilder cannot convert 
container type {} to field",
+    // For arrays (non array<object>), recursively convert each element to 
Field
+    // and create an Array type Field.
+    if (value->isArray()) {
+        const auto* arr = value->unpack<doris::ArrayVal>();
+        vectorized::Array arr_field;
+        const int n = arr->numElem();
+        arr_field.reserve(n);
+        for (int i = 0; i < n; ++i) {
+            const auto* elem = arr->get(i);
+            vectorized::Field elem_field;
+            RETURN_IF_ERROR(_jsonb_to_field(elem, elem_field));
+            arr_field.push_back(std::move(elem_field));
+        }
+        out = 
vectorized::Field::create_field<PrimitiveType::TYPE_ARRAY>(std::move(arr_field));
+        return Status::OK();
+    }
+
+    // // For objects that reach here (shouldn't happen normally as objects 
should be
+    // // recursively flattened in _process_object_as_paths), serialize as 
JSONB as fallback.
+    // if (value->isObject()) {
+    //     const char* data = reinterpret_cast<const char*>(value);
+    //     size_t size = value->size();
+    //     out = vectorized::Field::create_field<PrimitiveType::TYPE_JSONB>(
+    //             vectorized::JsonbField(data, size));
+    //     return Status::OK();
+    // }
+
+    return Status::InvalidArgument("NestedGroupBuilder cannot convert type {} 
to field",
                                    value->typeName());
 }
 
diff --git a/be/src/olap/rowset/segment_v2/variant/nested_group_builder.h 
b/be/src/olap/rowset/segment_v2/variant/nested_group_builder.h
index 040f946dc8d..723569f26dc 100755
--- a/be/src/olap/rowset/segment_v2/variant/nested_group_builder.h
+++ b/be/src/olap/rowset/segment_v2/variant/nested_group_builder.h
@@ -111,6 +111,27 @@ private:
     Status _process_array_of_objects(const doris::JsonbValue* arr_value, 
NestedGroup& group,
                                     size_t parent_row_idx, size_t depth);
 
+    // Process nested object field by recursively flattening into dotted paths.
+    Status _process_object_field(const doris::JsonbValue* obj_value,
+                                const vectorized::PathInData& next_prefix,
+                                NestedGroup& group, size_t element_flat_idx,
+                                std::unordered_set<std::string>& 
seen_child_paths,
+                                std::unordered_set<std::string>& 
seen_nested_paths,
+                                size_t depth);
+
+    // Process nested array<object> field within a NestedGroup.
+    Status _process_nested_array_field(const doris::JsonbValue* arr_value,
+                                      const vectorized::PathInData& 
next_prefix,
+                                      NestedGroup& group, size_t 
element_flat_idx,
+                                      std::unordered_set<std::string>& 
seen_nested_paths,
+                                      size_t depth);
+
+    // Process scalar field and insert into subcolumn.
+    Status _process_scalar_field(const doris::JsonbValue* value,
+                                const vectorized::PathInData& next_prefix,
+                                NestedGroup& group, size_t element_flat_idx,
+                                std::unordered_set<std::string>& 
seen_child_paths);
+
     // Return true if this array can be treated as array<object> (nulls 
allowed).
     bool _is_array_of_objects(const doris::JsonbValue* arr_value) const;
 
diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp 
b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
index daa743baa12..8418bc0d2e9 100755
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
@@ -32,6 +32,7 @@
 #include "olap/rowset/segment_v2/segment.h"
 #include "olap/rowset/segment_v2/variant/hierarchical_data_iterator.h"
 #include "olap/rowset/segment_v2/variant/nested_group_builder.h"
+#include "olap/rowset/segment_v2/variant/nested_group_iterator.h"
 #include "olap/rowset/segment_v2/variant/sparse_column_extract_iterator.h"
 #include "olap/rowset/segment_v2/variant/sparse_column_merge_iterator.h"
 #include "olap/tablet_schema.h"
@@ -50,488 +51,6 @@ namespace doris::segment_v2 {
 
 #include "common/compile_check_begin.h"
 
-namespace {
-
-// read offsets in range [start, start+count) and also return previous offset.
-Status _read_offsets_with_prev(ColumnIterator* offsets_iter, ordinal_t start, 
size_t count,
-                               uint64_t* prev, std::vector<uint64_t>* out) {
-    *prev = 0;
-    out->clear();
-    if (count == 0) {
-        return Status::OK();
-    }
-    if (start > 0) {
-        RETURN_IF_ERROR(offsets_iter->seek_to_ordinal(start - 1));
-        vectorized::MutableColumnPtr prev_col = 
vectorized::ColumnOffset64::create();
-        size_t one = 1;
-        bool has_null = false;
-        RETURN_IF_ERROR(offsets_iter->next_batch(&one, prev_col, &has_null));
-        auto* prev_data = 
assert_cast<vectorized::ColumnOffset64*>(prev_col.get());
-        if (!prev_data->get_data().empty()) {
-            *prev = prev_data->get_data()[0];
-        }
-    }
-    RETURN_IF_ERROR(offsets_iter->seek_to_ordinal(start));
-    vectorized::MutableColumnPtr off_col = 
vectorized::ColumnOffset64::create();
-    bool has_null = false;
-    RETURN_IF_ERROR(offsets_iter->next_batch(&count, off_col, &has_null));
-    auto* off_data = assert_cast<vectorized::ColumnOffset64*>(off_col.get());
-    out->assign(off_data->get_data().begin(), off_data->get_data().end());
-    return Status::OK();
-}
-
-// Iterator for reading the whole NestedGroup as 
Nullable(Variant(root_type=JSONB)).
-class NestedGroupWholeIterator : public ColumnIterator {
-public:
-    explicit NestedGroupWholeIterator(const NestedGroupReader* group_reader)
-            : _group_reader(group_reader) {}
-
-    Status init(const ColumnIteratorOptions& opts) override {
-        DCHECK(_group_reader && _group_reader->is_valid());
-        _iter_opts = opts;
-
-        // Build iterators for this group recursively.
-        RETURN_IF_ERROR(_build_group_state(_root_state, _group_reader));
-        return Status::OK();
-    }
-
-    Status seek_to_ordinal(ordinal_t ord_idx) override {
-        _current_ordinal = ord_idx;
-        RETURN_IF_ERROR(_seek_group_state(_root_state, ord_idx));
-        return Status::OK();
-    }
-
-    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* 
has_null) override {
-        RETURN_IF_ERROR(_append_group_as_any(_root_state, _current_ordinal, 
*n, dst));
-        _current_ordinal += *n;
-        *has_null = false;
-        return Status::OK();
-    }
-
-    Status read_by_rowids(const rowid_t* rowids, const size_t count,
-                          vectorized::MutableColumnPtr& dst) override {
-        bool has_null = false;
-        for (size_t i = 0; i < count; ++i) {
-            RETURN_IF_ERROR(seek_to_ordinal(rowids[i]));
-            size_t one = 1;
-            RETURN_IF_ERROR(next_batch(&one, dst, &has_null));
-        }
-        return Status::OK();
-    }
-
-    ordinal_t get_current_ordinal() const override { return _current_ordinal; }
-
-private:
-    struct GroupState {
-        const NestedGroupReader* reader = nullptr;
-        ColumnIteratorUPtr offsets_iter;
-        std::unordered_map<std::string, ColumnIteratorUPtr> child_iters;
-        std::unordered_map<std::string, std::unique_ptr<GroupState>> 
nested_groups;
-    };
-
-    Status _build_group_state(GroupState& state, const NestedGroupReader* 
reader) {
-        state.reader = reader;
-        
RETURN_IF_ERROR(reader->offsets_reader->new_iterator(&state.offsets_iter, 
nullptr));
-        RETURN_IF_ERROR(state.offsets_iter->init(_iter_opts));
-        for (const auto& [name, cr] : reader->child_readers) {
-            ColumnIteratorUPtr it;
-            RETURN_IF_ERROR(cr->new_iterator(&it, nullptr));
-            RETURN_IF_ERROR(it->init(_iter_opts));
-            state.child_iters.emplace(name, std::move(it));
-        }
-        for (const auto& [name, nested] : reader->nested_group_readers) {
-            auto st = std::make_unique<GroupState>();
-            RETURN_IF_ERROR(_build_group_state(*st, nested.get()));
-            state.nested_groups.emplace(name, std::move(st));
-        }
-        return Status::OK();
-    }
-
-    Status _seek_group_state(GroupState& state, ordinal_t row_ord) {
-        RETURN_IF_ERROR(state.offsets_iter->seek_to_ordinal(row_ord));
-        uint64_t start_off = 0;
-        if (row_ord > 0) {
-            std::vector<uint64_t> prev;
-            RETURN_IF_ERROR(_read_offsets_with_prev(state.offsets_iter.get(), 
row_ord, 1, &start_off,
-                                                   &prev));
-            if (!prev.empty()) {
-                start_off = prev[0];
-            }
-        }
-        for (auto& [_, it] : state.child_iters) {
-            RETURN_IF_ERROR(it->seek_to_ordinal(start_off));
-        }
-        for (auto& [_, nested] : state.nested_groups) {
-            // nested groups are indexed by parent element ordinal (flat index)
-            RETURN_IF_ERROR(_seek_group_state(*nested, start_off));
-        }
-        return Status::OK();
-    }
-
-    // output selector:
-    // - If dst is Nullable(JSONB) (ColumnNullable(ColumnString)), append 
JSONB blobs.
-    // - Otherwise append Nullable(Variant(root_type=JSONB)).
-    Status _append_group_as_any(GroupState& state, ordinal_t row_ord, size_t 
row_cnt,
-                               vectorized::MutableColumnPtr& dst) {
-        if (auto* dst_nullable = 
vectorized::check_and_get_column<vectorized::ColumnNullable>(dst.get());
-            dst_nullable != nullptr) {
-            if (vectorized::check_and_get_column<vectorized::ColumnString>(
-                        &dst_nullable->get_nested_column()) != nullptr) {
-                return _append_group_as_jsonb_blob(state, row_ord, row_cnt, 
dst);
-            }
-        }
-        return _append_group_as_jsonb(state, row_ord, row_cnt, dst);
-    }
-
-    // build a JSONB blob column (Nullable(JSONB)) for rows [row_ord, 
row_ord+row_cnt).
-    // This is used for nested groups inside an element object.
-    Status _append_group_as_jsonb_blob(GroupState& state, ordinal_t row_ord, 
size_t row_cnt,
-                                       vectorized::MutableColumnPtr& dst) {
-        auto* dst_nullable = 
assert_cast<vectorized::ColumnNullable*>(dst.get());
-        auto& dst_str = 
assert_cast<vectorized::ColumnString&>(dst_nullable->get_nested_column());
-        auto& dst_nullmap = dst_nullable->get_null_map_column().get_data();
-
-        uint64_t start_off = 0;
-        std::vector<uint64_t> offsets;
-        RETURN_IF_ERROR(_read_offsets_with_prev(state.offsets_iter.get(), 
row_ord, row_cnt, &start_off,
-                                               &offsets));
-        uint64_t end_off = offsets.empty() ? start_off : offsets.back();
-        auto elem_count = static_cast<size_t>(end_off - start_off);
-
-        RETURN_IF_ERROR(_seek_child_iters(state, start_off));
-
-        vectorized::MutableColumnPtr elem_obj = 
vectorized::ColumnVariant::create(0, elem_count);
-        auto* elem_obj_ptr = 
assert_cast<vectorized::ColumnVariant*>(elem_obj.get());
-        RETURN_IF_ERROR(
-                _fill_elem_object_with_scalar_children(state, start_off, 
elem_count, elem_obj_ptr));
-        RETURN_IF_ERROR(_fill_elem_object_with_nested_groups(state, start_off, 
elem_count, elem_obj_ptr));
-
-        doris::JsonBinaryValue jsonb_value;
-        for (size_t r = 0; r < offsets.size(); ++r) {
-            uint64_t off = offsets[r];
-            uint64_t prev = (r == 0) ? start_off : offsets[r - 1];
-
-            std::string json;
-            json.push_back('[');
-            for (uint64_t i = prev; i < off; ++i) {
-                if (i > prev) {
-                    json.push_back(',');
-                }
-                std::string obj;
-                
elem_obj_ptr->serialize_one_row_to_string(static_cast<int64_t>(i), &obj);
-                json.append(obj);
-            }
-            json.push_back(']');
-
-            RETURN_IF_ERROR(jsonb_value.from_json_string(json.data(), 
json.size()));
-            dst_str.insert_data(jsonb_value.value(), jsonb_value.size());
-            dst_nullmap.push_back(0);
-        }
-        return Status::OK();
-    }
-
-    // build a Variant(root_type=JSONB) column for rows [row_ord, 
row_ord+row_cnt)
-    // and append into dst (expected Nullable(Variant) or Variant).
-    Status _append_group_as_jsonb(GroupState& state, ordinal_t row_ord, size_t 
row_cnt,
-                                 vectorized::MutableColumnPtr& dst) {
-        uint64_t start_off = 0;
-        std::vector<uint64_t> offsets;
-        RETURN_IF_ERROR(_read_offsets_with_prev(state.offsets_iter.get(), 
row_ord, row_cnt, &start_off,
-                                               &offsets));
-        uint64_t end_off = offsets.empty() ? start_off : offsets.back();
-        auto elem_count = static_cast<size_t>(end_off - start_off);
-
-        // Seek all child iterators to start offset (safe for both 
sequential/random usage).
-        RETURN_IF_ERROR(_seek_child_iters(state, start_off));
-
-        vectorized::MutableColumnPtr elem_obj = 
vectorized::ColumnVariant::create(0, elem_count);
-        auto* elem_obj_ptr = 
assert_cast<vectorized::ColumnVariant*>(elem_obj.get());
-        RETURN_IF_ERROR(_fill_elem_object_with_scalar_children(state, 
start_off, elem_count, elem_obj_ptr));
-        RETURN_IF_ERROR(_fill_elem_object_with_nested_groups(state, start_off, 
elem_count, elem_obj_ptr));
-        return _append_variant_jsonb_rows_from_elem_object(*elem_obj_ptr, 
start_off, offsets, dst);
-    }
-
-    Status _seek_child_iters(GroupState& state, uint64_t start_off) {
-        for (auto& [_, it] : state.child_iters) {
-            RETURN_IF_ERROR(it->seek_to_ordinal(start_off));
-        }
-        return Status::OK();
-    }
-
-    Status _fill_elem_object_with_scalar_children(GroupState& state, uint64_t 
start_off,
-                                                  size_t elem_count,
-                                                  vectorized::ColumnVariant* 
elem_obj_ptr) {
-        for (auto& [name, it] : state.child_iters) {
-            auto type = 
state.reader->child_readers.at(name)->get_vec_data_type();
-            auto col = type->create_column();
-            if (elem_count > 0) {
-                size_t to_read = elem_count;
-                bool child_has_null = false;
-                RETURN_IF_ERROR(it->next_batch(&to_read, col, 
&child_has_null));
-            }
-            vectorized::PathInData p(name);
-            bool ok = elem_obj_ptr->add_sub_column(p, col->assume_mutable(), 
type);
-            if (!ok) {
-                return Status::InternalError("Duplicated NestedGroup child 
field {}", name);
-            }
-        }
-        return Status::OK();
-    }
-
-    Status _fill_elem_object_with_nested_groups(GroupState& state, uint64_t 
start_off, size_t elem_count,
-                                                vectorized::ColumnVariant* 
elem_obj_ptr) {
-        auto jsonb_type = 
vectorized::make_nullable(std::make_shared<vectorized::DataTypeJsonb>());
-        for (auto& [name, nested_state] : state.nested_groups) {
-            auto nested_jsonb = vectorized::ColumnString::create();
-            auto nested_nullable = vectorized::ColumnNullable::create(
-                    std::move(nested_jsonb), 
vectorized::ColumnUInt8::create());
-            auto nested_mut = nested_nullable->assume_mutable();
-            if (elem_count > 0) {
-                size_t tmp_n = elem_count;
-                RETURN_IF_ERROR(_append_group_as_jsonb_blob(*nested_state, 
start_off, tmp_n, nested_mut));
-            } else {
-                nested_mut->insert_many_defaults(elem_count);
-            }
-            vectorized::PathInData p(name);
-            bool ok = elem_obj_ptr->add_sub_column(p, std::move(nested_mut), 
jsonb_type);
-            if (!ok) {
-                return Status::InternalError("Duplicated NestedGroup nested 
field {}", name);
-            }
-        }
-        return Status::OK();
-    }
-
-    Status _append_variant_jsonb_rows_from_elem_object(const 
vectorized::ColumnVariant& elem_obj,
-                                                       uint64_t start_off,
-                                                       const 
std::vector<uint64_t>& offsets,
-                                                       
vectorized::MutableColumnPtr& dst) {
-        auto* dst_nullable = 
vectorized::check_and_get_column<vectorized::ColumnNullable>(dst.get());
-        auto& dst_variant =
-                dst_nullable ? 
assert_cast<vectorized::ColumnVariant&>(dst_nullable->get_nested_column())
-                             : assert_cast<vectorized::ColumnVariant&>(*dst);
-
-        doris::JsonBinaryValue jsonb_value;
-        for (size_t r = 0; r < offsets.size(); ++r) {
-            uint64_t off = offsets[r];
-            uint64_t prev = (r == 0) ? start_off : offsets[r - 1];
-
-            std::string json;
-            json.push_back('[');
-            for (uint64_t i = prev; i < off; ++i) {
-                if (i > prev) {
-                    json.push_back(',');
-                }
-                std::string obj;
-                elem_obj.serialize_one_row_to_string(static_cast<int64_t>(i), 
&obj);
-                json.append(obj);
-            }
-            json.push_back(']');
-
-            RETURN_IF_ERROR(jsonb_value.from_json_string(json.data(), 
json.size()));
-            vectorized::Field jsonb_field = 
vectorized::Field::create_field<PrimitiveType::TYPE_JSONB>(
-                    vectorized::JsonbField(jsonb_value.value(), 
jsonb_value.size()));
-
-            vectorized::VariantMap object;
-            object.try_emplace(vectorized::PathInData {},
-                               vectorized::FieldWithDataType(jsonb_field));
-            vectorized::Field variant_field =
-                    
vectorized::Field::create_field<PrimitiveType::TYPE_VARIANT>(std::move(object));
-            dst_variant.insert(variant_field);
-            if (dst_nullable) {
-                dst_nullable->get_null_map_column().get_data().push_back(0);
-            }
-        }
-        return Status::OK();
-    }
-
-    const NestedGroupReader* _group_reader;
-    ColumnIteratorOptions _iter_opts;
-    GroupState _root_state;
-    ordinal_t _current_ordinal = 0;
-};
-
-} // namespace
-
-namespace {
-
-// when reading the whole Variant column, merge top-level NestedGroups back
-// into ColumnVariant as subcolumns (path = array path). This enables 
discarding JSONB
-// subcolumns physically while keeping query results correct.
-class VariantRootWithNestedGroupsIterator : public ColumnIterator {
-public:
-    VariantRootWithNestedGroupsIterator(ColumnIteratorUPtr root_iter,
-                                        const NestedGroupReaders* 
nested_group_readers)
-            : _root_iter(std::move(root_iter)) {
-        if (!nested_group_readers) {
-            return;
-        }
-        for (const auto& [path, reader] : *nested_group_readers) {
-            if (reader && reader->is_valid()) {
-                _nested_group_readers.emplace_back(path, reader.get());
-            }
-        }
-    }
-
-    Status init(const ColumnIteratorOptions& opts) override {
-        RETURN_IF_ERROR(_root_iter->init(opts));
-        _nested_group_iters.clear();
-        _nested_group_iters.reserve(_nested_group_readers.size());
-        for (const auto& [path, reader] : _nested_group_readers) {
-            auto it = std::make_unique<NestedGroupWholeIterator>(reader);
-            RETURN_IF_ERROR(it->init(opts));
-            _nested_group_iters.emplace_back(path, std::move(it));
-        }
-        return Status::OK();
-    }
-
-    Status seek_to_ordinal(ordinal_t ord_idx) override {
-        RETURN_IF_ERROR(_root_iter->seek_to_ordinal(ord_idx));
-        for (auto& [_, it] : _nested_group_iters) {
-            RETURN_IF_ERROR(it->seek_to_ordinal(ord_idx));
-        }
-        return Status::OK();
-    }
-
-    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* 
has_null) override {
-        auto& obj =
-                dst->is_nullable()
-                        ? assert_cast<vectorized::ColumnVariant&>(
-                                  
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
-                        : assert_cast<vectorized::ColumnVariant&>(*dst);
-        auto most_common_type = obj.get_most_common_type(); // 
NOLINT(readability-static-accessed-through-instance)
-
-        // Check if we have a top-level NestedGroup ($root path)
-        bool has_root_nested_group = false;
-        for (const auto& [path, _] : _nested_group_readers) {
-            if (path == std::string(kRootNestedGroupPath)) {
-                has_root_nested_group = true;
-                break;
-            }
-        }
-
-        auto root_column = most_common_type->create_column();
-        RETURN_IF_ERROR(_root_iter->next_batch(n, root_column, has_null));
-
-        // Fill dst nullmap from root.
-        if (root_column->is_nullable() && dst->is_nullable()) {
-            auto& dst_null_map =
-                    
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
-            auto& src_null_map =
-                    
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
-            dst_null_map.insert_range_from(src_null_map, 0, 
src_null_map.size());
-        }
-
-        // Build a tmp ColumnVariant for this batch: root + nested-group 
subcolumns.
-        auto tmp = vectorized::ColumnVariant::create(0, root_column->size());
-        auto& tmp_obj = assert_cast<vectorized::ColumnVariant&>(*tmp);
-        
-        // If we have a $root NestedGroup, it will replace the root JSONB.
-        // Otherwise, add the root column as usual.
-        if (!has_root_nested_group) {
-            tmp_obj.add_sub_column({}, std::move(root_column), 
most_common_type);
-        }
-
-        auto ng_type = 
vectorized::make_nullable(std::make_shared<vectorized::DataTypeJsonb>());
-        for (auto& [path, it] : _nested_group_iters) {
-            auto col = ng_type->create_column();
-            size_t m = *n;
-            bool tmp_has_null = false;
-            RETURN_IF_ERROR(it->next_batch(&m, col, &tmp_has_null));
-            
-            // Special handling for top-level NestedGroup ($root path):
-            // Use NestedGroup data as root instead of adding as subcolumn
-            if (path == std::string(kRootNestedGroupPath)) {
-                tmp_obj.add_sub_column(vectorized::PathInData(), 
std::move(col), ng_type);
-            } else {
-                // Normal subcolumn handling
-                tmp_obj.add_sub_column(vectorized::PathInData(path), 
std::move(col), ng_type);
-            }
-        }
-
-        obj.insert_range_from(*tmp, 0, tmp_obj.rows());
-        if (!obj.is_finalized()) {
-            obj.finalize();
-        }
-#ifndef NDEBUG
-        obj.check_consistency();
-#endif
-        *has_null = false;
-        return Status::OK();
-    }
-
-    Status read_by_rowids(const rowid_t* rowids, const size_t count,
-                          vectorized::MutableColumnPtr& dst) override {
-        auto& obj =
-                dst->is_nullable()
-                        ? assert_cast<vectorized::ColumnVariant&>(
-                                  
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
-                        : assert_cast<vectorized::ColumnVariant&>(*dst);
-        auto most_common_type = obj.get_most_common_type(); // 
NOLINT(readability-static-accessed-through-instance)
-
-        // Check if we have a top-level NestedGroup ($root path)
-        bool has_root_nested_group = false;
-        for (const auto& [path, _] : _nested_group_readers) {
-            if (path == std::string(kRootNestedGroupPath)) {
-                has_root_nested_group = true;
-                break;
-            }
-        }
-
-        auto root_column = most_common_type->create_column();
-        RETURN_IF_ERROR(_root_iter->read_by_rowids(rowids, count, 
root_column));
-
-        if (root_column->is_nullable() && dst->is_nullable()) {
-            auto& dst_null_map =
-                    
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
-            auto& src_null_map =
-                    
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
-            dst_null_map.insert_range_from(src_null_map, 0, 
src_null_map.size());
-        }
-
-        auto tmp = vectorized::ColumnVariant::create(0, root_column->size());
-        auto& tmp_obj = assert_cast<vectorized::ColumnVariant&>(*tmp);
-        
-        // If we have a $root NestedGroup, it will replace the root JSONB.
-        // Otherwise, add the root column as usual.
-        if (!has_root_nested_group) {
-            tmp_obj.add_sub_column({}, std::move(root_column), 
most_common_type);
-        }
-
-        auto ng_type = 
vectorized::make_nullable(std::make_shared<vectorized::DataTypeJsonb>());
-        for (auto& [path, it] : _nested_group_iters) {
-            auto col = ng_type->create_column();
-            RETURN_IF_ERROR(it->read_by_rowids(rowids, count, col));
-            
-            // Special handling for top-level NestedGroup ($root path):
-            // Use NestedGroup data as root instead of adding as subcolumn
-            if (path == std::string(kRootNestedGroupPath)) {
-                tmp_obj.add_sub_column(vectorized::PathInData(), 
std::move(col), ng_type);
-            } else {
-                // Normal subcolumn handling
-                tmp_obj.add_sub_column(vectorized::PathInData(path), 
std::move(col), ng_type);
-            }
-        }
-
-        obj.insert_range_from(*tmp, 0, tmp_obj.rows());
-        if (!obj.is_finalized()) {
-            obj.finalize();
-        }
-#ifndef NDEBUG
-        obj.check_consistency();
-#endif
-        return Status::OK();
-    }
-
-    ordinal_t get_current_ordinal() const override { return 
_root_iter->get_current_ordinal(); }
-
-private:
-    ColumnIteratorUPtr _root_iter;
-    std::vector<std::pair<std::string, const NestedGroupReader*>> 
_nested_group_readers;
-    std::vector<std::pair<std::string, 
std::unique_ptr<NestedGroupWholeIterator>>> _nested_group_iters;
-};
-
-} // namespace
-
 const SubcolumnColumnMetaInfo::Node* 
VariantColumnReader::get_subcolumn_meta_by_path(
         const vectorized::PathInData& relative_path) const {
     const auto* node = _subcolumns_meta_info->find_leaf(relative_path);
@@ -973,13 +492,8 @@ Status VariantColumnReader::_new_iterator_with_flat_leaves(
         return Status::OK();
     }
     if (relative_path.empty()) {
-        // root path: merge NestedGroup JSONB back as subcolumns if present
-        if (!_nested_group_readers.empty()) {
-            *iterator = std::make_unique<VariantRootWithNestedGroupsIterator>(
-                    std::make_unique<FileColumnIterator>(_root_column_reader), 
&_nested_group_readers);
-            return Status::OK();
-        }
-        // root path, use VariantRootColumnIterator
+        // root path in flat-leaf mode: use VariantRootColumnIterator for 
simple root read.
+        // In flat-leaf compaction, we read individual columns directly 
without hierarchical merge.
         *iterator = std::make_unique<VariantRootColumnIterator>(
                 std::make_unique<FileColumnIterator>(_root_column_reader));
         return Status::OK();
@@ -1059,10 +573,29 @@ Status 
VariantColumnReader::new_iterator(ColumnIteratorUPtr* iterator,
     }
 
     // Whole NestedGroup access: reading $.nested should return the 
array<object> itself.
+    // With redundant storage, try to use JSONB subcolumn directly (WHOLE 
mode),
+    // falling back to NestedGroup columnar storage (PRUNED mode) if JSONB is not 
available.
     if (!relative_path.empty()) {
         const NestedGroupReader* gr = 
get_nested_group_reader(relative_path.get_path());
         if (gr && gr->is_valid()) {
-            *iterator = std::make_unique<NestedGroupWholeIterator>(gr);
+            // Try to get JSONB subcolumn iterator for WHOLE mode
+            const auto* leaf_node = 
_subcolumns_meta_info->find_leaf(relative_path);
+            if (leaf_node != nullptr) {
+                // JSONB subcolumn exists, use WHOLE mode for direct read
+                std::shared_ptr<ColumnReader> jsonb_reader;
+                Status st = column_reader_cache->get_path_column_reader(
+                        col_uid, relative_path, &jsonb_reader, opt->stats, 
leaf_node);
+                if (st.ok() && jsonb_reader) {
+                    ColumnIteratorUPtr jsonb_iter;
+                    RETURN_IF_ERROR(jsonb_reader->new_iterator(&jsonb_iter, 
nullptr));
+                    *iterator = std::make_unique<NestedGroupWholeIterator>(
+                            gr, NestedGroupReadMode::WHOLE, 
std::move(jsonb_iter));
+                    return Status::OK();
+                }
+            }
+            // JSONB subcolumn not available, fall back to PRUNED mode
+            *iterator = std::make_unique<NestedGroupWholeIterator>(
+                    gr, NestedGroupReadMode::PRUNED, nullptr);
             return Status::OK();
         }
     }
@@ -1137,20 +670,12 @@ Status 
VariantColumnReader::new_iterator(ColumnIteratorUPtr* iterator,
                 column_reader_cache, sparse_column_cache_ptr);
     }
 
-    // Whole Variant read: merge NestedGroup JSONB back as subcolumns if 
present.
-    if (relative_path.empty()) {
-        if (!_nested_group_readers.empty()) {
-            *iterator = std::make_unique<VariantRootWithNestedGroupsIterator>(
-                    std::make_unique<FileColumnIterator>(_root_column_reader), 
&_nested_group_readers);
-            return Status::OK();
-        }
-        *iterator = std::make_unique<VariantRootColumnIterator>(
-                std::make_unique<FileColumnIterator>(_root_column_reader));
-        return Status::OK();
-    }
-
-    // Check if path is prefix, example sparse columns path: a.b.c, a.b.e, 
access prefix: a.b.
-    // Or access root path
+    // Check whether the path is a prefix path or the (empty) root path.
+    // has_prefix_path returns true for empty path, so this covers whole 
Variant reads.
+    // With redundant storage (JSONB subcolumns + NestedGroup), we read from 
JSONB subcolumns
+    // directly via hierarchical reader, which preserves complete data 
including structure conflicts.
+    // NestedGroup is stored for future column pruning optimization but not 
used here.
+    // Example sparse columns path: a.b.c, a.b.e, access prefix: a.b.
     if (has_prefix_path(relative_path)) {
         // Example {"b" : {"c":456,"e":7.111}}
         // b.c is sparse column, b.e is subcolumn, so b is both the prefix of 
sparse column and subcolumn
diff --git 
a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp 
b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp
index 1a807c37f3d..0762df2ca74 100755
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp
@@ -485,9 +485,6 @@ Status 
VariantColumnWriterImpl::_process_subcolumns(vectorized::ColumnVariant* p
             // already handled
             continue;
         }
-        if (_skip_subcolumn_paths.find(entry->path.get_path()) != 
_skip_subcolumn_paths.end()) {
-            continue;
-        }
         CHECK(entry->data.is_finalized());
 
         // create subcolumn writer if under limit; otherwise externalize 
ColumnMetaPB via IndexedColumn
@@ -594,12 +591,11 @@ Status VariantColumnWriterImpl::finalize() {
     ptr->check_consistency();
 #endif
 
-    // Build NestedGroups from JSONB columns before writing subcolumns, so we 
can skip
-    // JSONB subcolumns that are expanded into NestedGroup.
+    // Build NestedGroups from JSONB columns. Both JSONB and NestedGroup are 
stored
+    // for redundancy: JSONB for Whole reads, NestedGroup for column pruning.
     doris::segment_v2::NestedGroupsMap nested_groups;
     doris::segment_v2::NestedGroupBuilder ng_builder;
     
ng_builder.set_max_depth(static_cast<size_t>(config::variant_nested_group_max_depth));
-    _skip_subcolumn_paths.clear();
 
     if (ptr->get_root_type() &&
         
vectorized::remove_nullable(ptr->get_root_type())->get_primitive_type() ==
@@ -618,9 +614,6 @@ Status VariantColumnWriterImpl::finalize() {
         }
         
RETURN_IF_ERROR(ng_builder.build_from_jsonb(entry->data.get_finalized_column_ptr()->get_ptr(),
                                                    entry->path, nested_groups, 
entry->data.size()));
-        if (nested_groups.find(entry->path) != nested_groups.end()) {
-            _skip_subcolumn_paths.insert(entry->path.get_path());
-        }
     }
 
     size_t num_rows = _column->size();
diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h 
b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h
index 90e302ee0b7..d4e3e75eedf 100644
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h
@@ -166,10 +166,6 @@ private:
     // hold the references of subcolumns info
     std::unordered_map<std::string, TabletSchema::SubColumnInfo> 
_subcolumns_info;
     std::unordered_map<std::string, NestedGroupWriter> _nested_group_writers;
-
-    // English comment: JSONB subcolumns that are expanded into NestedGroup 
should not be written
-    // as physical subcolumns in the segment.
-    std::unordered_set<std::string> _skip_subcolumn_paths;
 };
 
 void _init_column_meta(ColumnMetaPB* meta, uint32_t column_id, const 
TabletColumn& column,
diff --git a/be/src/vec/json/variant_nested_builder.cpp 
b/be/src/vec/json/variant_nested_builder.cpp
index 9c2856a05c5..29ceb80d58a 100644
--- a/be/src/vec/json/variant_nested_builder.cpp
+++ b/be/src/vec/json/variant_nested_builder.cpp
@@ -324,20 +324,4 @@ size_t VariantNestedBuilder::get_array_size(const Field& 
value, const FieldInfo&
     return 1;
 }
 
-void VariantNestedBuilder::flatten_array_values(const Field& value, size_t 
target_depth,
-                                                 std::vector<Field>& 
flat_values,
-                                                 std::vector<size_t>& 
level_sizes) {
-    if (target_depth == 0 || value.get_type() != PrimitiveType::TYPE_ARRAY) {
-        flat_values.push_back(value);
-        return;
-    }
-
-    const auto& arr = value.get<Array>();
-    level_sizes.push_back(arr.size());
-
-    for (const auto& elem : arr) {
-        flatten_array_values(elem, target_depth - 1, flat_values, level_sizes);
-    }
-}
-
 } // namespace doris::vectorized
diff --git a/be/src/vec/json/variant_nested_builder.h 
b/be/src/vec/json/variant_nested_builder.h
index 5287a40734f..83cabe83e73 100755
--- a/be/src/vec/json/variant_nested_builder.h
+++ b/be/src/vec/json/variant_nested_builder.h
@@ -109,11 +109,6 @@ private:
     // Extract array size from a field (for determining offsets)
     static size_t get_array_size(const Field& value, const FieldInfo& info);
 
-    // Flatten nested array values to the deepest level
-    static void flatten_array_values(const Field& value, size_t target_depth,
-                                     std::vector<Field>& flat_values,
-                                     std::vector<size_t>& level_sizes);
-
     ColumnVariant& _column_variant;
     size_t _max_depth = 3;
 };


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to