This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch feat-nested in repository https://gitbox.apache.org/repos/asf/doris.git
commit fbe54a0cbab7f773223f326292be4faf1ca797ce Author: Claude Code <[email protected]> AuthorDate: Mon Jan 12 10:37:01 2026 +0800 Refactor variant nested group handling and improve iterator management - Enhanced HierarchicalDataIterator with shared root support and improved error handling - Refactored NestedGroupBuilder to consolidate offset padding logic via OffsetManager - Improved variant column reader/writer implementation for nested structures - Updated variant nested builder with better memory management Co-Authored-By: Claude Sonnet 4.5 <[email protected]> --- .../variant/hierarchical_data_iterator.cpp | 16 +- .../segment_v2/variant/nested_group_builder.cpp | 160 ++++--- .../segment_v2/variant/nested_group_builder.h | 21 + .../segment_v2/variant/variant_column_reader.cpp | 533 ++------------------- .../variant/variant_column_writer_impl.cpp | 11 +- .../variant/variant_column_writer_impl.h | 4 - be/src/vec/json/variant_nested_builder.cpp | 16 - be/src/vec/json/variant_nested_builder.h | 5 - 8 files changed, 172 insertions(+), 594 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp index dd375cb1dcc..7087d413929 100644 --- a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp @@ -59,6 +59,14 @@ Status HierarchicalDataIterator::create(ColumnIteratorUPtr* reader, int32_t col_ // use set_root to share instead continue; } + // Skip NestedGroup subcolumns (columns with __ng. prefix in path). + // NestedGroup columns only contain rows that have the nested array, not all rows. + // They need special handling via NestedGroupWholeIterator, not regular hierarchical merge. + const auto& leaf_path = leaves_paths[i].get_path(); + if (leaf_path.find("__ng.") != std::string::npos) { + VLOG_DEBUG << "Skipping NestedGroup subcolumn: " << leaf_path; + continue; + } RETURN_IF_ERROR( stream_iter->add_stream(col_uid, leaves[i], column_reader_cache, stats)); } @@ -274,7 +282,13 @@ Status HierarchicalDataIterator::_init_container(vectorized::MutableColumnPtr& c RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) { MutableColumnPtr column = node.data.column->get_ptr(); PathInData relative_path = node.path.copy_pop_nfront(_path.get_parts().size()); - DCHECK(column->size() == nrows); + VLOG_DEBUG << "HierarchicalDataIterator::_init_container: node.path=" << node.path.get_path() + << ", relative_path=" << relative_path.get_path() + << ", column_size=" << column->size() << ", nrows=" << nrows + << ", has_nested_part=" << node.path.has_nested_part() + << ", type=" << (node.data.type ? node.data.type->get_name() : "null"); + CHECK(column->size() == nrows) << "column->size()=" << column->size() << ", nrows=" << nrows + << ", path=" << node.path.get_path(); if (node.path.has_nested_part()) { if (node.data.type->get_primitive_type() != PrimitiveType::TYPE_ARRAY) { return Status::InternalError( diff --git a/be/src/olap/rowset/segment_v2/variant/nested_group_builder.cpp b/be/src/olap/rowset/segment_v2/variant/nested_group_builder.cpp index d4e50154387..1dcc5fef16f 100755 --- a/be/src/olap/rowset/segment_v2/variant/nested_group_builder.cpp +++ b/be/src/olap/rowset/segment_v2/variant/nested_group_builder.cpp @@ -217,7 +217,6 @@ Status NestedGroupBuilder::_process_array_of_objects(const doris::JsonbValue* ar return Status::OK(); } -// NOLINTNEXTLINE(readability-function-cognitive-complexity,readability-function-size) Status NestedGroupBuilder::_process_object_as_paths( const doris::JsonbValue* obj_value, const vectorized::PathInData& current_prefix, NestedGroup& group, size_t element_flat_idx, @@ -244,68 +243,92 @@ Status NestedGroupBuilder::_process_object_as_paths( } if (v->isObject()) { - // flatten object fields into dotted paths. - RETURN_IF_ERROR(_process_object_as_paths(v, next_prefix, group, element_flat_idx, - seen_child_paths, seen_nested_paths, - depth + 1)); - continue; + RETURN_IF_ERROR(_process_object_field(v, next_prefix, group, element_flat_idx, + seen_child_paths, seen_nested_paths, depth)); + } else if (v->isArray() && _is_array_of_objects(v)) { + RETURN_IF_ERROR(_process_nested_array_field(v, next_prefix, group, element_flat_idx, + seen_nested_paths, depth)); + } else { + RETURN_IF_ERROR(_process_scalar_field(v, next_prefix, group, element_flat_idx, + seen_child_paths)); } + } - if (v->isArray() && _is_array_of_objects(v)) { - // Nested array<object> inside this group. - // array<object> has the highest priority. If the same path was - // previously treated as a scalar child, discard it. - if (auto it_child = group.children.find(next_prefix); it_child != group.children.end()) { - group.children.erase(it_child); - } - std::shared_ptr<NestedGroup>& ng = group.nested_groups[next_prefix]; - if (!ng) { - ng = std::make_shared<NestedGroup>(); - ng->path = next_prefix; - } + return Status::OK(); +} - if (_handle_conflict(*ng, /*is_array_object=*/true)) { - continue; - } +Status NestedGroupBuilder::_process_object_field( + const doris::JsonbValue* obj_value, const vectorized::PathInData& next_prefix, + NestedGroup& group, size_t element_flat_idx, + std::unordered_set<std::string>& seen_child_paths, + std::unordered_set<std::string>& seen_nested_paths, size_t depth) { + // Recursively flatten object fields into dotted paths. + return _process_object_as_paths(obj_value, next_prefix, group, element_flat_idx, + seen_child_paths, seen_nested_paths, depth + 1); +} - // Ensure offsets size up to current parent element. - // Fill missing parent elements with empty arrays. - OffsetManager::backfill_to_element(*ng, element_flat_idx); +Status NestedGroupBuilder::_process_nested_array_field( + const doris::JsonbValue* arr_value, const vectorized::PathInData& next_prefix, + NestedGroup& group, size_t element_flat_idx, + std::unordered_set<std::string>& seen_nested_paths, size_t depth) { + // Nested array<object> inside this group. + // array<object> has the highest priority. If the same path was + // previously treated as a scalar child, discard it. + if (auto it_child = group.children.find(next_prefix); it_child != group.children.end()) { + group.children.erase(it_child); + } - // Process nested group for this parent element (one offsets entry appended inside). - RETURN_IF_ERROR(_process_array_of_objects(v, *ng, element_flat_idx, depth + 1)); - seen_nested_paths.insert(ng->path.get_path()); - continue; - } + std::shared_ptr<NestedGroup>& ng = group.nested_groups[next_prefix]; + if (!ng) { + ng = std::make_shared<NestedGroup>(); + ng->path = next_prefix; + } - // Scalar / non-array value becomes a child subcolumn. - // if this path is already a nested array<object>, discard scalars. - if (group.nested_groups.contains(next_prefix)) { - continue; - } - vectorized::Field f; - RETURN_IF_ERROR(_jsonb_to_field(v, f)); - - // Ensure subcolumn exists and is nullable (for NestedGroup children, we need nullable - // to support NULL values when a field is missing in some rows) - if (group.children.find(next_prefix) == group.children.end()) { - // Create a new nullable subcolumn for this path - group.children[next_prefix] = vectorized::ColumnVariant::Subcolumn(0, true, false); - } + if (_handle_conflict(*ng, /*is_array_object=*/true)) { + return Status::OK(); + } - auto& sub = group.children[next_prefix]; - if (sub.size() < element_flat_idx) { - sub.insert_many_defaults(element_flat_idx - sub.size()); - } - try { - sub.insert(f); - } catch (const doris::Exception& e) { - return Status::InternalError("NestedGroupBuilder insert failed at {}: {}", - next_prefix.get_path(), e.to_string()); - } - seen_child_paths.insert(next_prefix.get_path()); + // Ensure offsets size up to current parent element. + // Fill missing parent elements with empty arrays. + OffsetManager::backfill_to_element(*ng, element_flat_idx); + + // Process nested group for this parent element (one offsets entry appended inside). + RETURN_IF_ERROR(_process_array_of_objects(arr_value, *ng, element_flat_idx, depth + 1)); + seen_nested_paths.insert(ng->path.get_path()); + return Status::OK(); +} + +Status NestedGroupBuilder::_process_scalar_field( + const doris::JsonbValue* value, const vectorized::PathInData& next_prefix, + NestedGroup& group, size_t element_flat_idx, + std::unordered_set<std::string>& seen_child_paths) { + // Scalar / non-array value becomes a child subcolumn. + // If this path is already a nested array<object>, discard scalars. + if (group.nested_groups.contains(next_prefix)) { + return Status::OK(); } + vectorized::Field f; + RETURN_IF_ERROR(_jsonb_to_field(value, f)); + + // Ensure subcolumn exists and is nullable (for NestedGroup children, we need nullable + // to support NULL values when a field is missing in some rows) + if (group.children.find(next_prefix) == group.children.end()) { + // Create a new nullable subcolumn for this path + group.children[next_prefix] = vectorized::ColumnVariant::Subcolumn(0, true, false); + } + + auto& sub = group.children[next_prefix]; + if (sub.size() < element_flat_idx) { + sub.insert_many_defaults(element_flat_idx - sub.size()); + } + try { + sub.insert(f); + } catch (const doris::Exception& e) { + return Status::InternalError("NestedGroupBuilder insert failed at {}: {}", + next_prefix.get_path(), e.to_string()); + } + seen_child_paths.insert(next_prefix.get_path()); return Status::OK(); } @@ -350,7 +373,34 @@ Status NestedGroupBuilder::_jsonb_to_field(const doris::JsonbValue* value, return Status::OK(); } - return Status::InvalidArgument("NestedGroupBuilder cannot convert container type {} to field", + // For arrays (non array<object>), recursively convert each element to Field + // and create an Array type Field. + if (value->isArray()) { + const auto* arr = value->unpack<doris::ArrayVal>(); + vectorized::Array arr_field; + const int n = arr->numElem(); + arr_field.reserve(n); + for (int i = 0; i < n; ++i) { + const auto* elem = arr->get(i); + vectorized::Field elem_field; + RETURN_IF_ERROR(_jsonb_to_field(elem, elem_field)); + arr_field.push_back(std::move(elem_field)); + } + out = vectorized::Field::create_field<PrimitiveType::TYPE_ARRAY>(std::move(arr_field)); + return Status::OK(); + } + + // // For objects that reach here (shouldn't happen normally as objects should be + // // recursively flattened in _process_object_as_paths), serialize as JSONB as fallback. + // if (value->isObject()) { + // const char* data = reinterpret_cast<const char*>(value); + // size_t size = value->size(); + // out = vectorized::Field::create_field<PrimitiveType::TYPE_JSONB>( + // vectorized::JsonbField(data, size)); + // return Status::OK(); + // } + + return Status::InvalidArgument("NestedGroupBuilder cannot convert type {} to field", value->typeName()); } diff --git a/be/src/olap/rowset/segment_v2/variant/nested_group_builder.h b/be/src/olap/rowset/segment_v2/variant/nested_group_builder.h index 040f946dc8d..723569f26dc 100755 --- a/be/src/olap/rowset/segment_v2/variant/nested_group_builder.h +++ b/be/src/olap/rowset/segment_v2/variant/nested_group_builder.h @@ -111,6 +111,27 @@ private: Status _process_array_of_objects(const doris::JsonbValue* arr_value, NestedGroup& group, size_t parent_row_idx, size_t depth); + // Process nested object field by recursively flattening into dotted paths. + Status _process_object_field(const doris::JsonbValue* obj_value, + const vectorized::PathInData& next_prefix, + NestedGroup& group, size_t element_flat_idx, + std::unordered_set<std::string>& seen_child_paths, + std::unordered_set<std::string>& seen_nested_paths, + size_t depth); + + // Process nested array<object> field within a NestedGroup. + Status _process_nested_array_field(const doris::JsonbValue* arr_value, + const vectorized::PathInData& next_prefix, + NestedGroup& group, size_t element_flat_idx, + std::unordered_set<std::string>& seen_nested_paths, + size_t depth); + + // Process scalar field and insert into subcolumn. + Status _process_scalar_field(const doris::JsonbValue* value, + const vectorized::PathInData& next_prefix, + NestedGroup& group, size_t element_flat_idx, + std::unordered_set<std::string>& seen_child_paths); + // Return true if this array can be treated as array<object> (nulls allowed). bool _is_array_of_objects(const doris::JsonbValue* arr_value) const; diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp index daa743baa12..8418bc0d2e9 100755 --- a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp @@ -32,6 +32,7 @@ #include "olap/rowset/segment_v2/segment.h" #include "olap/rowset/segment_v2/variant/hierarchical_data_iterator.h" #include "olap/rowset/segment_v2/variant/nested_group_builder.h" +#include "olap/rowset/segment_v2/variant/nested_group_iterator.h" #include "olap/rowset/segment_v2/variant/sparse_column_extract_iterator.h" #include "olap/rowset/segment_v2/variant/sparse_column_merge_iterator.h" #include "olap/tablet_schema.h" @@ -50,488 +51,6 @@ namespace doris::segment_v2 { #include "common/compile_check_begin.h" -namespace { - -// read offsets in range [start, start+count) and also return previous offset. -Status _read_offsets_with_prev(ColumnIterator* offsets_iter, ordinal_t start, size_t count, - uint64_t* prev, std::vector<uint64_t>* out) { - *prev = 0; - out->clear(); - if (count == 0) { - return Status::OK(); - } - if (start > 0) { - RETURN_IF_ERROR(offsets_iter->seek_to_ordinal(start - 1)); - vectorized::MutableColumnPtr prev_col = vectorized::ColumnOffset64::create(); - size_t one = 1; - bool has_null = false; - RETURN_IF_ERROR(offsets_iter->next_batch(&one, prev_col, &has_null)); - auto* prev_data = assert_cast<vectorized::ColumnOffset64*>(prev_col.get()); - if (!prev_data->get_data().empty()) { - *prev = prev_data->get_data()[0]; - } - } - RETURN_IF_ERROR(offsets_iter->seek_to_ordinal(start)); - vectorized::MutableColumnPtr off_col = vectorized::ColumnOffset64::create(); - bool has_null = false; - RETURN_IF_ERROR(offsets_iter->next_batch(&count, off_col, &has_null)); - auto* off_data = assert_cast<vectorized::ColumnOffset64*>(off_col.get()); - out->assign(off_data->get_data().begin(), off_data->get_data().end()); - return Status::OK(); -} - -// Iterator for reading the whole NestedGroup as Nullable(Variant(root_type=JSONB)). -class NestedGroupWholeIterator : public ColumnIterator { -public: - explicit NestedGroupWholeIterator(const NestedGroupReader* group_reader) - : _group_reader(group_reader) {} - - Status init(const ColumnIteratorOptions& opts) override { - DCHECK(_group_reader && _group_reader->is_valid()); - _iter_opts = opts; - - // Build iterators for this group recursively. - RETURN_IF_ERROR(_build_group_state(_root_state, _group_reader)); - return Status::OK(); - } - - Status seek_to_ordinal(ordinal_t ord_idx) override { - _current_ordinal = ord_idx; - RETURN_IF_ERROR(_seek_group_state(_root_state, ord_idx)); - return Status::OK(); - } - - Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override { - RETURN_IF_ERROR(_append_group_as_any(_root_state, _current_ordinal, *n, dst)); - _current_ordinal += *n; - *has_null = false; - return Status::OK(); - } - - Status read_by_rowids(const rowid_t* rowids, const size_t count, - vectorized::MutableColumnPtr& dst) override { - bool has_null = false; - for (size_t i = 0; i < count; ++i) { - RETURN_IF_ERROR(seek_to_ordinal(rowids[i])); - size_t one = 1; - RETURN_IF_ERROR(next_batch(&one, dst, &has_null)); - } - return Status::OK(); - } - - ordinal_t get_current_ordinal() const override { return _current_ordinal; } - -private: - struct GroupState { - const NestedGroupReader* reader = nullptr; - ColumnIteratorUPtr offsets_iter; - std::unordered_map<std::string, ColumnIteratorUPtr> child_iters; - std::unordered_map<std::string, std::unique_ptr<GroupState>> nested_groups; - }; - - Status _build_group_state(GroupState& state, const NestedGroupReader* reader) { - state.reader = reader; - RETURN_IF_ERROR(reader->offsets_reader->new_iterator(&state.offsets_iter, nullptr)); - RETURN_IF_ERROR(state.offsets_iter->init(_iter_opts)); - for (const auto& [name, cr] : reader->child_readers) { - ColumnIteratorUPtr it; - RETURN_IF_ERROR(cr->new_iterator(&it, nullptr)); - RETURN_IF_ERROR(it->init(_iter_opts)); - state.child_iters.emplace(name, std::move(it)); - } - for (const auto& [name, nested] : reader->nested_group_readers) { - auto st = std::make_unique<GroupState>(); - RETURN_IF_ERROR(_build_group_state(*st, nested.get())); - state.nested_groups.emplace(name, std::move(st)); - } - return Status::OK(); - } - - Status _seek_group_state(GroupState& state, ordinal_t row_ord) { - RETURN_IF_ERROR(state.offsets_iter->seek_to_ordinal(row_ord)); - uint64_t start_off = 0; - if (row_ord > 0) { - std::vector<uint64_t> prev; - RETURN_IF_ERROR(_read_offsets_with_prev(state.offsets_iter.get(), row_ord, 1, &start_off, - &prev)); - if (!prev.empty()) { - start_off = prev[0]; - } - } - for (auto& [_, it] : state.child_iters) { - RETURN_IF_ERROR(it->seek_to_ordinal(start_off)); - } - for (auto& [_, nested] : state.nested_groups) { - // nested groups are indexed by parent element ordinal (flat index) - RETURN_IF_ERROR(_seek_group_state(*nested, start_off)); - } - return Status::OK(); - } - - // output selector: - // - If dst is Nullable(JSONB) (ColumnNullable(ColumnString)), append JSONB blobs. - // - Otherwise append Nullable(Variant(root_type=JSONB)). - Status _append_group_as_any(GroupState& state, ordinal_t row_ord, size_t row_cnt, - vectorized::MutableColumnPtr& dst) { - if (auto* dst_nullable = vectorized::check_and_get_column<vectorized::ColumnNullable>(dst.get()); - dst_nullable != nullptr) { - if (vectorized::check_and_get_column<vectorized::ColumnString>( - &dst_nullable->get_nested_column()) != nullptr) { - return _append_group_as_jsonb_blob(state, row_ord, row_cnt, dst); - } - } - return _append_group_as_jsonb(state, row_ord, row_cnt, dst); - } - - // build a JSONB blob column (Nullable(JSONB)) for rows [row_ord, row_ord+row_cnt). - // This is used for nested groups inside an element object. - Status _append_group_as_jsonb_blob(GroupState& state, ordinal_t row_ord, size_t row_cnt, - vectorized::MutableColumnPtr& dst) { - auto* dst_nullable = assert_cast<vectorized::ColumnNullable*>(dst.get()); - auto& dst_str = assert_cast<vectorized::ColumnString&>(dst_nullable->get_nested_column()); - auto& dst_nullmap = dst_nullable->get_null_map_column().get_data(); - - uint64_t start_off = 0; - std::vector<uint64_t> offsets; - RETURN_IF_ERROR(_read_offsets_with_prev(state.offsets_iter.get(), row_ord, row_cnt, &start_off, - &offsets)); - uint64_t end_off = offsets.empty() ? start_off : offsets.back(); - auto elem_count = static_cast<size_t>(end_off - start_off); - - RETURN_IF_ERROR(_seek_child_iters(state, start_off)); - - vectorized::MutableColumnPtr elem_obj = vectorized::ColumnVariant::create(0, elem_count); - auto* elem_obj_ptr = assert_cast<vectorized::ColumnVariant*>(elem_obj.get()); - RETURN_IF_ERROR( - _fill_elem_object_with_scalar_children(state, start_off, elem_count, elem_obj_ptr)); - RETURN_IF_ERROR(_fill_elem_object_with_nested_groups(state, start_off, elem_count, elem_obj_ptr)); - - doris::JsonBinaryValue jsonb_value; - for (size_t r = 0; r < offsets.size(); ++r) { - uint64_t off = offsets[r]; - uint64_t prev = (r == 0) ? start_off : offsets[r - 1]; - - std::string json; - json.push_back('['); - for (uint64_t i = prev; i < off; ++i) { - if (i > prev) { - json.push_back(','); - } - std::string obj; - elem_obj_ptr->serialize_one_row_to_string(static_cast<int64_t>(i), &obj); - json.append(obj); - } - json.push_back(']'); - - RETURN_IF_ERROR(jsonb_value.from_json_string(json.data(), json.size())); - dst_str.insert_data(jsonb_value.value(), jsonb_value.size()); - dst_nullmap.push_back(0); - } - return Status::OK(); - } - - // build a Variant(root_type=JSONB) column for rows [row_ord, row_ord+row_cnt) - // and append into dst (expected Nullable(Variant) or Variant). - Status _append_group_as_jsonb(GroupState& state, ordinal_t row_ord, size_t row_cnt, - vectorized::MutableColumnPtr& dst) { - uint64_t start_off = 0; - std::vector<uint64_t> offsets; - RETURN_IF_ERROR(_read_offsets_with_prev(state.offsets_iter.get(), row_ord, row_cnt, &start_off, - &offsets)); - uint64_t end_off = offsets.empty() ? start_off : offsets.back(); - auto elem_count = static_cast<size_t>(end_off - start_off); - - // Seek all child iterators to start offset (safe for both sequential/random usage). - RETURN_IF_ERROR(_seek_child_iters(state, start_off)); - - vectorized::MutableColumnPtr elem_obj = vectorized::ColumnVariant::create(0, elem_count); - auto* elem_obj_ptr = assert_cast<vectorized::ColumnVariant*>(elem_obj.get()); - RETURN_IF_ERROR(_fill_elem_object_with_scalar_children(state, start_off, elem_count, elem_obj_ptr)); - RETURN_IF_ERROR(_fill_elem_object_with_nested_groups(state, start_off, elem_count, elem_obj_ptr)); - return _append_variant_jsonb_rows_from_elem_object(*elem_obj_ptr, start_off, offsets, dst); - } - - Status _seek_child_iters(GroupState& state, uint64_t start_off) { - for (auto& [_, it] : state.child_iters) { - RETURN_IF_ERROR(it->seek_to_ordinal(start_off)); - } - return Status::OK(); - } - - Status _fill_elem_object_with_scalar_children(GroupState& state, uint64_t start_off, - size_t elem_count, - vectorized::ColumnVariant* elem_obj_ptr) { - for (auto& [name, it] : state.child_iters) { - auto type = state.reader->child_readers.at(name)->get_vec_data_type(); - auto col = type->create_column(); - if (elem_count > 0) { - size_t to_read = elem_count; - bool child_has_null = false; - RETURN_IF_ERROR(it->next_batch(&to_read, col, &child_has_null)); - } - vectorized::PathInData p(name); - bool ok = elem_obj_ptr->add_sub_column(p, col->assume_mutable(), type); - if (!ok) { - return Status::InternalError("Duplicated NestedGroup child field {}", name); - } - } - return Status::OK(); - } - - Status _fill_elem_object_with_nested_groups(GroupState& state, uint64_t start_off, size_t elem_count, - vectorized::ColumnVariant* elem_obj_ptr) { - auto jsonb_type = vectorized::make_nullable(std::make_shared<vectorized::DataTypeJsonb>()); - for (auto& [name, nested_state] : state.nested_groups) { - auto nested_jsonb = vectorized::ColumnString::create(); - auto nested_nullable = vectorized::ColumnNullable::create( - std::move(nested_jsonb), vectorized::ColumnUInt8::create()); - auto nested_mut = nested_nullable->assume_mutable(); - if (elem_count > 0) { - size_t tmp_n = elem_count; - RETURN_IF_ERROR(_append_group_as_jsonb_blob(*nested_state, start_off, tmp_n, nested_mut)); - } else { - nested_mut->insert_many_defaults(elem_count); - } - vectorized::PathInData p(name); - bool ok = elem_obj_ptr->add_sub_column(p, std::move(nested_mut), jsonb_type); - if (!ok) { - return Status::InternalError("Duplicated NestedGroup nested field {}", name); - } - } - return Status::OK(); - } - - Status _append_variant_jsonb_rows_from_elem_object(const vectorized::ColumnVariant& elem_obj, - uint64_t start_off, - const std::vector<uint64_t>& offsets, - vectorized::MutableColumnPtr& dst) { - auto* dst_nullable = vectorized::check_and_get_column<vectorized::ColumnNullable>(dst.get()); - auto& dst_variant = - dst_nullable ? assert_cast<vectorized::ColumnVariant&>(dst_nullable->get_nested_column()) - : assert_cast<vectorized::ColumnVariant&>(*dst); - - doris::JsonBinaryValue jsonb_value; - for (size_t r = 0; r < offsets.size(); ++r) { - uint64_t off = offsets[r]; - uint64_t prev = (r == 0) ? start_off : offsets[r - 1]; - - std::string json; - json.push_back('['); - for (uint64_t i = prev; i < off; ++i) { - if (i > prev) { - json.push_back(','); - } - std::string obj; - elem_obj.serialize_one_row_to_string(static_cast<int64_t>(i), &obj); - json.append(obj); - } - json.push_back(']'); - - RETURN_IF_ERROR(jsonb_value.from_json_string(json.data(), json.size())); - vectorized::Field jsonb_field = vectorized::Field::create_field<PrimitiveType::TYPE_JSONB>( - vectorized::JsonbField(jsonb_value.value(), jsonb_value.size())); - - vectorized::VariantMap object; - object.try_emplace(vectorized::PathInData {}, - vectorized::FieldWithDataType(jsonb_field)); - vectorized::Field variant_field = - vectorized::Field::create_field<PrimitiveType::TYPE_VARIANT>(std::move(object)); - dst_variant.insert(variant_field); - if (dst_nullable) { - dst_nullable->get_null_map_column().get_data().push_back(0); - } - } - return Status::OK(); - } - - const NestedGroupReader* _group_reader; - ColumnIteratorOptions _iter_opts; - GroupState _root_state; - ordinal_t _current_ordinal = 0; -}; - -} // namespace - -namespace { - -// when reading the whole Variant column, merge top-level NestedGroups back -// into ColumnVariant as subcolumns (path = array path). This enables discarding JSONB -// subcolumns physically while keeping query results correct. -class VariantRootWithNestedGroupsIterator : public ColumnIterator { -public: - VariantRootWithNestedGroupsIterator(ColumnIteratorUPtr root_iter, - const NestedGroupReaders* nested_group_readers) - : _root_iter(std::move(root_iter)) { - if (!nested_group_readers) { - return; - } - for (const auto& [path, reader] : *nested_group_readers) { - if (reader && reader->is_valid()) { - _nested_group_readers.emplace_back(path, reader.get()); - } - } - } - - Status init(const ColumnIteratorOptions& opts) override { - RETURN_IF_ERROR(_root_iter->init(opts)); - _nested_group_iters.clear(); - _nested_group_iters.reserve(_nested_group_readers.size()); - for (const auto& [path, reader] : _nested_group_readers) { - auto it = std::make_unique<NestedGroupWholeIterator>(reader); - RETURN_IF_ERROR(it->init(opts)); - _nested_group_iters.emplace_back(path, std::move(it)); - } - return Status::OK(); - } - - Status seek_to_ordinal(ordinal_t ord_idx) override { - RETURN_IF_ERROR(_root_iter->seek_to_ordinal(ord_idx)); - for (auto& [_, it] : _nested_group_iters) { - RETURN_IF_ERROR(it->seek_to_ordinal(ord_idx)); - } - return Status::OK(); - } - - Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override { - auto& obj = - dst->is_nullable() - ? assert_cast<vectorized::ColumnVariant&>( - assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column()) - : assert_cast<vectorized::ColumnVariant&>(*dst); - auto most_common_type = obj.get_most_common_type(); // NOLINT(readability-static-accessed-through-instance) - - // Check if we have a top-level NestedGroup ($root path) - bool has_root_nested_group = false; - for (const auto& [path, _] : _nested_group_readers) { - if (path == std::string(kRootNestedGroupPath)) { - has_root_nested_group = true; - break; - } - } - - auto root_column = most_common_type->create_column(); - RETURN_IF_ERROR(_root_iter->next_batch(n, root_column, has_null)); - - // Fill dst nullmap from root. - if (root_column->is_nullable() && dst->is_nullable()) { - auto& dst_null_map = - assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column(); - auto& src_null_map = - assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column(); - dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); - } - - // Build a tmp ColumnVariant for this batch: root + nested-group subcolumns. - auto tmp = vectorized::ColumnVariant::create(0, root_column->size()); - auto& tmp_obj = assert_cast<vectorized::ColumnVariant&>(*tmp); - - // If we have a $root NestedGroup, it will replace the root JSONB. - // Otherwise, add the root column as usual. - if (!has_root_nested_group) { - tmp_obj.add_sub_column({}, std::move(root_column), most_common_type); - } - - auto ng_type = vectorized::make_nullable(std::make_shared<vectorized::DataTypeJsonb>()); - for (auto& [path, it] : _nested_group_iters) { - auto col = ng_type->create_column(); - size_t m = *n; - bool tmp_has_null = false; - RETURN_IF_ERROR(it->next_batch(&m, col, &tmp_has_null)); - - // Special handling for top-level NestedGroup ($root path): - // Use NestedGroup data as root instead of adding as subcolumn - if (path == std::string(kRootNestedGroupPath)) { - tmp_obj.add_sub_column(vectorized::PathInData(), std::move(col), ng_type); - } else { - // Normal subcolumn handling - tmp_obj.add_sub_column(vectorized::PathInData(path), std::move(col), ng_type); - } - } - - obj.insert_range_from(*tmp, 0, tmp_obj.rows()); - if (!obj.is_finalized()) { - obj.finalize(); - } -#ifndef NDEBUG - obj.check_consistency(); -#endif - *has_null = false; - return Status::OK(); - } - - Status read_by_rowids(const rowid_t* rowids, const size_t count, - vectorized::MutableColumnPtr& dst) override { - auto& obj = - dst->is_nullable() - ? assert_cast<vectorized::ColumnVariant&>( - assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column()) - : assert_cast<vectorized::ColumnVariant&>(*dst); - auto most_common_type = obj.get_most_common_type(); // NOLINT(readability-static-accessed-through-instance) - - // Check if we have a top-level NestedGroup ($root path) - bool has_root_nested_group = false; - for (const auto& [path, _] : _nested_group_readers) { - if (path == std::string(kRootNestedGroupPath)) { - has_root_nested_group = true; - break; - } - } - - auto root_column = most_common_type->create_column(); - RETURN_IF_ERROR(_root_iter->read_by_rowids(rowids, count, root_column)); - - if (root_column->is_nullable() && dst->is_nullable()) { - auto& dst_null_map = - assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column(); - auto& src_null_map = - assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column(); - dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); - } - - auto tmp = vectorized::ColumnVariant::create(0, root_column->size()); - auto& tmp_obj = assert_cast<vectorized::ColumnVariant&>(*tmp); - - // If we have a $root NestedGroup, it will replace the root JSONB. - // Otherwise, add the root column as usual. - if (!has_root_nested_group) { - tmp_obj.add_sub_column({}, std::move(root_column), most_common_type); - } - - auto ng_type = vectorized::make_nullable(std::make_shared<vectorized::DataTypeJsonb>()); - for (auto& [path, it] : _nested_group_iters) { - auto col = ng_type->create_column(); - RETURN_IF_ERROR(it->read_by_rowids(rowids, count, col)); - - // Special handling for top-level NestedGroup ($root path): - // Use NestedGroup data as root instead of adding as subcolumn - if (path == std::string(kRootNestedGroupPath)) { - tmp_obj.add_sub_column(vectorized::PathInData(), std::move(col), ng_type); - } else { - // Normal subcolumn handling - tmp_obj.add_sub_column(vectorized::PathInData(path), std::move(col), ng_type); - } - } - - obj.insert_range_from(*tmp, 0, tmp_obj.rows()); - if (!obj.is_finalized()) { - obj.finalize(); - } -#ifndef NDEBUG - obj.check_consistency(); -#endif - return Status::OK(); - } - - ordinal_t get_current_ordinal() const override { return _root_iter->get_current_ordinal(); } - -private: - ColumnIteratorUPtr _root_iter; - std::vector<std::pair<std::string, const NestedGroupReader*>> _nested_group_readers; - std::vector<std::pair<std::string, std::unique_ptr<NestedGroupWholeIterator>>> _nested_group_iters; -}; - -} // namespace - const SubcolumnColumnMetaInfo::Node* VariantColumnReader::get_subcolumn_meta_by_path( const vectorized::PathInData& relative_path) const { const auto* node = _subcolumns_meta_info->find_leaf(relative_path); @@ -973,13 +492,8 @@ Status VariantColumnReader::_new_iterator_with_flat_leaves( return Status::OK(); } if (relative_path.empty()) { - // root path: merge NestedGroup JSONB back as subcolumns if present - if (!_nested_group_readers.empty()) { - *iterator = std::make_unique<VariantRootWithNestedGroupsIterator>( - std::make_unique<FileColumnIterator>(_root_column_reader), &_nested_group_readers); - return Status::OK(); - } - // root path, use VariantRootColumnIterator + // root path in flat-leaf mode: use VariantRootColumnIterator for simple root read. + // In flat-leaf compaction, we read individual columns directly without hierarchical merge. *iterator = std::make_unique<VariantRootColumnIterator>( std::make_unique<FileColumnIterator>(_root_column_reader)); return Status::OK(); @@ -1059,10 +573,29 @@ Status VariantColumnReader::new_iterator(ColumnIteratorUPtr* iterator, } // Whole NestedGroup access: reading $.nested should return the array<object> itself. + // With redundant storage, try to use JSONB subcolumn directly (WHOLE mode), + // falling back to NestedGroup columnar storage (PRUNED mode) if JSONB not available. if (!relative_path.empty()) { const NestedGroupReader* gr = get_nested_group_reader(relative_path.get_path()); if (gr && gr->is_valid()) { - *iterator = std::make_unique<NestedGroupWholeIterator>(gr); + // Try to get JSONB subcolumn iterator for WHOLE mode + const auto* leaf_node = _subcolumns_meta_info->find_leaf(relative_path); + if (leaf_node != nullptr) { + // JSONB subcolumn exists, use WHOLE mode for direct read + std::shared_ptr<ColumnReader> jsonb_reader; + Status st = column_reader_cache->get_path_column_reader( + col_uid, relative_path, &jsonb_reader, opt->stats, leaf_node); + if (st.ok() && jsonb_reader) { + ColumnIteratorUPtr jsonb_iter; + RETURN_IF_ERROR(jsonb_reader->new_iterator(&jsonb_iter, nullptr)); + *iterator = std::make_unique<NestedGroupWholeIterator>( + gr, NestedGroupReadMode::WHOLE, std::move(jsonb_iter)); + return Status::OK(); + } + } + // JSONB subcolumn not available, fallback to PRUNED mode + *iterator = std::make_unique<NestedGroupWholeIterator>( + gr, NestedGroupReadMode::PRUNED, nullptr); return Status::OK(); } } @@ -1137,20 +670,12 @@ Status VariantColumnReader::new_iterator(ColumnIteratorUPtr* iterator, column_reader_cache, sparse_column_cache_ptr); } - // Whole Variant read: merge NestedGroup JSONB back as subcolumns if present. - if (relative_path.empty()) { - if (!_nested_group_readers.empty()) { - *iterator = std::make_unique<VariantRootWithNestedGroupsIterator>( - std::make_unique<FileColumnIterator>(_root_column_reader), &_nested_group_readers); - return Status::OK(); - } - *iterator = std::make_unique<VariantRootColumnIterator>( - std::make_unique<FileColumnIterator>(_root_column_reader)); - return Status::OK(); - } - - // Check if path is prefix, example sparse columns path: a.b.c, a.b.e, access prefix: a.b. - // Or access root path + // Check if path is prefix or root path (empty). + // has_prefix_path returns true for empty path, so this covers whole Variant reads. + // With redundant storage (JSONB subcolumns + NestedGroup), we read from JSONB subcolumns + // directly via hierarchical reader, which preserves complete data including structure conflicts. + // NestedGroup is stored for future column pruning optimization but not used here. + // Example sparse columns path: a.b.c, a.b.e, access prefix: a.b. if (has_prefix_path(relative_path)) { // Example {"b" : {"c":456,"e":7.111}} // b.c is sparse column, b.e is subcolumn, so b is both the prefix of sparse column and subcolumn diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp index 1a807c37f3d..0762df2ca74 100755 --- a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp +++ b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.cpp @@ -485,9 +485,6 @@ Status VariantColumnWriterImpl::_process_subcolumns(vectorized::ColumnVariant* p // already handled continue; } - if (_skip_subcolumn_paths.find(entry->path.get_path()) != _skip_subcolumn_paths.end()) { - continue; - } CHECK(entry->data.is_finalized()); // create subcolumn writer if under limit; otherwise externalize ColumnMetaPB via IndexedColumn @@ -594,12 +591,11 @@ Status VariantColumnWriterImpl::finalize() { ptr->check_consistency(); #endif - // Build NestedGroups from JSONB columns before writing subcolumns, so we can skip - // JSONB subcolumns that are expanded into NestedGroup. + // Build NestedGroups from JSONB columns. Both JSONB and NestedGroup are stored + // for redundancy: JSONB for Whole reads, NestedGroup for column pruning. doris::segment_v2::NestedGroupsMap nested_groups; doris::segment_v2::NestedGroupBuilder ng_builder; ng_builder.set_max_depth(static_cast<size_t>(config::variant_nested_group_max_depth)); - _skip_subcolumn_paths.clear(); if (ptr->get_root_type() && vectorized::remove_nullable(ptr->get_root_type())->get_primitive_type() == @@ -618,9 +614,6 @@ Status VariantColumnWriterImpl::finalize() { } RETURN_IF_ERROR(ng_builder.build_from_jsonb(entry->data.get_finalized_column_ptr()->get_ptr(), entry->path, nested_groups, entry->data.size())); - if (nested_groups.find(entry->path) != nested_groups.end()) { - _skip_subcolumn_paths.insert(entry->path.get_path()); - } } size_t num_rows = _column->size(); diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h index 90e302ee0b7..d4e3e75eedf 100644 --- a/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h +++ b/be/src/olap/rowset/segment_v2/variant/variant_column_writer_impl.h @@ -166,10 +166,6 @@ private: // hold the references of subcolumns info std::unordered_map<std::string, TabletSchema::SubColumnInfo> _subcolumns_info; std::unordered_map<std::string, NestedGroupWriter> _nested_group_writers; - - // English comment: JSONB subcolumns that are expanded into NestedGroup should not be written - // as physical subcolumns in the segment. - std::unordered_set<std::string> _skip_subcolumn_paths; }; void _init_column_meta(ColumnMetaPB* meta, uint32_t column_id, const TabletColumn& column, diff --git a/be/src/vec/json/variant_nested_builder.cpp b/be/src/vec/json/variant_nested_builder.cpp index 9c2856a05c5..29ceb80d58a 100644 --- a/be/src/vec/json/variant_nested_builder.cpp +++ b/be/src/vec/json/variant_nested_builder.cpp @@ -324,20 +324,4 @@ size_t VariantNestedBuilder::get_array_size(const Field& value, const FieldInfo& return 1; } -void VariantNestedBuilder::flatten_array_values(const Field& value, size_t target_depth, - std::vector<Field>& flat_values, - std::vector<size_t>& level_sizes) { - if (target_depth == 0 || value.get_type() != PrimitiveType::TYPE_ARRAY) { - flat_values.push_back(value); - return; - } - - const auto& arr = value.get<Array>(); - level_sizes.push_back(arr.size()); - - for (const auto& elem : arr) { - flatten_array_values(elem, target_depth - 1, flat_values, level_sizes); - } -} - } // namespace doris::vectorized diff --git a/be/src/vec/json/variant_nested_builder.h b/be/src/vec/json/variant_nested_builder.h index 5287a40734f..83cabe83e73 100755 --- a/be/src/vec/json/variant_nested_builder.h +++ b/be/src/vec/json/variant_nested_builder.h @@ -109,11 +109,6 @@ private: // Extract array size from a field (for determining offsets) static size_t get_array_size(const Field& value, const FieldInfo& info); - // Flatten nested array values to the deepest level - static void flatten_array_values(const Field& value, size_t target_depth, - std::vector<Field>& flat_values, - std::vector<size_t>& level_sizes); - ColumnVariant& _column_variant; size_t _max_depth = 3; }; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
