csun5285 commented on code in PR #57702: URL: https://github.com/apache/doris/pull/57702#discussion_r2533790844
########## be/src/olap/rowset/segment_v2/variant/variant_ext_meta_writer.cpp: ########## @@ -0,0 +1,186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/variant/variant_ext_meta_writer.h" + +#include <utility> + +#include "olap/rowset/segment_v2/indexed_column_writer.h" +#include "olap/types.h" +#include "vec/common/schema_util.h" + +namespace doris::segment_v2 { + +Status VariantExtMetaWriter::_ensure_inited(Writers* w) { + if (w->inited) { + return Status::OK(); + } + + // key writer: VARCHAR, value index ON, ordinal index OFF + IndexedColumnWriterOptions dict_opts; + dict_opts.write_value_index = true; + dict_opts.write_ordinal_index = false; + dict_opts.encoding = PREFIX_ENCODING; + dict_opts.compression = _comp; + const TypeInfo* dict_type = get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>(); + w->key_writer = std::make_unique<IndexedColumnWriter>(dict_opts, dict_type, _fw); + RETURN_IF_ERROR(w->key_writer->init()); + + // value writer: VARCHAR, value index OFF, ordinal index ON + IndexedColumnWriterOptions vals_opts; + vals_opts.write_value_index = false; + vals_opts.write_ordinal_index = true; + vals_opts.encoding = PLAIN_ENCODING; + vals_opts.compression = _comp; 
+ const TypeInfo* vals_type = get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>(); + w->val_writer = std::make_unique<IndexedColumnWriter>(vals_opts, vals_type, _fw); + RETURN_IF_ERROR(w->val_writer->init()); + + w->inited = true; + return Status::OK(); +} + +Status VariantExtMetaWriter::add(int32_t root_uid, const Slice& key, const Slice& val) { + auto& w = _writers_by_uid[root_uid]; + RETURN_IF_ERROR(_ensure_inited(&w)); + RETURN_IF_ERROR(w.key_writer->add(&key)); + RETURN_IF_ERROR(w.val_writer->add(&val)); + ++w.count; + return Status::OK(); +} + +Status VariantExtMetaWriter::flush_to_footer(SegmentFooterPB* footer) { + for (auto& [uid, w] : _writers_by_uid) { + if (!w.inited || w.count == 0) { + continue; + } + doris::segment_v2::IndexedColumnMetaPB key_meta; + doris::segment_v2::IndexedColumnMetaPB val_meta; + RETURN_IF_ERROR(w.key_writer->finish(&key_meta)); + RETURN_IF_ERROR(w.val_writer->finish(&val_meta)); + + // keys + std::string k = std::string("variant_meta_keys.") + std::to_string(uid); + std::string v; + key_meta.AppendToString(&v); + auto* p1 = footer->add_file_meta_datas(); + p1->set_key(k); + p1->set_value(v); + + // values + std::string k2 = std::string("variant_meta_values.") + std::to_string(uid); + std::string v2; + val_meta.AppendToString(&v2); + auto* p2 = footer->add_file_meta_datas(); + p2->set_key(k2); + p2->set_value(v2); + } + _writers_by_uid.clear(); + return Status::OK(); +} + +Status VariantExtMetaWriter::externalize_from_footer(SegmentFooterPB* footer) { + // Variant meta pre-processing: + // - Collect non-sparse subcolumns per root and externalize them as KV (path→ColumnMetaPB bytes) + // in sorted path order to keep stability; + // - Collect sparse subcolumns (including buckets) per root and embed them into the corresponding + // root variant's ColumnMetaPB.children_columns; + // - Keep only top-level columns (including variant roots) in footer.columns(). 
+ std::vector<ColumnMetaPB> kept; Review Comment: Variant 的 root 列的 ColumnMetaPB 放在 footer 里面,其他子列、稀疏列的 ColumnMetaPB 可以用一个单独的结构保存在内存里,这里就不用再动 footer 了。 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
