yiguolei commented on code in PR #57702: URL: https://github.com/apache/doris/pull/57702#discussion_r2559313109
########## be/src/olap/rowset/segment_v2/column_meta_accessor.cpp: ########## @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/column_meta_accessor.h" + +#include <cstdint> +#include <string> + +#include "io/io_common.h" +#include "olap/rowset/segment_v2/external_col_meta_util.h" +#include "olap/rowset/segment_v2/segment.h" // kSegmentFooterVersionV3_ExtColMeta + +namespace doris::segment_v2 { + +// Abstract base implementation shared by both V2 and V3 layouts. +class ColumnMetaAccessor::Impl { +public: + virtual ~Impl() = default; + virtual Status build_uid_to_colid_map( + std::unordered_map<int32_t, size_t>* uid_to_colid) const = 0; + virtual Status get_column_meta(uint32_t column_id, ColumnMetaPB* out) const = 0; + virtual Status traverse_metas( + const std::function<void(const ColumnMetaPB&)>& visitor) const = 0; +}; + +// V2: inline layout using only footer.columns(). +class ColumnMetaAccessorV2 : public ColumnMetaAccessor::Impl { +public: + ColumnMetaAccessorV2(const SegmentFooterPB* footer, io::FileReaderSPtr file_reader) + : _footer(footer), _file_reader(std::move(file_reader)) {} + + Status build_uid_to_colid_map( + std::unordered_map<int32_t, size_t>* uid_to_colid) const override { + uid_to_colid->clear(); + uint32_t ordinal = 0; + for (const auto& column_meta : _footer->columns()) { + // 跳过 variant 子列(unique_id == -1) + if (column_meta.unique_id() == -1) { + ordinal++; + continue; + } + uid_to_colid->try_emplace(column_meta.unique_id(), ordinal++); + } + return Status::OK(); + } + + Status get_column_meta(uint32_t column_id, ColumnMetaPB* out) const override { + if (_footer->columns_size() == 0 || + column_id >= static_cast<uint32_t>(_footer->columns_size())) { + return Status::Corruption( + "no inline column meta available for column_id={} (columns_size={})", column_id, + _footer->columns_size()); + } + *out = _footer->columns(static_cast<int>(column_id)); + return Status::OK(); + } + + Status traverse_metas(const std::function<void(const ColumnMetaPB&)>& visitor) const override { + if (_footer->columns_size() == 0) { + return Status::Corruption("no column meta found in footer (inline V2)"); + } + for (const auto& column : _footer->columns()) { + visitor(column); + } + return Status::OK(); + } + +private: + const SegmentFooterPB* _footer = nullptr; // non-owning + io::FileReaderSPtr _file_reader; +}; + +// V3: use external Column Meta Region + column_meta_entries layout. +class ColumnMetaAccessorV3 : public ColumnMetaAccessor::Impl { +public: + ColumnMetaAccessorV3(const SegmentFooterPB* footer, io::FileReaderSPtr file_reader, + const ExternalColMetaUtil::ExternalMetaPointers& ptrs) + : _footer(footer), _file_reader(std::move(file_reader)), _ptrs(ptrs) {} + + Status build_uid_to_colid_map( + std::unordered_map<int32_t, size_t>* uid_to_colid) const override { + uid_to_colid->clear(); + + bool loaded = false; + // Prefer external uid->col_id map when available. + loaded = ExternalColMetaUtil::parse_uid_to_colid_map(*_footer, _ptrs, uid_to_colid); + + // Compatibility: if external map is broken, fall back to inline columns. + if (!loaded && _footer->columns_size() > 0) { + uint32_t ordinal = 0; + for (const auto& column_meta : _footer->columns()) { + if (column_meta.unique_id() == -1) { + ordinal++; + continue; + } + uid_to_colid->try_emplace(column_meta.unique_id(), ordinal++); + } + loaded = true; + } + + if (!loaded) { + return Status::Corruption( + "segment external meta mapping missing or corrupted and no inline columns; " + "uid->col_id cannot be resolved. path={}", + _file_reader ? _file_reader->path().native() : std::string("<unknown>")); + } + + return Status::OK(); + } + + Status get_column_meta(uint32_t column_id, ColumnMetaPB* out) const override { + // Prefer external Column Meta Region first. + if (column_id < _ptrs.num_columns) { + return ExternalColMetaUtil::read_col_meta(_file_reader, *_footer, _ptrs, column_id, + out); + } + + // Fallback: inline footer.columns(). + if (_footer->columns_size() > 0 && + column_id < static_cast<uint32_t>(_footer->columns_size())) { + *out = _footer->columns(static_cast<int>(column_id)); + return Status::OK(); + } + + return Status::Corruption( + "no column meta available for column_id={} (inline/external missing)", column_id); + } + + Status traverse_metas(const std::function<void(const ColumnMetaPB&)>& visitor) const override { + const uint64_t region_size = (_ptrs.region_end > _ptrs.region_start) + ? (_ptrs.region_end - _ptrs.region_start) + : 0; + if (region_size == 0) { + return Status::Corruption("invalid external meta region size"); + } + + // Read entire meta region once to reduce random I/O. + std::string region_buf; + region_buf.resize(static_cast<size_t>(region_size)); + size_t br = 0; + io::IOContext io_ctx {.is_index_data = true}; + RETURN_IF_ERROR(_file_reader->read_at(_ptrs.region_start, Slice(region_buf), &br, &io_ctx)); + if (br != region_size) { + return Status::Corruption("short read on meta region"); + } + + if (_footer->column_meta_entries_size() != _ptrs.num_columns) { + return Status::Corruption("column_meta_entries size mismatch"); + } + + uint64_t offset = _ptrs.region_start; + for (uint32_t i = 0; i < _ptrs.num_columns; ++i) { + const auto& entry = _footer->column_meta_entries(static_cast<int>(i)); + const uint64_t sz = static_cast<uint64_t>(entry.length()); + const uint64_t pos = offset; + if (!ExternalColMetaUtil::is_valid_meta_slice(pos, sz, _ptrs)) { + return Status::Corruption("external meta entry out of region bounds"); + } + const uint64_t rel = pos - _ptrs.region_start; + ColumnMetaPB meta; + if (!meta.ParseFromArray(region_buf.data() + rel, static_cast<int>(sz))) { + return Status::Corruption("failed parse ColumnMetaPB from region"); + } + visitor(meta); + offset += sz; + } + return Status::OK(); + } + +private: + const SegmentFooterPB* _footer = nullptr; // non-owning + io::FileReaderSPtr _file_reader; + ExternalColMetaUtil::ExternalMetaPointers _ptrs; +}; + +Status ColumnMetaAccessor::init(const SegmentFooterPB& footer, io::FileReaderSPtr file_reader) { + // First check footer version to see if external Column Meta Region might exist, + // then try to parse external layout; if parsing fails, fall back to V2. + ExternalColMetaUtil::ExternalMetaPointers ptrs; + if (footer.version() >= kSegmentFooterVersionV3_ExtColMeta && + ExternalColMetaUtil::parse_external_meta_pointers(footer, &ptrs) && ptrs.num_columns > 0) { + _impl = new ColumnMetaAccessorV3(&footer, std::move(file_reader), ptrs); + } else { + _impl = new ColumnMetaAccessorV2(&footer, std::move(file_reader)); Review Comment: 这个直接用unique ptr吧 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
