AshinGau commented on code in PR #18074: URL: https://github.com/apache/doris/pull/18074#discussion_r1147033728
########## be/src/vec/exec/format/parquet/vparquet_reader.cpp: ########## @@ -150,18 +161,46 @@ void ParquetReader::close() { } _closed = true; } + + if (_is_file_metadata_owned && _file_metadata != nullptr) { + delete _file_metadata; + } } Status ParquetReader::_open_file() { if (_file_reader == nullptr) { + SCOPED_RAW_TIMER(&_statistics.open_file_time); + ++_statistics.open_file_num; RETURN_IF_ERROR(FileFactory::create_file_reader( _profile, _system_properties, _file_description, &_file_system, &_file_reader)); } if (_file_metadata == nullptr) { + SCOPED_RAW_TIMER(&_statistics.parse_footer_time); if (_file_reader->size() == 0) { return Status::EndOfFile("open file failed, empty parquet file: " + _scan_range.path); } - RETURN_IF_ERROR(parse_thrift_footer(_file_reader, _file_metadata)); + if (_kv_cache == nullptr) { + _is_file_metadata_owned = true; + RETURN_IF_ERROR(parse_thrift_footer(_file_reader, &_file_metadata)); + } else { + _is_file_metadata_owned = false; + _file_metadata = _kv_cache->get<FileMetaData>( Review Comment: `KVCache::get` is a synchronized method guarded by a single mutex. Maybe we can change the storage map in `KVCache` from `std::unordered_map` to `phmap::parallel_flat_hash_map` to increase concurrency. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org