This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 6dbac3279ca [fix](filecache) pass tablet_id through FileReaderOptions
instead of parsing from path (#61683) (#63813)
6dbac3279ca is described below
commit 6dbac3279ca8af1d044ecd9215faaafd7ef2e77b
Author: deardeng <[email protected]>
AuthorDate: Thu Jun 11 11:19:23 2026 +0800
[fix](filecache) pass tablet_id through FileReaderOptions instead of
parsing from path (#61683) (#63813)
Cherry-picked from https://github.com/apache/doris/pull/61683
---
be/src/cloud/cloud_internal_service.cpp | 2 +
be/src/cloud/cloud_tablet.cpp | 2 +
be/src/cloud/cloud_warm_up_manager.cpp | 16 +++++--
be/src/cloud/cloud_warm_up_manager.h | 3 +-
be/src/io/cache/block_file_cache_downloader.cpp | 2 +
be/src/io/cache/block_file_cache_downloader.h | 1 +
be/src/io/cache/cached_remote_file_reader.cpp | 56 ++++++++++------------
be/src/io/cache/cached_remote_file_reader.h | 3 +-
be/src/io/cache/file_cache_common.h | 1 +
be/src/io/tools/file_cache_microbench.cpp | 1 +
be/src/olap/collection_statistics.cpp | 2 +-
be/src/olap/compaction.cpp | 5 +-
be/src/olap/rowset/beta_rowset.cpp | 4 +-
be/src/olap/rowset/beta_rowset_writer.cpp | 3 +-
be/src/olap/rowset/rowset_writer.h | 2 +-
.../olap/rowset/segment_v2/index_file_reader.cpp | 10 ++--
be/src/olap/rowset/segment_v2/index_file_reader.h | 7 ++-
.../olap/rowset/segment_v2/index_file_writer.cpp | 10 ++--
be/src/olap/rowset/segment_v2/index_file_writer.h | 4 +-
.../segment_v2/inverted_index_fs_directory.cpp | 4 +-
.../segment_v2/inverted_index_fs_directory.h | 3 +-
be/src/olap/rowset/segment_v2/segment.cpp | 11 ++++-
be/src/olap/rowset/segment_v2/segment.h | 1 +
be/src/olap/task/index_builder.cpp | 10 ++--
be/test/io/cache/block_file_cache_test.cpp | 13 +++++
.../io/cache/block_file_cache_test_lru_dump.cpp | 1 +
be/test/io/fs/packed_file_concurrency_test.cpp | 2 +
27 files changed, 115 insertions(+), 64 deletions(-)
diff --git a/be/src/cloud/cloud_internal_service.cpp
b/be/src/cloud/cloud_internal_service.cpp
index 45cc2016b25..4bceaa64914 100644
--- a/be/src/cloud/cloud_internal_service.cpp
+++ b/be/src/cloud/cloud_internal_service.cpp
@@ -554,6 +554,7 @@ void
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
.is_dryrun =
config::enable_reader_dryrun_when_download_file_cache,
.is_warmup = true},
.download_done = std::move(download_done),
+ .tablet_id = tablet_id,
};
g_file_cache_event_driven_warm_up_submitted_segment_num << 1;
@@ -629,6 +630,7 @@ void
CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
.is_dryrun =
config::enable_reader_dryrun_when_download_file_cache,
.is_warmup = true},
.download_done = std::move(download_done),
+ .tablet_id = tablet_id,
};
g_file_cache_event_driven_warm_up_submitted_index_num << 1;
g_file_cache_event_driven_warm_up_submitted_index_size <<
idx_size;
diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index d67f7d97add..d7fb5610f51 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -517,6 +517,7 @@ void CloudTablet::add_rowsets(std::vector<RowsetSharedPtr>
to_add, bool version_
LOG_WARNING("add rowset warm up error
").error(st);
}
}},
+ .tablet_id = _tablet_meta->tablet_id(),
});
}
@@ -550,6 +551,7 @@ void CloudTablet::add_rowsets(std::vector<RowsetSharedPtr>
to_add, bool version_
LOG_WARNING("add rowset warm up error
").error(st);
}
}},
+ .tablet_id = _tablet_meta->tablet_id(),
};
self->update_rowset_warmup_state_inverted_idx_num_unlocked(WarmUpTriggerSource::SYNC_ROWSET,
rowset_meta->rowset_id(), 1);
_engine.file_cache_block_downloader().submit_download_task(std::move(meta));
diff --git a/be/src/cloud/cloud_warm_up_manager.cpp
b/be/src/cloud/cloud_warm_up_manager.cpp
index 6195b8ca844..02d11f210b9 100644
--- a/be/src/cloud/cloud_warm_up_manager.cpp
+++ b/be/src/cloud/cloud_warm_up_manager.cpp
@@ -131,7 +131,8 @@ void CloudWarmUpManager::submit_download_tasks(io::Path
path, int64_t file_size,
io::FileSystemSPtr file_system,
int64_t expiration_time,
std::shared_ptr<bthread::CountdownEvent> wait,
- bool is_index,
std::function<void(Status)> done_cb) {
+ bool is_index,
std::function<void(Status)> done_cb,
+ int64_t tablet_id) {
VLOG_DEBUG << "submit warm up task for file: " << path << ", file_size: "
<< file_size
<< ", expiration_time: " << expiration_time
<< ", is_index: " << (is_index ? "true" : "false");
@@ -186,6 +187,7 @@ void CloudWarmUpManager::submit_download_tasks(io::Path
path, int64_t file_size,
}
wait->signal();
},
+ .tablet_id = tablet_id,
});
offset += current_chunk_size;
@@ -259,7 +261,8 @@ void CloudWarmUpManager::handle_jobs() {
submit_download_tasks(
storage_resource.value()->remote_segment_path(*rs, seg_id),
rs->segment_file_size(cast_set<int>(seg_id)),
rs->fs(),
- expiration_time, wait, false, [tablet, rs,
seg_id](Status st) {
+ expiration_time, wait, false,
+ [tablet, rs, seg_id](Status st) {
VLOG_DEBUG << "warmup rowset " <<
rs->version() << " segment "
<< seg_id << " completed";
if (tablet->complete_rowset_segment_warmup(
@@ -269,7 +272,8 @@ void CloudWarmUpManager::handle_jobs() {
VLOG_DEBUG << "warmup rowset " <<
rs->version()
<< " completed";
}
- });
+ },
+ tablet_id);
}
// 2nd. download inverted index files
@@ -316,7 +320,8 @@ void CloudWarmUpManager::handle_jobs() {
VLOG_DEBUG << "warmup rowset " <<
rs->version()
<< " completed";
}
- });
+ },
+ tablet_id);
}
} else {
if (schema_ptr->has_inverted_index() ||
schema_ptr->has_ann_index()) {
@@ -339,7 +344,8 @@ void CloudWarmUpManager::handle_jobs() {
VLOG_DEBUG << "warmup rowset " <<
rs->version()
<< " completed";
}
- });
+ },
+ tablet_id);
}
}
}
diff --git a/be/src/cloud/cloud_warm_up_manager.h
b/be/src/cloud/cloud_warm_up_manager.h
index f5b00394a27..8bc2361d95d 100644
--- a/be/src/cloud/cloud_warm_up_manager.h
+++ b/be/src/cloud/cloud_warm_up_manager.h
@@ -114,7 +114,8 @@ private:
void submit_download_tasks(io::Path path, int64_t file_size,
io::FileSystemSPtr file_system,
int64_t expiration_time,
std::shared_ptr<bthread::CountdownEvent> wait,
bool is_index = false,
- std::function<void(Status)> done_cb = nullptr);
+ std::function<void(Status)> done_cb = nullptr,
+ int64_t tablet_id = -1);
std::mutex _mtx;
std::condition_variable _cond;
int64_t _cur_job_id {0};
diff --git a/be/src/io/cache/block_file_cache_downloader.cpp
b/be/src/io/cache/block_file_cache_downloader.cpp
index 9c19083dbe2..fe5ec3e20dc 100644
--- a/be/src/io/cache/block_file_cache_downloader.cpp
+++ b/be/src/io/cache/block_file_cache_downloader.cpp
@@ -286,6 +286,7 @@ void FileCacheBlockDownloader::download_file_cache_block(
.is_warmup = true,
},
.download_done = std::move(download_done),
+ .tablet_id = meta.tablet_id(),
};
download_segment_file(download_meta);
});
@@ -300,6 +301,7 @@ void FileCacheBlockDownloader::download_segment_file(const
DownloadFileMeta& met
.is_doris_table = true,
.cache_base_path {},
.file_size = meta.file_size,
+ .tablet_id = meta.tablet_id,
};
auto st = meta.file_system->open_file(meta.path, &file_reader, &opts);
if (!st.ok()) {
diff --git a/be/src/io/cache/block_file_cache_downloader.h
b/be/src/io/cache/block_file_cache_downloader.h
index c9a46891673..29d55a4c6cc 100644
--- a/be/src/io/cache/block_file_cache_downloader.h
+++ b/be/src/io/cache/block_file_cache_downloader.h
@@ -44,6 +44,7 @@ struct DownloadFileMeta {
io::FileSystemSPtr file_system;
IOContext ctx;
std::function<void(Status)> download_done;
+ int64_t tablet_id {-1};
};
struct DownloadTask {
diff --git a/be/src/io/cache/cached_remote_file_reader.cpp
b/be/src/io/cache/cached_remote_file_reader.cpp
index e155af8c898..a78a4194161 100644
--- a/be/src/io/cache/cached_remote_file_reader.cpp
+++ b/be/src/io/cache/cached_remote_file_reader.cpp
@@ -48,7 +48,6 @@
#include "io/fs/file_reader.h"
#include "io/fs/local_file_system.h"
#include "io/io_common.h"
-#include "olap/storage_policy.h"
#include "runtime/client_cache.h"
#include "runtime/exec_env.h"
#include "runtime/thread_context.h"
@@ -92,7 +91,8 @@ bvar::Adder<uint64_t> g_failed_get_peer_addr_counter(
CachedRemoteFileReader::CachedRemoteFileReader(FileReaderSPtr
remote_file_reader,
const FileReaderOptions& opts)
- : _remote_file_reader(std::move(remote_file_reader)) {
+ : _tablet_id(opts.tablet_id),
_remote_file_reader(std::move(remote_file_reader)) {
+ DCHECK(!opts.is_doris_table || _tablet_id > 0);
_is_doris_table = opts.is_doris_table;
if (_is_doris_table) {
_cache_hash = BlockFileCache::hash(path().filename().native());
@@ -158,29 +158,30 @@ std::pair<size_t, size_t>
CachedRemoteFileReader::s_align_size(size_t offset, si
}
namespace {
-std::optional<int64_t> extract_tablet_id(const std::string& file_path) {
- return StorageResource::parse_tablet_id_from_path(file_path);
+// Execute S3 read
+Status execute_s3_read(size_t empty_start, size_t& size,
std::unique_ptr<char[]>& buffer,
+ ReadStatistics& stats, const IOContext* io_ctx,
+ FileReaderSPtr remote_file_reader) {
+ s3_read_counter << 1;
+ SCOPED_RAW_TIMER(&stats.remote_read_timer);
+ stats.from_peer_cache = false;
+ return remote_file_reader->read_at(empty_start, Slice(buffer.get(), size),
&size, io_ctx);
}
// Get peer connection info from tablet_id
-std::pair<std::string, int> get_peer_connection_info(const std::string&
file_path) {
+std::pair<std::string, int> get_peer_connection_info(int64_t tablet_id,
+ const std::string&
file_path) {
std::string host = "";
int port = 0;
- // Try to get tablet_id from actual path and lookup tablet info
- if (auto tablet_id = extract_tablet_id(file_path)) {
- auto& manager =
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
- if (auto tablet_info = manager.get_balanced_tablet_info(*tablet_id)) {
- host = tablet_info->first;
- port = tablet_info->second;
- } else {
- LOG_EVERY_N(WARNING, 100)
- << "get peer connection info not found"
- << ", tablet_id=" << *tablet_id << ", file_path=" <<
file_path;
- }
+ DCHECK(tablet_id > 0);
+ auto& manager =
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
+ if (auto tablet_info = manager.get_balanced_tablet_info(tablet_id)) {
+ host = tablet_info->first;
+ port = tablet_info->second;
} else {
- LOG_EVERY_N(WARNING, 100) << "parse tablet id from path failed"
- << "tablet_id=null, file_path=" << file_path;
+ LOG_EVERY_N(WARNING, 100) << "get peer connection info not found"
+ << ", tablet_id=" << tablet_id << ",
file_path=" << file_path;
}
DBUG_EXECUTE_IF("PeerFileCacheReader::_fetch_from_peer_cache_blocks", {
@@ -200,8 +201,8 @@ std::pair<std::string, int> get_peer_connection_info(const
std::string& file_pat
Status execute_peer_read(const std::vector<FileBlockSPtr>& empty_blocks,
size_t empty_start,
size_t& size, std::unique_ptr<char[]>& buffer,
const std::string& file_path, size_t file_size, bool
is_doris_table,
- ReadStatistics& stats, const IOContext* io_ctx) {
- auto [host, port] = get_peer_connection_info(file_path);
+ int64_t tablet_id, ReadStatistics& stats, const
IOContext* io_ctx) {
+ auto [host, port] = get_peer_connection_info(tablet_id, file_path);
VLOG_DEBUG << "PeerFileCacheReader read from peer, host=" << host << ",
port=" << port
<< ", file_path=" << file_path;
@@ -226,16 +227,6 @@ Status execute_peer_read(const std::vector<FileBlockSPtr>&
empty_blocks, size_t
return st;
}
-// Execute S3 read
-Status execute_s3_read(size_t empty_start, size_t& size,
std::unique_ptr<char[]>& buffer,
- ReadStatistics& stats, const IOContext* io_ctx,
- FileReaderSPtr remote_file_reader) {
- s3_read_counter << 1;
- SCOPED_RAW_TIMER(&stats.remote_read_timer);
- stats.from_peer_cache = false;
- return remote_file_reader->read_at(empty_start, Slice(buffer.get(), size),
&size, io_ctx);
-}
-
} // anonymous namespace
Status CachedRemoteFileReader::_execute_remote_read(const
std::vector<FileBlockSPtr>& empty_blocks,
@@ -257,7 +248,7 @@ Status CachedRemoteFileReader::_execute_remote_read(const
std::vector<FileBlockS
return execute_s3_read(empty_start, size, buffer, stats, io_ctx,
_remote_file_reader);
} else {
return execute_peer_read(empty_blocks, empty_start, size, buffer,
path().native(),
- this->size(), _is_doris_table, stats,
io_ctx);
+ this->size(), _is_doris_table,
_tablet_id, stats, io_ctx);
}
});
@@ -271,7 +262,7 @@ Status CachedRemoteFileReader::_execute_remote_read(const
std::vector<FileBlockS
// ATTN: Save original size before peer read, as it may be modified by
fetch_blocks, read peer ref size
size_t original_size = size;
auto st = execute_peer_read(empty_blocks, empty_start, size, buffer,
path().native(),
- this->size(), _is_doris_table, stats,
io_ctx);
+ this->size(), _is_doris_table, _tablet_id,
stats, io_ctx);
if (!st.ok()) {
// Restore original size for S3 fallback, as peer read may have
modified it
size = original_size;
@@ -388,6 +379,7 @@ Status CachedRemoteFileReader::read_at_impl(size_t offset,
Slice result, size_t*
s_align_size(offset + already_read, bytes_req - already_read,
size());
CacheContext cache_context(io_ctx);
cache_context.stats = &stats;
+ cache_context.tablet_id = _tablet_id;
MonotonicStopWatch sw;
sw.start();
diff --git a/be/src/io/cache/cached_remote_file_reader.h
b/be/src/io/cache/cached_remote_file_reader.h
index 674fbcf3460..19710f2f8c3 100644
--- a/be/src/io/cache/cached_remote_file_reader.h
+++ b/be/src/io/cache/cached_remote_file_reader.h
@@ -83,7 +83,8 @@ private:
void _update_stats(const ReadStatistics& stats, FileCacheStatistics* state,
bool is_inverted_index) const;
- bool _is_doris_table;
+ bool _is_doris_table = false;
+ int64_t _tablet_id = -1;
FileReaderSPtr _remote_file_reader;
UInt128Wrapper _cache_hash;
BlockFileCache* _cache;
diff --git a/be/src/io/cache/file_cache_common.h
b/be/src/io/cache/file_cache_common.h
index 417f68ecc97..cdadf0861f1 100644
--- a/be/src/io/cache/file_cache_common.h
+++ b/be/src/io/cache/file_cache_common.h
@@ -164,6 +164,7 @@ struct CacheContext {
bool is_cold_data {false};
ReadStatistics* stats;
bool is_warmup {false};
+ int64_t tablet_id {0};
};
template <class Lock>
diff --git a/be/src/io/tools/file_cache_microbench.cpp
b/be/src/io/tools/file_cache_microbench.cpp
index db85b5707af..48514efed5a 100644
--- a/be/src/io/tools/file_cache_microbench.cpp
+++ b/be/src/io/tools/file_cache_microbench.cpp
@@ -1490,6 +1490,7 @@ private:
doris::io::FileReaderOptions reader_opts;
reader_opts.cache_type =
doris::io::FileCachePolicy::FILE_BLOCK_CACHE;
reader_opts.is_doris_table = true;
+ reader_opts.tablet_id = 1; // microbench placeholder
doris::io::FileDescription fd;
std::string obj_path = "s3://" +
doris::config::test_s3_bucket + "/";
diff --git a/be/src/olap/collection_statistics.cpp
b/be/src/olap/collection_statistics.cpp
index 7a548ed0a14..68cff3c3069 100644
--- a/be/src/olap/collection_statistics.cpp
+++ b/be/src/olap/collection_statistics.cpp
@@ -164,7 +164,7 @@ Status CollectionStatistics::process_segment(const
RowsetSharedPtr& rowset, int3
rowset_meta->fs(),
std::string
{InvertedIndexDescriptor::get_index_file_path_prefix(seg_path)},
tablet_schema->get_inverted_index_storage_format(),
- rowset_meta->inverted_index_file_info(seg_id));
+ rowset_meta->inverted_index_file_info(seg_id),
rowset_meta->tablet_id());
RETURN_IF_ERROR(idx_file_reader->init(config::inverted_index_read_buffer_size,
io_ctx));
int32_t total_seg_num_docs = 0;
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 81cbbfcbce9..c168818f3a9 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -871,7 +871,7 @@ Status Compaction::do_inverted_index_compaction() {
fs,
std::string
{InvertedIndexDescriptor::get_index_file_path_prefix(seg_path.value())},
_cur_tablet_schema->get_inverted_index_storage_format(),
- rowset->rowset_meta()->inverted_index_file_info(seg_id));
+ rowset->rowset_meta()->inverted_index_file_info(seg_id),
_tablet->tablet_id());
auto st =
index_file_reader->init(config::inverted_index_read_buffer_size);
DBUG_EXECUTE_IF("Compaction::do_inverted_index_compaction_init_inverted_index_file_reader",
{
@@ -1125,7 +1125,8 @@ void
Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) {
std::string
{InvertedIndexDescriptor::get_index_file_path_prefix(
seg_path.value())},
_cur_tablet_schema->get_inverted_index_storage_format(),
-
rowset->rowset_meta()->inverted_index_file_info(i));
+
rowset->rowset_meta()->inverted_index_file_info(i),
+ _tablet->tablet_id());
auto st =
index_file_reader->init(config::inverted_index_read_buffer_size);
index_file_path =
index_file_reader->get_index_file_path(index_meta);
DBUG_EXECUTE_IF(
diff --git a/be/src/olap/rowset/beta_rowset.cpp
b/be/src/olap/rowset/beta_rowset.cpp
index 35fbf1dff1e..35d2b701089 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -633,6 +633,7 @@ Status BetaRowset::check_current_rowset_segment() {
.is_doris_table = true,
.cache_base_path {},
.file_size = _rowset_meta->segment_file_size(seg_id),
+ .tablet_id = _rowset_meta->tablet_id(),
};
auto s = segment_v2::Segment::open(fs, seg_path,
_rowset_meta->tablet_id(), seg_id,
@@ -843,7 +844,8 @@ Status BetaRowset::show_nested_index_file(rapidjson::Value*
rowset_value,
auto seg_path = DORIS_TRY(segment_path(seg_id));
auto index_file_path_prefix =
InvertedIndexDescriptor::get_index_file_path_prefix(seg_path);
auto index_file_reader = std::make_unique<IndexFileReader>(
- fs, std::string(index_file_path_prefix), storage_format);
+ fs, std::string(index_file_path_prefix), storage_format,
InvertedIndexFileInfo(),
+ _rowset_meta->tablet_id());
RETURN_IF_ERROR(index_file_reader->init());
auto dirs = index_file_reader->get_all_directories();
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp
b/be/src/olap/rowset/beta_rowset_writer.cpp
index 2c2462f1d57..bdb3dd9075e 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -493,6 +493,7 @@ Status
BetaRowsetWriter::_load_noncompacted_segment(segment_v2::SegmentSharedPtr
: io::FileCachePolicy::NO_CACHE,
.is_doris_table = true,
.cache_base_path {},
+ .tablet_id = _rowset_meta->tablet_id(),
};
auto s = segment_v2::Segment::open(fs, path, _rowset_meta->tablet_id(),
segment_id, rowset_id(),
_context.tablet_schema, reader_options,
&segment);
@@ -1161,7 +1162,7 @@ Status
BetaRowsetWriter::create_segment_writer_for_segcompaction(
index_file_writer = std::make_unique<IndexFileWriter>(
_context.fs(), prefix, _context.rowset_id.to_string(),
_num_segcompacted,
_context.tablet_schema->get_inverted_index_storage_format(),
- std::move(idx_file_writer));
+ std::move(idx_file_writer), true /* can_use_ram_dir */,
_context.tablet_id);
}
segment_v2::SegmentWriterOptions writer_options;
diff --git a/be/src/olap/rowset/rowset_writer.h
b/be/src/olap/rowset/rowset_writer.h
index 8809b34f87a..0a319eba6ef 100644
--- a/be/src/olap/rowset/rowset_writer.h
+++ b/be/src/olap/rowset/rowset_writer.h
@@ -115,7 +115,7 @@ public:
*index_file_writer = std::make_unique<IndexFileWriter>(
_context.fs(), segment_prefix, _context.rowset_id.to_string(),
segment_id,
_context.tablet_schema->get_inverted_index_storage_format(),
- std::move(idx_file_v2_ptr), can_use_ram_dir);
+ std::move(idx_file_v2_ptr), can_use_ram_dir,
_context.tablet_id);
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/index_file_reader.cpp
b/be/src/olap/rowset/segment_v2/index_file_reader.cpp
index cc3c2636c0f..8d07ad6aa01 100644
--- a/be/src/olap/rowset/segment_v2/index_file_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/index_file_reader.cpp
@@ -63,8 +63,9 @@ Status IndexFileReader::_init_from(int32_t read_buffer_size,
const io::IOContext
DCHECK(_fs != nullptr) << "file system is nullptr,
index_file_full_path: "
<< index_file_full_path;
// 2. open file
- auto ok = DorisFSDirectory::FSIndexInput::open(
- _fs, index_file_full_path.c_str(), index_input, err,
read_buffer_size, file_size);
+ auto ok =
+ DorisFSDirectory::FSIndexInput::open(_fs,
index_file_full_path.c_str(), index_input,
+ err, read_buffer_size,
file_size, _tablet_id);
if (!ok) {
if (err.number() == CL_ERR_FileNotFound) {
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
@@ -182,8 +183,9 @@ Result<std::unique_ptr<DorisCompoundReader,
DirectoryDeleter>> IndexFileReader::
DCHECK(_fs != nullptr)
<< "file system is nullptr, index_file_path: " <<
index_file_path;
// 2. open file
- auto ok = DorisFSDirectory::FSIndexInput::open(
- _fs, index_file_path.c_str(), index_input, err,
_read_buffer_size, file_size);
+ auto ok = DorisFSDirectory::FSIndexInput::open(_fs,
index_file_path.c_str(),
+ index_input, err,
_read_buffer_size,
+ file_size,
_tablet_id);
if (!ok) {
// now index_input = nullptr
if (err.number() == CL_ERR_FileNotFound) {
diff --git a/be/src/olap/rowset/segment_v2/index_file_reader.h
b/be/src/olap/rowset/segment_v2/index_file_reader.h
index 2b5f2e183a1..e9de115c3af 100644
--- a/be/src/olap/rowset/segment_v2/index_file_reader.h
+++ b/be/src/olap/rowset/segment_v2/index_file_reader.h
@@ -52,11 +52,13 @@ public:
IndexFileReader(io::FileSystemSPtr fs, std::string index_path_prefix,
InvertedIndexStorageFormatPB storage_format,
- InvertedIndexFileInfo idx_file_info =
InvertedIndexFileInfo())
+ InvertedIndexFileInfo idx_file_info =
InvertedIndexFileInfo(),
+ int64_t tablet_id = -1)
: _fs(std::move(fs)),
_index_path_prefix(std::move(index_path_prefix)),
_storage_format(storage_format),
- _idx_file_info(idx_file_info) {}
+ _idx_file_info(idx_file_info),
+ _tablet_id(tablet_id) {}
virtual ~IndexFileReader() = default;
MOCK_FUNCTION Status init(int32_t read_buffer_size =
config::inverted_index_read_buffer_size,
@@ -90,6 +92,7 @@ private:
mutable std::shared_mutex _mutex; // Use mutable for const read operations
bool _inited = false;
InvertedIndexFileInfo _idx_file_info;
+ int64_t _tablet_id = -1;
};
} // namespace segment_v2
diff --git a/be/src/olap/rowset/segment_v2/index_file_writer.cpp
b/be/src/olap/rowset/segment_v2/index_file_writer.cpp
index 9d30f5b5126..07bff98e25d 100644
--- a/be/src/olap/rowset/segment_v2/index_file_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/index_file_writer.cpp
@@ -41,7 +41,8 @@ namespace doris::segment_v2 {
IndexFileWriter::IndexFileWriter(io::FileSystemSPtr fs, std::string
index_path_prefix,
std::string rowset_id, int64_t seg_id,
InvertedIndexStorageFormatPB storage_format,
- io::FileWriterPtr file_writer, bool
can_use_ram_dir)
+ io::FileWriterPtr file_writer, bool
can_use_ram_dir,
+ int64_t tablet_id)
: _fs(std::move(fs)),
_index_path_prefix(std::move(index_path_prefix)),
_rowset_id(std::move(rowset_id)),
@@ -49,7 +50,8 @@ IndexFileWriter::IndexFileWriter(io::FileSystemSPtr fs,
std::string index_path_p
_storage_format(storage_format),
_local_fs(io::global_local_filesystem()),
_idx_v2_writer(std::move(file_writer)),
- _can_use_ram_dir(can_use_ram_dir) {
+ _can_use_ram_dir(can_use_ram_dir),
+ _tablet_id(tablet_id) {
auto tmp_file_dir =
ExecEnv::GetInstance()->get_tmp_file_dirs()->get_tmp_file_dir();
_tmp_dir = tmp_file_dir.native();
if (_storage_format == InvertedIndexStorageFormatPB::V1) {
@@ -121,8 +123,8 @@ Status IndexFileWriter::delete_index(const TabletIndex*
index_meta) {
}
Status IndexFileWriter::add_into_searcher_cache() {
- auto index_file_reader =
- std::make_unique<IndexFileReader>(_fs, _index_path_prefix,
_storage_format);
+ auto index_file_reader = std::make_unique<IndexFileReader>(
+ _fs, _index_path_prefix, _storage_format, InvertedIndexFileInfo(),
_tablet_id);
auto st = index_file_reader->init();
if (!st.ok()) {
if (dynamic_cast<io::StreamSinkFileWriter*>(_idx_v2_writer.get()) !=
nullptr) {
diff --git a/be/src/olap/rowset/segment_v2/index_file_writer.h
b/be/src/olap/rowset/segment_v2/index_file_writer.h
index 5adb8f76ac1..c7f08c0d77a 100644
--- a/be/src/olap/rowset/segment_v2/index_file_writer.h
+++ b/be/src/olap/rowset/segment_v2/index_file_writer.h
@@ -50,7 +50,8 @@ class IndexFileWriter {
public:
IndexFileWriter(io::FileSystemSPtr fs, std::string index_path_prefix,
std::string rowset_id,
int64_t seg_id, InvertedIndexStorageFormatPB
storage_format,
- io::FileWriterPtr file_writer = nullptr, bool
can_use_ram_dir = true);
+ io::FileWriterPtr file_writer = nullptr, bool
can_use_ram_dir = true,
+ int64_t tablet_id = -1);
virtual ~IndexFileWriter() = default;
MOCK_FUNCTION Result<std::shared_ptr<DorisFSDirectory>> open(const
TabletIndex* index_meta);
@@ -111,6 +112,7 @@ private:
bool _can_use_ram_dir = true;
IndexStorageFormatPtr _index_storage_format;
+ int64_t _tablet_id = -1;
friend class IndexStorageFormatV1;
friend class IndexStorageFormatV2;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
index 2232369e3dd..568c016329b 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
@@ -87,7 +87,8 @@ const char* const DorisFSDirectory::WRITE_LOCK_FILE =
"write.lock";
bool DorisFSDirectory::FSIndexInput::open(const io::FileSystemSPtr& fs, const
char* path,
IndexInput*& ret, CLuceneError&
error,
- int32_t buffer_size, int64_t
file_size) {
+ int32_t buffer_size, int64_t
file_size,
+ int64_t tablet_id) {
CND_PRECONDITION(path != nullptr, "path is NULL");
if (buffer_size == -1) {
@@ -100,6 +101,7 @@ bool DorisFSDirectory::FSIndexInput::open(const
io::FileSystemSPtr& fs, const ch
:
io::FileCachePolicy::NO_CACHE;
reader_options.is_doris_table = true;
reader_options.file_size = file_size;
+ reader_options.tablet_id = tablet_id;
Status st = fs->open_file(path, &h->_reader, &reader_options);
DBUG_EXECUTE_IF("inverted file read error: index file not found",
{ st = Status::Error<doris::ErrorCode::NOT_FOUND>("index
file not found"); })
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
index 7b1ac0bdf55..79854df88d2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
@@ -190,7 +190,8 @@ protected:
public:
static bool open(const io::FileSystemSPtr& fs, const char* path,
IndexInput*& ret,
- CLuceneError& error, int32_t bufferSize = -1, int64_t
file_size = -1);
+ CLuceneError& error, int32_t bufferSize = -1, int64_t
file_size = -1,
+ int64_t tablet_id = -1);
~FSIndexInput() override;
IndexInput* clone() const override;
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index ead805a2fd1..10b7499b3b8 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -88,8 +88,15 @@ Status Segment::open(io::FileSystemSPtr fs, const
std::string& path, int64_t tab
uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr
tablet_schema,
const io::FileReaderOptions& reader_options,
std::shared_ptr<Segment>* output,
InvertedIndexFileInfo idx_file_info,
OlapReaderStatistics* stats) {
- auto s = _open(fs, path, segment_id, rowset_id, tablet_schema,
reader_options, output,
+ // Ensure tablet_id is available in reader_options for
CachedRemoteFileReader peer read.
+ io::FileReaderOptions opts_with_tablet = reader_options;
+ opts_with_tablet.tablet_id = tablet_id;
+
+ auto s = _open(fs, path, segment_id, rowset_id, tablet_schema,
opts_with_tablet, output,
idx_file_info, stats);
+ if (s.ok() && output && *output) {
+ (*output)->_tablet_id = tablet_id;
+ }
if (!s.ok()) {
if (!config::is_cloud_mode()) {
auto res = ExecEnv::get_tablet(tablet_id);
@@ -236,7 +243,7 @@ Status Segment::_open_index_file_reader() {
_fs,
std::string {InvertedIndexDescriptor::get_index_file_path_prefix(
_file_reader->path().native())},
- _tablet_schema->get_inverted_index_storage_format(),
_idx_file_info);
+ _tablet_schema->get_inverted_index_storage_format(),
_idx_file_info, _tablet_id);
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index 1e24b16fa80..ddbf34b8ffa 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -295,6 +295,7 @@ private:
DorisCallOnce<Status> _index_file_reader_open;
InvertedIndexFileInfo _idx_file_info;
+ int64_t _tablet_id = -1;
int _be_exec_version = BeExecVersionManager::get_newest_version();
};
diff --git a/be/src/olap/task/index_builder.cpp
b/be/src/olap/task/index_builder.cpp
index 8dab082cdef..1dc0c5e7565 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -278,7 +278,8 @@ Status IndexBuilder::update_inverted_index_info() {
auto idx_file_reader = std::make_unique<IndexFileReader>(
context.fs(),
std::string
{InvertedIndexDescriptor::get_index_file_path_prefix(seg_path)},
-
output_rs_tablet_schema->get_inverted_index_storage_format());
+
output_rs_tablet_schema->get_inverted_index_storage_format(),
+ InvertedIndexFileInfo(), _tablet->tablet_id());
auto st = idx_file_reader->init();
DBUG_EXECUTE_IF(
"IndexBuilder::update_inverted_index_info_index_file_reader_init_not_ok", {
@@ -371,7 +372,7 @@ Status
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
fs, std::move(index_path_prefix),
output_rowset_meta->rowset_id().to_string(),
seg_ptr->id(),
output_rowset_schema->get_inverted_index_storage_format(),
- std::move(file_writer));
+ std::move(file_writer), true /* can_use_ram_dir */,
_tablet->tablet_id());
RETURN_IF_ERROR(index_file_writer->initialize(dirs));
// create inverted index writer
for (auto& index_meta : _dropped_inverted_indexes) {
@@ -442,12 +443,13 @@ Status
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
index_file_writer = std::make_unique<IndexFileWriter>(
fs, index_path_prefix,
output_rowset_meta->rowset_id().to_string(),
seg_ptr->id(),
output_rowset_schema->get_inverted_index_storage_format(),
- std::move(file_writer));
+ std::move(file_writer), true /* can_use_ram_dir */,
_tablet->tablet_id());
RETURN_IF_ERROR(index_file_writer->initialize(dirs));
} else {
index_file_writer = std::make_unique<IndexFileWriter>(
fs, index_path_prefix,
output_rowset_meta->rowset_id().to_string(),
- seg_ptr->id(),
output_rowset_schema->get_inverted_index_storage_format());
+ seg_ptr->id(),
output_rowset_schema->get_inverted_index_storage_format(),
+ nullptr, true /* can_use_ram_dir */,
_tablet->tablet_id());
}
// create inverted index writer, or ann index writer
for (auto inverted_index : _alter_inverted_indexes) {
diff --git a/be/test/io/cache/block_file_cache_test.cpp
b/be/test/io/cache/block_file_cache_test.cpp
index a968d46dea9..88c0a00d0bb 100644
--- a/be/test/io/cache/block_file_cache_test.cpp
+++ b/be/test/io/cache/block_file_cache_test.cpp
@@ -3727,6 +3727,7 @@ TEST_F(BlockFileCacheTest, cached_remote_file_reader) {
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
CachedRemoteFileReader reader(local_reader, opts);
auto key = io::BlockFileCache::hash("tmp_file");
EXPECT_EQ(reader._cache_hash, key);
@@ -3840,6 +3841,7 @@ TEST_F(BlockFileCacheTest,
cached_remote_file_reader_tail) {
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
CachedRemoteFileReader reader(local_reader, opts);
{
std::string buffer;
@@ -3913,6 +3915,7 @@ TEST_F(BlockFileCacheTest,
cached_remote_file_reader_error_handle) {
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
CachedRemoteFileReader reader(local_reader, opts);
auto sp = SyncPoint::get_instance();
sp->enable_processing();
@@ -3998,6 +4001,7 @@ TEST_F(BlockFileCacheTest,
cached_remote_file_reader_self_heal_on_downloaded_not
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
CachedRemoteFileReader reader(local_reader, opts);
uint64_t before_self_heal =
g_read_cache_self_heal_on_not_found.get_value();
@@ -4094,6 +4098,7 @@ TEST_F(BlockFileCacheTest, cached_remote_file_reader_no_self_heal_on_non_not_fou
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
CachedRemoteFileReader reader(local_reader, opts);
std::string buffer(64_kb, '\0');
@@ -4242,6 +4247,7 @@ TEST_F(BlockFileCacheTest, cached_remote_file_reader_concurrent) {
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
bool flag1 = false;
auto reader = std::make_shared<CachedRemoteFileReader>(local_reader, opts);
auto sp = SyncPoint::get_instance();
@@ -4326,6 +4332,7 @@ TEST_F(BlockFileCacheTest, cached_remote_file_reader_concurrent_2) {
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
auto reader = std::make_shared<CachedRemoteFileReader>(local_reader, opts);
auto sp = SyncPoint::get_instance();
sp->enable_processing();
@@ -4846,6 +4853,7 @@ TEST_F(BlockFileCacheTest, cached_remote_file_reader_opt_lock) {
io::FileReaderOptions opts;
opts.cache_type = FileCachePolicy::FILE_BLOCK_CACHE;
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
{
FileReaderSPtr local_reader;
ASSERT_TRUE(global_local_filesystem()->open_file(tmp_file,
&local_reader).ok());
@@ -7474,6 +7482,7 @@ TEST_F(BlockFileCacheTest, reader_dryrun_when_download_file_cache) {
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
CachedRemoteFileReader reader(local_reader, opts);
auto key = io::BlockFileCache::hash("tmp_file");
EXPECT_EQ(reader._cache_hash, key);
@@ -7959,6 +7968,7 @@ TEST_F(BlockFileCacheTest, cached_remote_file_reader_ttl_index) {
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
CachedRemoteFileReader reader(local_reader, opts);
auto key = io::BlockFileCache::hash("tmp_file");
EXPECT_EQ(reader._cache_hash, key);
@@ -8036,6 +8046,7 @@ TEST_F(BlockFileCacheTest, cached_remote_file_reader_normal_index) {
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
CachedRemoteFileReader reader(local_reader, opts);
auto key = io::BlockFileCache::hash("tmp_file");
EXPECT_EQ(reader._cache_hash, key);
@@ -8192,6 +8203,7 @@ TEST_F(BlockFileCacheTest, DISABLE_cached_remote_file_reader_direct_read_and_evi
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
auto reader = std::make_shared<CachedRemoteFileReader>(local_reader, opts);
std::string buffer;
@@ -8282,6 +8294,7 @@ TEST_F(BlockFileCacheTest, cached_remote_file_reader_direct_read_bytes_check) {
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
auto reader = std::make_shared<CachedRemoteFileReader>(local_reader, opts);
std::string buffer;
diff --git a/be/test/io/cache/block_file_cache_test_lru_dump.cpp b/be/test/io/cache/block_file_cache_test_lru_dump.cpp
index b44a348c5fa..a9e0ccaf8b8 100644
--- a/be/test/io/cache/block_file_cache_test_lru_dump.cpp
+++ b/be/test/io/cache/block_file_cache_test_lru_dump.cpp
@@ -528,6 +528,7 @@ TEST_F(BlockFileCacheTest, cached_remote_file_reader_direct_read_order_check) {
io::FileReaderOptions opts;
opts.cache_type = io::cache_type_from_string("file_block_cache");
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
auto reader = std::make_shared<CachedRemoteFileReader>(local_reader, opts);
std::string buffer;
diff --git a/be/test/io/fs/packed_file_concurrency_test.cpp b/be/test/io/fs/packed_file_concurrency_test.cpp
index 31c2db19fb8..bbab140212a 100644
--- a/be/test/io/fs/packed_file_concurrency_test.cpp
+++ b/be/test/io/fs/packed_file_concurrency_test.cpp
@@ -454,6 +454,7 @@ protected:
FileReaderOptions local_opts = opts ? *opts : FileReaderOptions();
local_opts.cache_type = FileCachePolicy::FILE_BLOCK_CACHE;
local_opts.is_doris_table = true;
+ local_opts.tablet_id = 10086;
*reader = std::make_shared<CachedRemoteFileReader>(raw, local_opts);
return Status::OK();
}
@@ -720,6 +721,7 @@ TEST_F(MergeFileConcurrencyTest, ConcurrentWriteReadCorrectness) {
FileReaderOptions opts;
opts.cache_type = FileCachePolicy::FILE_BLOCK_CACHE;
opts.is_doris_table = true;
+ opts.tablet_id = 10086;
ASSERT_TRUE(reader_fs.open_file(Path(path), &reader,
&opts).ok());
// After the fix, CachedRemoteFileReader wraps PackedFileReader (not vice versa)
// This ensures cache key uses segment path for proper cleanup
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]