This is an automated email from the ASF dual-hosted git repository.
gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d336eedc4fc [fix](filecache) load meta exit with file NOT_FOUND
exception (#59311)
d336eedc4fc is described below
commit d336eedc4fcea4787e8855c5719a95d4932bcf8a
Author: zhengyu <[email protected]>
AuthorDate: Fri Dec 26 10:05:34 2025 +0800
[fix](filecache) load meta exit with file NOT_FOUND exception (#59311)
When loading cache block files into memory, the directory iteration can fail
with exceptions (e.g. file NOT_FOUND) caused by concurrent changes to the
cache directory. This commit skips such errors and continues with the
remaining entries.
---
be/src/io/cache/fs_file_cache_storage.cpp | 53 ++++++-
.../io/cache/block_file_cache_test_meta_store.cpp | 154 +++++++++++++++++++++
2 files changed, 201 insertions(+), 6 deletions(-)
diff --git a/be/src/io/cache/fs_file_cache_storage.cpp
b/be/src/io/cache/fs_file_cache_storage.cpp
index d39cf584bef..0860265e181 100644
--- a/be/src/io/cache/fs_file_cache_storage.cpp
+++ b/be/src/io/cache/fs_file_cache_storage.cpp
@@ -26,6 +26,7 @@
#include <filesystem>
#include <mutex>
#include <system_error>
+#include <vector>
#include "common/logging.h"
#include "common/status.h"
@@ -877,8 +878,8 @@ void
FSFileCacheStorage::load_cache_info_into_memory(BlockFileCache* _mgr) const
// If the difference is more than threshold, load from filesystem as well
if (estimated_file_count > 100) {
double difference_ratio =
- static_cast<double>(estimated_file_count) -
- static_cast<double>(db_block_count) /
static_cast<double>(estimated_file_count);
+ (static_cast<double>(estimated_file_count) -
static_cast<double>(db_block_count)) /
+ static_cast<double>(estimated_file_count);
if (difference_ratio >
config::file_cache_meta_store_vs_file_system_diff_num_threshold) {
LOG(WARNING) << "Significant difference between DB blocks (" <<
db_block_count
@@ -983,13 +984,53 @@ size_t
FSFileCacheStorage::estimate_file_count_from_statfs() const {
// Get total size of cache directory to estimate file count
std::error_code ec;
uintmax_t total_size = 0;
- for (const auto& entry :
std::filesystem::recursive_directory_iterator(_cache_base_path, ec)) {
+ std::vector<std::filesystem::path> pending_dirs
{std::filesystem::path(_cache_base_path)};
+ while (!pending_dirs.empty()) {
+ auto current_dir = pending_dirs.back();
+ pending_dirs.pop_back();
+
+ std::filesystem::directory_iterator it(current_dir, ec);
if (ec) {
- LOG(WARNING) << "Error accessing directory entry: " <<
ec.message();
+ LOG(WARNING) << "Failed to list directory while estimating file
count, dir="
+ << current_dir << ", err=" << ec.message();
+ ec.clear();
continue;
}
- if (entry.is_regular_file()) {
- total_size += entry.file_size();
+
+ for (; it != std::filesystem::directory_iterator(); ++it) {
+ std::error_code status_ec;
+ auto entry_status = it->symlink_status(status_ec);
+ TEST_SYNC_POINT_CALLBACK(
+
"FSFileCacheStorage::estimate_file_count_from_statfs::AfterEntryStatus",
+ &status_ec);
+ if (status_ec) {
+ LOG(WARNING) << "Failed to stat entry while estimating file
count, path="
+ << it->path() << ", err=" << status_ec.message();
+ continue;
+ }
+
+ if (std::filesystem::is_directory(entry_status)) {
+ auto next_dir = it->path();
+ TEST_SYNC_POINT_CALLBACK(
+
"FSFileCacheStorage::estimate_file_count_from_statfs::OnDirectory",
+ &next_dir);
+ pending_dirs.emplace_back(next_dir);
+ continue;
+ }
+
+ if (std::filesystem::is_regular_file(entry_status)) {
+ std::error_code size_ec;
+ auto file_size = it->file_size(size_ec);
+ TEST_SYNC_POINT_CALLBACK(
+
"FSFileCacheStorage::estimate_file_count_from_statfs::AfterFileSize",
+ &size_ec);
+ if (size_ec) {
+ LOG(WARNING) << "Failed to get file size while estimating
file count, path="
+ << it->path() << ", err=" <<
size_ec.message();
+ continue;
+ }
+ total_size += file_size;
+ }
}
}
diff --git a/be/test/io/cache/block_file_cache_test_meta_store.cpp
b/be/test/io/cache/block_file_cache_test_meta_store.cpp
index 48e565cd8ee..8234d03b527 100644
--- a/be/test/io/cache/block_file_cache_test_meta_store.cpp
+++ b/be/test/io/cache/block_file_cache_test_meta_store.cpp
@@ -18,7 +18,20 @@
//
https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/tests/gtest_lru_file_cache.cpp
// and modified by Doris
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wkeyword-macro"
+#endif
+
+#define private public
+#define protected public
#include "block_file_cache_test_common.h"
+#undef private
+#undef protected
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
namespace doris::io {
@@ -498,6 +511,147 @@ TEST_F(BlockFileCacheTest,
clear_retains_meta_directory_and_clears_meta_entries)
}
}
+TEST_F(BlockFileCacheTest, estimate_file_count_skips_removed_directory) {
+ std::string test_dir = cache_base_path +
"/estimate_file_count_removed_dir";
+ if (fs::exists(test_dir)) {
+ fs::remove_all(test_dir);
+ }
+ auto keep_dir = fs::path(test_dir) / "keep";
+ auto remove_dir = fs::path(test_dir) / "remove";
+ fs::create_directories(keep_dir);
+ fs::create_directories(remove_dir);
+
+ auto keep_file = keep_dir / "data.bin";
+ std::string one_mb(1024 * 1024, 'd');
+ {
+ std::ofstream ofs(keep_file, std::ios::binary);
+ ASSERT_TRUE(ofs.good());
+ for (int i = 0; i < 3; ++i) {
+ ofs.write(one_mb.data(), one_mb.size());
+ ASSERT_TRUE(ofs.good());
+ }
+ }
+
+ FSFileCacheStorage storage;
+ storage._cache_base_path = test_dir;
+
+ const std::string sync_point_name =
+ "FSFileCacheStorage::estimate_file_count_from_statfs::OnDirectory";
+ auto* sync_point = doris::SyncPoint::get_instance();
+ doris::SyncPoint::CallbackGuard guard(sync_point_name);
+ sync_point->set_call_back(
+ sync_point_name,
+ [remove_dir](std::vector<std::any>&& args) {
+ auto* path =
doris::try_any_cast<std::filesystem::path*>(args[0]);
+ if (*path == remove_dir) {
+ fs::remove_all(remove_dir);
+ }
+ },
+ &guard);
+ sync_point->enable_processing();
+
+ size_t estimated_files = storage.estimate_file_count_from_statfs();
+
+ sync_point->disable_processing();
+
+ ASSERT_EQ(3, estimated_files);
+ ASSERT_FALSE(fs::exists(remove_dir));
+
+ if (fs::exists(test_dir)) {
+ fs::remove_all(test_dir);
+ }
+}
+
+TEST_F(BlockFileCacheTest, estimate_file_count_handles_stat_failure) {
+ std::string test_dir = cache_base_path +
"/estimate_file_count_stat_failure";
+ if (fs::exists(test_dir)) {
+ fs::remove_all(test_dir);
+ }
+ fs::create_directories(test_dir);
+
+ auto data_file = fs::path(test_dir) / "data.bin";
+ std::string one_mb(1024 * 1024, 'x');
+ {
+ std::ofstream ofs(data_file, std::ios::binary);
+ ASSERT_TRUE(ofs.good());
+ ofs.write(one_mb.data(), one_mb.size());
+ ASSERT_TRUE(ofs.good());
+ }
+
+ FSFileCacheStorage storage;
+ storage._cache_base_path = test_dir;
+
+ const std::string sync_point_name =
+
"FSFileCacheStorage::estimate_file_count_from_statfs::AfterEntryStatus";
+ auto* sync_point = doris::SyncPoint::get_instance();
+ doris::SyncPoint::CallbackGuard guard(sync_point_name);
+ sync_point->set_call_back(
+ sync_point_name,
+ [](std::vector<std::any>&& args) {
+ auto* ec = doris::try_any_cast<std::error_code*>(args[0]);
+ if (ec != nullptr) {
+ *ec = std::make_error_code(std::errc::io_error);
+ }
+ },
+ &guard);
+ sync_point->enable_processing();
+
+ size_t estimated_files = storage.estimate_file_count_from_statfs();
+
+ sync_point->disable_processing();
+
+ ASSERT_EQ(0, estimated_files);
+
+ if (fs::exists(test_dir)) {
+ fs::remove_all(test_dir);
+ }
+}
+
+TEST_F(BlockFileCacheTest, estimate_file_count_handles_file_size_failure) {
+ std::string test_dir = cache_base_path +
"/estimate_file_count_file_size_failure";
+ if (fs::exists(test_dir)) {
+ fs::remove_all(test_dir);
+ }
+ fs::create_directories(test_dir);
+
+ auto data_file = fs::path(test_dir) / "data.bin";
+ std::string one_mb(1024 * 1024, 'x');
+ {
+ std::ofstream ofs(data_file, std::ios::binary);
+ ASSERT_TRUE(ofs.good());
+ ofs.write(one_mb.data(), one_mb.size());
+ ASSERT_TRUE(ofs.good());
+ }
+
+ FSFileCacheStorage storage;
+ storage._cache_base_path = test_dir;
+
+ const std::string sync_point_name =
+
"FSFileCacheStorage::estimate_file_count_from_statfs::AfterFileSize";
+ auto* sync_point = doris::SyncPoint::get_instance();
+ doris::SyncPoint::CallbackGuard guard(sync_point_name);
+ sync_point->set_call_back(
+ sync_point_name,
+ [](std::vector<std::any>&& args) {
+ auto* ec = doris::try_any_cast<std::error_code*>(args[0]);
+ if (ec != nullptr) {
+ *ec = std::make_error_code(std::errc::io_error);
+ }
+ },
+ &guard);
+ sync_point->enable_processing();
+
+ size_t estimated_files = storage.estimate_file_count_from_statfs();
+
+ sync_point->disable_processing();
+
+ ASSERT_EQ(0, estimated_files);
+
+ if (fs::exists(test_dir)) {
+ fs::remove_all(test_dir);
+ }
+}
+
//TODO(zhengyu): check lazy load
//TODO(zhengyu): check version2 start
//TODO(zhengyu): check version2 version3 mixed start
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]