This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d336eedc4fc [fix](filecache) load meta exit with file NOT_FOUND 
exception (#59311)
d336eedc4fc is described below

commit d336eedc4fcea4787e8855c5719a95d4932bcf8a
Author: zhengyu <[email protected]>
AuthorDate: Fri Dec 26 10:05:34 2025 +0800

    [fix](filecache) load meta exit with file NOT_FOUND exception (#59311)
    
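    When loading cache block files into memory, the directory iteration can
    fail with exceptions caused by concurrent modification of the cache
    directory (for example, an entry is removed while it is being scanned).
    This commit skips such errors and continues with the remaining entries.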
---
 be/src/io/cache/fs_file_cache_storage.cpp          |  53 ++++++-
 .../io/cache/block_file_cache_test_meta_store.cpp  | 154 +++++++++++++++++++++
 2 files changed, 201 insertions(+), 6 deletions(-)

diff --git a/be/src/io/cache/fs_file_cache_storage.cpp 
b/be/src/io/cache/fs_file_cache_storage.cpp
index d39cf584bef..0860265e181 100644
--- a/be/src/io/cache/fs_file_cache_storage.cpp
+++ b/be/src/io/cache/fs_file_cache_storage.cpp
@@ -26,6 +26,7 @@
 #include <filesystem>
 #include <mutex>
 #include <system_error>
+#include <vector>
 
 #include "common/logging.h"
 #include "common/status.h"
@@ -877,8 +878,8 @@ void 
FSFileCacheStorage::load_cache_info_into_memory(BlockFileCache* _mgr) const
     // If the difference is more than threshold, load from filesystem as well
     if (estimated_file_count > 100) {
         double difference_ratio =
-                static_cast<double>(estimated_file_count) -
-                static_cast<double>(db_block_count) / 
static_cast<double>(estimated_file_count);
+                (static_cast<double>(estimated_file_count) - 
static_cast<double>(db_block_count)) /
+                static_cast<double>(estimated_file_count);
 
         if (difference_ratio > 
config::file_cache_meta_store_vs_file_system_diff_num_threshold) {
             LOG(WARNING) << "Significant difference between DB blocks (" << 
db_block_count
@@ -983,13 +984,53 @@ size_t 
FSFileCacheStorage::estimate_file_count_from_statfs() const {
     // Get total size of cache directory to estimate file count
     std::error_code ec;
     uintmax_t total_size = 0;
-    for (const auto& entry : 
std::filesystem::recursive_directory_iterator(_cache_base_path, ec)) {
+    std::vector<std::filesystem::path> pending_dirs 
{std::filesystem::path(_cache_base_path)};
+    while (!pending_dirs.empty()) {
+        auto current_dir = pending_dirs.back();
+        pending_dirs.pop_back();
+
+        std::filesystem::directory_iterator it(current_dir, ec);
         if (ec) {
-            LOG(WARNING) << "Error accessing directory entry: " << 
ec.message();
+            LOG(WARNING) << "Failed to list directory while estimating file 
count, dir="
+                         << current_dir << ", err=" << ec.message();
+            ec.clear();
             continue;
         }
-        if (entry.is_regular_file()) {
-            total_size += entry.file_size();
+
+        for (; it != std::filesystem::directory_iterator(); ++it) {
+            std::error_code status_ec;
+            auto entry_status = it->symlink_status(status_ec);
+            TEST_SYNC_POINT_CALLBACK(
+                    
"FSFileCacheStorage::estimate_file_count_from_statfs::AfterEntryStatus",
+                    &status_ec);
+            if (status_ec) {
+                LOG(WARNING) << "Failed to stat entry while estimating file 
count, path="
+                             << it->path() << ", err=" << status_ec.message();
+                continue;
+            }
+
+            if (std::filesystem::is_directory(entry_status)) {
+                auto next_dir = it->path();
+                TEST_SYNC_POINT_CALLBACK(
+                        
"FSFileCacheStorage::estimate_file_count_from_statfs::OnDirectory",
+                        &next_dir);
+                pending_dirs.emplace_back(next_dir);
+                continue;
+            }
+
+            if (std::filesystem::is_regular_file(entry_status)) {
+                std::error_code size_ec;
+                auto file_size = it->file_size(size_ec);
+                TEST_SYNC_POINT_CALLBACK(
+                        
"FSFileCacheStorage::estimate_file_count_from_statfs::AfterFileSize",
+                        &size_ec);
+                if (size_ec) {
+                    LOG(WARNING) << "Failed to get file size while estimating 
file count, path="
+                                 << it->path() << ", err=" << 
size_ec.message();
+                    continue;
+                }
+                total_size += file_size;
+            }
         }
     }
 
diff --git a/be/test/io/cache/block_file_cache_test_meta_store.cpp 
b/be/test/io/cache/block_file_cache_test_meta_store.cpp
index 48e565cd8ee..8234d03b527 100644
--- a/be/test/io/cache/block_file_cache_test_meta_store.cpp
+++ b/be/test/io/cache/block_file_cache_test_meta_store.cpp
@@ -18,7 +18,20 @@
 // 
https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/tests/gtest_lru_file_cache.cpp
 // and modified by Doris
 
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wkeyword-macro"
+#endif
+
+#define private public
+#define protected public
 #include "block_file_cache_test_common.h"
+#undef private
+#undef protected
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
 
 namespace doris::io {
 
@@ -498,6 +511,147 @@ TEST_F(BlockFileCacheTest, 
clear_retains_meta_directory_and_clears_meta_entries)
     }
 }
 
+TEST_F(BlockFileCacheTest, estimate_file_count_skips_removed_directory) {
+    std::string test_dir = cache_base_path + 
"/estimate_file_count_removed_dir";
+    if (fs::exists(test_dir)) {
+        fs::remove_all(test_dir);
+    }
+    auto keep_dir = fs::path(test_dir) / "keep";
+    auto remove_dir = fs::path(test_dir) / "remove";
+    fs::create_directories(keep_dir);
+    fs::create_directories(remove_dir);
+
+    auto keep_file = keep_dir / "data.bin";
+    std::string one_mb(1024 * 1024, 'd');
+    {
+        std::ofstream ofs(keep_file, std::ios::binary);
+        ASSERT_TRUE(ofs.good());
+        for (int i = 0; i < 3; ++i) {
+            ofs.write(one_mb.data(), one_mb.size());
+            ASSERT_TRUE(ofs.good());
+        }
+    }
+
+    FSFileCacheStorage storage;
+    storage._cache_base_path = test_dir;
+
+    const std::string sync_point_name =
+            "FSFileCacheStorage::estimate_file_count_from_statfs::OnDirectory";
+    auto* sync_point = doris::SyncPoint::get_instance();
+    doris::SyncPoint::CallbackGuard guard(sync_point_name);
+    sync_point->set_call_back(
+            sync_point_name,
+            [remove_dir](std::vector<std::any>&& args) {
+                auto* path = 
doris::try_any_cast<std::filesystem::path*>(args[0]);
+                if (*path == remove_dir) {
+                    fs::remove_all(remove_dir);
+                }
+            },
+            &guard);
+    sync_point->enable_processing();
+
+    size_t estimated_files = storage.estimate_file_count_from_statfs();
+
+    sync_point->disable_processing();
+
+    ASSERT_EQ(3, estimated_files);
+    ASSERT_FALSE(fs::exists(remove_dir));
+
+    if (fs::exists(test_dir)) {
+        fs::remove_all(test_dir);
+    }
+}
+
+TEST_F(BlockFileCacheTest, estimate_file_count_handles_stat_failure) {
+    std::string test_dir = cache_base_path + 
"/estimate_file_count_stat_failure";
+    if (fs::exists(test_dir)) {
+        fs::remove_all(test_dir);
+    }
+    fs::create_directories(test_dir);
+
+    auto data_file = fs::path(test_dir) / "data.bin";
+    std::string one_mb(1024 * 1024, 'x');
+    {
+        std::ofstream ofs(data_file, std::ios::binary);
+        ASSERT_TRUE(ofs.good());
+        ofs.write(one_mb.data(), one_mb.size());
+        ASSERT_TRUE(ofs.good());
+    }
+
+    FSFileCacheStorage storage;
+    storage._cache_base_path = test_dir;
+
+    const std::string sync_point_name =
+            
"FSFileCacheStorage::estimate_file_count_from_statfs::AfterEntryStatus";
+    auto* sync_point = doris::SyncPoint::get_instance();
+    doris::SyncPoint::CallbackGuard guard(sync_point_name);
+    sync_point->set_call_back(
+            sync_point_name,
+            [](std::vector<std::any>&& args) {
+                auto* ec = doris::try_any_cast<std::error_code*>(args[0]);
+                if (ec != nullptr) {
+                    *ec = std::make_error_code(std::errc::io_error);
+                }
+            },
+            &guard);
+    sync_point->enable_processing();
+
+    size_t estimated_files = storage.estimate_file_count_from_statfs();
+
+    sync_point->disable_processing();
+
+    ASSERT_EQ(0, estimated_files);
+
+    if (fs::exists(test_dir)) {
+        fs::remove_all(test_dir);
+    }
+}
+
+TEST_F(BlockFileCacheTest, estimate_file_count_handles_file_size_failure) {
+    std::string test_dir = cache_base_path + 
"/estimate_file_count_file_size_failure";
+    if (fs::exists(test_dir)) {
+        fs::remove_all(test_dir);
+    }
+    fs::create_directories(test_dir);
+
+    auto data_file = fs::path(test_dir) / "data.bin";
+    std::string one_mb(1024 * 1024, 'x');
+    {
+        std::ofstream ofs(data_file, std::ios::binary);
+        ASSERT_TRUE(ofs.good());
+        ofs.write(one_mb.data(), one_mb.size());
+        ASSERT_TRUE(ofs.good());
+    }
+
+    FSFileCacheStorage storage;
+    storage._cache_base_path = test_dir;
+
+    const std::string sync_point_name =
+            
"FSFileCacheStorage::estimate_file_count_from_statfs::AfterFileSize";
+    auto* sync_point = doris::SyncPoint::get_instance();
+    doris::SyncPoint::CallbackGuard guard(sync_point_name);
+    sync_point->set_call_back(
+            sync_point_name,
+            [](std::vector<std::any>&& args) {
+                auto* ec = doris::try_any_cast<std::error_code*>(args[0]);
+                if (ec != nullptr) {
+                    *ec = std::make_error_code(std::errc::io_error);
+                }
+            },
+            &guard);
+    sync_point->enable_processing();
+
+    size_t estimated_files = storage.estimate_file_count_from_statfs();
+
+    sync_point->disable_processing();
+
+    ASSERT_EQ(0, estimated_files);
+
+    if (fs::exists(test_dir)) {
+        fs::remove_all(test_dir);
+    }
+}
+
 //TODO(zhengyu): check lazy load
 //TODO(zhengyu): check version2 start
 //TODO(zhengyu): check version2 version3 mixed start


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to