This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 26f1a65873f branch-3.1: [enhancement](filecache) fix default capacity 
and add reset_capacity validation #51711 (#52333)
26f1a65873f is described below

commit 26f1a65873f850662d802299d2caec1a59a12af7
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Jun 26 18:16:59 2025 +0800

    branch-3.1: [enhancement](filecache) fix default capacity and add 
reset_capacity validation #51711 (#52333)
    
    Cherry-picked from #51711
    
    Signed-off-by: zhengyu <[email protected]>
    Co-authored-by: zhengyu <[email protected]>
---
 be/src/io/cache/block_file_cache_factory.cpp | 62 ++++++++++++++++++----
 be/test/io/cache/block_file_cache_test.cpp   | 77 ++++++++++++++++++++++++++++
 2 files changed, 128 insertions(+), 11 deletions(-)

diff --git a/be/src/io/cache/block_file_cache_factory.cpp 
b/be/src/io/cache/block_file_cache_factory.cpp
index 2d0d25735fe..d43e3acea14 100644
--- a/be/src/io/cache/block_file_cache_factory.cpp
+++ b/be/src/io/cache/block_file_cache_factory.cpp
@@ -92,16 +92,13 @@ Status FileCacheFactory::create_file_cache(const 
std::string& cache_base_path,
             LOG_ERROR("").tag("file cache path", cache_base_path).tag("error", 
strerror(errno));
             return Status::IOError("{} statfs error {}", cache_base_path, 
strerror(errno));
         }
-        size_t disk_capacity = static_cast<size_t>(
-                static_cast<size_t>(stat.f_blocks) * 
static_cast<size_t>(stat.f_bsize) *
-                
(static_cast<double>(config::file_cache_enter_disk_resource_limit_mode_percent) 
/
-                 100));
+        size_t disk_capacity = 
static_cast<size_t>(static_cast<size_t>(stat.f_blocks) *
+                                                   
static_cast<size_t>(stat.f_bsize));
         if (file_cache_settings.capacity == 0 || disk_capacity < 
file_cache_settings.capacity) {
             LOG_INFO(
-                    "The cache {} config size {} is larger than {}% disk size 
{} or zero, recalc "
+                    "The cache {} config size {} is larger than disk size {} 
or zero, recalc "
                     "it.",
-                    cache_base_path, file_cache_settings.capacity,
-                    config::file_cache_enter_disk_resource_limit_mode_percent, 
disk_capacity);
+                    cache_base_path, file_cache_settings.capacity, 
disk_capacity);
             file_cache_settings = get_file_cache_settings(disk_capacity,
                                                           
file_cache_settings.max_query_cache_size);
         }
@@ -174,16 +171,59 @@ std::vector<std::string> 
FileCacheFactory::get_base_paths() {
     return paths;
 }
 
+std::string validate_capacity(const std::string& path, int64_t new_capacity,
+                              int64_t& valid_capacity) {
+    struct statfs stat;
+    if (statfs(path.c_str(), &stat) < 0) {
+        auto ret = fmt::format("reset capacity {} statfs error {}. ", path, 
strerror(errno));
+        LOG_ERROR(ret);
+        valid_capacity = 0; // caller will handle the error
+        return ret;
+    }
+    size_t disk_capacity = 
static_cast<size_t>(static_cast<size_t>(stat.f_blocks) *
+                                               
static_cast<size_t>(stat.f_bsize));
+    if (new_capacity == 0 || disk_capacity < new_capacity) {
+        auto ret = fmt::format(
+                "The cache {} config size {} is larger than disk size {} or 
zero, recalc "
+                "it to disk size. ",
+                path, new_capacity, disk_capacity);
+        valid_capacity = disk_capacity;
+        LOG_WARNING(ret);
+        return ret;
+    }
+    valid_capacity = new_capacity;
+    return "";
+}
+
 std::string FileCacheFactory::reset_capacity(const std::string& path, int64_t 
new_capacity) {
+    std::stringstream ss;
+    size_t total_capacity = 0;
     if (path.empty()) {
-        std::stringstream ss;
-        for (auto& [_, cache] : _path_to_cache) {
-            ss << cache->reset_capacity(new_capacity);
+        for (auto& [p, cache] : _path_to_cache) {
+            int64_t valid_capacity = 0;
+            ss << validate_capacity(p, new_capacity, valid_capacity);
+            if (valid_capacity <= 0) {
+                return ss.str();
+            }
+            ss << cache->reset_capacity(valid_capacity);
+            total_capacity += cache->capacity();
         }
+        _capacity = total_capacity;
         return ss.str();
     } else {
         if (auto iter = _path_to_cache.find(path); iter != 
_path_to_cache.end()) {
-            return iter->second->reset_capacity(new_capacity);
+            int64_t valid_capacity = 0;
+            ss << validate_capacity(path, new_capacity, valid_capacity);
+            if (valid_capacity <= 0) {
+                return ss.str();
+            }
+            ss << iter->second->reset_capacity(valid_capacity);
+
+            for (auto& [p, cache] : _path_to_cache) {
+                total_capacity += cache->capacity();
+            }
+            _capacity = total_capacity;
+            return ss.str();
         }
     }
     return "Unknown the cache path " + path;
diff --git a/be/test/io/cache/block_file_cache_test.cpp 
b/be/test/io/cache/block_file_cache_test.cpp
index 0d6883e4c8b..1408919fe17 100644
--- a/be/test/io/cache/block_file_cache_test.cpp
+++ b/be/test/io/cache/block_file_cache_test.cpp
@@ -7905,4 +7905,81 @@ TEST_F(BlockFileCacheTest, 
cached_remote_file_reader_normal_index) {
     FileCacheFactory::instance()->_capacity = 0;
 }
 
+TEST_F(BlockFileCacheTest, test_reset_capacity) {
+    std::string cache_path2 = caches_dir / "cache2" / "";
+
+    if (fs::exists(cache_base_path)) {
+        fs::remove_all(cache_base_path);
+    }
+    if (fs::exists(cache_path2)) {
+        fs::remove_all(cache_path2);
+    }
+
+    io::FileCacheSettings settings;
+    settings.query_queue_size = 30;
+    settings.query_queue_elements = 5;
+    settings.index_queue_size = 30;
+    settings.index_queue_elements = 5;
+    settings.disposable_queue_size = 30;
+    settings.disposable_queue_elements = 5;
+    settings.capacity = 90;
+    settings.max_file_block_size = 30;
+    settings.max_query_cache_size = 30;
+    
ASSERT_TRUE(FileCacheFactory::instance()->create_file_cache(cache_base_path, 
settings).ok());
+    ASSERT_TRUE(FileCacheFactory::instance()->create_file_cache(cache_path2, 
settings).ok());
+    EXPECT_EQ(FileCacheFactory::instance()->get_cache_instance_size(), 2);
+    EXPECT_EQ(FileCacheFactory::instance()->get_capacity(), 180);
+
+    // valid path + valid capacity
+    auto s = FileCacheFactory::instance()->reset_capacity(cache_base_path, 80);
+    LOG(INFO) << s;
+    EXPECT_EQ(FileCacheFactory::instance()->get_capacity(), 170);
+
+    // empty path + valid capacity
+    s = FileCacheFactory::instance()->reset_capacity("", 70);
+    LOG(INFO) << s;
+    EXPECT_EQ(FileCacheFactory::instance()->get_capacity(), 140);
+
+    // invalid path + valid capacity
+    s = FileCacheFactory::instance()->reset_capacity("/not/exist/haha", 70);
+    LOG(INFO) << s;
+    EXPECT_EQ(FileCacheFactory::instance()->get_capacity(), 140);
+
+    // valid path + invalid capacity
+    s = FileCacheFactory::instance()->reset_capacity(cache_base_path, 
INT64_MAX);
+    LOG(INFO) << s;
+    EXPECT_LT(FileCacheFactory::instance()->get_capacity(), INT64_MAX);
+    EXPECT_GT(FileCacheFactory::instance()->get_capacity(), 70);
+
+    // valid path + zero capacity
+    s = FileCacheFactory::instance()->reset_capacity(cache_base_path, 0);
+    LOG(INFO) << s;
+    EXPECT_LT(FileCacheFactory::instance()->get_capacity(), INT64_MAX);
+    EXPECT_GT(FileCacheFactory::instance()->get_capacity(), 70);
+
+    // empty path + invalid capacity
+    s = FileCacheFactory::instance()->reset_capacity("", INT64_MAX);
+    LOG(INFO) << s;
+    EXPECT_LT(FileCacheFactory::instance()->get_capacity(), INT64_MAX);
+    EXPECT_GT(FileCacheFactory::instance()->get_capacity(), 70);
+
+    // empty path + zero capacity
+    s = FileCacheFactory::instance()->reset_capacity("", 0);
+    LOG(INFO) << s;
+    EXPECT_LT(FileCacheFactory::instance()->get_capacity(), INT64_MAX);
+    EXPECT_GT(FileCacheFactory::instance()->get_capacity(), 70);
+
+    FileCacheFactory::instance()->clear_file_caches(true);
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+    if (fs::exists(cache_base_path)) {
+        fs::remove_all(cache_base_path);
+    }
+    if (fs::exists(cache_path2)) {
+        fs::remove_all(cache_path2);
+    }
+    FileCacheFactory::instance()->_caches.clear();
+    FileCacheFactory::instance()->_path_to_cache.clear();
+    FileCacheFactory::instance()->_capacity = 0;
+}
+
 } // namespace doris::io


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to