This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 34846980fcc [fix](filecache) clean empty v3 cache dirs (#63344)
34846980fcc is described below

commit 34846980fcc75585c1998f154fc21908d646e7b1
Author: Xin Liao <[email protected]>
AuthorDate: Mon May 25 18:00:21 2026 +0800

    [fix](filecache) clean empty v3 cache dirs (#63344)
    
    ### What problem does this PR solve?
    
    Issue Number: #63117
    
    Fixes file cache empty directory leakage for v3 cache layout.
    
    In v3 layout, cache files live under `<hash_prefix>/<hash>_0/`.
    `FSFileCacheStorage::remove()` deletes the v3 cache file first, then
    switches the local directory variable to the legacy v2 path for
    compatibility cleanup. The final empty-directory cleanup therefore
    checks the v2 directory and can leave the actual v3 `<hash>_0` directory
    behind.
    
    This patch keeps v2 and v3 directory paths separately, preserves the
    existing v2 compatibility cleanup, and additionally removes the v3 key
    directory when it is empty. The v3 directory cleanup uses non-recursive
    `std::filesystem::remove()` to avoid deleting concurrently created
    files.
    
    ### Check List
    
    - [x] Added unit test
    - [ ] Regression test
---
 be/src/io/cache/fs_file_cache_storage.cpp          | 32 ++++++++----
 be/src/io/fs/local_file_system.cpp                 | 20 ++++++++
 be/src/io/fs/local_file_system.h                   |  3 ++
 .../fs_file_cache_storage_leak_cleaner_test.cpp    | 57 ++++++++++++++++++++++
 be/test/io/fs/local_file_system_test.cpp           | 24 +++++++++
 5 files changed, 127 insertions(+), 9 deletions(-)

diff --git a/be/src/io/cache/fs_file_cache_storage.cpp 
b/be/src/io/cache/fs_file_cache_storage.cpp
index 43b9c4ae4cb..abccb16d542 100644
--- a/be/src/io/cache/fs_file_cache_storage.cpp
+++ b/be/src/io/cache/fs_file_cache_storage.cpp
@@ -281,25 +281,39 @@ Status FSFileCacheStorage::read(const FileCacheKey& key, 
size_t value_offset, Sl
 }
 
 Status FSFileCacheStorage::remove(const FileCacheKey& key) {
-    std::string dir = get_path_in_local_cache_v3(key.hash);
-    std::string file = get_path_in_local_cache_v3(dir, key.offset);
+    const std::string v3_dir = get_path_in_local_cache_v3(key.hash);
+    const std::string v3_file = get_path_in_local_cache_v3(v3_dir, key.offset);
     FDCache::instance()->remove_file_reader(std::make_pair(key.hash, 
key.offset));
-    RETURN_IF_ERROR(fs->delete_file(file));
+    RETURN_IF_ERROR(fs->delete_file(v3_file));
     // return OK not means the file is deleted, it may be not exist
 
+    std::string v2_dir;
     { // try to detect the file with old v2 format
-        dir = get_path_in_local_cache_v2(key.hash, key.meta.expiration_time);
-        file = get_path_in_local_cache_v2(dir, key.offset, key.meta.type);
-        RETURN_IF_ERROR(fs->delete_file(file));
+        v2_dir = get_path_in_local_cache_v2(key.hash, 
key.meta.expiration_time);
+        const std::string v2_file = get_path_in_local_cache_v2(v2_dir, 
key.offset, key.meta.type);
+        RETURN_IF_ERROR(fs->delete_file(v2_file));
     }
 
     BlockMetaKey mkey(key.meta.tablet_id, UInt128Wrapper(key.hash), 
key.offset);
     _meta_store->delete_key(mkey);
     std::vector<FileInfo> files;
     bool exists {false};
-    RETURN_IF_ERROR(fs->list(dir, true, &files, &exists));
-    if (files.empty()) {
-        RETURN_IF_ERROR(fs->delete_directory(dir));
+    RETURN_IF_ERROR(fs->list(v2_dir, true, &files, &exists));
+    if (exists && files.empty()) {
+        auto st = fs->delete_empty_directory(v2_dir);
+        if (!st.ok()) {
+            LOG_WARNING("failed to remove cache directory {}", 
v2_dir).error(st);
+        }
+    }
+
+    files.clear();
+    exists = false;
+    RETURN_IF_ERROR(fs->list(v3_dir, true, &files, &exists));
+    if (exists && files.empty()) {
+        auto st = fs->delete_empty_directory(v3_dir);
+        if (!st.ok()) {
+            LOG_WARNING("failed to remove cache directory {}", 
v3_dir).error(st);
+        }
     }
     return Status::OK();
 }
diff --git a/be/src/io/fs/local_file_system.cpp 
b/be/src/io/fs/local_file_system.cpp
index 7edabd836c1..4f9b6baf2a7 100644
--- a/be/src/io/fs/local_file_system.cpp
+++ b/be/src/io/fs/local_file_system.cpp
@@ -167,6 +167,26 @@ Status LocalFileSystem::delete_directory_or_file(const 
Path& path) {
     FILESYSTEM_M(delete_directory_or_file_impl(path));
 }
 
+Status LocalFileSystem::delete_empty_directory(const Path& dir) {
+    FILESYSTEM_M(delete_empty_directory_impl(dir));
+}
+
+Status LocalFileSystem::delete_empty_directory_impl(const Path& dir) {
+    Path path;
+    RETURN_IF_ERROR(absolute_path(dir, path));
+    VLOG_DEBUG << "delete empty directory: " << path.native();
+    int ret = 0;
+    RETRY_ON_EINTR(ret, rmdir(path.c_str()));
+    if (ret != 0) {
+        std::error_code ec(errno, std::generic_category());
+        if (ec == std::errc::no_such_file_or_directory) {
+            return Status::OK();
+        }
+        return localfs_error(ec, fmt::format("failed to delete empty directory 
{}", path.native()));
+    }
+    return Status::OK();
+}
+
 Status LocalFileSystem::delete_directory_or_file_impl(const Path& path) {
     bool is_dir;
     RETURN_IF_ERROR(is_directory(path, &is_dir));
diff --git a/be/src/io/fs/local_file_system.h b/be/src/io/fs/local_file_system.h
index c1ffbd22949..b1546bc4734 100644
--- a/be/src/io/fs/local_file_system.h
+++ b/be/src/io/fs/local_file_system.h
@@ -63,6 +63,8 @@ public:
     static bool equal_or_sub_path(const Path& parent, const Path& child);
     // delete dir or file
     Status delete_directory_or_file(const Path& path);
+    // delete directory only if it is empty
+    Status delete_empty_directory(const Path& dir);
     // change the file permission of the given path
     Status permission(const Path& file, std::filesystem::perms prms);
 
@@ -87,6 +89,7 @@ protected:
     Status delete_file_impl(const Path& file) override;
     Status delete_directory_impl(const Path& dir) override;
     Status delete_directory_or_file_impl(const Path& path);
+    Status delete_empty_directory_impl(const Path& dir);
     Status batch_delete_impl(const std::vector<Path>& files) override;
     Status exists_impl(const Path& path, bool* res) const override;
     Status file_size_impl(const Path& file, int64_t* file_size) const override;
diff --git a/be/test/io/cache/fs_file_cache_storage_leak_cleaner_test.cpp 
b/be/test/io/cache/fs_file_cache_storage_leak_cleaner_test.cpp
index 49ef8587ced..e4e066ad458 100644
--- a/be/test/io/cache/fs_file_cache_storage_leak_cleaner_test.cpp
+++ b/be/test/io/cache/fs_file_cache_storage_leak_cleaner_test.cpp
@@ -288,6 +288,63 @@ TEST_F(FSFileCacheLeakCleanerTest, 
remove_orphan_and_tmp_files) {
     fs::remove_all(dir, ec);
 }
 
+TEST_F(FSFileCacheLeakCleanerTest, 
remove_cleans_empty_v2_and_v3_key_directories) {
+    auto dir = prepare_test_dir();
+    FileCacheSettings settings = default_settings();
+    BlockFileCache mgr(dir.string(), settings);
+    FSFileCacheStorage storage;
+    setup_storage(storage, mgr, dir);
+
+    FileCacheKey key;
+    key.hash = BlockFileCache::hash("remove_empty_v3_dir");
+    key.offset = 0;
+    key.meta.expiration_time = 123456789;
+    key.meta.type = FileCacheType::NORMAL;
+    key.meta.tablet_id = 0;
+
+    auto key_dir = storage.get_path_in_local_cache_v3(key.hash);
+    auto block_path = FSFileCacheStorage::get_path_in_local_cache_v3(key_dir, 
key.offset);
+    auto old_key_dir = storage.get_path_in_local_cache_v2(key.hash, 
key.meta.expiration_time);
+    auto old_block_path =
+            FSFileCacheStorage::get_path_in_local_cache_v2(old_key_dir, 
key.offset, key.meta.type);
+    create_regular_file(block_path, 'r');
+    create_regular_file(old_block_path, 'o');
+
+    ASSERT_TRUE(storage.remove(key).ok());
+
+    EXPECT_FALSE(fs::exists(block_path));
+    EXPECT_FALSE(fs::exists(key_dir));
+    EXPECT_FALSE(fs::exists(old_block_path));
+    EXPECT_FALSE(fs::exists(old_key_dir));
+}
+
+TEST_F(FSFileCacheLeakCleanerTest, remove_v3_block_when_v2_directory_missing) {
+    auto dir = prepare_test_dir();
+    FileCacheSettings settings = default_settings();
+    BlockFileCache mgr(dir.string(), settings);
+    FSFileCacheStorage storage;
+    setup_storage(storage, mgr, dir);
+
+    FileCacheKey key;
+    key.hash = BlockFileCache::hash("remove_v3_without_v2_dir");
+    key.offset = 0;
+    key.meta.expiration_time = 123456789;
+    key.meta.type = FileCacheType::NORMAL;
+    key.meta.tablet_id = 0;
+
+    auto key_dir = storage.get_path_in_local_cache_v3(key.hash);
+    auto block_path = FSFileCacheStorage::get_path_in_local_cache_v3(key_dir, 
key.offset);
+    auto old_key_dir = storage.get_path_in_local_cache_v2(key.hash, 
key.meta.expiration_time);
+    create_regular_file(block_path, 'r');
+    ASSERT_FALSE(fs::exists(old_key_dir));
+
+    ASSERT_TRUE(storage.remove(key).ok());
+
+    EXPECT_FALSE(fs::exists(block_path));
+    EXPECT_FALSE(fs::exists(key_dir));
+    EXPECT_FALSE(fs::exists(old_key_dir));
+}
+
 TEST_F(FSFileCacheLeakCleanerTest, 
snapshot_metadata_for_hash_offsets_handles_missing_hash) {
     auto dir = prepare_test_dir();
     FileCacheSettings settings = default_settings();
diff --git a/be/test/io/fs/local_file_system_test.cpp 
b/be/test/io/fs/local_file_system_test.cpp
index 450c3daea8f..5947dffef9c 100644
--- a/be/test/io/fs/local_file_system_test.cpp
+++ b/be/test/io/fs/local_file_system_test.cpp
@@ -212,6 +212,30 @@ TEST_F(LocalFileSystemTest, Delete) {
     ASSERT_TRUE(check_exist(fmt::format("{}/dir/dir1", test_dir))); // Parent 
should exist
 }
 
+TEST_F(LocalFileSystemTest, DeleteEmptyDirectory) {
+    auto empty_dir = fmt::format("{}/empty_dir", test_dir);
+    auto st = io::global_local_filesystem()->create_directory(empty_dir);
+    ASSERT_TRUE(st.ok()) << st;
+
+    st = io::global_local_filesystem()->delete_empty_directory(empty_dir);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_FALSE(check_exist(empty_dir));
+    st = io::global_local_filesystem()->delete_empty_directory(empty_dir);
+    ASSERT_TRUE(st.ok()) << st;
+
+    auto non_empty_dir = fmt::format("{}/non_empty_dir", test_dir);
+    auto child_file = fmt::format("{}/child", non_empty_dir);
+    st = io::global_local_filesystem()->create_directory(non_empty_dir);
+    ASSERT_TRUE(st.ok()) << st;
+    st = save_string_file(child_file, "abc");
+    ASSERT_TRUE(st.ok()) << st;
+
+    st = io::global_local_filesystem()->delete_empty_directory(non_empty_dir);
+    ASSERT_FALSE(st.ok()) << st;
+    ASSERT_TRUE(check_exist(non_empty_dir));
+    ASSERT_TRUE(check_exist(child_file));
+}
+
 TEST_F(LocalFileSystemTest, AbnormalFileWriter) {
     auto fname = fmt::format("{}/abc", test_dir);
     {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to