This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 284ecc531142475233f993c6fcde5b5b2a1be794
Author: zhengyu <[email protected]>
AuthorDate: Tue Mar 17 09:17:43 2026 +0800

    [fix](filecache) add check and exception handle for empty block file 
(#59646) (#61203)
    
    current code didn't check and handle cases when block file is fail to
    download (empty file). add check and handler for that.
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/io/cache/file_block.cpp             |  17 +++-
 be/src/io/cache/file_block.h               |   1 +
 be/test/io/cache/block_file_cache_test.cpp | 130 +++++++++++++++++++++++++++++
 3 files changed, 146 insertions(+), 2 deletions(-)

diff --git a/be/src/io/cache/file_block.cpp b/be/src/io/cache/file_block.cpp
index 731c1d311e6..83e80d872c2 100644
--- a/be/src/io/cache/file_block.cpp
+++ b/be/src/io/cache/file_block.cpp
@@ -113,7 +113,12 @@ void 
FileBlock::reset_downloader_impl(std::lock_guard<std::mutex>& block_lock) {
 
 Status FileBlock::set_downloaded(std::lock_guard<std::mutex>& /* block_lock 
*/) {
     DCHECK(_download_state != State::DOWNLOADED);
-    DCHECK_NE(_downloaded_size, 0);
+    if (_downloaded_size == 0) {
+        _download_state = State::EMPTY;
+        _downloader_id = 0;
+        return Status::InternalError("Try to set empty block {} as downloaded",
+                                     _block_range.to_string());
+    }
     Status status = _mgr->_storage->finalize(_key);
     if (status.ok()) [[likely]] {
         _download_state = State::DOWNLOADED;
@@ -147,7 +152,15 @@ Status FileBlock::append(Slice data) {
 }
 
 Status FileBlock::finalize() {
-    if (_downloaded_size != 0 && _downloaded_size != _block_range.size()) {
+    if (_downloaded_size == 0) {
+        std::lock_guard block_lock(_mutex);
+        _download_state = State::EMPTY;
+        _downloader_id = 0;
+        _cv.notify_all();
+        return Status::InternalError("Try to finalize an empty file block {}",
+                                     _block_range.to_string());
+    }
+    if (_downloaded_size != _block_range.size()) {
         SCOPED_CACHE_LOCK(_mgr->_mutex, _mgr);
         size_t old_size = _block_range.size();
         _block_range.right = _block_range.left + _downloaded_size - 1;
diff --git a/be/src/io/cache/file_block.h b/be/src/io/cache/file_block.h
index 9cc4f06c6f0..eab74e827a8 100644
--- a/be/src/io/cache/file_block.h
+++ b/be/src/io/cache/file_block.h
@@ -44,6 +44,7 @@ class FileBlock {
     friend class BlockFileCache;
     friend class CachedRemoteFileReader;
     friend struct FileBlockCell;
+    friend class FileBlockTestAccessor;
 
 public:
     enum class State {
diff --git a/be/test/io/cache/block_file_cache_test.cpp 
b/be/test/io/cache/block_file_cache_test.cpp
index 333619a0bf8..287993e4283 100644
--- a/be/test/io/cache/block_file_cache_test.cpp
+++ b/be/test/io/cache/block_file_cache_test.cpp
@@ -21,6 +21,22 @@
 #include "block_file_cache_test_common.h"
 namespace doris::io {
 
+class FileBlockTestAccessor {
+public:
+    static void set_state(FileBlock& block, FileBlock::State state) {
+        block._download_state = state;
+    }
+    static void set_downloader_id(FileBlock& block, uint64_t id) { 
block._downloader_id = id; }
+    static void set_downloaded_size(FileBlock& block, size_t size) {
+        block._downloaded_size = size;
+    }
+
+    static Status call_set_downloaded(FileBlock& block) {
+        std::lock_guard<std::mutex> lock(block._mutex);
+        return block.set_downloaded(lock);
+    }
+};
+
 fs::path caches_dir = fs::current_path() / "lru_cache_test";
 std::string cache_base_path = caches_dir / "cache1" / "";
 std::string tmp_file = caches_dir / "tmp_file";
@@ -8318,4 +8334,118 @@ TEST_F(BlockFileCacheTest, 
cached_remote_file_reader_direct_read_bytes_check) {
     FileCacheFactory::instance()->_capacity = 0;
 }
 
+TEST_F(BlockFileCacheTest, finalize_empty_block) {
+    std::string my_cache_path = caches_dir / "empty_block_test" / "";
+    if (fs::exists(my_cache_path)) {
+        fs::remove_all(my_cache_path);
+    }
+    io::FileCacheSettings settings;
+    settings.capacity = 100;
+    settings.max_file_block_size = 100;
+    io::BlockFileCache mgr(my_cache_path, settings);
+    ASSERT_TRUE(mgr.initialize().ok());
+
+    for (int i = 0; i < 100; i++) {
+        if (mgr.get_async_open_success()) {
+            break;
+        };
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+    }
+
+    auto key = io::BlockFileCache::hash("empty_block_test");
+    io::CacheContext context;
+    ReadStatistics rstats;
+    context.stats = &rstats;
+    context.cache_type = io::FileCacheType::NORMAL;
+
+    {
+        auto holder = mgr.get_or_set(key, 0, 10, context);
+        auto blocks = fromHolder(holder);
+        ASSERT_EQ(blocks.size(), 1);
+        auto block = blocks[0];
+        ASSERT_EQ(block->state(), io::FileBlock::State::EMPTY);
+
+        ASSERT_EQ(block->get_or_set_downloader(), 
io::FileBlock::get_caller_id());
+        ASSERT_EQ(block->state(), io::FileBlock::State::DOWNLOADING);
+
+        // Call finalize without calling append()
+        Status st = block->finalize();
+        ASSERT_FALSE(st.ok());
+        ASSERT_EQ(block->state(), io::FileBlock::State::EMPTY);
+        ASSERT_EQ(block->get_downloader(), 0);
+    }
+    if (fs::exists(my_cache_path)) {
+        fs::remove_all(my_cache_path);
+    }
+}
+
+TEST_F(BlockFileCacheTest, finalize_partial_block) {
+    std::string my_cache_path = caches_dir / "partial_block_test" / "";
+    if (fs::exists(my_cache_path)) {
+        fs::remove_all(my_cache_path);
+    }
+    io::FileCacheSettings settings;
+    settings.capacity = 100;
+    settings.max_file_block_size = 100;
+    io::BlockFileCache mgr(my_cache_path, settings);
+    ASSERT_TRUE(mgr.initialize().ok());
+
+    for (int i = 0; i < 100; i++) {
+        if (mgr.get_async_open_success()) {
+            break;
+        };
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+    }
+
+    auto key = io::BlockFileCache::hash("partial_block_test");
+    io::CacheContext context;
+    ReadStatistics rstats;
+    context.stats = &rstats;
+    context.cache_type = io::FileCacheType::NORMAL;
+
+    {
+        auto holder = mgr.get_or_set(key, 0, 10, context);
+        auto blocks = fromHolder(holder);
+        ASSERT_EQ(blocks.size(), 1);
+        auto block = blocks[0];
+        ASSERT_EQ(block->get_or_set_downloader(), 
io::FileBlock::get_caller_id());
+
+        std::string data(5, '0');
+        ASSERT_TRUE(block->append(Slice(data.data(), data.size())).ok());
+
+        // Finalize a block that only has 5 bytes out of 10
+        Status st = block->finalize();
+        ASSERT_TRUE(st.ok());
+        ASSERT_EQ(block->state(), io::FileBlock::State::DOWNLOADED);
+        ASSERT_EQ(block->range().size(), 5);
+        ASSERT_EQ(block->range().right, 4);
+    }
+
+    // Verify it was shrunk in the cache
+    ASSERT_EQ(mgr.get_used_cache_size(io::FileCacheType::NORMAL), 5);
+
+    if (fs::exists(my_cache_path)) {
+        fs::remove_all(my_cache_path);
+    }
+}
+
+TEST_F(BlockFileCacheTest, set_downloaded_empty_block_branch) {
+    FileCacheKey key;
+    key.hash = io::BlockFileCache::hash("set_downloaded_empty_block_branch");
+    key.offset = 0;
+    key.meta.type = io::FileCacheType::NORMAL;
+    key.meta.expiration_time = 0;
+
+    // mgr is intentionally nullptr: this branch returns before touching 
storage.
+    io::FileBlock block(key, 10, nullptr, io::FileBlock::State::EMPTY);
+    FileBlockTestAccessor::set_state(block, io::FileBlock::State::DOWNLOADING);
+    FileBlockTestAccessor::set_downloader_id(block, 123);
+    FileBlockTestAccessor::set_downloaded_size(block, 0);
+
+    Status st = FileBlockTestAccessor::call_set_downloaded(block);
+    ASSERT_FALSE(st.ok());
+    ASSERT_EQ(block.state(), io::FileBlock::State::EMPTY);
+    ASSERT_EQ(block.get_downloader(), 0);
+}
+
 } // namespace doris::io


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to