github-actions[bot] commented on code in PR #64182:
URL: https://github.com/apache/doris/pull/64182#discussion_r3369681290


##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -656,51 +657,74 @@ void 
BlockFileCache::add_need_update_lru_block(FileBlockSPtr block) {
 std::string BlockFileCache::clear_file_cache_async() {
     LOG(INFO) << "start clear_file_cache_async, path=" << _cache_base_path;
     _lru_dumper->remove_lru_dump_files();
-    int64_t num_cells_all = 0;
-    int64_t num_cells_to_delete = 0;
-    int64_t num_cells_wait_recycle = 0;
-    int64_t num_files_all = 0;
     TEST_SYNC_POINT_CALLBACK("BlockFileCache::clear_file_cache_async");
+    auto summary = clear_file_cache_common(false);
+
+    std::stringstream ss;
+    ss << "finish clear_file_cache_async, path=" << _cache_base_path
+       << " num_files_all=" << summary.num_files_all << " num_cells_all=" << 
summary.num_cells_all
+       << " num_cells_to_delete=" << summary.num_cells_to_delete
+       << " num_cells_wait_recycle=" << summary.num_cells_wait_recycle;
+    auto msg = ss.str();
+    LOG(INFO) << msg;
+    _lru_dumper->remove_lru_dump_files();
+    return msg;
+}
+
+std::string BlockFileCache::clear_file_cache_sync() {
+    LOG(INFO) << "start clear_file_cache_sync, path=" << _cache_base_path;
+    pause_ttl_manager();
+    Defer resume_ttl_manager_guard {[this] { resume_ttl_manager(); }};
+
+    _lru_dumper->remove_lru_dump_files();
+    TEST_SYNC_POINT_CALLBACK("BlockFileCache::clear_file_cache_sync");
+    auto summary = clear_file_cache_common(true);
+
+    std::stringstream ss;
+    ss << "finish clear_file_cache_sync, path=" << _cache_base_path
+       << " num_files_all=" << summary.num_files_all << " num_cells_all=" << 
summary.num_cells_all
+       << " num_cells_to_delete=" << summary.num_cells_to_delete
+       << " num_cells_wait_recycle=" << summary.num_cells_wait_recycle;
+    auto msg = ss.str();
+    LOG(INFO) << msg;
+    _lru_dumper->remove_lru_dump_files();
+    return msg;
+}
+
+BlockFileCache::ClearFileCacheSummary BlockFileCache::clear_file_cache_common(
+        bool sync_remove_releasable) {
+    ClearFileCacheSummary summary;
     {
         SCOPED_CACHE_LOCK(_mutex, this);
 
         std::vector<FileBlockCell*> deleting_cells;
         for (auto& [_, offset_to_cell] : _files) {
-            ++num_files_all;
+            ++summary.num_files_all;
             for (auto& [_1, cell] : offset_to_cell) {
-                ++num_cells_all;
+                ++summary.num_cells_all;
                 deleting_cells.push_back(&cell);
             }
         }
 
-        // we cannot delete the element in the loop above, because it will 
break the iterator
+        // Removing cells invalidates _files iterators, so the scan first 
records cell pointers.
         for (auto& cell : deleting_cells) {
             if (!cell->releasable()) {
                 LOG(INFO) << "cell is not releasable, hash="
                           << " offset=" << cell->file_block->offset();
                 cell->file_block->set_deleting();
-                ++num_cells_wait_recycle;

Review Comment:
   This leaves the held block in `_files` with only `is_deleting()` set. A 
later `get_or_set()` for the same hash/offset does not filter deleting cells in 
`get_impl()`, so after `clear_file_caches(true)` returns, a new reader can 
still acquire this block as a `DOWNLOADED` cache hit while the old holder is 
alive. If traffic keeps reacquiring it, the holder cleanup condition 
(`use_count() == 2`) can also be delayed indefinitely, so the sync clear does 
not actually make held entries unavailable to new readers.
   
   Concrete flow: query A holds a downloaded block, admin calls 
`clear_file_caches(true)`, this branch marks it deleting and returns, query B 
reads the same file before A releases; `get_impl()` returns the same cell 
because it only checks ranges, not `is_deleting()`. Please make deleting blocks 
invisible to new lookups or otherwise detach them from `_files` while 
preserving the old holder's safe cleanup path, and add a test for this 
reacquire-after-sync-clear case.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to