This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d5b1eb1d0a9 [chore](cloud) Fix clear file cache implementation and regression case (#33281)
d5b1eb1d0a9 is described below

commit d5b1eb1d0a98516d2e58a5f963a5942a22041472
Author: Gavin Chou <[email protected]>
AuthorDate: Sat Apr 6 21:57:45 2024 +0800

    [chore](cloud) Fix clear file cache implementation and regression case (#33281)
---
 be/src/http/action/clear_file_cache_action.cpp     |  9 ++--
 be/src/io/cache/block_file_cache.cpp               | 60 ++++++++++++++++++----
 be/src/io/cache/block_file_cache.h                 | 10 ++--
 be/src/io/cache/block_file_cache_factory.cpp       | 13 ++---
 be/src/io/cache/block_file_cache_factory.h         | 10 +++-
 .../cloud_p0/cache/http/test_clear_cache.groovy    |  5 +-
 .../cache/http/test_clear_cache_async.groovy       |  5 +-
 7 files changed, 78 insertions(+), 34 deletions(-)

diff --git a/be/src/http/action/clear_file_cache_action.cpp b/be/src/http/action/clear_file_cache_action.cpp
index b1059c359b1..6a4a2517508 100644
--- a/be/src/http/action/clear_file_cache_action.cpp
+++ b/be/src/http/action/clear_file_cache_action.cpp
@@ -32,12 +32,9 @@ const std::string SYNC = "sync";
 void ClearFileCacheAction::handle(HttpRequest* req) {
     req->add_output_header(HttpHeaders::CONTENT_TYPE, "application/json");
     std::string sync = req->param(SYNC);
-    if (to_lower(sync) == "true") {
-        io::FileCacheFactory::instance()->clear_file_caches(true);
-    } else {
-        io::FileCacheFactory::instance()->clear_file_caches(false);
-    }
-    HttpChannel::send_reply(req, HttpStatus::OK, Status::OK().to_json());
+    auto ret =
+            io::FileCacheFactory::instance()->clear_file_caches(sync == "TRUE" 
|| sync == "true");
+    HttpChannel::send_reply(req, HttpStatus::OK, ret);
 }
 
 } // namespace doris
diff --git a/be/src/io/cache/block_file_cache.cpp b/be/src/io/cache/block_file_cache.cpp
index 47bfcbaed52..b41cb9f6a5f 100644
--- a/be/src/io/cache/block_file_cache.cpp
+++ b/be/src/io/cache/block_file_cache.cpp
@@ -367,14 +367,21 @@ FileBlocks BlockFileCache::get_impl(const UInt128Wrapper& 
hash, const CacheConte
     return result;
 }
 
-void BlockFileCache::clear_file_cache_async() {
+std::string BlockFileCache::clear_file_cache_async() {
+    LOG(INFO) << "start clear_file_cache_async, path=" << _cache_base_path;
+    int64_t num_cells_all = 0;
+    int64_t num_cells_to_delete = 0;
+    int64_t num_files_all = 0;
     {
         std::lock_guard cache_lock(_mutex);
         if (!_async_clear_file_cache) {
             for (auto& [_, offset_to_cell] : _files) {
+                ++num_files_all;
                 for (auto& [_, cell] : offset_to_cell) {
+                    ++num_cells_all;
                     if (cell.releasable()) {
                         cell.is_deleted = true;
+                        ++num_cells_to_delete;
                     }
                 }
             }
@@ -382,6 +389,13 @@ void BlockFileCache::clear_file_cache_async() {
         }
     }
     TEST_SYNC_POINT_CALLBACK("BlockFileCache::recycle_deleted_blocks");
+    std::stringstream ss;
+    ss << "finish clear_file_cache_async, path=" << _cache_base_path
+       << " num_files_all=" << num_files_all << " num_cells_all=" << 
num_cells_all
+       << " num_cells_to_delete=" << num_cells_to_delete;
+    auto msg = ss.str();
+    LOG(INFO) << msg;
+    return msg;
 }
 
 void BlockFileCache::recycle_deleted_blocks() {
@@ -1502,13 +1516,32 @@ bool BlockFileCache::try_reserve_for_lazy_load(size_t 
size,
     return !_disk_resource_limit_mode || removed_size >= size;
 }
 
-Status BlockFileCache::clear_file_cache_directly() {
+std::string BlockFileCache::clear_file_cache_directly() {
     using namespace std::chrono;
-    auto start_time = steady_clock::time_point();
+    std::stringstream ss;
+    auto start = steady_clock::now();
     std::lock_guard cache_lock(_mutex);
-    LOG_INFO("Start clear file cache directly").tag("path", _cache_base_path);
-    
RETURN_IF_ERROR(global_local_filesystem()->delete_directory(_cache_base_path));
-    
RETURN_IF_ERROR(global_local_filesystem()->create_directory(_cache_base_path));
+    LOG_INFO("start clear_file_cache_directly").tag("path", _cache_base_path);
+
+    auto st = global_local_filesystem()->delete_directory(_cache_base_path);
+    if (!st.ok()) {
+        ss << " failed to clear_file_cache_directly, path=" << _cache_base_path
+           << " delete dir failed: " << st;
+        LOG(WARNING) << ss.str();
+        return ss.str();
+    }
+    st = global_local_filesystem()->create_directory(_cache_base_path);
+    if (!st.ok()) {
+        ss << " failed to clear_file_cache_directly, path=" << _cache_base_path
+           << " create dir failed: " << st;
+        LOG(WARNING) << ss.str();
+        return ss.str();
+    }
+    int64_t num_files = _files.size();
+    int64_t cache_size = _cur_cache_size;
+    int64_t index_queue_size = _index_queue.get_elements_num(cache_lock);
+    int64_t normal_queue_size = _normal_queue.get_elements_num(cache_lock);
+    int64_t disposible_queue_size = 
_disposable_queue.get_elements_num(cache_lock);
     _files.clear();
     _cur_cache_size = 0;
     _time_to_key.clear();
@@ -1516,12 +1549,17 @@ Status BlockFileCache::clear_file_cache_directly() {
     _index_queue.clear(cache_lock);
     _normal_queue.clear(cache_lock);
     _disposable_queue.clear(cache_lock);
-    auto use_time = duration_cast<milliseconds>(steady_clock::time_point() - 
start_time);
-    LOG_INFO("End clear file cache directly")
-            .tag("path", _async_clear_file_cache)
-            .tag("use_time", static_cast<int64_t>(use_time.count()));
-    return Status::OK();
+    ss << "finish clear_file_cache_directly"
+       << " path=" << _cache_base_path
+       << " time_elapsed=" << duration_cast<milliseconds>(steady_clock::now() 
- start).count()
+       << " num_files=" << num_files << " cache_size=" << cache_size
+       << " index_queue_size=" << index_queue_size << " normal_queue_size=" << 
normal_queue_size
+       << " disposible_queue_size=" << disposible_queue_size;
+    auto msg = ss.str();
+    LOG(INFO) << msg;
+    return msg;
 }
+
 template void BlockFileCache::remove(FileBlockSPtr file_block,
                                      std::lock_guard<std::mutex>& cache_lock,
                                      std::lock_guard<std::mutex>& block_lock);
diff --git a/be/src/io/cache/block_file_cache.h b/be/src/io/cache/block_file_cache.h
index 4de78f228e6..282148aa566 100644
--- a/be/src/io/cache/block_file_cache.h
+++ b/be/src/io/cache/block_file_cache.h
@@ -87,9 +87,13 @@ public:
     FileBlocksHolder get_or_set(const UInt128Wrapper& hash, size_t offset, 
size_t size,
                                 const CacheContext& context);
 
-    void clear_file_cache_async();
-    // use for test
-    Status clear_file_cache_directly();
+    /**
+     * Clear all cached data for this cache instance async
+     *
+     * @returns summary message
+     */
+    std::string clear_file_cache_async();
+    std::string clear_file_cache_directly();
 
     /// For debug.
     std::string dump_structure(const UInt128Wrapper& hash);
diff --git a/be/src/io/cache/block_file_cache_factory.cpp b/be/src/io/cache/block_file_cache_factory.cpp
index 76068eb5898..54c99b86680 100644
--- a/be/src/io/cache/block_file_cache_factory.cpp
+++ b/be/src/io/cache/block_file_cache_factory.cpp
@@ -118,17 +118,12 @@ FileCacheFactory::get_query_context_holders(const 
TUniqueId& query_id) {
     return holders;
 }
 
-void FileCacheFactory::clear_file_caches(bool sync) {
+std::string FileCacheFactory::clear_file_caches(bool sync) {
+    std::stringstream ss;
     for (const auto& cache : _caches) {
-        if (sync) {
-            Status st = cache->clear_file_cache_directly();
-            if (st.ok()) {
-                LOG_WARNING("").error(st);
-            }
-        } else {
-            cache->clear_file_cache_async();
-        }
+        ss << (sync ? cache->clear_file_cache_directly() : 
cache->clear_file_cache_async()) << "\n";
     }
+    return ss.str();
 }
 
 } // namespace io
diff --git a/be/src/io/cache/block_file_cache_factory.h 
b/be/src/io/cache/block_file_cache_factory.h
index 55f1a8b4041..a6ff2fa08fa 100644
--- a/be/src/io/cache/block_file_cache_factory.h
+++ b/be/src/io/cache/block_file_cache_factory.h
@@ -59,7 +59,15 @@ public:
     BlockFileCache* get_by_path(const std::string& cache_base_path);
     std::vector<BlockFileCache::QueryFileCacheContextHolderPtr> 
get_query_context_holders(
             const TUniqueId& query_id);
-    void clear_file_caches(bool sync);
+
+    /**
+     * Clears data of all file cache instances
+     *
+     * @param sync wait until all data cleared
+     * @return summary message
+     */
+    std::string clear_file_caches(bool sync);
+
     FileCacheFactory() = default;
     FileCacheFactory& operator=(const FileCacheFactory&) = delete;
     FileCacheFactory(const FileCacheFactory&) = delete;
diff --git a/regression-test/suites/cloud_p0/cache/http/test_clear_cache.groovy b/regression-test/suites/cloud_p0/cache/http/test_clear_cache.groovy
index d7a8f641193..963705577c4 100644
--- a/regression-test/suites/cloud_p0/cache/http/test_clear_cache.groovy
+++ b/regression-test/suites/cloud_p0/cache/http/test_clear_cache.groovy
@@ -36,13 +36,14 @@ suite("test_clear_cache") {
 
     backendId = backendIdToBackendIP.keySet()[0]
     def url = backendIdToBackendIP.get(backendId) + ":" + 
backendIdToBackendHttpPort.get(backendId) + """/api/clear_file_cache"""
-    logger.info(url)
+    url = url + "?sync=true"
+    logger.info("clear file cache URL:" + url)
     def clearFileCache = { check_func ->
         httpTest {
             endpoint ""
             uri url
             op "post"
-            body "{\"sync\"=\"true\"}"
+            body ""
             check check_func
         }
     }
diff --git a/regression-test/suites/cloud_p0/cache/http/test_clear_cache_async.groovy b/regression-test/suites/cloud_p0/cache/http/test_clear_cache_async.groovy
index 43e9482e0b9..4ac6427de82 100644
--- a/regression-test/suites/cloud_p0/cache/http/test_clear_cache_async.groovy
+++ b/regression-test/suites/cloud_p0/cache/http/test_clear_cache_async.groovy
@@ -37,13 +37,14 @@ suite("test_clear_cache_async") {
 
     backendId = backendIdToBackendIP.keySet()[0]
     def url = backendIdToBackendIP.get(backendId) + ":" + 
backendIdToBackendHttpPort.get(backendId) + """/api/clear_file_cache"""
-    logger.info(url)
+    url = url + "?sync=false"
+    logger.info("clear file cache URL:" + url)
     def clearFileCache = { check_func ->
         httpTest {
             endpoint ""
             uri url
             op "post"
-            body "{\"sync\"=\"false\"}"
+            body ""
             check check_func
         }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to