This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d5b1eb1d0a9 [chore](cloud) Fix clear file cache implementation and regression case (#33281)
d5b1eb1d0a9 is described below
commit d5b1eb1d0a98516d2e58a5f963a5942a22041472
Author: Gavin Chou <[email protected]>
AuthorDate: Sat Apr 6 21:57:45 2024 +0800
[chore](cloud) Fix clear file cache implementation and regression case (#33281)
---
be/src/http/action/clear_file_cache_action.cpp | 9 ++--
be/src/io/cache/block_file_cache.cpp | 60 ++++++++++++++++++----
be/src/io/cache/block_file_cache.h | 10 ++--
be/src/io/cache/block_file_cache_factory.cpp | 13 ++---
be/src/io/cache/block_file_cache_factory.h | 10 +++-
.../cloud_p0/cache/http/test_clear_cache.groovy | 5 +-
.../cache/http/test_clear_cache_async.groovy | 5 +-
7 files changed, 78 insertions(+), 34 deletions(-)
diff --git a/be/src/http/action/clear_file_cache_action.cpp b/be/src/http/action/clear_file_cache_action.cpp
index b1059c359b1..6a4a2517508 100644
--- a/be/src/http/action/clear_file_cache_action.cpp
+++ b/be/src/http/action/clear_file_cache_action.cpp
@@ -32,12 +32,9 @@ const std::string SYNC = "sync";
void ClearFileCacheAction::handle(HttpRequest* req) {
req->add_output_header(HttpHeaders::CONTENT_TYPE, "application/json");
std::string sync = req->param(SYNC);
- if (to_lower(sync) == "true") {
- io::FileCacheFactory::instance()->clear_file_caches(true);
- } else {
- io::FileCacheFactory::instance()->clear_file_caches(false);
- }
- HttpChannel::send_reply(req, HttpStatus::OK, Status::OK().to_json());
+ auto ret =
+ io::FileCacheFactory::instance()->clear_file_caches(sync == "TRUE"
|| sync == "true");
+ HttpChannel::send_reply(req, HttpStatus::OK, ret);
}
} // namespace doris
diff --git a/be/src/io/cache/block_file_cache.cpp b/be/src/io/cache/block_file_cache.cpp
index 47bfcbaed52..b41cb9f6a5f 100644
--- a/be/src/io/cache/block_file_cache.cpp
+++ b/be/src/io/cache/block_file_cache.cpp
@@ -367,14 +367,21 @@ FileBlocks BlockFileCache::get_impl(const UInt128Wrapper&
hash, const CacheConte
return result;
}
-void BlockFileCache::clear_file_cache_async() {
+std::string BlockFileCache::clear_file_cache_async() {
+ LOG(INFO) << "start clear_file_cache_async, path=" << _cache_base_path;
+ int64_t num_cells_all = 0;
+ int64_t num_cells_to_delete = 0;
+ int64_t num_files_all = 0;
{
std::lock_guard cache_lock(_mutex);
if (!_async_clear_file_cache) {
for (auto& [_, offset_to_cell] : _files) {
+ ++num_files_all;
for (auto& [_, cell] : offset_to_cell) {
+ ++num_cells_all;
if (cell.releasable()) {
cell.is_deleted = true;
+ ++num_cells_to_delete;
}
}
}
@@ -382,6 +389,13 @@ void BlockFileCache::clear_file_cache_async() {
}
}
TEST_SYNC_POINT_CALLBACK("BlockFileCache::recycle_deleted_blocks");
+ std::stringstream ss;
+ ss << "finish clear_file_cache_async, path=" << _cache_base_path
+ << " num_files_all=" << num_files_all << " num_cells_all=" <<
num_cells_all
+ << " num_cells_to_delete=" << num_cells_to_delete;
+ auto msg = ss.str();
+ LOG(INFO) << msg;
+ return msg;
}
void BlockFileCache::recycle_deleted_blocks() {
@@ -1502,13 +1516,32 @@ bool BlockFileCache::try_reserve_for_lazy_load(size_t
size,
return !_disk_resource_limit_mode || removed_size >= size;
}
-Status BlockFileCache::clear_file_cache_directly() {
+std::string BlockFileCache::clear_file_cache_directly() {
using namespace std::chrono;
- auto start_time = steady_clock::time_point();
+ std::stringstream ss;
+ auto start = steady_clock::now();
std::lock_guard cache_lock(_mutex);
- LOG_INFO("Start clear file cache directly").tag("path", _cache_base_path);
-
RETURN_IF_ERROR(global_local_filesystem()->delete_directory(_cache_base_path));
-
RETURN_IF_ERROR(global_local_filesystem()->create_directory(_cache_base_path));
+ LOG_INFO("start clear_file_cache_directly").tag("path", _cache_base_path);
+
+ auto st = global_local_filesystem()->delete_directory(_cache_base_path);
+ if (!st.ok()) {
+ ss << " failed to clear_file_cache_directly, path=" << _cache_base_path
+ << " delete dir failed: " << st;
+ LOG(WARNING) << ss.str();
+ return ss.str();
+ }
+ st = global_local_filesystem()->create_directory(_cache_base_path);
+ if (!st.ok()) {
+ ss << " failed to clear_file_cache_directly, path=" << _cache_base_path
+ << " create dir failed: " << st;
+ LOG(WARNING) << ss.str();
+ return ss.str();
+ }
+ int64_t num_files = _files.size();
+ int64_t cache_size = _cur_cache_size;
+ int64_t index_queue_size = _index_queue.get_elements_num(cache_lock);
+ int64_t normal_queue_size = _normal_queue.get_elements_num(cache_lock);
+ int64_t disposible_queue_size =
_disposable_queue.get_elements_num(cache_lock);
_files.clear();
_cur_cache_size = 0;
_time_to_key.clear();
@@ -1516,12 +1549,17 @@ Status BlockFileCache::clear_file_cache_directly() {
_index_queue.clear(cache_lock);
_normal_queue.clear(cache_lock);
_disposable_queue.clear(cache_lock);
- auto use_time = duration_cast<milliseconds>(steady_clock::time_point() -
start_time);
- LOG_INFO("End clear file cache directly")
- .tag("path", _async_clear_file_cache)
- .tag("use_time", static_cast<int64_t>(use_time.count()));
- return Status::OK();
+ ss << "finish clear_file_cache_directly"
+ << " path=" << _cache_base_path
+ << " time_elapsed=" << duration_cast<milliseconds>(steady_clock::now()
- start).count()
+ << " num_files=" << num_files << " cache_size=" << cache_size
+ << " index_queue_size=" << index_queue_size << " normal_queue_size=" <<
normal_queue_size
+ << " disposible_queue_size=" << disposible_queue_size;
+ auto msg = ss.str();
+ LOG(INFO) << msg;
+ return msg;
}
+
template void BlockFileCache::remove(FileBlockSPtr file_block,
std::lock_guard<std::mutex>& cache_lock,
std::lock_guard<std::mutex>& block_lock);
diff --git a/be/src/io/cache/block_file_cache.h b/be/src/io/cache/block_file_cache.h
index 4de78f228e6..282148aa566 100644
--- a/be/src/io/cache/block_file_cache.h
+++ b/be/src/io/cache/block_file_cache.h
@@ -87,9 +87,13 @@ public:
FileBlocksHolder get_or_set(const UInt128Wrapper& hash, size_t offset,
size_t size,
const CacheContext& context);
- void clear_file_cache_async();
- // use for test
- Status clear_file_cache_directly();
+ /**
+ * Clear all cached data for this cache instance async
+ *
+ * @returns summary message
+ */
+ std::string clear_file_cache_async();
+ std::string clear_file_cache_directly();
/// For debug.
std::string dump_structure(const UInt128Wrapper& hash);
diff --git a/be/src/io/cache/block_file_cache_factory.cpp b/be/src/io/cache/block_file_cache_factory.cpp
index 76068eb5898..54c99b86680 100644
--- a/be/src/io/cache/block_file_cache_factory.cpp
+++ b/be/src/io/cache/block_file_cache_factory.cpp
@@ -118,17 +118,12 @@ FileCacheFactory::get_query_context_holders(const
TUniqueId& query_id) {
return holders;
}
-void FileCacheFactory::clear_file_caches(bool sync) {
+std::string FileCacheFactory::clear_file_caches(bool sync) {
+ std::stringstream ss;
for (const auto& cache : _caches) {
- if (sync) {
- Status st = cache->clear_file_cache_directly();
- if (st.ok()) {
- LOG_WARNING("").error(st);
- }
- } else {
- cache->clear_file_cache_async();
- }
+ ss << (sync ? cache->clear_file_cache_directly() :
cache->clear_file_cache_async()) << "\n";
}
+ return ss.str();
}
} // namespace io
diff --git a/be/src/io/cache/block_file_cache_factory.h b/be/src/io/cache/block_file_cache_factory.h
index 55f1a8b4041..a6ff2fa08fa 100644
--- a/be/src/io/cache/block_file_cache_factory.h
+++ b/be/src/io/cache/block_file_cache_factory.h
@@ -59,7 +59,15 @@ public:
BlockFileCache* get_by_path(const std::string& cache_base_path);
std::vector<BlockFileCache::QueryFileCacheContextHolderPtr>
get_query_context_holders(
const TUniqueId& query_id);
- void clear_file_caches(bool sync);
+
+ /**
+ * Clears data of all file cache instances
+ *
+ * @param sync wait until all data cleared
+ * @return summary message
+ */
+ std::string clear_file_caches(bool sync);
+
FileCacheFactory() = default;
FileCacheFactory& operator=(const FileCacheFactory&) = delete;
FileCacheFactory(const FileCacheFactory&) = delete;
diff --git a/regression-test/suites/cloud_p0/cache/http/test_clear_cache.groovy b/regression-test/suites/cloud_p0/cache/http/test_clear_cache.groovy
index d7a8f641193..963705577c4 100644
--- a/regression-test/suites/cloud_p0/cache/http/test_clear_cache.groovy
+++ b/regression-test/suites/cloud_p0/cache/http/test_clear_cache.groovy
@@ -36,13 +36,14 @@ suite("test_clear_cache") {
backendId = backendIdToBackendIP.keySet()[0]
def url = backendIdToBackendIP.get(backendId) + ":" +
backendIdToBackendHttpPort.get(backendId) + """/api/clear_file_cache"""
- logger.info(url)
+ url = url + "?sync=true"
+ logger.info("clear file cache URL:" + url)
def clearFileCache = { check_func ->
httpTest {
endpoint ""
uri url
op "post"
- body "{\"sync\"=\"true\"}"
+ body ""
check check_func
}
}
diff --git a/regression-test/suites/cloud_p0/cache/http/test_clear_cache_async.groovy b/regression-test/suites/cloud_p0/cache/http/test_clear_cache_async.groovy
index 43e9482e0b9..4ac6427de82 100644
--- a/regression-test/suites/cloud_p0/cache/http/test_clear_cache_async.groovy
+++ b/regression-test/suites/cloud_p0/cache/http/test_clear_cache_async.groovy
@@ -37,13 +37,14 @@ suite("test_clear_cache_async") {
backendId = backendIdToBackendIP.keySet()[0]
def url = backendIdToBackendIP.get(backendId) + ":" +
backendIdToBackendHttpPort.get(backendId) + """/api/clear_file_cache"""
- logger.info(url)
+ url = url + "?sync=false"
+ logger.info("clear file cache URL:" + url)
def clearFileCache = { check_func ->
httpTest {
endpoint ""
uri url
op "post"
- body "{\"sync\"=\"false\"}"
+ body ""
check check_func
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]