This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0d1c7c3d7e7 [fix](cloud) Fix cloud drop tablet tasks pile up (#58131)
0d1c7c3d7e7 is described below
commit 0d1c7c3d7e7341a3ac825545650923a957135bbf
Author: deardeng <[email protected]>
AuthorDate: Thu Nov 20 04:01:10 2025 +0800
[fix](cloud) Fix cloud drop tablet tasks pile up (#58131)
### What problem does this PR solve?
Previously, because cleaning a tablet is a lightweight operation on the
BE, it was assumed that drop tablet tasks would not cause a task backlog.
However, online observations showed that these tasks still piled up,
causing the BE to consume a lot of memory. Therefore, tablet
deduplication logic was added to prevent the task backlog.
---
be/src/agent/task_worker_pool.cpp | 6 ++----
.../test_clean_tablet_when_drop_force_table.groovy | 17 ++++++++++++++++-
2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/be/src/agent/task_worker_pool.cpp
b/be/src/agent/task_worker_pool.cpp
index 2ee7a21fb27..c0660939a6f 100644
--- a/be/src/agent/task_worker_pool.cpp
+++ b/be/src/agent/task_worker_pool.cpp
@@ -124,10 +124,6 @@ bool register_task_info(const TTaskType::type task_type,
int64_t signature) {
// no need to report task of these types
return true;
}
- if (task_type == TTaskType::type::DROP && config::is_cloud_mode()) {
- // cloud no need to report drop task status
- return true;
- }
if (signature == -1) { // No need to report task with unintialized
signature
return true;
@@ -1874,6 +1870,8 @@ void drop_tablet_callback(StorageEngine& engine, const
TAgentTaskRequest& req) {
void drop_tablet_callback(CloudStorageEngine& engine, const TAgentTaskRequest&
req) {
const auto& drop_tablet_req = req.drop_tablet_req;
+ // here drop_tablet_req.tablet_id is the signature of the task, see
DropReplicaTask in fe
+ Defer defer = [&] { remove_task_info(req.task_type, req.signature); };
DBUG_EXECUTE_IF("WorkPoolCloudDropTablet.drop_tablet_callback.failed", {
LOG_WARNING("WorkPoolCloudDropTablet.drop_tablet_callback.failed")
.tag("tablet_id", drop_tablet_req.tablet_id);
diff --git
a/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_drop_force_table.groovy
b/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_drop_force_table.groovy
index 48b4eedf5e6..6a48706718c 100644
---
a/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_drop_force_table.groovy
+++
b/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_drop_force_table.groovy
@@ -33,12 +33,25 @@ suite('test_clean_tablet_when_drop_force_table', 'docker') {
options.beConfigs += [
'report_tablet_interval_seconds=1',
'write_buffer_size=10240',
- 'write_buffer_size_for_agg=10240'
+ 'write_buffer_size_for_agg=10240',
+ 'sys_log_verbose_modules=task_worker_pool'
]
options.setFeNum(3)
options.setBeNum(3)
options.cloudMode = true
options.enableDebugPoints()
+
+ def checkBeLog = { String beLogPath ->
+ log.info("search be log path: {}", beLogPath)
+ def logFile = new File(beLogPath)
+ assertTrue(logFile.exists(), "BE log file not found: ${beLogPath}")
+ def queueZeroLine = logFile.readLines().find { line ->
+ line =~ /remove task info\. type=DROP, .*queue_size=0/
+ }
+ assertTrue(queueZeroLine != null,
+ "Expected to find log line with queue_size=0 in ${beLogPath},
but none matched.")
+ log.info("found queue_size=0 log line: {}", queueZeroLine)
+ }
def testCase = { tableName, waitTime, useDp=false->
def ms = cluster.getAllMetaservices().get(0)
@@ -191,6 +204,8 @@ suite('test_clean_tablet_when_drop_force_table', 'docker') {
}
}
+ String beLogPath = cluster.getBeByIndex(1).getLogFilePath()
+ checkBeLog(beLogPath)
}
docker(options) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]