This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 692dd73330d branch-4.0: [fix](cloud)Support
`cloud_tablet_rebalancer_interval_second` config dynamic modification #58198
(#58377)
692dd73330d is described below
commit 692dd73330da9c353bb128d0cc777ef160f00b57
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Nov 27 09:07:43 2025 +0800
branch-4.0: [fix](cloud)Support `cloud_tablet_rebalancer_interval_second`
config dynamic modification #58198 (#58377)
Cherry-picked from #58198
Co-authored-by: deardeng <[email protected]>
---
.../main/java/org/apache/doris/common/Config.java | 2 +-
.../doris/cloud/catalog/CloudTabletRebalancer.java | 20 +++-
.../balance/test_expanding_node_balance.groovy | 111 +++++++++++++++++++++
3 files changed, 130 insertions(+), 3 deletions(-)
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 57d12602f4f..3342153d991 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -3319,7 +3319,7 @@ public class Config extends ConfigBase {
public static int drop_user_notify_ms_max_times = 86400;
@ConfField(mutable = true, masterOnly = true)
- public static long cloud_tablet_rebalancer_interval_second = 20;
+ public static long cloud_tablet_rebalancer_interval_second = 1;
@ConfField(mutable = true, masterOnly = true)
public static boolean enable_cloud_partition_balance = true;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
index a33667f0f64..0c631306703 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
@@ -306,7 +306,23 @@ public class CloudTabletRebalancer extends MasterDaemon {
checkDecommissionState(clusterToBes);
inited = true;
- LOG.info("finished to rebalancer. cost: {} ms",
(System.currentTimeMillis() - start));
+ long sleepSeconds = Config.cloud_tablet_rebalancer_interval_second;
+ if (sleepSeconds < 0L) {
+ LOG.warn("cloud tablet rebalance interval second is negative,
change it to default 1s");
+ sleepSeconds = 1L;
+ }
+ long balanceEnd = System.currentTimeMillis();
+ if
(DebugPointUtil.isEnable("CloudTabletRebalancer.balanceEnd.tooLong")) {
+ LOG.info("debug pointCloudTabletRebalancer.balanceEnd.tooLong");
+ // push the recorded balance end time later (simulating a slow run) so the next balance is triggered immediately
+ balanceEnd += (Config.cloud_tablet_rebalancer_interval_second +
10L) * 1000L;
+ }
+ if (balanceEnd - start >
Config.cloud_tablet_rebalancer_interval_second * 1000L) {
+ sleepSeconds = 0L;
+ }
+ setInterval(sleepSeconds * 1000L);
+ LOG.info("finished to rebalancer. cost: {} ms, rebalancer sche
interval {} s",
+ (System.currentTimeMillis() - start), sleepSeconds);
}
private void buildClusterToBackendMap() {
@@ -895,7 +911,7 @@ public class CloudTabletRebalancer extends MasterDaemon {
LOG.warn("check pre tablets {} cache status {} {}", tabletIds,
result.getStatus().getStatusCode(),
result.getStatus().getErrorMsgs());
} else {
- LOG.info("check pre tablets {} cache succ status {} {}",
tabletIds, result.getStatus().getStatusCode(),
+ LOG.debug("check pre tablets {} cache succ status {} {}",
tabletIds, result.getStatus().getStatusCode(),
result.getStatus().getErrorMsgs());
}
return result.getTaskDone();
diff --git
a/regression-test/suites/cloud_p0/balance/test_expanding_node_balance.groovy
b/regression-test/suites/cloud_p0/balance/test_expanding_node_balance.groovy
new file mode 100644
index 00000000000..1c8874864c0
--- /dev/null
+++ b/regression-test/suites/cloud_p0/balance/test_expanding_node_balance.groovy
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.apache.doris.regression.suite.ClusterOptions
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite('test_expanding_node_balance', 'docker') {
+ if (!isCloudMode()) {
+ return;
+ }
+
+ def clusterOptions = [
+ new ClusterOptions(),
+ new ClusterOptions(),
+ new ClusterOptions(),
+ ]
+
+ for (options in clusterOptions) {
+ options.feConfigs += [
+ 'cloud_cluster_check_interval_second=1',
+ 'cloud_tablet_rebalancer_interval_second=20',
+ 'sys_log_verbose_modules=org',
+ 'heartbeat_interval_second=1',
+ 'rehash_tablet_after_be_dead_seconds=3600',
+ 'cloud_warm_up_for_rebalance_type=peer_read_async_warmup',
+ // disable Auto Analysis Job Executor
+ 'auto_check_statistics_in_minutes=60',
+ ]
+ options.cloudMode = true
+ options.setFeNum(1)
+ options.setBeNum(1)
+ options.enableDebugPoints()
+ }
+
+
+ def testCase = { command, expectCost ->
+ sql """
+ CREATE TABLE `fact_sales` (
+ `order_id` varchar(255) NOT NULL,
+ `order_line_id` varchar(255) NOT NULL,
+ `order_date` date NOT NULL,
+ `time_of_day` varchar(50) NOT NULL,
+ `season` varchar(50) NOT NULL,
+ `month` int NOT NULL,
+ `location_id` varchar(255) NOT NULL,
+ `region` varchar(100) NOT NULL,
+ `product_name` varchar(255) NOT NULL,
+ `quantity` int NOT NULL,
+ `sales_amount` double NOT NULL,
+ `discount_percentage` int NOT NULL,
+ `product_id` varchar(255) NOT NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`order_id`, `order_line_id`)
+ DISTRIBUTED BY HASH(`order_id`) BUCKETS 256
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ )
+ """
+
+ cluster.addBackend(15, "compute_cluster")
+
+ sql """
+ $command
+ """
+ def begin = System.currentTimeMillis();
+ awaitUntil(1000, 10) {
+ def showRet = sql_return_maparray """ADMIN SHOW REPLICA
DISTRIBUTION FROM fact_sales"""
+ logger.info("show result {}", showRet)
+ showRet.any { row ->
+ Integer.valueOf((String) row.ReplicaNum) == 16
+ }
+ }
+ def cost = (System.currentTimeMillis() - begin) / 1000;
+ log.info("exec command: {}\n time cost: {}s", command, cost)
+ assertTrue(cost < expectCost, "cost assert wrong")
+ }
+
+ docker(clusterOptions[0]) {
+ def command = 'admin set frontend
config("cloud_min_balance_tablet_num_per_run"="16");'
+ // assert < 300s
+ testCase(command, 300)
+ }
+
+ docker(clusterOptions[1]) {
+ def command = 'admin set frontend
config("cloud_tablet_rebalancer_interval_second"="0");'
+ // assert < 50s
+ testCase(command, 50)
+ }
+
+ docker(clusterOptions[2]) {
+
GetDebugPoint().enableDebugPointForAllFEs("CloudTabletRebalancer.balanceEnd.tooLong")
+ // no-op command: this case relies only on the debug point enabled above
+ def command = 'select 1'
+ // assert < 50s
+ testCase(command, 50)
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]