This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 692dd73330d branch-4.0: [fix](cloud)Support 
`cloud_tablet_rebalancer_interval_second` config dynamic modification #58198 
(#58377)
692dd73330d is described below

commit 692dd73330da9c353bb128d0cc777ef160f00b57
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Nov 27 09:07:43 2025 +0800

    branch-4.0: [fix](cloud)Support `cloud_tablet_rebalancer_interval_second` config dynamic modification #58198 (#58377)
    
    Cherry-picked from #58198
    
    Co-authored-by: deardeng <[email protected]>
---
 .../main/java/org/apache/doris/common/Config.java  |   2 +-
 .../doris/cloud/catalog/CloudTabletRebalancer.java |  20 +++-
 .../balance/test_expanding_node_balance.groovy     | 111 +++++++++++++++++++++
 3 files changed, 130 insertions(+), 3 deletions(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 57d12602f4f..3342153d991 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -3319,7 +3319,7 @@ public class Config extends ConfigBase {
     public static int drop_user_notify_ms_max_times = 86400;
 
     @ConfField(mutable = true, masterOnly = true)
-    public static long cloud_tablet_rebalancer_interval_second = 20;
+    public static long cloud_tablet_rebalancer_interval_second = 1;
 
     @ConfField(mutable = true, masterOnly = true)
     public static boolean enable_cloud_partition_balance = true;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
index a33667f0f64..0c631306703 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
@@ -306,7 +306,23 @@ public class CloudTabletRebalancer extends MasterDaemon {
 
         checkDecommissionState(clusterToBes);
         inited = true;
-        LOG.info("finished to rebalancer. cost: {} ms", 
(System.currentTimeMillis() - start));
+        long sleepSeconds = Config.cloud_tablet_rebalancer_interval_second;
+        if (sleepSeconds < 0L) {
+            LOG.warn("cloud tablet rebalance interval second is negative, 
change it to default 1s");
+            sleepSeconds = 1L;
+        }
+        long balanceEnd = System.currentTimeMillis();
+        if 
(DebugPointUtil.isEnable("CloudTabletRebalancer.balanceEnd.tooLong")) {
+            LOG.info("debug pointCloudTabletRebalancer.balanceEnd.tooLong");
+            // inflate the balance end time so this run appears longer than the interval, triggering the next balance immediately
+            balanceEnd += (Config.cloud_tablet_rebalancer_interval_second + 
10L) * 1000L;
+        }
+        if (balanceEnd - start > 
Config.cloud_tablet_rebalancer_interval_second * 1000L) {
+            sleepSeconds = 0L;
+        }
+        setInterval(sleepSeconds * 1000L);
+        LOG.info("finished to rebalancer. cost: {} ms, rebalancer sche 
interval {} s",
+                (System.currentTimeMillis() - start), sleepSeconds);
     }
 
     private void buildClusterToBackendMap() {
@@ -895,7 +911,7 @@ public class CloudTabletRebalancer extends MasterDaemon {
                 LOG.warn("check pre tablets {} cache status {} {}", tabletIds, 
result.getStatus().getStatusCode(),
                         result.getStatus().getErrorMsgs());
             } else {
-                LOG.info("check pre tablets {} cache succ status {} {}", 
tabletIds, result.getStatus().getStatusCode(),
+                LOG.debug("check pre tablets {} cache succ status {} {}", 
tabletIds, result.getStatus().getStatusCode(),
                         result.getStatus().getErrorMsgs());
             }
             return result.getTaskDone();
diff --git a/regression-test/suites/cloud_p0/balance/test_expanding_node_balance.groovy b/regression-test/suites/cloud_p0/balance/test_expanding_node_balance.groovy
new file mode 100644
index 00000000000..1c8874864c0
--- /dev/null
+++ b/regression-test/suites/cloud_p0/balance/test_expanding_node_balance.groovy
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.apache.doris.regression.suite.ClusterOptions
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite('test_expanding_node_balance', 'docker') {
+    if (!isCloudMode()) {
+        return;
+    }
+
+    def clusterOptions = [
+        new ClusterOptions(),
+        new ClusterOptions(),
+        new ClusterOptions(),
+    ]
+
+    for (options in clusterOptions) {
+        options.feConfigs += [
+            'cloud_cluster_check_interval_second=1',
+            'cloud_tablet_rebalancer_interval_second=20',
+            'sys_log_verbose_modules=org',
+            'heartbeat_interval_second=1',
+            'rehash_tablet_after_be_dead_seconds=3600',
+            'cloud_warm_up_for_rebalance_type=peer_read_async_warmup',
+            // disable Auto Analysis Job Executor
+            'auto_check_statistics_in_minutes=60',
+        ]
+        options.cloudMode = true
+        options.setFeNum(1)
+        options.setBeNum(1)
+        options.enableDebugPoints()
+    }
+
+
+    def testCase = { command, expectCost ->
+        sql """
+        CREATE TABLE `fact_sales` (
+            `order_id` varchar(255) NOT NULL,
+            `order_line_id` varchar(255) NOT NULL,
+            `order_date` date NOT NULL,
+            `time_of_day` varchar(50) NOT NULL,
+            `season` varchar(50) NOT NULL,
+            `month` int NOT NULL,
+            `location_id` varchar(255) NOT NULL,
+            `region` varchar(100) NOT NULL,
+            `product_name` varchar(255) NOT NULL,
+            `quantity` int NOT NULL,
+            `sales_amount` double NOT NULL,
+            `discount_percentage` int NOT NULL,
+            `product_id` varchar(255) NOT NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`order_id`, `order_line_id`)
+        DISTRIBUTED BY HASH(`order_id`) BUCKETS 256
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        )
+        """
+
+        cluster.addBackend(15, "compute_cluster")
+
+        sql """
+        $command
+        """
+        def begin = System.currentTimeMillis();
+        awaitUntil(1000, 10) {
+            def showRet = sql_return_maparray """ADMIN SHOW REPLICA 
DISTRIBUTION FROM fact_sales"""
+            logger.info("show result {}", showRet)
+            showRet.any { row -> 
+                Integer.valueOf((String) row.ReplicaNum) == 16
+            }
+        }
+        def cost = (System.currentTimeMillis() - begin) / 1000;
+        log.info("exec command: {}\n  time cost: {}s", command, cost)
+        assertTrue(cost < expectCost, "cost assert wrong")
+    }
+
+    docker(clusterOptions[0]) {
+        def command = 'admin set frontend 
config("cloud_min_balance_tablet_num_per_run"="16");' 
+        // assert < 300s
+        testCase(command, 300)
+    }
+
+    docker(clusterOptions[1]) {
+        def command = 'admin set frontend 
config("cloud_tablet_rebalancer_interval_second"="0");' 
+        // assert < 50s
+        testCase(command, 50)
+    }
+
+    docker(clusterOptions[2]) {
+        
GetDebugPoint().enableDebugPointForAllFEs("CloudTabletRebalancer.balanceEnd.tooLong")
+        // do nothing
+        def command = 'select 1'
+        // assert < 50s
+        testCase(command, 50)
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to