This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 24586570b0f [enhance](job) optimize auto resume rule to adapt VCG 
failover (#59421)
24586570b0f is described below

commit 24586570b0f989761c57c97818da2ac5afceead8
Author: hui lai <[email protected]>
AuthorDate: Sun Dec 28 02:56:23 2025 +0800

    [enhance](job) optimize auto resume rule to adapt VCG failover (#59421)
    
    ### What problem does this PR solve?
    
    https://github.com/apache/doris/pull/52515 introduced VCG (Virtual
    Compute Group), which is used for multi-availability-zone disaster recovery.
    
    However, routine load jobs do not fully adapt to it: if a cluster in an
    availability zone crashes, VCG provides disaster recovery capabilities,
    but the job will not be automatically resumed. So this PR removes the
    `dead BE count` calculation when judging `isNeedAutoSchedule`.
    
    ### Release note
    
    None
---
 fe/fe-common/src/main/java/org/apache/doris/common/Config.java |  6 ------
 .../java/org/apache/doris/load/routineload/ScheduleRule.java   | 10 ----------
 2 files changed, 16 deletions(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 0a8625d6252..029420e5f19 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1381,12 +1381,6 @@ public class Config extends ConfigBase {
     @ConfField
     public static boolean check_java_version = true;
 
-    /**
-     * it can't auto-resume routine load job as long as one of the backends is 
down
-     */
-    @ConfField(mutable = true, masterOnly = true)
-    public static int max_tolerable_backend_down_num = 0;
-
     /**
      * a period for auto resume routine load
      */
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/ScheduleRule.java 
b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/ScheduleRule.java
index 8454bba7303..aff189a7c37 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/ScheduleRule.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/ScheduleRule.java
@@ -59,16 +59,6 @@ public class ScheduleRule {
                 && jobRoutine.pauseReason.getCode() != 
InternalErrorCode.MANUAL_PAUSE_ERR
                 && jobRoutine.pauseReason.getCode() != 
InternalErrorCode.TOO_MANY_FAILURE_ROWS_ERR
                 && jobRoutine.pauseReason.getCode() != 
InternalErrorCode.CANNOT_RESUME_ERR) {
-            int dead = deadBeCount();
-            if (dead > Config.max_tolerable_backend_down_num) {
-                if (LOG.isDebugEnabled()) {
-                    LOG.debug("dead backend num {} is larger than config {}, "
-                                    + "routine load job {} can not be auto 
rescheduled",
-                            dead, Config.max_tolerable_backend_down_num, 
jobRoutine.id);
-                }
-                return false;
-            }
-
             if (jobRoutine.latestResumeTimestamp == 0) { //the first resume
                 jobRoutine.latestResumeTimestamp = System.currentTimeMillis();
                 jobRoutine.autoResumeCount = 1;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to