This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 24586570b0f [enhance](job) optimize auto resume rule to adapt VCG
failover (#59421)
24586570b0f is described below
commit 24586570b0f989761c57c97818da2ac5afceead8
Author: hui lai <[email protected]>
AuthorDate: Sun Dec 28 02:56:23 2025 +0800
[enhance](job) optimize auto resume rule to adapt VCG failover (#59421)
### What problem does this PR solve?
PR https://github.com/apache/doris/pull/52515 introduced VCG (Virtual
Compute Group) for multi-availability-zone disaster recovery.
However, routine load jobs do not fully adapt to it: if a cluster in an
availability zone crashes, VCG provides disaster recovery capabilities,
but the job is not automatically resumed. So this PR removes the
`dead BE count` calculation when judging `isNeedAutoSchedule`.
### Release note
None
---
fe/fe-common/src/main/java/org/apache/doris/common/Config.java | 6 ------
.../java/org/apache/doris/load/routineload/ScheduleRule.java | 10 ----------
2 files changed, 16 deletions(-)
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 0a8625d6252..029420e5f19 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1381,12 +1381,6 @@ public class Config extends ConfigBase {
@ConfField
public static boolean check_java_version = true;
- /**
- * it can't auto-resume routine load job as long as one of the backends is
down
- */
- @ConfField(mutable = true, masterOnly = true)
- public static int max_tolerable_backend_down_num = 0;
-
/**
* a period for auto resume routine load
*/
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/ScheduleRule.java
b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/ScheduleRule.java
index 8454bba7303..aff189a7c37 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/ScheduleRule.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/ScheduleRule.java
@@ -59,16 +59,6 @@ public class ScheduleRule {
&& jobRoutine.pauseReason.getCode() !=
InternalErrorCode.MANUAL_PAUSE_ERR
&& jobRoutine.pauseReason.getCode() !=
InternalErrorCode.TOO_MANY_FAILURE_ROWS_ERR
&& jobRoutine.pauseReason.getCode() !=
InternalErrorCode.CANNOT_RESUME_ERR) {
- int dead = deadBeCount();
- if (dead > Config.max_tolerable_backend_down_num) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("dead backend num {} is larger than config {}, "
- + "routine load job {} can not be auto
rescheduled",
- dead, Config.max_tolerable_backend_down_num,
jobRoutine.id);
- }
- return false;
- }
-
if (jobRoutine.latestResumeTimestamp == 0) { //the first resume
jobRoutine.latestResumeTimestamp = System.currentTimeMillis();
jobRoutine.autoResumeCount = 1;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]