This is an automated email from the ASF dual-hosted git repository.
lide pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new d1ffdc76d64 [branch-1.2](bug) fix fe schedule clone task stuck in running state #26397
d1ffdc76d64 is described below
commit d1ffdc76d641c74cb843265e6630564ff1d4be7f
Author: xy720 <[email protected]>
AuthorDate: Tue Nov 7 10:10:28 2023 +0800
[branch-1.2](bug) fix fe schedule clone task stuck in running state #26397
---
.../java/org/apache/doris/clone/TabletSchedCtx.java | 8 ++++++++
.../java/org/apache/doris/clone/TabletScheduler.java | 19 +++++++++++++++----
2 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index b904654c74d..bad60d8ec43 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -293,6 +293,10 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
return failedSchedCounter;
}
+ public void resetFailedSchedCounter() {
+ failedSchedCounter = 0;
+ }
+
public void increaseFailedRunningCounter() {
++failedRunningCounter;
}
@@ -301,6 +305,10 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
return failedRunningCounter;
}
+ public boolean isExceedFailedRunningLimit() {
+ return failedRunningCounter >= RUNNING_FAILED_COUNTER_THRESHOLD;
+ }
+
public void setLastSchedTime(long lastSchedTime) {
this.lastSchedTime = lastSchedTime;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index 30d6c23d8a5..0116ac3404b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -1530,12 +1530,23 @@ public class TabletScheduler extends MasterDaemon {
try {
tabletCtx.finishCloneTask(cloneTask, request);
} catch (SchedException e) {
- tabletCtx.increaseFailedRunningCounter();
tabletCtx.setErrMsg(e.getMessage());
if (e.getStatus() == Status.RUNNING_FAILED) {
- stat.counterCloneTaskFailed.incrementAndGet();
- addToRunningTablets(tabletCtx);
- return false;
+ tabletCtx.increaseFailedRunningCounter();
+ if (!tabletCtx.isExceedFailedRunningLimit()) {
+ stat.counterCloneTaskFailed.incrementAndGet();
+ tabletCtx.releaseResource(this);
+ tabletCtx.resetFailedSchedCounter();
+ tabletCtx.setState(TabletSchedCtx.State.PENDING);
+ dynamicAdjustPrioAndAddBackToPendingTablets(tabletCtx,
e.getMessage());
+ return false;
+ } else {
+ // unrecoverable
+ stat.counterTabletScheduledDiscard.incrementAndGet();
+ finalizeTabletCtx(tabletCtx,
TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE,
+ e.getMessage());
+ return true;
+ }
} else if (e.getStatus() == Status.UNRECOVERABLE) {
// unrecoverable
stat.counterTabletScheduledDiscard.incrementAndGet();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]