This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 818e675b728 branch-2.1: [fix](restore) Cut down restore timeout when create replicas failed #47278 (#47496)
818e675b728 is described below
commit 818e675b728db81b9ea12f878f117a2c0b9a39c5
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Feb 12 09:58:10 2025 +0800
branch-2.1: [fix](restore) Cut down restore timeout when create replicas failed #47278 (#47496)
Cherry-picked from #47278
Co-authored-by: Uniqueyou <[email protected]>
---
.../src/main/java/org/apache/doris/backup/RestoreJob.java | 13 ++++++++++++-
.../java/org/apache/doris/common/MarkedCountDownLatch.java | 6 ++++++
2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
index 2e7ca149226..92db8522f89 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
@@ -60,6 +60,7 @@ import org.apache.doris.common.MarkedCountDownLatch;
import org.apache.doris.common.MetaNotFoundException;
import org.apache.doris.common.Pair;
import org.apache.doris.common.io.Text;
+import org.apache.doris.common.util.DbUtil;
import org.apache.doris.common.util.DebugPointUtil;
import org.apache.doris.common.util.DynamicPartitionUtil;
import org.apache.doris.common.util.PropertyAnalyzer;
@@ -161,7 +162,7 @@ public class RestoreJob extends AbstractJob {
private boolean reserveReplica = false;
private boolean reserveDynamicPartitionEnable = false;
-
+ private long createReplicasTimeStamp = -1;
// this 2 members is to save all newly restored objs
// tbl name -> part
private List<Pair<String, Partition>> restoredPartitions =
Lists.newArrayList();
@@ -964,6 +965,7 @@ public class RestoreJob extends AbstractJob {
// No log here, PENDING state restore job will redo this method
state = RestoreJobState.CREATING;
+ createReplicasTimeStamp = System.currentTimeMillis();
}
private void waitingAllReplicasCreated() {
@@ -972,6 +974,14 @@ public class RestoreJob extends AbstractJob {
if (!createReplicaTasksLatch.await(0, TimeUnit.SECONDS)) {
LOG.info("waiting {} create replica tasks for restore to
finish. {}",
createReplicaTasksLatch.getCount(), this);
+ long createReplicasTimeOut =
DbUtil.getCreateReplicasTimeoutMs(createReplicaTasksLatch.getMarkCount());
+ long tryCreateTime = System.currentTimeMillis() -
createReplicasTimeStamp;
+ if (tryCreateTime > createReplicasTimeOut) {
+ status = new Status(ErrCode.TIMEOUT,
+ "restore job with create replicas timeout: " +
tryCreateTime + " with label: " + label);
+ cancelInternal(false);
+ LOG.warn("restore job {} create replicas timeout, cancel
{}", jobId, this);
+ }
return;
}
} catch (InterruptedException e) {
@@ -2346,6 +2356,7 @@ public class RestoreJob extends AbstractJob {
snapshotInfos = HashBasedTable.create();
fileMapping.clear();
jobInfo.releaseSnapshotInfo();
+ createReplicasTimeStamp = -1;
RestoreJobState curState = state;
finishedTime = System.currentTimeMillis();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java b/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java
index 5c3201e2b80..0eecbc43b1d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java
@@ -30,13 +30,19 @@ public class MarkedCountDownLatch<K, V> extends CountDownLatch {
private Multimap<K, V> marks;
private Multimap<K, V> failedMarks;
private Status st = Status.OK;
+ private int markCount = 0;
public MarkedCountDownLatch(int count) {
super(count);
+ this.markCount = count;
marks = HashMultimap.create();
failedMarks = HashMultimap.create();
}
+ public int getMarkCount() {
+ return markCount;
+ }
+
public synchronized void addMark(K key, V value) {
marks.put(key, value);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]