This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 50fbe31f93 [fix](tablet report) fix not add replicas when a backend re
join the cluster after changing its ip or port (#22700)
50fbe31f93 is described below
commit 50fbe31f9311da941abe9c6e6a01cb8446c9c738
Author: yujun <[email protected]>
AuthorDate: Thu Aug 10 15:29:28 2023 +0800
[fix](tablet report) fix not add replicas when a backend re join the
cluster after changing its ip or port (#22700)
---
.../org/apache/doris/master/ReportHandler.java | 25 +++++++++++++++++++---
1 file changed, 22 insertions(+), 3 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java
b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java
index 3a9c01eb3c..b78cbddb38 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java
@@ -1188,7 +1188,21 @@ public class ReportHandler extends Daemon {
Pair<TabletStatus, TabletSchedCtx.Priority> status =
tablet.getHealthStatusWithPriority(infoService,
visibleVersion, replicaAlloc, aliveBeIds);
- if (isColocateBackend || status.first ==
TabletStatus.VERSION_INCOMPLETE
+ // FORCE_REDUNDANT is a specific case of a missing replica,
+ // so a replica can be added while the tablet is in FORCE_REDUNDANT.
+ // But we must be careful to avoid deleting a replica, adding it back,
+ // and then repeating this forever.
+ // If this replica is schedule-available and another existing replica
+ // is schedule-unavailable, it is safe to add this replica:
+ // if the tablet scheduler wants to delete a replica, it will choose the
+ // schedule-unavailable one, which avoids the loop described above.
+ boolean canAddForceRedundant = status.first ==
TabletStatus.FORCE_REDUNDANT
+ && infoService.checkBackendScheduleAvailable(backendId)
+ && tablet.getReplicas().stream().anyMatch(
+ r ->
!infoService.checkBackendScheduleAvailable(r.getBackendId()));
+
+ if (isColocateBackend
+ || canAddForceRedundant
+ || status.first == TabletStatus.VERSION_INCOMPLETE
|| status.first == TabletStatus.REPLICA_MISSING
|| status.first == TabletStatus.UNRECOVERABLE) {
long lastFailedVersion = -1L;
@@ -1264,7 +1278,10 @@ public class ReportHandler extends Daemon {
Env.getCurrentEnv().getEditLog().logAddReplica(info);
- LOG.info("add replica[{}-{}] to catalog. backend[{}]",
tabletId, replicaId, backendId);
+ LOG.info("add replica[{}-{}] to catalog. backend[{}], tablet
status {}, tablet size {}, "
+ + "is colocate backend {}",
+ tabletId, replicaId, backendId, status.first.name(),
tablet.getReplicas().size(),
+ isColocateBackend);
return true;
} else {
// replica is enough. check if this tablet is already in meta
@@ -1275,7 +1292,9 @@ public class ReportHandler extends Daemon {
return true;
}
}
- LOG.warn("replica is enough[{}-{}]",
tablet.getReplicas().size(), replicaAlloc.toCreateStmt());
+ LOG.warn("no add replica [{}-{}] cause it is enough[{}-{}],
tablet status {}",
+ tabletId, replicaId, tablet.getReplicas().size(),
replicaAlloc.toCreateStmt(),
+ status.first.name());
return false;
}
} finally {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]