This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.1 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit b20df704b18b9b6741401e9211d180d9b19c8532 Author: Mingyu Chen <[email protected]> AuthorDate: Tue May 17 22:36:30 2022 +0800 [fix] fix bug that replica can not be repaired due to DECOMMISSION state (#9424) Reset the state of replicas that are in the DECOMMISSION state after scheduling has finished. --- .../org/apache/doris/clone/TabletSchedCtx.java | 24 ++++++++++++++ .../org/apache/doris/clone/TabletScheduler.java | 38 +++++++++++++--------- 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java index 6610b484ae..ac7a96efc2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java @@ -1098,6 +1098,8 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> { sb.append(". to backend: ").append(destBackendId); sb.append(", dest path hash: ").append(destPathHash); } + sb.append(", visible version: ").append(visibleVersion); + sb.append(", committed version: ").append(committedVersion); if (errMsg != null) { sb.append(". err: ").append(errMsg); } @@ -1119,4 +1121,26 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> { } } } + + /** + * call this when releaseTabletCtx() + */ + public void resetReplicaState() { + if (tablet != null) { + for (Replica replica : tablet.getReplicas()) { + // To address issue: https://github.com/apache/incubator-doris/issues/9422 + // the DECOMMISSION state is set in TabletScheduler and not persist to meta. + // So it is reasonable to reset this state if we failed to scheduler this tablet. + // That is, if the TabletScheduler cannot process the tablet, then it should reset + // any intermediate state it set during the scheduling process. 
+ if (replica.getState() == ReplicaState.DECOMMISSION) { + replica.setState(ReplicaState.NORMAL); + replica.setWatermarkTxnId(-1); + LOG.debug("reset replica {} on backend {} of tablet {} state from DECOMMISSION to NORMAL", + replica.getId(), replica.getBackendId(), tabletId); + } + } + } + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index 1f80cada9e..8231269816 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -385,12 +385,12 @@ public class TabletScheduler extends MasterDaemon { if (tabletCtx.getType() == Type.BALANCE) { // if balance is disabled, remove this tablet if (Config.disable_balance) { - finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, + finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), "disable balance and " + e.getMessage()); } else { // remove the balance task if it fails to be scheduled many times if (tabletCtx.getFailedSchedCounter() > 10) { - finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, + finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), "schedule failed too many times and " + e.getMessage()); } else { // we must release resource it current hold, and be scheduled again @@ -410,19 +410,19 @@ public class TabletScheduler extends MasterDaemon { } else if (e.getStatus() == Status.FINISHED) { // schedule redundant tablet or scheduler disabled will throw this exception stat.counterTabletScheduledSucceeded.incrementAndGet(); - finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, e.getMessage()); + finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, e.getStatus(), e.getMessage()); } else { Preconditions.checkState(e.getStatus() == Status.UNRECOVERABLE, e.getStatus()); // discard stat.counterTabletScheduledDiscard.incrementAndGet(); - 
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage()); + finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage()); } continue; } catch (Exception e) { LOG.warn("got unexpected exception, discard this schedule. tablet: {}", tabletCtx.getTabletId(), e); stat.counterTabletScheduledFailed.incrementAndGet(); - finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, e.getMessage()); + finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage()); continue; } @@ -532,7 +532,8 @@ public class TabletScheduler extends MasterDaemon { for (TransactionState transactionState : dbTransactionMgr.getPreCommittedTxnList()) { if(transactionState.getTableIdList().contains(tbl.getId())) { // If table releate to transaction with precommitted status, do not allow to do balance. - throw new SchedException(Status.UNRECOVERABLE, "There exists PRECOMMITTED transaction releated to table"); + throw new SchedException(Status.UNRECOVERABLE, + "There exists PRECOMMITTED transaction related to table"); } } } catch (AnalysisException e) { @@ -1053,7 +1054,6 @@ public class TabletScheduler extends MasterDaemon { } private void deleteReplicaInternal(TabletSchedCtx tabletCtx, Replica replica, String reason, boolean force) throws SchedException { - /* * Before deleting a replica, we should make sure that there is no running txn on it and no more txns will be on it. * So we do followings: @@ -1069,6 +1069,8 @@ public class TabletScheduler extends MasterDaemon { replica.setState(ReplicaState.DECOMMISSION); // set priority to normal because it may wait for a long time. Remain it as VERY_HIGH may block other task. 
tabletCtx.setOrigPriority(Priority.NORMAL); + LOG.debug("set replica {} on backend {} of tablet {} state to DECOMMISSION", + replica.getId(), replica.getBackendId(), tabletCtx.getTabletId()); throw new SchedException(Status.SCHEDULE_FAILED, "set watermark txn " + nextTxnId); } else if (replica.getState() == ReplicaState.DECOMMISSION && replica.getWatermarkTxnId() != -1) { long watermarkTxnId = replica.getWatermarkTxnId(); @@ -1323,17 +1325,20 @@ public class TabletScheduler extends MasterDaemon { addTablet(tabletCtx, true /* force */); } - private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, String reason) { + private void finalizeTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, Status status, String reason) { // use 2 steps to avoid nested database lock and synchronized.(releaseTabletCtx() may hold db lock) // remove the tablet ctx, so that no other process can see it removeTabletCtx(tabletCtx, reason); // release resources taken by tablet ctx - releaseTabletCtx(tabletCtx, state); + releaseTabletCtx(tabletCtx, state, status == Status.UNRECOVERABLE); } - private void releaseTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state) { + private void releaseTabletCtx(TabletSchedCtx tabletCtx, TabletSchedCtx.State state, boolean resetReplicaState) { tabletCtx.setState(state); tabletCtx.releaseResource(this); + if (resetReplicaState) { + tabletCtx.resetReplicaState(); + } tabletCtx.setFinishedTime(System.currentTimeMillis()); } @@ -1393,25 +1398,25 @@ public class TabletScheduler extends MasterDaemon { } else if (e.getStatus() == Status.UNRECOVERABLE) { // unrecoverable stat.counterTabletScheduledDiscard.incrementAndGet(); - finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage()); + finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage()); return true; } else if (e.getStatus() == Status.FINISHED) { // tablet is already healthy, just remove - 
finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getMessage()); + finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage()); return true; } } catch (Exception e) { LOG.warn("got unexpected exception when finish clone task. tablet: {}", tabletCtx.getTabletId(), e); stat.counterTabletScheduledDiscard.incrementAndGet(); - finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, e.getMessage()); + finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage()); return true; } Preconditions.checkState(tabletCtx.getState() == TabletSchedCtx.State.FINISHED); stat.counterCloneTaskSucceeded.incrementAndGet(); gatherStatistics(tabletCtx); - finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, "finished"); + finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, Status.FINISHED, "finished"); return true; } @@ -1475,7 +1480,10 @@ public class TabletScheduler extends MasterDaemon { // 2. release ctx timeoutTablets.stream().forEach(t -> { - releaseTabletCtx(t, TabletSchedCtx.State.CANCELLED); + // Set "resetReplicaState" to true because + // the timeout task should also be considered as UNRECOVERABLE, + // so need to reset replica state. + releaseTabletCtx(t, TabletSchedCtx.State.CANCELLED, true); stat.counterCloneTaskTimeout.incrementAndGet(); }); } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
