This is an automated email from the ASF dual-hosted git repository.
szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ratis.git
The following commit(s) were added to refs/heads/master by this push:
new e2c867da5 RATIS-2345. Leader stepDown could cause a deadlock. (#1300)
e2c867da5 is described below
commit e2c867da55f8caf789db5e759ffebc2e79a4961d
Author: Tsz-Wo Nicholas Sze <[email protected]>
AuthorDate: Tue Oct 14 08:18:01 2025 -0700
RATIS-2345. Leader stepDown could cause a deadlock. (#1300)
---
.../java/org/apache/ratis/server/impl/LeaderStateImpl.java | 8 +++++---
.../java/org/apache/ratis/server/impl/PendingStepDown.java | 10 +++++++++-
.../src/test/java/org/apache/ratis/RaftBasicTests.java | 6 +++---
3 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java
index 8358f063d..0835802bd 100644
--- a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java
+++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java
@@ -703,10 +703,12 @@ class LeaderStateImpl implements LeaderState {
private void stepDown(long term, StepDownReason reason) {
try {
lease.getAndSetEnabled(false);
- server.changeToFollowerAndPersistMetadata(term, false, reason).join();
+ server.changeToFollowerAndPersistMetadata(term, false, reason)
+ .get(5, TimeUnit.SECONDS);
pendingStepDown.complete(server::newSuccessReply);
- } catch(IOException e) {
- final String s = this + ": Failed to persist metadata for term " + term;
+ } catch(Exception e) {
+ pendingStepDown.completeExceptionally(e);
+ final String s = this + ": Failed to step down for term " + term;
LOG.warn(s, e);
// the failure should happen while changing the state to follower
// thus the in-memory state should have been updated
diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingStepDown.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingStepDown.java
index b7bfde3f6..c1e5cc5f5 100644
--- a/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingStepDown.java
+++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingStepDown.java
@@ -56,8 +56,12 @@ public class PendingStepDown {
replyFuture.complete(newSuccessReply.apply(request));
}
+ void completeExceptionally(Exception e) {
+ replyFuture.completeExceptionally(e);
+ }
+
void timeout() {
- replyFuture.completeExceptionally(new TimeoutIOException(
+ completeExceptionally(new TimeoutIOException(
": Failed to step down leader on " + leader + "request " +
request.getTimeoutMs() + "ms"));
}
@@ -105,6 +109,10 @@ public class PendingStepDown {
pending.getAndSetNull().ifPresent(p -> p.complete(newSuccessReply));
}
+ void completeExceptionally(Exception e) {
+ pending.getAndSetNull().ifPresent(p -> p.completeExceptionally(e));
+ }
+
void timeout() {
pending.getAndSetNull().ifPresent(PendingRequest::timeout);
}
diff --git a/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java b/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java
index 156cecf0b..f1319cde7 100644
--- a/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java
+++ b/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java
@@ -360,7 +360,7 @@ public abstract class RaftBasicTests<CLUSTER extends MiniRaftCluster>
final Timer timer = new Timer();
timer.schedule(new TimerTask() {
- private int previousLastStep = lastStep.get();
+ private final AtomicInteger previousLastStep = new AtomicInteger(lastStep.get());
@Override
public void run() {
@@ -371,8 +371,8 @@ public abstract class RaftBasicTests<CLUSTER extends MiniRaftCluster>
JavaUtils.dumpAllThreads(s -> log.info(s));
final int last = lastStep.get();
- if (last != previousLastStep) {
- previousLastStep = last;
+ if (last != previousLastStep.get()) {
+ previousLastStep.set(last);
} else {
final RaftServer.Division leader = cluster.getLeader();
log.info("NO PROGRESS at " + last + ", try to restart leader=" + leader);