This is an automated email from the ASF dual-hosted git repository.

williamsong pushed a commit to branch feature/leaderlease
in repository https://gitbox.apache.org/repos/asf/ratis.git


The following commit(s) were added to refs/heads/feature/leaderlease by this 
push:
     new 0711c39f2 RATIS-1894. Implement ReadOnly based on leader lease (#925)
0711c39f2 is described below

commit 0711c39f2c71d587bc276ecae837144475bbf8d3
Author: William Song <[email protected]>
AuthorDate: Thu Sep 28 21:55:16 2023 +0800

    RATIS-1894. Implement ReadOnly based on leader lease (#925)
---
 ratis-docs/src/site/markdown/configurations.md     |  8 +++
 .../apache/ratis/server/RaftServerConfigKeys.java  | 10 +++
 .../org/apache/ratis/server/impl/LeaderLease.java  | 14 ++++-
 .../apache/ratis/server/impl/LeaderStateImpl.java  | 20 +++++-
 .../apache/ratis/server/impl/RaftServerImpl.java   | 17 +++---
 .../ratis/server/impl/TransferLeadership.java      |  9 +++
 .../org/apache/ratis/ReadOnlyRequestTests.java     | 71 +++++++++++++++++-----
 .../ratis/server/impl/LeaderElectionTests.java     |  2 +
 8 files changed, 123 insertions(+), 28 deletions(-)

diff --git a/ratis-docs/src/site/markdown/configurations.md 
b/ratis-docs/src/site/markdown/configurations.md
index 7fa7ddf80..0500a053a 100644
--- a/ratis-docs/src/site/markdown/configurations.md
+++ b/ratis-docs/src/site/markdown/configurations.md
@@ -185,6 +185,14 @@ treat the peer as caught-up. Increase this number when 
write throughput is high.
 
 
--------------------------------------------------------------------------------
 
+| **Property**    | `raft.server.read.leader.lease.enabled`                    
|
+|:----------------|:-----------------------------------------------------------|
+| **Description** | whether to enable lease in linearizable read-only requests 
|
+| **Type**        | boolean                                                    
|
+| **Default**     | false                                                      
|
+
+--------------------------------------------------------------------------------
+
 | **Property**    | `raft.server.read.leader.lease.timeout.ratio` |
 |:----------------|:----------------------------------------------|
 | **Description** | maximum timeout ratio of leader lease         |
diff --git 
a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java
 
b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java
index 5551f9cdd..cd38e5667 100644
--- 
a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java
+++ 
b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java
@@ -192,6 +192,16 @@ public interface RaftServerConfigKeys {
       set(properties::setEnum, OPTION_KEY, option);
     }
 
+    String LEADER_LEASE_ENABLED_KEY = PREFIX + ".leader.lease.enabled";
+    boolean LEADER_LEASE_ENABLED_DEFAULT = false;
+    static boolean leaderLeaseEnabled(RaftProperties properties) {
+      return getBoolean(properties::getBoolean, LEADER_LEASE_ENABLED_KEY,
+          LEADER_LEASE_ENABLED_DEFAULT, getDefaultLog());
+    }
+    static void setLeaderLeaseEnabled(RaftProperties properties, boolean 
enabled) {
+      setBoolean(properties::setBoolean, LEADER_LEASE_ENABLED_KEY, enabled);
+    }
+
     String LEADER_LEASE_TIMEOUT_RATIO_KEY = PREFIX + 
".leader.lease.timeout.ratio";
     double LEADER_LEASE_TIMEOUT_RATIO_DEFAULT = 0.9;
     static double leaderLeaseTimeoutRatio(RaftProperties properties) {
diff --git 
a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderLease.java 
b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderLease.java
index 758cbb6c5..315cc9f14 100644
--- a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderLease.java
+++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderLease.java
@@ -27,6 +27,7 @@ import org.apache.ratis.util.Timestamp;
 import java.util.List;
 import java.util.Optional;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;
@@ -34,11 +35,12 @@ import java.util.stream.Stream;
 
 class LeaderLease {
 
+  private final AtomicBoolean enabled;
   private final long leaseTimeoutMs;
-  // TODO invalidate leader lease when stepDown / transferLeader
   private final AtomicReference<Timestamp> lease = new 
AtomicReference<>(Timestamp.currentTime());
 
   LeaderLease(RaftProperties properties) {
+    this.enabled = new 
AtomicBoolean(RaftServerConfigKeys.Read.leaderLeaseEnabled(properties));
     final double leaseRatio = 
RaftServerConfigKeys.Read.leaderLeaseTimeoutRatio(properties);
     Preconditions.assertTrue(leaseRatio > 0.0 && leaseRatio <= 1.0,
         "leader ratio should sit in (0,1], now is " + leaseRatio);
@@ -47,8 +49,16 @@ class LeaderLease {
         .toIntExact(TimeUnit.MILLISECONDS);
   }
 
+  boolean getAndSetEnabled(boolean newValue) {
+    return enabled.getAndSet(newValue);
+  }
+
+  boolean isEnabled() {
+    return enabled.get();
+  }
+
   boolean isValid() {
-    return lease.get().elapsedTimeMs() < leaseTimeoutMs;
+    return isEnabled() && lease.get().elapsedTimeMs() < leaseTimeoutMs;
   }
 
   /**
diff --git 
a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java 
b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java
index 418139378..4ebfc3d56 100644
--- 
a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java
+++ 
b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java
@@ -438,6 +438,7 @@ class LeaderStateImpl implements LeaderState {
     messageStreamRequests.clear();
     // TODO client should retry on NotLeaderException
     readIndexHeartbeats.failListeners(nle);
+    lease.getAndSetEnabled(false);
     server.getServerRpc().notifyNotLeader(server.getMemberId().getGroupId());
     logAppenderMetrics.unregister();
     raftServerMetrics.unregister();
@@ -675,6 +676,7 @@ class LeaderStateImpl implements LeaderState {
 
   private void stepDown(long term, StepDownReason reason) {
     try {
+      lease.getAndSetEnabled(false);
       server.changeToFollowerAndPersistMetadata(term, false, reason);
       pendingStepDown.complete(server::newSuccessReply);
     } catch(IOException e) {
@@ -953,6 +955,7 @@ class LeaderStateImpl implements LeaderState {
       pendingRequests.replySetConfiguration(server::newSuccessReply);
       // if the leader is not included in the current configuration, step down
       if (!conf.containsInConf(server.getId(), RaftPeerRole.FOLLOWER, 
RaftPeerRole.LISTENER)) {
+        lease.getAndSetEnabled(false);
         LOG.info("{} is not included in the new configuration {}. Will 
shutdown server...", this, conf);
         try {
           // leave some time for all RPC senders to send out new conf entry
@@ -1113,6 +1116,12 @@ class LeaderStateImpl implements LeaderState {
           new LeaderNotReadyException(server.getMemberId())));
     }
 
+    // if lease is enabled, check lease first
+    if (hasLease()) {
+      return CompletableFuture.completedFuture(readIndex);
+    }
+
+    // send heartbeats and wait for majority acknowledgments
     final AppendEntriesListener listener = 
readIndexHeartbeats.addAppendEntriesListener(
         readIndex, i -> new AppendEntriesListener(i, senders));
 
@@ -1129,7 +1138,15 @@ class LeaderStateImpl implements LeaderState {
     readIndexHeartbeats.onAppendEntriesReply(appender, reply, 
this::hasMajority);
   }
 
+  boolean getAndSetLeaseEnabled(boolean newValue) {
+    return lease.getAndSetEnabled(newValue);
+  }
+
   boolean hasLease() {
+    if (!lease.isEnabled()) {
+      return false;
+    }
+
     if (checkLeaderLease()) {
       return true;
     }
@@ -1143,7 +1160,8 @@ class LeaderStateImpl implements LeaderState {
   }
 
   private boolean checkLeaderLease() {
-    return isReady() && (server.getRaftConf().isSingleton() || 
lease.isValid());
+    return isRunning() && isReady()
+        && (server.getRaftConf().isSingleton() || lease.isValid());
   }
 
   void replyPendingRequest(long logIndex, RaftClientReply reply) {
diff --git 
a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java 
b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java
index 40a17c4e9..3fb0cb2fa 100644
--- 
a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java
+++ 
b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java
@@ -999,8 +999,14 @@ class RaftServerImpl implements RaftServer.Division,
   }
 
   private CompletableFuture<RaftClientReply> readAsync(RaftClientRequest 
request) {
-    if (readOption == RaftServerConfigKeys.Read.Option.LINEARIZABLE
-        && !request.getType().getRead().getPreferNonLinearizable()) {
+    if (request.getType().getRead().getPreferNonLinearizable()
+        || readOption == RaftServerConfigKeys.Read.Option.DEFAULT) {
+      final CompletableFuture<RaftClientReply> reply = 
checkLeaderState(request, null, false);
+       if (reply != null) {
+         return reply;
+       }
+       return queryStateMachine(request);
+    } else if (readOption == RaftServerConfigKeys.Read.Option.LINEARIZABLE){
       /*
         Linearizable read using ReadIndex. See Raft paper section 6.4.
         1. First obtain readIndex from Leader.
@@ -1027,13 +1033,6 @@ class RaftServerImpl implements RaftServer.Division,
           .thenCompose(readIndex -> getReadRequests().waitToAdvance(readIndex))
           .thenCompose(readIndex -> queryStateMachine(request))
           .exceptionally(e -> readException2Reply(request, e));
-    } else if (readOption == RaftServerConfigKeys.Read.Option.DEFAULT
-        || request.getType().getRead().getPreferNonLinearizable()) {
-       CompletableFuture<RaftClientReply> reply = checkLeaderState(request, 
null, false);
-       if (reply != null) {
-         return reply;
-       }
-       return queryStateMachine(request);
     } else {
       throw new IllegalStateException("Unexpected read option: " + readOption);
     }
diff --git 
a/ratis-server/src/main/java/org/apache/ratis/server/impl/TransferLeadership.java
 
b/ratis-server/src/main/java/org/apache/ratis/server/impl/TransferLeadership.java
index 74ada6541..e54bee748 100644
--- 
a/ratis-server/src/main/java/org/apache/ratis/server/impl/TransferLeadership.java
+++ 
b/ratis-server/src/main/java/org/apache/ratis/server/impl/TransferLeadership.java
@@ -295,6 +295,9 @@ public class TransferLeadership {
     if (previous != null) {
       return createReplyFutureFromPreviousRequest(request, previous);
     }
+    // disable the lease before transferring leader
+    final boolean previousLeaseEnabled = server.getRole().getLeaderState()
+        .map(l -> l.getAndSetLeaseEnabled(false)).orElse(false);
     final PendingRequest pendingRequest = supplier.get();
     final Result result = tryTransferLeadership(context);
     final Result.Type type = result.getType();
@@ -308,6 +311,12 @@ public class TransferLeadership {
               timeout.toString(TimeUnit.SECONDS, 3))),
           LOG, () -> "Failed to handle timeout");
     }
+    // restore the previous lease setting if the current transfer fails
+    pendingRequest.getReplyFuture().whenCompleteAsync((reply, ex) -> {
+      if (ex != null || !reply.isSuccess()) {
+        server.getRole().getLeaderState().ifPresent(l -> 
l.getAndSetLeaseEnabled(previousLeaseEnabled));
+      }
+    });
     return pendingRequest.getReplyFuture();
   }
 
diff --git 
a/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestTests.java 
b/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestTests.java
index a919a9292..eea75592e 100644
--- a/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestTests.java
+++ b/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestTests.java
@@ -43,7 +43,6 @@ import org.slf4j.event.Level;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
 import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 
@@ -69,16 +68,21 @@ public abstract class ReadOnlyRequestTests<CLUSTER extends 
MiniRaftCluster>
     final RaftProperties p = getProperties();
     p.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY,
         CounterStateMachine.class, StateMachine.class);
-
-    p.setEnum(RaftServerConfigKeys.Read.OPTION_KEY, 
RaftServerConfigKeys.Read.Option.LINEARIZABLE);
   }
 
   @Test
   public void testLinearizableRead() throws Exception {
-    runWithNewCluster(NUM_SERVERS, this::testLinearizableReadImpl);
+    getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, 
RaftServerConfigKeys.Read.Option.LINEARIZABLE);
+    runWithNewCluster(NUM_SERVERS, this::testReadOnlyImpl);
   }
 
-  private void testLinearizableReadImpl(CLUSTER cluster) throws Exception {
+  @Test
+  public void testLeaseRead() throws Exception {
+    
getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, 
true);
+    runWithNewCluster(NUM_SERVERS, this::testReadOnlyImpl);
+  }
+
+  private void testReadOnlyImpl(CLUSTER cluster) throws Exception {
     try {
       RaftTestUtil.waitForLeader(cluster);
       final RaftPeerId leaderId = cluster.getLeader().getId();
@@ -98,10 +102,17 @@ public abstract class ReadOnlyRequestTests<CLUSTER extends 
MiniRaftCluster>
 
   @Test
   public void testLinearizableReadTimeout() throws Exception {
-    runWithNewCluster(NUM_SERVERS, this::testLinearizableReadTimeoutImpl);
+    getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, 
RaftServerConfigKeys.Read.Option.LINEARIZABLE);
+    runWithNewCluster(NUM_SERVERS, this::testReadOnlyTimeoutImpl);
+  }
+
+  @Test
+  public void testLeaseReadTimeout() throws Exception {
+    
getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, 
true);
+    runWithNewCluster(NUM_SERVERS, this::testReadOnlyTimeoutImpl);
   }
 
-  private void testLinearizableReadTimeoutImpl(CLUSTER cluster) throws 
Exception {
+  private void testReadOnlyTimeoutImpl(CLUSTER cluster) throws Exception {
     try {
       RaftTestUtil.waitForLeader(cluster);
       final RaftPeerId leaderId = cluster.getLeader().getId();
@@ -126,10 +137,17 @@ public abstract class ReadOnlyRequestTests<CLUSTER 
extends MiniRaftCluster>
 
   @Test
   public void testFollowerLinearizableRead() throws Exception {
-    runWithNewCluster(NUM_SERVERS, this::testFollowerLinearizableReadImpl);
+    getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, 
RaftServerConfigKeys.Read.Option.LINEARIZABLE);
+    runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyImpl);
+  }
+
+  @Test
+  public void testFollowerLeaseRead() throws Exception {
+    
getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, 
true);
+    runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyImpl);
   }
 
-  private void testFollowerLinearizableReadImpl(CLUSTER cluster) throws 
Exception {
+  private void testFollowerReadOnlyImpl(CLUSTER cluster) throws Exception {
     try {
       RaftTestUtil.waitForLeader(cluster);
 
@@ -155,10 +173,17 @@ public abstract class ReadOnlyRequestTests<CLUSTER 
extends MiniRaftCluster>
 
   @Test
   public void testFollowerLinearizableReadParallel() throws Exception {
-    runWithNewCluster(NUM_SERVERS, 
this::testFollowerLinearizableReadParallelImpl);
+    getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, 
RaftServerConfigKeys.Read.Option.LINEARIZABLE);
+    runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyParallelImpl);
   }
 
-  private void testFollowerLinearizableReadParallelImpl(CLUSTER cluster) 
throws Exception {
+  @Test
+  public void testFollowerLeaseReadParallel() throws Exception {
+    
getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, 
true);
+    runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyParallelImpl);
+  }
+
+  private void testFollowerReadOnlyParallelImpl(CLUSTER cluster) throws 
Exception {
     try {
       RaftTestUtil.waitForLeader(cluster);
 
@@ -183,10 +208,17 @@ public abstract class ReadOnlyRequestTests<CLUSTER 
extends MiniRaftCluster>
 
   @Test
   public void testFollowerLinearizableReadFailWhenLeaderDown() throws 
Exception {
-    runWithNewCluster(NUM_SERVERS, 
this::testFollowerLinearizableReadFailWhenLeaderDownImpl);
+    getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, 
RaftServerConfigKeys.Read.Option.LINEARIZABLE);
+    runWithNewCluster(NUM_SERVERS, 
this::testFollowerReadOnlyFailWhenLeaderDownImpl);
   }
 
-  private void testFollowerLinearizableReadFailWhenLeaderDownImpl(CLUSTER 
cluster) throws Exception {
+  @Test
+  public void testFollowerLeaseReadWhenLeaderDown() throws Exception {
+    
getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, 
true);
+    runWithNewCluster(NUM_SERVERS, 
this::testFollowerReadOnlyFailWhenLeaderDownImpl);
+  }
+
+  private void testFollowerReadOnlyFailWhenLeaderDownImpl(CLUSTER cluster) 
throws Exception {
     try {
       RaftTestUtil.waitForLeader(cluster);
 
@@ -215,11 +247,18 @@ public abstract class ReadOnlyRequestTests<CLUSTER 
extends MiniRaftCluster>
   }
 
   @Test
-  public void testFollowerLinearizableReadRetryWhenLeaderDown() throws 
Exception {
-    runWithNewCluster(NUM_SERVERS, 
this::testFollowerLinearizableReadRetryWhenLeaderDown);
+  public void testFollowerReadOnlyRetryWhenLeaderDown() throws Exception {
+    getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, 
RaftServerConfigKeys.Read.Option.LINEARIZABLE);
+    runWithNewCluster(NUM_SERVERS, 
this::testFollowerReadOnlyRetryWhenLeaderDown);
+  }
+
+  @Test
+  public void testFollowerLeaseReadRetryWhenLeaderDown() throws Exception {
+    
getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, 
true);
+    runWithNewCluster(NUM_SERVERS, 
this::testFollowerReadOnlyRetryWhenLeaderDown);
   }
 
-  private void testFollowerLinearizableReadRetryWhenLeaderDown(CLUSTER 
cluster) throws Exception {
+  private void testFollowerReadOnlyRetryWhenLeaderDown(CLUSTER cluster) throws 
Exception {
     // only retry on readIndexException
     final RetryPolicy retryPolicy = ExceptionDependentRetry
         .newBuilder()
diff --git 
a/ratis-server/src/test/java/org/apache/ratis/server/impl/LeaderElectionTests.java
 
b/ratis-server/src/test/java/org/apache/ratis/server/impl/LeaderElectionTests.java
index c2e5cbd1c..6453e8e94 100644
--- 
a/ratis-server/src/test/java/org/apache/ratis/server/impl/LeaderElectionTests.java
+++ 
b/ratis-server/src/test/java/org/apache/ratis/server/impl/LeaderElectionTests.java
@@ -652,6 +652,7 @@ public abstract class LeaderElectionTests<CLUSTER extends 
MiniRaftCluster>
   @Test
   public void testLeaderLease() throws Exception {
     // use a strict lease
+    RaftServerConfigKeys.Read.setLeaderLeaseEnabled(getProperties(), true);
     RaftServerConfigKeys.Read.setLeaderLeaseTimeoutRatio(getProperties(), 0.5);
     runWithNewCluster(3, c -> runLeaseTest(c, this::runTestLeaderLease));
   }
@@ -679,6 +680,7 @@ public abstract class LeaderElectionTests<CLUSTER extends 
MiniRaftCluster>
   @Test
   public void testLeaderLeaseDuringReconfiguration() throws Exception {
     // use a strict lease
+    RaftServerConfigKeys.Read.setLeaderLeaseEnabled(getProperties(), true);
     RaftServerConfigKeys.Read.setLeaderLeaseTimeoutRatio(getProperties(), 0.5);
     runWithNewCluster(3, c -> runLeaseTest(c, 
this::runTestLeaderLeaseDuringReconfiguration));
   }

Reply via email to