This is an automated email from the ASF dual-hosted git repository.

szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ratis.git


The following commit(s) were added to refs/heads/master by this push:
     new e52710a6 RATIS-1622. Fix high CPU load when some followers are down (#680)
e52710a6 is described below

commit e52710a6c7b36a3b4a481269dd1077f00b77b7a5
Author: Riguz Lee <[email protected]>
AuthorDate: Thu Jul 21 05:30:00 2022 +0800

    RATIS-1622. Fix high CPU load when some followers are down (#680)
---
 .../java/org/apache/ratis/grpc/server/GrpcLogAppender.java     | 10 +++++++++-
 .../src/main/java/org/apache/ratis/server/RaftServer.java      |  5 ++---
 .../test/java/org/apache/ratis/server/ServerRestartTests.java  |  2 +-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java
index 65bb62b6..2eab1132 100644
--- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java
+++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java
@@ -158,12 +158,20 @@ public class GrpcLogAppender extends LogAppenderBase {
   public long getWaitTimeMs() {
     if (haveTooManyPendingRequests()) {
       return getHeartbeatWaitTimeMs(); // Should wait for a short time
-    } else if (shouldSendAppendEntries()) {
+    } else if (shouldSendAppendEntries() && !isSlowFollower()) {
+      // For normal nodes, new entries should be sent ASAP
+      // however for slow followers (especially when the follower is down),
+      // keep sending without any wait time only ends up in high CPU load
       return 0L;
     }
     return Math.min(10L, getHeartbeatWaitTimeMs());
   }
 
+  private boolean isSlowFollower() {
+    final TimeDuration elapsedTime = getFollower().getLastRpcResponseTime().elapsedTime();
+    return elapsedTime.compareTo(getServer().properties().rpcSlownessTimeout()) > 0;
+  }
+
   private void mayWait() {
     // use lastSend time instead of lastResponse time
     try {
diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java
index bdf4c0b4..e9719b96 100644
--- a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java
+++ b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java
@@ -69,9 +69,8 @@ public interface RaftServer extends Closeable, RpcType.Get,
 
     /** @return the {@link RaftPeer} for this division. */
     default RaftPeer getPeer() {
-      return Optional.ofNullable(getGroup())
-          .map(g -> g.getPeer(getId()))
-          .orElseGet(() -> getRaftServer().getPeer());
+      return Optional.ofNullable(getRaftConf().getPeer(getId()))
+        .orElseGet(() -> getRaftServer().getPeer());
     }
 
     /** @return the information about this division. */
diff --git a/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java b/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java
index 93e8f852..2c0e1816 100644
--- a/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java
+++ b/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java
@@ -379,7 +379,7 @@ public abstract class ServerRestartTests<CLUSTER extends MiniRaftCluster>
       final long mid = size / 2;
       raf.seek(mid);
       for (long i = mid; i < size; i++) {
-        raf.write(0);
+        raf.write(-1);
       }
     }
 

Reply via email to