This is an automated email from the ASF dual-hosted git repository.
szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ratis.git
The following commit(s) were added to refs/heads/master by this push:
new e52710a6 RATIS-1622. Fix high CPU load when some followers are down (#680)
e52710a6 is described below
commit e52710a6c7b36a3b4a481269dd1077f00b77b7a5
Author: Riguz Lee <[email protected]>
AuthorDate: Thu Jul 21 05:30:00 2022 +0800
RATIS-1622. Fix high CPU load when some followers are down (#680)
---
.../java/org/apache/ratis/grpc/server/GrpcLogAppender.java | 10 +++++++++-
.../src/main/java/org/apache/ratis/server/RaftServer.java | 5 ++---
.../test/java/org/apache/ratis/server/ServerRestartTests.java | 2 +-
3 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java
index 65bb62b6..2eab1132 100644
--- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java
+++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java
@@ -158,12 +158,20 @@ public class GrpcLogAppender extends LogAppenderBase {
public long getWaitTimeMs() {
if (haveTooManyPendingRequests()) {
return getHeartbeatWaitTimeMs(); // Should wait for a short time
- } else if (shouldSendAppendEntries()) {
+ } else if (shouldSendAppendEntries() && !isSlowFollower()) {
+ // For normal nodes, new entries should be sent ASAP
+ // however for slow followers (especially when the follower is down),
+ // keep sending without any wait time only ends up in high CPU load
return 0L;
}
return Math.min(10L, getHeartbeatWaitTimeMs());
}
+ private boolean isSlowFollower() {
+ final TimeDuration elapsedTime = getFollower().getLastRpcResponseTime().elapsedTime();
+ return elapsedTime.compareTo(getServer().properties().rpcSlownessTimeout()) > 0;
+ }
+
private void mayWait() {
// use lastSend time instead of lastResponse time
try {
diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java
index bdf4c0b4..e9719b96 100644
--- a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java
+++ b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java
@@ -69,9 +69,8 @@ public interface RaftServer extends Closeable, RpcType.Get,
/** @return the {@link RaftPeer} for this division. */
default RaftPeer getPeer() {
- return Optional.ofNullable(getGroup())
- .map(g -> g.getPeer(getId()))
- .orElseGet(() -> getRaftServer().getPeer());
+ return Optional.ofNullable(getRaftConf().getPeer(getId()))
+ .orElseGet(() -> getRaftServer().getPeer());
}
/** @return the information about this division. */
diff --git a/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java b/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java
index 93e8f852..2c0e1816 100644
--- a/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java
+++ b/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java
@@ -379,7 +379,7 @@ public abstract class ServerRestartTests<CLUSTER extends MiniRaftCluster>
final long mid = size / 2;
raf.seek(mid);
for (long i = mid; i < size; i++) {
- raf.write(0);
+ raf.write(-1);
}
}