This is an automated email from the ASF dual-hosted git repository. szetszwo pushed a commit to branch release-3.1.3_review in repository https://gitbox.apache.org/repos/asf/ratis.git
commit cf893f64906df82908fcc43aed2d575e52f7a174 Author: William Song <48054931+szywill...@users.noreply.github.com> AuthorDate: Sat Jan 4 01:58:36 2025 +0800 RATIS-2233. make NOPROGRESS timeout configurable (#1204) --- ratis-docs/src/site/markdown/configurations.md | 10 ++++++++++ .../org/apache/ratis/server/RaftServerConfigKeys.java | 15 +++++++++++++++ .../org/apache/ratis/server/impl/LeaderStateImpl.java | 4 +++- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/ratis-docs/src/site/markdown/configurations.md b/ratis-docs/src/site/markdown/configurations.md index 71eae7d3d..4caf11b30 100644 --- a/ratis-docs/src/site/markdown/configurations.md +++ b/ratis-docs/src/site/markdown/configurations.md @@ -104,6 +104,16 @@ When bootstrapping a new peer, If the gap between the match index of the peer and the leader's latest committed index is less than this gap, we treat the peer as caught-up. Increase this number when write throughput is high. +--------------------------------------------------------------------------------- +| **Property** | `raft.server.staging.timeout` | +|:----------------|:-----------------------------------------| +| **Description** | timeout of bootstrapping a new peer | +| **Type** | TimeDuration | +| **Default** | 3 times of `raft.server.rpc.timeout.max` | + +During the initialization of a new peer, the leader will classify the bootstrap process as "NO PROGRESS" +if it fails to receive any RPC responses from this peer within this specified timeout period. + --------------------------------------------------------------------------------- ### ThreadPool - Configurations related to server thread pools. diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java index 8e1d92b75..849597433 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java @@ -116,6 +116,21 @@ public interface RaftServerConfigKeys { setInt(properties::setInt, STAGING_CATCHUP_GAP_KEY, stagingCatchupGap); } + String STAGING_TIMEOUT_KEY = PREFIX + ".staging.timeout"; + + TimeDuration STAGING_TIMEOUT_DEFAULT = null; + + static TimeDuration stagingTimeout(RaftProperties properties) { + final TimeDuration fallbackStagingTimeout = Rpc.timeoutMax(properties, null).multiply(3); + return getTimeDuration(properties.getTimeDuration(fallbackStagingTimeout.getUnit()), + STAGING_TIMEOUT_KEY, STAGING_TIMEOUT_DEFAULT, + Rpc.TIMEOUT_MAX_KEY, fallbackStagingTimeout, getDefaultLog()); + } + static void setStagingTimeout(RaftProperties properties, TimeDuration stagingTimeout) { + setTimeDuration(properties::setTimeDuration, STAGING_TIMEOUT_KEY, stagingTimeout); + } + + interface ThreadPool { String PREFIX = RaftServerConfigKeys.PREFIX + ".threadpool"; diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java index 0dfbf263d..5870f51a6 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java @@ -350,6 +350,7 @@ class LeaderStateImpl implements LeaderState { private final boolean logMetadataEnabled; private final int stagingCatchupGap; + private final TimeDuration stagingTimeout; private final RaftServerMetricsImpl raftServerMetrics; private final LogAppenderMetrics logAppenderMetrics; private final long followerMaxGapThreshold; @@ -364,6 +365,7 @@ class LeaderStateImpl implements LeaderState { final RaftProperties properties = server.getRaftServer().getProperties(); stagingCatchupGap = RaftServerConfigKeys.stagingCatchupGap(properties); + stagingTimeout = RaftServerConfigKeys.stagingTimeout(properties); final ServerState state = server.getState(); this.raftLog = state.getLog(); @@ -789,7 +791,7 @@ class LeaderStateImpl implements LeaderState { private BootStrapProgress checkProgress(FollowerInfo follower, long committed) { Preconditions.assertTrue(!isCaughtUp(follower)); final Timestamp progressTime = Timestamp.currentTime().addTimeMs(-server.getMaxTimeoutMs()); - final Timestamp timeoutTime = Timestamp.currentTime().addTimeMs(-3L * server.getMaxTimeoutMs()); + final Timestamp timeoutTime = Timestamp.currentTime().addTimeMs(-stagingTimeout.toLong(TimeUnit.MILLISECONDS)); if (follower.getLastRpcResponseTime().compareTo(timeoutTime) < 0) { LOG.debug("{} detects a follower {} timeout ({}ms) for bootstrapping", this, follower, follower.getLastRpcResponseTime().elapsedTimeMs());