This is an automated email from the ASF dual-hosted git repository.
szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new cc56c0ba52 HDDS-9821. XceiverServerRatis SyncTimeoutRetry is
overridden (#5717)
cc56c0ba52 is described below
commit cc56c0ba52f98bcd23001d1d316611f455b91af1
Author: Ivan Andika <[email protected]>
AuthorDate: Tue Dec 5 08:34:32 2023 +0800
HDDS-9821. XceiverServerRatis SyncTimeoutRetry is overridden (#5717)
---
.../org/apache/hadoop/hdds/scm/ScmConfigKeys.java | 2 -
.../org/apache/hadoop/ozone/OzoneConfigKeys.java | 3 -
.../common/src/main/resources/ozone-default.xml | 11 ++-
.../transport/server/ratis/XceiverServerRatis.java | 101 ++++++++++++---------
4 files changed, 68 insertions(+), 49 deletions(-)
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index 4b586b796d..1eb15b2848 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -80,8 +80,6 @@ public final class ScmConfigKeys {
public static final String
DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES =
"dfs.container.ratis.statemachinedata.sync.retries";
- public static final int
- DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT = -1;
public static final String
DFS_CONTAINER_RATIS_STATEMACHINE_MAX_PENDING_APPLY_TXNS =
"dfs.container.ratis.statemachine.max.pending.apply-transactions";
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
index 89c9be5467..f124e24141 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
@@ -381,9 +381,6 @@ public final class OzoneConfigKeys {
public static final String
DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES =
ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES;
- public static final int
- DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT =
- ScmConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT;
public static final String DFS_CONTAINER_RATIS_LOG_QUEUE_NUM_ELEMENTS =
ScmConfigKeys.DFS_CONTAINER_RATIS_LOG_QUEUE_NUM_ELEMENTS;
public static final int DFS_CONTAINER_RATIS_LOG_QUEUE_NUM_ELEMENTS_DEFAULT =
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml
b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 7d8f538178..bf9a2f511b 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -98,10 +98,17 @@
</property>
<property>
<name>dfs.container.ratis.statemachinedata.sync.retries</name>
- <value>-1</value>
+ <value/>
<tag>OZONE, DEBUG, CONTAINER, RATIS</tag>
<description>Number of times the WriteStateMachineData op will be tried
- before failing, if this value is -1, then this retries indefinitely.
+ before failing. If the value is not configured, it will default
+ to (hdds.ratis.rpc.slowness.timeout /
dfs.container.ratis.statemachinedata.sync.timeout),
+ which means that the WriteStateMachineData will be retried for every sync
timeout until
+ the configured slowness timeout is hit, after which the StateMachine
will close down the pipeline.
+
+ If this value is set to -1, then this retries indefinitely. This might
not be desirable
+ since if due to persistent failure the WriteStateMachineData op was not
able to complete
+ for a long time, this might block the Ratis write pipeline.
</description>
</property>
<property>
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
index c9ac85414a..4688ce4b27 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
@@ -265,41 +265,13 @@ public final class XceiverServerRatis implements
XceiverServerSpi {
final long raftSegmentPreallocatedSize =
setRaftSegmentPreallocatedSize(properties);
- TimeUnit timeUnit;
- long duration;
-
- // set the configs enable and set the stateMachineData sync timeout
- RaftServerConfigKeys.Log.StateMachineData.setSync(properties, true);
+ // setup ratis stream if datastream is enabled
if (streamEnable) {
setUpRatisStream(properties);
}
- timeUnit = OzoneConfigKeys.
- DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT.getUnit();
- duration = conf.getTimeDuration(
- OzoneConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT,
- OzoneConfigKeys.
- DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT
- .getDuration(), timeUnit);
- final TimeDuration dataSyncTimeout =
- TimeDuration.valueOf(duration, timeUnit);
- RaftServerConfigKeys.Log.StateMachineData
- .setSyncTimeout(properties, dataSyncTimeout);
- // typically a pipeline close will be initiated after a node failure
- // timeout from Ratis in case a follower does not respond.
- // By this time, all the writeStateMachine calls should be stopped
- // and IOs should fail.
- // Even if the leader is not able to complete write calls within
- // the timeout seconds, it should just fail the operation and trigger
- // pipeline close. failing the writeStateMachine call with limited retries
- // will ensure even the leader initiates a pipeline close if its not
- // able to complete write in the timeout configured.
-
- // NOTE : the default value for the retry count in ratis is -1,
- // which means retry indefinitely.
- RaftServerConfigKeys.Log.StateMachineData
- .setSyncTimeoutRetry(properties, (int) nodeFailureTimeoutMs /
- dataSyncTimeout.toIntExact(TimeUnit.MILLISECONDS));
+ // Set Ratis State Machine Data configurations
+ setStateMachineDataConfigurations(properties);
// set timeout for a retry cache entry
setTimeoutForRetryCache(properties);
@@ -359,17 +331,6 @@ public final class XceiverServerRatis implements
XceiverServerSpi {
RaftServerConfigKeys.Log.setQueueByteLimit(properties,
SizeInBytes.valueOf(logQueueByteLimit));
- int numSyncRetries = conf.getInt(
- OzoneConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES,
- OzoneConfigKeys.
- DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES_DEFAULT);
- RaftServerConfigKeys.Log.StateMachineData.setSyncTimeoutRetry(properties,
- numSyncRetries);
-
- // Enable the StateMachineCaching
- RaftServerConfigKeys.Log.StateMachineData.setCachingEnabled(
- properties, true);
-
RaftServerConfigKeys.Log.Appender.setInstallSnapshotEnabled(properties,
false);
@@ -470,6 +431,62 @@ public final class XceiverServerRatis implements
XceiverServerSpi {
SizeInBytes.valueOf(raftSegmentBufferSize));
}
+ private void setStateMachineDataConfigurations(RaftProperties properties) {
+ // set the configs enable and set the stateMachineData sync timeout
+ RaftServerConfigKeys.Log.StateMachineData.setSync(properties, true);
+
+ TimeUnit timeUnit = OzoneConfigKeys.
+ DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT.getUnit();
+ long duration = conf.getTimeDuration(
+ OzoneConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT,
+ OzoneConfigKeys.
+ DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_TIMEOUT_DEFAULT
+ .getDuration(), timeUnit);
+ final TimeDuration dataSyncTimeout =
+ TimeDuration.valueOf(duration, timeUnit);
+ RaftServerConfigKeys.Log.StateMachineData
+ .setSyncTimeout(properties, dataSyncTimeout);
+ // typically a pipeline close will be initiated after a node failure
+ // timeout from Ratis in case a follower does not respond.
+ // By this time, all the writeStateMachine calls should be stopped
+ // and IOs should fail.
+ // Even if the leader is not able to complete write calls within
+ // the timeout seconds, it should just fail the operation and trigger
+ // pipeline close. failing the writeStateMachine call with limited retries
+ // will ensure even the leader initiates a pipeline close if it's not
+ // able to complete write in the timeout configured.
+
+ // NOTE : the default value for the retry count in ratis is -1,
+ // which means retry indefinitely.
+ int syncTimeoutRetryDefault = (int) nodeFailureTimeoutMs /
+ dataSyncTimeout.toIntExact(TimeUnit.MILLISECONDS);
+ int numSyncRetries = conf.getInt(
+ OzoneConfigKeys.DFS_CONTAINER_RATIS_STATEMACHINEDATA_SYNC_RETRIES,
+ syncTimeoutRetryDefault);
+ RaftServerConfigKeys.Log.StateMachineData.setSyncTimeoutRetry(properties,
+ numSyncRetries);
+
+ // Enable the StateMachineCaching
+ // By enabling caching, the state machine data (e.g. write chunk data)
+ // will not be cached in Ratis log cache. The caching
+ // responsibility is deferred to the StateMachine implementation itself.
+ // ContainerStateMachine contains stateMachineDataCache that stores
+ // write chunk data for each log entry index.
+ //
+ // Note that in Ratis, the state machine data is never stored as
+ // part of the persisted Raft log entry. This means that the state
+ // machine data (in this case, the write chunk data) is only stored in the
+ // stateMachineDataCache until it's persisted in datanode storage
+ // (See ContainerStateMachine#writeStateMachineData)
+ //
+ // This requires ContainerStateMachine to implement additional mechanisms
+ // such as returning the state machine data in StateMachine#read to
+ // read back the state machine data that will be sent to the Ratis
+ // followers.
+ RaftServerConfigKeys.Log.StateMachineData.setCachingEnabled(
+ properties, true);
+ }
+
private RpcType setRpcType(RaftProperties properties) {
final String rpcType = conf.get(
OzoneConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_KEY,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]