This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new ab04652616 HDDS-8792. Enable dynamic reconfiguration of some RM properties (#4910)
ab04652616 is described below
commit ab04652616adb6da33eaf6625e106f8f4b688a0a
Author: Doroszlai, Attila <[email protected]>
AuthorDate: Fri Jun 16 21:30:06 2023 +0200
HDDS-8792. Enable dynamic reconfiguration of some RM properties (#4910)
---
.../container/replication/ReplicationManager.java | 15 +++++++++++-
.../hdds/scm/server/StorageContainerManager.java | 8 +++----
.../ozone/reconfig/TestScmReconfiguration.java | 28 +++++++++++++++++++++-
3 files changed, 45 insertions(+), 6 deletions(-)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
index 4212a2ebcc..14a325b844 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hdds.conf.ConfigGroup;
import org.apache.hadoop.hdds.conf.ConfigType;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.conf.PostConstruct;
+import org.apache.hadoop.hdds.conf.ReconfigurableConfig;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ReplicationCommandPriority;
@@ -1034,7 +1035,8 @@ public class ReplicationManager implements SCMService {
* Configuration used by the Replication Manager.
*/
@ConfigGroup(prefix = "hdds.scm.replication")
- public static class ReplicationManagerConfiguration {
+ public static class ReplicationManagerConfiguration
+ extends ReconfigurableConfig {
/**
* True if LegacyReplicationManager should be used for RATIS containers.
*/
@@ -1063,6 +1065,7 @@ public class ReplicationManager implements SCMService {
@Config(key = "thread.interval",
type = ConfigType.TIME,
defaultValue = "300s",
+ reconfigurable = true,
tags = {SCM, OZONE},
description = "There is a replication monitor thread running inside " +
"SCM which takes care of replicating the containers in the " +
@@ -1077,6 +1080,7 @@ public class ReplicationManager implements SCMService {
@Config(key = "under.replicated.interval",
type = ConfigType.TIME,
defaultValue = "30s",
+ reconfigurable = true,
tags = {SCM, OZONE},
description = "How frequently to check if there are work to process " +
" on the under replicated queue"
@@ -1089,6 +1093,7 @@ public class ReplicationManager implements SCMService {
@Config(key = "over.replicated.interval",
type = ConfigType.TIME,
defaultValue = "30s",
+ reconfigurable = true,
tags = {SCM, OZONE},
description = "How frequently to check if there are work to process " +
" on the over replicated queue"
@@ -1102,6 +1107,7 @@ public class ReplicationManager implements SCMService {
@Config(key = "event.timeout",
type = ConfigType.TIME,
defaultValue = "10m",
+ reconfigurable = true,
tags = {SCM, OZONE},
description = "Timeout for the container replication/deletion commands "
+ "sent to datanodes. After this timeout the command will be "
@@ -1123,6 +1129,7 @@ public class ReplicationManager implements SCMService {
@Config(key = "event.timeout.datanode.offset",
type = ConfigType.TIME,
defaultValue = "30s",
+ reconfigurable = true,
tags = {SCM, OZONE},
description = "The amount of time to subtract from "
+ "hdds.scm.replication.event.timeout to give a deadline on the "
@@ -1145,6 +1152,7 @@ public class ReplicationManager implements SCMService {
@Config(key = "maintenance.replica.minimum",
type = ConfigType.INT,
defaultValue = "2",
+ reconfigurable = true,
tags = {SCM, OZONE},
description = "The minimum number of container replicas which must " +
" be available for a node to enter maintenance. If putting a " +
@@ -1166,6 +1174,7 @@ public class ReplicationManager implements SCMService {
@Config(key = "maintenance.remaining.redundancy",
type = ConfigType.INT,
defaultValue = "1",
+ reconfigurable = true,
tags = {SCM, OZONE},
description = "The number of redundant containers in a group which" +
" must be available for a node to enter maintenance. If putting" +
@@ -1197,6 +1206,7 @@ public class ReplicationManager implements SCMService {
@Config(key = "datanode.replication.limit",
type = ConfigType.INT,
defaultValue = "20",
+ reconfigurable = true,
tags = { SCM, DATANODE },
description = "A limit to restrict the total number of replication " +
"and reconstruction commands queued on a datanode. Note this is " +
@@ -1212,6 +1222,7 @@ public class ReplicationManager implements SCMService {
@Config(key = "datanode.reconstruction.weight",
type = ConfigType.INT,
defaultValue = "3",
+ reconfigurable = true,
tags = { SCM, DATANODE },
description = "When counting the number of replication commands on a " +
"datanode, the number of reconstruction commands is multiplied " +
@@ -1227,6 +1238,7 @@ public class ReplicationManager implements SCMService {
@Config(key = "datanode.delete.container.limit",
type = ConfigType.INT,
defaultValue = "40",
+ reconfigurable = true,
tags = { SCM, DATANODE },
description = "A limit to restrict the total number of delete " +
"container commands queued on a datanode. Note this is intended " +
@@ -1242,6 +1254,7 @@ public class ReplicationManager implements SCMService {
@Config(key = "inflight.limit.factor",
type = ConfigType.DOUBLE,
defaultValue = "0.75",
+ reconfigurable = true,
tags = { SCM },
description = "The overall replication task limit on a cluster is the" +
" number healthy nodes, times the datanode.replication.limit." +
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index ad70ead6a1..37fb5ee9d6 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -384,6 +384,9 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
eventQueue = new EventQueue();
serviceManager = new SCMServiceManager();
+ reconfigurationHandler =
+ new ReconfigurationHandler("SCM", conf, this::checkAdminAccess)
+ .register(OZONE_ADMINISTRATORS, this::reconfOzoneAdmins);
initializeSystemManagers(conf, configurator);
@@ -410,10 +413,6 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
scmAdmins = OzoneAdmins.getOzoneAdmins(scmStarterUser, conf);
LOG.info("SCM start with adminUsers: {}", scmAdmins.getAdminUsernames());
- reconfigurationHandler =
- new ReconfigurationHandler("SCM", conf, this::checkAdminAccess)
- .register(OZONE_ADMINISTRATORS, this::reconfOzoneAdmins);
-
datanodeProtocolServer = new SCMDatanodeProtocolServer(conf, this,
eventQueue, scmContext);
blockProtocolServer = new SCMBlockProtocolServer(conf, this);
@@ -785,6 +784,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
systemClock,
legacyRM,
containerReplicaPendingOps);
+ reconfigurationHandler.register(replicationManager.getConfig());
}
serviceManager.register(replicationManager);
if (configurator.getScmSafeModeManager() != null) {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/reconfig/TestScmReconfiguration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/reconfig/TestScmReconfiguration.java
index a407899c22..21eb93a2c6 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/reconfig/TestScmReconfiguration.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/reconfig/TestScmReconfiguration.java
@@ -21,8 +21,12 @@ package org.apache.hadoop.ozone.reconfig;
import com.google.common.collect.ImmutableSet;
import org.apache.hadoop.conf.ReconfigurationException;
import org.apache.hadoop.hdds.conf.ReconfigurationHandler;
+import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager.ReplicationManagerConfiguration;
import org.junit.jupiter.api.Test;
+import java.time.Duration;
+import java.util.Set;
+
import static org.apache.commons.lang3.RandomStringUtils.randomAlphabetic;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -40,7 +44,13 @@ class TestScmReconfiguration extends ReconfigurationTestBase {
@Test
void reconfigurableProperties() {
- assertProperties(getSubject(), ImmutableSet.of(OZONE_ADMINISTRATORS));
+ Set<String> expected = ImmutableSet.<String>builder()
+ .add(OZONE_ADMINISTRATORS)
+ .addAll(new ReplicationManagerConfiguration()
+ .reconfigurableProperties())
+ .build();
+
+ assertProperties(getSubject(), expected);
}
@Test
@@ -54,4 +64,20 @@ class TestScmReconfiguration extends ReconfigurationTestBase {
getCluster().getStorageContainerManager().getScmAdminUsernames());
}
+ @Test
+ void replicationInterval() throws ReconfigurationException {
+ ReplicationManagerConfiguration config = replicationManagerConfig();
+
+ getSubject().reconfigurePropertyImpl(
+ "hdds.scm.replication.thread.interval",
+ "120s");
+
+ assertEquals(Duration.ofSeconds(120), config.getInterval());
+ }
+
+ private ReplicationManagerConfiguration replicationManagerConfig() {
+ return getCluster().getStorageContainerManager().getReplicationManager()
+ .getConfig();
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]