HDDS-571. Update SCM chill mode exit criteria to optionally wait for n datanodes. Contributed by Ajay Kumar.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/cdf5d583 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/cdf5d583 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/cdf5d583 Branch: refs/heads/HEAD Commit: cdf5d58364afbb58b3ae49670a7b179d6c5a0ba7 Parents: 9bb2801 Author: Ajay Kumar <a...@apache.com> Authored: Fri Oct 5 14:02:54 2018 -0700 Committer: Ajay Kumar <a...@apache.com> Committed: Fri Oct 5 14:07:16 2018 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hdds/HddsConfigKeys.java | 3 + .../common/src/main/resources/ozone-default.xml | 9 +++ .../hdds/scm/server/SCMChillModeManager.java | 63 +++++++++++++++++++- .../scm/server/TestSCMChillModeManager.java | 41 ++++++++++++- 4 files changed, 113 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/cdf5d583/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java index 856d113..13b3bb7 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java @@ -83,6 +83,9 @@ public final class HddsConfigKeys { public static final String HDDS_SCM_CHILLMODE_ENABLED = "hdds.scm.chillmode.enabled"; public static final boolean HDDS_SCM_CHILLMODE_ENABLED_DEFAULT = true; + public static final String HDDS_SCM_CHILLMODE_MIN_DATANODE = + "hdds.scm.chillmode.min.datanode"; + public static final int HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT = 1; // % of containers which should have at least one reported replica // before SCM comes out of chill mode. http://git-wip-us.apache.org/repos/asf/hadoop/blob/cdf5d583/hadoop-hdds/common/src/main/resources/ozone-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index b7c967d..d7cbd75 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -1165,6 +1165,15 @@ </property> <property> + <name>hdds.scm.chillmode.min.datanode</name> + <value>1</value> + <tag>HDDS,SCM,OPERATION</tag> + <description>Minimum DataNodes which should be registered to get SCM out of + chill mode. + </description> + </property> + + <property> <name>hdds.container.action.max.limit</name> <value>20</value> <tag>DATANODE</tag> http://git-wip-us.apache.org/repos/asf/hadoop/blob/cdf5d583/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java index 3c1cc8f..c11a60f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java @@ -20,8 +20,10 @@ package org.apache.hadoop.hdds.scm.server; import com.google.common.annotations.VisibleForTesting; import java.util.EnumSet; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; @@ -60,14 +62,16 @@ public class SCMChillModeManager implements private Map<String, ChillModeExitRule> exitRules = new HashMap(1); private Configuration config; private static final String CONT_EXIT_RULE = "ContainerChillModeRule"; + private static final String DN_EXIT_RULE = "DataNodeChillModeRule"; private final EventQueue eventPublisher; SCMChillModeManager(Configuration conf, List<ContainerInfo> allContainers, EventQueue eventQueue) { this.config = conf; this.eventPublisher = eventQueue; - exitRules - .put(CONT_EXIT_RULE, new ContainerChillModeRule(config, allContainers)); + exitRules.put(CONT_EXIT_RULE, + new ContainerChillModeRule(config, allContainers)); + exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config)); if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED, HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) { exitChillMode(eventQueue); @@ -120,6 +124,7 @@ public class SCMChillModeManager implements EventPublisher publisher) { if (getInChillMode()) { exitRules.get(CONT_EXIT_RULE).process(nodeRegistrationContainerReport); + exitRules.get(DN_EXIT_RULE).process(nodeRegistrationContainerReport); validateChillModeExitRules(publisher); } } @@ -187,6 +192,9 @@ public class SCMChillModeManager implements @VisibleForTesting public double getCurrentContainerThreshold() { + if (maxContainer == 0) { + return 1; + } return (containerWithMinReplicas.doubleValue() / maxContainer); } @@ -217,6 +225,57 @@ public class SCMChillModeManager implements } } + /** + * Class defining Chill mode exit criteria according to number of DataNodes + * registered with SCM. + */ + public class DataNodeChillModeRule implements + ChillModeExitRule<NodeRegistrationContainerReport> { + + // Min DataNodes required to exit chill mode. + private int requiredDns; + private int registeredDns = 0; + // Set to track registered DataNodes. + private HashSet<UUID> registeredDnSet; + + public DataNodeChillModeRule(Configuration conf) { + requiredDns = conf + .getInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, + HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT); + registeredDnSet = new HashSet<>(requiredDns * 2); + } + + @Override + public boolean validate() { + return registeredDns >= requiredDns; + } + + @VisibleForTesting + public double getRegisteredDataNodes() { + return registeredDns; + } + + @Override + public void process(NodeRegistrationContainerReport reportsProto) { + if (requiredDns == 0) { + // No dn check required. + return; + } + + if(inChillMode.get()) { + registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid()); + registeredDns = registeredDnSet.size(); + LOG.info("SCM in chill mode. {} DataNodes registered, {} required.", + registeredDns, requiredDns); + } + } + + @Override + public void cleanup() { + registeredDnSet.clear(); + } + } + @VisibleForTesting public static Logger getLogger() { return LOG; http://git-wip-us.apache.org/repos/asf/hadoop/blob/cdf5d583/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java ---------------------------------------------------------------------- diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java index 486c604..53d76e6 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java @@ -45,7 +45,7 @@ public class TestSCMChillModeManager { private List<ContainerInfo> containers; @Rule - public Timeout timeout = new Timeout(1000 * 20); + public Timeout timeout = new Timeout(1000 * 35); @BeforeClass public static void setUp() { @@ -111,6 +111,45 @@ public class TestSCMChillModeManager { assertFalse(scmChillModeManager.getInChillMode()); } + @Test + public void testChillModeDataNodeExitRule() throws Exception { + containers = new ArrayList<>(); + testChillModeDataNodes(0); + testChillModeDataNodes(3); + testChillModeDataNodes(5); + } + + private void testChillModeDataNodes(int numOfDns) throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(config); + conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, numOfDns); + scmChillModeManager = new SCMChillModeManager(conf, containers, queue); + queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + scmChillModeManager); + // Assert SCM is in Chill mode. + assertTrue(scmChillModeManager.getInChillMode()); + + // Register all DataNodes except last one and assert SCM is in chill mode. + for (int i = 0; i < numOfDns-1; i++) { + queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + HddsTestUtils.createNodeRegistrationContainerReport(containers)); + assertTrue(scmChillModeManager.getInChillMode()); + assertTrue(scmChillModeManager.getCurrentContainerThreshold() == 1); + } + + if(numOfDns == 0){ + GenericTestUtils.waitFor(() -> { + return scmChillModeManager.getInChillMode(); + }, 10, 1000 * 10); + return; + } + // Register last DataNode and check that SCM is out of Chill mode. + queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, + HddsTestUtils.createNodeRegistrationContainerReport(containers)); + GenericTestUtils.waitFor(() -> { + return scmChillModeManager.getInChillMode(); + }, 10, 1000 * 10); + } + private void testContainerThreshold(List<ContainerInfo> dnContainers, double expectedThreshold) throws Exception { --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org