HDDS-571. Update SCM chill mode exit criteria to optionally wait for n 
datanodes. Contributed by Ajay Kumar.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/cdf5d583
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/cdf5d583
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/cdf5d583

Branch: refs/heads/HDFS-12943
Commit: cdf5d58364afbb58b3ae49670a7b179d6c5a0ba7
Parents: 9bb2801
Author: Ajay Kumar <a...@apache.com>
Authored: Fri Oct 5 14:02:54 2018 -0700
Committer: Ajay Kumar <a...@apache.com>
Committed: Fri Oct 5 14:07:16 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hdds/HddsConfigKeys.java  |  3 +
 .../common/src/main/resources/ozone-default.xml |  9 +++
 .../hdds/scm/server/SCMChillModeManager.java    | 63 +++++++++++++++++++-
 .../scm/server/TestSCMChillModeManager.java     | 41 ++++++++++++-
 4 files changed, 113 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/cdf5d583/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
index 856d113..13b3bb7 100644
--- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
+++ 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
@@ -83,6 +83,9 @@ public final class HddsConfigKeys {
   public static final String HDDS_SCM_CHILLMODE_ENABLED =
       "hdds.scm.chillmode.enabled";
   public static final boolean HDDS_SCM_CHILLMODE_ENABLED_DEFAULT = true;
+  public static final String HDDS_SCM_CHILLMODE_MIN_DATANODE =
+      "hdds.scm.chillmode.min.datanode";
+  public static final int HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT = 1;
 
   // % of containers which should have at least one reported replica
   // before SCM comes out of chill mode.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cdf5d583/hadoop-hdds/common/src/main/resources/ozone-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml 
b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index b7c967d..d7cbd75 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -1165,6 +1165,15 @@
   </property>
 
   <property>
+    <name>hdds.scm.chillmode.min.datanode</name>
+    <value>1</value>
+    <tag>HDDS,SCM,OPERATION</tag>
+    <description>Minimum number of DataNodes that must register with SCM
+      before it can leave chill mode. A value of 0 disables this check.
+    </description>
+  </property>
+
+  <property>
     <name>hdds.container.action.max.limit</name>
     <value>20</value>
     <tag>DATANODE</tag>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cdf5d583/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java
index 3c1cc8f..c11a60f 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMChillModeManager.java
@@ -20,8 +20,10 @@ package org.apache.hadoop.hdds.scm.server;
 import com.google.common.annotations.VisibleForTesting;
 import java.util.EnumSet;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.UUID;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
@@ -60,14 +62,16 @@ public class SCMChillModeManager implements
   private Map<String, ChillModeExitRule> exitRules = new HashMap(1);
   private Configuration config;
   private static final String CONT_EXIT_RULE = "ContainerChillModeRule";
+  private static final String DN_EXIT_RULE = "DataNodeChillModeRule";
   private final EventQueue eventPublisher;
 
   SCMChillModeManager(Configuration conf, List<ContainerInfo> allContainers,
       EventQueue eventQueue) {
     this.config = conf;
     this.eventPublisher = eventQueue;
-    exitRules
-        .put(CONT_EXIT_RULE, new ContainerChillModeRule(config, 
allContainers));
+    exitRules.put(CONT_EXIT_RULE,
+        new ContainerChillModeRule(config, allContainers));
+    exitRules.put(DN_EXIT_RULE, new DataNodeChillModeRule(config));
     if (!conf.getBoolean(HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED,
         HddsConfigKeys.HDDS_SCM_CHILLMODE_ENABLED_DEFAULT)) {
       exitChillMode(eventQueue);
@@ -120,6 +124,7 @@ public class SCMChillModeManager implements
       EventPublisher publisher) {
     if (getInChillMode()) {
       exitRules.get(CONT_EXIT_RULE).process(nodeRegistrationContainerReport);
+      exitRules.get(DN_EXIT_RULE).process(nodeRegistrationContainerReport);
       validateChillModeExitRules(publisher);
     }
   }
@@ -187,6 +192,9 @@ public class SCMChillModeManager implements
 
     @VisibleForTesting
     public double getCurrentContainerThreshold() {
+      if (maxContainer == 0) {
+        return 1;
+      }
       return (containerWithMinReplicas.doubleValue() / maxContainer);
     }
 
@@ -217,6 +225,57 @@ public class SCMChillModeManager implements
     }
   }
 
+  /**
+   * Chill mode exit rule based on the number of distinct DataNodes that have
+   * registered with SCM. The rule is satisfied once at least
+   * {@code hdds.scm.chillmode.min.datanode} DataNodes have been seen; a
+   * configured value of 0 disables the check.
+   */
+  public class DataNodeChillModeRule implements
+      ChillModeExitRule<NodeRegistrationContainerReport> {
+
+    // Min DataNodes required to exit chill mode.
+    private final int requiredDns;
+    // Number of distinct DataNodes seen so far. Not reset by cleanup().
+    private int registeredDns = 0;
+    // UUIDs of the DataNodes seen so far, so repeated reports count once.
+    private final HashSet<UUID> registeredDnSet;
+
+    /**
+     * Reads the required DataNode count from the configuration.
+     *
+     * @param conf configuration supplying the minimum DataNode count.
+     * @throws IllegalArgumentException if the configured minimum is negative.
+     */
+    public DataNodeChillModeRule(Configuration conf) {
+      requiredDns = conf
+          .getInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE,
+              HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE_DEFAULT);
+      if (requiredDns < 0) {
+        // Name the offending key instead of letting HashSet fail with an
+        // opaque "Illegal initial capacity" message.
+        throw new IllegalArgumentException(
+            HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE
+                + " must not be negative, found " + requiredDns);
+      }
+      registeredDnSet = new HashSet<>(requiredDns * 2);
+    }
+
+    /**
+     * @return true once the number of distinct registered DataNodes has
+     * reached the required minimum.
+     */
+    @Override
+    public boolean validate() {
+      return registeredDns >= requiredDns;
+    }
+
+    /**
+     * @return number of distinct DataNodes recorded so far.
+     */
+    @VisibleForTesting
+    public double getRegisteredDataNodes() {
+      return registeredDns;
+    }
+
+    /**
+     * Records the DataNode that sent the given report. Does nothing when no
+     * DataNode check is configured or when SCM is no longer in chill mode.
+     *
+     * @param reportsProto registration report carrying the DataNode details.
+     */
+    @Override
+    public void process(NodeRegistrationContainerReport reportsProto) {
+      if (requiredDns == 0) {
+        // No dn check required.
+        return;
+      }
+
+      if (inChillMode.get()) {
+        // Adding to a set keeps the count at distinct DataNode UUIDs.
+        registeredDnSet.add(reportsProto.getDatanodeDetails().getUuid());
+        registeredDns = registeredDnSet.size();
+        LOG.info("SCM in chill mode. {} DataNodes registered, {} required.",
+            registeredDns, requiredDns);
+      }
+    }
+
+    /**
+     * Releases the tracked DataNode UUIDs. The registered count itself is
+     * left unchanged.
+     */
+    @Override
+    public void cleanup() {
+      registeredDnSet.clear();
+    }
+  }
+
   @VisibleForTesting
   public static Logger getLogger() {
     return LOG;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cdf5d583/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java
index 486c604..53d76e6 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMChillModeManager.java
@@ -45,7 +45,7 @@ public class TestSCMChillModeManager {
   private List<ContainerInfo> containers;
 
   @Rule
-  public Timeout timeout = new Timeout(1000 * 20);
+  public Timeout timeout = new Timeout(1000 * 35);
 
   @BeforeClass
   public static void setUp() {
@@ -111,6 +111,45 @@ public class TestSCMChillModeManager {
     assertFalse(scmChillModeManager.getInChillMode());
   }
 
+  /**
+   * Runs the DataNode exit rule scenario with no containers, for a required
+   * DataNode count of 0, 3 and 5 in turn.
+   */
+  @Test
+  public void testChillModeDataNodeExitRule() throws Exception {
+    containers = new ArrayList<>();
+    for (int requiredDataNodes : new int[] {0, 3, 5}) {
+      testChillModeDataNodes(requiredDataNodes);
+    }
+  }
+
+  /**
+   * Configures SCM to require {@code numOfDns} DataNodes, fires that many
+   * registration reports one at a time, and checks that SCM stays in chill
+   * mode until the last report and leaves it afterwards. With
+   * {@code numOfDns == 0} no report is fired and SCM is expected to remain in
+   * chill mode.
+   *
+   * @param numOfDns minimum number of DataNodes to configure.
+   * @throws Exception if waiting for the expected chill mode state fails.
+   */
+  private void testChillModeDataNodes(int numOfDns) throws Exception {
+    OzoneConfiguration conf = new OzoneConfiguration(config);
+    conf.setInt(HddsConfigKeys.HDDS_SCM_CHILLMODE_MIN_DATANODE, numOfDns);
+    scmChillModeManager = new SCMChillModeManager(conf, containers, queue);
+    queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
+        scmChillModeManager);
+    // Assert SCM is in Chill mode.
+    assertTrue(scmChillModeManager.getInChillMode());
+
+    // Register all DataNodes except last one and assert SCM is in chill mode.
+    // NOTE(review): presumably each generated report comes from a distinct
+    // DataNode; confirm in HddsTestUtils.
+    for (int i = 0; i < numOfDns-1; i++) {
+      queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
+          HddsTestUtils.createNodeRegistrationContainerReport(containers));
+      assertTrue(scmChillModeManager.getInChillMode());
+      assertTrue(scmChillModeManager.getCurrentContainerThreshold() == 1);
+    }
+
+    if(numOfDns == 0){
+      // No report was fired, so SCM is expected to still be in chill mode.
+      GenericTestUtils.waitFor(() -> {
+        return scmChillModeManager.getInChillMode();
+      }, 10, 1000 * 10);
+      return;
+    }
+    // Register last DataNode and check that SCM is out of Chill mode.
+    queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
+        HddsTestUtils.createNodeRegistrationContainerReport(containers));
+    // Wait for the exit: polling the un-negated flag would succeed at once
+    // while SCM is still in chill mode and verify nothing.
+    GenericTestUtils.waitFor(() -> {
+      return !scmChillModeManager.getInChillMode();
+    }, 10, 1000 * 10);
+  }
+
   private void testContainerThreshold(List<ContainerInfo> dnContainers,
       double expectedThreshold)
       throws Exception {


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to