This is an automated email from the ASF dual-hosted git repository.

inigoiri pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 3ab77d9  HDFS-14201. Ability to disallow safemode NN to become active. 
Contributed by Xiao Liang and He Xiaoqiao.
3ab77d9 is described below

commit 3ab77d9bc9eacfdb218b68988235a921c810b0d1
Author: Inigo Goiri <inigo...@apache.org>
AuthorDate: Tue Jun 18 09:58:29 2019 -0700

    HDFS-14201. Ability to disallow safemode NN to become active. Contributed 
by Xiao Liang and He Xiaoqiao.
---
 .../main/java/org/apache/hadoop/ipc/Server.java    |  3 +++
 .../java/org/apache/hadoop/hdfs/DFSConfigKeys.java |  4 +++
 .../hadoop/hdfs/server/namenode/NameNode.java      | 13 +++++++++
 .../src/main/resources/hdfs-default.xml            |  9 +++++++
 .../site/markdown/HDFSHighAvailabilityWithNFS.md   | 12 +++++++++
 .../site/markdown/HDFSHighAvailabilityWithQJM.md   | 12 +++++++++
 .../hdfs/server/namenode/ha/TestHASafeMode.java    | 31 ++++++++++++++++++++++
 .../hdfs/server/namenode/ha/TestNNHealthCheck.java | 30 +++++++++++++++++++++
 8 files changed, 114 insertions(+)

diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
index 9018bed..8c0edbb 100644
--- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
+++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
@@ -81,6 +81,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration.IntegerRanges;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.ha.HealthCheckFailedException;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableUtils;
@@ -3090,6 +3091,8 @@ public abstract class Server {
     }
     
     this.exceptionsHandler.addTerseLoggingExceptions(StandbyException.class);
+    this.exceptionsHandler.addTerseLoggingExceptions(
+        HealthCheckFailedException.class);
   }
 
   public synchronized void addAuxiliaryListener(int auxiliaryPort)
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index f4a8def..fb83baf 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -1016,6 +1016,10 @@ public class DFSConfigKeys extends 
CommonConfigurationKeys {
   public static final int DFS_HA_ZKFC_PORT_DEFAULT = 8019;
   public static final String DFS_HA_ZKFC_NN_HTTP_TIMEOUT_KEY = 
"dfs.ha.zkfc.nn.http.timeout.ms";
   public static final int DFS_HA_ZKFC_NN_HTTP_TIMEOUT_KEY_DEFAULT = 20000;
+  public static final String DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE =
+      "dfs.ha.nn.not-become-active-in-safemode";
+  public static final boolean DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE_DEFAULT =
+      false;
 
   // Security-related configs
   public static final String DFS_ENCRYPT_DATA_TRANSFER_KEY = 
"dfs.encrypt.data.transfer";
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
index e4c8856..126ac0b 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
@@ -118,6 +118,8 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
 import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
 import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
+import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE;
+import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE_DEFAULT;
 import static 
org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT;
 import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_ENABLED_KEY;
 import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_ENABLED_DEFAULT;
@@ -390,6 +392,7 @@ public class NameNode extends ReconfigurableBase implements
   private final HAContext haContext;
   protected final boolean allowStaleStandbyReads;
   private AtomicBoolean started = new AtomicBoolean(false);
+  private final boolean notBecomeActiveInSafemode;
 
   private final static int HEALTH_MONITOR_WARN_THRESHOLD_MS = 5000;
   
@@ -983,6 +986,9 @@ public class NameNode extends ReconfigurableBase implements
       this.stopAtException(e);
       throw e;
     }
+    notBecomeActiveInSafemode = conf.getBoolean(
+        DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE,
+        DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE_DEFAULT);
     this.started.set(true);
   }
 
@@ -1802,6 +1808,10 @@ public class NameNode extends ReconfigurableBase 
implements
       throw new HealthCheckFailedException(
           "The NameNode has no resources available");
     }
+    if (notBecomeActiveInSafemode && isInSafeMode()) {
+      throw new HealthCheckFailedException("The NameNode is configured to " +
+          "report UNHEALTHY to ZKFC in Safemode.");
+    }
   }
   
   synchronized void transitionToActive() 
@@ -1815,6 +1825,9 @@ public class NameNode extends ReconfigurableBase 
implements
           "Cannot transition from '" + OBSERVER_STATE + "' to '" +
               ACTIVE_STATE + "'");
     }
+    if (notBecomeActiveInSafemode && isInSafeMode()) {
+      throw new ServiceFailedException(getRole() + " still not leave 
safemode");
+    }
     state.setState(haContext, ACTIVE_STATE);
   }
 
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index d4f8abf..890d034 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -3193,6 +3193,15 @@
 </property>
 
 <property>
+  <name>dfs.ha.nn.not-become-active-in-safemode</name>
+  <value>false</value>
+  <description>
+    When set to true, prevents a namenode in safe mode from becoming active
+    while other standby namenodes might be ready to serve requests.
+  </description>
+</property>
+
+<property>
   <name>dfs.ha.tail-edits.in-progress</name>
   <value>false</value>
   <description>
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md
 
b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md
index d607561..06cda83 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md
@@ -296,6 +296,18 @@ The order in which you set these configurations is 
unimportant, but the values y
           <value>hdfs://mycluster</value>
         </property>
 
+*   **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent safe mode 
namenodes from becoming active
+
+    Whether to prevent a namenode from becoming active while it is in safemode.
+    When set to true, a namenode in safemode reports SERVICE_UNHEALTHY to ZKFC
+    if auto failover is on, or throws an exception to fail the transition to
+    active if auto failover is off. For example:
+
+        <property>
+          <name>dfs.ha.nn.not-become-active-in-safemode</name>
+          <value>true</value>
+        </property>
+
 ### Deployment details
 
 After all of the necessary configuration options have been set, one must 
initially synchronize the two HA NameNodes' on-disk metadata.
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
 
b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
index 4f3df27..eaa1a86 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
@@ -347,6 +347,18 @@ The order in which you set these configurations is 
unimportant, but the values y
           <value>/path/to/journal/node/local/data</value>
         </property>
 
+*   **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent safe mode 
namenodes from becoming active
+
+    Whether to prevent a namenode from becoming active while it is in safemode.
+    When set to true, a namenode in safemode reports SERVICE_UNHEALTHY to ZKFC
+    if auto failover is on, or throws an exception to fail the transition to
+    active if auto failover is off. For example:
+
+        <property>
+          <name>dfs.ha.nn.not-become-active-in-safemode</name>
+          <value>true</value>
+        </property>
+
 ### Deployment details
 
 After all of the necessary configuration options have been set, you must start 
the JournalNode daemons on the set of machines where they will run. This can be 
done by running the command "*hdfs \--daemon start journalnode*" and waiting 
for the daemon to start on each of the relevant machines.
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
index f9445fa..3f1a979 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
@@ -17,11 +17,13 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.ha;
 
+import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import java.io.File;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.URI;
@@ -30,6 +32,8 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.ha.ServiceFailedException;
+import org.apache.hadoop.test.LambdaTestUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -887,4 +891,31 @@ public class TestHASafeMode {
     cluster.transitionToActive(1);
     assertSafeMode(nn1, 3, 3, 3, 0);
   }
+
+  /**
+   * Test that transition to active is rejected when the namenode is in safemode.
+   *
+   * @throws Exception if cluster setup or the transition check fails.
+   */
+  @Test
+  public void testTransitionToActiveWhenSafeMode() throws Exception {
+    Configuration config = new Configuration();
+    config.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true);
+    try (MiniDFSCluster miniCluster = new MiniDFSCluster.Builder(config,
+        new File(GenericTestUtils.getRandomizedTempPath()))
+        .nnTopology(MiniDFSNNTopology.simpleHATopology())
+        .numDataNodes(1)
+        .build()) {
+      miniCluster.waitActive();
+      miniCluster.transitionToStandby(0);
+      miniCluster.transitionToStandby(1);
+      NameNode namenode0 = miniCluster.getNameNode(0);
+      NameNode namenode1 = miniCluster.getNameNode(1);
+      NameNodeAdapter.enterSafeMode(namenode0, false);
+      NameNodeAdapter.enterSafeMode(namenode1, false);
+      LambdaTestUtils.intercept(ServiceFailedException.class,
+          "NameNode still not leave safemode",
+          () -> miniCluster.transitionToActive(0));
+    }
+  }
 }
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java
index e0f794f..ab7e0af 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha;
 
 import static 
org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_DEFAULT;
 import static 
org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_KEY;
+import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE;
 import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIFELINE_RPC_ADDRESS_KEY;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
@@ -31,10 +32,12 @@ import org.apache.hadoop.ha.HealthCheckFailedException;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.namenode.MockNameNodeResourceChecker;
 import org.apache.hadoop.hdfs.tools.NNHAServiceTarget;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -76,6 +79,33 @@ public class TestNNHealthCheck {
     doNNHealthCheckTest();
   }
 
+  @Test
+  public void testNNHealthCheckWithSafemodeAsUnhealthy() throws Exception {
+    conf.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true);
+
+    // now bring up just the NameNode.
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+        .nnTopology(MiniDFSNNTopology.simpleHATopology()).build();
+    cluster.waitActive();
+
+    // manually set safemode.
+    cluster.getFileSystem(0)
+        .setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER);
+
+    NNHAServiceTarget haTarget = new NNHAServiceTarget(conf,
+        DFSUtil.getNamenodeNameServiceId(conf), "nn1");
+    final String expectedTargetString = haTarget.getAddress().toString();
+
+    assertTrue("Expected haTarget " + haTarget + " containing " +
+            expectedTargetString,
+        haTarget.toString().contains(expectedTargetString));
+    HAServiceProtocol rpc = haTarget.getHealthMonitorProxy(conf, 5000);
+
+    LambdaTestUtils.intercept(RemoteException.class,
+        "The NameNode is configured to report UNHEALTHY to ZKFC in Safemode.",
+        () -> rpc.monitorHealth());
+  }
+
   private void doNNHealthCheckTest() throws IOException {
     MockNameNodeResourceChecker mockResourceChecker =
         new MockNameNodeResourceChecker(conf);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to