Author: eli
Date: Tue Jan 31 02:17:53 2012
New Revision: 1238134
URL: http://svn.apache.org/viewvc?rev=1238134&view=rev
Log:
HDFS-2853. HA: NN fails to start if the shared edits dir is marked required.
Contributed by Aaron T. Myers.
Added:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourcePolicy.java
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourcePolicy.java
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt?rev=1238134&r1=1238133&r2=1238134&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
(original)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
Tue Jan 31 02:17:53 2012
@@ -141,3 +141,5 @@ HDFS-2841. HAAdmin does not work if secu
HDFS-2691. Fixes for pipeline recovery in an HA cluster: report RBW replicas
immediately upon pipeline creation. (todd)
HDFS-2824. Fix failover when prior NN died just after creating an edit log
segment. (atm via todd)
+
+HDFS-2853. HA: NN fails to start if the shared edits dir is marked required (atm via eli)
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1238134&r1=1238133&r2=1238134&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
(original)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
Tue Jan 31 02:17:53 2012
@@ -865,7 +865,7 @@ public class FSEditLog {
editLogStream = journalSet.startLogSegment(segmentTxId);
} catch (IOException ex) {
throw new IOException("Unable to start log segment " +
- segmentTxId + ": no journals successfully started.");
+ segmentTxId + ": too few journals successfully started.", ex);
}
curSegmentTxId = segmentTxId;
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourcePolicy.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourcePolicy.java?rev=1238134&r1=1238133&r2=1238134&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourcePolicy.java
(original)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourcePolicy.java
Tue Jan 31 02:17:53 2012
@@ -37,9 +37,6 @@ final class NameNodeResourcePolicy {
* required to continue operation.
* @return true if and only if there are sufficient NN resources to
* continue logging edits.
- * @throws RuntimeException if the number of <bold>configured</bold>
- * redundant resources is fewer than the minimum number of available
- * redundant resources.
*/
static boolean areResourcesAvailable(
Collection<? extends CheckableNameNodeResource> resources,
@@ -63,12 +60,6 @@ final class NameNodeResourcePolicy {
}
}
- if (redundantResourceCount < minimumRedundantResources) {
- throw new RuntimeException("Need a minimum of " + minimumRedundantResources
- + " for NN to operate but only " + redundantResourceCount
- + " are configured.");
- }
-
if (redundantResourceCount == 0) {
// If there are no redundant resources, return true if there are any
// required resources available.
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java?rev=1238134&r1=1238133&r2=1238134&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
(original)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
Tue Jan 31 02:17:53 2012
@@ -664,7 +664,12 @@ public class MiniDFSCluster {
}
public URI getSharedEditsDir(int minNN, int maxNN) throws IOException {
- return fileAsURI(new File(base_dir, "shared-edits-" +
+ return formatSharedEditsDir(base_dir, minNN, maxNN);
+ }
+
+ public static URI formatSharedEditsDir(File baseDir, int minNN, int maxNN)
+ throws IOException {
+ return fileAsURI(new File(baseDir, "shared-edits-" +
minNN + "-through-" + maxNN));
}
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java?rev=1238134&r1=1238133&r2=1238134&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
(original)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
Tue Jan 31 02:17:53 2012
@@ -807,7 +807,7 @@ public class TestEditLog extends TestCas
fail("Did no throw exception on only having a bad dir");
} catch (IOException ioe) {
GenericTestUtils.assertExceptionContains(
- "no journals successfully started", ioe);
+ "too few journals successfully started", ioe);
} finally {
logDir.setWritable(true);
log.close();
Modified:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourcePolicy.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourcePolicy.java?rev=1238134&r1=1238133&r2=1238134&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourcePolicy.java
(original)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourcePolicy.java
Tue Jan 31 02:17:53 2012
@@ -50,13 +50,7 @@ public class TestNameNodeResourcePolicy
assertFalse(testResourceScenario(4, 0, 3, 0, 2));
assertTrue(testResourceScenario(4, 0, 3, 0, 1));
assertFalse(testResourceScenario(4, 0, 4, 0, 1));
- try {
- testResourceScenario(1, 0, 0, 0, 2);
- fail("Should fail if there are more minimum redundant resources than " +
- "total redundant resources");
- } catch (RuntimeException rte) {
- assertTrue(rte.getMessage().startsWith("Need a minimum"));
- }
+ assertFalse(testResourceScenario(1, 0, 0, 0, 2));
}
@Test
Added:
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java?rev=1238134&view=auto
==============================================================================
---
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java
(added)
+++
hadoop/common/branches/HDFS-1623/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java
Tue Jan 31 02:17:53 2012
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import static org.junit.Assert.*;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.Test;
+
+public class TestFailureOfSharedDir {
+
+ private static final Log LOG = LogFactory.getLog(TestFailureOfSharedDir.class);
+
+ /**
+ * Test that marking the shared edits dir as being "required" causes the NN to
+ * fail if that dir can't be accessed.
+ */
+ @Test
+ public void testFailureOfSharedDir() throws Exception {
+ Configuration conf = new Configuration();
+ URI sharedEditsUri = MiniDFSCluster.formatSharedEditsDir(
+ new File(MiniDFSCluster.getBaseDirectory()), 0, 1);
+ // Mark the shared edits dir required.
+ conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY,
+ sharedEditsUri.toString());
+
+ MiniDFSCluster cluster = null;
+ try {
+ cluster = new MiniDFSCluster.Builder(conf)
+ .nnTopology(MiniDFSNNTopology.simpleHATopology())
+ .numDataNodes(0)
+ .build();
+
+ assertEquals(sharedEditsUri, cluster.getSharedEditsDir(0, 1));
+
+ cluster.waitActive();
+ cluster.transitionToActive(0);
+
+ FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
+
+ assertTrue(fs.mkdirs(new Path("/test1")));
+
+ // Blow away the shared edits dir.
+ FileUtil.fullyDelete(new File(sharedEditsUri));
+
+ NameNode nn0 = cluster.getNameNode(0);
+ try {
+ // Make sure that subsequent operations on the NN fail.
+ nn0.getRpcServer().rollEditLog();
+ fail("Succeeded in rolling edit log despite shared dir being deleted");
+ } catch (IOException ioe) {
+ GenericTestUtils.assertExceptionContains(
+ "Unable to start log segment 4: too few journals successfully started",
+ ioe);
+ LOG.info("Got expected exception", ioe);
+ }
+ } finally {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+}