HDFS-14043. Tolerate corrupted seen_txid file. Contributed by Lukas Majercak.

(cherry picked from commit f3296501e09fa7f1e81548dfcefa56f20fe337ca)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/378f189c
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/378f189c
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/378f189c

Branch: refs/heads/branch-3.2
Commit: 378f189c4fa3d2928df80a91ac57ee762b9cf30c
Parents: 6e1fad2
Author: Inigo Goiri <inigo...@apache.org>
Authored: Mon Nov 5 16:48:37 2018 -0800
Committer: Inigo Goiri <inigo...@apache.org>
Committed: Mon Nov 5 16:49:12 2018 -0800

----------------------------------------------------------------------
 .../hadoop/hdfs/util/PersistentLongFile.java    |  2 +
 .../hdfs/server/namenode/TestSaveNamespace.java | 56 ++++++++++++++++++++
 2 files changed, 58 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/378f189c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java
index 777dd87..a94d7ed 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java
@@ -98,6 +98,8 @@ public class PersistentLongFile {
         val = Long.parseLong(br.readLine());
         br.close();
         br = null;
+      } catch (NumberFormatException e) {
+        throw new IOException(e);
       } finally {
         IOUtils.cleanupWithLogger(LOG, br);
       }
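
For context: the fix wraps the unchecked NumberFormatException that Long.parseLong throws on a corrupted (or empty) seen_txid file in a checked IOException, presumably so that the NameNode's existing per-directory failure handling can fall back to the remaining name directories, which is what the new test below exercises. A minimal, self-contained sketch of the same pattern; the LongFileReader class is hypothetical for illustration, not the Hadoop source:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

/** Hypothetical stand-in for PersistentLongFile#readFile, showing the
 *  pattern the patch applies: parse failures surface as checked
 *  IOExceptions instead of crashing the caller. */
class LongFileReader {
  static long readLong(File file, long defaultVal) throws IOException {
    if (!file.exists()) {
      return defaultVal;
    }
    try (BufferedReader br = new BufferedReader(new FileReader(file))) {
      // Long.parseLong throws NumberFormatException on garbage (and on
      // null if the file is empty); both cases become IOExceptions here.
      return Long.parseLong(br.readLine());
    } catch (NumberFormatException e) {
      throw new IOException("Corrupted content in " + file, e);
    }
  }
}

Wrapping rather than swallowing the exception keeps the corrupted directory visible in the logs while redundant name directories keep the NameNode up.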

http://git-wip-us.apache.org/repos/asf/hadoop/blob/378f189c/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
index 8fa8701..6688ef2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java
@@ -28,13 +28,20 @@ import static org.mockito.Mockito.doThrow;
 import static org.mockito.Mockito.spy;
 
 import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.util.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -737,6 +744,55 @@ public class TestSaveNamespace {
     }
   }
 
+  @Test(timeout=30000)
+  public void testTxFaultTolerance() throws Exception {
+    String baseDir = MiniDFSCluster.getBaseDirectory();
+    List<String> nameDirs = new ArrayList<>();
+    nameDirs.add(fileAsURI(new File(baseDir, "name1")).toString());
+    nameDirs.add(fileAsURI(new File(baseDir, "name2")).toString());
+
+    Configuration conf = new HdfsConfiguration();
+    String nameDirsStr = StringUtils.join(",", nameDirs);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDirsStr);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, nameDirsStr);
+
+    NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
+    DFSTestUtil.formatNameNode(conf);
+    FSNamesystem fsn = FSNamesystem.loadFromDisk(conf);
+    try {
+      // We have a BEGIN_LOG_SEGMENT txn to start
+      assertEquals(1, fsn.getEditLog().getLastWrittenTxId());
+
+      doAnEdit(fsn, 1);
+
+      assertEquals(2, fsn.getEditLog().getLastWrittenTxId());
+
+      // Shut down
+      fsn.close();
+
+      // Corrupt one of the seen_txid files
+      File txidFile0 = new File(new URI(nameDirs.get(0) +
+          "/current/seen_txid"));
+      FileWriter fw = new FileWriter(txidFile0, false);
+      try (PrintWriter pw = new PrintWriter(fw)) {
+        pw.print("corrupt____!");
+      }
+
+      // Restart
+      fsn = FSNamesystem.loadFromDisk(conf);
+      assertEquals(4, fsn.getEditLog().getLastWrittenTxId());
+
+      // Check seen_txid is same in both dirs
+      File txidFile1 = new File(new URI(nameDirs.get(1) +
+          "/current/seen_txid"));
+      assertTrue(FileUtils.contentEquals(txidFile0, txidFile1));
+    } finally {
+      if (fsn != null) {
+        fsn.close();
+      }
+    }
+  }
+
   private void doAnEdit(FSNamesystem fsn, int id) throws IOException {
     // Make an edit
     fsn.mkdirs("/test" + id, new PermissionStatus("test", "Test",


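The test above corrupts seen_txid in one of two name directories, restarts the namesystem, and asserts that startup succeeds and both copies converge again. An illustrative sketch of the recovery behavior this relies on; the SeenTxidScanner class below is a simplified assumption for illustration, not the actual NNStorage code:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;

/** Illustrative only: read seen_txid from every name directory and keep
 *  the highest readable value, so a single corrupted copy no longer
 *  prevents startup. */
class SeenTxidScanner {

  /** Mirrors the patched PersistentLongFile#readFile: parse failures
   *  surface as checked IOExceptions, not NumberFormatExceptions. */
  static long readSeenTxid(File file) throws IOException {
    try (BufferedReader br = new BufferedReader(new FileReader(file))) {
      return Long.parseLong(br.readLine());
    } catch (NumberFormatException e) {
      throw new IOException("Corrupted content in " + file, e);
    }
  }

  static long maxSeenTxid(List<File> nameDirs) throws IOException {
    long max = -1;
    for (File dir : nameDirs) {
      File txidFile = new File(dir, "current/seen_txid");
      try {
        // A corrupted copy is now an IOException, handled like any other
        // per-directory failure: log it and fall back to the other dirs.
        max = Math.max(max, readSeenTxid(txidFile));
      } catch (IOException e) {
        System.err.println("Skipping unreadable " + txidFile + ": " + e);
      }
    }
    if (max < 0) {
      throw new IOException("No readable seen_txid in any name directory");
    }
    return max;
  }
}

Before the patch, the Long.parseLong call inside readSeenTxid would have thrown an unchecked NumberFormatException past the catch block, aborting startup even though an intact copy existed in the other directory.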