HDFS-13314. NameNode should optionally exit if it detects FsImage corruption. 
Contributed by Arpit Agarwal.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a991e899
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a991e899
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a991e899

Branch: refs/heads/HDFS-12943
Commit: a991e899fb9f98d2089f37ac9ac7c485d3bbb959
Parents: 081c350
Author: Arpit Agarwal <a...@apache.org>
Authored: Wed Mar 28 11:37:34 2018 -0700
Committer: Arpit Agarwal <a...@apache.org>
Committed: Wed Mar 28 12:49:17 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hdfs/server/namenode/FSImage.java    | 29 +++++++++--
 .../server/namenode/FSImageFormatProtobuf.java  | 31 ++++++++---
 .../snapshot/FSImageFormatPBSnapshot.java       | 55 ++++++++++++++++++--
 3 files changed, 101 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/a991e899/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
index e758108..dd7df5a 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
@@ -35,6 +35,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -68,6 +69,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.util.Canceler;
 import org.apache.hadoop.hdfs.util.MD5FileUtils;
 import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.base.Joiner;
@@ -86,6 +88,10 @@ public class FSImage implements Closeable {
   protected FSEditLog editLog = null;
   private boolean isUpgradeFinalized = false;
 
+  // If true, then image corruption was detected. The NameNode process will
+  // exit immediately after saving the image.
+  private AtomicBoolean exitAfterSave = new AtomicBoolean(false);
+
   protected NNStorage storage;
   
   /**
@@ -954,8 +960,14 @@ public class FSImage implements Closeable {
     
     FSImageFormatProtobuf.Saver saver = new 
FSImageFormatProtobuf.Saver(context);
     FSImageCompression compression = 
FSImageCompression.createCompression(conf);
-    saver.save(newFile, compression);
-    
+    long numErrors = saver.save(newFile, compression);
+    if (numErrors > 0) {
+      // The image is likely corrupted.
+      LOG.error("Detected " + numErrors + " errors while saving FsImage " +
+          dstFile);
+      exitAfterSave.set(true);
+    }
+
     MD5FileUtils.saveMD5File(dstFile, saver.getSavedDigest());
     storage.setMostRecentCheckpointInfo(txid, Time.now());
   }
@@ -1117,6 +1129,12 @@ public class FSImage implements Closeable {
     }
     //Update NameDirSize Metric
     getStorage().updateNameDirSize();
+
+    if (exitAfterSave.get()) {
+      LOG.fatal("NameNode process will exit now... The saved FsImage " +
+          nnf + " is potentially corrupted.");
+      ExitUtil.terminate(-1);
+    }
   }
 
   /**
@@ -1184,8 +1202,11 @@ public class FSImage implements Closeable {
   
       // Since we now have a new checkpoint, we can clean up some
       // old edit logs and checkpoints.
-      purgeOldStorage(nnf);
-      archivalManager.purgeCheckpoints(NameNodeFile.IMAGE_NEW);
+      // Do not purge anything if we just wrote a corrupted FsImage.
+      if (!exitAfterSave.get()) {
+        purgeOldStorage(nnf);
+        archivalManager.purgeCheckpoints(NameNodeFile.IMAGE_NEW);
+      }
     } finally {
       // Notify any threads waiting on the checkpoint to be canceled
       // that it is complete.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a991e899/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java
index cd5a5526..4ac20ad 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java
@@ -444,15 +444,22 @@ public final class FSImageFormatProtobuf {
       sectionOutputStream.flush();
     }
 
-    void save(File file, FSImageCompression compression) throws IOException {
+    /**
+     * @return number of non-fatal errors detected while writing the image.
+     * @throws IOException on fatal error.
+     */
+    long save(File file, FSImageCompression compression) throws IOException {
       FileOutputStream fout = new FileOutputStream(file);
       fileChannel = fout.getChannel();
       try {
         LOG.info("Saving image file {} using {}", file, compression);
         long startTime = monotonicNow();
-        saveInternal(fout, compression, file.getAbsolutePath());
-        LOG.info("Image file {} of size {} bytes saved in {} seconds.", file,
-            file.length(), (monotonicNow() - startTime) / 1000);
+        long numErrors = saveInternal(
+            fout, compression, file.getAbsolutePath());
+        LOG.info("Image file {} of size {} bytes saved in {} seconds {}.", 
file,
+            file.length(), (monotonicNow() - startTime) / 1000,
+            (numErrors > 0 ? (" with" + numErrors + " errors") : ""));
+        return numErrors;
       } finally {
         fout.close();
       }
@@ -476,7 +483,11 @@ public final class FSImageFormatProtobuf {
       saver.serializeFilesUCSection(sectionOutputStream);
     }
 
-    private void saveSnapshots(FileSummary.Builder summary) throws IOException 
{
+    /**
+     * @return number of non-fatal errors detected while saving the image.
+     * @throws IOException on fatal error.
+     */
+    private long saveSnapshots(FileSummary.Builder summary) throws IOException 
{
       FSImageFormatPBSnapshot.Saver snapshotSaver = new 
FSImageFormatPBSnapshot.Saver(
           this, summary, context, context.getSourceNamesystem());
 
@@ -487,9 +498,14 @@ public final class FSImageFormatProtobuf {
         snapshotSaver.serializeSnapshotDiffSection(sectionOutputStream);
       }
       snapshotSaver.serializeINodeReferenceSection(sectionOutputStream);
+      return snapshotSaver.getNumImageErrors();
     }
 
-    private void saveInternal(FileOutputStream fout,
+    /**
+     * @return number of non-fatal errors detected while writing the FsImage.
+     * @throws IOException on fatal error.
+     */
+    private long saveInternal(FileOutputStream fout,
         FSImageCompression compression, String filePath) throws IOException {
       StartupProgress prog = NameNode.getStartupProgress();
       MessageDigest digester = MD5Hash.getDigester();
@@ -528,7 +544,7 @@ public final class FSImageFormatProtobuf {
       step = new Step(StepType.INODES, filePath);
       prog.beginStep(Phase.SAVING_CHECKPOINT, step);
       saveInodes(b);
-      saveSnapshots(b);
+      long numErrors = saveSnapshots(b);
       prog.endStep(Phase.SAVING_CHECKPOINT, step);
 
       step = new Step(StepType.DELEGATION_TOKENS, filePath);
@@ -551,6 +567,7 @@ public final class FSImageFormatProtobuf {
       saveFileSummary(underlyingOutputStream, summary);
       underlyingOutputStream.close();
       savedDigest = new MD5Hash(digester.digest());
+      return numErrors;
     }
 
     private void saveSecretManagerSection(FileSummary.Builder summary)

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a991e899/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java
----------------------------------------------------------------------
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java
index 4a5cead..2157554 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java
@@ -1,4 +1,4 @@
-/**
+ /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -27,6 +27,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -49,6 +50,7 @@ import 
org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.namenode.AclEntryStatusFormat;
 import org.apache.hadoop.hdfs.server.namenode.AclFeature;
 import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
+import org.apache.hadoop.hdfs.server.namenode.FSImage;
 import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
 import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf;
 import 
org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.LoaderContext;
@@ -415,6 +417,7 @@ public class FSImageFormatPBSnapshot {
     private final FileSummary.Builder headers;
     private final FSImageFormatProtobuf.Saver parent;
     private final SaveNamespaceContext context;
+    private long numImageErrors;
 
     public Saver(FSImageFormatProtobuf.Saver parent,
         FileSummary.Builder headers, SaveNamespaceContext context,
@@ -423,6 +426,7 @@ public class FSImageFormatPBSnapshot {
       this.headers = headers;
       this.context = context;
       this.fsn = fsn;
+      this.numImageErrors = 0;
     }
 
     /**
@@ -471,15 +475,17 @@ public class FSImageFormatPBSnapshot {
         throws IOException {
       final List<INodeReference> refList = parent.getSaverContext()
           .getRefList();
+      long i = 0;
       for (INodeReference ref : refList) {
-        INodeReferenceSection.INodeReference.Builder rb = 
buildINodeReference(ref);
+        INodeReferenceSection.INodeReference.Builder rb =
+            buildINodeReference(ref, i++);
         rb.build().writeDelimitedTo(out);
       }
       parent.commitSection(headers, SectionName.INODE_REFERENCE);
     }
 
     private INodeReferenceSection.INodeReference.Builder buildINodeReference(
-        INodeReference ref) throws IOException {
+        final INodeReference ref, final long refIndex) throws IOException {
       INodeReferenceSection.INodeReference.Builder rb =
           INodeReferenceSection.INodeReference.newBuilder().
             setReferredId(ref.getId());
@@ -489,6 +495,16 @@ public class FSImageFormatPBSnapshot {
       } else if (ref instanceof DstReference) {
         rb.setDstSnapshotId(ref.getDstSnapshotId());
       }
+
+      if (fsn.getFSDirectory().getInode(ref.getId()) == null) {
+        FSImage.LOG.error(
+            "FSImageFormatPBSnapshot: Missing referred INodeId " +
+            ref.getId() + " for INodeReference index " + refIndex +
+            "; path=" + ref.getFullPathName() +
+            "; parent=" + (ref.getParent() == null ? "null" :
+                ref.getParent().getFullPathName()));
+        ++numImageErrors;
+      }
       return rb;
     }
 
@@ -583,7 +599,23 @@ public class FSImageFormatPBSnapshot {
           List<INode> created = 
diff.getChildrenDiff().getCreatedUnmodifiable();
           db.setCreatedListSize(created.size());
           List<INode> deleted = 
diff.getChildrenDiff().getDeletedUnmodifiable();
+          INode previousNode = null;
+          boolean misordered = false;
           for (INode d : deleted) {
+            // getBytes() may return null below, and that is okay.
+            final int result = previousNode == null ? -1 :
+                previousNode.compareTo(d.getLocalNameBytes());
+            if (result == 0) {
+              FSImage.LOG.error(
+                  "Name '" + d.getLocalName() + "' is repeated in the " +
+                      "'deleted' difflist of directory " +
+                      dir.getFullPathName() + ", INodeId=" + dir.getId());
+              ++numImageErrors;
+            } else if (result > 0 && !misordered) {
+              misordered = true;
+              ++numImageErrors;
+            }
+            previousNode = d;
             if (d.isReference()) {
               refList.add(d.asReference());
               db.addDeletedINodeRef(refList.size() - 1);
@@ -591,11 +623,28 @@ public class FSImageFormatPBSnapshot {
               db.addDeletedINode(d.getId());
             }
           }
+          if (misordered) {
+            FSImage.LOG.error(
+                "Misordered entries in the 'deleted' difflist of directory " +
+                    dir.getFullPathName() + ", INodeId=" + dir.getId() +
+                    ". The full list is " +
+                    Arrays.toString(deleted.toArray()));
+          }
           db.build().writeDelimitedTo(out);
           saveCreatedList(created, out);
         }
       }
     }
+
+
+    /**
+     * Number of non-fatal errors detected while writing the
+     * SnapshotDiff and INodeReference sections.
+     * @return the number of non-fatal errors detected.
+     */
+    public long getNumImageErrors() {
+      return numImageErrors;
+    }
   }
 
   private FSImageFormatPBSnapshot(){}


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to