Author: shv
Date: Tue Feb 5 16:07:08 2008
New Revision: 618833
URL: http://svn.apache.org/viewvc?rev=618833&view=rev
Log:
HADOOP-1188. fstime file is updated when a storage directory containing
namespace image becomes inaccessible. Contributed by Konstantin Shvachko.
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
Modified: hadoop/core/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=618833&r1=618832&r2=618833&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Tue Feb 5 16:07:08 2008
@@ -23,6 +23,9 @@
HADOOP-2779. Remove the references to HBase in the build.xml. (omalley)
+ HADOOP-1188. fstime file is updated when a storage directory containing
+ namespace image becomes inaccessible. (shv)
+
Release 0.16.0 - 2008-02-07
INCOMPATIBLE CHANGES
Modified: hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java?rev=618833&r1=618832&r2=618833&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java Tue Feb 5 16:07:08 2008
@@ -21,7 +21,6 @@
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.ByteArrayOutputStream;
-import java.io.DataOutput;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
@@ -33,7 +32,6 @@
import org.apache.hadoop.io.*;
import org.apache.hadoop.fs.permission.*;
-import org.apache.hadoop.util.StringUtils;
/**
* FSEditLog maintains a log of the namespace modifications.
@@ -252,7 +250,7 @@
editStreams.add(eStream);
} catch (IOException e) {
FSNamesystem.LOG.warn("Unable to open edit log file " + eFile);
- processIOError(idx);
+ fsimage.processIOError(idx);
idx--;
}
}
@@ -306,13 +304,14 @@
/**
* If there is an IO Error on any log operations, remove that
- * directory from the list of directories. If no more directories
- * remain, then raise an exception that will possibly cause the
- * server to exit
+ * directory from the list of directories.
+ * If no more directories remain, then exit.
*/
- synchronized void processIOError(int index) throws IOException {
+ synchronized void processIOError(int index) {
if (editStreams == null || editStreams.size() <= 1) {
- throw new IOException("Checkpoint directories inaccessible.");
+ FSNamesystem.LOG.fatal(
+ "Fatal Error : All storage directories are inaccessible.");
+ Runtime.getRuntime().exit(-1);
}
assert(index < getNumStorageDirs());
assert(getNumStorageDirs() == editStreams.size());
@@ -346,13 +345,13 @@
"Fatal Error.");
Runtime.getRuntime().exit(-1);
}
- try {
- processIOError(j);
- } catch (IOException e) {
- FSNamesystem.LOG.error("Unable to sync edit log. Fatal Error : " +
- StringUtils.stringifyException(e));
- Runtime.getRuntime().exit(-1);
- }
+ processIOError(j);
+ }
+ int failedStreamIdx = 0;
+ while(failedStreamIdx >= 0) {
+ failedStreamIdx = fsimage.incrementCheckpointTime();
+ if(failedStreamIdx >= 0)
+ processIOError(failedStreamIdx);
}
}
@@ -649,13 +648,7 @@
w.write(od);
}
} catch (IOException ie) {
- try {
- processIOError(idx);
- } catch (IOException e) {
- FSNamesystem.LOG.error("Unable to append to edit log. " +
- "Fatal Error.");
- Runtime.getRuntime().exit(-1);
- }
+ processIOError(idx);
}
}
// get a new transactionId
Modified: hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSImage.java?rev=618833&r1=618832&r2=618833&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSImage.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSImage.java Tue Feb 5 16:07:08 2008
@@ -442,16 +442,31 @@
}
/**
+ * Record new checkpoint time in order to
+ * distinguish healthy directories from the removed ones.
+ *
+ * @return -1 if successful, or the index of the failed storage directory.
+ */
+ int incrementCheckpointTime() {
+ this.checkpointTime++;
+ // Write new checkpoint time.
+ for(int idx = 0; idx < getNumStorageDirs(); idx++) {
+ try {
+ StorageDirectory sd = getStorageDir(idx);
+ writeCheckpointTime(sd);
+ } catch(IOException e) {
+ return idx;
+ }
+ }
+ return -1;
+ }
+
+ /**
* If there is an IO Error on any log operations, remove that
- * directory from the list of directories. If no more directories
- * remain, then raise an exception that will possibly cause the
- * server to exit
+ * directory from the list of directories.
*/
- void processIOError(int index) throws IOException {
- int nrDirs = getNumStorageDirs();
- assert(index >= 0 && index < nrDirs);
- if (nrDirs <= 1)
- throw new IOException("Checkpoint directories inaccessible.");
+ void processIOError(int index) {
+ assert(index >= 0 && index < getNumStorageDirs());
storageDirs.remove(index);
}