Author: shv
Date: Fri Apr 13 07:33:21 2012
New Revision: 1325636
URL: http://svn.apache.org/viewvc?rev=1325636&view=rev
Log:
HDFS-119. Fix a bug in logSync(), which causes the NameNode to block forever.
Contributed by Suresh Srinivas and Konstantin Shvachko.
Modified:
hadoop/common/branches/branch-1/CHANGES.txt
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
Modified: hadoop/common/branches/branch-1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/CHANGES.txt?rev=1325636&r1=1325635&r2=1325636&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/CHANGES.txt (original)
+++ hadoop/common/branches/branch-1/CHANGES.txt Fri Apr 13 07:33:21 2012
@@ -220,6 +220,8 @@ Release 1.1.0 - unreleased
HADOOP-8269. Fix some javadoc warnings on branch-1.
+ HDFS-119. Fix a bug in logSync(), which causes NameNode block forever. (shv)
+
Release 1.0.3 - unreleased
NEW FEATURES
Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java?rev=1325636&r1=1325635&r2=1325636&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java Fri Apr 13 07:33:21 2012
@@ -989,69 +989,86 @@ public class FSEditLog {
// Fetch the transactionId of this thread.
long mytxid = myTransactionId.get().txid;
- final int numEditStreams;
- synchronized (this) {
- numEditStreams = editStreams.size();
- assert numEditStreams > 0 : "no editlog streams";
- printStatistics(false);
-
- // if somebody is already syncing, then wait
- while (mytxid > synctxid && isSyncRunning) {
- try {
- wait(1000);
- } catch (InterruptedException ie) {
+ ArrayList<EditLogOutputStream> streams = new ArrayList<EditLogOutputStream>();
+ boolean sync = false;
+ try {
+ synchronized (this) {
+ printStatistics(false);
+
+ // if somebody is already syncing, then wait
+ while (mytxid > synctxid && isSyncRunning) {
+ try {
+ wait(1000);
+ } catch (InterruptedException ie) {
+ }
}
- }
-
- //
- // If this transaction was already flushed, then nothing to do
- //
- if (mytxid <= synctxid) {
- numTransactionsBatchedInSync++;
- if (metrics != null) // Metrics is non-null only when used inside name node
- metrics.incrTransactionsBatchedInSync();
- return;
- }
-
- // now, this thread will do the sync
- syncStart = txid;
- isSyncRunning = true;
-
- // swap buffers
- for (int idx = 0; idx < numEditStreams; idx++) {
- editStreams.get(idx).setReadyToFlush();
- }
- }
- // do the sync
- long start = FSNamesystem.now();
- for (int idx = 0; idx < numEditStreams; idx++) {
- EditLogOutputStream eStream = editStreams.get(idx);
- try {
- eStream.flush();
- } catch (IOException ioe) {
//
- // remember the streams that encountered an error.
+ // If this transaction was already flushed, then nothing to do
//
- if (errorStreams == null) {
- errorStreams = new ArrayList<EditLogOutputStream>(1);
+ if (mytxid <= synctxid) {
+ numTransactionsBatchedInSync++;
+ if (metrics != null) // Metrics is non-null only when used inside name node
+ metrics.incrTransactionsBatchedInSync();
+ return;
+ }
+
+ // now, this thread will do the sync
+ syncStart = txid;
+ isSyncRunning = true;
+ sync = true;
+
+ // swap buffers
+ assert editStreams.size() > 0 : "no editlog streams";
+ for(EditLogOutputStream eStream : editStreams) {
+ try {
+ eStream.setReadyToFlush();
+ streams.add(eStream);
+ } catch (IOException ie) {
+ FSNamesystem.LOG.error("Unable to get ready to flush.", ie);
+ //
+ // remember the streams that encountered an error.
+ //
+ if (errorStreams == null) {
+ errorStreams = new ArrayList<EditLogOutputStream>(1);
+ }
+ errorStreams.add(eStream);
+ }
}
- errorStreams.add(eStream);
- FSNamesystem.LOG.error("Unable to sync "+eStream.getName());
}
- }
- long elapsed = FSNamesystem.now() - start;
- synchronized (this) {
- removeEditsStreamsAndStorageDirs(errorStreams);
- exitIfNoStreams();
- synctxid = syncStart;
- isSyncRunning = false;
- this.notifyAll();
+ // do the sync
+ long start = FSNamesystem.now();
+ for (EditLogOutputStream eStream : streams) {
+ try {
+ eStream.flush();
+ } catch (IOException ie) {
+ FSNamesystem.LOG.error("Unable to sync edit log.", ie);
+ //
+ // remember the streams that encountered an error.
+ //
+ if (errorStreams == null) {
+ errorStreams = new ArrayList<EditLogOutputStream>(1);
+ }
+ errorStreams.add(eStream);
+ }
+ }
+ long elapsed = FSNamesystem.now() - start;
+ removeEditsStreamsAndStorageDirs(errorStreams);
+ exitIfNoStreams();
+
+ if (metrics != null) // Metrics is non-null only when used inside name node
+ metrics.addSync(elapsed);
+
+ } finally {
+ synchronized (this) {
+ if(sync) {
+ synctxid = syncStart;
+ isSyncRunning = false;
+ }
+ this.notifyAll();
+ }
}
-
- if (metrics != null) // Metrics is non-null only when used inside name node
- metrics.addSync(elapsed);
}
//