Author: stack
Date: Tue Apr 27 04:10:42 2010
New Revision: 938316

URL: http://svn.apache.org/viewvc?rev=938316&view=rev
Log:
HBASE-2442 Log lease recovery catches IOException too widely

Modified:
    hadoop/hbase/branches/0.20/CHANGES.txt
    hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/regionserver/HLog.java

Modified: hadoop/hbase/branches/0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/CHANGES.txt?rev=938316&r1=938315&r2=938316&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.20/CHANGES.txt Tue Apr 27 04:10:42 2010
@@ -103,6 +103,8 @@ Release 0.20.4 - Unreleased
                (Todd Lipcon via Stack)
    HBASE-2476 HLog sequence number is obtained outside updateLock
                (Todd Lipcon via Stack)
+   HBASE-2442 Log lease recovery catches IOException too widely
+               (Todd Lipcon via Stack)
 
   IMPROVEMENTS
    HBASE-2180 Bad read performance from synchronizing hfile.fddatainputstream

Modified: hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/regionserver/HLog.java?rev=938316&r1=938315&r2=938316&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/regionserver/HLog.java (original)
+++ hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/regionserver/HLog.java Tue Apr 27 04:10:42 2010
@@ -66,6 +66,8 @@ import org.apache.hadoop.hbase.util.Clas
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
+import org.apache.hadoop.hdfs.protocol.FSConstants;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.SequenceFile.Metadata;
@@ -1492,7 +1494,7 @@ public class HLog implements HConstants,
    * @param append
    */
   public static void recoverLog(final FileSystem fs, final Path p,
-      final boolean append) {
+      final boolean append) throws IOException {
     if (!append) {
       return;
     }
@@ -1503,6 +1505,9 @@ public class HLog implements HConstants,
       return;
     }
 
+    LOG.debug("Recovering DFS lease for path " + p);
+    long startWaiting = System.currentTimeMillis();
+
     // Trying recovery
     boolean recovered = false;
     while (!recovered) {
@@ -1511,11 +1516,25 @@ public class HLog implements HConstants,
         out.close();
         recovered = true;
       } catch (IOException e) {
-        LOG.info("Failed open for append, waiting on lease recovery: " + p, e);
-        try {
-          Thread.sleep(1000);
-        } catch (InterruptedException ex) {
-          // ignore it and try again
+        e = RemoteExceptionHandler.checkIOException(e);
+        if (e instanceof AlreadyBeingCreatedException) {
+          // We expect that we'll get this message while the lease is still
+          // within its soft limit, but if we get it past that, it means
+          // that the RS is holding onto the file even though it lost its
+          // znode. We could potentially abort after some time here.
+          long waitedFor = System.currentTimeMillis() - startWaiting;
+
+          if (waitedFor > FSConstants.LEASE_SOFTLIMIT_PERIOD) {
+            LOG.warn("Waited " + waitedFor + "ms for lease recovery on " + p +
+              ":" + e.getMessage());
+          }
+          try {
+            Thread.sleep(1000);
+          } catch (InterruptedException ex) {
+            // ignore it and try again
+          }
+        } else {
+          throw new IOException("Failed to open " + p + " for append", e);
         }
       }
     }