Author: cutting
Date: Mon Apr 2 12:55:04 2007
New Revision: 524907

URL: http://svn.apache.org/viewvc?view=rev&rev=524907
Log:
HADOOP-1123.  Merge -r 523751:523752 from trunk to 0.12 branch.
Modified:
    lucene/hadoop/branches/branch-0.12/CHANGES.txt
    lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
    lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/ChecksumException.java
    lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java
    lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java
    lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/LocalFileSystem.java
    lucene/hadoop/branches/branch-0.12/src/test/org/apache/hadoop/dfs/TestFileCorruption.java

Modified: lucene/hadoop/branches/branch-0.12/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.12/CHANGES.txt?view=diff&rev=524907&r1=524906&r2=524907
==============================================================================
--- lucene/hadoop/branches/branch-0.12/CHANGES.txt (original)
+++ lucene/hadoop/branches/branch-0.12/CHANGES.txt Mon Apr 2 12:55:04 2007
@@ -6,6 +6,10 @@
 15. HADOOP-1162.  Fix bug in record CSV and XML serialization of
     binary values.  (Milind Bhandarkar via cutting)
 
+16. HADOOP-1123.  Fix NullPointerException in LocalFileSystem when
+    trying to recover from a checksum error.
+    (Hairong Kuang & Nigel Daley via tomwhite)
+
 
 Release 0.12.2 - 2007-23-17

Modified: lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java?view=diff&rev=524907&r1=524906&r2=524907
==============================================================================
--- lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java (original)
+++ lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java Mon Apr 2 12:55:04 2007
@@ -308,7 +308,7 @@
    * is corrupt but we will report both to the namenode. In the future,
    * we can consider figuring out exactly which block is corrupt.
    */
-  public void reportChecksumFailure(Path f,
+  public boolean reportChecksumFailure(Path f,
                                     FSDataInputStream in, long inPos,
                                     FSDataInputStream sums, long sumsPos) {
 
@@ -347,6 +347,7 @@
                  + StringUtils.stringifyException(ie));
       }
 
+      return true;
     }
   }
 
@@ -399,10 +400,10 @@
    * is corrupt but we will report both to the namenode. In the future,
    * we can consider figuring out exactly which block is corrupt.
    */
-  public void reportChecksumFailure(Path f,
+  public boolean reportChecksumFailure(Path f,
                                     FSDataInputStream in, long inPos,
                                     FSDataInputStream sums, long sumsPos) {
-    ((RawDistributedFileSystem)fs).reportChecksumFailure(
+    return ((RawDistributedFileSystem)fs).reportChecksumFailure(
       f, in, inPos, sums, sumsPos);
   }
 }
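
The core API change in this merge is the new boolean return value on reportChecksumFailure: the DFS implementation keeps reporting both suspect blocks to the namenode and now returns true, signalling that a retry against another replica is worthwhile. A minimal caller sketch of the new contract follows; readChunk() and the RetryContractSketch class are invented for illustration and are not part of the patched code:

    import java.io.IOException;
    import org.apache.hadoop.fs.ChecksumException;
    import org.apache.hadoop.fs.ChecksumFileSystem;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.Path;

    // Sketch of the new retry contract; readChunk() is a hypothetical
    // stand-in for the real verify-and-read step in ChecksumFileSystem.
    class RetryContractSketch {
      void readVerified(ChecksumFileSystem fs, Path file,
                        FSDataInputStream datas, FSDataInputStream sums,
                        int bytesPerSum, int retriesLeft) throws IOException {
        while (true) {
          try {
            readChunk(datas);               // may throw ChecksumException
            return;
          } catch (ChecksumException ce) {
            long errPos = ce.getPos();      // failure offset, new in this patch
            boolean shouldRetry = fs.reportChecksumFailure(
                file, datas, errPos, sums, errPos / bytesPerSum);
            // RawDistributedFileSystem returns true (another replica may be
            // intact); the base class and LocalFileSystem return false.
            if (!shouldRetry || retriesLeft-- <= 0) {
              throw ce;
            }
          }
        }
      }

      void readChunk(FSDataInputStream in) throws IOException {
        in.readByte();                      // placeholder for a checksummed read
      }
    }
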
Modified: lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/ChecksumException.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/ChecksumException.java?view=diff&rev=524907&r1=524906&r2=524907
==============================================================================
--- lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/ChecksumException.java (original)
+++ lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/ChecksumException.java Mon Apr 2 12:55:04 2007
@@ -22,7 +22,13 @@
 /** Thrown for checksum errors.
  */
 public class ChecksumException extends IOException {
-  public ChecksumException(String description) {
+  private long pos;
+  public ChecksumException(String description, long pos) {
     super(description);
+    this.pos = pos;
+  }
+
+  public long getPos() {
+    return pos;
   }
 }

Modified: lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java?view=diff&rev=524907&r1=524906&r2=524907
==============================================================================
--- lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java (original)
+++ lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java Mon Apr 2 12:55:04 2007
@@ -207,13 +207,18 @@
           summed += toSum;
 
           inSum += toSum;
-          if (inSum == bytesPerSum || endOfFile) {
+          if (inSum == bytesPerSum ) {
             verifySum(read-(summed-bytesPerSum));
+          } else if( read == summed && endOfFile ) {
+            verifySum(read-read/bytesPerSum*bytesPerSum);
           }
         }
       } catch (ChecksumException ce) {
         LOG.info("Found checksum error: "+StringUtils.stringifyException(ce));
-        if (retriesLeft == 0) {
+        long errPos = ce.getPos();
+        boolean shouldRetry = fs.reportChecksumFailure(
+          file, datas, errPos, sums, errPos/bytesPerSum);
+        if (!shouldRetry || retriesLeft == 0) {
           throw ce;
         }
 
@@ -250,8 +255,7 @@
       inSum = 0;
       if (crc != sumValue) {
         long pos = getPos() - delta;
-        fs.reportChecksumFailure(file, datas, pos, sums, pos/bytesPerSum);
-        throw new ChecksumException("Checksum error: "+file+" at "+pos);
+        throw new ChecksumException("Checksum error: "+file+" at "+pos, pos);
       }
     }
 
@@ -629,7 +633,10 @@
    * @param inPos the position of the beginning of the bad data in the file
    * @param sums the stream open on the checksum file
    * @param sumsPos the position of the beginning of the bad data in the checksum file
+   * @return if retry is neccessary
    */
-  public abstract void reportChecksumFailure(Path f, FSDataInputStream in,
-                                             long inPos, FSDataInputStream sums, long sumsPos);
+  public boolean reportChecksumFailure(Path f, FSDataInputStream in,
+                                       long inPos, FSDataInputStream sums, long sumsPos) {
+    return false;
+  }
 }
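
Besides the retry plumbing, the first hunk separates out verification of a trailing partial chunk at end of file: read - read/bytesPerSum*bytesPerSum is integer arithmetic for read % bytesPerSum, the length of the final chunk that is shorter than bytesPerSum. A worked example with invented values (these numbers are illustrative only, not from the patch):

    // Invented values, purely to illustrate the end-of-file arithmetic.
    int bytesPerSum = 512;                 // bytes covered by one checksum
    int read = 1300;                       // bytes read when EOF was hit
    int fullChunks = read / bytesPerSum;   // 2 complete chunks (1024 bytes)
    int partialLen = read - fullChunks * bytesPerSum;  // 276 == read % bytesPerSum
    // verifySum(read - read/bytesPerSum*bytesPerSum) therefore verifies the
    // trailing 276-byte chunk; the old single branch computed the delta as if
    // a full bytesPerSum-sized chunk had been summed, which is wrong at EOF.
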
Modified: lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java?view=diff&rev=524907&r1=524906&r2=524907
==============================================================================
--- lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java (original)
+++ lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java Mon Apr 2 12:55:04 2007
@@ -447,11 +447,6 @@
     throws IOException {
   }
 
-  public void reportChecksumFailure(Path p, FSDataInputStream in,
-                                    long inPos,
-                                    FSDataInputStream sums, long sumsPos) {
-  }
-
   /**
    * Register a file with its size. This will also register a checksum for the
    * file that the user is trying to create. This is required since none of

Modified: lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/LocalFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/LocalFileSystem.java?view=diff&rev=524907&r1=524906&r2=524907
==============================================================================
--- lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/LocalFileSystem.java (original)
+++ lucene/hadoop/branches/branch-0.12/src/java/org/apache/hadoop/fs/LocalFileSystem.java Mon Apr 2 12:55:04 2007
@@ -59,7 +59,7 @@
    * Moves files to a bad file directory on the same device, so that their
    * storage will not be reused.
    */
-  public void reportChecksumFailure(Path p, FSDataInputStream in,
+  public boolean reportChecksumFailure(Path p, FSDataInputStream in,
                                     long inPos,
                                     FSDataInputStream sums, long sumsPos) {
     try {
@@ -69,12 +69,17 @@
       // find highest writable parent dir of f on the same device
       String device = new DF(f, getConf()).getMount();
       File parent = f.getParentFile();
-      File dir;
-      do {
+      File dir = null;
+      while (parent!=null && parent.canWrite() && parent.toString().startsWith(device)) {
         dir = parent;
         parent = parent.getParentFile();
-      } while (parent.canWrite() && parent.toString().startsWith(device));
+      }
+      if (dir==null) {
+        throw new IOException(
+          "not able to find the highest writable parent dir");
+      }
+
       // move the file there
       File badDir = new File(dir, "bad_files");
       if (!badDir.mkdirs()) {
@@ -95,5 +100,6 @@
     } catch (IOException e) {
       LOG.warn("Error moving bad file " + p + ": " + e);
     }
+    return false;
   }
 }
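
This hunk is the actual HADOOP-1123 fix. The old do-while evaluated parent.canWrite() only after parent = parent.getParentFile() had already advanced, and getParentFile() returns null once the walk reaches the filesystem root, hence the NullPointerException. A self-contained sketch of the corrected walk (standalone illustration, not the patched class itself):

    import java.io.File;

    // Standalone illustration of the fixed parent-directory walk.
    public class ParentWalkSketch {
      /**
       * Returns the highest writable ancestor of f whose path stays on the
       * given mount point, or null if even the immediate parent fails.
       */
      static File highestWritableParent(File f, String device) {
        File parent = f.getParentFile();
        File dir = null;
        // Checking parent != null first is the fix: getParentFile() yields
        // null at the filesystem root, which the old do-while dereferenced.
        while (parent != null && parent.canWrite()
               && parent.toString().startsWith(device)) {
          dir = parent;
          parent = parent.getParentFile();
        }
        return dir;
      }

      public static void main(String[] args) {
        System.out.println(highestWritableParent(
            new File("/tmp/some/bad_file"), "/"));
      }
    }
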
Modified: lucene/hadoop/branches/branch-0.12/src/test/org/apache/hadoop/dfs/TestFileCorruption.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.12/src/test/org/apache/hadoop/dfs/TestFileCorruption.java?view=diff&rev=524907&r1=524906&r2=524907
==============================================================================
--- lucene/hadoop/branches/branch-0.12/src/test/org/apache/hadoop/dfs/TestFileCorruption.java (original)
+++ lucene/hadoop/branches/branch-0.12/src/test/org/apache/hadoop/dfs/TestFileCorruption.java Mon Apr 2 12:55:04 2007
@@ -22,11 +22,12 @@
 import junit.framework.*;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.ChecksumException;
+import org.apache.hadoop.fs.Path;
 
 /**
  * A JUnit test for corrupted file handling.
- *
- * @author Milind Bhandarkar
  */
 public class TestFileCorruption extends TestCase {
 
@@ -34,8 +35,6 @@
     super(testName);
   }
 
-  
-  
   protected void setUp() throws Exception {
   }
 
@@ -70,5 +69,28 @@
     } finally {
       if (cluster != null) { cluster.shutdown(); }
     }
+  }
+
+  /** check if local FS can handle corrupted blocks properly */
+  public void testLocalFileCorruption() throws Exception {
+    Configuration conf = new Configuration();
+    Path file = new Path(System.getProperty("test.build.data"), "corruptFile");
+    FileSystem fs = FileSystem.getLocal(conf);
+    DataOutputStream dos = fs.create(file);
+    dos.writeBytes("original bytes");
+    dos.close();
+    // Now deliberately corrupt the file
+    dos = new DataOutputStream(new FileOutputStream(file.toString()));
+    dos.writeBytes("corruption");
+    dos.close();
+    // Now attempt to read the file
+    DataInputStream dis = fs.open(file,512);
+    try {
+      System.out.println("A ChecksumException is expected to be logged.");
+      dis.readByte();
+    } catch (ChecksumException ignore) {
+      //expect this exception but let any NPE get thrown
+    }
+    fs.delete(file);
   }
 }
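
With the fix in place, the new testLocalFileCorruption passes by catching the expected ChecksumException instead of dying with a NullPointerException inside reportChecksumFailure. On the Ant-based Hadoop build of that era, a single test case could typically be run with something like the following invocation (the target and property names are assumed from the standard build.xml, not taken from this commit):

    ant test -Dtestcase=TestFileCorruption
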