Author: kihwal
Date: Fri Apr 4 19:35:44 2014
New Revision: 1584871

URL: http://svn.apache.org/r1584871
Log:
HDFS-5728. Block recovery will fail if the metafile does not have crc for
all chunks of the block. Contributed by Vinay.
Modified:
    hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
    hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java

Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1584871&r1=1584870&r2=1584871&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Apr 4 19:35:44 2014
@@ -22,6 +22,9 @@ Release 0.23.11 - UNRELEASED
 
     HDFS-6166. Revisit balancer so_timeout. (Nathan Roberts via kihwal)
 
+    HDFS-5728. Block recovery will fail if the metafile does not have crc
+    for all chunks of the block. (Vinayakumar B via kihwal)
+
 Release 0.23.10 - 2013-12-09
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java?rev=1584871&r1=1584870&r2=1584871&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java Fri Apr 4 19:35:44 2014
@@ -433,7 +433,7 @@ class FSDataset implements FSDatasetInte
             blockFile.length(), genStamp, volume, blockFile.getParentFile());
       } else {
         newReplica = new ReplicaWaitingToBeRecovered(blockId,
-            validateIntegrity(blockFile, genStamp),
+            validateIntegrityAndSetLength(blockFile, genStamp),
             genStamp, volume, blockFile.getParentFile());
       }
 
@@ -457,7 +457,7 @@ class FSDataset implements FSDatasetInte
    * @param genStamp generation stamp of the block
    * @return the number of valid bytes
    */
-  private long validateIntegrity(File blockFile, long genStamp) {
+  private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
    DataInputStream checksumIn = null;
    InputStream blockIn = null;
    try {
@@ -500,11 +500,25 @@ class FSDataset implements FSDatasetInte
      IOUtils.readFully(blockIn, buf, 0, lastChunkSize);
 
      checksum.update(buf, 0, lastChunkSize);
+      long validFileLength;
      if (checksum.compare(buf, lastChunkSize)) { // last chunk matches crc
-        return lastChunkStartPos + lastChunkSize;
+        validFileLength = lastChunkStartPos + lastChunkSize;
      } else { // last chunck is corrupt
-        return lastChunkStartPos;
+        validFileLength = lastChunkStartPos;
      }
+
+      // truncate if extra bytes are present without CRC
+      if (blockFile.length() > validFileLength) {
+        RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
+        try {
+          // truncate blockFile
+          blockRAF.setLength(validFileLength);
+        } finally {
+          blockRAF.close();
+        }
+      }
+
+      return validFileLength;
    } catch (IOException e) {
      DataNode.LOG.warn(e);
      return 0;
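The core of the change above: validateIntegrityAndSetLength() now truncates the block
file whenever it is longer than the data covered by valid checksums in the meta file,
so block recovery never reports a replica length that includes unchecksummed tail
bytes. Below is a minimal standalone sketch of that truncation idea only; it is not
the committed code (which also verifies the last chunk's CRC against the block data).
The class name and the constants (7-byte meta header, 4-byte CRC32, 512-byte chunk)
are illustrative HDFS-style defaults assumed for the example:

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;

public class TruncateToChecksummedLength {
  // Assumed illustrative defaults, not read from the real meta file header.
  static final int META_HEADER_LEN = 7;     // version + checksum type + bytesPerChecksum
  static final int CHECKSUM_SIZE = 4;       // CRC32 is 4 bytes per chunk
  static final int BYTES_PER_CHECKSUM = 512;

  /** Truncate blockFile to the last byte covered by a complete checksum;
   *  returns the resulting valid length. */
  static long truncateToValidLength(File blockFile, File metaFile) throws IOException {
    long crcBytes = Math.max(0L, metaFile.length() - META_HEADER_LEN);
    long numChunks = crcBytes / CHECKSUM_SIZE;            // complete checksums only
    long coveredLength = numChunks * BYTES_PER_CHECKSUM;  // bytes the meta file can vouch for
    long validLength = Math.min(blockFile.length(), coveredLength);

    if (blockFile.length() > validLength) {
      RandomAccessFile raf = new RandomAccessFile(blockFile, "rw");
      try {
        raf.setLength(validLength);  // drop the tail that has no CRC
      } finally {
        raf.close();
      }
    }
    return validLength;
  }
}

Doing this while the replica is being loaded as ReplicaWaitingToBeRecovered keeps the
on-disk block and meta file consistent before recovery computes the replica length.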
Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java?rev=1584871&r1=1584870&r2=1584871&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java Fri Apr 4 19:35:44 2014
@@ -17,19 +17,27 @@
  */
 package org.apache.hadoop.hdfs;
 
+import java.io.File;
 import java.io.IOException;
+import java.io.RandomAccessFile;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.TestInterDatanodeProtocol;
 import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.security.UserGroupInformation;
 
 public class TestLeaseRecovery extends junit.framework.TestCase {
   static final int BLOCK_SIZE = 1024;
@@ -142,4 +150,54 @@ public class TestLeaseRecovery extends j
       if (cluster != null) {cluster.shutdown();}
     }
   }
+
+  /**
+   * Block Recovery when the meta file not having crcs for all chunks in block
+   * file
+   */
+  public void testBlockRecoveryWithLessMetafile() throws Exception {
+    Configuration conf = new Configuration();
+    conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
+        UserGroupInformation.getCurrentUser().getShortUserName());
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+        .build();
+    Path file = new Path("/testRecoveryFile");
+    DistributedFileSystem dfs = (DistributedFileSystem)cluster.getFileSystem();
+    FSDataOutputStream out = dfs.create(file);
+    int count = 0;
+    while (count < 2 * 1024 * 1024) {
+      out.writeBytes("Data");
+      count += 4;
+    }
+    out.hsync();
+    // abort the original stream
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+    DataNode dn = cluster.getDataNodes().get(0);
+    BlockLocalPathInfo localPathInfo = dn.getBlockLocalPathInfo(block, null);
+    File metafile = new File(localPathInfo.getMetaPath());
+    assertTrue(metafile.exists());
+
+    // reduce the block meta file size
+    RandomAccessFile raf = new RandomAccessFile(metafile, "rw");
+    raf.setLength(metafile.length() - 20);
+    raf.close();
+
+    // restart DN to make replica to RWR
+    DataNodeProperties dnProp = cluster.stopDataNode(0);
+    cluster.restartDataNode(dnProp, true);
+
+    // try to recover the lease
+    DistributedFileSystem newdfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (++count < 10 && !newdfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    assertTrue("File should be closed", newdfs.recoverLease(file));
+
+  }
 }
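The new test polls DistributedFileSystem.recoverLease() until it reports that the file
has been closed, since recovery on the NameNode is asynchronous and a single call may
return false while recovery is still in progress. A small sketch of that polling
pattern as a reusable helper follows; the class and method names are illustrative and
are not part of the committed test:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class LeaseRecoveryUtil {
  /** Returns true if the lease was recovered (file closed) within maxAttempts. */
  public static boolean waitForLeaseRecovery(DistributedFileSystem dfs, Path file,
      int maxAttempts, long sleepMs) throws IOException, InterruptedException {
    for (int i = 0; i < maxAttempts; i++) {
      if (dfs.recoverLease(file)) {   // true => file is closed, recovery done
        return true;
      }
      Thread.sleep(sleepMs);          // give the NameNode time to finish recovery
    }
    return false;
  }
}

With the FSDataset fix in place, the truncated meta file no longer causes block
recovery to fail, so this loop converges and the final assertTrue passes.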