Author: kihwal Date: Tue Mar 11 19:15:23 2014 New Revision: 1576477 URL: http://svn.apache.org/r1576477 Log: HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every block that is scanned. Contributed by Colin Patrick McCabe.
Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1576477&r1=1576476&r2=1576477&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Tue Mar 11 19:15:23 2014 @@ -8,6 +8,9 @@ Release 0.23.11 - UNRELEASED IMPROVEMENTS + HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every + block that is scanned (Colin Patrick McCabe) + OPTIMIZATIONS BUG FIXES Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java?rev=1576477&r1=1576476&r2=1576477&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java Tue Mar 11 19:15:23 2014 @@ -33,6 +33,8 @@ import java.util.concurrent.Future; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -152,12 +154,73 @@ public class DirectoryScanner implements * Tracks the files and other information related to a block on the disk * Missing file is indicated by setting the corresponding member * to null. + * + * Because millions of these structures may be created, we try to save + * memory here. So instead of storing full paths, we store path suffixes. + * The block file, if it exists, will have a path like this: + * <volume_base_path>/<block_path> + * So we don't need to store the volume path, since we already know what the + * volume is. + * + * The metadata file, if it exists, will have a path like this: + * <volume_base_path>/<block_path>_<genstamp>.meta + * So if we have a block file, there isn't any need to store the block path + * again. + * + * The accessor functions take care of these manipulations. */ static class ScanInfo implements Comparable<ScanInfo> { private final long blockId; - private final File metaFile; - private final File blockFile; + + /** + * The block file path, relative to the volume's base directory. + * If there was no block file found, this may be null. If 'vol' + * is null, then this is the full path of the block file. + */ + private final String blockSuffix; + + /** + * The suffix of the meta file path relative to the block file. + * If blockSuffix is null, then this will be the entire path relative + * to the volume base directory, or an absolute path if vol is also + * null. + */ + private final String metaSuffix; + private final FSVolumeInterface volume; + private final static Pattern CONDENSED_PATH_REGEX = + Pattern.compile("(?<!^)(\\\\|/){2,}"); + + private final static String QUOTED_FILE_SEPARATOR = + Matcher.quoteReplacement(File.separator); + + /** + * Get the most condensed version of the path. + * + * For example, the condensed version of /foo//bar is /foo/bar + * Unlike {@link File#getCanonicalPath()}, this will never perform I/O + * on the filesystem. + */ + private static String getCondensedPath(String path) { + return CONDENSED_PATH_REGEX.matcher(path). + replaceAll(QUOTED_FILE_SEPARATOR); + } + + /** + * Get a path suffix. + * + * @param f The file to get the suffix for. + * @param prefix The prefix we're stripping off. + * + * @return A suffix such that prefix + suffix = path to f + */ + private static String getSuffix(File f, String prefix) { + String fullPath = getCondensedPath(f.getAbsolutePath()); + if (fullPath.startsWith(prefix)) { + return fullPath.substring(prefix.length()); + } + throw new RuntimeException(prefix + " is not a prefix of " + fullPath); + } ScanInfo(long blockId) { this(blockId, null, null, null); @@ -165,17 +228,34 @@ public class DirectoryScanner implements ScanInfo(long blockId, File blockFile, File metaFile, FSVolumeInterface vol) { this.blockId = blockId; - this.metaFile = metaFile; - this.blockFile = blockFile; + String condensedVolPath = vol == null ? null : + getCondensedPath(vol.getBasePath()); + this.blockSuffix = blockFile == null ? null : + getSuffix(blockFile, condensedVolPath); + if (metaFile == null) { + this.metaSuffix = null; + } else if (blockFile == null) { + this.metaSuffix = getSuffix(metaFile, condensedVolPath); + } else { + this.metaSuffix = getSuffix(metaFile, + condensedVolPath + blockSuffix); + } this.volume = vol; } - File getMetaFile() { - return metaFile; + File getBlockFile() { + return (blockSuffix == null) ? null : + new File(volume.getBasePath(), blockSuffix); } - File getBlockFile() { - return blockFile; + File getMetaFile() { + if (metaSuffix == null) { + return null; + } else if (blockSuffix == null) { + return new File(volume.getBasePath(), metaSuffix); + } else { + return new File(volume.getBasePath(), blockSuffix + metaSuffix); + } } long getBlockId() { @@ -214,8 +294,9 @@ public class DirectoryScanner implements } public long getGenStamp() { - return metaFile != null ? Block.getGenerationStamp(metaFile.getName()) : - GenerationStamp.GRANDFATHER_GENERATION_STAMP; + return metaSuffix != null ? Block.getGenerationStamp( + getMetaFile().getName()) : + GenerationStamp.GRANDFATHER_GENERATION_STAMP; } } Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java?rev=1576477&r1=1576476&r2=1576477&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java Tue Mar 11 19:15:23 2014 @@ -594,6 +594,11 @@ class FSDataset implements FSDatasetInte } return (remaining > 0) ? remaining : 0; } + + @Override + public String getBasePath() { + return currentDir.getParent(); + } long getReserved(){ return reserved; Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java?rev=1576477&r1=1576476&r2=1576477&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java Tue Mar 11 19:15:23 2014 @@ -86,6 +86,9 @@ public interface FSDatasetInterface exte /** @return the available storage space in bytes. */ public long getAvailable() throws IOException; + /** @return the base path to the volume */ + public String getBasePath(); + /** @return the directory for the block pool. */ public File getDirectory(String bpid) throws IOException; Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java?rev=1576477&r1=1576476&r2=1576477&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java Tue Mar 11 19:15:23 2014 @@ -40,6 +40,8 @@ import org.apache.hadoop.hdfs.protocol.B import org.apache.hadoop.hdfs.server.common.GenerationStamp; import org.apache.hadoop.hdfs.server.datanode.FSDatasetInterface.FSVolumeInterface; +import org.junit.Test; + /** * Tests {@link DirectoryScanner} handling of differences * between blocks on the disk and block in memory. @@ -216,6 +218,7 @@ public class TestDirectoryScanner extend assertEquals(mismatchBlocks, stats.mismatchBlocks); } + @Test public void testDirectoryScanner() throws Exception { // Run the test with and without parallel scanning for (int parallelism = 1; parallelism < 3; parallelism++) { @@ -372,4 +375,93 @@ public class TestDirectoryScanner extend assertNotNull(memBlock); assertEquals(genStamp, memBlock.getGenerationStamp()); } + + private static class TestFsVolumeSpi implements FSVolumeInterface { + @Override + public String[] getBlockPoolList() { + return new String[0]; + } + + @Override + public long getAvailable() throws IOException { + return 0; + } + + @Override + public String getBasePath() { + return "/base"; + } + + @Override + public File getDirectory(String bpid) throws IOException { + return new File("/base/current/" + bpid); + } + + @Override + public File getFinalizedDir(String bpid) throws IOException { + return new File("/base/current/" + bpid + "/finalized"); + } + } + + private final static TestFsVolumeSpi TEST_VOLUME = new TestFsVolumeSpi(); + + private final static String BPID_1 = "BP-783049782-127.0.0.1-1370971773491"; + + private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231"; + + void testScanInfoObject(long blockId, File blockFile, File metaFile) + throws Exception { + assertEquals("/base/current/" + BPID_1 + "/finalized", + TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()); + DirectoryScanner.ScanInfo scanInfo = + new DirectoryScanner.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME); + assertEquals(blockId, scanInfo.getBlockId()); + if (blockFile != null) { + assertEquals(blockFile.getAbsolutePath(), + scanInfo.getBlockFile().getAbsolutePath()); + } else { + assertNull(scanInfo.getBlockFile()); + } + if (metaFile != null) { + assertEquals(metaFile.getAbsolutePath(), + scanInfo.getMetaFile().getAbsolutePath()); + } else { + assertNull(scanInfo.getMetaFile()); + } + assertEquals(TEST_VOLUME, scanInfo.getVolume()); + } + + void testScanInfoObject(long blockId) throws Exception { + DirectoryScanner.ScanInfo scanInfo = + new DirectoryScanner.ScanInfo(blockId); + assertEquals(blockId, scanInfo.getBlockId()); + assertNull(scanInfo.getBlockFile()); + assertNull(scanInfo.getMetaFile()); + } + + @Test(timeout=120000) + public void testScanInfo() throws Exception { + testScanInfoObject(123, + new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), + "blk_123"), + new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), + "blk_123__1001.meta")); + testScanInfoObject(464, + new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), + "blk_123"), + null); + testScanInfoObject(523, + null, + new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), + "blk_123__1009.meta")); + testScanInfoObject(789, + null, + null); + testScanInfoObject(456); + testScanInfoObject(123, + new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(), + "blk_567"), + new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(), + "blk_567__1004.meta")); + } }