Repository: hadoop Updated Branches: refs/heads/trunk db80e4289 -> 932730df7
HADOOP-11785. Reduce the number of listStatus operation in distcp buildListing (Zoran Dimitrijevic via Colin P. McCabe) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/932730df Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/932730df Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/932730df Branch: refs/heads/trunk Commit: 932730df7d62077f7356464ad27f69469965d77a Parents: db80e42 Author: Colin Patrick Mccabe <[email protected]> Authored: Fri Apr 3 14:08:25 2015 -0700 Committer: Colin Patrick Mccabe <[email protected]> Committed: Fri Apr 3 14:08:25 2015 -0700 ---------------------------------------------------------------------- hadoop-common-project/hadoop-common/CHANGES.txt | 3 ++ .../apache/hadoop/tools/SimpleCopyListing.java | 41 +++++++++----------- 2 files changed, 21 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/932730df/hadoop-common-project/hadoop-common/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 7dcf8c1..d2d1181 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -483,6 +483,9 @@ Release 2.8.0 - UNRELEASED OPTIMIZATIONS + HADOOP-11785. Reduce the number of listStatus operation in distcp + buildListing (Zoran Dimitrijevic via Colin P. McCabe) + BUG FIXES HADOOP-10027. *Compressor_deflateBytesDirect passes instance instead of http://git-wip-us.apache.org/repos/asf/hadoop/blob/932730df/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java index 6dc827a..e8a23aa 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java @@ -193,12 +193,12 @@ public class SimpleCopyListing extends CopyListing { writeToFileListing(fileListWriter, sourceCopyListingStatus, sourcePathRoot, options); - if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) { + if (sourceStatus.isDirectory()) { if (LOG.isDebugEnabled()) { - LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath()); + LOG.debug("Traversing source dir: " + sourceStatus.getPath()); } - traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, - options); + traverseDirectory(fileListWriter, sourceFS, sourceStatus, + sourcePathRoot, options); } } } @@ -275,22 +275,17 @@ public class SimpleCopyListing extends CopyListing { SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE)); } - private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem, - FileStatus fileStatus) throws IOException { - return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0; - } - private static FileStatus[] getChildren(FileSystem fileSystem, FileStatus parent) throws IOException { return fileSystem.listStatus(parent.getPath()); } - private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter, - FileStatus sourceStatus, - Path sourcePathRoot, - DistCpOptions options) - throws IOException { - FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf()); + private void traverseDirectory(SequenceFile.Writer fileListWriter, + FileSystem sourceFS, + FileStatus sourceStatus, + Path sourcePathRoot, + DistCpOptions options) + throws IOException { final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL); final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR); final boolean preserveRawXattrs = options.shouldPreserveRawXattrs(); @@ -299,9 +294,9 @@ public class SimpleCopyListing extends CopyListing { while (!pathStack.isEmpty()) { for (FileStatus child: getChildren(sourceFS, pathStack.pop())) { - if (LOG.isDebugEnabled()) - LOG.debug("Recording source-path: " - + sourceStatus.getPath() + " for copy."); + if (LOG.isDebugEnabled()) { + LOG.debug("Recording source-path: " + child.getPath() + " for copy."); + } CopyListingFileStatus childCopyListingStatus = DistCpUtils.toCopyListingFileStatus(sourceFS, child, preserveAcls && child.isDirectory(), @@ -309,16 +304,16 @@ public class SimpleCopyListing extends CopyListing { preserveRawXattrs && child.isDirectory()); writeToFileListing(fileListWriter, childCopyListingStatus, sourcePathRoot, options); - if (isDirectoryAndNotEmpty(sourceFS, child)) { - if (LOG.isDebugEnabled()) - LOG.debug("Traversing non-empty source dir: " - + sourceStatus.getPath()); + if (child.isDirectory()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Traversing into source dir: " + child.getPath()); + } pathStack.push(child); } } } } - + private void writeToFileListingRoot(SequenceFile.Writer fileListWriter, CopyListingFileStatus fileStatus, Path sourcePathRoot, DistCpOptions options) throws IOException {
