This is an automated email from the ASF dual-hosted git repository. stevel pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push: new 94da630 HADOOP-16465 listLocatedStatus() optimisation (#1943) 94da630 is described below commit 94da630cd2d1e056393f83acdac2747cf4b12bad Author: Mukund Thakur <mukund-tha...@users.noreply.github.com> AuthorDate: Tue Apr 14 21:49:51 2020 +0530 HADOOP-16465 listLocatedStatus() optimisation (#1943) Contributed by Mukund Thakur Optimize S3AFileSystem.listLocatedStatus() to perform list operations directly and then fallback to head checks for files Change-Id: Ia2c0fa6fcc5967c49b914b92f41135d07dab0464 --- .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 87 ++++++++++++++-------- .../hadoop/fs/s3a/ITestS3AFileOperationCost.java | 57 ++++++++++++++ 2 files changed, 114 insertions(+), 30 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 9bd33a4..9630a9e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -4283,42 +4283,69 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, RemoteIterator<? extends LocatedFileStatus> iterator = once("listLocatedStatus", path.toString(), () -> { - // lookup dir triggers existence check - final S3AFileStatus fileStatus = - (S3AFileStatus) getFileStatus(path); - if (fileStatus.isFile()) { - // simple case: File - LOG.debug("Path is a file"); - return new Listing.SingleStatusRemoteIterator( - filter.accept(path) ? toLocatedFileStatus(fileStatus) : null); - } else { - // directory: trigger a lookup - final String key = maybeAddTrailingSlash(pathToKey(path)); - final Listing.FileStatusAcceptor acceptor = - new Listing.AcceptAllButSelfAndS3nDirs(path); - boolean allowAuthoritative = allowAuthoritative(f); - DirListingMetadata meta = - S3Guard.listChildrenWithTtl(metadataStore, path, - ttlTimeProvider, allowAuthoritative); - final RemoteIterator<S3AFileStatus> cachedFileStatusIterator = - listing.createProvidedFileStatusIterator( - S3Guard.dirMetaToStatuses(meta), filter, acceptor); - return (allowAuthoritative && meta != null - && meta.isAuthoritative()) - ? listing.createLocatedFileStatusIterator( - cachedFileStatusIterator) - : listing.createLocatedFileStatusIterator( - listing.createFileStatusListingIterator(path, - createListObjectsRequest(key, "/"), - filter, - acceptor, - cachedFileStatusIterator)); + // Assuming the path to be a directory, + // trigger a list call directly. + final RemoteIterator<S3ALocatedFileStatus> + locatedFileStatusIteratorForDir = + getLocatedFileStatusIteratorForDir(path, filter); + + // If no listing is present then path might be a file. + if (!locatedFileStatusIteratorForDir.hasNext()) { + final S3AFileStatus fileStatus = + (S3AFileStatus) getFileStatus(path); + if (fileStatus.isFile()) { + // simple case: File + LOG.debug("Path is a file"); + return new Listing.SingleStatusRemoteIterator( + filter.accept(path) + ? toLocatedFileStatus(fileStatus) + : null); + } } + // Either empty or non-empty directory. + return locatedFileStatusIteratorForDir; }); return toLocatedFileStatusIterator(iterator); } /** + * Generate list located status for a directory. + * Also performing tombstone reconciliation for guarded directories. + * @param dir directory to check. + * @param filter a path filter. + * @return an iterator that traverses statuses of the given dir. + * @throws IOException in case of failure. + */ + private RemoteIterator<S3ALocatedFileStatus> getLocatedFileStatusIteratorForDir( + Path dir, PathFilter filter) throws IOException { + final String key = maybeAddTrailingSlash(pathToKey(dir)); + final Listing.FileStatusAcceptor acceptor = + new Listing.AcceptAllButSelfAndS3nDirs(dir); + boolean allowAuthoritative = allowAuthoritative(dir); + DirListingMetadata meta = + S3Guard.listChildrenWithTtl(metadataStore, dir, + ttlTimeProvider, allowAuthoritative); + Set<Path> tombstones = meta != null + ? meta.listTombstones() + : null; + final RemoteIterator<S3AFileStatus> cachedFileStatusIterator = + listing.createProvidedFileStatusIterator( + S3Guard.dirMetaToStatuses(meta), filter, acceptor); + return (allowAuthoritative && meta != null + && meta.isAuthoritative()) + ? listing.createLocatedFileStatusIterator( + cachedFileStatusIterator) + : listing.createTombstoneReconcilingIterator( + listing.createLocatedFileStatusIterator( + listing.createFileStatusListingIterator(dir, + createListObjectsRequest(key, "/"), + filter, + acceptor, + cachedFileStatusIterator)), + tombstones); + } + + /** * Build a {@link S3ALocatedFileStatus} from a {@link FileStatus} instance. * @param status file status * @return a located status with block locations set up from this FS. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index e2f7fea..b2b983c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -112,6 +112,63 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase { } @Test + public void testCostOfLocatedFileStatusOnFile() throws Throwable { + describe("performing listLocatedStatus on a file"); + Path file = path(getMethodName() + ".txt"); + S3AFileSystem fs = getFileSystem(); + touch(fs, file); + resetMetricDiffs(); + fs.listLocatedStatus(file); + if (!fs.hasMetadataStore()) { + // Unguarded FS. + metadataRequests.assertDiffEquals(1); + } + listRequests.assertDiffEquals(1); + } + + @Test + public void testCostOfListLocatedStatusOnEmptyDir() throws Throwable { + describe("performing listLocatedStatus on an empty dir"); + Path dir = path(getMethodName()); + S3AFileSystem fs = getFileSystem(); + fs.mkdirs(dir); + resetMetricDiffs(); + fs.listLocatedStatus(dir); + if (!fs.hasMetadataStore()) { + // Unguarded FS. + verifyOperationCount(2, 1); + } else { + if (fs.allowAuthoritative(dir)) { + verifyOperationCount(0, 0); + } else { + verifyOperationCount(0, 1); + } + } + } + + @Test + public void testCostOfListLocatedStatusOnNonEmptyDir() throws Throwable { + describe("performing listLocatedStatus on a non empty dir"); + Path dir = path(getMethodName() + "dir"); + S3AFileSystem fs = getFileSystem(); + fs.mkdirs(dir); + Path file = new Path(dir, "file.txt"); + touch(fs, file); + resetMetricDiffs(); + fs.listLocatedStatus(dir); + if (!fs.hasMetadataStore()) { + // Unguarded FS. + verifyOperationCount(0, 1); + } else { + if(fs.allowAuthoritative(dir)) { + verifyOperationCount(0, 0); + } else { + verifyOperationCount(0, 1); + } + } + } + + @Test public void testCostOfGetFileStatusOnFile() throws Throwable { describe("performing getFileStatus on a file"); Path simpleFile = path("simple.txt"); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org