Repository: hadoop Updated Branches: refs/heads/HADOOP-13345 0c32daaae -> 48bda91e4
S3Guard: DynamoDBMetadataStore::move() should populate ancestor directories. Contributed by Mingliang Liu. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/48bda91e Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/48bda91e Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/48bda91e Branch: refs/heads/HADOOP-13345 Commit: 48bda91e4888b6311bc327351fe9fb3512b8ec1e Parents: 0c32daa Author: Aaron Fabbri <[email protected]> Authored: Tue Apr 4 11:35:42 2017 -0700 Committer: Aaron Fabbri <[email protected]> Committed: Tue Apr 4 11:35:42 2017 -0700 ---------------------------------------------------------------------- .../fs/s3a/s3guard/DynamoDBMetadataStore.java | 34 +++++++++- .../fs/s3a/s3guard/MetadataStoreTestBase.java | 4 +- .../s3a/s3guard/TestDynamoDBMetadataStore.java | 70 ++++++++++++++++++++ 3 files changed, 105 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/48bda91e/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java index f598764..c3718d1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Date; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -418,9 +419,40 @@ public class DynamoDBMetadataStore implements 
MetadataStore { pathsToCreate == null ? 0 : pathsToCreate.size()); LOG.trace("move: pathsToDelete = {}, pathsToCreate = {}", pathsToDelete, pathsToCreate); + + // In DynamoDBMetadataStore implementation, we assume that if a path + // exists, all its ancestors will also exist in the table. + // Following code is to maintain this invariant by putting all ancestor + // directories of the paths to create. + // ancestor paths that are not explicitly added to paths to create + Collection<PathMetadata> inferredPathsToCreate = null; + if (pathsToCreate != null) { + inferredPathsToCreate = new ArrayList<>(pathsToCreate); + // help set for fast look up; we should avoid putting duplicate paths + final Collection<Path> fullPathsToCreate = new HashSet<>(); + for (PathMetadata meta : pathsToCreate) { + fullPathsToCreate.add(meta.getFileStatus().getPath()); + } + + for (PathMetadata meta : pathsToCreate) { + Preconditions.checkArgument(meta != null); + Path parent = meta.getFileStatus().getPath().getParent(); + while (parent != null + && !parent.isRoot() + && !fullPathsToCreate.contains(parent)) { + LOG.debug("move: auto-create ancestor path {} for child path {}", + parent, meta.getFileStatus().getPath()); + final FileStatus status = makeDirStatus(parent, username); + inferredPathsToCreate.add(new PathMetadata(status, Tristate.FALSE)); + fullPathsToCreate.add(parent); + parent = parent.getParent(); + } + } + } + try { processBatchWriteRequest(pathToKey(pathsToDelete), - pathMetadataToItem(pathsToCreate)); + pathMetadataToItem(inferredPathsToCreate)); } catch (AmazonClientException e) { throw translateException("move", (String) null, e); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/48bda91e/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java index ae899ed..36a5cc5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java @@ -710,7 +710,7 @@ public abstract class MetadataStoreTestBase extends Assert { assertNull(pathStr + " should not be cached.", meta); } - private void assertCached(String pathStr) throws IOException { + protected void assertCached(String pathStr) throws IOException { Path path = strToPath(pathStr); PathMetadata meta = ms.get(path); assertNotNull(pathStr + " should be cached.", meta); @@ -719,7 +719,7 @@ public abstract class MetadataStoreTestBase extends Assert { /** * Convenience to create a fully qualified Path from string. */ - private Path strToPath(String p) { + Path strToPath(String p) { final Path path = new Path(p); assert path.isAbsolute(); return path.makeQualified(contract.getFileSystem().getUri(), null); http://git-wip-us.apache.org/repos/asf/hadoop/blob/48bda91e/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java index ca5f95c..a841e21 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java @@ -32,6 +32,7 @@ import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription; import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException; import 
com.amazonaws.services.dynamodbv2.model.TableDescription; +import com.google.common.collect.Lists; import org.apache.commons.collections.CollectionUtils; import org.apache.hadoop.fs.s3a.Tristate; @@ -419,6 +420,75 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase { } } + /** + * Test that when moving nested paths, all its ancestors up to destination + * root will also be created. + * Here is the directory tree before move: + * + * testMovePopulatesAncestors + * ├── a + * │   └── b + * │       └── src + * │           ├── dir1 + * │           │   └── dir2 + * │           └── file1.txt + * └── c + *     └── d + *         └── dest + * + * As part of rename(a/b/src, c/d/dest), S3A will enumerate the subtree at + * a/b/src. This test verifies that after the move, the new subtree at + * 'dest' is reachable from the root (i.e. c/ and c/d exist in the table). + * DynamoDBMetadataStore depends on this property to do recursive delete + * without a full table scan. + */ + @Test + public void testMovePopulatesAncestors() throws IOException { + final DynamoDBMetadataStore ddbms = getDynamoMetadataStore(); + final String testRoot = "/testMovePopulatesAncestors"; + final String srcRoot = testRoot + "/a/b/src"; + final String destRoot = testRoot + "/c/d/e/dest"; + + final Path nestedPath1 = strToPath(srcRoot + "/file1.txt"); + ddbms.put(new PathMetadata(basicFileStatus(nestedPath1, 1024, false))); + final Path nestedPath2 = strToPath(srcRoot + "/dir1/dir2"); + ddbms.put(new PathMetadata(basicFileStatus(nestedPath2, 0, true))); + + // We don't put the destRoot path here, since put() would create ancestor + // entries, and we want to ensure that move() does it, instead. 
+ + // Build enumeration of src / dest paths and do the move() + final Collection<Path> fullSourcePaths = Lists.newArrayList( + strToPath(srcRoot), + strToPath(srcRoot + "/dir1"), + strToPath(srcRoot + "/dir1/dir2"), + strToPath(srcRoot + "/file1.txt") + ); + final Collection<PathMetadata> pathsToCreate = Lists.newArrayList( + new PathMetadata(basicFileStatus(strToPath(destRoot), + 0, true)), + new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1"), + 0, true)), + new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1/dir2"), + 0, true)), + new PathMetadata(basicFileStatus(strToPath(destRoot + "/file1.txt"), + 1024, false)) + ); + + ddbms.move(fullSourcePaths, pathsToCreate); + + // assert that all the ancestors should have been populated automatically + assertCached(testRoot + "/c"); + assertCached(testRoot + "/c/d"); + assertCached(testRoot + "/c/d/e"); + assertCached(destRoot /* /c/d/e/dest */); + + // Also check moved files while we're at it + assertCached(destRoot + "/dir1"); + assertCached(destRoot + "/dir1/dir2"); + assertCached(destRoot + "/file1.txt"); + } + @Test public void testProvisionTable() throws IOException { final DynamoDBMetadataStore ddbms = getDynamoMetadataStore(); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
