Repository: hadoop Updated Branches: refs/heads/HADOOP-13345 a412b1020 -> a1b47db40
HADOOP-13931. S3AGuard: Use BatchWriteItem in DynamoDBMetadataStore#put(). Contributed by Mingliang Liu. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a1b47db4 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a1b47db4 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a1b47db4 Branch: refs/heads/HADOOP-13345 Commit: a1b47db405832087c080a4c5743f2efaa620d566 Parents: a412b10 Author: Chris Nauroth <cnaur...@apache.org> Authored: Fri Jan 6 10:30:47 2017 -0800 Committer: Chris Nauroth <cnaur...@apache.org> Committed: Fri Jan 6 10:30:47 2017 -0800 ---------------------------------------------------------------------- .../fs/s3a/s3guard/DynamoDBMetadataStore.java | 51 ++++++++++++-------- .../hadoop/fs/s3a/s3guard/MetadataStore.java | 5 +- 2 files changed, 34 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/a1b47db4/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java index 89ce3c4..1c19625 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java @@ -413,9 +413,27 @@ public class DynamoDBMetadataStore implements MetadataStore { @Override public void put(PathMetadata meta) throws IOException { - checkPathMetadata(meta); + // For a deeply nested path, this method will automatically create the full + // ancestry and save respective item in DynamoDB table. + // So after put operation, we maintain the invariant that if a path exists, + // all its ancestors will also exist in the table. + // For performance purpose, we generate the full paths to put and use batch + // write item request to save the items. LOG.debug("Saving to table {} in region {}: {}", tableName, region, meta); - innerPut(meta); + processBatchWriteRequest(null, pathMetadataToItem(fullPathsToPut(meta))); + } + + /** + * Helper method to get full path of ancestors that are nonexistent in table. + */ + private Collection<PathMetadata> fullPathsToPut(PathMetadata meta) + throws IOException { + checkPathMetadata(meta); + final Collection<PathMetadata> metasToPut = new ArrayList<>(); + // root path is not persisted + if (!meta.getFileStatus().getPath().isRoot()) { + metasToPut.add(meta); + } // put all its ancestors if not present; as an optimization we return at its // first existent ancestor @@ -427,34 +445,29 @@ public class DynamoDBMetadataStore implements MetadataStore { final Item item = table.getItem(spec); if (item == null) { final S3AFileStatus status = new S3AFileStatus(false, path, username); - innerPut(new PathMetadata(status)); + metasToPut.add(new PathMetadata(status)); path = path.getParent(); } else { break; } } - } - - private void innerPut(PathMetadata meta) throws IOException { - final Path path = meta.getFileStatus().getPath(); - if (path.isRoot()) { - LOG.debug("Root path / is not persisted"); - return; - } - - try { - table.putItem(pathMetadataToItem(meta)); - } catch (AmazonClientException e) { - throw translateException("put", path, e); - } + return metasToPut; } @Override public void put(DirListingMetadata meta) throws IOException { LOG.debug("Saving to table {} in region {}: {}", tableName, region, meta); - for (PathMetadata pathMetadata : meta.getListing()) { - put(pathMetadata); + // directory path + final Collection<PathMetadata> metasToPut = fullPathsToPut( + new PathMetadata(new S3AFileStatus(false, meta.getPath(), username))); + // all children of the directory + metasToPut.addAll(meta.getListing()); + + try { + processBatchWriteRequest(null, pathMetadataToItem(metasToPut)); + } catch (AmazonClientException e) { + throw translateException("put", (String) null, e); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/a1b47db4/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java index 6d3c440..5c611c2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java @@ -113,10 +113,9 @@ public interface MetadataStore extends Closeable { pathsToCreate) throws IOException; /** - * Saves metadata for exactly one path. For a deeply nested path, this method - * will not automatically create the full ancestry. Callers need to ensure - * saving the full path ancestry. + * Saves metadata for exactly one path. * + * Implementations may pre-create all the path's ancestors automatically. * Implementations must update any {@code DirListingMetadata} objects which * track the immediate parent of this file. * --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org