Repository: hadoop Updated Branches: refs/heads/HADOOP-13345 930feb100 -> 30e1146c5 (forced update)
HADOOP-14457. create() does not notify metadataStore of parent directories or ensure they're not existing files. Contributed by Sean Mackrory. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/30e1146c Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/30e1146c Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/30e1146c Branch: refs/heads/HADOOP-13345 Commit: 30e1146c515d154544707d0e4d4a1c743e6d518f Parents: 309b8c0 Author: Steve Loughran <ste...@apache.org> Authored: Fri Jul 7 14:55:50 2017 +0100 Committer: Steve Loughran <ste...@apache.org> Committed: Fri Jul 7 15:02:22 2017 +0100 ---------------------------------------------------------------------- .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 3 +- .../fs/s3a/s3guard/DynamoDBMetadataStore.java | 60 +++++++++++-------- .../fs/s3a/s3guard/LocalMetadataStore.java | 7 +++ .../hadoop/fs/s3a/s3guard/MetadataStore.java | 10 ++++ .../fs/s3a/s3guard/NullMetadataStore.java | 5 ++ .../apache/hadoop/fs/s3a/s3guard/S3Guard.java | 25 +++++++- .../hadoop/fs/s3a/ITestS3GuardCreate.java | 61 ++++++++++++++++++++ 7 files changed, 145 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 878f2f7..3279f1c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -1773,7 +1773,7 @@ public class S3AFileSystem extends FileSystem { String key = pathToKey(f); createFakeDirectory(key); } - S3Guard.makeDirsOrdered(metadataStore, metadataStoreDirs, username); + S3Guard.makeDirsOrdered(metadataStore, metadataStoreDirs, username, true); // this is complicated because getParent(a/b/c/) returns a/b/c, but // we want a/b. See HADOOP-14428 for more details. deleteUnnecessaryFakeDirectories(new Path(f.toString()).getParent()); @@ -2292,6 +2292,7 @@ public class S3AFileSystem extends FileSystem { // See note about failure semantics in s3guard.md doc. try { if (hasMetadataStore()) { + S3Guard.addAncestors(metadataStore, p, username); S3AFileStatus status = createUploadFileStatus(p, S3AUtils.objectRepresentsDirectory(key, length), length, getDefaultBlockSize(p), username); http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java index 784b815..97c9135 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java @@ -25,7 +25,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Date; -import java.util.HashSet; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -436,6 +436,30 @@ public class DynamoDBMetadataStore implements MetadataStore { } } + Collection<PathMetadata> completeAncestry( + Collection<PathMetadata> pathsToCreate) { + // Key on path to allow fast lookup + Map<Path, PathMetadata> ancestry = new HashMap<>(); + + for (PathMetadata meta : pathsToCreate) { + Preconditions.checkArgument(meta != null); + Path path = meta.getFileStatus().getPath(); + if (path.isRoot()) { + break; + } + ancestry.put(path, meta); + Path parent = path.getParent(); + while (!parent.isRoot() && !ancestry.containsKey(parent)) { + LOG.debug("auto-create ancestor path {} for child path {}", + parent, path); + final FileStatus status = makeDirStatus(parent, username); + ancestry.put(parent, new PathMetadata(status, Tristate.FALSE, false)); + parent = parent.getParent(); + } + } + return ancestry.values(); + } + @Override public void move(Collection<Path> pathsToDelete, Collection<PathMetadata> pathsToCreate) throws IOException { @@ -457,27 +481,7 @@ public class DynamoDBMetadataStore implements MetadataStore { // ancestor paths that are not explicitly added to paths to create Collection<PathMetadata> newItems = new ArrayList<>(); if (pathsToCreate != null) { - newItems.addAll(pathsToCreate); - // help set for fast look up; we should avoid putting duplicate paths - final Collection<Path> fullPathsToCreate = new HashSet<>(); - for (PathMetadata meta : pathsToCreate) { - fullPathsToCreate.add(meta.getFileStatus().getPath()); - } - - for (PathMetadata meta : pathsToCreate) { - Preconditions.checkArgument(meta != null); - Path parent = meta.getFileStatus().getPath().getParent(); - while (parent != null - && !parent.isRoot() - && !fullPathsToCreate.contains(parent)) { - LOG.debug("move: auto-create ancestor path {} for child path {}", - parent, meta.getFileStatus().getPath()); - final FileStatus status = makeDirStatus(parent, username); - newItems.add(new PathMetadata(status, Tristate.FALSE, false)); - fullPathsToCreate.add(parent); - parent = parent.getParent(); - } - } + newItems.addAll(completeAncestry(pathsToCreate)); } if (pathsToDelete != null) { for (Path meta : pathsToDelete) { @@ -575,7 +579,17 @@ public class DynamoDBMetadataStore implements MetadataStore { // For performance purpose, we generate the full paths to put and use batch // write item request to save the items. LOG.debug("Saving to table {} in region {}: {}", tableName, region, meta); - processBatchWriteRequest(null, pathMetadataToItem(fullPathsToPut(meta))); + + Collection<PathMetadata> wrapper = new ArrayList<>(1); + wrapper.add(meta); + put(wrapper); + } + + @Override + public void put(Collection<PathMetadata> metas) throws IOException { + LOG.debug("Saving batch to table {} in region {}", tableName, region); + + processBatchWriteRequest(null, pathMetadataToItem(completeAncestry(metas))); } /** http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java index b3c7de5..bd1f8df 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java @@ -273,6 +273,13 @@ public class LocalMetadataStore implements MetadataStore { dirHash.put(standardize(meta.getPath()), meta); } + public synchronized void put(Collection<PathMetadata> metas) throws + IOException { + for (PathMetadata meta : metas) { + put(meta); + } + } + @Override public void close() throws IOException { http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java index 9502a8a..76d5cdd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java @@ -165,6 +165,16 @@ public interface MetadataStore extends Closeable { void put(PathMetadata meta) throws IOException; /** + * Saves metadata for any number of paths. + * + * Semantics are otherwise the same as single-path puts. + * + * @param metas the metadata to save + * @throws IOException if there is an error + */ + void put(Collection<PathMetadata> metas) throws IOException; + + /** * Save directory listing metadata. Callers may save a partial directory * listing for a given path, or may store a complete and authoritative copy * of the directory listing. {@code MetadataStore} implementations may http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java index 65019eb..c6a4507 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java @@ -89,6 +89,11 @@ public class NullMetadataStore implements MetadataStore { } @Override + public void put(Collection<PathMetadata> meta) throws IOException { + return; + } + + @Override public void put(DirListingMetadata meta) throws IOException { return; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java index 19f0e50..51c370c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3AInstrumentation; import org.apache.hadoop.fs.s3a.S3AUtils; +import org.apache.hadoop.fs.s3a.Tristate; import org.apache.hadoop.util.ReflectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -251,9 +252,10 @@ public final class S3Guard { * an empty, dir, and the other dirs only contain their child * dir. * @param owner Hadoop user name. + * @param authoritative Whether to mark new directories as authoritative. */ public static void makeDirsOrdered(MetadataStore ms, List<Path> dirs, - String owner) { + String owner, boolean authoritative) { if (dirs == null) { return; } @@ -286,7 +288,7 @@ public final class S3Guard { children.add(new PathMetadata(prevStatus)); } DirListingMetadata dirMeta = - new DirListingMetadata(f, children, true); + new DirListingMetadata(f, children, authoritative); try { ms.put(dirMeta); ms.put(new PathMetadata(status)); @@ -396,6 +398,25 @@ public final class S3Guard { } } + public static void addAncestors(MetadataStore metadataStore, + Path qualifiedPath, String username) throws IOException { + Collection<PathMetadata> newDirs = new ArrayList<>(); + Path parent = qualifiedPath.getParent(); + while (!parent.isRoot()) { + PathMetadata directory = metadataStore.get(parent); + if (directory == null || directory.isDeleted()) { + FileStatus status = new FileStatus(0, true, 1, 0, 0, 0, null, username, + null, parent); + PathMetadata meta = new PathMetadata(status, Tristate.UNKNOWN, false); + newDirs.add(meta); + } else { + break; + } + parent = parent.getParent(); + } + metadataStore.put(newDirs); + } + private static void addMoveStatus(Collection<Path> srcPaths, Collection<PathMetadata> dstMetas, Path srcPath, FileStatus dstStatus) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardCreate.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardCreate.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardCreate.java new file mode 100644 index 0000000..dcc2538 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardCreate.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata; +import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; +import org.junit.Assume; +import org.junit.Test; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; + +/** + * Home for testing the creation of new files and directories with S3Guard + * enabled. + */ +public class ITestS3GuardCreate extends AbstractS3ATestBase { + + /** + * Test that ancestor creation during S3AFileSystem#create() is properly + * accounted for in the MetadataStore. This should be handled by the + * FileSystem, and be a FS contract test, but S3A does not handle ancestors on + * create(), so we need to take care in the S3Guard code to do the right + * thing. This may change: See HADOOP-13221 for more detail. + */ + @Test + public void testCreatePopulatesFileAncestors() throws Exception { + final S3AFileSystem fs = getFileSystem(); + Assume.assumeTrue(fs.hasMetadataStore()); + final MetadataStore ms = fs.getMetadataStore(); + final Path parent = path("testCreatePopulatesFileAncestors"); + + try { + fs.mkdirs(parent); + final Path nestedFile = new Path(parent, "dir1/dir2/file4"); + touch(fs, nestedFile); + + DirListingMetadata list = ms.listChildren(parent); + assertFalse("MetadataStore falsely reports authoritative empty list", + list.isEmpty() == Tristate.TRUE); + } finally { + fs.delete(parent, true); + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org