HADOOP-14749. review s3guard docs & code prior to merge.
Contributed by Steve Loughran


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e531ae25
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e531ae25
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e531ae25

Branch: refs/heads/HADOOP-13345
Commit: e531ae251fac73f727f457039f3e16fb2a10069a
Parents: b114f24
Author: Steve Loughran <ste...@apache.org>
Authored: Sat Aug 12 21:59:11 2017 +0100
Committer: Steve Loughran <ste...@apache.org>
Committed: Sat Aug 12 21:59:11 2017 +0100

----------------------------------------------------------------------
 hadoop-tools/hadoop-aws/pom.xml                 |  16 +-
 .../java/org/apache/hadoop/fs/s3a/Listing.java  |   6 +-
 .../org/apache/hadoop/fs/s3a/S3AFileStatus.java |  11 -
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java |  72 +--
 .../hadoop/fs/s3a/S3AInstrumentation.java       |   2 +-
 .../org/apache/hadoop/fs/s3a/Statistic.java     |   2 +-
 .../org/apache/hadoop/fs/s3a/UploadInfo.java    |   4 +-
 .../fs/s3a/s3guard/DirListingMetadata.java      |  21 +-
 .../fs/s3a/s3guard/DynamoDBClientFactory.java   |   3 +-
 .../fs/s3a/s3guard/DynamoDBMetadataStore.java   |  94 ++--
 .../fs/s3a/s3guard/LocalMetadataStore.java      |  13 +-
 .../hadoop/fs/s3a/s3guard/MetadataStore.java    |   5 +-
 .../s3guard/MetadataStoreListFilesIterator.java |   5 +-
 .../fs/s3a/s3guard/NullMetadataStore.java       |   9 -
 .../hadoop/fs/s3a/s3guard/PathMetadata.java     |   6 +
 .../PathMetadataDynamoDBTranslation.java        |   6 +-
 .../apache/hadoop/fs/s3a/s3guard/S3Guard.java   |  87 ++--
 .../hadoop/fs/s3a/s3guard/S3GuardTool.java      |  76 +--
 .../hadoop/fs/s3a/s3guard/package-info.java     |   2 +-
 .../src/site/markdown/tools/hadoop-aws/index.md |   2 +-
 .../site/markdown/tools/hadoop-aws/s3guard.md   | 485 +++++++++++--------
 .../site/markdown/tools/hadoop-aws/testing.md   | 213 ++++++--
 .../apache/hadoop/fs/s3a/S3ATestConstants.java  |   2 +-
 .../org/apache/hadoop/fs/s3a/S3ATestUtils.java  |  47 +-
 .../fs/s3a/s3guard/AbstractMSContract.java      |   4 +-
 .../s3guard/AbstractS3GuardToolTestBase.java    | 161 ++++++
 .../s3a/s3guard/DynamoDBLocalClientFactory.java |  16 +-
 .../s3a/s3guard/ITestS3GuardConcurrentOps.java  |  19 +-
 .../s3a/s3guard/ITestS3GuardToolDynamoDB.java   |  79 +--
 .../fs/s3a/s3guard/ITestS3GuardToolLocal.java   |  68 +--
 .../fs/s3a/s3guard/MetadataStoreTestBase.java   |  51 +-
 .../fs/s3a/s3guard/S3GuardToolTestBase.java     | 159 ------
 .../fs/s3a/s3guard/TestDirListingMetadata.java  |  12 +-
 .../s3a/s3guard/TestDynamoDBMetadataStore.java  |  71 ++-
 .../fs/s3a/s3guard/TestLocalMetadataStore.java  |  12 +-
 .../fs/s3a/s3guard/TestNullMetadataStore.java   |   9 +-
 .../TestPathMetadataDynamoDBTranslation.java    |  20 +-
 .../hadoop/fs/s3a/s3guard/TestS3Guard.java      |   9 +-
 .../src/test/resources/log4j.properties         |   2 +-
 39 files changed, 1114 insertions(+), 767 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml
index 62371c3..4161a50 100644
--- a/hadoop-tools/hadoop-aws/pom.xml
+++ b/hadoop-tools/hadoop-aws/pom.xml
@@ -170,7 +170,7 @@
                     
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
                     
<fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
                     
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
-                    <!-- s3guard -->
+                    <!-- S3Guard -->
                     
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
                     
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
                     
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
@@ -216,7 +216,7 @@
                     
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
                     
<fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
                     
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
-                    <!-- s3guard -->
+                    <!-- S3Guard -->
                     
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
                     
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
                     
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
@@ -262,7 +262,7 @@
                     
<fs.s3a.scale.test.enabled>${fs.s3a.scale.test.enabled}</fs.s3a.scale.test.enabled>
                     
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
                     
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
-                    <!-- s3guard -->
+                    <!-- S3Guard -->
                     
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
                     
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
                     
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
@@ -289,7 +289,7 @@
       </properties>
     </profile>
 
-    <!-- Turn on s3guard tests-->
+    <!-- Turn on S3Guard tests-->
     <profile>
       <id>s3guard</id>
       <activation>
@@ -302,7 +302,7 @@
       </properties>
     </profile>
 
-    <!-- Switch to dynamo DB for s3guard. Has no effect unless s3guard is enabled -->
+    <!-- Switch to DynamoDB for S3Guard. Has no effect unless S3Guard is enabled -->
     <profile>
       <id>dynamo</id>
       <activation>
@@ -315,7 +315,7 @@
       </properties>
     </profile>
 
-    <!-- Switch to DynamoDBLocal for s3guard. Has no effect unless s3guard is enabled -->
+    <!-- Switch to DynamoDBLocal for S3Guard. Has no effect unless S3Guard is enabled -->
     <profile>
       <id>dynamodblocal</id>
       <activation>
@@ -328,8 +328,8 @@
       </properties>
     </profile>
 
-    <!-- Switch s3guard from Authoritative=false to true
-     Has no effect unless s3guard is enabled -->
+    <!-- Switch S3Guard from Authoritative=false to true
+     Has no effect unless S3Guard is enabled -->
     <profile>
       <id>non-auth</id>
       <activation>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
index 5299c6c..8efa218 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
@@ -70,7 +70,8 @@ public class Listing {
    * @return the file status iterator
    */
   ProvidedFileStatusIterator createProvidedFileStatusIterator(
-      FileStatus[] fileStatuses, PathFilter filter,
+      FileStatus[] fileStatuses,
+      PathFilter filter,
       FileStatusAcceptor acceptor) {
     return new ProvidedFileStatusIterator(fileStatuses, filter, acceptor);
   }
@@ -356,7 +357,8 @@ public class Listing {
      * If there is data in the local filtered list, return true.
      * Else: request more data util that condition is met, or there
      * is no more remote listing data.
-     * Lastly, return true if the provided file status has left items.
+     * Lastly, return true if the {@code providedStatusIterator}
+     * has left items.
      * @return true if a call to {@link #next()} will succeed.
      * @throws IOException
      */

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
index eee3c13..be08afe 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
@@ -105,17 +105,6 @@ public class S3AFileStatus extends FileStatus {
     return isEmptyDirectory;
   }
 
-  /**
-   * Should not be called by clients.  Only used so {@link org.apache.hadoop
-   * .fs.s3a.s3guard.MetadataStore} can maintain this flag when caching
-   * FileStatuses on behalf of s3a.
-   * @param value for directories: TRUE / FALSE if known empty/not-empty,
-   *              UNKNOWN otherwise
-   */
-  public void setIsEmptyDirectory(Tristate value) {
-    isEmptyDirectory = value;
-  }
-
   /** Compare if this object is equal to another object.
    * @param   o the object to be compared.
    * @return  true if two file status has the same path name; false if not.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 95fe94f..c22383a 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -295,6 +295,7 @@ public class S3AFileSystem extends FileSystem {
     } catch (AmazonClientException e) {
       throw translateException("initializing ", new Path(name), e);
     }
+
   }
 
   /**
@@ -838,8 +839,8 @@ public class S3AFileSystem extends FileSystem {
       srcPaths = new HashSet<>(); // srcPaths need fast look up before put
       dstMetas = new ArrayList<>();
     }
-    // HADOOP-13761 s3guard: retries when source paths are not visible yet
-    // TODO s3guard: performance: mark destination dirs as authoritative
+    // TODO S3Guard HADOOP-13761: retries when source paths are not visible yet
+    // TODO S3Guard: performance: mark destination dirs as authoritative
 
     // Ok! Time to start
     if (srcStatus.isFile()) {
@@ -904,6 +905,8 @@ public class S3AFileSystem extends FileSystem {
         copyFile(key, newDstKey, length);
 
         if (hasMetadataStore()) {
+          // with a metadata store, the object entries need to be updated,
+          // including, potentially, the ancestors
           Path childSrc = keyToQualifiedPath(key);
           Path childDst = keyToQualifiedPath(newDstKey);
           if (objectRepresentsDirectory(key, length)) {
@@ -960,12 +963,18 @@ public class S3AFileSystem extends FileSystem {
 
   /**
    * Does this Filesystem have a metadata store?
-   * @return true if the FS has been instantiated with a metadata store
+   * @return true iff the FS has been instantiated with a metadata store
    */
   public boolean hasMetadataStore() {
     return !S3Guard.isNullMetadataStore(metadataStore);
   }
 
+  /**
+   * Get the metadata store.
+   * This will always be non-null, but may be bound to the
+   * {@code NullMetadataStore}.
+   * @return the metadata store of this FS instance
+   */
   @VisibleForTesting
   MetadataStore getMetadataStore() {
     return metadataStore;
@@ -1448,7 +1457,7 @@ public class S3AFileSystem extends FileSystem {
 
       if (status.isEmptyDirectory() == Tristate.TRUE) {
         LOG.debug("Deleting fake empty directory {}", key);
-        // HADOOP-13761 s3guard: retries here
+        // HADOOP-13761 S3Guard: retries here
         deleteObject(key);
         metadataStore.delete(f);
         instrumentation.directoryDeleted();
@@ -1466,7 +1475,7 @@ public class S3AFileSystem extends FileSystem {
             LOG.debug("Got object to delete {}", summary.getKey());
 
             if (keys.size() == MAX_ENTRIES_TO_DELETE) {
-              // HADOOP-13761 s3guard: retries
+              // TODO: HADOOP-13761 S3Guard: retries
               removeKeys(keys, true, false);
             }
           }
@@ -1475,7 +1484,7 @@ public class S3AFileSystem extends FileSystem {
             objects = continueListObjects(objects);
           } else {
             if (!keys.isEmpty()) {
-              // HADOOP-13761 s3guard: retries
+              // TODO: HADOOP-13761 S3Guard: retries
               removeKeys(keys, false, false);
             }
             break;
@@ -1742,7 +1751,7 @@ public class S3AFileSystem extends FileSystem {
    * Return a file status object that represents the path.
    * @param f The path we want information from
    * @return a FileStatus object
-   * @throws java.io.FileNotFoundException when the path does not exist;
+   * @throws FileNotFoundException when the path does not exist
    * @throws IOException on other problems.
    */
   public FileStatus getFileStatus(final Path f) throws IOException {
@@ -1750,12 +1759,12 @@ public class S3AFileSystem extends FileSystem {
   }
 
   /**
-   * Internal version of getFileStatus().
+   * Internal version of {@link #getFileStatus(Path)}.
    * @param f The path we want information from
    * @param needEmptyDirectoryFlag if true, implementation will calculate
    *        a TRUE or FALSE value for {@link S3AFileStatus#isEmptyDirectory()}
    * @return a S3AFileStatus object
-   * @throws java.io.FileNotFoundException when the path does not exist;
+   * @throws FileNotFoundException when the path does not exist
    * @throws IOException on other problems.
    */
   @VisibleForTesting
@@ -1800,19 +1809,25 @@ public class S3AFileSystem extends FileSystem {
       } catch (FileNotFoundException e) {
         return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE);
       }
+      // entry was found, save in S3Guard
       return S3Guard.putAndReturn(metadataStore, s3FileStatus, instrumentation);
+    } else {
+      // there was no entry in S3Guard
+      // retrieve the data and update the metadata store in the process.
+      return S3Guard.putAndReturn(metadataStore,
+          s3GetFileStatus(path, key, tombstones), instrumentation);
     }
-    return S3Guard.putAndReturn(metadataStore,
-        s3GetFileStatus(path, key, tombstones), instrumentation);
   }
 
   /**
-   * Raw get file status that only uses S3.  Used to implement
-   * innerGetFileStatus, and for direct management of empty directory blobs.
+   * Raw {@code getFileStatus} that talks directly to S3.
+   * Used to implement {@link #innerGetFileStatus(Path, boolean)},
+   * and for direct management of empty directory blobs.
    * @param path Qualified path
    * @param key  Key string for the path
    * @return Status
-   * @throws IOException
+   * @throws FileNotFoundException when the path does not exist
+   * @throws IOException on other problems.
    */
   private S3AFileStatus s3GetFileStatus(final Path path, String key,
       Set<Path> tombstones) throws IOException {
@@ -1826,10 +1841,10 @@ public class S3AFileSystem extends FileSystem {
         } else {
           LOG.debug("Found exact file: normal file");
           return new S3AFileStatus(meta.getContentLength(),
-                  dateToLong(meta.getLastModified()),
-                  path,
-                  getDefaultBlockSize(path),
-                  username);
+              dateToLong(meta.getLastModified()),
+              path,
+              getDefaultBlockSize(path),
+              username);
         }
       } catch (AmazonServiceException e) {
         if (e.getStatusCode() != 404) {
@@ -1911,7 +1926,8 @@ public class S3AFileSystem extends FileSystem {
     throw new FileNotFoundException("No such file or directory: " + path);
   }
 
-  /** Helper function to determine if a collection of paths is empty
+  /**
+   * Helper function to determine if a collection of paths is empty
    * after accounting for tombstone markers (if provided).
    * @param keys Collection of path (prefixes / directories or keys).
    * @param tombstones Set of tombstone markers, or null if not applicable.
@@ -1932,7 +1948,8 @@ public class S3AFileSystem extends FileSystem {
     return true;
   }
 
-  /** Helper function to determine if a collection of object summaries is empty
+  /**
+   * Helper function to determine if a collection of object summaries is empty
    * after accounting for tombstone markers (if provided).
    * @param summaries Collection of objects as returned by listObjects.
    * @param tombstones Set of tombstone markers, or null if not applicable.
@@ -1945,14 +1962,12 @@ public class S3AFileSystem extends FileSystem {
       return summaries.isEmpty();
     }
     Collection<String> stringCollection = new ArrayList<>(summaries.size());
-    for(S3ObjectSummary summary : summaries) {
+    for (S3ObjectSummary summary : summaries) {
       stringCollection.add(summary.getKey());
     }
     return isEmptyOfKeys(stringCollection, tombstones);
   }
 
-
-
   /**
    * Raw version of {@link FileSystem#exists(Path)} which uses S3 only:
    * S3Guard MetadataStore, if any, will be skipped.
@@ -1962,7 +1977,8 @@ public class S3AFileSystem extends FileSystem {
     Path path = qualify(f);
     String key = pathToKey(path);
     try {
-      return s3GetFileStatus(path, key, null) != null;
+      s3GetFileStatus(path, key, null);
+      return true;
     } catch (FileNotFoundException e) {
       return false;
     }
@@ -2247,7 +2263,7 @@ public class S3AFileSystem extends FileSystem {
     deleteUnnecessaryFakeDirectories(p.getParent());
     Preconditions.checkArgument(length >= 0, "content length is negative");
 
-    // See note about failure semantics in s3guard.md doc.
+    // See note about failure semantics in S3Guard documentation
     try {
       if (hasMetadataStore()) {
         S3Guard.addAncestors(metadataStore, p, username);
@@ -2257,7 +2273,7 @@ public class S3AFileSystem extends FileSystem {
         S3Guard.putAndReturn(metadataStore, status, instrumentation);
       }
     } catch (IOException e) {
-      LOG.error("s3guard: Error updating MetadataStore for write to {}:",
+      LOG.error("S3Guard: Error updating MetadataStore for write to {}:",
           key, e);
       instrumentation.errorIgnored();
     }
@@ -2593,6 +2609,7 @@ public class S3AFileSystem extends FileSystem {
           cachedFilesIterator = listing.createProvidedFileStatusIterator(
               S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
           if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
+            // metadata listing is authoritative, so return it directly
             return listing.createLocatedFileStatusIterator(cachedFilesIterator);
           }
         }
@@ -2606,8 +2623,7 @@ public class S3AFileSystem extends FileSystem {
             tombstones);
       }
     } catch (AmazonClientException e) {
-      // TODO s3guard:
-      // 1. retry on file not found exception
+      // TODO S3Guard: retry on file not found exception
       throw translateException("listFiles", path, e);
     }
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
index 77804fe..c4b4866 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
@@ -91,7 +91,7 @@ public class S3AInstrumentation {
   private final Map<String, MutableCounterLong> streamMetrics =
       new HashMap<>(30);
 
-  /** Instantiate this without caring whether or not s3guard is enabled. */
+  /** Instantiate this without caring whether or not S3Guard is enabled. */
   private final S3GuardInstrumentation s3GuardInstrumentation
       = new S3GuardInstrumentation();
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
index bfc3d35..777c161 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
@@ -142,7 +142,7 @@ public enum Statistic {
   STREAM_WRITE_QUEUE_DURATION("stream_write_queue_duration",
       "Total queue duration of all block uploads"),
 
-  // S3guard stats
+  // S3Guard stats
   S3GUARD_METADATASTORE_PUT_PATH_REQUEST(
       "s3guard_metadatastore_put_path_request",
       "s3guard metadata store put one metadata path request"),

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
index b6d31af0..238cd97 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UploadInfo.java
@@ -24,8 +24,8 @@ import com.amazonaws.services.s3.transfer.Upload;
  * Simple struct that contains information about a S3 upload.
  */
 public class UploadInfo {
-  private Upload upload;
-  private long length;
+  private final Upload upload;
+  private final long length;
 
   public UploadInfo(Upload upload, long length) {
     this.upload = upload;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
index fe4f9a1..bcbdced 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java
@@ -18,11 +18,6 @@
 
 package org.apache.hadoop.fs.s3a.s3guard;
 
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.Path;
-
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
@@ -34,6 +29,11 @@ import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 
 import com.google.common.base.Preconditions;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.s3a.Tristate;
 
 /**
@@ -61,7 +61,8 @@ public class DirListingMetadata {
    * Create a directory listing metadata container.
    *
    * @param path Path of the directory. If this path has a host component, then
-   *     all paths added later via put() must also have the same host.
+   *     all paths added later via {@link #put(FileStatus)} must also have
+   *     the same host.
    * @param listing Entries in the directory.
    * @param isAuthoritative true iff listing is the full contents of the
    *     directory, and the calling client reports that this may be cached as
@@ -257,6 +258,7 @@ public class DirListingMetadata {
     prettyPrint(sb);
     return sb.toString();
   }
+
   /**
    * Checks that child path is valid.
    * @param childPath path to check.
@@ -284,12 +286,15 @@ public class DirListingMetadata {
   }
 
   /**
-   * For Path's that are handed in directly, we assert they are in consistent
+   * For Paths that are handed in directly, we assert they are in consistent
    * format with checkPath().  For paths that are supplied embedded in
-   * FileStatus', we attempt to fill in missing scheme and host, when this
+   * FileStatus, we attempt to fill in missing scheme and host, when this
    * DirListingMetadata is associated with one.
    *
    * @return Path suitable for consistent hashtable lookups
+   * @throws NullPointerException null status argument
+   * @throws IllegalArgumentException bad status values or failure to
+   *                                  create a URI.
    */
   private Path childStatusToPathKey(FileStatus status) {
     Path p = status.getPath();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java
index 0c88230..5005bc0 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBClientFactory.java
@@ -71,7 +71,8 @@ public interface DynamoDBClientFactory extends Configurable {
     @Override
     public AmazonDynamoDB createDynamoDBClient(String defaultRegion)
         throws IOException {
-      assert getConf() != null : "Should have been configured before usage";
+      Preconditions.checkNotNull(getConf(),
+          "Should have been configured before usage");
 
       final Configuration conf = getConf();
       final AWSCredentialsProvider credentials =

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
index 97c9135..322361c 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.fs.s3a.s3guard;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InterruptedIOException;
 import java.net.URI;
@@ -50,34 +51,32 @@ import 
com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;
 import 
com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
 import com.amazonaws.services.dynamodbv2.model.ResourceInUseException;
 import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
-
 import com.amazonaws.services.dynamodbv2.model.WriteRequest;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.s3a.Constants;
-import org.apache.hadoop.fs.s3a.S3AInstrumentation;
-import org.apache.hadoop.fs.s3a.Tristate;
-import org.apache.hadoop.io.retry.RetryPolicies;
-import org.apache.hadoop.io.retry.RetryPolicy;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.Constants;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3AInstrumentation;
+import org.apache.hadoop.fs.s3a.Tristate;
+import org.apache.hadoop.io.retry.RetryPolicies;
+import org.apache.hadoop.io.retry.RetryPolicy;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ReflectionUtils;
 
-import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.*;
 import static org.apache.hadoop.fs.s3a.Constants.*;
-import static org.apache.hadoop.fs.s3a.S3AUtils.*;
+import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;
 import static 
org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
+import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.*;
 
 /**
  * DynamoDBMetadataStore is a {@link MetadataStore} that persists
@@ -159,7 +158,7 @@ import static 
org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*
  * the S3 bucket that hosts the S3A instance.  During initialization, it checks
  * the location of the S3 bucket and creates a DynamoDB client connected to the
  * same region. The region may also be set explicitly by setting the config
- * parameter fs.s3a.s3guard.ddb.endpoint with the corresponding endpoint.
+ * parameter {@code fs.s3a.s3guard.ddb.region} to the corresponding region.
  */
 @InterfaceAudience.Private
 @InterfaceStability.Evolving
@@ -203,6 +202,7 @@ public class DynamoDBMetadataStore implements MetadataStore 
{
    * @param conf the file system configuration
    * @param s3Region region of the associated S3 bucket (if any).
    * @return DynamoDB instance.
+   * @throws IOException I/O error.
    */
   private static DynamoDB createDynamoDB(Configuration conf, String s3Region)
       throws IOException {
@@ -255,11 +255,18 @@ public class DynamoDBMetadataStore implements 
MetadataStore {
    * not explicitly relate to any S3 bucket, which be nonexistent.
    *
    * This is used to operate the metadata store directly beyond the scope of 
the
-   * S3AFileSystem integration, e.g. command line tools. Generally you should
-   * use {@link #initialize(FileSystem)} if given an initialized S3 file 
system.
+   * S3AFileSystem integration, e.g. command line tools.
+   * Generally, callers should use {@link #initialize(FileSystem)}
+   * with an initialized {@code S3AFileSystem} instance.
+   *
+   * Without a filesystem to act as a reference point, the configuration itself
+   * must declare the table name and region in the options
+   * {@link Constants#S3GUARD_DDB_TABLE_NAME_KEY} and
+   * {@link Constants#S3GUARD_DDB_REGION_KEY} respectively.
    *
    * @see #initialize(FileSystem)
    * @throws IOException if there is an error
+   * @throws IllegalArgumentException if the configuration is incomplete
    */
   @Override
   public void initialize(Configuration config) throws IOException {
@@ -267,10 +274,10 @@ public class DynamoDBMetadataStore implements 
MetadataStore {
     // use the bucket as the DynamoDB table name if not specified in config
     tableName = conf.getTrimmed(S3GUARD_DDB_TABLE_NAME_KEY);
     Preconditions.checkArgument(!StringUtils.isEmpty(tableName),
-        "No DynamoDB table name configured!");
+        "No DynamoDB table name configured");
     region = conf.getTrimmed(S3GUARD_DDB_REGION_KEY);
     Preconditions.checkArgument(!StringUtils.isEmpty(region),
-        "No DynamoDB region configured!");
+        "No DynamoDB region configured");
     dynamoDB = createDynamoDB(conf, region);
 
     username = UserGroupInformation.getCurrentUser().getShortUserName();
@@ -279,6 +286,12 @@ public class DynamoDBMetadataStore implements 
MetadataStore {
     initTable();
   }
 
+  /**
+   * Set retry policy. This is driven by the value of
+   * {@link Constants#S3GUARD_DDB_MAX_RETRIES} with an exponential backoff
+   * between each attempt of {@link #MIN_RETRY_SLEEP_MSEC} milliseconds.
+   * @param config configuration to read the maximum retry count from
+   */
   private void setMaxRetries(Configuration config) {
     int maxRetries = config.getInt(S3GUARD_DDB_MAX_RETRIES,
         S3GUARD_DDB_MAX_RETRIES_DEFAULT);
@@ -297,6 +310,14 @@ public class DynamoDBMetadataStore implements 
MetadataStore {
     innerDelete(path, false);
   }
 
+  /**
+   * Inner delete option, action based on the {@code tombstone} flag.
+   * No tombstone: delete the entry. Tombstone: create a tombstone entry.
+   * There is no check as to whether the entry exists in the table first.
+   * @param path path to delete
+   * @param tombstone flag to create a tombstone marker
+   * @throws IOException I/O error.
+   */
   private void innerDelete(Path path, boolean tombstone)
       throws IOException {
     path = checkPath(path);
@@ -414,6 +435,7 @@ public class DynamoDBMetadataStore implements MetadataStore 
{
     path = checkPath(path);
     LOG.debug("Listing table {} in region {}: {}", tableName, region, path);
 
+    // find the children in the table
     try {
       final QuerySpec spec = new QuerySpec()
           .withHashKey(pathToParentKeyAttribute(path))
@@ -432,10 +454,12 @@ public class DynamoDBMetadataStore implements 
MetadataStore {
           ? null
           : new DirListingMetadata(path, metas, false);
     } catch (AmazonClientException e) {
+      // failure, including the path not being present
       throw translateException("listChildren", path, e);
     }
   }
 
+  // build the list of all parent entries.
   Collection<PathMetadata> completeAncestry(
       Collection<PathMetadata> pathsToCreate) {
     // Key on path to allow fast lookup
@@ -677,7 +701,7 @@ public class DynamoDBMetadataStore implements MetadataStore 
{
       return;
     }
     LOG.info("Deleting DynamoDB table {} in region {}", tableName, region);
-    Preconditions.checkNotNull(dynamoDB, "Not connected to Dynamo");
+    Preconditions.checkNotNull(dynamoDB, "Not connected to DynamoDB");
     try {
       table.delete();
       table.waitForDelete();
@@ -766,22 +790,23 @@ public class DynamoDBMetadataStore implements 
MetadataStore {
         LOG.debug("Binding to table {}", tableName);
         final String status = table.describe().getTableStatus();
         switch (status) {
-          case "CREATING":
-          case "UPDATING":
-            LOG.debug("Table {} in region {} is being created/updated. This 
may "
-                    + "indicate that the table is being operated by another "
-                    + "concurrent thread or process. Waiting for active...",
-                tableName, region);
-            waitForTableActive(table);
-            break;
-          case "DELETING":
-            throw new IOException("DynamoDB table '" + tableName + "' is being 
"
-                + "deleted in region " + region);
-          case "ACTIVE":
-            break;
-          default:
-            throw new IOException("Unknown DynamoDB table status " + status
-                + ": tableName='" + tableName + "', region=" + region);
+        case "CREATING":
+        case "UPDATING":
+          LOG.debug("Table {} in region {} is being created/updated. This may"
+                  + " indicate that the table is being operated by another "
+                  + "concurrent thread or process. Waiting for active...",
+              tableName, region);
+          waitForTableActive(table);
+          break;
+        case "DELETING":
+          throw new FileNotFoundException("DynamoDB table "
+              + "'" + tableName + "' is being "
+              + "deleted in region " + region);
+        case "ACTIVE":
+          break;
+        default:
+          throw new IOException("Unknown DynamoDB table status " + status
+              + ": tableName='" + tableName + "', region=" + region);
         }
 
         final Item versionMarker = getVersionMarkerItem();
@@ -799,7 +824,8 @@ public class DynamoDBMetadataStore implements MetadataStore 
{
 
           createTable(capacity);
         } else {
-          throw new IOException("DynamoDB table '" + tableName + "' does not "
+          throw new FileNotFoundException("DynamoDB table "
+              + "'" + tableName + "' does not "
               + "exist in region " + region + "; auto-creation is turned off");
         }
       }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
index bd1f8df..a6492f9 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
@@ -37,12 +37,13 @@ import java.util.Map;
 
 /**
  * This is a local, in-memory, implementation of MetadataStore.
- * This is *not* a coherent cache across processes.  It is only
+ * This is <i>not</i> a coherent cache across processes.  It is only
  * locally-coherent.
  *
- * The purpose of this is for unit testing.  It could also be used to 
accelerate
- * local-only operations where only one process is operating on a given object
- * store, or multiple processes are accessing a read-only storage bucket.
+ * The purpose of this is for unit and integration testing.
+ * It could also be used to accelerate local-only operations where only one
+ * process is operating on a given object store, or multiple processes are
+ * accessing a read-only storage bucket.
  *
  * This MetadataStore does not enforce filesystem rules such as disallowing
  * non-recursive removal of non-empty directories.  It is assumed the caller
@@ -53,6 +54,10 @@ public class LocalMetadataStore implements MetadataStore {
   public static final Logger LOG = 
LoggerFactory.getLogger(MetadataStore.class);
   // TODO HADOOP-13649: use time instead of capacity for eviction.
   public static final int DEFAULT_MAX_RECORDS = 128;
+
+  /**
+   * Maximum number of records.
+   */
   public static final String CONF_MAX_RECORDS =
       "fs.metadatastore.local.max_records";
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
index 76d5cdd..dd8077b 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
@@ -48,7 +48,6 @@ public interface MetadataStore extends Closeable {
 
   /**
    * Performs one-time initialization of the metadata store via configuration.
-   *
    * @see #initialize(FileSystem)
    * @param conf Configuration.
    * @throws IOException if there is an error
@@ -149,8 +148,8 @@ public interface MetadataStore extends Closeable {
    *                      ().
    * @throws IOException if there is an error
    */
-  void move(Collection<Path> pathsToDelete, Collection<PathMetadata>
-      pathsToCreate) throws IOException;
+  void move(Collection<Path> pathsToDelete,
+      Collection<PathMetadata> pathsToCreate) throws IOException;
 
   /**
    * Saves metadata for exactly one path.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
index 272b1f4..679f767 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
@@ -28,13 +28,14 @@ import java.util.Queue;
 import java.util.Set;
 
 import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * {@code MetadataStoreListFilesIterator} is a {@link RemoteIterator} that

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
index c6a4507..41dd61b 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
@@ -43,22 +43,18 @@ public class NullMetadataStore implements MetadataStore {
 
   @Override
   public void close() throws IOException {
-    return;
   }
 
   @Override
   public void delete(Path path) throws IOException {
-    return;
   }
 
   @Override
   public void forgetMetadata(Path path) throws IOException {
-    return;
   }
 
   @Override
   public void deleteSubtree(Path path) throws IOException {
-    return;
   }
 
   @Override
@@ -80,22 +76,18 @@ public class NullMetadataStore implements MetadataStore {
   @Override
   public void move(Collection<Path> pathsToDelete,
       Collection<PathMetadata> pathsToCreate) throws IOException {
-    return;
   }
 
   @Override
   public void put(PathMetadata meta) throws IOException {
-    return;
   }
 
   @Override
   public void put(Collection<PathMetadata> meta) throws IOException {
-    return;
   }
 
   @Override
   public void put(DirListingMetadata meta) throws IOException {
-    return;
   }
 
   @Override
@@ -104,7 +96,6 @@ public class NullMetadataStore implements MetadataStore {
 
   @Override
   public void prune(long modTime) {
-    return;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
index d799e16..d8cb447 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java
@@ -37,6 +37,11 @@ public class PathMetadata {
   private Tristate isEmptyDirectory;
   private boolean isDeleted;
 
+  /**
+   * Create a tombstone from the current time.
+   * @param path path to tombstone
+   * @return the entry.
+   */
   public static PathMetadata tombstone(Path path) {
     long now = System.currentTimeMillis();
     FileStatus status = new FileStatus(0, false, 0, 0, now, path);
@@ -75,6 +80,7 @@ public class PathMetadata {
   }
 
   /**
+   * Query if a directory is empty.
    * @return Tristate.TRUE if this is known to be an empty directory,
    * Tristate.FALSE if known to not be empty, and Tristate.UNKNOWN if the
    * MetadataStore does not have enough information to determine either way.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
index c206a00..8515bfb 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
@@ -136,7 +136,7 @@ final class PathMetadataDynamoDBTranslation {
           username, username, path);
     }
     boolean isDeleted =
-        item.hasAttribute(IS_DELETED) ? item.getBoolean(IS_DELETED) : false;
+        item.hasAttribute(IS_DELETED) && item.getBoolean(IS_DELETED);
 
     return new PathMetadata(fileStatus, Tristate.UNKNOWN, isDeleted);
   }
@@ -243,8 +243,8 @@ final class PathMetadataDynamoDBTranslation {
   }
 
   /**
-   * e.g. pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a
-   * @param path
+   * e.g. {@code pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a}
+   * @param path path to convert
    * @return string for parent key
    */
   static String pathToParentKey(Path path) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
index d8ae3fb..edd3830 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
@@ -18,8 +18,17 @@
 
 package org.apache.hadoop.fs.s3a.s3guard;
 
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
@@ -29,21 +38,13 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.s3a.S3AFileStatus;
 import org.apache.hadoop.fs.s3a.S3AInstrumentation;
-import org.apache.hadoop.fs.s3a.S3AUtils;
 import org.apache.hadoop.fs.s3a.Tristate;
 import org.apache.hadoop.util.ReflectionUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.net.URI;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Set;
-
-import static org.apache.hadoop.fs.s3a.Constants.*;
-import static org.apache.hadoop.fs.s3a.Statistic.*;
+import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
+import static 
org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_LATENCY;
+import static 
org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_REQUEST;
+import static org.apache.hadoop.fs.s3a.S3AUtils.createUploadFileStatus;
 
 /**
  * Logic for integrating MetadataStore with S3A.
@@ -76,10 +77,6 @@ public final class S3Guard {
    * returning it.  Callers must clean up by calling
    * {@link MetadataStore#close()} when done using the MetadataStore.
    *
-   * If this fails to instantiate the class specified in config (fs.s3a
-   * .metadatastore.impl), or there is an error initializing it, this will log
-   * an error and return an instance of {@link NullMetadataStore} instead.
-   *
    * @param fs  FileSystem whose Configuration specifies which
    *            implementation to use.
    * @return Reference to new MetadataStore.
@@ -124,9 +121,9 @@ public final class S3Guard {
 
 
   /**
-   * Helper function which puts given S3AFileStatus in the MetadataStore and
-   * returns the same S3AFileStatus.
-   * @param ms MetadataStore to put() into.
+   * Helper function which puts a given S3AFileStatus into the MetadataStore and
+   * returns the same S3AFileStatus. Instrumentation monitors the put operation.
+   * @param ms MetadataStore to {@code put()} into.
    * @param status status to store
    * @param instrumentation instrumentation of the s3a file system
    * @return The same status as passed in
@@ -168,7 +165,7 @@ public final class S3Guard {
   }
 
   /**
-   * Given directory listing metadata from both the backing store, and the
+   * Given directory listing metadata from both the backing store and the
    * MetadataStore, merge the two sources of truth to create a consistent
    * view of the current directory contents, which can be returned to clients.
    *
@@ -187,7 +184,7 @@ public final class S3Guard {
       boolean isAuthoritative) throws IOException {
 
     // Fast-path for NullMetadataStore
-    if (ms instanceof NullMetadataStore) {
+    if (isNullMetadataStore(ms)) {
       return backingStatuses.toArray(new FileStatus[backingStatuses.size()]);
     }
 
@@ -216,7 +213,7 @@ public final class S3Guard {
 
       // Minor race condition here.  Multiple threads could add to this
       // mutable DirListingMetadata.  Since it is backed by a
-      // ConcurrentHashMap, the last put() wins.  I think this is ok.
+      // ConcurrentHashMap, the last put() wins.
       // More concerning is two threads racing on listStatus() and delete().
       // Any FileSystem has similar race conditions, but we could persist
       // a stale entry longer.  We could expose an atomic
@@ -245,6 +242,9 @@ public final class S3Guard {
 
   /**
    * Update MetadataStore to reflect creation of the given  directories.
+   *
+   * If an IOException is raised while trying to update the entry, this
+   * operation catches the exception and returns.
    * @param ms    MetadataStore to update.
    * @param dirs  null, or an ordered list of directories from leaf to root.
    *              E.g. if /a/ exists, and  mkdirs(/a/b/c/d) is called, this
@@ -285,7 +285,7 @@ public final class S3Guard {
         Path f = dirs.get(i);
         assertQualified(f);
         FileStatus status =
-            S3AUtils.createUploadFileStatus(f, true, 0, 0, owner);
+            createUploadFileStatus(f, true, 0, 0, owner);
 
         // We only need to put a DirListingMetadata if we are setting
         // authoritative bit
@@ -315,15 +315,16 @@ public final class S3Guard {
   }
 
   /**
-   * Helper function that records the move of directory path, adding
-   * resulting metadata to the supplied lists.  Does not store in 
MetadataStore.
+   * Helper function that records the move of directory paths, adding
+   * resulting metadata to the supplied lists.
+   * Does not store in MetadataStore.
    * @param ms  MetadataStore, used to make this a no-op, when it is
    *            NullMetadataStore.
    * @param srcPaths stores the source path here
    * @param dstMetas stores destination metadata here
    * @param srcPath  source path to store
    * @param dstPath  destination path to store
-   * @param owner Hadoop user name
+   * @param owner file owner to use in created records
    */
   public static void addMoveDir(MetadataStore ms, Collection<Path> srcPaths,
       Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
@@ -331,10 +332,9 @@ public final class S3Guard {
     if (isNullMetadataStore(ms)) {
       return;
     }
-    assertQualified(srcPath);
-    assertQualified(dstPath);
-    FileStatus dstStatus = S3AUtils.createUploadFileStatus(dstPath, true, 0,
-        0, owner);
+    assertQualified(srcPath, dstPath);
+
+    FileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, 0, owner);
     addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
   }
 
@@ -349,7 +349,7 @@ public final class S3Guard {
    * @param dstPath  destination path to store
    * @param size length of file moved
    * @param blockSize  blocksize to associate with destination file
-   * @param owner Hadoop user name
+   * @param owner file owner to use in created records
    */
   public static void addMoveFile(MetadataStore ms, Collection<Path> srcPaths,
       Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
@@ -357,9 +357,8 @@ public final class S3Guard {
     if (isNullMetadataStore(ms)) {
       return;
     }
-    assertQualified(srcPath);
-    assertQualified(dstPath);
-    FileStatus dstStatus = S3AUtils.createUploadFileStatus(dstPath, false,
+    assertQualified(srcPath, dstPath);
+    FileStatus dstStatus = createUploadFileStatus(dstPath, false,
         size, blockSize, owner);
     addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
   }
@@ -390,9 +389,7 @@ public final class S3Guard {
       return;
     }
 
-    assertQualified(srcRoot);
-    assertQualified(srcPath);
-    assertQualified(dstPath);
+    assertQualified(srcRoot, srcPath, dstPath);
 
     if (srcPath.equals(srcRoot)) {
       LOG.debug("Skip moving ancestors of source root directory {}", srcRoot);
@@ -438,6 +435,11 @@ public final class S3Guard {
     dstMetas.add(new PathMetadata(dstStatus));
   }
 
+  /**
+   * Assert that the path is qualified with a host and scheme.
+   * @param p path to check
+   * @throws NullPointerException if the path's URI lacks a host or scheme
+   */
   public static void assertQualified(Path p) {
     URI uri = p.toUri();
     // Paths must include bucket in case MetadataStore is shared between
@@ -447,4 +449,15 @@ public final class S3Guard {
     // I believe this should never fail, since there is a host?
     Preconditions.checkNotNull(uri.getScheme(), "Null scheme in " + uri);
   }
+
+  /**
+   * Assert that every supplied path is qualified, by applying
+   * {@link #assertQualified(Path)} to each one in turn.
+   * @param paths paths to check
+   * @throws NullPointerException if any path fails the single-path check
+   */
+  public static void assertQualified(Path...paths) {
+    for (Path path : paths) {
+      assertQualified(path);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
index 9bd0cb8..ea34734 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -18,8 +18,23 @@
 
 package org.apache.hadoop.fs.s3a.s3guard;
 
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileStatus;
@@ -34,25 +49,11 @@ import org.apache.hadoop.fs.shell.CommandFormat;
 import org.apache.hadoop.util.GenericOptionsParser;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
 
 import static org.apache.hadoop.fs.s3a.Constants.*;
 
 /**
- * Manage S3Guard Metadata Store.
+ * CLI to manage S3Guard Metadata Store.
  */
 public abstract class S3GuardTool extends Configured implements Tool {
   private static final Logger LOG = LoggerFactory.getLogger(S3GuardTool.class);
@@ -74,6 +75,8 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
       "\t" + Import.NAME + " - " + Import.PURPOSE + "\n" +
       "\t" + Diff.NAME + " - " + Diff.PURPOSE + "\n" +
       "\t" + Prune.NAME + " - " + Prune.PURPOSE + "\n";
+  /**
+   * Suffix appended to the purpose text of the destroy and prune commands,
+   * stating that the data held in S3 itself is preserved.
+   */
+  private static final String DATA_IN_S3_IS_PRESERVED
+      = "(all data in S3 is preserved)";
 
   abstract public String getUsage();
 
@@ -100,7 +103,7 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
    * Constructor a S3Guard tool with HDFS configuration.
    * @param conf Configuration.
    */
-  public S3GuardTool(Configuration conf) {
+  protected S3GuardTool(Configuration conf) {
     super(conf);
 
     commandFormat = new CommandFormat(0, Integer.MAX_VALUE);
@@ -253,8 +256,8 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
   }
 
   /**
-   * Parse CLI arguments and returns the position arguments. The options are
-   * stored in {@link #commandFormat}
+   * Parse CLI arguments and return the positional arguments.
+   * The options are stored in {@link #commandFormat}
    *
    * @param args command line arguments.
    * @return the position arguments from CLI.
@@ -332,7 +335,8 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
    */
   static class Destroy extends S3GuardTool {
     private static final String NAME = "destroy";
-    public static final String PURPOSE = "destroy metadata repository";
+    public static final String PURPOSE = "destroy Metadata Store data "
+        + DATA_IN_S3_IS_PRESERVED;
     private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
         "\t" + PURPOSE + "\n\n" +
         "Common options:\n" +
@@ -367,8 +371,16 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
         return INVALID_ARGUMENT;
       }
 
-      initMetadataStore(false);
-      Preconditions.checkState(ms != null, "Metadata store is not 
initialized");
+      try {
+        initMetadataStore(false);
+      } catch (FileNotFoundException e) {
+        // indication that the table was not found
+        LOG.debug("Failed to bind to store to be destroyed", e);
+        LOG.info("Metadata Store does not exist.");
+        return SUCCESS;
+      }
+
+      Preconditions.checkState(ms != null, "Metadata Store is not 
initialized");
 
       ms.destroy();
       LOG.info("Metadata store is deleted.");
@@ -440,8 +452,9 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
     }
 
     /**
-     * Recursively import every path under path
+     * Recursively import every path under the given directory.
      * @return number of items inserted into MetadataStore
+     * @throws IOException on I/O errors.
      */
     private long importDir(FileStatus status) throws IOException {
       Preconditions.checkArgument(status.isDirectory());
@@ -625,7 +638,7 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
      * @param msDir the directory FileStatus obtained from the metadata store.
      * @param s3Dir the directory FileStatus obtained from S3.
      * @param out the output stream to generate diff results.
-     * @throws IOException
+     * @throws IOException on I/O errors.
      */
     private void compareDir(FileStatus msDir, FileStatus s3Dir,
                             PrintStream out) throws IOException {
@@ -676,7 +689,7 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
      *
      * @param path the path to be compared.
      * @param out  the output stream to display results.
-     * @throws IOException
+     * @throws IOException on I/O errors.
      */
     private void compareRoot(Path path, PrintStream out) throws IOException {
       Path qualified = s3a.qualify(path);
@@ -732,7 +745,8 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
   static class Prune extends S3GuardTool {
     private static final String NAME = "prune";
     public static final String PURPOSE = "truncate older metadata from " +
-        "repository";
+        "repository "
+        + DATA_IN_S3_IS_PRESERVED;
     private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
         "\t" + PURPOSE + "\n\n" +
         "Common options:\n" +
@@ -830,8 +844,8 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
   private static void printHelp() {
     if (cmd == null) {
       System.err.println("Usage: hadoop " + USAGE);
-      System.err.println("\tperform metadata store " +
-          "administrative commands for s3a filesystem.");
+      System.err.println("\tperform S3Guard metadata store " +
+          "administrative commands.");
     } else {
       System.err.println("Usage: hadoop " + cmd.getUsage());
     }
@@ -881,7 +895,11 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
     return ToolRunner.run(conf, cmd, otherArgs);
   }
 
-  public static void main(String[] args) throws Exception {
+  /**
+   * Main entry point. Calls {@code System.exit()} on all execution paths.
+   * @param args argument list
+   */
+  public static void main(String[] args) {
     try {
       int ret = run(args, new Configuration());
       System.exit(ret);
@@ -889,7 +907,7 @@ public abstract class S3GuardTool extends Configured 
implements Tool {
       System.err.println(e.getMessage());
       printHelp();
       System.exit(INVALID_ARGUMENT);
-    } catch (Exception e) {
+    } catch (Throwable e) {
       e.printStackTrace(System.err);
       System.exit(ERROR);
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
index 34f36b8..d430315 100644
--- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
+++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/hadoop/blob/e531ae25/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git 
a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md 
b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 0347b2a..b8d37c6 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -1553,7 +1553,7 @@ for `fs.s3a.server-side-encryption-algorithm` is `AES256`.
 
 SSE-KMS is where the user specifies a Customer Master Key(CMK) that is used to
 encrypt the objects. The user may specify a specific CMK or leave the
-`fs.s3a.server-side-encryption-key` empty to use the default auto-generated key
+`fs.s3a.server-side-encryption.key` empty to use the default auto-generated key
 in AWS IAM.  Each CMK configured in AWS IAM is region specific, and cannot be
 used in an S3 bucket in a different region.  There can also be policies
 assigned to the CMK that prohibit or restrict its use for users causing S3A


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to