[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-11 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302424064
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardDiagnostic.java
 ##
 @@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.service.launcher.AbstractLaunchableService;
+import org.apache.hadoop.service.launcher.LauncherExitCodes;
+import org.apache.hadoop.service.launcher.ServiceLaunchException;
+
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_USAGE;
+
+/**
+ * Entry point for diagnostics operations.
+ */
+public class AbstractS3GuardDiagnostic extends AbstractLaunchableService {
+
+  private S3AFileSystem filesystem;
+
+  private DynamoDBMetadataStore store;
+
+  private URI uri;
+
+  private List arguments;
+
+  /**
+   * Constructor.
+   * @param name entry point name.
+   */
+  public AbstractS3GuardDiagnostic(final String name) {
+super(name);
+  }
+
+  /**
+   * Constructor. If the store is set then that is the store for the operation,
+   * otherwise the filesystem's binding is used instead.
+   * @param name entry point name.
+   * @param filesystem filesystem
+   * @param store optional metastore.
+   * @param uri URI. Must be set if filesystem == null.
+   */
+  public AbstractS3GuardDiagnostic(
+  final String name,
+  @Nullable final S3AFileSystem filesystem,
+  @Nullable final DynamoDBMetadataStore store,
+  @Nullable final URI uri) {
+super(name);
+this.store = store;
+this.filesystem = filesystem;
+if (store == null) {
+  require(filesystem != null, "No filesystem or URI");
+  bindStore(filesystem);
+}
+if (uri == null) {
+  require(filesystem != null, "No filesystem or URI");
+  setUri(filesystem.getUri());
+} else {
+  setUri(uri);
+}
+  }
+
+  /**
+   * Require a condition to hold, otherwise an exception is thrown.
+   * @param condition condition to be true
+   * @param error text on failure.
+   * @throws ServiceLaunchException if the condition is not met
+   */
+  protected static void require(boolean condition, String error) {
+if (!condition) {
+  throw failure(error);
+}
+  }
+
+  /**
+   * Generate a failure exception for throwing.
+   * @param message message
+   * @param ex optional nested exception.
+   * @return an exception to throw
+   */
+  protected static ServiceLaunchException failure(String message, Throwable 
ex) {
+return new ServiceLaunchException(LauncherExitCodes.EXIT_FAIL, message, 
ex);
+  }
+
+  /**
+   * Generate a failure exception for throwing.
+   * @param message message
+   * @return an exception to throw
+   */
+  protected static ServiceLaunchException failure(String message) {
+return new ServiceLaunchException(LauncherExitCodes.EXIT_FAIL, message);
+  }
+
+  @Override
+  public Configuration bindArgs(final Configuration config,
+  final List args)
+  throws Exception {
+this.arguments = args;
+return super.bindArgs(config, args);
+  }
+
+  /**
+   * Get the argument list.
+   * @return the argument list.
+   */
+  protected List getArguments() {
+return arguments;
+  }
+
+  /**
+   * Bind to the store from a CLI argument.
+   * @param fsURI filesystem URI
+   * @throws IOException failure
+   */
+  protected void bindFromCLI(String fsURI)
+  throws IOException {
+Configuration conf = getConfig();
+setUri(fsURI);
+FileSystem fs = FileSystem.get(getUri(), conf);
+require(fs instanceof S3AFileSystem,
+"Not an S3A Filesystem:  " + fsURI);
+filesystem = (S3AFileSystem) fs;
+bindStore(filesystem);
+setUri(fs.getUri());
+  }
+
+  /**
+   * Binds the {@link #store} field to the metastore of
+   * the filesystem 

[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-11 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302423028
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java
 ##
 @@ -921,43 +913,6 @@ private DDBPathMetadata verifyInAncestor(AncestorState 
state,
 return md;
   }
 
-  @Test
 
 Review comment:
   yes, this is slow, and also if we cannot provision a table we will fail 
other tests. This can be removed.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-11 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302423028
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java
 ##
 @@ -921,43 +913,6 @@ private DDBPathMetadata verifyInAncestor(AncestorState 
state,
 return md;
   }
 
-  @Test
 
 Review comment:
   yes, this is slow, and also if we cannot provision a table we will fail 
other tests. This can go.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-11 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302422633
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DumpS3GuardTable.java
 ##
 @@ -0,0 +1,691 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import javax.annotation.Nullable;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+
+import com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.Listing;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
+import org.apache.hadoop.fs.s3a.S3ListRequest;
+import org.apache.hadoop.service.Service;
+import org.apache.hadoop.service.launcher.LauncherExitCodes;
+import org.apache.hadoop.service.launcher.ServiceLaunchException;
+import org.apache.hadoop.service.launcher.ServiceLauncher;
+import org.apache.hadoop.util.DurationInfo;
+import org.apache.hadoop.util.ExitUtil;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static org.apache.hadoop.fs.s3a.S3AUtils.ACCEPT_ALL;
+
+/**
+ * This is a low-level diagnostics entry point which does a CVE/TSV dump of
+ * the DDB state.
+ * As it also lists the filesystem, it actually changes the state of the store
+ * during the operation.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class DumpS3GuardTable extends AbstractS3GuardDiagnostic {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(DumpS3GuardTable.class);
+
+  /**
+   * Application name.
+   */
+  public static final String NAME = "DumpS3GuardTable";
+
+  /**
+   * Usage.
+   */
+  private static final String USAGE_MESSAGE = NAME
+  + "  ";
+
+  /**
+   * Suffix for the flat list: {@value}.
+   */
+  public static final String FLAT_CSV = "-flat.csv";
+
+  /**
+   * Suffix for the raw S3 dump: {@value}.
+   */
+  public static final String RAW_CSV = "-s3.csv";
+
+  /**
+   * Suffix for the DDB scan: {@value}.
+   */
+  public static final String SCAN_CSV = "-scan.csv";
+
+  /**
+   * Suffix for the second DDB scan: : {@value}.
+   */
+  public static final String SCAN2_CSV = "-scan-2.csv";
+
+  /**
+   * Suffix for the treewalk scan of the S3A Filesystem: {@value}.
+   */
+  public static final String TREE_CSV = "-tree.csv";
+
+  /**
+   * Suffix for a recursive treewalk through the metastore: {@value}.
+   */
+  public static final String STORE_CSV = "-store.csv";
+
+  /**
+   * Path in the local filesystem to save the data.
+   */
+  protected String destPath;
+
+  /**
+   * Instantiate.
+   * @param name application name.
+   */
+  public DumpS3GuardTable(final String name) {
+super(name);
+  }
+
+  /**
+   * Instantiate with default name.
+   */
+  public DumpS3GuardTable() {
+this(NAME);
+  }
+
+  /**
+   * Bind to a specific FS + store.
+   * @param fs filesystem
+   * @param store metastore to use
+   * @param destFile the base filename for output
+   * @param uri URI of store -only needed if FS is null.
+   */
+  public DumpS3GuardTable(
+  final S3AFileSystem fs,
+  final DynamoDBMetadataStore store,
+  final File destFile,
+  final URI uri) {
+super(NAME, fs, store, uri);
+this.destPath = destFile.getAbsolutePath();
+  }
+
+  /**
+   * Bind to the argument list, including validating the CLI
+   * @throws Exception 

[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-11 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302421662
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java
 ##
 @@ -538,7 +538,7 @@ public CompleteMultipartUploadResult commitUpload(
   public BulkOperationState initiateCommitOperation(
   Path path) throws IOException {
 return S3Guard.initiateBulkWrite(owner.getMetadataStore(),
-BulkOperationState.OperationType.Put, path);
 
 Review comment:
  OK, understood — I saw later that this is new.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302093454
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PurgeS3GuardTable.java
 ##
 @@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import javax.annotation.Nullable;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.service.Service;
+import org.apache.hadoop.service.launcher.LauncherExitCodes;
+import org.apache.hadoop.service.launcher.ServiceLaunchException;
+import org.apache.hadoop.service.launcher.ServiceLauncher;
+import org.apache.hadoop.util.DurationInfo;
+import org.apache.hadoop.util.ExitUtil;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static org.apache.hadoop.fs.s3a.s3guard.DumpS3GuardTable.serviceMain;
+import static 
org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.PARENT;
+
+/**
+ * Purge the S3Guard table of a FileSystem from all entries related to
+ * that table.
+ * Will fail if there is no table, or the store is in auth mode.
+ * 
+ *   hadoop org.apache.hadoop.fs.s3a.s3guard.PurgeS3GuardTable \
+ *   -force s3a://example-bucket/
+ * 
+ *
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class PurgeS3GuardTable extends AbstractS3GuardDiagnostic {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(PurgeS3GuardTable.class);
+
+  public static final String NAME = "PurgeS3GuardTable";
+
+  public static final String FORCE = "-force";
+
+  private static final String USAGE_MESSAGE = NAME
+  + " [-force] ";
+
+  private boolean force;
+
+  private long filesFound;
+  private long filesDeleted;
+
+  public PurgeS3GuardTable(final String name) {
+super(name);
+  }
+
+  public PurgeS3GuardTable() {
+this(NAME);
+  }
+
+  public PurgeS3GuardTable(
+  final S3AFileSystem filesystem,
+  final DynamoDBMetadataStore store,
 
 Review comment:
  Please add Dynamo to the class name, and create an issue noting that we have to 
implement/abstract these operations for LocalMetadataStore.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302068921
 
 

 ##
 File path: hadoop-tools/hadoop-aws/pom.xml
 ##
 @@ -186,7 +186,11 @@
 **/ITestS3AHuge*.java
 
 **/ITestDynamoDBMetadataStoreScale.java
+
 
 Review comment:
  This comment should be one line beneath the current one.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302099673
 
 

 ##
 File path: hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
 ##
 @@ -57,6 +57,8 @@ log4j.logger.org.apache.hadoop.ipc.Server=WARN
 #log4j.logger.org.apache.hadoop.fs.s3a.Listing=INFO
 # Log S3Guard classes
 #log4j.logger.org.apache.hadoop.fs.s3a.s3guard=DEBUG
+# if set to debug, this will log the PUT/DELETE operations on a store
+log4j.logger.org.apache.hadoop.fs.s3a.s3guard.Operations=DEBUG
 
 Review comment:
  we may not want to log at debug level for everybody.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302110128
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java
 ##
 @@ -172,6 +174,23 @@ public void setup() throws Exception {
 
   @Override
   public void teardown() throws Exception {
+if (ddbms != null) {
+  S3GuardTableAccess tableAccess = new S3GuardTableAccess(ddbms);
+  ExpressionSpecBuilder builder = new ExpressionSpecBuilder();
+  builder.withKeyCondition(
 
 Review comment:
   use withCondition here instead of key, so you don't have to filter for the 
VersionMarker


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302064355
 
 

 ##
 File path: 
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java
 ##
 @@ -185,28 +191,57 @@ public void testListEmptyRootDirectory() throws 
IOException {
 for (FileStatus status : statuses) {
   ContractTestUtils.assertDeleted(fs, status.getPath(), true);
 }
-assertEquals("listStatus on empty root-directory returned a non-empty 
list",
-0, fs.listStatus(root).length);
-assertFalse("listFiles(/, false).hasNext",
-fs.listFiles(root, false).hasNext());
-assertFalse("listFiles(/, true).hasNext",
-fs.listFiles(root, true).hasNext());
-assertFalse("listLocatedStatus(/).hasNext",
-fs.listLocatedStatus(root).hasNext());
+FileStatus[] list1 = fs.listStatus(root);
 
 Review comment:
  nit: please use a name which is easier to understand, e.g. rootListStatus


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302071233
 
 

 ##
 File path: hadoop-tools/hadoop-aws/pom.xml
 ##
 @@ -215,15 +219,21 @@
   
   
   
-**/ITestS3AContractRootDir.java
 **/ITestS3AFileContextStatistics.java
+
 **/ITestS3AHuge*.java
+
 **/ITestS3AEncryptionSSEC*.java
 
 **/ITestDynamoDBMetadataStoreScale.java
 
 
 **/ITestTerasort*.java
+
+**/ITest*CommitMRJob.java
 
 Review comment:
  Same: we had a lot of testMRJob failures. Are those failing because of an OOME?


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302074925
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java
 ##
 @@ -538,7 +538,7 @@ public CompleteMultipartUploadResult commitUpload(
   public BulkOperationState initiateCommitOperation(
   Path path) throws IOException {
 return S3Guard.initiateBulkWrite(owner.getMetadataStore(),
-BulkOperationState.OperationType.Put, path);
 
 Review comment:
  This was a bug where the operation type was Put instead of Commit, right?


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302096415
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java
 ##
 @@ -281,22 +289,6 @@ private void deleteAllMetadata() throws IOException {
   public static void deleteMetadataUnderPath(final DynamoDBMetadataStore ms,
   final Path path, final boolean suppressErrors) throws IOException {
 ThrottleTracker throttleTracker = new ThrottleTracker(ms);
-try (DurationInfo ignored = new DurationInfo(LOG, true, "prune")) {
 
 Review comment:
  Why did you remove this? We do this operation on a separate testing table, 
just for this test, so it won't affect any other tests.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302066096
 
 

 ##
 File path: 
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java
 ##
 @@ -185,28 +191,57 @@ public void testListEmptyRootDirectory() throws 
IOException {
 for (FileStatus status : statuses) {
   ContractTestUtils.assertDeleted(fs, status.getPath(), true);
 }
-assertEquals("listStatus on empty root-directory returned a non-empty 
list",
-0, fs.listStatus(root).length);
-assertFalse("listFiles(/, false).hasNext",
-fs.listFiles(root, false).hasNext());
-assertFalse("listFiles(/, true).hasNext",
-fs.listFiles(root, true).hasNext());
-assertFalse("listLocatedStatus(/).hasNext",
-fs.listLocatedStatus(root).hasNext());
+FileStatus[] list1 = fs.listStatus(root);
+assertEquals("listStatus on empty root-directory returned found: "
++ join("\n", list1),
+0, list1.length);
+assertNoElements("listFiles(/, false)",
+fs.listFiles(root, false));
+assertNoElements("listFiles(/, true)",
+fs.listFiles(root, true));
+assertNoElements("listLocatedStatus(/)",
+fs.listLocatedStatus(root));
 assertIsDirectory(root);
   }
 
+  /**
+   * Assert that an iterator has no elements; the raised exception
+   * will include the element list.
+   * @param operation operation for assertion text.
+   * @param iter iterator
+   * @throws IOException failure retrieving the values.
+   */
+  protected void assertNoElements(String operation,
+  RemoteIterator iter) throws IOException {
+List resultList = toList(iter);
+if (!resultList.isEmpty()) {
+  fail("Expected no results from " + operation + ", but got "
 
 Review comment:
  good that we will have a more descriptive failure message


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302069432
 
 

 ##
 File path: hadoop-tools/hadoop-aws/pom.xml
 ##
 @@ -186,7 +186,11 @@
 **/ITestS3AHuge*.java
 
 **/ITestDynamoDBMetadataStoreScale.java
+
 
 Review comment:
  Also, if it uses that much RAM, we should create an upstream issue to solve this.
  For my note: we had a LOT of testMRJob failures, so excluding this will not 
make them go away; they will still pop up during the sequential run.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302096634
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java
 ##
 @@ -921,43 +913,6 @@ private DDBPathMetadata verifyInAncestor(AncestorState 
state,
 return md;
   }
 
-  @Test
 
 Review comment:
   Why did you remove this test?


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302088282
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DumpS3GuardTable.java
 ##
 @@ -0,0 +1,691 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import javax.annotation.Nullable;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+
+import com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.Listing;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
+import org.apache.hadoop.fs.s3a.S3ListRequest;
+import org.apache.hadoop.service.Service;
+import org.apache.hadoop.service.launcher.LauncherExitCodes;
+import org.apache.hadoop.service.launcher.ServiceLaunchException;
+import org.apache.hadoop.service.launcher.ServiceLauncher;
+import org.apache.hadoop.util.DurationInfo;
+import org.apache.hadoop.util.ExitUtil;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static org.apache.hadoop.fs.s3a.S3AUtils.ACCEPT_ALL;
+
+/**
+ * This is a low-level diagnostics entry point which does a CSV/TSV dump of
+ * the DDB state.
+ * As it also lists the filesystem, it actually changes the state of the store
+ * during the operation.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class DumpS3GuardTable extends AbstractS3GuardDiagnostic {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(DumpS3GuardTable.class);
+
+  /**
+   * Application name.
+   */
+  public static final String NAME = "DumpS3GuardTable";
+
+  /**
+   * Usage.
+   */
+  private static final String USAGE_MESSAGE = NAME
+  + "  ";
+
+  /**
+   * Suffix for the flat list: {@value}.
+   */
+  public static final String FLAT_CSV = "-flat.csv";
+
+  /**
+   * Suffix for the raw S3 dump: {@value}.
+   */
+  public static final String RAW_CSV = "-s3.csv";
+
+  /**
+   * Suffix for the DDB scan: {@value}.
+   */
+  public static final String SCAN_CSV = "-scan.csv";
+
+  /**
+   * Suffix for the second DDB scan: {@value}.
+   */
+  public static final String SCAN2_CSV = "-scan-2.csv";
+
+  /**
+   * Suffix for the treewalk scan of the S3A Filesystem: {@value}.
+   */
+  public static final String TREE_CSV = "-tree.csv";
+
+  /**
+   * Suffix for a recursive treewalk through the metastore: {@value}.
+   */
+  public static final String STORE_CSV = "-store.csv";
+
+  /**
+   * Path in the local filesystem to save the data.
+   */
+  protected String destPath;
+
+  /**
+   * Instantiate.
+   * @param name application name.
+   */
+  public DumpS3GuardTable(final String name) {
+super(name);
+  }
+
+  /**
+   * Instantiate with default name.
+   */
+  public DumpS3GuardTable() {
+this(NAME);
+  }
+
+  /**
+   * Bind to a specific FS + store.
+   * @param fs filesystem
+   * @param store metastore to use
+   * @param destFile the base filename for output
+   * @param uri URI of store -only needed if FS is null.
+   */
+  public DumpS3GuardTable(
+  final S3AFileSystem fs,
+  final DynamoDBMetadataStore store,
+  final File destFile,
+  final URI uri) {
+super(NAME, fs, store, uri);
+this.destPath = destFile.getAbsolutePath();
+  }
+
+  /**
+   * Bind to the argument list, including validating the CLI
+   * @throws Exception 

[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302098848
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardRootOperations.java
 ##
 @@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.File;
+import java.net.URI;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.assertj.core.api.Assertions;
+import org.junit.FixMethodOrder;
+import org.junit.Test;
+import org.junit.runners.MethodSorters;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3ATestUtils;
+import org.apache.hadoop.fs.s3a.impl.StoreContext;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static org.apache.hadoop.fs.s3a.Constants.ENABLE_MULTI_DELETE;
+import static 
org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides;
+import static org.apache.hadoop.fs.s3a.S3AUtils.applyLocatedFiles;
+
+/**
+ * This test run against the root of the FS, and operations which span the DDB
+ * table and the filesystem.
+ * For this reason, these tests are executed in the sequential phase of the
+ * integration tests.
+ * <p>
+ * The tests only run if DynamoDB is the metastore.
+ */
+@FixMethodOrder(MethodSorters.NAME_ASCENDING)
+public class ITestS3GuardRootOperations extends AbstractS3ATestBase {
+
+  private StoreContext storeContext;
+
+  private String fsUriStr;
+
+  private DynamoDBMetadataStore metastore;
 
 Review comment:
   Add dynamo to the class name.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302090898
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DumpS3GuardTable.java
 ##
 @@ -0,0 +1,691 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import javax.annotation.Nullable;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+
+import com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.Listing;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
+import org.apache.hadoop.fs.s3a.S3ListRequest;
+import org.apache.hadoop.service.Service;
+import org.apache.hadoop.service.launcher.LauncherExitCodes;
+import org.apache.hadoop.service.launcher.ServiceLaunchException;
+import org.apache.hadoop.service.launcher.ServiceLauncher;
+import org.apache.hadoop.util.DurationInfo;
+import org.apache.hadoop.util.ExitUtil;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static org.apache.hadoop.fs.s3a.S3AUtils.ACCEPT_ALL;
+
+/**
+ * This is a low-level diagnostics entry point which does a CSV/TSV dump of
+ * the DDB state.
+ * As it also lists the filesystem, it actually changes the state of the store
+ * during the operation.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class DumpS3GuardTable extends AbstractS3GuardDiagnostic {
+
+  private static final Logger LOG =
+  LoggerFactory.getLogger(DumpS3GuardTable.class);
+
+  /**
+   * Application name.
+   */
+  public static final String NAME = "DumpS3GuardTable";
+
+  /**
+   * Usage.
+   */
+  private static final String USAGE_MESSAGE = NAME
+  + "  ";
+
+  /**
+   * Suffix for the flat list: {@value}.
+   */
+  public static final String FLAT_CSV = "-flat.csv";
+
+  /**
+   * Suffix for the raw S3 dump: {@value}.
+   */
+  public static final String RAW_CSV = "-s3.csv";
+
+  /**
+   * Suffix for the DDB scan: {@value}.
+   */
+  public static final String SCAN_CSV = "-scan.csv";
+
+  /**
+   * Suffix for the second DDB scan: {@value}.
+   */
+  public static final String SCAN2_CSV = "-scan-2.csv";
+
+  /**
+   * Suffix for the treewalk scan of the S3A Filesystem: {@value}.
+   */
+  public static final String TREE_CSV = "-tree.csv";
+
+  /**
+   * Suffix for a recursive treewalk through the metastore: {@value}.
+   */
+  public static final String STORE_CSV = "-store.csv";
+
+  /**
+   * Path in the local filesystem to save the data.
+   */
+  protected String destPath;
+
+  /**
+   * Instantiate.
+   * @param name application name.
+   */
+  public DumpS3GuardTable(final String name) {
+super(name);
+  }
+
+  /**
+   * Instantiate with default name.
+   */
+  public DumpS3GuardTable() {
+this(NAME);
+  }
+
+  /**
+   * Bind to a specific FS + store.
+   * @param fs filesystem
+   * @param store metastore to use
+   * @param destFile the base filename for output
+   * @param uri URI of store -only needed if FS is null.
+   */
+  public DumpS3GuardTable(
+  final S3AFileSystem fs,
+  final DynamoDBMetadataStore store,
+  final File destFile,
+  final URI uri) {
+super(NAME, fs, store, uri);
+this.destPath = destFile.getAbsolutePath();
+  }
+
+  /**
+   * Bind to the argument list, including validating the CLI
+   * @throws Exception 

[GitHub] [hadoop] bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid inconsistencies between DDB and S3

2019-07-10 Thread GitBox
bgaborg commented on a change in pull request #1003: HADOOP-16384: Avoid 
inconsistencies between DDB and S3
URL: https://github.com/apache/hadoop/pull/1003#discussion_r302085607
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardDiagnostic.java
 ##
 @@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.service.launcher.AbstractLaunchableService;
+import org.apache.hadoop.service.launcher.LauncherExitCodes;
+import org.apache.hadoop.service.launcher.ServiceLaunchException;
+
+import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_USAGE;
+
+/**
+ * Entry point for diagnostics operations.
+ */
+public class AbstractS3GuardDiagnostic extends AbstractLaunchableService {
+
+  private S3AFileSystem filesystem;
+
+  private DynamoDBMetadataStore store;
+
+  private URI uri;
+
+  private List<String> arguments;
+
+  /**
+   * Constructor.
+   * @param name entry point name.
+   */
+  public AbstractS3GuardDiagnostic(final String name) {
+super(name);
+  }
+
+  /**
+   * Constructor. If the store is set then that is the store for the operation,
+   * otherwise the filesystem's binding is used instead.
+   * @param name entry point name.
+   * @param filesystem filesystem
+   * @param store optional metastore.
+   * @param uri URI. Must be set if filesystem == null.
+   */
+  public AbstractS3GuardDiagnostic(
+  final String name,
+  @Nullable final S3AFileSystem filesystem,
+  @Nullable final DynamoDBMetadataStore store,
+  @Nullable final URI uri) {
+super(name);
+this.store = store;
+this.filesystem = filesystem;
+if (store == null) {
+  require(filesystem != null, "No filesystem or URI");
+  bindStore(filesystem);
+}
+if (uri == null) {
+  require(filesystem != null, "No filesystem or URI");
+  setUri(filesystem.getUri());
+} else {
+  setUri(uri);
+}
+  }
+
+  /**
+   * Require a condition to hold, otherwise an exception is thrown.
+   * @param condition condition to be true
+   * @param error text on failure.
+   * @throws ServiceLaunchException if the condition is not met
+   */
+  protected static void require(boolean condition, String error) {
+if (!condition) {
+  throw failure(error);
+}
+  }
+
+  /**
+   * Generate a failure exception for throwing.
+   * @param message message
+   * @param ex optional nested exception.
+   * @return an exception to throw
+   */
+  protected static ServiceLaunchException failure(String message, Throwable 
ex) {
+return new ServiceLaunchException(LauncherExitCodes.EXIT_FAIL, message, 
ex);
+  }
+
+  /**
+   * Generate a failure exception for throwing.
+   * @param message message
+   * @return an exception to throw
+   */
+  protected static ServiceLaunchException failure(String message) {
+return new ServiceLaunchException(LauncherExitCodes.EXIT_FAIL, message);
+  }
+
+  @Override
+  public Configuration bindArgs(final Configuration config,
+  final List<String> args)
+  throws Exception {
+this.arguments = args;
+return super.bindArgs(config, args);
+  }
+
+  /**
+   * Get the argument list.
+   * @return the argument list.
+   */
+  protected List<String> getArguments() {
+return arguments;
+  }
+
+  /**
+   * Bind to the store from a CLI argument.
+   * @param fsURI filesystem URI
+   * @throws IOException failure
+   */
+  protected void bindFromCLI(String fsURI)
+  throws IOException {
+Configuration conf = getConfig();
+setUri(fsURI);
+FileSystem fs = FileSystem.get(getUri(), conf);
+require(fs instanceof S3AFileSystem,
+"Not an S3A Filesystem:  " + fsURI);
+filesystem = (S3AFileSystem) fs;
+bindStore(filesystem);
+setUri(fs.getUri());
+  }
+
+  /**
+   * Binds the {@link #store} field to the metastore of
+   * the filesystem