[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-12 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r378419167
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
 ##
 @@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.UUID;
+
+import org.junit.After;
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
+import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Class to test bucket existence api.
+ * See {@link S3AFileSystem#doBucketProbing()}.
+ */
+public class ITestS3ABucketExistence extends AbstractS3ATestBase {
+
+  private FileSystem fs;
+
+  private final String randomBucket =
+  "random-bucket-" + UUID.randomUUID().toString();
+
+  private final URI uri = URI.create(FS_S3A + "://" + randomBucket);
+
+  @Test
+  public void testNoBucketProbing() throws Exception {
+Configuration configuration = getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 0);
+try {
+  fs = FileSystem.get(uri, configuration);
+} catch (IOException ex) {
+  LOG.error("Exception : ", ex);
+  throw ex;
+}
+
+Path path = new Path(uri);
+intercept(FileNotFoundException.class,
+"No such file or directory: " + path,
+() -> fs.getFileStatus(path));
+
+Path src = new Path(fs.getUri() + "/testfile");
+byte[] data = dataset(1024, 'a', 'z');
+intercept(FileNotFoundException.class,
+"The specified bucket does not exist",
+() -> writeDataset(fs, src, data, data.length, 1024 * 1024, true));
+  }
+
+  @Test
+  public void testBucketProbingV1() throws Exception {
+Configuration configuration = getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 1);
+intercept(FileNotFoundException.class,
+() -> FileSystem.get(uri, configuration));
+  }
+
+  @Test
+  public void testBucketProbingV2() throws Exception {
+Configuration configuration = getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 2);
+intercept(FileNotFoundException.class,
+() -> FileSystem.get(uri, configuration));
+  }
+
+  @Test
+  public void testBucketProbingParameterValidation() throws Exception {
+Configuration configuration = getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 3);
+intercept(IllegalArgumentException.class,
+"Value of " + S3A_BUCKET_PROBE + " should be between 0 to 2",
+"Should throw IllegalArgumentException",
+() -> FileSystem.get(uri, configuration));
+configuration.setInt(S3A_BUCKET_PROBE, -1);
+intercept(IllegalArgumentException.class,
+"Value of " + S3A_BUCKET_PROBE + " should be between 0 to 2",
+"Should throw IllegalArgumentException",
+() -> FileSystem.get(uri, configuration));
+  }
+
+  @Override
+  protected Configuration getConfiguration() {
+Configuration configuration = super.getConfiguration();
+S3ATestUtils.disableFilesystemCaching(configuration);
+return configuration;
+  }
+
+  @After
+  public void tearDown() throws Exception {
+IOUtils.cleanupWithLogger(getLogger(), fs);
 
 Review comment:
   no worries


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---

[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-11 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377838398
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
 ##
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.UUID;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
+import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Class to test bucket existence api.
+ * See {@link S3AFileSystem#doBucketProbing()}.
+ */
+public class ITestS3ABucketExistence extends AbstractS3ATestBase {
+
+  private FileSystem fs;
+
+  private final String randomBucket =
+  "random-bucket-" + UUID.randomUUID().toString();
+
+  private final URI uri = URI.create(FS_S3A + "://" + randomBucket);
+
+  @Test
+  public void testNoBucketProbing() throws Exception {
+Configuration configuration = this.getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 0);
+try {
+  fs = FileSystem.get(uri, configuration);
+} catch (IOException ex) {
+  LOG.error("Exception : ", ex);
+  fail("Exception shouldn't have occurred");
+}
+assertNotNull("FileSystem should have been initialized", fs);
+
+Path path = new Path(uri);
+intercept(FileNotFoundException.class,
 
 Review comment:
   potentially brittle, but we can deal with that when the text changes. We 
have found in the past that any test coded to look for AWS error messages is 
brittle against SDK changes. 
   
   Lets just go with this and when things break, catch up


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-11 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377827793
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
 ##
 @@ -452,6 +450,33 @@ public void initialize(URI name, Configuration 
originalConf)
 
   }
 
+  /**
+   * Test bucket existence in S3.
+   * When value of {@link Constants#S3A_BUCKET_PROBE} is set to 0 by client,
+   * bucket existence check is not done to improve performance of
+   * S3AFileSystem initialisation. When set to 1 or 2, bucket existence check
+   * will be performed which is potentially slow.
+   * @throws IOException
+   */
+  private void doBucketProbing() throws IOException {
 
 Review comment:
   yes. This is all just information for people looking at the code


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-11 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377796949
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
 ##
 @@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.UUID;
+
+import org.junit.After;
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
+import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Class to test bucket existence api.
+ * See {@link S3AFileSystem#doBucketProbing()}.
+ */
+public class ITestS3ABucketExistence extends AbstractS3ATestBase {
+
+  private FileSystem fs;
+
+  private final String randomBucket =
+  "random-bucket-" + UUID.randomUUID().toString();
+
+  private final URI uri = URI.create(FS_S3A + "://" + randomBucket);
+
+  @Test
+  public void testNoBucketProbing() throws Exception {
+Configuration configuration = getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 0);
+try {
+  fs = FileSystem.get(uri, configuration);
+} catch (IOException ex) {
+  LOG.error("Exception : ", ex);
+  throw ex;
+}
+
+Path path = new Path(uri);
+intercept(FileNotFoundException.class,
+"No such file or directory: " + path,
+() -> fs.getFileStatus(path));
+
+Path src = new Path(fs.getUri() + "/testfile");
+byte[] data = dataset(1024, 'a', 'z');
+intercept(FileNotFoundException.class,
+"The specified bucket does not exist",
+() -> writeDataset(fs, src, data, data.length, 1024 * 1024, true));
+  }
+
+  @Test
+  public void testBucketProbingV1() throws Exception {
+Configuration configuration = getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 1);
+intercept(FileNotFoundException.class,
+() -> FileSystem.get(uri, configuration));
+  }
+
+  @Test
+  public void testBucketProbingV2() throws Exception {
+Configuration configuration = getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 2);
+intercept(FileNotFoundException.class,
+() -> FileSystem.get(uri, configuration));
+  }
+
+  @Test
+  public void testBucketProbingParameterValidation() throws Exception {
+Configuration configuration = getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 3);
+intercept(IllegalArgumentException.class,
+"Value of " + S3A_BUCKET_PROBE + " should be between 0 to 2",
+"Should throw IllegalArgumentException",
+() -> FileSystem.get(uri, configuration));
+configuration.setInt(S3A_BUCKET_PROBE, -1);
+intercept(IllegalArgumentException.class,
+"Value of " + S3A_BUCKET_PROBE + " should be between 0 to 2",
+"Should throw IllegalArgumentException",
+() -> FileSystem.get(uri, configuration));
+  }
+
+  @Override
+  protected Configuration getConfiguration() {
+Configuration configuration = super.getConfiguration();
+S3ATestUtils.disableFilesystemCaching(configuration);
+return configuration;
+  }
+
+  @After
+  public void tearDown() throws Exception {
+IOUtils.cleanupWithLogger(getLogger(), fs);
 
 Review comment:
   ok. So what's happening then is that your tests are picking up a shared FS 
instance, not the one you've just configured with different bucket init 
settings. your tests aren't doing what you think they are


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL 

[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-11 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377796308
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
 ##
 @@ -608,3 +608,27 @@ with HADOOP-15669.
 
 Other options may be added to `fs.s3a.ssl.channel.mode` in the future as
 further SSL optimizations are made.
+
+## Tuning S3AFileSystem Initialization.
+Any client using S3AFileSystem has to initialize it by providing an S3 bucket
+and configuration.  The init method checks whether the bucket provided is valid,
+which is a slow operation leading to poor performance. We can skip
+bucket validation by configuring `fs.s3a.bucket.probe` as follows:
+
+```xml
+
+  fs.s3a.bucket.probe
+  0
+  
+ The value can be 0, 1 or 2 (default). When set to 0, bucket existence
+ check won't be done during initialization thus making it faster.
+ Though it should be noted that if bucket is not available in S3,
 
 Review comment:
   no, that's fine


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-11 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377555178
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
 ##
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.UUID;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
+import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Class to test bucket existence api.
+ * See {@link S3AFileSystem#doBucketProbing()}.
+ */
+public class ITestS3ABucketExistence extends AbstractS3ATestBase {
+
+  private FileSystem fs;
 
 Review comment:
   actually, given it's non-static it will be unique to each test case. You can 
just override the `teardown()` method and add an `IOUtils.cleanupWithLogger(LOG, 
fs)` & so close the fs variable robustly if it is set. Do call the superclass 
after.
   
   FWIW, the S3A base test suite already retrieves an FS instance for each test 
case, so you can pick that up, it's just a bit fiddlier to configure. Don't 
worry about it here, but you will eventually have to learn your way around that 
test code


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-11 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377552989
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
 ##
 @@ -452,6 +450,33 @@ public void initialize(URI name, Configuration 
originalConf)
 
   }
 
+  /**
+   * Test bucket existence in S3.
+   * When value of {@link Constants#S3A_BUCKET_PROBE} is set to 0 by client,
+   * bucket existence check is not done to improve performance of
+   * S3AFileSystem initialisation. When set to 1 or 2, bucket existence check
+   * will be performed which is potentially slow.
+   * @throws IOException
+   */
+  private void doBucketProbing() throws IOException {
 
 Review comment:
   you just need to add the @RetryPolicy on the method based on the inner ones. 
it's not about actually doing the retries, just declare what it is for people 
looking at it.
   
   The goal is that if we can keep those attributes accurate you just need to 
look at the method and determine the complete retry policy that it has — all 
the way down.
   
   This means we need to add the attribute to all methods, and keep an eye on 
them to make sure that they don't go invalid/out of date after changes 
underneath.
   
   It's a shame we can't automate this -but the need to have them does force us 
to audit the code. Like you say: we mustn't retry around a retry -but we must 
have a retry somewhere above every non-retried operation.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-10 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377166880
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
 ##
 @@ -452,6 +450,33 @@ public void initialize(URI name, Configuration 
originalConf)
 
   }
 
+  /**
+   * Test bucket existence in S3.
+   * When value of {@link Constants#S3A_BUCKET_PROBE} is set to 0 by client,
+   * bucket existence check is not done to improve performance of
+   * S3AFileSystem initialisation. When set to 1 or 2, bucket existence check
+   * will be performed which is potentially slow.
+   * @throws IOException
+   */
+  private void doBucketProbing() throws IOException {
+int bucketProbe = this.getConf()
+.getInt(S3A_BUCKET_PROBE, S3A_BUCKET_PROBE_DEFAULT);
+Preconditions.checkArgument(bucketProbe >= 0 && bucketProbe <= 2,
+"Value of " + S3A_BUCKET_PROBE + " should be between 0 to 2");
+switch (bucketProbe) {
+case 0:
+  break;
+case 1:
+  verifyBucketExists();
+  break;
+case 2:
+  verifyBucketExistsV2();
+  break;
+default:
+  break;
 
 Review comment:
   Add a comment here saying this won't be reached because of the checks above


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-10 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377170722
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
 ##
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.UUID;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
+import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Class to test bucket existence api.
+ * See {@link S3AFileSystem#doBucketProbing()}.
+ */
+public class ITestS3ABucketExistence extends AbstractS3ATestBase {
+
+  private FileSystem fs;
+
+  private final String randomBucket =
+  "random-bucket-" + UUID.randomUUID().toString();
+
+  private final URI uri = URI.create(FS_S3A + "://" + randomBucket);
+
+  @Test
+  public void testNoBucketProbing() throws Exception {
+Configuration configuration = this.getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 0);
+try {
+  fs = FileSystem.get(uri, configuration);
+} catch (IOException ex) {
+  LOG.error("Exception : ", ex);
+  fail("Exception shouldn't have occurred");
+}
+assertNotNull("FileSystem should have been initialized", fs);
+
+Path path = new Path(uri);
+intercept(FileNotFoundException.class,
 
 Review comment:
   What's the full stack here? Because I don't want the FNFE from a missing 
path to be confused with a getFileStatus failure, as that could go on to 
confuse other things


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-10 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377164858
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
 ##
 @@ -452,6 +450,33 @@ public void initialize(URI name, Configuration 
originalConf)
 
   }
 
+  /**
+   * Test bucket existence in S3.
+   * When value of {@link Constants#S3A_BUCKET_PROBE} is set to 0 by client,
+   * bucket existence check is not done to improve performance of
+   * S3AFileSystem initialisation. When set to 1 or 2, bucket existence check
 
 Review comment:
   even though I support this spelling, I'm afraid we need to use the US one. 
That avoids us having field bug reports about misspelt words


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-10 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377168238
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
 ##
 @@ -452,6 +450,33 @@ public void initialize(URI name, Configuration 
originalConf)
 
   }
 
+  /**
+   * Test bucket existence in S3.
+   * When value of {@link Constants#S3A_BUCKET_PROBE} is set to 0 by client,
+   * bucket existence check is not done to improve performance of
+   * S3AFileSystem initialisation. When set to 1 or 2, bucket existence check
+   * will be performed which is potentially slow.
+   * @throws IOException
+   */
+  private void doBucketProbing() throws IOException {
 
 Review comment:
   add a RetryPolicy by looking @ the methods it calls and see what they do
   
   We mustn't have a retry() calling operations which retry themselves anyway, 
as it explodes the #of retries which take place. It's ok to use once() round 
either of them, as the exception translating stuff is a no-op on already 
translated exceptions


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-10 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377170084
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
 ##
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.UUID;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
+import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Class to test bucket existence api.
+ * See {@link S3AFileSystem#doBucketProbing()}.
+ */
+public class ITestS3ABucketExistence extends AbstractS3ATestBase {
+
+  private FileSystem fs;
+
+  private final String randomBucket =
+  "random-bucket-" + UUID.randomUUID().toString();
+
+  private final URI uri = URI.create(FS_S3A + "://" + randomBucket);
+
+  @Test
+  public void testNoBucketProbing() throws Exception {
+Configuration configuration = this.getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 0);
+try {
+  fs = FileSystem.get(uri, configuration);
+} catch (IOException ex) {
+  LOG.error("Exception : ", ex);
+  fail("Exception shouldn't have occurred");
+}
+assertNotNull("FileSystem should have been initialized", fs);
+
+Path path = new Path(uri);
+intercept(FileNotFoundException.class,
+() -> fs.getFileStatus(path));
+
+Path src = new Path(fs.getUri() + "/testfile");
+byte[] data = dataset(1024, 'a', 'z');
+intercept(FileNotFoundException.class,
+() -> writeDataset(fs, src, data, data.length, 1024 * 1024, true));
+  }
+
+  @Test
+  public void testBucketProbingV1() throws Exception {
+Configuration configuration = this.getConfiguration();
 
 Review comment:
   no need for `this.`


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-10 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377169124
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
 ##
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.UUID;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
+import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Class to test bucket existence api.
+ * See {@link S3AFileSystem#doBucketProbing()}.
+ */
+public class ITestS3ABucketExistence extends AbstractS3ATestBase {
+
+  private FileSystem fs;
+
+  private final String randomBucket =
+  "random-bucket-" + UUID.randomUUID().toString();
+
+  private final URI uri = URI.create(FS_S3A + "://" + randomBucket);
+
+  @Test
+  public void testNoBucketProbing() throws Exception {
+Configuration configuration = this.getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 0);
+try {
+  fs = FileSystem.get(uri, configuration);
+} catch (IOException ex) {
+  LOG.error("Exception : ", ex);
+  fail("Exception shouldn't have occurred");
 
 Review comment:
   don't do this...just have the exception thrown all the way up


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-10 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377172646
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
 ##
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.UUID;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
+import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Class to test bucket existence api.
+ * See {@link S3AFileSystem#doBucketProbing()}.
+ */
+public class ITestS3ABucketExistence extends AbstractS3ATestBase {
+
+  private FileSystem fs;
+
+  private final String randomBucket =
+  "random-bucket-" + UUID.randomUUID().toString();
+
+  private final URI uri = URI.create(FS_S3A + "://" + randomBucket);
+
+  @Test
+  public void testNoBucketProbing() throws Exception {
+Configuration configuration = this.getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 0);
 
 Review comment:
   You get a problem in this code because FileSystem.cache() will cache on the 
URI only; if there's an FS in the cache, your settings aren't picked up -you 
will always get the previous instance. That often causes intermittent problems 
with test runs.
   
   1. Use S3ATestUtils.disableFilesystemCaching to turn off caching of the 
filesystems you get via FileSystem.get
   2. and close() them at the end of each test case. You can do this with 
try/finally or a try-with-resources clause
   


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-10 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377169421
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
 ##
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.UUID;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
+import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Class to test bucket existence api.
+ * See {@link S3AFileSystem#doBucketProbing()}.
+ */
+public class ITestS3ABucketExistence extends AbstractS3ATestBase {
+
+  private FileSystem fs;
+
+  private final String randomBucket =
+  "random-bucket-" + UUID.randomUUID().toString();
+
+  private final URI uri = URI.create(FS_S3A + "://" + randomBucket);
+
+  @Test
+  public void testNoBucketProbing() throws Exception {
+Configuration configuration = this.getConfiguration();
+configuration.setInt(S3A_BUCKET_PROBE, 0);
+try {
+  fs = FileSystem.get(uri, configuration);
+} catch (IOException ex) {
+  LOG.error("Exception : ", ex);
+  fail("Exception shouldn't have occurred");
+}
+assertNotNull("FileSystem should have been initialized", fs);
 
 Review comment:
   and no need to worry about this. 


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org



[GitHub] [hadoop] steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init()

2020-02-10 Thread GitBox
steveloughran commented on a change in pull request #1838: HADOOP-16711 Add way 
to skip verifyBuckets check in S3A fs init()
URL: https://github.com/apache/hadoop/pull/1838#discussion_r377173220
 
 

 ##
 File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
 ##
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.UUID;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
+import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Class to test bucket existence api.
+ * See {@link S3AFileSystem#doBucketProbing()}.
+ */
+public class ITestS3ABucketExistence extends AbstractS3ATestBase {
+
+  private FileSystem fs;
 
 Review comment:
   unless you want to share across test cases (you don't) or want to have 
cleanup in the teardown code, move this into a local variable in each test case


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org