HADOOP-12292. Make use of DeleteObjects optional. (Thomas Demoor via stevel)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/60474a76 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/60474a76 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/60474a76 Branch: refs/heads/branch-2 Commit: 60474a769eaabb5615153d43e9d81e0a1d0c204d Parents: e4c01b8 Author: Steve Loughran <[email protected]> Authored: Sat Feb 6 18:17:30 2016 +0000 Committer: Steve Loughran <[email protected]> Committed: Sat Feb 6 18:20:24 2016 +0000 ---------------------------------------------------------------------- hadoop-common-project/hadoop-common/CHANGES.txt | 3 ++ .../src/main/resources/core-default.xml | 9 ++++ hadoop-project/pom.xml | 3 +- .../org/apache/hadoop/fs/s3a/Constants.java | 3 ++ .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 57 +++++++++++++------- .../src/site/markdown/tools/hadoop-aws/index.md | 9 ++++ .../hadoop/fs/s3a/scale/S3AScaleTestBase.java | 11 +++- .../s3a/scale/TestS3ADeleteFilesOneByOne.java | 40 ++++++++++++++ .../fs/s3a/scale/TestS3ADeleteManyFiles.java | 2 +- 9 files changed, 115 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/60474a76/hadoop-common-project/hadoop-common/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 18a1af8..a8497c7 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -445,6 +445,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12755. Fix typo in defaultFS warning message. (wang) + HADOOP-12292. Make use of DeleteObjects optional. + (Thomas Demoor via stevel) + OPTIMIZATIONS HADOOP-11785. 
Reduce the number of listStatus operation in distcp http://git-wip-us.apache.org/repos/asf/hadoop/blob/60474a76/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 252c42e..e1cd883 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -889,6 +889,15 @@ for ldap providers in the same way as above does. </property> <property> + <name>fs.s3a.multiobjectdelete.enable</name> + <value>true</value> + <description>When enabled, multiple single-object delete requests are replaced by + a single 'delete multiple objects'-request, reducing the number of requests. + Beware: legacy S3-compatible object stores might not support this request. + </description> +</property> + +<property> <name>fs.s3a.acl.default</name> <description>Set a canned ACL for newly created and copied objects. 
Value may be private, public-read, public-read-write, authenticated-read, log-delivery-write, http://git-wip-us.apache.org/repos/asf/hadoop/blob/60474a76/hadoop-project/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 55b2256..3f0dd07 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -111,6 +111,7 @@ <exec-maven-plugin.version>1.3.1</exec-maven-plugin.version> <make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version> <native-maven-plugin.version>1.0-alpha-8</native-maven-plugin.version> + <surefire.fork.timeout>900</surefire.fork.timeout> </properties> <dependencyManagement> @@ -1172,7 +1173,7 @@ <artifactId>maven-surefire-plugin</artifactId> <configuration> <reuseForks>false</reuseForks> - <forkedProcessTimeoutInSeconds>900</forkedProcessTimeoutInSeconds> + <forkedProcessTimeoutInSeconds>${surefire.fork.timeout}</forkedProcessTimeoutInSeconds> <argLine>${maven-surefire-plugin.argLine}</argLine> <environmentVariables> <HADOOP_COMMON_HOME>${hadoop.common.build.dir}</HADOOP_COMMON_HOME> http://git-wip-us.apache.org/repos/asf/hadoop/blob/60474a76/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 60d4b9b..faa760c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -84,6 +84,9 @@ public class Constants { public static final String MIN_MULTIPART_THRESHOLD = "fs.s3a.multipart.threshold"; public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE; + //enable multiobject-delete calls? 
+ public static final String ENABLE_MULTI_DELETE = "fs.s3a.multiobjectdelete.enable"; + // comma separated list of directories public static final String BUFFER_DIR = "fs.s3a.buffer.dir"; http://git-wip-us.apache.org/repos/asf/hadoop/blob/60474a76/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 6ede9f2..adf6178 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -41,6 +41,7 @@ import com.amazonaws.auth.AWSCredentialsProviderChain; import com.amazonaws.auth.InstanceProfileCredentialsProvider; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.model.CannedAccessControlList; +import com.amazonaws.services.s3.model.DeleteObjectRequest; import com.amazonaws.services.s3.model.DeleteObjectsRequest; import com.amazonaws.services.s3.model.ListObjectsRequest; import com.amazonaws.services.s3.model.ObjectListing; @@ -85,6 +86,7 @@ public class S3AFileSystem extends FileSystem { private String bucket; private int maxKeys; private long partSize; + private boolean enableMultiObjectsDelete; private TransferManager transfers; private ThreadPoolExecutor threadPoolExecutor; private long multiPartThreshold; @@ -252,6 +254,7 @@ public class S3AFileSystem extends FileSystem { partSize = conf.getLong(MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE); multiPartThreshold = conf.getLong(MIN_MULTIPART_THRESHOLD, DEFAULT_MIN_MULTIPART_THRESHOLD); + enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true); if (partSize < 5 * 1024 * 1024) { LOG.error(MULTIPART_SIZE + " must be at least 5 MB"); @@ -580,11 +583,7 @@ public class S3AFileSystem 
extends FileSystem { copyFile(summary.getKey(), newDstKey); if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) { - DeleteObjectsRequest deleteRequest = - new DeleteObjectsRequest(bucket).withKeys(keysToDelete); - s3.deleteObjects(deleteRequest); - statistics.incrementWriteOps(1); - keysToDelete.clear(); + removeKeys(keysToDelete, true); } } @@ -592,11 +591,8 @@ public class S3AFileSystem extends FileSystem { objects = s3.listNextBatchOfObjects(objects); statistics.incrementReadOps(1); } else { - if (keysToDelete.size() > 0) { - DeleteObjectsRequest deleteRequest = - new DeleteObjectsRequest(bucket).withKeys(keysToDelete); - s3.deleteObjects(deleteRequest); - statistics.incrementWriteOps(1); + if (!keysToDelete.isEmpty()) { + removeKeys(keysToDelete, false); } break; } @@ -610,6 +606,36 @@ public class S3AFileSystem extends FileSystem { return true; } + /** + * A helper method to delete a list of keys on a s3-backend. + * + * @param keysToDelete collection of keys to delete on the s3-backend + * @param clearKeys clears the keysToDelete-list after processing the list + * when set to true + */ + private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete, + boolean clearKeys) { + if (enableMultiObjectsDelete) { + DeleteObjectsRequest deleteRequest + = new DeleteObjectsRequest(bucket).withKeys(keysToDelete); + s3.deleteObjects(deleteRequest); + statistics.incrementWriteOps(1); + } else { + int writeops = 0; + + for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) { + s3.deleteObject( + new DeleteObjectRequest(bucket, keyVersion.getKey())); + writeops++; + } + + statistics.incrementWriteOps(writeops); + } + if (clearKeys) { + keysToDelete.clear(); + } + } + /** Delete a file. * * @param f the path to delete. 
@@ -684,11 +710,7 @@ public class S3AFileSystem extends FileSystem { } if (keys.size() == MAX_ENTRIES_TO_DELETE) { - DeleteObjectsRequest deleteRequest = - new DeleteObjectsRequest(bucket).withKeys(keys); - s3.deleteObjects(deleteRequest); - statistics.incrementWriteOps(1); - keys.clear(); + removeKeys(keys, true); } } @@ -697,10 +719,7 @@ public class S3AFileSystem extends FileSystem { statistics.incrementReadOps(1); } else { if (!keys.isEmpty()) { - DeleteObjectsRequest deleteRequest = - new DeleteObjectsRequest(bucket).withKeys(keys); - s3.deleteObjects(deleteRequest); - statistics.incrementWriteOps(1); + removeKeys(keys, false); } break; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/60474a76/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index b27c050..2f3352b 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -273,6 +273,15 @@ If you do any of these: change your credentials immediately! </property> <property> + <name>fs.s3a.multiobjectdelete.enable</name> + <value>true</value> + <description>When enabled, multiple single-object delete requests are replaced by + a single 'delete multiple objects'-request, reducing the number of requests. + Beware: legacy S3-compatible object stores might not support this request. + </description> + </property> + + <property> <name>fs.s3a.acl.default</name> <description>Set a canned ACL for newly created and copied objects. 
Value may be private, public-read, public-read-write, authenticated-read, log-delivery-write, http://git-wip-us.apache.org/repos/asf/hadoop/blob/60474a76/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java index e0cbc92..e44a90e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java @@ -36,13 +36,21 @@ import static org.junit.Assume.assumeTrue; /** * Base class for scale tests; here is where the common scale configuration - * keys are defined + * keys are defined. */ public class S3AScaleTestBase { public static final String SCALE_TEST = "scale.test."; + + /** + * The number of operations to perform: {@value} + */ public static final String KEY_OPERATION_COUNT = SCALE_TEST + "operation.count"; + + /** + * The default number of operations to perform: {@value} + */ public static final long DEFAULT_OPERATION_COUNT = 2005; protected S3AFileSystem fs; @@ -71,6 +79,7 @@ public class S3AScaleTestBase { @Before public void setUp() throws Exception { conf = createConfiguration(); + LOG.info("Scale test operation count = {}", getOperationCount()); fs = S3ATestUtils.createTestFileSystem(conf); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/60474a76/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteFilesOneByOne.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteFilesOneByOne.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteFilesOneByOne.java new file mode 100644 index 0000000..77c85a9 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteFilesOneByOne.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.scale; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.Constants; +import org.junit.Test; + +import java.io.IOException; + +public class TestS3ADeleteFilesOneByOne extends TestS3ADeleteManyFiles { + + @Override + protected Configuration createConfiguration() { + Configuration configuration = super.createConfiguration(); + configuration.setBoolean(Constants.ENABLE_MULTI_DELETE, false); + return configuration; + } + + @Test + public void testOpenCreate() throws IOException { + + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/60474a76/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java index c913a67..d521ba8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java @@ -61,7 +61,7 @@ public class TestS3ADeleteManyFiles extends S3AScaleTestBase { // use Executor to speed up file creation ExecutorService exec = Executors.newFixedThreadPool(16); final ExecutorCompletionService<Boolean> completionService = - new ExecutorCompletionService<Boolean>(exec); + new ExecutorCompletionService<>(exec); try { final byte[] data = ContractTestUtils.dataset(testBufferSize, 'a', 'z');
