[ https://issues.apache.org/jira/browse/HADOOP-18112?focusedWorklogId=737871&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-737871 ]

ASF GitHub Bot logged work on HADOOP-18112:
-------------------------------------------

                Author: ASF GitHub Bot
            Created on: 08/Mar/22 00:00
            Start Date: 08/Mar/22 00:00
    Worklog Time Spent: 10m 
      Work Description: steveloughran commented on a change in pull request 
#4045:
URL: https://github.com/apache/hadoop/pull/4045#discussion_r821191725



##########
File path: 
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLists.java
##########
@@ -79,6 +81,39 @@ public void testItrLinkedLists() {
     Assert.assertEquals(4, list.size());
   }
 
+  @Test
+  public void testListsPartition() {
+    List<String> list = new ArrayList<>();
+    list.add("a");
+    list.add("b");
+    list.add("c");
+    list.add("d");
+    list.add("e");
+    List<List<String>> res = Lists.
+            partition(list, 2);
+    Assertions.assertThat(res.size())

Review comment:
       use the `hasSize()` AssertJ assert; it will dump the list on a failure
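   For instance, a minimal sketch of that suggestion (the expected count of 3 is only an assumption taken from the five-element list and page size 2 in the hunk above):
   
   ```java
   // assertThat(list).hasSize(n) reports the full list contents on failure,
   // unlike asserting on a bare res.size() int
   Assertions.assertThat(res)
       .describedAs("partitions of %s", list)
       .hasSize(3);
   ```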

##########
File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java
##########
@@ -72,6 +76,40 @@ public void testMultiObjectDeleteNoFile() throws Throwable {
     removeKeys(getFileSystem(), "ITestS3AFailureHandling/missingFile");
   }
 
+  /**
+   * See HADOOP-18112.
+   */
+  @Test
+  public void testMultiObjectDeleteLargeNumKeys() throws Exception {
+    S3AFileSystem fs =  getFileSystem();
+    Path path = path("largeDir");
+    mkdirs(path);
+    createFiles(fs, path, 1, 1005, 0);
+    List<String> keys = new ArrayList<>();
+    RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
+            fs.listFiles(path, false);
+    while (locatedFileStatusRemoteIterator.hasNext()) {
+      Path file = locatedFileStatusRemoteIterator.next().getPath();
+      keys.add(fs.pathToKey(file));
+    }
+    // After implementation of paging during multi object deletion,
+    // no exception is encountered.
+    Long bulkDeleteReqBefore = getNumberOfBulkDeleteRequestsMadeTillNow(fs);

Review comment:
       I was going to suggest adding this to `ITestS3ADeleteCost`, but you 
want to get at the private method, don't you?

##########
File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerToolOperations.java
##########
@@ -70,7 +70,7 @@
    * @throws IOException other IO Exception.
    */
   @Retries.RetryMixed
-  DeleteObjectsResult removeKeys(
+  void removeKeys(
       List<DeleteObjectsRequest.KeyVersion> keysToDelete,
       boolean deleteFakeDir,
       boolean quiet)

Review comment:
       cut this too

##########
File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
##########
@@ -2847,16 +2846,21 @@ private DeleteObjectsResult removeKeysS3(
     }
     if (keysToDelete.isEmpty()) {
       // exit fast if there are no keys to delete
-      return null;
+      return;
     }
     for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
       blockRootDelete(keyVersion.getKey());
     }
-    DeleteObjectsResult result = null;
     try {
       if (enableMultiObjectsDelete) {
-        result = deleteObjects(
-            getRequestFactory().newBulkDeleteRequest(keysToDelete, quiet));
+        // Multi object deletion of more than 1000 keys is not supported
+        // by s3. So we are paging the keys by page size whose default
+        // value is 250.

Review comment:
       best to leave the value out...if we ever change that default, this 
comment becomes inconsistent
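   For example, the comment could describe the behaviour without pinning the number (wording here is only a suggestion, not text from the PR):
   
   ```java
   // S3 rejects bulk deletes of more than 1000 keys, so the keys are
   // paged by the configured delete page size before issuing requests.
   ```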

##########
File path: 
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Lists.java
##########
@@ -232,4 +232,20 @@ private static int saturatedCast(long value) {
     return addAll(addTo, elementsToAdd.iterator());
   }
 
+  public static <T> List<List<T>> partition(List<T> originalList, int pageSize) {
+
+    Preconditions.checkArgument(originalList != null && originalList.size() > 0,
+            "Invalid original list");
+    Preconditions.checkArgument(pageSize > 0, "Page size should " +
+            "be greater than 0 for performing partition");
+
+    List<List<T>> result = new ArrayList<>();

Review comment:
       this always returns a new list even when the source fits within a single 
page; seems like overkill
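   One possible shape for that, purely as a sketch (the single-page early return is an assumption, and `Collections.singletonList` would need `java.util.Collections` imported):
   
   ```java
   // if the source already fits in one page, return it as the only partition
   // instead of copying it into a fresh sub-list
   if (originalList.size() <= pageSize) {
     return Collections.singletonList(originalList);
   }
   ```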

##########
File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java
##########
@@ -72,6 +76,40 @@ public void testMultiObjectDeleteNoFile() throws Throwable {
     removeKeys(getFileSystem(), "ITestS3AFailureHandling/missingFile");
   }
 
+  /**
+   * See HADOOP-18112.
+   */
+  @Test
+  public void testMultiObjectDeleteLargeNumKeys() throws Exception {
+    S3AFileSystem fs =  getFileSystem();
+    Path path = path("largeDir");
+    mkdirs(path);
+    createFiles(fs, path, 1, 1005, 0);
+    List<String> keys = new ArrayList<>();
+    RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
+            fs.listFiles(path, false);
+    while (locatedFileStatusRemoteIterator.hasNext()) {

Review comment:
       Try using RemoteIterators here to see how well it works...do a mapping 
and then the toList() call
   
   ```java
   toList(mappingRemoteIterator(locatedFileStatusRemoteIterator, (n) ->
       fs.pathToKey(n.getPath())));
   ```
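   
   Spelled out a little further, a sketch of how that could look in the test, assuming static imports from `org.apache.hadoop.util.functional.RemoteIterators`:
   
   ```java
   import static org.apache.hadoop.util.functional.RemoteIterators.mappingRemoteIterator;
   import static org.apache.hadoop.util.functional.RemoteIterators.toList;
   
   // map each located status to its S3 key, then collect the remote iterator into a list
   List<String> keys = toList(
       mappingRemoteIterator(fs.listFiles(path, false),
           status -> fs.pathToKey(status.getPath())));
   ```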

##########
File path: 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java
##########
@@ -37,6 +40,7 @@
 import java.nio.file.AccessDeniedException;
 
 import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+import static org.apache.hadoop.fs.s3a.impl.ITestPartialRenamesDeletes.createFiles;

Review comment:
       do you think we should be moving this somewhere more reusable? 
`S3ATestUtils`, etc.?

##########
File path: 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
##########
@@ -2847,16 +2846,21 @@ private DeleteObjectsResult removeKeysS3(
     }
     if (keysToDelete.isEmpty()) {
       // exit fast if there are no keys to delete
-      return null;
+      return;
     }
     for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
       blockRootDelete(keyVersion.getKey());
     }
-    DeleteObjectsResult result = null;
     try {
       if (enableMultiObjectsDelete) {
-        result = deleteObjects(
-            getRequestFactory().newBulkDeleteRequest(keysToDelete, quiet));
+        // Multi object deletion of more than 1000 keys is not supported
+        // by s3. So we are paging the keys by page size whose default
+        // value is 250.
+        for (List<DeleteObjectsRequest.KeyVersion> batchOfKeysToDelete :

Review comment:
       only need to partition when the size is greater than the threshold; that 
avoids building new lists
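   A rough sketch of that idea (assuming the page size is held in a field named `pageSize` here, since the constant's name isn't visible in the hunk):
   
   ```java
   // single page: issue one bulk delete without building any intermediate lists
   if (keysToDelete.size() <= pageSize) {
     deleteObjects(getRequestFactory().newBulkDeleteRequest(keysToDelete, quiet));
   } else {
     // oversized batch: page the keys and issue one bulk delete per page
     for (List<DeleteObjectsRequest.KeyVersion> batch :
         Lists.partition(keysToDelete, pageSize)) {
       deleteObjects(getRequestFactory().newBulkDeleteRequest(batch, quiet));
     }
   }
   ```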





Issue Time Tracking
-------------------

    Worklog Id:     (was: 737871)
    Time Spent: 2h 10m  (was: 2h)

> Rename operation fails during multi object delete of size more than 1000.
> -------------------------------------------------------------------------
>
>                 Key: HADOOP-18112
>                 URL: https://issues.apache.org/jira/browse/HADOOP-18112
>             Project: Hadoop Common
>          Issue Type: Sub-task
>          Components: fs/s3
>    Affects Versions: 3.3.1
>            Reporter: Mukund Thakur
>            Assignee: Mukund Thakur
>            Priority: Critical
>              Labels: pull-request-available
>          Time Spent: 2h 10m
>  Remaining Estimate: 0h
>
> We see the below exception during a multi-object delete of more than 1000
> keys in one go during a rename operation.
>  
> {noformat}
> org.apache.hadoop.fs.s3a.AWSBadRequestException: rename 
> s3a://ms-targeting-prod-cdp-aws-dr-bkt/data/ms-targeting-prod-hbase/hbase/.tmp/data/default/dr-productionL.Address
>  to 
> s3a://ms-targeting-prod-cdp-aws-dr-bkt/user/root/.Trash/Current/data/ms-targetin
> g-prod-hbase/hbase/.tmp/data/default/dr-productionL.Address16438377847941643837797901
>  on 
> s3a://ms-targeting-prod-cdp-aws-dr-bkt/data/ms-targeting-prod-hbase/hbase/.tmp/data/default/dr-productionL.Address:
>  com.amazonaws.services.s3.model.AmazonS3Exception
> : The XML you provided was not well-formed or did not validate against our 
> published schema (Service: Amazon S3; Status Code: 400; Error Code: 
> MalformedXML; Request ID: XZ8PGAQHP0FGHPYS; S3 Extended Request ID: 
> vTG8c+koukzQ8yMRGd9BvWfmRwkCZ3fAs/EOiAV5S9E
> JjLqFTNCgDOKokuus5W600Z5iOa/iQBI=; Proxy: null), S3 Extended Request ID: 
> vTG8c+koukzQ8yMRGd9BvWfmRwkCZ3fAs/EOiAV5S9EJjLqFTNCgDOKokuus5W600Z5iOa/iQBI=:MalformedXML:
>  The XML you provided was not well-formed or did not validate against our 
> published schema 
> (Service: Amazon S3; Status Code: 400; Error Code: MalformedXML; Request ID: 
> XZ8PGAQHP0FGHPYS; S3 Extended Request ID: 
> vTG8c+koukzQ8yMRGd9BvWfmRwkCZ3fAs/EOiAV5S9EJjLqFTNCgDOKokuus5W600Z5iOa/iQBI=; 
> Proxy: null)
>         at 
> org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:247)
>         at 
> org.apache.hadoop.fs.s3a.s3guard.RenameTracker.convertToIOException(RenameTracker.java:267)
>         at 
> org.apache.hadoop.fs.s3a.s3guard.RenameTracker.deleteFailed(RenameTracker.java:198)
>         at 
> org.apache.hadoop.fs.s3a.impl.RenameOperation.removeSourceObjects(RenameOperation.java:706)
>         at 
> org.apache.hadoop.fs.s3a.impl.RenameOperation.completeActiveCopiesAndDeleteSources(RenameOperation.java:274)
>         at 
> org.apache.hadoop.fs.s3a.impl.RenameOperation.recursiveDirectoryRename(RenameOperation.java:484)
>         at 
> org.apache.hadoop.fs.s3a.impl.RenameOperation.execute(RenameOperation.java:312)
>         at 
> org.apache.hadoop.fs.s3a.S3AFileSystem.innerRename(S3AFileSystem.java:1912)
>         at 
> org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$rename$7(S3AFileSystem.java:1759)
>         at 
> org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.lambda$trackDurationOfOperation$5(IOStatisticsBinding.java:499)
>         at 
> org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration(IOStatisticsBinding.java:444)
>         at 
> org.apache.hadoop.fs.s3a.S3AFileSystem.trackDurationAndSpan(S3AFileSystem.java:2250)
>         at 
> org.apache.hadoop.fs.s3a.S3AFileSystem.rename(S3AFileSystem.java:1757)
>         at org.apache.hadoop.fs.FileSystem.rename(FileSystem.java:1605)
>         at 
> org.apache.hadoop.fs.TrashPolicyDefault.moveToTrash(TrashPolicyDefault.java:186)
>         at org.apache.hadoop.fs.Trash.moveToTrash(Trash.java:110){noformat}


