kunwp1 commented on code in PR #5569:
URL: https://github.com/apache/texera/pull/5569#discussion_r3399419172


##########
common/workflow-core/src/main/scala/org/apache/texera/service/util/S3StorageClient.scala:
##########
@@ -106,41 +109,44 @@ object S3StorageClient {
     // Ensure the directory prefix ends with `/` to avoid accidental deletions
     val prefix = if (directoryPrefix.endsWith("/")) directoryPrefix else 
directoryPrefix + "/"
 
-    // List objects under the given prefix
-    val listRequest = ListObjectsV2Request
-      .builder()
-      .bucket(bucketName)
-      .prefix(prefix)
-      .build()
-
-    val listResponse = s3Client.listObjectsV2(listRequest)
-
-    // Extract object keys
-    val objectKeys = listResponse.contents().asScala.map(_.key())
-
-    if (objectKeys.nonEmpty) {
-      val objectsToDelete =
-        objectKeys.map(key => 
ObjectIdentifier.builder().key(key).build()).asJava
-
-      val deleteRequest = Delete
-        .builder()
-        .objects(objectsToDelete)
-        .build()
-
-      // Compute MD5 checksum for MinIO if required
-      val md5Hash = MessageDigest
-        .getInstance("MD5")
-        .digest(deleteRequest.toString.getBytes("UTF-8"))
-
-      // Convert object keys to S3 DeleteObjectsRequest format
-      val deleteObjectsRequest = DeleteObjectsRequest
-        .builder()
-        .bucket(bucketName)
-        .delete(deleteRequest)
-        .build()
+    val listRequest = 
ListObjectsV2Request.builder().bucket(bucketName).prefix(prefix).build()
+
+    // Paginate across all pages, then delete in batches within the 
per-request key limit.
+    s3Client
+      .listObjectsV2Paginator(listRequest)
+      .contents()
+      .asScala
+      .iterator
+      .map(obj => ObjectIdentifier.builder().key(obj.key()).build())
+      .grouped(MAX_KEYS_PER_DELETE_REQUEST)
+      .foreach { batch =>
+        val response = s3Client.deleteObjects(
+          DeleteObjectsRequest
+            .builder()
+            .bucket(bucketName)
+            .delete(Delete.builder().objects(batch.asJava).build())
+            .build()
+        )
+        throwOnDeleteErrors(prefix, response)

Review Comment:
   I addressed it by collecting failures across all batches and throwing a 
single exception at the end.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to