This is an automated email from the ASF dual-hosted git repository.

tilman pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_3x by this push:
     new f8e515e56 TIKA-4525: migrate to aws v2
f8e515e56 is described below

commit f8e515e568958d833ea9a998695f0335266deeb1
Author: Tilman Hausherr <[email protected]>
AuthorDate: Fri Oct 24 20:22:06 2025 +0200

    TIKA-4525: migrate to aws v2
---
 .../tika/pipes/s3/tests/PipeIntegrationTests.java  | 50 ++++++++++++++--------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java
index b32304d69..1f7568e4e 100644
--- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java
+++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java
@@ -22,6 +22,7 @@ import java.io.InputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.List;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorCompletionService;
@@ -31,15 +32,17 @@ import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
 
-import com.amazonaws.auth.profile.ProfileCredentialsProvider;
-import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
-import com.amazonaws.services.s3.iterable.S3Objects;
-import com.amazonaws.services.s3.model.S3Object;
-import com.amazonaws.services.s3.model.S3ObjectSummary;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.GetObjectRequest;
+import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
+import software.amazon.awssdk.services.s3.model.S3Object;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -52,41 +55,49 @@ import org.apache.tika.pipes.fetcher.FetcherManager;
 import org.apache.tika.pipes.pipesiterator.CallablePipesIterator;
 import org.apache.tika.pipes.pipesiterator.PipesIterator;
 
+// To enable these tests, fill OUTDIR and bucket, and adjust profile and region if needed.
 @Disabled("turn these into actual tests with mock s3")
 public class PipeIntegrationTests {
 
     private static final Path OUTDIR = Paths.get("");
 
+    /**
+     * This downloads files from a specific bucket.
+     * @throws Exception 
+     */
     @Test
     public void testBruteForce() throws Exception {
-        String region = "";
-        String profile = "";
+        String region = "us-east-1";
+        String profile = "default";
         String bucket = "";
-        AmazonS3 s3Client = AmazonS3ClientBuilder.standard().withRegion(region)
-                .withCredentials(new ProfileCredentialsProvider(profile)).build();
-        s3Client.listObjects(bucket);
+        AwsCredentialsProvider provider = ProfileCredentialsProvider.builder().profileName(profile).build();
+        S3Client s3Client = S3Client.builder().credentialsProvider(provider).region(Region.of(region)).build();
+
         int cnt = 0;
         long sz = 0;
 
-        for (S3ObjectSummary summary : S3Objects.withPrefix(s3Client, bucket, "")) {
-            Path targ = OUTDIR.resolve(summary.getKey());
+        ListObjectsV2Request listObjectsV2Request = ListObjectsV2Request.builder().bucket(bucket).prefix("").build();
+        List<S3Object> s3ObjectList = s3Client.listObjectsV2Paginator(listObjectsV2Request).stream().
+                flatMap(resp -> resp.contents().stream()).collect(Collectors.toList());
+        for (S3Object s3Object : s3ObjectList) {
+            String key = s3Object.key();
+            Path targ = OUTDIR.resolve(key);
             if (Files.isRegularFile(targ)) {
                 continue;
             }
             if (!Files.isDirectory(targ.getParent())) {
                 Files.createDirectories(targ.getParent());
             }
-            System.out
-                    .println("id: " + cnt + " :: " + summary.getKey() + " : " + summary.getSize());
-            S3Object s3Object = s3Client.getObject(bucket, summary.getKey());
-            Files.copy(s3Object.getObjectContent(), targ);
-            summary.getSize();
+            System.out.println("id: " + cnt + " :: " + key + " : " + s3Object.size());
+            GetObjectRequest objectRequest = GetObjectRequest.builder().bucket(bucket).key(key).build();
+            s3Client.getObject(objectRequest, targ);
             cnt++;
-            sz += summary.getSize();
+            sz += s3Object.size();
         }
         System.out.println("iterated: " + cnt + " sz: " + sz);
     }
 
+    // to test this, files must be in the fetcher bucket
     @Test
     public void testS3ToFS() throws Exception {
         Fetcher fetcher = getFetcher("tika-config-s3ToFs.xml", "s3f");
@@ -117,6 +128,7 @@ public class PipeIntegrationTests {
         }
     }
 
+    // to test this, files must be in the iterator bucket
     @Test
     public void testS3ToS3() throws Exception {
         Fetcher fetcher = getFetcher("tika-config-s3Tos3.xml", "s3f");

Reply via email to