This is an automated email from the ASF dual-hosted git repository.

tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new fb76290e3 TIKA-4525: migrate to aws v2
fb76290e3 is described below

commit fb76290e363ef3ea5f0d8e5b840965d94072da73
Author: Tilman Hausherr <[email protected]>
AuthorDate: Fri Oct 24 10:56:34 2025 +0200

    TIKA-4525: migrate to aws v2
---
 .../tika-pipes-s3-integration-tests/pom.xml        |  5 ---
 .../tika/pipes/s3/tests/PipeIntegrationTests.java  | 42 +++++++++++++---------
 tika-parent/pom.xml                                |  7 ----
 3 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/pom.xml b/tika-integration-tests/tika-pipes-s3-integration-tests/pom.xml
index db6bb518f..54c28ea5d 100644
--- a/tika-integration-tests/tika-pipes-s3-integration-tests/pom.xml
+++ b/tika-integration-tests/tika-pipes-s3-integration-tests/pom.xml
@@ -79,11 +79,6 @@
       <artifactId>s3</artifactId>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>com.amazonaws</groupId>
-      <artifactId>aws-java-sdk-s3</artifactId>
-      <scope>test</scope>
-    </dependency>
   </dependencies>
 
   <build>
diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java
index e1e6ea78c..96df9cd56 100644
--- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java
+++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java
@@ -22,6 +22,7 @@ import java.io.InputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.List;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorCompletionService;
@@ -32,14 +33,17 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import com.amazonaws.auth.profile.ProfileCredentialsProvider;
-import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
-import com.amazonaws.services.s3.iterable.S3Objects;
-import com.amazonaws.services.s3.model.S3Object;
-import com.amazonaws.services.s3.model.S3ObjectSummary;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider;
+import software.amazon.awssdk.core.ResponseInputStream;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.GetObjectRequest;
+import software.amazon.awssdk.services.s3.model.GetObjectResponse;
+import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
+import software.amazon.awssdk.services.s3.model.S3Object;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -62,27 +66,31 @@ public class PipeIntegrationTests {
         String region = "";
         String profile = "";
         String bucket = "";
-        AmazonS3 s3Client = AmazonS3ClientBuilder.standard().withRegion(region)
-                .withCredentials(new 
ProfileCredentialsProvider(profile)).build();
-        s3Client.listObjects(bucket);
+        AwsCredentialsProvider provider = 
ProfileCredentialsProvider.builder().profileName(profile).build();
+        S3Client s3Client = 
S3Client.builder().credentialsProvider(provider).region(Region.of(region)).build();
+
         int cnt = 0;
         long sz = 0;
 
-        for (S3ObjectSummary summary : S3Objects.withPrefix(s3Client, bucket, 
"")) {
-            Path targ = OUTDIR.resolve(summary.getKey());
+        ListObjectsV2Request listObjectsV2Request = 
ListObjectsV2Request.builder().bucket(bucket).prefix("").build();
+        List<S3Object> s3ObjectList = 
s3Client.listObjectsV2Paginator(listObjectsV2Request).stream().
+                flatMap(resp -> resp.contents().stream()).toList();
+        for (S3Object s3Object : s3ObjectList) {
+            String key = s3Object.key();
+            Path targ = OUTDIR.resolve(key);
             if (Files.isRegularFile(targ)) {
                 continue;
             }
             if (!Files.isDirectory(targ.getParent())) {
                 Files.createDirectories(targ.getParent());
             }
-            System.out
-                    .println("id: " + cnt + " :: " + summary.getKey() + " : " 
+ summary.getSize());
-            S3Object s3Object = s3Client.getObject(bucket, summary.getKey());
-            Files.copy(s3Object.getObjectContent(), targ);
-            summary.getSize();
+            System.out.println("id: " + cnt + " :: " + key + " : " + 
s3Object.size());
+            GetObjectRequest objectRequest = 
GetObjectRequest.builder().bucket(bucket).key(key).build();
+            try (ResponseInputStream<GetObjectResponse> is = 
s3Client.getObject(objectRequest)) {
+                Files.copy(is, targ);
+            }
             cnt++;
-            sz += summary.getSize();
+            sz += s3Object.size();
         }
         System.out.println("iterated: " + cnt + " sz: " + sz);
     }
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 3d7de8b0e..315dd22ed 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -601,13 +601,6 @@
         <artifactId>biz.aQute.bndlib</artifactId>
         <version>${biz.aqute.version}</version>
       </dependency>
-      <dependency>
-        <groupId>com.amazonaws</groupId>
-        <artifactId>aws-java-sdk-bom</artifactId>
-        <version>${aws.version}</version>
-        <type>pom</type>
-        <scope>import</scope>
-      </dependency>
       <dependency>
         <groupId>software.amazon.awssdk</groupId>
         <artifactId>bom</artifactId>

Reply via email to