This is an automated email from the ASF dual-hosted git repository.

tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 0bc03abab TIKA-4524: migrate to aws v2
0bc03abab is described below

commit 0bc03abab213294cfeb3b1d489f949349aee071e
Author: Tilman Hausherr <[email protected]>
AuthorDate: Sat Oct 18 10:44:11 2025 +0200

    TIKA-4524: migrate to aws v2
---
 .../tika-pipes-s3-integration-tests/pom.xml        |  5 ++
 .../tika/pipes/s3/tests/S3PipeIntegrationTest.java | 73 ++++++++++++++--------
 2 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/pom.xml 
b/tika-integration-tests/tika-pipes-s3-integration-tests/pom.xml
index 8137e8154..54c28ea5d 100644
--- a/tika-integration-tests/tika-pipes-s3-integration-tests/pom.xml
+++ b/tika-integration-tests/tika-pipes-s3-integration-tests/pom.xml
@@ -74,6 +74,11 @@
       <artifactId>log4j-slf4j2-impl</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>software.amazon.awssdk</groupId>
+      <artifactId>s3</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <build>
diff --git 
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/S3PipeIntegrationTest.java
 
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/S3PipeIntegrationTest.java
index cb2188932..3584f4906 100644
--- 
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/S3PipeIntegrationTest.java
+++ 
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/S3PipeIntegrationTest.java
@@ -18,19 +18,18 @@ package org.apache.tika.pipes.s3.tests;
 
 import java.io.File;
 import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.time.Duration;
 import java.time.temporal.ChronoUnit;
+import java.util.Base64;
 import java.util.HashSet;
 import java.util.Set;
 
-import com.amazonaws.auth.AWSStaticCredentialsProvider;
-import com.amazonaws.auth.BasicAWSCredentials;
-import com.amazonaws.client.builder.AwsClientBuilder;
-import com.amazonaws.regions.Regions;
-import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
-import com.amazonaws.services.s3.model.S3Object;
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.jetbrains.annotations.NotNull;
 import org.junit.jupiter.api.AfterAll;
@@ -42,9 +41,20 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.testcontainers.containers.DockerComposeContainer;
 import org.testcontainers.junit.jupiter.Testcontainers;
-import org.testcontainers.shaded.org.apache.commons.io.FileUtils;
 import org.testcontainers.shaded.org.hamcrest.MatcherAssert;
 import org.testcontainers.shaded.org.hamcrest.Matchers;
+import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
+import software.amazon.awssdk.core.ResponseInputStream;
+import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.S3Configuration;
+import software.amazon.awssdk.services.s3.model.ChecksumAlgorithm;
+import software.amazon.awssdk.services.s3.model.CreateBucketRequest;
+import software.amazon.awssdk.services.s3.model.GetObjectRequest;
+import software.amazon.awssdk.services.s3.model.GetObjectResponse;
+import software.amazon.awssdk.services.s3.model.PutObjectRequest;
 
 import org.apache.tika.cli.TikaCLI;
 import org.apache.tika.pipes.core.HandlerConfig;
@@ -65,14 +75,14 @@ class S3PipeIntegrationTest {
     private static final String FETCH_BUCKET = "fetch-bucket";
     private static final String EMIT_BUCKET = "emit-bucket";
 
-    private static final String REGION = Regions.US_EAST_1.getName();
+    private static final Region REGION = Region.US_EAST_1;
 
-    private AmazonS3 s3Client;
+    private S3Client s3Client;
 
     private final File testFileFolder = new File("target", "test-files");
     private final Set<String> testFiles = new HashSet<>();
 
-    private void createTestFiles() {
+    private void createTestFiles() throws NoSuchAlgorithmException {
         if (testFileFolder.mkdirs()) {
             LOG.info("Created test folder: {}", testFileFolder);
         }
@@ -80,13 +90,24 @@ class S3PipeIntegrationTest {
         for (int i = 0; i < numDocs; ++i) {
             String nextFileName = "test-" + i + ".html";
             testFiles.add(nextFileName);
-            s3Client.putObject(FETCH_BUCKET, nextFileName,
-                    "<html><body>body-of-" + nextFileName + "</body></html>");
+            String s = "<html><body>body-of-" + nextFileName + 
"</body></html>";
+            byte[] bytes = s.getBytes(StandardCharsets.US_ASCII);
+            // checksum must be done explicitly, or we get an exception:
+            // "The provided 'x-amz-content-sha256' header does not match what 
was computed"
+            // https://github.com/minio/minio/issues/17662
+            // https://github.com/minio/minio/issues/20845
+            MessageDigest md = MessageDigest.getInstance("SHA256");
+            byte [] hash = md.digest(bytes);
+            String enc64 = Base64.getEncoder().encodeToString(hash);
+            PutObjectRequest request = 
PutObjectRequest.builder().bucket(FETCH_BUCKET).key(nextFileName).
+                    
checksumAlgorithm(ChecksumAlgorithm.SHA256).checksumSHA256(enc64).build();
+            RequestBody requestBody = RequestBody.fromBytes(bytes);
+            s3Client.putObject(request, requestBody);
         }
     }
 
     @BeforeAll
-    void setupMinio() {
+    void setupMinio() throws URISyntaxException {
         minioContainer.start();
         initializeS3Client();
     }
@@ -96,20 +117,21 @@ class S3PipeIntegrationTest {
         minioContainer.close();
     }
 
-    private void initializeS3Client() {
-        AwsClientBuilder.EndpointConfiguration endpoint =
-                new AwsClientBuilder.EndpointConfiguration(MINIO_ENDPOINT, 
REGION);
-        s3Client = AmazonS3ClientBuilder.standard().withCredentials(
-                        new AWSStaticCredentialsProvider(new 
BasicAWSCredentials(ACCESS_KEY, SECRET_KEY)))
-                
.withEndpointConfiguration(endpoint).withPathStyleAccessEnabled(true).build();
+    private void initializeS3Client() throws URISyntaxException {
+        AwsBasicCredentials awsCreds = AwsBasicCredentials.create(ACCESS_KEY, 
SECRET_KEY);
+        // https://github.com/aws/aws-sdk-java-v2/discussions/3536
+        StaticCredentialsProvider credentialsProvider = 
StaticCredentialsProvider.create(awsCreds);
+        S3Configuration s3c = 
S3Configuration.builder().pathStyleAccessEnabled(true).build(); // SO11228792
+        s3Client = S3Client.builder().serviceConfiguration(s3c).region(REGION).
+                credentialsProvider(credentialsProvider).endpointOverride(new 
URI(MINIO_ENDPOINT)).build();
     }
 
     @Test
     void s3PipelineIteratorS3FetcherAndS3Emitter() throws Exception {
 
         // create s3 bucket for fetches and for emits
-        s3Client.createBucket(FETCH_BUCKET);
-        s3Client.createBucket(EMIT_BUCKET);
+        
s3Client.createBucket(CreateBucketRequest.builder().bucket(FETCH_BUCKET).build());
+        
s3Client.createBucket(CreateBucketRequest.builder().bucket(EMIT_BUCKET).build());
 
         // create some test files and insert into fetch bucket
         createTestFiles();
@@ -139,9 +161,10 @@ class S3PipeIntegrationTest {
         }
 
         for (String testFile : testFiles) {
-            S3Object object = s3Client.getObject(EMIT_BUCKET, testFile + 
".json");
+            GetObjectRequest objectRequest = 
GetObjectRequest.builder().bucket(EMIT_BUCKET).key(testFile + ".json").build();
+            ResponseInputStream<GetObjectResponse> object = 
s3Client.getObject(objectRequest);
             Assertions.assertNotNull(object);
-            String data = IOUtils.toString(object.getObjectContent(), 
StandardCharsets.UTF_8);
+            String data = IOUtils.toString(object, StandardCharsets.UTF_8);
             MatcherAssert.assertThat(
                     "Should be able to read the parsed body of the HTML file 
as the body of the document",
                     data, Matchers.containsString("body-of-" + testFile));
@@ -159,6 +182,6 @@ class S3PipeIntegrationTest {
                 .replace("{EMIT_BUCKET}", 
EMIT_BUCKET).replace("{FETCH_BUCKET}", FETCH_BUCKET)
                 .replace("{ACCESS_KEY}", ACCESS_KEY).replace("{SECRET_KEY}", 
SECRET_KEY)
                 .replace("{ENDPOINT_CONFIGURATION_SERVICE}", MINIO_ENDPOINT)
-                .replace("{REGION}", REGION);
+                .replace("{REGION}", REGION.id());
     }
 }

Reply via email to