This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new f8e515e56 TIKA-4525: migrate to aws v2
f8e515e56 is described below
commit f8e515e568958d833ea9a998695f0335266deeb1
Author: Tilman Hausherr <[email protected]>
AuthorDate: Fri Oct 24 20:22:06 2025 +0200
TIKA-4525: migrate to aws v2
---
.../tika/pipes/s3/tests/PipeIntegrationTests.java | 50 ++++++++++++++--------
1 file changed, 31 insertions(+), 19 deletions(-)
diff --git
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java
index b32304d69..1f7568e4e 100644
---
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java
+++
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/PipeIntegrationTests.java
@@ -22,6 +22,7 @@ import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorCompletionService;
@@ -31,15 +32,17 @@ import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
-import com.amazonaws.auth.profile.ProfileCredentialsProvider;
-import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
-import com.amazonaws.services.s3.iterable.S3Objects;
-import com.amazonaws.services.s3.model.S3Object;
-import com.amazonaws.services.s3.model.S3ObjectSummary;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.GetObjectRequest;
+import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
+import software.amazon.awssdk.services.s3.model.S3Object;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -52,41 +55,49 @@ import org.apache.tika.pipes.fetcher.FetcherManager;
import org.apache.tika.pipes.pipesiterator.CallablePipesIterator;
import org.apache.tika.pipes.pipesiterator.PipesIterator;
+// To enable these tests, fill OUTDIR and bucket, and adjust profile and region if needed.
@Disabled("turn these into actual tests with mock s3")
public class PipeIntegrationTests {
private static final Path OUTDIR = Paths.get("");
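+ /**
+ * Downloads every object in the bucket into OUTDIR, skipping keys that already exist there as regular files.
+ * @throws Exception if listing the bucket, creating a target directory, or downloading an object fails
+ */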
@Test
public void testBruteForce() throws Exception {
- String region = "";
- String profile = "";
+ String region = "us-east-1";
+ String profile = "default";
String bucket = "";
- AmazonS3 s3Client = AmazonS3ClientBuilder.standard().withRegion(region)
- .withCredentials(new
ProfileCredentialsProvider(profile)).build();
- s3Client.listObjects(bucket);
+ AwsCredentialsProvider provider =
ProfileCredentialsProvider.builder().profileName(profile).build();
+ S3Client s3Client =
S3Client.builder().credentialsProvider(provider).region(Region.of(region)).build();
+
int cnt = 0;
long sz = 0;
- for (S3ObjectSummary summary : S3Objects.withPrefix(s3Client, bucket,
"")) {
- Path targ = OUTDIR.resolve(summary.getKey());
+ ListObjectsV2Request listObjectsV2Request =
ListObjectsV2Request.builder().bucket(bucket).prefix("").build();
+ List<S3Object> s3ObjectList =
s3Client.listObjectsV2Paginator(listObjectsV2Request).stream().
+ flatMap(resp ->
resp.contents().stream()).collect(Collectors.toList());
+ for (S3Object s3Object : s3ObjectList) {
+ String key = s3Object.key();
+ Path targ = OUTDIR.resolve(key);
if (Files.isRegularFile(targ)) {
continue;
}
if (!Files.isDirectory(targ.getParent())) {
Files.createDirectories(targ.getParent());
}
- System.out
- .println("id: " + cnt + " :: " + summary.getKey() + " : "
+ summary.getSize());
- S3Object s3Object = s3Client.getObject(bucket, summary.getKey());
- Files.copy(s3Object.getObjectContent(), targ);
- summary.getSize();
+ System.out.println("id: " + cnt + " :: " + key + " : " +
s3Object.size());
+ GetObjectRequest objectRequest =
GetObjectRequest.builder().bucket(bucket).key(key).build();
+ s3Client.getObject(objectRequest, targ);
cnt++;
- sz += summary.getSize();
+ sz += s3Object.size();
}
System.out.println("iterated: " + cnt + " sz: " + sz);
}
+ // to test this, files must be in the fetcher bucket
@Test
public void testS3ToFS() throws Exception {
Fetcher fetcher = getFetcher("tika-config-s3ToFs.xml", "s3f");
@@ -117,6 +128,7 @@ public class PipeIntegrationTests {
}
}
+ // to test this, files must be in the iterator bucket
@Test
public void testS3ToS3() throws Exception {
Fetcher fetcher = getFetcher("tika-config-s3Tos3.xml", "s3f");