This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4519
in repository https://gitbox.apache.org/repos/asf/tika.git

commit d173308da61493a6514ac64c5688ff6209368324
Merge: b06877bce e15e9f22e
Author: tallison <[email protected]>
AuthorDate: Mon Oct 27 14:47:15 2025 -0400

    Merge branch 'main' into TIKA-4519
    
    # Conflicts:
    #       tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
    #       tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/src/main/java/org/apache/tika/pipes/pipesiterator/s3/S3PipesIterator.java

 CHANGES.txt                                        |  14 +--
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |   4 +-
 .../test/java/org/apache/tika/cli/TikaCLITest.java |  31 ++++-
 tika-app/src/test/resources/test-data/testPST.pst  | Bin 0 -> 2302976 bytes
 .../extractor/DefaultEmbeddedStreamTranslator.java |  21 ++--
 .../tika/extractor/EmbeddedStreamTranslator.java   |   8 +-
 .../apache/tika/extractor/RUnpackExtractor.java    |  36 ++++--
 .../java/org/apache/tika/io/FilenameUtils.java     |  31 ++++-
 .../java/org/apache/tika/io/FilenameUtilsTest.java |   8 +-
 tika-detectors/tika-detector-magika/pom.xml        |  36 ------
 tika-detectors/tika-detector-siegfried/pom.xml     |  36 ------
 .../tika/pipes/kafka/tests/TikaPipesKafkaTest.java |   2 +-
 .../tika-pipes-s3-integration-tests/pom.xml        |   5 +
 .../tika/pipes/s3/tests/PipeIntegrationTests.java  |  49 +++++---
 .../tika/pipes/s3/tests/S3PipeIntegrationTest.java |  77 +++++++------
 .../src/test/resources/tika-config-s3ToFs.xml      |  21 ++--
 .../src/test/resources/tika-config-s3Tos3.xml      |  23 ++--
 .../pipes/solr/tests/TikaPipesSolrTestBase.java    |   2 +-
 tika-parent/pom.xml                                |  66 ++++++-----
 tika-parsers/pom.xml                               |   2 +-
 .../tika-parser-scientific-package/pom.xml         |  47 --------
 .../tika-parser-sqlite3-package/pom.xml            |  41 -------
 tika-parsers/tika-parsers-ml/pom.xml               |   2 +
 .../microsoft/MSEmbeddedStreamTranslator.java      |  39 +++----
 .../microsoft/PSTEmailStreamTranslator.java        |  55 +++++++++
 ....apache.tika.extractor.EmbeddedStreamTranslator |   3 +-
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |   3 +
 .../org/apache/tika/async/cli/TikaAsyncCLI.java    |   2 +-
 .../tika/async/cli/TikaConfigAsyncWriter.java      |   8 +-
 .../tika-emitters/tika-emitter-az-blob/pom.xml     |  43 -------
 tika-pipes/tika-emitters/tika-emitter-gcs/pom.xml  |  43 -------
 .../tika-emitters/tika-emitter-kafka/pom.xml       |  43 -------
 .../tika-emitters/tika-emitter-opensearch/pom.xml  |  43 -------
 tika-pipes/tika-emitters/tika-emitter-s3/pom.xml   |  51 +--------
 .../apache/tika/pipes/emitter/s3/S3Emitter.java    | 111 +++++++++++-------
 tika-pipes/tika-emitters/tika-emitter-solr/pom.xml |  43 -------
 .../tika-fetchers/tika-fetcher-az-blob/pom.xml     |  43 -------
 tika-pipes/tika-fetchers/tika-fetcher-gcs/pom.xml  |  43 -------
 tika-pipes/tika-fetchers/tika-fetcher-http/pom.xml |  42 -------
 .../tika-fetcher-microsoft-graph/pom.xml           |  45 +-------
 tika-pipes/tika-fetchers/tika-fetcher-s3/pom.xml   |  55 ++-------
 .../apache/tika/pipes/fetcher/s3/S3Fetcher.java    | 127 ++++++++++++---------
 .../org/apache/tika/pipes/core/PipesClient.java    |   2 +-
 .../AbstractEmbeddedDocumentBytesHandler.java      |  37 +-----
 .../tika-pipes-iterator-az-blob/pom.xml            |  43 -------
 .../tika-pipes-iterator-csv/pom.xml                |  43 -------
 .../tika-pipes-iterator-gcs/pom.xml                |  43 -------
 .../tika-pipes-iterator-jdbc/pom.xml               |  43 -------
 .../tika-pipes-iterator-json/pom.xml               |  43 -------
 .../tika-pipes-iterator-kafka/pom.xml              |  43 -------
 .../tika-pipes-iterator-s3/pom.xml                 |  51 +--------
 .../pipes/pipesiterator/s3/S3PipesIterator.java    |  97 ++++++++++------
 .../tika-pipes-iterator-solr/pom.xml               |  43 -------
 .../tika-pipes-reporter-fs-status/pom.xml          |  43 -------
 .../tika-pipes-reporter-jdbc/pom.xml               |  43 -------
 .../tika-pipes-reporter-opensearch/pom.xml         |  43 -------
 tika-server/tika-server-client/pom.xml             |  47 --------
 .../server/core/resource/TranslateResource.java    |  50 +++++---
 .../server/core/resource/UnpackerResource.java     |  27 ++---
 tika-translate/pom.xml                             |   2 +-
 60 files changed, 599 insertions(+), 1548 deletions(-)

diff --cc tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/src/main/java/org/apache/tika/pipes/pipesiterator/s3/S3PipesIterator.java
index 3d9e25530,4a63046f6..a35a559fe
--- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/src/main/java/org/apache/tika/pipes/pipesiterator/s3/S3PipesIterator.java
+++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-s3/src/main/java/org/apache/tika/pipes/pipesiterator/s3/S3PipesIterator.java
@@@ -187,12 -201,19 +201,19 @@@ public class S3PipesIterator extends Pi
          long start = System.currentTimeMillis();
          int count = 0;
          HandlerConfig handlerConfig = getHandlerConfig();
-         Matcher fileNameMatcher = null;
+         final Matcher fileNameMatcher;
          if (fileNamePattern != null) {
              fileNameMatcher = fileNamePattern.matcher("");
+         } else {
+             fileNameMatcher = null;
          }
-         for (S3ObjectSummary summary : S3Objects.withPrefix(s3Client, bucket, prefix)) {
-             if (fileNameMatcher != null && !accept(fileNameMatcher, summary.getKey())) {
 -        
++
+         ListObjectsV2Request listObjectsV2Request = ListObjectsV2Request.builder().bucket(bucket).prefix(prefix).build();
+         List<S3Object> s3ObjectList = s3Client.listObjectsV2Paginator(listObjectsV2Request).stream().
+                 flatMap(resp -> resp.contents().stream()).toList();
+         for (S3Object s3Object : s3ObjectList) {
+             String key = s3Object.key();
+             if (fileNameMatcher != null && !accept(fileNameMatcher, key)) {
                  continue;
              }
              long elapsed = System.currentTimeMillis() - start;

Reply via email to