This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 066412ea14 WIP: Checkpoint - CachingSource metadata update and cleanup (#2535)
066412ea14 is described below

commit 066412ea149f71f01ae92473c6bbe13fee433c6c
Author: Tim Allison <[email protected]>
AuthorDate: Thu Jan 15 18:29:09 2026 -0500

    WIP: Checkpoint - CachingSource metadata update and cleanup (#2535)
    
    🤖 Generated with [Claude Code](https://claude.com/claude-code)
    
    Co-authored-by: Claude Opus 4.5 <[email protected]>
---
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |   2 +-
 .../main/java/org/apache/tika/digest/Digester.java |  11 +-
 .../org/apache/tika/digest/DigesterFactory.java    |   3 +-
 .../apache/tika/digest/InputStreamDigester.java    | 115 ++++----------------
 .../java/org/apache/tika/io/ByteArraySource.java   |   6 +-
 .../java/org/apache/tika/io/CachingSource.java     |  15 ++-
 .../main/java/org/apache/tika/io/FileSource.java   |   2 +-
 .../java/org/apache/tika/io/TikaInputSource.java   |   3 +-
 .../java/org/apache/tika/io/TikaInputStream.java   |  10 +-
 .../org/apache/tika/io/TikaInputStreamTest.java    | 120 +++++++++++++++++++++
 .../parser/digestutils/BouncyCastleDigester.java   |  31 +++---
 .../digestutils/BouncyCastleDigesterFactory.java   |  14 +--
 .../tika/parser/digestutils/CommonsDigester.java   |  29 +++--
 .../parser/digestutils/CommonsDigesterFactory.java |  14 +--
 .../apache/tika/detect/ole/MiscOLEDetector.java    |  20 ----
 .../digest/SkipContainerDocumentDigestTest.java    |   7 +-
 .../src/test/resources/configs/tika-4533.json      |   1 -
 .../configs/tika-config-bc-digests-base32.json     |   1 -
 .../configs/tika-config-bc-digests-basic.json      |   1 -
 .../configs/tika-config-bc-digests-multiple.json   |   1 -
 .../configs/tika-config-commons-digests-basic.json |   1 -
 .../configs/tika-config-digests-pdf-only.json      |   1 -
 .../tika-config-digests-skip-container.json        |   1 -
 .../resources/configs/tika-config-digests.json     |   1 -
 .../resources/configs/tika-config-md5-digest.json  |   1 -
 .../configs/tika-config-write-filter.json          |   1 -
 .../apache/tika/digest/MockDigesterFactory.java    |   2 +-
 .../org/apache/tika/server/core/CXFTestBase.java   |   1 -
 .../resources/configs/cxf-test-base-template.json  |   1 -
 .../resources/configs/cxf-test-base-template.json  |   1 -
 .../configs/tika-config-for-server-tests.json      |   1 -
 .../tika-config-langdetect-opennlp-filter.json     |   1 -
 .../tika-config-langdetect-optimaize-filter.json   |   1 -
 33 files changed, 207 insertions(+), 213 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index cb25539678..9bf9990271 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -433,7 +433,7 @@ public class TikaCLI {
         } else if (arg.startsWith("--digest=")) {
             String algorithmName = arg.substring("--digest=".length()).toUpperCase(Locale.ROOT);
             DigestDef.Algorithm algorithm = DigestDef.Algorithm.valueOf(algorithmName);
-            digester = new CommonsDigester(MAX_MARK, algorithm);
+            digester = new CommonsDigester(algorithm);
         } else if (arg.startsWith("-e")) {
             encoding = arg.substring("-e".length());
         } else if (arg.startsWith("--encoding=")) {
diff --git a/tika-core/src/main/java/org/apache/tika/digest/Digester.java b/tika-core/src/main/java/org/apache/tika/digest/Digester.java
index ac6459607c..133d5dce09 100644
--- a/tika-core/src/main/java/org/apache/tika/digest/Digester.java
+++ b/tika-core/src/main/java/org/apache/tika/digest/Digester.java
@@ -30,14 +30,11 @@ import org.apache.tika.parser.ParseContext;
 public interface Digester {
     /**
      * Digests a TikaInputStream and sets the appropriate value(s) in the metadata.
-     * The Digester is also responsible for marking and resetting the stream.
+     * The Digester is responsible for calling {@link TikaInputStream#enableRewind()}
+     * and {@link TikaInputStream#rewind()} to ensure the stream can be read by
+     * subsequent processing after digesting.
      * <p>
-     * The given stream is guaranteed to support the
-     * {@link TikaInputStream#markSupported() mark feature} and the detector
-     * is expected to {@link TikaInputStream#mark(int) mark} the stream before
-     * reading any bytes from it, and to {@link TikaInputStream#reset() reset}
-     * the stream before returning. The stream must not be closed by the
-     * detector.
+     * The stream must not be closed by the digester.
      *
      * @param tis          TikaInputStream to digest
      * @param m            Metadata to set the values for
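
For context, a minimal sketch of a custom Digester under the revised contract is shown below; the class name and its length-only behavior are illustrative assumptions, not code from this commit:

    import java.io.IOException;

    import org.apache.tika.digest.Digester;
    import org.apache.tika.io.TikaInputStream;
    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.ParseContext;

    public class LengthOnlyDigester implements Digester {
        @Override
        public void digest(TikaInputStream tis, Metadata m, ParseContext parseContext)
                throws IOException {
            tis.enableRewind();          // make the stream rewindable before reading
            byte[] buffer = new byte[8192];
            long total = 0;
            int read;
            while ((read = tis.read(buffer)) != -1) {
                total += read;
            }
            m.set(Metadata.CONTENT_LENGTH, Long.toString(total));
            tis.rewind();                // hand the stream back, unclosed, at position 0
        }
    }
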
diff --git a/tika-core/src/main/java/org/apache/tika/digest/DigesterFactory.java b/tika-core/src/main/java/org/apache/tika/digest/DigesterFactory.java
index 66744718aa..1b9215d226 100644
--- a/tika-core/src/main/java/org/apache/tika/digest/DigesterFactory.java
+++ b/tika-core/src/main/java/org/apache/tika/digest/DigesterFactory.java
@@ -19,7 +19,7 @@ package org.apache.tika.digest;
 /**
  * Factory interface for creating Digester instances.
  * Implementations should be annotated with {@code @TikaComponent} and
- * provide bean properties for configuration (e.g., markLimit, digests).
+ * provide bean properties for configuration (e.g., digests).
  * <p>
  * This is used in {@link org.apache.tika.parser.AutoDetectParserConfig} to
  * configure digesting in the AutoDetectParser.
@@ -29,7 +29,6 @@ package org.apache.tika.digest;
  * "auto-detect-parser": {
  *   "digesterFactory": {
  *     "commons-digester-factory": {
- *       "markLimit": 1000000,
  *       "digests": [
  *         { "algorithm": "MD5" },
  *         { "algorithm": "SHA256", "encoding": "BASE32" }
diff --git a/tika-core/src/main/java/org/apache/tika/digest/InputStreamDigester.java b/tika-core/src/main/java/org/apache/tika/digest/InputStreamDigester.java
index a384137300..2d1180435b 100644
--- a/tika-core/src/main/java/org/apache/tika/digest/InputStreamDigester.java
+++ b/tika-core/src/main/java/org/apache/tika/digest/InputStreamDigester.java
@@ -16,69 +16,37 @@
  */
 package org.apache.tika.digest;
 
-import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
-import java.io.InputStream;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.security.Provider;
 
-import org.apache.tika.io.BoundedInputStream;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.utils.StringUtils;
 
-// TODO: TIKA-FOLLOWUP - With TikaInputStream.rewind(), markLimit is no longer needed.
-//  The digester can simply read the entire stream, then call tis.rewind().
-//  This would simplify this class and allow removing markLimit from:
-//  - InputStreamDigester, CommonsDigester, BouncyCastleDigester
-//  - CommonsDigesterFactory, BouncyCastleDigesterFactory (setMarkLimit/getMarkLimit)
-//  - All JSON config files that specify markLimit for digesters
+/**
+ * Digester that uses {@link TikaInputStream#enableRewind()} and {@link TikaInputStream#rewind()}
+ * to read the entire stream for digesting, then rewind for subsequent processing.
+ */
 public class InputStreamDigester implements Digester {
 
     private final String algorithm;
     private final String metadataKey;
     private final Encoder encoder;
-    private final int markLimit;
 
     /**
-     * @param markLimit   limit in bytes to allow for mark/reset.  If the inputstream is longer
-     *                    than this limit, the stream will be reset and then spooled to a
-     *                    temporary file.
-     *                    Throws IllegalArgumentException if < 0.
      * @param algorithm   name of the digest algorithm to retrieve from the Provider
      * @param metadataKey the full metadata key to use when storing the digest
      *                    (e.g., "X-TIKA:digest:MD5" or "X-TIKA:digest:SHA256:BASE32")
      * @param encoder     encoder to convert the byte array returned from the digester to a
      *                    string
      */
-    public InputStreamDigester(int markLimit, String algorithm, String metadataKey,
-                               Encoder encoder) {
+    public InputStreamDigester(String algorithm, String metadataKey, Encoder encoder) {
         this.algorithm = algorithm;
         this.metadataKey = metadataKey;
         this.encoder = encoder;
-        this.markLimit = markLimit;
-
-        if (markLimit < 0) {
-            throw new IllegalArgumentException("markLimit must be >= 0");
-        }
-    }
-
-    /**
-     * Copied from commons-codec
-     */
-    private static MessageDigest updateDigest(MessageDigest digest, InputStream data,
-                                              Metadata metadata) throws IOException {
-        byte[] buffer = new byte[1024];
-        long total = 0;
-        for (int read = data.read(buffer, 0, 1024); read > -1; read = data.read(buffer, 0, 1024)) {
-            digest.update(buffer, 0, read);
-            total += read;
-        }
-        setContentLength(total, metadata);
-        return digest;
     }
 
     private static void setContentLength(long length, Metadata metadata) {
@@ -113,6 +81,12 @@ public class InputStreamDigester implements Digester {
     }
 
     /**
+     * Digests the TikaInputStream and stores the result in metadata.
+     * <p>
+     * Uses {@link TikaInputStream#enableRewind()} to ensure the stream can be
+     * rewound after digesting, then calls {@link TikaInputStream#rewind()} to
+     * reset the stream for subsequent processing.
+     *
      * @param tis          TikaInputStream to digest
      * @param metadata     metadata in which to store the digest information
     * @param parseContext ParseContext -- not actually used yet, but there for future expansion
@@ -121,66 +95,21 @@ public class InputStreamDigester implements Digester {
     @Override
     public void digest(TikaInputStream tis, Metadata metadata, ParseContext parseContext)
             throws IOException {
-        if (tis.hasFile()) {
-            long sz = tis.getLength();
-            //if the inputstream has a file,
-            //and its size is greater than its mark limit,
-            //just digest the underlying file.
-            if (sz > markLimit) {
-                digestFile(tis.getFile(), sz, metadata);
-                return;
-            }
-        }
-
-        //try the usual mark/reset stuff.
-        //however, if you actually hit the bound,
-        //then stop and spool to file via TikaInputStream
-        BoundedInputStream bis = new BoundedInputStream(markLimit, tis);
-        boolean finishedStream = false;
-        bis.mark(markLimit + 1);
-        finishedStream = digestStream(bis, metadata);
-        bis.reset();
-        if (finishedStream) {
-            return;
-        }
-        //if the stream wasn't finished -- if the stream was longer than the mark limit --
-        //spool to File and digest that.
-        digestFile(tis.getFile(), -1, metadata);
-    }
-
-    private void digestFile(File f, long sz, Metadata m) throws IOException {
-        //only add it if it hasn't been populated already
-        if (StringUtils.isBlank(m.get(Metadata.CONTENT_LENGTH))) {
-            if (sz < 0) {
-                sz = f.length();
-            }
-            setContentLength(sz, m);
-        }
-        try (InputStream is = new FileInputStream(f)) {
-            digestStream(is, m);
-        }
-    }
+        tis.enableRewind();
 
-    /**
-     * @param is       input stream to read from
-     * @param metadata metadata for reporting the digest
-     * @return whether or not this finished the input stream
-     * @throws IOException
-     */
-    private boolean digestStream(InputStream is, Metadata metadata) throws IOException {
-        byte[] digestBytes;
         MessageDigest messageDigest = newMessageDigest();
+        byte[] buffer = new byte[8192];
+        long total = 0;
+        int read;
+        while ((read = tis.read(buffer)) != -1) {
+            messageDigest.update(buffer, 0, read);
+            total += read;
+        }
 
-        updateDigest(messageDigest, is, metadata);
-        digestBytes = messageDigest.digest();
+        setContentLength(total, metadata);
+        metadata.set(metadataKey, encoder.encode(messageDigest.digest()));
 
-        if (is instanceof BoundedInputStream) {
-            if (((BoundedInputStream) is).hasHitBound()) {
-                return false;
-            }
-        }
-        metadata.set(metadataKey, encoder.encode(digestBytes));
-        return true;
+        tis.rewind();
     }
 
 }
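
A hedged usage fragment for the simplified constructor follows; it assumes Encoder is the single-method interface implied by encoder.encode(...) above and that it lives in org.apache.tika.digest, and "tis" stands for an existing TikaInputStream:

    // Illustrative hex Encoder; any existing Encoder implementation would do.
    Encoder hexEncoder = bytes -> {
        StringBuilder sb = new StringBuilder();
        for (byte b : bytes) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    };
    Digester digester = new InputStreamDigester("SHA-256", "X-TIKA:digest:SHA-256", hexEncoder);
    Metadata metadata = new Metadata();
    digester.digest(tis, metadata, new ParseContext());
    // digest() reads the whole stream, sets Content-Length (if unset) and
    // X-TIKA:digest:SHA-256, then rewinds tis for whatever parses it next.
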
diff --git a/tika-core/src/main/java/org/apache/tika/io/ByteArraySource.java b/tika-core/src/main/java/org/apache/tika/io/ByteArraySource.java
index a9dcd8da96..6a2046cdf4 100644
--- a/tika-core/src/main/java/org/apache/tika/io/ByteArraySource.java
+++ b/tika-core/src/main/java/org/apache/tika/io/ByteArraySource.java
@@ -36,12 +36,14 @@ class ByteArraySource extends InputStream implements TikaInputSource {
 
     private final byte[] data;
     private final int length;
+    private final TemporaryResources tmp;
     private int position;
     private Path spilledPath;
 
-    ByteArraySource(byte[] data) {
+    ByteArraySource(byte[] data, TemporaryResources tmp) {
         this.data = data;
         this.length = data.length;
+        this.tmp = tmp;
         this.position = 0;
         this.spilledPath = null;
     }
@@ -97,7 +99,7 @@ class ByteArraySource extends InputStream implements TikaInputSource {
     }
 
     @Override
-    public Path getPath(TemporaryResources tmp, String suffix) throws IOException {
+    public Path getPath(String suffix) throws IOException {
         if (spilledPath == null) {
             // Spill to temp file on first call
             spilledPath = tmp.createTempFile(suffix);
diff --git a/tika-core/src/main/java/org/apache/tika/io/CachingSource.java b/tika-core/src/main/java/org/apache/tika/io/CachingSource.java
index 07c6f0fdc3..d84b98fdd9 100644
--- a/tika-core/src/main/java/org/apache/tika/io/CachingSource.java
+++ b/tika-core/src/main/java/org/apache/tika/io/CachingSource.java
@@ -24,6 +24,9 @@ import java.nio.file.Path;
 
 import org.apache.commons.io.IOUtils;
 
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.utils.StringUtils;
+
 /**
  * Input source that wraps a raw InputStream with optional caching.
  * <p>
@@ -38,6 +41,7 @@ import org.apache.commons.io.IOUtils;
 class CachingSource extends InputStream implements TikaInputSource {
 
     private final TemporaryResources tmp;
+    private final Metadata metadata;
     private long length;
 
     // Passthrough mode: just a BufferedInputStream
@@ -52,9 +56,10 @@ class CachingSource extends InputStream implements TikaInputSource {
     private InputStream fileStream;
     private long filePosition;  // Track position in file mode
 
-    CachingSource(InputStream source, TemporaryResources tmp, long length) {
+    CachingSource(InputStream source, TemporaryResources tmp, long length, Metadata metadata) {
         this.tmp = tmp;
         this.length = length;
+        this.metadata = metadata;
         // Start in passthrough mode
         this.passthroughStream = source instanceof BufferedInputStream
                 ? (BufferedInputStream) source
@@ -222,7 +227,7 @@ class CachingSource extends InputStream implements TikaInputSource {
     }
 
     @Override
-    public Path getPath(TemporaryResources tmp, String suffix) throws IOException {
+    public Path getPath(String suffix) throws IOException {
         if (spilledPath == null) {
             // If still in passthrough mode, enable caching first
             if (cachingStream == null) {
@@ -256,6 +261,12 @@ class CachingSource extends InputStream implements TikaInputSource {
                 length = fileSize;
             }
 
+            // Update metadata if not already set
+            if (metadata != null &&
+                    StringUtils.isBlank(metadata.get(Metadata.CONTENT_LENGTH))) {
+                metadata.set(Metadata.CONTENT_LENGTH, Long.toString(length));
+            }
+
             cachingStream = null;
         }
         return spilledPath;
diff --git a/tika-core/src/main/java/org/apache/tika/io/FileSource.java b/tika-core/src/main/java/org/apache/tika/io/FileSource.java
index 95f6458574..79163f0ab2 100644
--- a/tika-core/src/main/java/org/apache/tika/io/FileSource.java
+++ b/tika-core/src/main/java/org/apache/tika/io/FileSource.java
@@ -102,7 +102,7 @@ class FileSource extends InputStream implements TikaInputSource {
     }
 
     @Override
-    public Path getPath(TemporaryResources tmp, String suffix) throws IOException {
+    public Path getPath(String suffix) throws IOException {
         // Already file-backed, just return the path
         return path;
     }
diff --git a/tika-core/src/main/java/org/apache/tika/io/TikaInputSource.java b/tika-core/src/main/java/org/apache/tika/io/TikaInputSource.java
index 7a8da5d703..1620614f99 100644
--- a/tika-core/src/main/java/org/apache/tika/io/TikaInputSource.java
+++ b/tika-core/src/main/java/org/apache/tika/io/TikaInputSource.java
@@ -43,11 +43,10 @@ interface TikaInputSource extends Closeable {
 
     /**
      * Gets the file path, potentially spilling to a temp file if needed.
-     * @param tmp temporary resources for creating temp files
      * @param suffix file suffix for temp files
      * @return the file path
      */
-    Path getPath(TemporaryResources tmp, String suffix) throws IOException;
+    Path getPath(String suffix) throws IOException;
 
     /**
      * Returns the length of the content, or -1 if unknown.
diff --git a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
index 7eee791e3a..eb4c3cca76 100644
--- a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
+++ b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
@@ -103,7 +103,7 @@ public class TikaInputStream extends TaggedInputStream {
             return (TikaInputStream) stream;
         }
         String ext = getExtension(metadata);
-        TikaInputSource inputSource = new CachingSource(stream, tmp, -1);
+        TikaInputSource inputSource = new CachingSource(stream, tmp, -1, metadata);
         return new TikaInputStream(inputSource, tmp, ext);
     }
 
@@ -123,7 +123,7 @@ public class TikaInputStream extends TaggedInputStream {
         metadata.set(Metadata.CONTENT_LENGTH, Integer.toString(data.length));
         String ext = getExtension(metadata);
         TemporaryResources tmp = new TemporaryResources();
-        TikaInputSource inputSource = new ByteArraySource(data);
+        TikaInputSource inputSource = new ByteArraySource(data, tmp);
         return new TikaInputStream(inputSource, tmp, ext);
     }
 
@@ -180,7 +180,7 @@ public class TikaInputStream extends TaggedInputStream {
             String ext = getExtension(metadata);
             TemporaryResources tmp = new TemporaryResources();
             TikaInputSource inputSource = new CachingSource(
-                    new BufferedInputStream(blob.getBinaryStream()), tmp, length);
+                    new BufferedInputStream(blob.getBinaryStream()), tmp, length, metadata);
             return new TikaInputStream(inputSource, tmp, ext);
         }
     }
@@ -241,7 +241,7 @@ public class TikaInputStream extends TaggedInputStream {
         String ext = getExtension(metadata);
         TemporaryResources tmp = new TemporaryResources();
         TikaInputSource inputSource = new CachingSource(
-                new BufferedInputStream(connection.getInputStream()), tmp, length);
+                new BufferedInputStream(connection.getInputStream()), tmp, length, metadata);
         return new TikaInputStream(inputSource, tmp, ext);
     }
 
@@ -379,7 +379,7 @@ public class TikaInputStream extends TaggedInputStream {
         if (source == null) {
             throw new IOException("No TikaInputSource available");
         }
-        return source.getPath(tmp, suffix);
+        return source.getPath(suffix);
     }
 
     public File getFile() throws IOException {
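
The rewind contract relied on throughout this change, as a minimal fragment; the ordering follows the usage in InputStreamDigester above and the tests below, and "tis" is an existing TikaInputStream:

    tis.enableRewind();              // opt in to caching/rewind before reading
    byte[] head = new byte[4096];
    int n = tis.read(head);          // consume bytes for detection or digesting
    tis.rewind();                    // back to position 0 for the next consumer
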
diff --git a/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java b/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
index bf8e0b7ba0..84676683a5 100644
--- a/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
+++ b/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
@@ -673,6 +673,126 @@ public class TikaInputStreamTest {
         }
     }
 
+    // ========== CachingSource Tests ==========
+
+    @Test
+    public void testCachingSourceUpdatesMetadataOnSpill() throws IOException {
+        byte[] data = bytes("Hello, World!");
+        Metadata metadata = new Metadata();
+        // Don't set CONTENT_LENGTH - let CachingSource set it on spill
+
+        try (TemporaryResources tmp = new TemporaryResources()) {
+            CachingSource source = new CachingSource(
+                    new ByteArrayInputStream(data), tmp, -1, metadata);
+            source.enableRewind(); // Enable caching for spill support
+
+            // Read all data
+            byte[] buffer = new byte[data.length];
+            int totalRead = 0;
+            int n;
+            while ((n = source.read(buffer, totalRead, buffer.length - totalRead)) != -1) {
+                totalRead += n;
+                if (totalRead >= buffer.length) break;
+            }
+
+            // Before spill, metadata should not have length
+            assertNull(metadata.get(Metadata.CONTENT_LENGTH));
+
+            // Force spill to file
+            Path path = source.getPath(".tmp");
+            assertNotNull(path);
+            assertTrue(Files.exists(path));
+
+            // After spill, metadata should have length
+            assertEquals("13", metadata.get(Metadata.CONTENT_LENGTH));
+
+            source.close();
+        }
+    }
+
+    @Test
+    public void testCachingSourceDoesNotOverwriteExistingMetadata() throws IOException {
+        byte[] data = bytes("Hello, World!");
+        Metadata metadata = new Metadata();
+        // Pre-set CONTENT_LENGTH
+        metadata.set(Metadata.CONTENT_LENGTH, "999");
+
+        try (TemporaryResources tmp = new TemporaryResources()) {
+            CachingSource source = new CachingSource(
+                    new ByteArrayInputStream(data), tmp, -1, metadata);
+            source.enableRewind(); // Enable caching for seek/spill support
+
+            // Read and spill
+            IOUtils.toByteArray(source);
+            source.seekTo(0);
+            Path path = source.getPath(".tmp");
+
+            // Existing value should not be overwritten
+            assertEquals("999", metadata.get(Metadata.CONTENT_LENGTH));
+
+            source.close();
+        }
+    }
+
+    @Test
+    public void testCachingSourceSeekTo() throws IOException {
+        byte[] data = bytes("ABCDEFGHIJ");
+
+        try (TemporaryResources tmp = new TemporaryResources()) {
+            CachingSource source = new CachingSource(
+                    new ByteArrayInputStream(data), tmp, -1, null);
+            source.enableRewind(); // Enable caching for seek support
+
+            // Read first 5 bytes
+            byte[] buf = new byte[5];
+            source.read(buf);
+            assertEquals("ABCDE", str(buf));
+
+            // Seek back to position 2
+            source.seekTo(2);
+
+            // Read again
+            buf = new byte[3];
+            source.read(buf);
+            assertEquals("CDE", str(buf));
+
+            source.close();
+        }
+    }
+
+    @Test
+    public void testCachingSourceAfterSpill() throws IOException {
+        byte[] data = bytes("ABCDEFGHIJ");
+
+        try (TemporaryResources tmp = new TemporaryResources()) {
+            CachingSource source = new CachingSource(
+                    new ByteArrayInputStream(data), tmp, -1, null);
+            source.enableRewind(); // Enable caching for spill/seek support
+
+            // Read first 5 bytes
+            byte[] buf = new byte[5];
+            source.read(buf);
+            assertEquals("ABCDE", str(buf));
+
+            // Force spill
+            Path path = source.getPath(".tmp");
+            assertTrue(Files.exists(path));
+
+            // Continue reading after spill
+            buf = new byte[5];
+            source.read(buf);
+            assertEquals("FGHIJ", str(buf));
+
+            // Seek back and read again
+            source.seekTo(0);
+            buf = new byte[10];
+            source.read(buf);
+            assertEquals("ABCDEFGHIJ", str(buf));
+
+            source.close();
+        }
+    }
+
     // ========== enableRewind() Tests ==========
 
     @Test
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigester.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigester.java
index d3d1465dfd..68a7280f83 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigester.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigester.java
@@ -37,35 +37,35 @@ import org.apache.tika.digest.InputStreamDigester;
  * <p>
  * BouncyCastle supports additional algorithms beyond the standard Java ones,
  * such as SHA3-256, SHA3-384, SHA3-512.
+ * <p>
+ * This digester uses {@link org.apache.tika.io.TikaInputStream#enableRewind()} and
+ * {@link org.apache.tika.io.TikaInputStream#rewind()} to read the entire stream,
+ * compute the digest, and then rewind for subsequent processing.
  */
 public class BouncyCastleDigester extends CompositeDigester {
 
     /**
-     * @param markLimit limit for mark/reset; after this limit is hit, the
-     *                  stream is reset and spooled to disk
-     * @param digests   list of digest definitions (algorithm + encoding pairs)
+     * @param digests list of digest definitions (algorithm + encoding pairs)
      */
-    public BouncyCastleDigester(int markLimit, List<DigestDef> digests) {
-        super(buildDigesters(markLimit, digests));
+    public BouncyCastleDigester(List<DigestDef> digests) {
+        super(buildDigesters(digests));
     }
 
     /**
      * Convenience constructor using Algorithm enum with HEX encoding.
      *
-     * @param markLimit  limit for mark/reset; after this limit is hit, the
-     *                   stream is reset and spooled to disk
      * @param algorithms algorithms to run (uses HEX encoding for all)
      */
-    public BouncyCastleDigester(int markLimit, DigestDef.Algorithm... algorithms) {
-        super(buildDigesters(markLimit, algorithms));
+    public BouncyCastleDigester(DigestDef.Algorithm... algorithms) {
+        super(buildDigesters(algorithms));
     }
 
-    private static Digester[] buildDigesters(int markLimit, List<DigestDef> digests) {
+    private static Digester[] buildDigesters(List<DigestDef> digests) {
         Digester[] digesters = new Digester[digests.size()];
         int i = 0;
         for (DigestDef def : digests) {
             Encoder encoder = getEncoder(def.getEncoding());
-            digesters[i++] = new BCInputStreamDigester(markLimit,
+            digesters[i++] = new BCInputStreamDigester(
                     def.getAlgorithm().getJavaName(),
                     def.getMetadataKey(),
                     encoder);
@@ -73,13 +73,13 @@ public class BouncyCastleDigester extends CompositeDigester {
         return digesters;
     }
 
-    private static Digester[] buildDigesters(int markLimit, DigestDef.Algorithm[] algorithms) {
+    private static Digester[] buildDigesters(DigestDef.Algorithm[] algorithms) {
         Digester[] digesters = new Digester[algorithms.length];
         Encoder encoder = getEncoder(DigestDef.Encoding.HEX);
         int i = 0;
         for (DigestDef.Algorithm algorithm : algorithms) {
             DigestDef def = new DigestDef(algorithm, DigestDef.Encoding.HEX);
-            digesters[i++] = new BCInputStreamDigester(markLimit,
+            digesters[i++] = new BCInputStreamDigester(
                     algorithm.getJavaName(),
                     def.getMetadataKey(),
                     encoder);
@@ -123,9 +123,8 @@ public class BouncyCastleDigester extends CompositeDigester {
 
     private static class BCInputStreamDigester extends InputStreamDigester {
 
-        public BCInputStreamDigester(int markLimit, String algorithm, String algorithmKeyName,
-                                     Encoder encoder) {
-            super(markLimit, algorithm, algorithmKeyName, encoder);
+        public BCInputStreamDigester(String algorithm, String algorithmKeyName, Encoder encoder) {
+            super(algorithm, algorithmKeyName, encoder);
             try {
                 MessageDigest.getInstance(algorithm, getProvider());
             } catch (NoSuchAlgorithmException e) {
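
A brief construction sketch mirroring the factory's javadoc; the SHA3_256 and BASE32 enum constants are inferred from the JSON configs in this commit and should be treated as assumptions:

    List<DigestDef> digests = new ArrayList<>();
    digests.add(new DigestDef(DigestDef.Algorithm.MD5, DigestDef.Encoding.HEX));
    digests.add(new DigestDef(DigestDef.Algorithm.SHA3_256, DigestDef.Encoding.BASE32));
    Digester digester = new BouncyCastleDigester(digests);
    // one pass over the stream computes both digests, then the stream is rewound
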
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigesterFactory.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigesterFactory.java
index 895880f246..a8a1894586 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigesterFactory.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/BouncyCastleDigesterFactory.java
@@ -27,7 +27,7 @@ import org.apache.tika.digest.DigesterFactory;
 /**
  * Factory for {@link BouncyCastleDigester} with configurable algorithms and encodings.
  * <p>
- * Default: markLimit = 1000000, MD5 with HEX encoding.
+ * Default: MD5 with HEX encoding.
  * <p>
  * BouncyCastle supports additional algorithms beyond the standard Java ones,
  * such as SHA3-256, SHA3-384, SHA3-512.
@@ -37,7 +37,6 @@ import org.apache.tika.digest.DigesterFactory;
  * {
  *   "digesterFactory": {
  *     "bouncy-castle-digester-factory": {
- *       "markLimit": 1000000,
  *       "digests": [
  *         { "algorithm": "MD5" },
  *         { "algorithm": "SHA3_256", "encoding": "BASE32" }
@@ -50,7 +49,6 @@ import org.apache.tika.digest.DigesterFactory;
 @TikaComponent
 public class BouncyCastleDigesterFactory implements DigesterFactory {
 
-    private int markLimit = 1000000;
     private List<DigestDef> digests = new ArrayList<>();
 
     public BouncyCastleDigesterFactory() {
@@ -59,15 +57,7 @@ public class BouncyCastleDigesterFactory implements DigesterFactory {
 
     @Override
     public Digester build() {
-        return new BouncyCastleDigester(markLimit, digests);
-    }
-
-    public int getMarkLimit() {
-        return markLimit;
-    }
-
-    public void setMarkLimit(int markLimit) {
-        this.markLimit = markLimit;
+        return new BouncyCastleDigester(digests);
     }
 
     public List<DigestDef> getDigests() {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigester.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigester.java
index 4ae544ff64..0f5185b0f5 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigester.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigester.java
@@ -32,38 +32,33 @@ import org.apache.tika.digest.InputStreamDigester;
  * Implementation of {@link Digester}
  * that relies on commons.codec.digest.DigestUtils to calculate digest hashes.
  * <p>
- * This digester tries to use the regular mark/reset protocol on the InputStream.
- * However, this wraps an internal BoundedInputStream, and if the InputStream
- * is not fully read, then this will reset the stream and
- * spool the InputStream to disk (via TikaInputStream) and then digest the file.
+ * This digester uses {@link org.apache.tika.io.TikaInputStream#enableRewind()} and
+ * {@link org.apache.tika.io.TikaInputStream#rewind()} to read the entire stream,
+ * compute the digest, and then rewind for subsequent processing.
  */
 public class CommonsDigester extends CompositeDigester {
 
     /**
-     * @param markLimit limit for mark/reset; after this limit is hit, the
-     *                  stream is reset and spooled to disk
-     * @param digests   list of digest definitions (algorithm + encoding pairs)
+     * @param digests list of digest definitions (algorithm + encoding pairs)
      */
-    public CommonsDigester(int markLimit, List<DigestDef> digests) {
-        super(buildDigesters(markLimit, digests));
+    public CommonsDigester(List<DigestDef> digests) {
+        super(buildDigesters(digests));
     }
 
     /**
-     * @param markLimit  limit for mark/reset; after this limit is hit, the
-     *                   stream is reset and spooled to disk
      * @param algorithms algorithms to run (uses HEX encoding for all)
      */
-    public CommonsDigester(int markLimit, DigestDef.Algorithm... algorithms) {
-        super(buildDigesters(markLimit, algorithms));
+    public CommonsDigester(DigestDef.Algorithm... algorithms) {
+        super(buildDigesters(algorithms));
     }
 
-    private static Digester[] buildDigesters(int markLimit, List<DigestDef> digests) {
+    private static Digester[] buildDigesters(List<DigestDef> digests) {
         Digester[] digesters = new Digester[digests.size()];
         int i = 0;
         for (DigestDef def : digests) {
             checkSupported(def.getAlgorithm());
             Encoder encoder = getEncoder(def.getEncoding());
-            digesters[i++] = new InputStreamDigester(markLimit,
+            digesters[i++] = new InputStreamDigester(
                     def.getAlgorithm().getJavaName(),
                     def.getMetadataKey(),
                     encoder);
@@ -71,14 +66,14 @@ public class CommonsDigester extends CompositeDigester {
         return digesters;
     }
 
-    private static Digester[] buildDigesters(int markLimit, DigestDef.Algorithm[] algorithms) {
+    private static Digester[] buildDigesters(DigestDef.Algorithm[] algorithms) {
         Digester[] digesters = new Digester[algorithms.length];
         Encoder encoder = getEncoder(DigestDef.Encoding.HEX);
         int i = 0;
         for (DigestDef.Algorithm algorithm : algorithms) {
             checkSupported(algorithm);
             DigestDef def = new DigestDef(algorithm, DigestDef.Encoding.HEX);
-            digesters[i++] = new InputStreamDigester(markLimit,
+            digesters[i++] = new InputStreamDigester(
                     algorithm.getJavaName(),
                     def.getMetadataKey(),
                     encoder);
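
Callers now construct CommonsDigester without a markLimit; the wiring below mirrors SkipContainerDocumentDigestTest in this commit:

    AutoDetectParserConfig config = new AutoDetectParserConfig();
    config.digester(new CommonsDigester(DigestDef.Algorithm.MD5));
    config.setSkipContainerDocumentDigest(false);
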
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigesterFactory.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigesterFactory.java
index 8d26fbce16..b141c7340e 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigesterFactory.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-digest-commons/src/main/java/org/apache/tika/parser/digestutils/CommonsDigesterFactory.java
@@ -27,14 +27,13 @@ import org.apache.tika.digest.DigesterFactory;
 /**
  * Factory for {@link CommonsDigester} with configurable algorithms and encodings.
  * <p>
- * Default: markLimit = 1000000, MD5 with HEX encoding.
+ * Default: MD5 with HEX encoding.
  * <p>
  * Example JSON configuration:
  * <pre>
  * {
  *   "digesterFactory": {
  *     "commons-digester": {
- *       "markLimit": 1000000,
  *       "digests": [
  *         { "algorithm": "MD5" },
  *         { "algorithm": "SHA256", "encoding": "BASE32" }
@@ -47,7 +46,6 @@ import org.apache.tika.digest.DigesterFactory;
 @TikaComponent
 public class CommonsDigesterFactory implements DigesterFactory {
 
-    private int markLimit = 1000000;
     private List<DigestDef> digests = new ArrayList<>();
 
     public CommonsDigesterFactory() {
@@ -56,15 +54,7 @@ public class CommonsDigesterFactory implements DigesterFactory {
 
     @Override
     public Digester build() {
-        return new CommonsDigester(markLimit, digests);
-    }
-
-    public int getMarkLimit() {
-        return markLimit;
-    }
-
-    public void setMarkLimit(int markLimit) {
-        this.markLimit = markLimit;
+        return new CommonsDigester(digests);
     }
 
     public List<DigestDef> getDigests() {
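
For programmatic configuration of the factory, a hedged sketch follows; setDigests(...) is assumed from the bean-property convention described in DigesterFactory's javadoc (only getDigests() appears in this diff):

    CommonsDigesterFactory factory = new CommonsDigesterFactory();
    factory.setDigests(List.of(new DigestDef(DigestDef.Algorithm.MD5, DigestDef.Encoding.HEX)));
    Digester digester = factory.build();   // analogous to the JSON config above
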
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/detect/ole/MiscOLEDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/detect/ole/MiscOLEDetector.java
index f887b2d7e7..fc37999a48 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/detect/ole/MiscOLEDetector.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/detect/ole/MiscOLEDetector.java
@@ -64,9 +64,6 @@ public class MiscOLEDetector implements Detector {
      */
     public static final MediaType QUATTROPRO = application("x-quattro-pro");
 
-
-    private int markLimit = 16 * 1024 * 1024;
-
     /**
      * Internal detection of the specific kind of OLE2 document, based on the
      * names of the top level streams within the file.
@@ -117,23 +114,6 @@ public class MiscOLEDetector implements Detector {
         return names;
     }
 
-    /**
-     * If a TikaInputStream is passed in to {@link #detect(InputStream, Metadata)},
-     * and there is not an underlying file, this detector will spool up to {@link #markLimit}
-     * to disk.  If the stream was read in entirety (e.g. the spooled file is not truncated),
-     * this detector will open the file with POI and perform detection.
-     * If the spooled file is truncated, the detector will return {@link #OLE} (or
-     * {@link MediaType#OCTET_STREAM} if there's no OLE header).
-     * <p>
-     * As of Tika 1.21, this detector respects the legacy behavior of not performing detection
-     * on a non-TikaInputStream.
-     *
-     * @param markLimit
-     */
-    public void setMarkLimit(int markLimit) {
-        this.markLimit = markLimit;
-    }
-
     private Set<String> getTopLevelNames(TikaInputStream stream) throws IOException {
         // Force the document stream to a (possibly temporary) file
         // so we don't modify the current position of the stream.
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/SkipContainerDocumentDigestTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/SkipContainerDocumentDigestTest.java
index 09397ff1f0..a211165f56 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/SkipContainerDocumentDigestTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/SkipContainerDocumentDigestTest.java
@@ -47,7 +47,7 @@ public class SkipContainerDocumentDigestTest extends TikaTest {
     public void testDigestContainerAndEmbedded() throws Exception {
         // skipContainerDocumentDigest = false means digest everything
         AutoDetectParserConfig config = new AutoDetectParserConfig();
-        config.digester(new CommonsDigester(100000, DigestDef.Algorithm.MD5));
+        config.digester(new CommonsDigester(DigestDef.Algorithm.MD5));
         config.setSkipContainerDocumentDigest(false);
 
         AutoDetectParser parser = new AutoDetectParser();
@@ -71,7 +71,7 @@ public class SkipContainerDocumentDigestTest extends TikaTest {
     public void testSkipContainerDigestOnly() throws Exception {
         // skipContainerDocumentDigest = true means skip container, digest only embedded
         AutoDetectParserConfig config = new AutoDetectParserConfig();
-        config.digester(new CommonsDigester(100000, DigestDef.Algorithm.MD5));
+        config.digester(new CommonsDigester(DigestDef.Algorithm.MD5));
         config.setSkipContainerDocumentDigest(true);
 
         AutoDetectParser parser = new AutoDetectParser();
@@ -95,7 +95,7 @@ public class SkipContainerDocumentDigestTest extends TikaTest {
     public void testSkipContainerDocumentDigestMarkerInParseContext() throws Exception {
         // Test that the SkipContainerDocumentDigest marker in ParseContext works
         AutoDetectParserConfig config = new AutoDetectParserConfig();
-        config.digester(new CommonsDigester(100000, DigestDef.Algorithm.MD5));
+        config.digester(new CommonsDigester(DigestDef.Algorithm.MD5));
         config.setSkipContainerDocumentDigest(false); // Config says digest all
 
         AutoDetectParser parser = new AutoDetectParser();
@@ -145,7 +145,6 @@ public class SkipContainerDocumentDigestTest extends TikaTest {
     public void testDigestWithFactory() throws Exception {
         // Test using the factory pattern
         CommonsDigesterFactory factory = new CommonsDigesterFactory();
-        factory.setMarkLimit(100000);
 
         AutoDetectParserConfig config = new AutoDetectParserConfig();
         config.setDigesterFactory(factory);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json
index 9245331ec5..12b49d6267 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.json
@@ -6,7 +6,6 @@
     "throwOnZeroBytes": false,
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 100000,
         "digests": [
           { "algorithm": "SHA256" }
         ]
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json
index fed21bc5af..5ac209517f 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json
@@ -3,7 +3,6 @@
     "outputThreshold": 1000000,
     "digesterFactory": {
       "bouncy-castle-digester-factory": {
-        "markLimit": 1000000,
         "digests": [
           { "algorithm": "SHA1", "encoding": "BASE32" }
         ]
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json
index 770fba7ffe..53bfd01732 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json
@@ -3,7 +3,6 @@
     "outputThreshold": 1000000,
     "digesterFactory": {
       "bouncy-castle-digester-factory": {
-        "markLimit": 1000000,
         "digests": [
           { "algorithm": "MD2" },
           { "algorithm": "MD5" },
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json
index 830d8c0809..b2e23ad974 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json
@@ -3,7 +3,6 @@
     "outputThreshold": 1000000,
     "digesterFactory": {
       "bouncy-castle-digester-factory": {
-        "markLimit": 1000000,
         "digests": [
           { "algorithm": "MD5" },
           { "algorithm": "SHA256" },
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json
index 2a2634a88e..c37e6965f2 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json
@@ -3,7 +3,6 @@
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 1000000,
         "digests": [
           { "algorithm": "MD2" },
           { "algorithm": "MD5" },
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
index cf7c3874a0..60825fe974 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
@@ -12,7 +12,6 @@
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 100000,
         "digests": [
           { "algorithm": "SHA256", "encoding": "BASE32" },
           { "algorithm": "MD5" }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json
index ed2145a404..8ed562166a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json
@@ -4,7 +4,6 @@
     "skipContainerDocumentDigest": true,
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 100000,
         "digests": [
           { "algorithm": "SHA256", "encoding": "BASE32" },
           { "algorithm": "MD5" }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json
index 004e6ea753..50bbd90b99 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json
@@ -3,7 +3,6 @@
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 100000,
         "digests": [
           { "algorithm": "SHA256", "encoding": "BASE32" },
           { "algorithm": "MD5" }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-md5-digest.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-md5-digest.json
index caffd0c709..a13a80c7db 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-md5-digest.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-md5-digest.json
@@ -2,7 +2,6 @@
   "auto-detect-parser": {
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 100000,
         "digests": [
           { "algorithm": "MD5" }
         ]
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
index 1872313a9c..3ca9aa461a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
@@ -4,7 +4,6 @@
     "skipContainerDocumentDigest": true,
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 100000,
         "digests": [
           { "algorithm": "SHA256", "encoding": "BASE32" },
           { "algorithm": "MD5" }
diff --git a/tika-serialization/src/test/java/org/apache/tika/digest/MockDigesterFactory.java b/tika-serialization/src/test/java/org/apache/tika/digest/MockDigesterFactory.java
index a8cbc69301..857267d74b 100644
--- a/tika-serialization/src/test/java/org/apache/tika/digest/MockDigesterFactory.java
+++ b/tika-serialization/src/test/java/org/apache/tika/digest/MockDigesterFactory.java
@@ -23,7 +23,7 @@ public class MockDigesterFactory implements DigesterFactory {
 
     @Override
     public Digester build() {
-        return new InputStreamDigester(1000000, "SHA-256", "X-TIKA:digest:SHA-256", new MockEncoder());
+        return new InputStreamDigester("SHA-256", "X-TIKA:digest:SHA-256", new MockEncoder());
     }
 
     private static class MockEncoder implements Encoder {
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
index 3a1389b140..d2a290fe91 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
@@ -74,7 +74,6 @@ public abstract class CXFTestBase {
                 "outputThreshold": 1000000,
                 "digesterFactory": {
                   "commons-digester-factory": {
-                    "markLimit": 100000,
                     "digests": [
                       { "algorithm": "MD5" }
                     ]
diff --git a/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json b/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json
index f8284e5e4d..bcae4fb7e6 100644
--- a/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json
+++ b/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json
@@ -50,7 +50,6 @@
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 100000,
         "digests": [
           { "algorithm": "MD5" },
           { "algorithm": "SHA1", "encoding": "BASE32" }
diff --git a/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json b/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json
index f8284e5e4d..bcae4fb7e6 100644
--- a/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json
+++ b/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json
@@ -50,7 +50,6 @@
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 100000,
         "digests": [
           { "algorithm": "MD5" },
           { "algorithm": "SHA1", "encoding": "BASE32" }
diff --git a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json
index fdf80cb998..d134099806 100644
--- a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json
+++ b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json
@@ -13,7 +13,6 @@
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 100000,
         "digests": [
           { "algorithm": "MD5" },
           { "algorithm": "SHA1", "encoding": "BASE32" }
diff --git a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json
index 97646bc879..dd199e46d2 100644
--- a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json
+++ b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json
@@ -18,7 +18,6 @@
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 100000,
         "digests": [
           { "algorithm": "MD5" },
           { "algorithm": "SHA1", "encoding": "BASE32" }
diff --git a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json
index 8d3f74ed3c..4f30e99b4b 100644
--- a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json
+++ b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json
@@ -18,7 +18,6 @@
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
-        "markLimit": 100000,
         "digests": [
           { "algorithm": "MD5" },
           { "algorithm": "SHA1", "encoding": "BASE32" }
