This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4624
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 8d8bdb37f054d16c78a1b0ba8756ec75b869b139
Merge: 500ef0dbe1 5f9a808ac3
Author: tallison <[email protected]>
AuthorDate: Thu Jan 15 17:45:44 2026 -0500

    TIKA-4624 -- clean up marklimits

 .github/workflows/main-jdk17-build.yml             |   2 +-
 docs/spooling.adoc                                 |  72 ++++-----
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |   2 +-
 .../org/apache/tika/digest/DigesterFactory.java    |   3 +-
 .../apache/tika/digest/InputStreamDigester.java    | 115 +++-----------
 .../java/org/apache/tika/io/ByteArraySource.java   |  26 +++
 .../org/apache/tika/io/CachingInputStream.java     |  19 ++-
 .../java/org/apache/tika/io/CachingSource.java     | 174 ++++++++++++++++++---
 .../main/java/org/apache/tika/io/FileSource.java   |  26 +++
 .../java/org/apache/tika/io/TikaInputSource.java   |  13 ++
 .../java/org/apache/tika/io/TikaInputStream.java   |  47 +++++-
 .../parser/multiple/AbstractMultipleParser.java    |   3 +
 .../org/apache/tika/io/TikaInputStreamTest.java    | 164 ++++++++++++++++++-
 .../org/apache/tika/parser/crypto/TSDParser.java   |   2 +
 .../org/apache/tika/parser/pkg/PackageParser.java  |   4 +-
 .../org/apache/tika/zip/utils/ZipSalvager.java     |   2 +
 .../digest/SkipContainerDocumentDigestTest.java    |   7 +-
 .../apache/tika/digest/MockDigesterFactory.java    |   2 +-
 .../org/apache/tika/server/core/CXFTestBase.java   |   1 -
 19 files changed, 512 insertions(+), 172 deletions(-)

diff --cc tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index cb25539678,cb25539678..9bf9990271
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@@ -433,7 -433,7 +433,7 @@@ public class TikaCLI 
          } else if (arg.startsWith("--digest=")) {
              String algorithmName = arg.substring("--digest=".length()).toUpperCase(Locale.ROOT);
              DigestDef.Algorithm algorithm = DigestDef.Algorithm.valueOf(algorithmName);
--            digester = new CommonsDigester(MAX_MARK, algorithm);
++            digester = new CommonsDigester(algorithm);
          } else if (arg.startsWith("-e")) {
              encoding = arg.substring("-e".length());
          } else if (arg.startsWith("--encoding=")) {
diff --cc tika-core/src/main/java/org/apache/tika/digest/DigesterFactory.java
index 66744718aa,66744718aa..1b9215d226
--- a/tika-core/src/main/java/org/apache/tika/digest/DigesterFactory.java
+++ b/tika-core/src/main/java/org/apache/tika/digest/DigesterFactory.java
@@@ -19,7 -19,7 +19,7 @@@ package org.apache.tika.digest
  /**
   * Factory interface for creating Digester instances.
   * Implementations should be annotated with {@code @TikaComponent} and
-- * provide bean properties for configuration (e.g., markLimit, digests).
++ * provide bean properties for configuration (e.g., digests).
   * <p>
   * This is used in {@link org.apache.tika.parser.AutoDetectParserConfig} to
   * configure digesting in the AutoDetectParser.
@@@ -29,7 -29,7 +29,6 @@@
   * "auto-detect-parser": {
   *   "digesterFactory": {
   *     "commons-digester-factory": {
-- *       "markLimit": 1000000,
   *       "digests": [
   *         { "algorithm": "MD5" },
   *         { "algorithm": "SHA256", "encoding": "BASE32" }
diff --cc tika-core/src/main/java/org/apache/tika/digest/InputStreamDigester.java
index a384137300,a384137300..2d1180435b
--- a/tika-core/src/main/java/org/apache/tika/digest/InputStreamDigester.java
+++ b/tika-core/src/main/java/org/apache/tika/digest/InputStreamDigester.java
@@@ -16,69 -16,69 +16,37 @@@
   */
  package org.apache.tika.digest;
  
--import java.io.File;
--import java.io.FileInputStream;
  import java.io.IOException;
--import java.io.InputStream;
  import java.security.MessageDigest;
  import java.security.NoSuchAlgorithmException;
  import java.security.Provider;
  
--import org.apache.tika.io.BoundedInputStream;
  import org.apache.tika.io.TikaInputStream;
  import org.apache.tika.metadata.Metadata;
  import org.apache.tika.parser.ParseContext;
  import org.apache.tika.utils.StringUtils;
  
--// TODO: TIKA-FOLLOWUP - With TikaInputStream.rewind(), markLimit is no longer needed.
--//  The digester can simply read the entire stream, then call tis.rewind().
--//  This would simplify this class and allow removing markLimit from:
--//  - InputStreamDigester, CommonsDigester, BouncyCastleDigester
--//  - CommonsDigesterFactory, BouncyCastleDigesterFactory (setMarkLimit/getMarkLimit)
--//  - All JSON config files that specify markLimit for digesters
++/**
++ * Digester that uses {@link TikaInputStream#enableRewind()} and {@link TikaInputStream#rewind()}
++ * to read the entire stream for digesting, then rewind for subsequent processing.
++ */
  public class InputStreamDigester implements Digester {
  
      private final String algorithm;
      private final String metadataKey;
      private final Encoder encoder;
--    private final int markLimit;
  
      /**
--     * @param markLimit   limit in bytes to allow for mark/reset.  If the inputstream is longer
--     *                    than this limit, the stream will be reset and then spooled to a
--     *                    temporary file.
--     *                    Throws IllegalArgumentException if < 0.
       * @param algorithm   name of the digest algorithm to retrieve from the Provider
       * @param metadataKey the full metadata key to use when storing the digest
       *                    (e.g., "X-TIKA:digest:MD5" or "X-TIKA:digest:SHA256:BASE32")
       * @param encoder     encoder to convert the byte array returned from the digester to a
       *                    string
       */
--    public InputStreamDigester(int markLimit, String algorithm, String metadataKey,
--                               Encoder encoder) {
++    public InputStreamDigester(String algorithm, String metadataKey, Encoder encoder) {
          this.algorithm = algorithm;
          this.metadataKey = metadataKey;
          this.encoder = encoder;
--        this.markLimit = markLimit;
--
--        if (markLimit < 0) {
--            throw new IllegalArgumentException("markLimit must be >= 0");
--        }
--    }
--
--    /**
--     * Copied from commons-codec
--     */
--    private static MessageDigest updateDigest(MessageDigest digest, InputStream data,
--                                              Metadata metadata) throws IOException {
--        byte[] buffer = new byte[1024];
--        long total = 0;
--        for (int read = data.read(buffer, 0, 1024); read > -1; read = data.read(buffer, 0, 1024)) {
--            digest.update(buffer, 0, read);
--            total += read;
--        }
--        setContentLength(total, metadata);
--        return digest;
      }
  
      private static void setContentLength(long length, Metadata metadata) {
@@@ -113,6 -113,6 +81,12 @@@
      }
  
      /**
++     * Digests the TikaInputStream and stores the result in metadata.
++     * <p>
++     * Uses {@link TikaInputStream#enableRewind()} to ensure the stream can be
++     * rewound after digesting, then calls {@link TikaInputStream#rewind()} to
++     * reset the stream for subsequent processing.
++     *
       * @param tis          TikaInputStream to digest
       * @param metadata     metadata in which to store the digest information
       * @param parseContext ParseContext -- not actually used yet, but there for future expansion
@@@ -121,66 -121,66 +95,21 @@@
      @Override
      public void digest(TikaInputStream tis, Metadata metadata, ParseContext parseContext)
              throws IOException {
--        if (tis.hasFile()) {
--            long sz = tis.getLength();
--            //if the inputstream has a file,
--            //and its size is greater than its mark limit,
--            //just digest the underlying file.
--            if (sz > markLimit) {
--                digestFile(tis.getFile(), sz, metadata);
--                return;
--            }
--        }
--
--        //try the usual mark/reset stuff.
--        //however, if you actually hit the bound,
--        //then stop and spool to file via TikaInputStream
--        BoundedInputStream bis = new BoundedInputStream(markLimit, tis);
--        boolean finishedStream = false;
--        bis.mark(markLimit + 1);
--        finishedStream = digestStream(bis, metadata);
--        bis.reset();
--        if (finishedStream) {
--            return;
--        }
--        //if the stream wasn't finished -- if the stream was longer than the mark limit --
--        //spool to File and digest that.
--        digestFile(tis.getFile(), -1, metadata);
--    }
--
--    private void digestFile(File f, long sz, Metadata m) throws IOException {
--        //only add it if it hasn't been populated already
--        if (StringUtils.isBlank(m.get(Metadata.CONTENT_LENGTH))) {
--            if (sz < 0) {
--                sz = f.length();
--            }
--            setContentLength(sz, m);
--        }
--        try (InputStream is = new FileInputStream(f)) {
--            digestStream(is, m);
--        }
--    }
++        tis.enableRewind();
  
--    /**
--     * @param is       input stream to read from
--     * @param metadata metadata for reporting the digest
--     * @return whether or not this finished the input stream
--     * @throws IOException
--     */
--    private boolean digestStream(InputStream is, Metadata metadata) throws IOException {
--        byte[] digestBytes;
          MessageDigest messageDigest = newMessageDigest();
++        byte[] buffer = new byte[8192];
++        long total = 0;
++        int read;
++        while ((read = tis.read(buffer)) != -1) {
++            messageDigest.update(buffer, 0, read);
++            total += read;
++        }
  
--        updateDigest(messageDigest, is, metadata);
--        digestBytes = messageDigest.digest();
++        setContentLength(total, metadata);
++        metadata.set(metadataKey, encoder.encode(messageDigest.digest()));
  
--        if (is instanceof BoundedInputStream) {
--            if (((BoundedInputStream) is).hasHitBound()) {
--                return false;
--            }
--        }
--        metadata.set(metadataKey, encoder.encode(digestBytes));
--        return true;
++        tis.rewind();
      }
  
  }
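
The caller-side contract of the new digest() deserves a quick illustration: read everything, record the digest and Content-Length, then hand back a stream positioned at 0. A minimal sketch, assuming Encoder lives in org.apache.tika.digest and is a single-method interface (String encode(byte[])) as its use above suggests; the Base64 lambda and sample bytes are illustrative only:

    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;
    import java.util.Base64;

    import org.apache.tika.digest.Encoder;
    import org.apache.tika.digest.InputStreamDigester;
    import org.apache.tika.io.TikaInputStream;
    import org.apache.tika.metadata.Metadata;
    import org.apache.tika.parser.ParseContext;

    public class DigestFlowSketch {
        public static void main(String[] args) throws Exception {
            byte[] data = "Hello, World!".getBytes(StandardCharsets.UTF_8);
            Metadata metadata = new Metadata();
            Encoder base64 = bytes -> Base64.getEncoder().encodeToString(bytes);
            InputStreamDigester digester =
                    new InputStreamDigester("SHA-256", "X-TIKA:digest:SHA-256", base64);
            try (TikaInputStream tis = TikaInputStream.get(new ByteArrayInputStream(data))) {
                // digest() calls tis.enableRewind() before reading and tis.rewind()
                // after, so no markLimit is needed and the stream comes back at 0.
                digester.digest(tis, metadata, new ParseContext());
                System.out.println(metadata.get("X-TIKA:digest:SHA-256"));
                System.out.println(tis.getPosition()); // 0 -- ready for parsing
            }
        }
    }
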
diff --cc tika-core/src/main/java/org/apache/tika/io/CachingSource.java
index 15a43d0b33,07c6f0fdc3..d84b98fdd9
--- a/tika-core/src/main/java/org/apache/tika/io/CachingSource.java
+++ b/tika-core/src/main/java/org/apache/tika/io/CachingSource.java
@@@ -24,34 -24,42 +24,47 @@@ import java.nio.file.Path
  
  import org.apache.commons.io.IOUtils;
  
 +import org.apache.tika.metadata.Metadata;
 +import org.apache.tika.utils.StringUtils;
 +
  /**
-  * Input source that caches bytes from a raw InputStream.
+  * Input source that wraps a raw InputStream with optional caching.
   * <p>
-  * Uses {@link CachingInputStream} to cache bytes as they are read,
-  * enabling mark/reset/seek operations. If the cache exceeds a threshold,
-  * it spills to a temporary file via {@link StreamCache}.
+  * Starts in passthrough mode using {@link BufferedInputStream} for basic
+  * mark/reset support. When {@link #enableRewind()} is called (at position 0),
+  * switches to caching mode using {@link CachingInputStream} which enables
+  * full rewind/seek capability.
+  * <p>
+  * If caching is not enabled, {@link #seekTo(long)} will fail for any position
+  * other than the current position.
   */
  class CachingSource extends InputStream implements TikaInputSource {
  
-     private CachingInputStream cachingStream;
-     private long length;
+     private final TemporaryResources tmp;
 +    private final Metadata metadata;
+     private long length;
+ 
+     // Passthrough mode: just a BufferedInputStream
+     private BufferedInputStream passthroughStream;
+     private long passthroughPosition;
+ 
+     // Caching mode: CachingInputStream for full rewind support
+     private CachingInputStream cachingStream;
  
      // After spilling to file, we switch to file-backed mode
      private Path spilledPath;
      private InputStream fileStream;
+     private long filePosition;  // Track position in file mode
  
 -    CachingSource(InputStream source, TemporaryResources tmp, long length) {
 +    CachingSource(InputStream source, TemporaryResources tmp, long length, Metadata metadata) {
+         this.tmp = tmp;
          this.length = length;
 +        this.metadata = metadata;
-         StreamCache cache = new StreamCache(tmp);
-         this.cachingStream = new CachingInputStream(
-                 source instanceof BufferedInputStream ? source : new BufferedInputStream(source),
-                 cache
-         );
+         // Start in passthrough mode
+         this.passthroughStream = source instanceof BufferedInputStream
+                 ? (BufferedInputStream) source
+                 : new BufferedInputStream(source);
+         this.passthroughPosition = 0;
      }
  
      @Override
@@@ -107,8 -222,18 +227,18 @@@
      }
  
      @Override
 -    public Path getPath(TemporaryResources tmp, String suffix) throws IOException {
 +    public Path getPath(String suffix) throws IOException {
          if (spilledPath == null) {
+             // If still in passthrough mode, enable caching first
+             if (cachingStream == null) {
+                 if (passthroughPosition != 0) {
+                     throw new IOException(
+                             "Cannot spill to file: position is " + 
passthroughPosition +
+                                     ", must be 0. Call enableRewind() before 
reading if you need file access.");
+                 }
+                 enableRewind();
+             }
+ 
              // Spill to file and switch to file-backed mode
              spilledPath = cachingStream.spillToFile(suffix);
  
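
The position-0 check above is the crux of the passthrough/caching split: once bytes have been consumed without caching, the source cannot reconstruct them for a spill file. A sketch of both outcomes, assuming TikaInputStream.getPath() delegates to the source's getPath(suffix) (the surrounding changes suggest this, but this hunk does not show it):

    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Path;

    import org.apache.tika.io.TikaInputStream;

    public class SpillSketch {
        public static void main(String[] args) throws Exception {
            byte[] data = "ABCDEFGHIJ".getBytes(StandardCharsets.UTF_8);

            // Caching mode: enableRewind() at position 0 makes later spills possible.
            try (TikaInputStream tis = TikaInputStream.get(new ByteArrayInputStream(data))) {
                tis.enableRewind();
                tis.read(new byte[5]);      // bytes are cached as they are read
                Path p = tis.getPath();     // ok: the cache spills to a temp file
                System.out.println(p);
            }

            // Passthrough mode: consumed bytes were not retained, so a spill at
            // position 5 fails with the IOException constructed above.
            try (TikaInputStream tis = TikaInputStream.get(new ByteArrayInputStream(data))) {
                tis.read(new byte[5]);
                tis.getPath();              // throws: "Cannot spill to file: position is 5 ..."
            }
        }
    }
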
diff --cc tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
index 9c0c05dcc4,bf8e0b7ba0..84676683a5
--- a/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
+++ b/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
@@@ -669,122 -673,158 +673,278 @@@ public class TikaInputStreamTest 
          }
      }
  
 +    // ========== CachingSource Tests ==========
 +
 +    @Test
 +    public void testCachingSourceUpdatesMetadataOnSpill() throws IOException {
 +        byte[] data = bytes("Hello, World!");
 +        Metadata metadata = new Metadata();
 +        // Don't set CONTENT_LENGTH - let CachingSource set it on spill
 +
 +        try (TemporaryResources tmp = new TemporaryResources()) {
 +            CachingSource source = new CachingSource(
 +                    new ByteArrayInputStream(data), tmp, -1, metadata);
++            source.enableRewind(); // Enable caching for spill support
 +
 +            // Read all data
 +            byte[] buffer = new byte[data.length];
 +            int totalRead = 0;
 +            int n;
 +            while ((n = source.read(buffer, totalRead, buffer.length - totalRead)) != -1) {
 +                totalRead += n;
 +                if (totalRead >= buffer.length) break;
 +            }
 +
 +            // Before spill, metadata should not have length
 +            assertNull(metadata.get(Metadata.CONTENT_LENGTH));
 +
 +            // Force spill to file
 +            Path path = source.getPath(".tmp");
 +            assertNotNull(path);
 +            assertTrue(Files.exists(path));
 +
 +            // After spill, metadata should have length
 +            assertEquals("13", metadata.get(Metadata.CONTENT_LENGTH));
 +
 +            source.close();
 +        }
 +    }
 +
 +    @Test
 +    public void testCachingSourceDoesNotOverwriteExistingMetadata() throws IOException {
 +        byte[] data = bytes("Hello, World!");
 +        Metadata metadata = new Metadata();
 +        // Pre-set CONTENT_LENGTH
 +        metadata.set(Metadata.CONTENT_LENGTH, "999");
 +
 +        try (TemporaryResources tmp = new TemporaryResources()) {
 +            CachingSource source = new CachingSource(
 +                    new ByteArrayInputStream(data), tmp, -1, metadata);
++            source.enableRewind(); // Enable caching for seek/spill support
 +
 +            // Read and spill
 +            IOUtils.toByteArray(source);
 +            source.seekTo(0);
 +            Path path = source.getPath(".tmp");
 +
 +            // Existing value should not be overwritten
 +            assertEquals("999", metadata.get(Metadata.CONTENT_LENGTH));
 +
 +            source.close();
 +        }
 +    }
 +
 +    @Test
 +    public void testCachingSourceSeekTo() throws IOException {
 +        byte[] data = bytes("ABCDEFGHIJ");
 +
 +        try (TemporaryResources tmp = new TemporaryResources()) {
 +            CachingSource source = new CachingSource(
 +                    new ByteArrayInputStream(data), tmp, -1, null);
++            source.enableRewind(); // Enable caching for seek support
 +
 +            // Read first 5 bytes
 +            byte[] buf = new byte[5];
 +            source.read(buf);
 +            assertEquals("ABCDE", str(buf));
 +
 +            // Seek back to position 2
 +            source.seekTo(2);
 +
 +            // Read again
 +            buf = new byte[3];
 +            source.read(buf);
 +            assertEquals("CDE", str(buf));
 +
 +            source.close();
 +        }
 +    }
 +
 +    @Test
 +    public void testCachingSourceAfterSpill() throws IOException {
 +        byte[] data = bytes("ABCDEFGHIJ");
 +
 +        try (TemporaryResources tmp = new TemporaryResources()) {
 +            CachingSource source = new CachingSource(
 +                    new ByteArrayInputStream(data), tmp, -1, null);
++            source.enableRewind(); // Enable caching for spill/seek support
 +
 +            // Read first 5 bytes
 +            byte[] buf = new byte[5];
 +            source.read(buf);
 +            assertEquals("ABCDE", str(buf));
 +
 +            // Force spill
 +            Path path = source.getPath(".tmp");
 +            assertTrue(Files.exists(path));
 +
 +            // Continue reading after spill
 +            buf = new byte[5];
 +            source.read(buf);
 +            assertEquals("FGHIJ", str(buf));
 +
 +            // Seek back and read again
 +            source.seekTo(0);
 +            buf = new byte[10];
 +            source.read(buf);
 +            assertEquals("ABCDEFGHIJ", str(buf));
 +
 +            source.close();
 +        }
 +    }
 +
+     // ========== enableRewind() Tests ==========
+ 
+     @Test
+     public void testEnableRewindByteArrayNoOp() throws Exception {
+         // ByteArraySource is always rewindable - enableRewind() is no-op
+         byte[] data = bytes("Hello, World!");
+         try (TikaInputStream tis = TikaInputStream.get(data)) {
+             tis.enableRewind(); // Should be no-op
+ 
+             byte[] buf = new byte[5];
+             tis.read(buf);
+             assertEquals("Hello", str(buf));
+ 
+             tis.rewind();
+             assertEquals(0, tis.getPosition());
+ 
+             buf = new byte[5];
+             tis.read(buf);
+             assertEquals("Hello", str(buf));
+         }
+     }
+ 
+     @Test
+     public void testEnableRewindFileNoOp() throws Exception {
+         // FileSource is always rewindable - enableRewind() is no-op
+         Path tempFile = createTempFile("Hello, World!");
+         try (TikaInputStream tis = TikaInputStream.get(tempFile)) {
+             tis.enableRewind(); // Should be no-op
+ 
+             byte[] buf = new byte[5];
+             tis.read(buf);
+             assertEquals("Hello", str(buf));
+ 
+             tis.rewind();
+             assertEquals(0, tis.getPosition());
+ 
+             buf = new byte[5];
+             tis.read(buf);
+             assertEquals("Hello", str(buf));
+         }
+     }
+ 
+     @Test
+     public void testEnableRewindStreamEnablesCaching() throws Exception {
+         // CachingSource starts in passthrough mode, enableRewind() enables caching
+         byte[] data = bytes("Hello, World!");
+         try (TikaInputStream tis = TikaInputStream.get(new ByteArrayInputStream(data))) {
+             tis.enableRewind(); // Enable caching mode
+ 
+             byte[] buf = new byte[5];
+             tis.read(buf);
+             assertEquals("Hello", str(buf));
+ 
+             tis.rewind();
+             assertEquals(0, tis.getPosition());
+ 
+             buf = new byte[5];
+             tis.read(buf);
+             assertEquals("Hello", str(buf));
+         }
+     }
+ 
+     @Test
+     public void testEnableRewindAfterReadThrows() throws Exception {
+         // enableRewind() must be called at position 0
+         byte[] data = bytes("Hello, World!");
+         try (TikaInputStream tis = TikaInputStream.get(new ByteArrayInputStream(data))) {
+             tis.read(); // Read one byte, position is now 1
+             assertEquals(1, tis.getPosition());
+ 
+             assertThrows(IllegalStateException.class, tis::enableRewind,
+                     "enableRewind() should throw when position != 0");
+         }
+     }
+ 
+     @Test
+     public void testEnableRewindMultipleCallsNoOp() throws Exception {
+         // Multiple enableRewind() calls should be safe (no-op after first)
+         byte[] data = bytes("Hello, World!");
+         try (TikaInputStream tis = TikaInputStream.get(new ByteArrayInputStream(data))) {
+             tis.enableRewind();
+             tis.enableRewind(); // Should be no-op
+             tis.enableRewind(); // Should be no-op
+ 
+             byte[] buf = readAllBytes(tis);
+             assertEquals("Hello, World!", str(buf));
+ 
+             tis.rewind();
+             buf = readAllBytes(tis);
+             assertEquals("Hello, World!", str(buf));
+         }
+     }
+ 
+     @Test
+     public void testStreamWithoutEnableRewindCannotRewind() throws Exception {
+         // Without enableRewind(), CachingSource is in passthrough mode
+         // rewind() should fail after reading in passthrough mode
+         byte[] data = bytes("Hello, World!");
+         try (TikaInputStream tis = TikaInputStream.get(new ByteArrayInputStream(data))) {
+             // Don't call enableRewind()
+ 
+             byte[] buf = new byte[5];
+             tis.read(buf);
+             assertEquals("Hello", str(buf));
+ 
+             // rewind() internally calls reset() which calls seekTo()
+             // In passthrough mode, seekTo() fails if not at current position
+             assertThrows(IOException.class, tis::rewind,
+                     "rewind() should fail in passthrough mode after reading");
+         }
+     }
+ 
+     @Test
+     public void testMarkResetThenEnableRewind() throws Exception {
+         // Test transitioning from passthrough mode (using BufferedInputStream's mark/reset)
+         // to caching mode via enableRewind()
+         byte[] data = bytes("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+         try (TikaInputStream tis = TikaInputStream.get(new ByteArrayInputStream(data))) {
+             // Passthrough mode - use BufferedInputStream's mark/reset
+             tis.mark(100);
+             byte[] buf = new byte[5];
+             tis.read(buf);
+             assertEquals("ABCDE", str(buf));
+ 
+             tis.reset();  // Back to 0
+             assertEquals(0, tis.getPosition());
+ 
+             // Another mark/reset cycle in passthrough mode
+             tis.mark(100);
+             buf = new byte[10];
+             tis.read(buf);
+             assertEquals("ABCDEFGHIJ", str(buf));
+ 
+             tis.reset();  // Back to 0 again
+             assertEquals(0, tis.getPosition());
+ 
+             // Now enable rewind (switches to caching mode)
+             tis.enableRewind();
+ 
+             // Should still work with caching mode
+             buf = new byte[5];
+             tis.read(buf);
+             assertEquals("ABCDE", str(buf));
+ 
+             tis.rewind();  // Full rewind now works
+             assertEquals(0, tis.getPosition());
+ 
+             buf = readAllBytes(tis);
+             assertEquals("ABCDEFGHIJKLMNOPQRSTUVWXYZ", str(buf));
+         }
+     }
+ 
      // ========== Helper Methods ==========
  
      private TikaInputStream createTikaInputStream(byte[] data, boolean fileBacked) throws IOException {
diff --cc tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/SkipContainerDocumentDigestTest.java
index 09397ff1f0,09397ff1f0..a211165f56
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/SkipContainerDocumentDigestTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/digest/SkipContainerDocumentDigestTest.java
@@@ -47,7 -47,7 +47,7 @@@ public class SkipContainerDocumentDiges
      public void testDigestContainerAndEmbedded() throws Exception {
          // skipContainerDocumentDigest = false means digest everything
          AutoDetectParserConfig config = new AutoDetectParserConfig();
--        config.digester(new CommonsDigester(100000, DigestDef.Algorithm.MD5));
++        config.digester(new CommonsDigester(DigestDef.Algorithm.MD5));
          config.setSkipContainerDocumentDigest(false);
  
          AutoDetectParser parser = new AutoDetectParser();
@@@ -71,7 -71,7 +71,7 @@@
      public void testSkipContainerDigestOnly() throws Exception {
          // skipContainerDocumentDigest = true means skip container, digest only embedded
          AutoDetectParserConfig config = new AutoDetectParserConfig();
--        config.digester(new CommonsDigester(100000, DigestDef.Algorithm.MD5));
++        config.digester(new CommonsDigester(DigestDef.Algorithm.MD5));
          config.setSkipContainerDocumentDigest(true);
  
          AutoDetectParser parser = new AutoDetectParser();
@@@ -95,7 -95,7 +95,7 @@@
      public void testSkipContainerDocumentDigestMarkerInParseContext() throws Exception {
          // Test that the SkipContainerDocumentDigest marker in ParseContext works
          AutoDetectParserConfig config = new AutoDetectParserConfig();
--        config.digester(new CommonsDigester(100000, DigestDef.Algorithm.MD5));
++        config.digester(new CommonsDigester(DigestDef.Algorithm.MD5));
          config.setSkipContainerDocumentDigest(false); // Config says digest all
  
          AutoDetectParser parser = new AutoDetectParser();
@@@ -145,7 -145,7 +145,6 @@@
      public void testDigestWithFactory() throws Exception {
          // Test using the factory pattern
          CommonsDigesterFactory factory = new CommonsDigesterFactory();
--        factory.setMarkLimit(100000);
  
          AutoDetectParserConfig config = new AutoDetectParserConfig();
          config.setDigesterFactory(factory);
diff --cc tika-serialization/src/test/java/org/apache/tika/digest/MockDigesterFactory.java
index a8cbc69301,a8cbc69301..857267d74b
--- a/tika-serialization/src/test/java/org/apache/tika/digest/MockDigesterFactory.java
+++ b/tika-serialization/src/test/java/org/apache/tika/digest/MockDigesterFactory.java
@@@ -23,7 -23,7 +23,7 @@@ public class MockDigesterFactory implem
  
      @Override
      public Digester build() {
--        return new InputStreamDigester(1000000, "SHA-256", "X-TIKA:digest:SHA-256", new MockEncoder());
++        return new InputStreamDigester("SHA-256", "X-TIKA:digest:SHA-256", new MockEncoder());
      }
  
      private static class MockEncoder implements Encoder {
diff --cc tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
index 3a1389b140,3a1389b140..d2a290fe91
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
@@@ -74,7 -74,7 +74,6 @@@ public abstract class CXFTestBase 
                  "outputThreshold": 1000000,
                  "digesterFactory": {
                    "commons-digester-factory": {
--                    "markLimit": 100000,
                      "digests": [
                        { "algorithm": "MD5" }
                      ]
