This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new a34d52da20 TIKA-4618 -- improve spooling strategy configuration (#2533)
a34d52da20 is described below

commit a34d52da20815cdc19d8fc46cfd5284f219cd730
Author: Tim Allison <[email protected]>
AuthorDate: Thu Jan 15 08:41:52 2026 -0500

    TIKA-4618 -- improve spooling strategy configuration (#2533)
---
 docs/spooling.adoc                                 | 211 +++++++++++++++++++++
 .../org/apache/tika/detect/DefaultDetector.java    | 101 ++++++++--
 .../apache/tika/digest/InputStreamDigester.java    |   6 +
 .../java/org/apache/tika/io/SpoolingStrategy.java  | 140 ++++++++++++++
 .../apache/tika/metadata/TikaCoreProperties.java   |   7 +
 .../org/apache/tika/parser/AutoDetectParser.java   |  32 ----
 .../apache/tika/parser/AutoDetectParserConfig.java |  21 +-
 .../apache/tika/sax/BodyContentHandlerTest.java    |   3 +-
 .../tika/parser/apple/AppleSingleFileParser.java   |  16 +-
 .../org/apache/tika/parser/crypto/TSDParser.java   |  44 +++--
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |   2 +-
 .../org/apache/tika/parser/pkg/tika-config.xml     |  31 ---
 .../apache/tika/parser/warc/WARCParserTest.java    |   1 +
 .../apache/tika/detect/TestDetectorLoading.java    |   3 +-
 .../apache/tika/parser/crypto/TSDParserTest.java   |   3 +-
 .../configs/tika-config-bc-digests-base32.json     |   1 -
 .../configs/tika-config-bc-digests-basic.json      |   1 -
 .../configs/tika-config-bc-digests-multiple.json   |   1 -
 .../configs/tika-config-commons-digests-basic.json |   1 -
 .../configs/tika-config-digests-pdf-only.json      |   1 -
 .../tika-config-digests-skip-container.json        |   1 -
 .../resources/configs/tika-config-digests.json     |   1 -
 ...a-config-doubling-custom-handler-decorator.json |   1 -
 .../resources/configs/tika-config-no-names.json    |   1 -
 ...a-config-upcasing-custom-handler-decorator.json |   1 -
 .../resources/configs/tika-config-with-names.json  |   1 -
 .../configs/tika-config-write-filter.json          |   1 -
 .../test/resources/configs/tika-config-basic.json  |   1 -
 .../resources/configs/tika-config-passback.json    |   1 -
 .../resources/configs/tika-config-truncate.json    |   1 -
 .../resources/configs/tika-config-uppercasing.json |   1 -
 .../apache/tika/config/loader/TikaJsonConfig.java  |   2 +-
 .../org/apache/tika/serialization/TikaModule.java  |  16 ++
 .../tika/config/loader/ConfigLoaderTest.java       |  16 ++
 .../tika/config/loader/TikaJsonConfigTest.java     |   6 +-
 .../apache/tika/config/loader/TikaLoaderTest.java  |  52 +++--
 .../test/resources/configs/TIKA-3695-exclude.json  |   1 -
 .../test/resources/configs/TIKA-3695-fields.json   |   1 -
 .../src/test/resources/configs/TIKA-3695.json      |   1 -
 .../configs/TIKA-4207-embedded-bytes-config.json   |   1 -
 .../test/resources/configs/test-config-loader.json |   4 +
 .../org/apache/tika/server/core/CXFTestBase.java   |   1 -
 .../resources/configs/cxf-test-base-template.json  |   1 -
 .../tika/server/standard/TikaDetectorsTest.java    |  19 +-
 .../resources/configs/cxf-test-base-template.json  |   1 -
 .../configs/tika-config-for-server-tests.json      |   1 -
 .../tika-config-langdetect-opennlp-filter.json     |   1 -
 .../tika-config-langdetect-optimaize-filter.json   |   1 -
 48 files changed, 575 insertions(+), 187 deletions(-)

diff --git a/docs/spooling.adoc b/docs/spooling.adoc
new file mode 100644
index 0000000000..7e1959a5ec
--- /dev/null
+++ b/docs/spooling.adoc
@@ -0,0 +1,211 @@
+= Spooling in Apache Tika
+:toc:
+:toclevels: 3
+:sectnums:
+
+== Background
+
+=== What is Spooling?
+
+Spooling refers to the process of writing an input stream to a temporary file 
on disk.
+This is necessary for certain file formats that require random access to the 
underlying
+bytes during detection or parsing.
+
+=== Why Some Formats Benefit from Random Access
+
+Several file formats are most efficiently processed with random access rather
+than streaming:
+
+* **OLE2 (Microsoft Office legacy formats)**: The POI library needs to read 
the file
+  as a random-access structure to navigate the OLE2 container.
+* **ZIP-based formats**: Container detection requires reading the ZIP central 
directory,
+  which is located at the end of the file.
+* **Binary Property Lists (bplist)**: Apple's binary plist format requires 
random access
+  for efficient parsing.
+* **PDF**: While detection works via magic bytes, parsing requires random 
access for
+  the PDF cross-reference table.
+
+=== Architectural Decision: Decentralized Spooling
+
+==== The Problem with Centralized Spooling
+
+Earlier versions of Tika considered centralizing spooling decisions in
+`DefaultDetector`.
+The detector would check the detected media type and spool to disk before 
passing the
+stream to specialized detectors or parsers.
+
+This approach had several drawbacks:
+
+1. **Unnecessary spooling**: PDF files need spooling for _parsing_ but not for 
_detection_
+   (magic bytes suffice). Centralized detection-time spooling would spool PDFs 
unnecessarily
+   when only detecting.
+
+2. **Redundant logic**: Specialized detectors like `POIFSContainerDetector` and
+   `DefaultZipContainerDetector` already call `TikaInputStream.getFile()` or 
`getPath()`
+   when they need random access. They know best when spooling is required.
+
+3. **Coupling**: Centralized spooling couples the detector to knowledge about 
which
+   formats need random access, duplicating logic that already exists in 
specialized
+   components.
+
+==== The Solution: Let Components Self-Spool
+
+The current architecture follows a simple principle: **each component that 
needs random
+access is responsible for obtaining it**.
+
+When a detector or parser needs random access, it calls:
+
+[source,java]
+----
+Path path = TikaInputStream.get(inputStream).getPath();
+// or
+File file = TikaInputStream.get(inputStream).getFile();
+----
+
+`TikaInputStream` handles the spooling transparently:
+
+* If the stream is already backed by a file, it returns that file directly.
+* If the stream is in-memory or network-based, it spools to a temporary file.
+* The temporary file is automatically cleaned up when the stream is closed.
+
+==== Benefits of Decentralized Spooling
+
+1. **Efficiency**: Spooling happens only when actually needed, not 
preemptively.
+2. **Simplicity**: No central configuration of "which types need spooling."
+3. **Correctness**: Each component knows its own requirements.
+4. **Flexibility**: New formats can be added without modifying central 
spooling logic.
+
+=== TikaInputStream Backing Strategies
+
+`TikaInputStream` uses configurable backing strategies that handle caching and 
temporary
+file management. This means:
+
+* Repeated calls to `getFile()` return the same temporary file (no 
re-spooling).
+* The `rewind()` method efficiently resets the stream for re-reading.
+* Memory-mapped and disk-backed strategies can be selected based on use case.
+
+== User Guide
+
+=== Default Behavior
+
+By default, Tika handles spooling automatically. You don't need to configure 
anything
+for most use cases. When a detector or parser needs random access to a file, 
it will
+spool the input stream to a temporary file if necessary.
+
+=== SpoolingStrategy for Fine-Grained Control
+
+For advanced use cases, you can use `SpoolingStrategy` to control spooling 
behavior.
+This is useful when you want to:
+
+* Restrict which file types are allowed to spool (e.g., for security reasons)
+* Customize spooling behavior based on metadata or stream properties
+
+==== Programmatic Configuration
+
+[source,java]
+----
+import org.apache.tika.io.SpoolingStrategy;
+import org.apache.tika.parser.ParseContext;
+
+// Create a custom spooling strategy
+SpoolingStrategy strategy = new SpoolingStrategy();
+strategy.setSpoolTypes(Set.of(
+    MediaType.application("zip"),
+    MediaType.application("pdf")
+));
+
+// Add to parse context
+ParseContext context = new ParseContext();
+context.set(SpoolingStrategy.class, strategy);
+
+// Parse with the custom context
+parser.parse(inputStream, handler, metadata, context);
+----
+
+==== SpoolingStrategy Methods
+
+[source,java]
+----
+// Check if spooling should occur for a given type
+boolean shouldSpool(TikaInputStream tis, Metadata metadata, MediaType mediaType)
+
+// Configure which types should be spooled
+void setSpoolTypes(Set<MediaType> types)
+
+// Set the media type registry for specialization checking
+void setMediaTypeRegistry(MediaTypeRegistry registry)
+----
+
+==== How Type Matching Works
+
+The `shouldSpool()` method returns `true` if:
+
+1. The stream doesn't already have a backing file (`tis.hasFile()` is false), 
AND
+2. The media type matches one of the configured spool types
+
+Type matching considers:
+
+* Exact matches (e.g., `application/zip`)
+* Base type matches (e.g., a configured `application/zip` matches a detected
+  `application/zip; charset=utf-8`)
+* Specializations (e.g., `application/vnd.oasis.opendocument.text` is a
+  specialization of `application/zip`)
+
+==== Default Spool Types
+
+The default spool types are:
+
+* `application/zip` - ZIP archives and ZIP-based formats (OOXML, ODF, EPUB, 
etc.)
+* `application/x-tika-msoffice` - OLE2 Microsoft Office formats
+* `application/x-bplist` - Apple binary property lists
+* `application/pdf` - PDF documents
+
+=== JSON Configuration
+
+SpoolingStrategy can be configured via JSON in your `tika-config.json` file.
+Place the configuration in the `other-configs` section:
+
+[source,json]
+----
+{
+  "other-configs": {
+    "spooling-strategy": {
+      "spoolTypes": [
+        "application/zip",
+        "application/x-tika-msoffice",
+        "application/pdf"
+      ]
+    }
+  }
+}
+----
+
+Load the configuration using `TikaLoader`:
+
+[source,java]
+----
+TikaLoader loader = TikaLoader.load(Path.of("tika-config.json"));
+SpoolingStrategy strategy = loader.configs().load(SpoolingStrategy.class);
+
+// Add to parse context
+ParseContext context = new ParseContext();
+context.set(SpoolingStrategy.class, strategy);
+----
+
+=== Best Practices
+
+1. **Let Tika handle it**: For most applications, the default behavior is 
optimal.
+   Don't configure spooling unless you have a specific need.
+
+2. **Use TikaInputStream**: Always wrap your input streams with 
`TikaInputStream`
+   to enable efficient spooling and rewind capabilities.
+
+3. **Close streams properly**: Use try-with-resources to ensure temporary files
+   are cleaned up:
++
+[source,java]
+----
+try (TikaInputStream tis = TikaInputStream.get(inputStream)) {
+    parser.parse(tis, handler, metadata, context);
+}
+----
+
+4. **Consider memory vs. disk tradeoffs**: For very large files, spooling to 
disk
+   is necessary. For small files processed in bulk, keeping data in memory may 
be
+   faster. `TikaInputStream` backing strategies can be tuned for your workload.
diff --git 
a/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java 
b/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java
index 2d71c5b180..3c1e91138a 100644
--- a/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java
+++ b/tika-core/src/main/java/org/apache/tika/detect/DefaultDetector.java
@@ -16,50 +16,66 @@
  */
 package org.apache.tika.detect;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
-import javax.imageio.spi.ServiceRegistry;
 
 import org.apache.tika.config.ServiceLoader;
 import org.apache.tika.config.TikaComponent;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.utils.ServiceLoaderUtils;
 
 /**
- * A composite detector based on all the {@link Detector} implementations
- * available through the {@link ServiceRegistry service provider mechanism}.
+ * A composite detector that orchestrates the detection pipeline:
+ * <ol>
+ *   <li>MimeTypes (magic byte) detection</li>
+ *   <li>Container and other detectors loaded via SPI</li>
+ *   <li>TextDetector as fallback for unknown types</li>
+ *   <li>Returns the most specific type detected</li>
+ * </ol>
  * <p>
  * Detectors are loaded and returned in a specified order, of user supplied
- * followed by non-MimeType Tika, followed by the Tika MimeType class.
+ * followed by non-MimeType Tika detectors.
  * If you need to control the order of the Detectors, you should instead
  * construct your own {@link CompositeDetector} and pass in the list
  * of Detectors in the required order.
+ * <p>
+ * Individual detectors that need random access (e.g., for container 
inspection)
+ * handle their own spooling by calling {@link TikaInputStream#getFile()}.
  *
  * @since Apache Tika 0.9
  */
 @TikaComponent(spi = false)
 public class DefaultDetector extends CompositeDetector {
 
-    /**
-     * Serial version UID
-     */
     private static final long serialVersionUID = -8170114575326908027L;
+
     private transient final ServiceLoader loader;
     private final Collection<Class<? extends Detector>> excludedClasses;
+    private final MimeTypes mimeTypes;
+    private final TextDetector textDetector;
 
     public DefaultDetector(MimeTypes types, ServiceLoader loader,
                            Collection<Class<? extends Detector>> 
excludeDetectors) {
-        super(types.getMediaTypeRegistry(), getDefaultDetectors(types, loader, 
excludeDetectors));
+        super(types.getMediaTypeRegistry(), getDefaultDetectors(loader, 
excludeDetectors));
         this.loader = loader;
+        this.mimeTypes = types;
+        this.textDetector = new TextDetector();
         this.excludedClasses = excludeDetectors != null ?
                 Collections.unmodifiableCollection(new 
ArrayList<>(excludeDetectors)) :
                 Collections.emptySet();
     }
 
     public DefaultDetector(MimeTypes types, ServiceLoader loader) {
-        this(types, loader, Collections.EMPTY_SET);
+        this(types, loader, Collections.emptySet());
     }
 
     public DefaultDetector(MimeTypes types, ClassLoader loader) {
@@ -86,11 +102,13 @@ public class DefaultDetector extends CompositeDetector {
      * <p>
      * If an {@link OverrideDetector} is loaded, it takes precedence over
      * all other detectors.
+     * <p>
+     * Note: MimeTypes is handled separately in the detect() method, not 
included here.
      *
      * @param loader service loader
      * @return ordered list of statically loadable detectors
      */
-    private static List<Detector> getDefaultDetectors(MimeTypes types, 
ServiceLoader loader,
+    private static List<Detector> getDefaultDetectors(ServiceLoader loader,
                                                       Collection<Class<? 
extends Detector>>
                                                               
excludeDetectors) {
         List<Detector> detectors =
@@ -111,16 +129,73 @@ public class DefaultDetector extends CompositeDetector {
             Detector detector = detectors.remove(overrideIndex);
             detectors.add(0, detector);
         }
-        // Finally the Tika MimeTypes as a fallback
-        detectors.add(types);
         return detectors;
     }
 
+    @Override
+    public MediaType detect(TikaInputStream tis, Metadata metadata, ParseContext parseContext)
+            throws IOException {
+        // 1. Magic detection via MimeTypes
+        MediaType magicType = mimeTypes.detect(tis, metadata, parseContext);
+        metadata.set(TikaCoreProperties.CONTENT_TYPE_MAGIC_DETECTED, 
magicType.toString());
+
+        // 2. Run other detectors (container detectors, etc.)
+        // Note: Container detectors that need random access handle their own 
spooling
+        MediaType detectedType = super.detect(tis, metadata, parseContext);
+
+        // 3. Text detection - only if still unknown
+        MediaType textType = null;
+        if (MediaType.OCTET_STREAM.equals(detectedType) &&
+                MediaType.OCTET_STREAM.equals(magicType)) {
+            textType = textDetector.detect(tis, metadata, parseContext);
+        }
+
+        // 4. Return most specific
+        return mostSpecific(magicType, detectedType, textType);
+    }
+
+    private MediaType mostSpecific(MediaType magicType, MediaType 
detectedType, MediaType textType) {
+        MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
+
+        // Collect non-null, non-octet-stream candidates
+        MediaType best = MediaType.OCTET_STREAM;
+
+        // Start with magic type as baseline if valid
+        if (magicType != null && !MediaType.OCTET_STREAM.equals(magicType)) {
+            best = magicType;
+        }
+
+        // Container detectors may find more specific types (e.g., OLE -> 
msword)
+        // or less specific (e.g., commons-compress tar vs magic gtar)
+        // Use the registry to determine which is more specific
+        if (detectedType != null && 
!MediaType.OCTET_STREAM.equals(detectedType)) {
+            if (MediaType.OCTET_STREAM.equals(best)) {
+                best = detectedType;
+            } else if (registry.isSpecializationOf(detectedType, best)) {
+                // detectedType is more specific than best
+                best = detectedType;
+            } else if (!registry.isSpecializationOf(best, detectedType)) {
+                // Neither is a specialization of the other - prefer container 
detection
+                // for unrelated types (e.g., different format families)
+                best = detectedType;
+            }
+            // else: best is already more specific than detectedType, keep best
+        }
+
+        // Text detection as fallback only if still unknown
+        if (MediaType.OCTET_STREAM.equals(best) && textType != null &&
+                !MediaType.OCTET_STREAM.equals(textType)) {
+            best = textType;
+        }
+
+        return best;
+    }
+
     @Override
     public List<Detector> getDetectors() {
         if (loader != null && loader.isDynamic()) {
             List<Detector> detectors = 
loader.loadDynamicServiceProviders(Detector.class);
-            if (detectors.size() > 0) {
+            if (!detectors.isEmpty()) {
                 detectors.addAll(super.getDetectors());
                 return detectors;
             } else {
diff --git 
a/tika-core/src/main/java/org/apache/tika/digest/InputStreamDigester.java 
b/tika-core/src/main/java/org/apache/tika/digest/InputStreamDigester.java
index 5a458fd6a2..a384137300 100644
--- a/tika-core/src/main/java/org/apache/tika/digest/InputStreamDigester.java
+++ b/tika-core/src/main/java/org/apache/tika/digest/InputStreamDigester.java
@@ -30,6 +30,12 @@ import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.utils.StringUtils;
 
+// TODO: TIKA-FOLLOWUP - With TikaInputStream.rewind(), markLimit is no longer 
needed.
+//  The digester can simply read the entire stream, then call tis.rewind().
+//  This would simplify this class and allow removing markLimit from:
+//  - InputStreamDigester, CommonsDigester, BouncyCastleDigester
+//  - CommonsDigesterFactory, BouncyCastleDigesterFactory 
(setMarkLimit/getMarkLimit)
+//  - All JSON config files that specify markLimit for digesters
 public class InputStreamDigester implements Digester {
 
     private final String algorithm;
diff --git a/tika-core/src/main/java/org/apache/tika/io/SpoolingStrategy.java 
b/tika-core/src/main/java/org/apache/tika/io/SpoolingStrategy.java
new file mode 100644
index 0000000000..0a3a45bad8
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/io/SpoolingStrategy.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.io;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.tika.config.TikaComponent;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
+
+/**
+ * Strategy for determining when to spool a TikaInputStream to disk.
+ * <p>
+ * Components (detectors, parsers) can check this strategy before calling
+ * {@link TikaInputStream#getFile()} to determine if spooling is appropriate
+ * for the given media type.
+ * <p>
+ * Default behavior (when no strategy is in ParseContext): components spool 
when needed.
+ * A strategy allows fine-grained control over spooling decisions.
+ * <p>
+ * Configure via JSON:
+ * <pre>
+ * {
+ *   "spooling-strategy": {
+ *     "spoolTypes": ["application/zip", "application/x-tika-msoffice", 
"application/pdf"]
+ *   }
+ * }
+ * </pre>
+ */
+@TikaComponent(spi = false)
+public class SpoolingStrategy {
+
+    private static final Set<MediaType> DEFAULT_SPOOL_TYPES;
+
+    static {
+        Set<MediaType> types = new HashSet<>();
+        types.add(MediaType.application("zip"));
+        types.add(MediaType.application("x-tika-msoffice"));
+        types.add(MediaType.application("x-bplist"));
+        types.add(MediaType.application("pdf"));
+        DEFAULT_SPOOL_TYPES = Set.copyOf(types);
+    }
+
+    private Set<MediaType> spoolTypes = new HashSet<>(DEFAULT_SPOOL_TYPES);
+    private MediaTypeRegistry mediaTypeRegistry;
+
+    /**
+     * Determines whether the stream should be spooled to disk.
+     *
+     * @param tis       the TikaInputStream (can check hasFile(), getLength())
+     * @param metadata  metadata (can check content-type hints, filename)
+     * @param mediaType the detected or declared media type
+     * @return true if the stream should be spooled to disk
+     */
+    public boolean shouldSpool(TikaInputStream tis, Metadata metadata, MediaType mediaType) {
+        // Already has file? No need to spool
+        if (tis != null && tis.hasFile()) {
+            return false;
+        }
+        // Check type against spoolTypes
+        return matchesSpoolType(mediaType);
+    }
+
+    private boolean matchesSpoolType(MediaType type) {
+        if (type == null) {
+            return false;
+        }
+        // Exact match
+        if (spoolTypes.contains(type)) {
+            return true;
+        }
+        // Base type match (without parameters)
+        MediaType baseType = type.getBaseType();
+        if (spoolTypes.contains(baseType)) {
+            return true;
+        }
+        // Check if type is a specialization of any spool type
+        if (mediaTypeRegistry != null) {
+            for (MediaType spoolType : spoolTypes) {
+                if (mediaTypeRegistry.isSpecializationOf(type, spoolType)) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Sets the media types that should be spooled to disk.
+     * Specializations of these types are also included.
+     *
+     * @param spoolTypes set of media types to spool
+     */
+    public void setSpoolTypes(Set<MediaType> spoolTypes) {
+        this.spoolTypes = spoolTypes != null ? new HashSet<>(spoolTypes) : new 
HashSet<>();
+    }
+
+    /**
+     * Returns the media types that should be spooled to disk.
+     *
+     * @return set of media types to spool
+     */
+    public Set<MediaType> getSpoolTypes() {
+        return spoolTypes;
+    }
+
+    /**
+     * Sets the media type registry used for checking type specializations.
+     *
+     * @param registry the media type registry
+     */
+    public void setMediaTypeRegistry(MediaTypeRegistry registry) {
+        this.mediaTypeRegistry = registry;
+    }
+
+    /**
+     * Returns the media type registry.
+     *
+     * @return the media type registry, or null if not set
+     */
+    public MediaTypeRegistry getMediaTypeRegistry() {
+        return mediaTypeRegistry;
+    }
+}
diff --git 
a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java 
b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
index a2ac99c2bb..b89323fc11 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
@@ -229,6 +229,13 @@ public interface TikaCoreProperties {
      */
     Property CONTENT_TYPE_PARSER_OVERRIDE =
             Property.internalText(HttpHeaders.CONTENT_TYPE + 
"-Parser-Override");
+    /**
+     * This is set by DefaultDetector to store the result of MimeTypes (magic 
byte)
+     * detection. This allows downstream detectors to use it as a hint without
+     * re-running magic detection.
+     */
+    Property CONTENT_TYPE_MAGIC_DETECTED =
+            Property.internalText(HttpHeaders.CONTENT_TYPE + 
"-Magic-Detected");
     /**
      * @see DublinCore#FORMAT
      */
diff --git 
a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java 
b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
index da423e8e13..d03eb89961 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
@@ -30,7 +30,6 @@ import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
 import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractorFactory;
 import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.HttpHeaders;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
@@ -151,8 +150,6 @@ public class AutoDetectParser extends CompositeParser {
             metadata.setMetadataWriteFilter(
                     
autoDetectParserConfig.getMetadataWriteFilterFactory().newInstance());
         }
-        //figure out if we should spool to disk
-        maybeSpool(tis, autoDetectParserConfig, metadata);
 
         // Compute digests before type detection if configured
         DigestHelper.maybeDigest(tis,
@@ -211,35 +208,6 @@ public class AutoDetectParser extends CompositeParser {
         return handler;
     }
 
-    private void maybeSpool(TikaInputStream tis, AutoDetectParserConfig 
autoDetectParserConfig,
-                            Metadata metadata) throws IOException {
-        if (tis.hasFile()) {
-            return;
-        }
-        if (autoDetectParserConfig.getSpoolToDisk() == null) {
-            return;
-        }
-        //whether or not a content-length has been sent in,
-        //if spoolToDisk == 0, spool it
-        if (autoDetectParserConfig.getSpoolToDisk() == 0) {
-            tis.getPath();
-            metadata.set(HttpHeaders.CONTENT_LENGTH, 
Long.toString(tis.getLength()));
-            return;
-        }
-        if (metadata.get(Metadata.CONTENT_LENGTH) != null) {
-            long len = -1l;
-            try {
-                len = Long.parseLong(metadata.get(Metadata.CONTENT_LENGTH));
-                if (len > autoDetectParserConfig.getSpoolToDisk()) {
-                    tis.getPath();
-                    metadata.set(HttpHeaders.CONTENT_LENGTH, 
Long.toString(tis.getLength()));
-                }
-            } catch (NumberFormatException e) {
-                //swallow...maybe log?
-            }
-        }
-    }
-
     private void initializeEmbeddedDocumentExtractor(Metadata metadata, 
ParseContext context) {
         if (context.get(EmbeddedDocumentExtractor.class) != null) {
             return;
diff --git 
a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java 
b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
index 75ba7f6f79..0aba04ad61 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
@@ -52,13 +52,6 @@ public class AutoDetectParserConfig implements Serializable {
 
     public static AutoDetectParserConfig DEFAULT = new 
AutoDetectParserConfig();
 
-    /**
-     * If this is not null and greater than -1, the AutoDetectParser
-     * will spool the stream to disk if the length of the stream is known
-     * ahead of time.
-     */
-    private Long spoolToDisk = null;
-
     /**
      * SecureContentHandler -- Desired output threshold in characters.
      */
@@ -102,16 +95,14 @@ public class AutoDetectParserConfig implements 
Serializable {
     /**
      * Creates a SecureContentHandlerConfig using the passed in parameters.
      *
-     * @param spoolToDisk
      * @param outputThreshold          SecureContentHandler - character output 
threshold.
      * @param maximumCompressionRatio  SecureContentHandler - max compression 
ratio allowed.
      * @param maximumDepth             SecureContentHandler - maximum XML 
element nesting level.
      * @param maximumPackageEntryDepth SecureContentHandler - maximum package 
entry nesting level.
      */
-    public AutoDetectParserConfig(Long spoolToDisk, Long outputThreshold,
+    public AutoDetectParserConfig(Long outputThreshold,
                                   Long maximumCompressionRatio, Integer 
maximumDepth,
                                   Integer maximumPackageEntryDepth) {
-        this.spoolToDisk = spoolToDisk;
         this.outputThreshold = outputThreshold;
         this.maximumCompressionRatio = maximumCompressionRatio;
         this.maximumDepth = maximumDepth;
@@ -122,14 +113,6 @@ public class AutoDetectParserConfig implements 
Serializable {
 
     }
 
-    public Long getSpoolToDisk() {
-        return spoolToDisk;
-    }
-
-    public void setSpoolToDisk(Long spoolToDisk) {
-        this.spoolToDisk = spoolToDisk;
-    }
-
     public Long getOutputThreshold() {
         return outputThreshold;
     }
@@ -264,7 +247,7 @@ public class AutoDetectParserConfig implements Serializable 
{
 
     @Override
     public String toString() {
-        return "AutoDetectParserConfig{" + "spoolToDisk=" + spoolToDisk + ", 
outputThreshold=" +
+        return "AutoDetectParserConfig{" + "outputThreshold=" +
                 outputThreshold + ", maximumCompressionRatio=" + 
maximumCompressionRatio +
                 ", maximumDepth=" + maximumDepth + ", 
maximumPackageEntryDepth=" +
                 maximumPackageEntryDepth + ", metadataWriteFilterFactory=" +
diff --git 
a/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java 
b/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
index 80b6315e93..ac5b0b077f 100644
--- a/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
+++ b/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
@@ -62,8 +62,9 @@ public class BodyContentHandlerTest extends TikaTest {
     @Test
     public void testLimit() throws Exception {
         //TIKA-2668 - java 11-ea
+        // Note: limit is 16 to account for metadata overhead (each metadata 
field adds a newline)
         Parser p = new MockParser();
-        WriteOutContentHandler handler = new WriteOutContentHandler(15);
+        WriteOutContentHandler handler = new WriteOutContentHandler(16);
         Metadata metadata = new Metadata();
         ParseContext parseContext = new ParseContext();
         Parser[] parsers = new Parser[1];
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
index b7e6752511..5ff9e0cf15 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/main/java/org/apache/tika/parser/apple/AppleSingleFileParser.java
@@ -26,6 +26,7 @@ import java.util.List;
 import java.util.Set;
 
 import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.input.BoundedInputStream;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -98,10 +99,17 @@ public class AppleSingleFileParser implements Parser {
             long diff = contentFieldInfo.offset - bytesRead;
             IOUtils.skipFully(tis, diff);
             if (ex.shouldParseEmbedded(embeddedMetadata)) {
-                // TODO: we should probably add a readlimiting wrapper around 
this
-                // stream to ensure that not more than contentFieldInfo.length 
bytes
-                // are read
-                ex.parseEmbedded(tis, xhtml, embeddedMetadata, context, true);
+                // Use BoundedInputStream to limit reads to the embedded 
entry's length,
+                // then spool to a temp file so the embedded stream is fully 
isolated from
+                // the parent (reset() returns to the start of the embedded 
content, not the parent stream)
+                BoundedInputStream bounded =
+                        BoundedInputStream.builder()
+                                .setInputStream(tis)
+                                .setMaxCount(contentFieldInfo.length)
+                                .get();
+                try (TikaInputStream inner = TikaInputStream.get(bounded)) {
+                    inner.getPath();
+                    ex.parseEmbedded(inner, xhtml, embeddedMetadata, context, 
true);
+                }
             }
         }
         xhtml.endDocument();
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java
index 0edcb0a6b3..0337729955 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/main/java/org/apache/tika/parser/crypto/TSDParser.java
@@ -18,7 +18,10 @@ package org.apache.tika.parser.crypto;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.OutputStream;
 import java.math.BigInteger;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 import java.security.NoSuchProviderException;
@@ -55,8 +58,10 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.exception.WriteLimitReachedException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -167,10 +172,27 @@ public class TSDParser implements Parser {
         EmbeddedDocumentExtractor edx = 
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
 
         if (edx.shouldParseEmbedded(metadata)) {
-            try {
+            try (TemporaryResources tmp = new TemporaryResources()) {
                 cmsTimeStampedDataParser = new 
CMSTimeStampedDataParser(stream);
 
-                try (TikaInputStream inner = 
TikaInputStream.get(cmsTimeStampedDataParser.getContent())) {
+                // Spool content to a temp file, catching IOExceptions (e.g., 
EOF) thrown by truncated files
+                Path tempFile = tmp.createTempFile();
+                try (InputStream content = 
cmsTimeStampedDataParser.getContent();
+                     OutputStream out = Files.newOutputStream(tempFile)) {
+                    byte[] buffer = new byte[8192];
+                    int n;
+                    while ((n = content.read(buffer)) != -1) {
+                        out.write(buffer, 0, n);
+                    }
+                } catch (IOException e) {
+                    // Truncated file - record exception and work with what we 
got
+                    metadata.set(TikaCoreProperties.EMBEDDED_EXCEPTION,
+                            e.getClass().getName() + ": " + e.getMessage());
+                    LOG.debug("Error reading TSD content (possibly 
truncated)", e);
+                }
+
+                // Parse whatever we managed to extract
+                try (TikaInputStream inner = TikaInputStream.get(tempFile)) {
                     edx.parseEmbedded(inner, handler, metadata, context, true);
                 }
 
@@ -180,17 +202,13 @@ public class TSDParser implements Parser {
                 WriteLimitReachedException.throwIfWriteLimitReached(ex);
                 LOG.error("Error in TSDParser.parseTSDContent {}", 
ex.getMessage());
             } finally {
-                this.closeCMSParser(cmsTimeStampedDataParser);
-            }
-        }
-    }
-
-    private void closeCMSParser(CMSTimeStampedDataParser 
cmsTimeStampedDataParser) {
-        if (cmsTimeStampedDataParser != null) {
-            try {
-                cmsTimeStampedDataParser.close();
-            } catch (IOException ex) {
-                LOG.error("Error in TSDParser.closeCMSParser {}", 
ex.getMessage());
+                if (cmsTimeStampedDataParser != null) {
+                    try {
+                        cmsTimeStampedDataParser.close();
+                    } catch (IOException e) {
+                        LOG.debug("Error closing CMSTimeStampedDataParser", e);
+                    }
+                }
             }
         }
     }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 72753ce022..a18c27c6c5 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -237,7 +237,7 @@ public class PDFParserTest extends TikaTest {
         assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("true", metadata.get("pdf:encrypted"));
         //pdf:encrypted, X-Parsed-By and Content-Type
-        assertEquals(8, metadata.names().length, "very little metadata should 
be parsed");
+        assertEquals(9, metadata.names().length, "very little metadata should 
be parsed");
         assertEquals(0, handler.toString().length());
     }
 
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/org/apache/tika/parser/pkg/tika-config.xml
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/org/apache/tika/parser/pkg/tika-config.xml
deleted file mode 100644
index 11b888c9f0..0000000000
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/org/apache/tika/parser/pkg/tika-config.xml
+++ /dev/null
@@ -1,31 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<properties>
-    <parsers/>
-    <detectors>
-        <detector class="org.apache.tika.detect.DefaultDetector">
-            <detector-exclude class="ZipContainerDetector"/>
-        </detector>
-        <detector class="ZipContainerDetector">
-            <params>
-                <param name="markLimit" type="int">100000</param>
-            </params>
-        </detector>
-    </detectors>
-    <translator class="org.apache.tika.language.translate.DefaultTranslator"/>
-</properties>
\ No newline at end of file
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-webarchive-module/src/test/java/org/apache/tika/parser/warc/WARCParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-webarchive-module/src/test/java/org/apache/tika/parser/warc/WARCParserTest.java
index 2e70fe8315..d0718c9ac5 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-webarchive-module/src/test/java/org/apache/tika/parser/warc/WARCParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-webarchive-module/src/test/java/org/apache/tika/parser/warc/WARCParserTest.java
@@ -59,6 +59,7 @@ public class WARCParserTest extends TikaTest {
         Set<String> fieldsToIgnore = new HashSet<>();
         fieldsToIgnore.add("X-TIKA:parse_time_millis");
         fieldsToIgnore.add("Content-Type");
+        fieldsToIgnore.add("Content-Type-Magic-Detected");
         assertMetadataListEquals(metadataList, gzMetadataList, fieldsToIgnore);
 
         assertEquals("application/warc", 
metadataList.get(0).get(Metadata.CONTENT_TYPE));
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java
index e419b18e59..cec44d39a9 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java
@@ -32,7 +32,7 @@ public class TestDetectorLoading {
         //integration test - detectors should be sorted alphabetically by 
class name
         Detector detector = TikaLoader.loadDefault().loadDetectors();
         List<Detector> detectors = ((CompositeDetector) 
detector).getDetectors();
-        assertEquals(8, detectors.size());
+        assertEquals(7, detectors.size());
         // Sorted alphabetically by full class name (all are org.apache.tika.*)
         assertEquals("org.apache.tika.detect.MatroskaDetector", 
detectors.get(0).getClass().getName());
         assertEquals("org.apache.tika.detect.apple.BPListDetector", 
detectors.get(1).getClass().getName());
@@ -44,6 +44,5 @@ public class TestDetectorLoading {
         assertEquals("org.apache.tika.detect.ole.MiscOLEDetector", 
detectors.get(5).getClass().getName());
         assertEquals("org.apache.tika.detect.zip.DefaultZipContainerDetector",
                 detectors.get(6).getClass().getName());
-        assertEquals("org.apache.tika.mime.MimeTypes", 
detectors.get(7).getClass().getName());
     }
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java
index 140a82d5f0..56c9f29572 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java
@@ -44,7 +44,8 @@ public class TSDParserTest extends TikaTest {
         assertEquals(2, list.size());
         assertEquals("application/pdf", 
list.get(1).get(Metadata.CONTENT_TYPE));
         assertNotNull(list.get(1).get(TikaCoreProperties.EMBEDDED_EXCEPTION));
-        assertContains("org.apache.pdfbox.io.RandomAccessReadBuffer.<init>",
+        // Exception occurs during TSD content extraction (truncated file)
+        assertContains("EOFException",
                 list.get(1).get(TikaCoreProperties.EMBEDDED_EXCEPTION));
     }
 
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json
index 3abbeaef18..fed21bc5af 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-base32.json
@@ -1,6 +1,5 @@
 {
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "digesterFactory": {
       "bouncy-castle-digester-factory": {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json
index 98714ec028..770fba7ffe 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-basic.json
@@ -1,6 +1,5 @@
 {
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "digesterFactory": {
       "bouncy-castle-digester-factory": {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json
index 38b2a17bed..830d8c0809 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-bc-digests-multiple.json
@@ -1,6 +1,5 @@
 {
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "digesterFactory": {
       "bouncy-castle-digester-factory": {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json
index 039384ea5e..2a2634a88e 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-commons-digests-basic.json
@@ -1,6 +1,5 @@
 {
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
index 124b07adca..cf7c3874a0 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
@@ -9,7 +9,6 @@
     }
   ],
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json
index d4f565519b..ed2145a404 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.json
@@ -1,6 +1,5 @@
 {
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "skipContainerDocumentDigest": true,
     "digesterFactory": {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json
index d2e238f0f4..004e6ea753 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.json
@@ -1,6 +1,5 @@
 {
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.json
index 012142231b..c721b2df1a 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.json
@@ -1,6 +1,5 @@
 {
   "auto-detect-parser": {
-    "spoolToDisk": 1000,
     "outputThreshold": 1000,
     "contentHandlerDecoratorFactory": 
"doubling-content-handler-decorator-factory"
   }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
index 2f0ac2a2fe..b56a7d5d2d 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
@@ -1,6 +1,5 @@
 {
   "auto-detect-parser": {
-    "spoolToDisk": 123450,
     "outputThreshold": 678900,
     "embeddedDocumentExtractorFactory": {
       "runpack-extractor-factory": {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
index a58fa91fc3..6a466c1385 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
@@ -1,6 +1,5 @@
 {
   "auto-detect-parser": {
-    "spoolToDisk": 1000,
     "outputThreshold": 1000,
     "maximumCompressionRatio": 0.8,
     "maximumDepth": 1000,
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
index 0659adb852..17811c8dec 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
@@ -1,6 +1,5 @@
 {
   "auto-detect-parser": {
-    "spoolToDisk": 123450,
     "outputThreshold": 678900,
     "embeddedDocumentExtractorFactory": {
       "runpack-extractor-factory": {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
index 1b6f13c1cb..1872313a9c 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
@@ -1,6 +1,5 @@
 {
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "skipContainerDocumentDigest": true,
     "digesterFactory": {
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json
 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json
index 5873c39a87..755c345dfa 100644
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-basic.json
@@ -45,7 +45,6 @@
     }
   },
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "skipContainerDocumentDigest": false,
     "digesterFactory": {
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json
 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json
index 529e878cb6..2e0748f854 100644
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-passback.json
@@ -44,7 +44,6 @@
     }
   },
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "skipContainerDocumentDigest": false,
     "digesterFactory": {
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
index b58bfe269c..07a78edf3c 100644
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
@@ -45,7 +45,6 @@
     }
   },
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "skipContainerDocumentDigest": false,
     "digesterFactory": {
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-uppercasing.json
 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-uppercasing.json
index e7d8a21c02..6498c15a7a 100644
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-uppercasing.json
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-uppercasing.json
@@ -40,7 +40,6 @@
     }
   },
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "skipContainerDocumentDigest": false,
     "digesterFactory": {
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
index 3d6a1ba473..a8160b1548 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
@@ -78,7 +78,7 @@ import org.apache.tika.exception.TikaConfigException;
  *   ],
  *   "detectors": [
  *     "poifs-container-detector",       // String shorthand
- *     { "mime-types": { "markLimit": 10000 } }
+ *     { "default-detector": { "spoolTypes": ["application/zip", 
"application/pdf"] } }
  *   ],
  *
  *   // Pipes components (validated by validateKeys())
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaModule.java
 
b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaModule.java
index 181e6b90e1..8ad0d588ff 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaModule.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaModule.java
@@ -129,6 +129,22 @@ public class TikaModule extends SimpleModule {
     public TikaModule() {
         super("TikaModule");
 
+        // Register MediaType serializers (string-based)
+        addSerializer(MediaType.class, new JsonSerializer<MediaType>() {
+            @Override
+            public void serialize(MediaType value, JsonGenerator gen, 
SerializerProvider serializers)
+                    throws IOException {
+                gen.writeString(value.toString());
+            }
+        });
+        addDeserializer(MediaType.class, new JsonDeserializer<MediaType>() {
+            @Override
+            public MediaType deserialize(JsonParser p, DeserializationContext 
ctxt)
+                    throws IOException {
+                return MediaType.parse(p.getValueAsString());
+            }
+        });
+
         // Register Metadata serializers
         addSerializer(Metadata.class, new MetadataSerializer());
         addDeserializer(Metadata.class, new MetadataDeserializer());
diff --git 
a/tika-serialization/src/test/java/org/apache/tika/config/loader/ConfigLoaderTest.java
 
b/tika-serialization/src/test/java/org/apache/tika/config/loader/ConfigLoaderTest.java
index 1db87866e7..12695472c8 100644
--- 
a/tika-serialization/src/test/java/org/apache/tika/config/loader/ConfigLoaderTest.java
+++ 
b/tika-serialization/src/test/java/org/apache/tika/config/loader/ConfigLoaderTest.java
@@ -31,6 +31,8 @@ import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
 import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.io.SpoolingStrategy;
+import org.apache.tika.mime.MediaType;
 
 /**
  * Unit tests for {@link ConfigLoader}.
@@ -211,6 +213,20 @@ public class ConfigLoaderTest {
         assertEquals(30000, timeout.getMillis());
     }
 
+    @Test
+    public void testLoadSpoolingStrategy() throws Exception {
+        // SpoolingStrategy -> "spooling-strategy"
+        // JSON has "spooling-strategy" with spoolTypes: ["application/zip", 
"application/pdf"]
+        SpoolingStrategy strategy = configLoader.load(SpoolingStrategy.class);
+
+        assertNotNull(strategy);
+        assertEquals(2, strategy.getSpoolTypes().size());
+        
assertTrue(strategy.getSpoolTypes().contains(MediaType.application("zip")));
+        
assertTrue(strategy.getSpoolTypes().contains(MediaType.application("pdf")));
+        // Verify default types are NOT present (we replaced the set)
+        
assertFalse(strategy.getSpoolTypes().contains(MediaType.application("x-tika-msoffice")));
+    }
+
     @Test
     public void testLoadByClassNameMyFeatureSettings() throws Exception {
         // MyFeatureSettings -> "my-feature-settings" (full name, no suffix 
stripping)
diff --git 
a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaJsonConfigTest.java
 
b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaJsonConfigTest.java
index edd8e55634..10578c1f0c 100644
--- 
a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaJsonConfigTest.java
+++ 
b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaJsonConfigTest.java
@@ -70,7 +70,7 @@ public class TikaJsonConfigTest {
             {
               "detectors": [
                 "poifs-container-detector",
-                { "mime-types": { "markLimit": 10000 } },
+                { "default-detector": { "spoolTypes": ["application/zip", 
"application/pdf"] } },
                 "zip-container-detector"
               ]
             }
@@ -85,8 +85,8 @@ public class TikaJsonConfigTest {
         assertEquals("poifs-container-detector", detectors.get(0).getKey());
         assertTrue(detectors.get(0).getValue().isEmpty());
 
-        assertEquals("mime-types", detectors.get(1).getKey());
-        assertEquals(10000, 
detectors.get(1).getValue().get("markLimit").asInt());
+        assertEquals("default-detector", detectors.get(1).getKey());
+        assertTrue(detectors.get(1).getValue().get("spoolTypes").isArray());
 
         assertEquals("zip-container-detector", detectors.get(2).getKey());
         assertTrue(detectors.get(2).getValue().isEmpty());
diff --git 
a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
 
b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
index 33e493d509..5b8e44c788 100644
--- 
a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
+++ 
b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
@@ -322,42 +322,40 @@ public class TikaLoaderTest {
                 "Should NOT support application/test+optin (opt-in only, not 
in SPI)");
     }
 
-    // TODO: TIKA-SERIALIZATION-FOLLOWUP - Implement validation for common 
typos
-    @Disabled("TIKA-SERIALIZATION-FOLLOWUP: Validation for excludes typo not 
yet implemented")
+    // TODO: TIKA-SERIALIZATION-FOLLOWUP - Jackson may need configuration to 
fail on unknown properties
+    @Disabled("TIKA-SERIALIZATION-FOLLOWUP")
     @Test
-    public void testExcludesInsteadOfExcludeThrowsException() throws Exception 
{
-        // Create a config with the common mistake: "excludes" instead of 
"exclude"
-        String invalidConfig = "{\n" +
-                "  \"parsers\": [\n" +
-                "    {\n" +
-                "      \"default-parser\": {\n" +
-                "        \"excludes\": [\"pdf-parser\"]\n" +
-                "      }\n" +
-                "    }\n" +
-                "  ]\n" +
-                "}";
-
-        // Write to a temp file
-        Path tempFile = Files.createTempFile("test-invalid-excludes", ".json");
+    public void testInvalidBeanPropertyThrowsException() throws Exception {
+        // Config with a property that doesn't exist on DefaultDetector
+        String invalidConfig = """
+                {
+                  "detectors": [
+                    {
+                      "default-detector": {
+                        "nonExistentProperty": 12345
+                      }
+                    }
+                  ]
+                }
+                """;
+
+        Path tempFile = Files.createTempFile("test-invalid-property", ".json");
         try {
             Files.write(tempFile, 
invalidConfig.getBytes(StandardCharsets.UTF_8));
 
-            // Attempt to load should throw TikaConfigException
+            TikaLoader loader = TikaLoader.load(tempFile);
             try {
-                TikaLoader loader = TikaLoader.load(tempFile);
-                loader.get(Parser.class);
-                throw new AssertionError("Expected TikaConfigException to be 
thrown");
+                loader.loadDetectors();
+                throw new AssertionError("Expected TikaConfigException for 
invalid property");
             } catch (org.apache.tika.exception.TikaConfigException e) {
-                // Expected - verify the error message is helpful
-                assertTrue(e.getMessage().contains("excludes"),
-                        "Error message should mention 'excludes'");
-                assertTrue(e.getMessage().contains("exclude"),
-                        "Error message should mention the correct field 
'exclude'");
-                assertTrue(e.getMessage().contains("singular"),
-                        "Error message should explain it should be singular");
+                // Expected - Jackson should fail on unknown property
+                assertTrue(e.getMessage().contains("nonExistentProperty") ||
+                                
e.getCause().getMessage().contains("nonExistentProperty"),
+                        "Error should mention the invalid property name");
             }
         } finally {
             Files.deleteIfExists(tempFile);
         }
     }
+
 }
diff --git 
a/tika-serialization/src/test/resources/configs/TIKA-3695-exclude.json 
b/tika-serialization/src/test/resources/configs/TIKA-3695-exclude.json
index e40af02044..6675ef1d3c 100644
--- a/tika-serialization/src/test/resources/configs/TIKA-3695-exclude.json
+++ b/tika-serialization/src/test/resources/configs/TIKA-3695-exclude.json
@@ -3,7 +3,6 @@
     "default-parser"
   ],
   "auto-detect-parser": {
-    "spoolToDisk": 12345,
     "outputThreshold": 6789,
     "metadataWriteFilterFactory": {
       "standard-write-filter-factory": {
diff --git 
a/tika-serialization/src/test/resources/configs/TIKA-3695-fields.json 
b/tika-serialization/src/test/resources/configs/TIKA-3695-fields.json
index 9d38adbdd2..27b73f7e9a 100644
--- a/tika-serialization/src/test/resources/configs/TIKA-3695-fields.json
+++ b/tika-serialization/src/test/resources/configs/TIKA-3695-fields.json
@@ -3,7 +3,6 @@
     "default-parser"
   ],
   "auto-detect-parser": {
-    "spoolToDisk": 12345,
     "outputThreshold": 6789,
     "metadataWriteFilterFactory": {
       "standard-write-filter-factory": {
diff --git a/tika-serialization/src/test/resources/configs/TIKA-3695.json 
b/tika-serialization/src/test/resources/configs/TIKA-3695.json
index f7c05313ba..f24ce43246 100644
--- a/tika-serialization/src/test/resources/configs/TIKA-3695.json
+++ b/tika-serialization/src/test/resources/configs/TIKA-3695.json
@@ -3,7 +3,6 @@
     "default-parser"
   ],
   "auto-detect-parser": {
-    "spoolToDisk": 12345,
     "outputThreshold": 6789,
     "metadataWriteFilterFactory": {
       "standard-write-filter-factory": {
diff --git 
a/tika-serialization/src/test/resources/configs/TIKA-4207-embedded-bytes-config.json
 
b/tika-serialization/src/test/resources/configs/TIKA-4207-embedded-bytes-config.json
index d32516e877..b014152172 100644
--- 
a/tika-serialization/src/test/resources/configs/TIKA-4207-embedded-bytes-config.json
+++ 
b/tika-serialization/src/test/resources/configs/TIKA-4207-embedded-bytes-config.json
@@ -3,7 +3,6 @@
     "default-parser"
   ],
   "auto-detect-parser": {
-    "spoolToDisk": 123450,
     "outputThreshold": 678900,
     "embeddedDocumentExtractorFactory": {
       "runpack-extractor-factory": {
diff --git 
a/tika-serialization/src/test/resources/configs/test-config-loader.json 
b/tika-serialization/src/test/resources/configs/test-config-loader.json
index dd657c81e0..c5c24254eb 100644
--- a/tika-serialization/src/test/resources/configs/test-config-loader.json
+++ b/tika-serialization/src/test/resources/configs/test-config-loader.json
@@ -21,6 +21,10 @@
       "millis": 30000
     },
 
+    "spooling-strategy": {
+      "spoolTypes": ["application/zip", "application/pdf"]
+    },
+
     "my-feature-settings": {
       "featureName": "test-feature",
       "priority": 10
diff --git 
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
 
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
index 8004fb79ae..3a1389b140 100644
--- 
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
+++ 
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
@@ -71,7 +71,6 @@ public abstract class CXFTestBase {
     public final static String BASIC_CONFIG = """
             {
               "auto-detect-parser": {
-                "spoolToDisk": 1000000,
                 "outputThreshold": 1000000,
                 "digesterFactory": {
                   "commons-digester-factory": {
diff --git 
a/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json
 
b/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json
index 355e34ecdd..f8284e5e4d 100644
--- 
a/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json
+++ 
b/tika-server/tika-server-core/src/test/resources/configs/cxf-test-base-template.json
@@ -47,7 +47,6 @@
     }
   },
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
diff --git 
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaDetectorsTest.java
 
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaDetectorsTest.java
index 8f50d35901..ae76831340 100644
--- 
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaDetectorsTest.java
+++ 
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaDetectorsTest.java
@@ -33,7 +33,6 @@ import org.junit.jupiter.api.Test;
 import org.apache.tika.detect.microsoft.POIFSContainerDetector;
 import org.apache.tika.detect.ogg.OggDetector;
 import org.apache.tika.detect.zip.DefaultZipContainerDetector;
-import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.server.core.CXFTestBase;
 import org.apache.tika.server.core.resource.TikaDetectors;
 
@@ -64,7 +63,7 @@ public class TikaDetectorsTest extends CXFTestBase {
         assertContains(OggDetector.class.getName(), text);
         assertContains(POIFSContainerDetector.class.getName(), text);
         assertContains(DefaultZipContainerDetector.class.getName(), text);
-        assertContains(MimeTypes.class.getName(), text);
+        // Note: MimeTypes is now handled internally by DefaultDetector, not 
as a child detector
     }
 
     @Test
@@ -81,12 +80,11 @@ public class TikaDetectorsTest extends CXFTestBase {
 
         assertContains("<h3>OggDetector", text);
         assertContains("<h3>POIFSContainerDetector", text);
-        assertContains("<h3>MimeTypes", text);
+        // Note: MimeTypes is now handled internally by DefaultDetector, not 
as a child detector
 
         assertContains(OggDetector.class.getName(), text);
         assertContains(POIFSContainerDetector.class.getName(), text);
         assertContains(DefaultZipContainerDetector.class.getName(), text);
-        assertContains(MimeTypes.class.getName(), text);
     }
 
     @Test
@@ -110,10 +108,11 @@ public class TikaDetectorsTest extends CXFTestBase {
         assertEquals("org.apache.tika.detect.DefaultDetector", 
json.get("name"));
         assertEquals(Boolean.TRUE, json.get("composite"));
 
-        // At least 4 child detectors, none of them composite
+        // At least 3 child detectors, none of them composite
+        // Note: MimeTypes is now handled internally by DefaultDetector, not 
as a child detector
         List<Object> children = (List) json.get("children");
-        assertTrue(children.size() >= 4);
-        boolean hasOgg = false, hasPOIFS = false, hasZIP = false, hasMime = 
false;
+        assertTrue(children.size() >= 3);
+        boolean hasOgg = false, hasPOIFS = false, hasZIP = false;
         for (Object o : children) {
             Map<String, Object> d = (Map<String, Object>) o;
             assertTrue(d.containsKey("name"));
@@ -137,16 +136,10 @@ public class TikaDetectorsTest extends CXFTestBase {
                     .equals(name)) {
                 hasZIP = true;
             }
-            if (MimeTypes.class
-                    .getName()
-                    .equals(name)) {
-                hasMime = true;
-            }
         }
         assertTrue(hasOgg);
         assertTrue(hasPOIFS);
         assertTrue(hasZIP);
-        assertTrue(hasMime);
     }
 
 }
diff --git 
a/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json
 
b/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json
index 355e34ecdd..f8284e5e4d 100644
--- 
a/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json
+++ 
b/tika-server/tika-server-standard/src/test/resources/configs/cxf-test-base-template.json
@@ -47,7 +47,6 @@
     }
   },
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
diff --git 
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json
 
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json
index e96b3b7f71..fdf80cb998 100644
--- 
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json
+++ 
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-for-server-tests.json
@@ -10,7 +10,6 @@
     }
   ],
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
diff --git 
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json
 
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json
index e2b779035a..97646bc879 100644
--- 
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json
+++ 
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-opennlp-filter.json
@@ -15,7 +15,6 @@
     }
   ],
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {
diff --git 
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json
 
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json
index 52a9a4a871..8d3f74ed3c 100644
--- 
a/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json
+++ 
b/tika-server/tika-server-standard/src/test/resources/configs/tika-config-langdetect-optimaize-filter.json
@@ -15,7 +15,6 @@
     }
   ],
   "auto-detect-parser": {
-    "spoolToDisk": 1000000,
     "outputThreshold": 1000000,
     "digesterFactory": {
       "commons-digester-factory": {

Reply via email to