This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new f63bebfdea TIKA-4565 -- tweak configurations for include/exclude 
(#2441)
f63bebfdea is described below

commit f63bebfdea38c152ab1ffdff591938d7ef8c02b3
Author: Tim Allison <[email protected]>
AuthorDate: Thu Dec 11 12:44:13 2025 -0500

    TIKA-4565 -- tweak configurations for include/exclude (#2441)
---
 .../apache/tika/cli/XmlToJsonConfigConverter.java  |  8 ++--
 .../tika/cli/XmlToJsonConfigConverterTest.java     |  6 +--
 .../src/test/resources/configs/tika-config1.json   | 18 ++++----
 .../src/test/resources/configs/tika-config2.json   | 17 ++------
 .../src/test/resources/s3/tika-config-s3.json      | 14 +++----
 .../configs/TIKA-1702-detector-exclude.json        |  2 +-
 .../configs/TIKA-1708-detector-default.json        | 13 +++---
 ...2273-encoding-detector-outside-static-init.json |  2 +-
 ...IKA-2273-exclude-encoding-detector-default.json |  2 +-
 .../TIKA-2273-no-icu4j-encoding-detector.json      |  2 +-
 .../configs/test-default-with-exclusions.json      |  2 +-
 .../test/resources/configs/tika-4424-config.json   |  7 +---
 .../configs/tika-config-digests-pdf-only.json      |  2 +-
 .../resources/configs/tika-config-lib-pst.json     |  2 +-
 .../org/apache/tika/config/TIKA-1558-exclude.json  | 10 ++---
 .../apache/tika/config/TIKA-1558-excludesub.json   |  2 +-
 .../apache/tika/parser/ocr/tesseract-config.json   |  2 +-
 .../apache/tika/config/loader/DetectorLoader.java  |  4 +-
 .../tika/config/loader/EncodingDetectorLoader.java |  4 +-
 .../apache/tika/config/loader/FrameworkConfig.java | 49 ++++++----------------
 .../apache/tika/config/loader/ParserLoader.java    | 40 ++++--------------
 .../apache/tika/config/loader/TikaJsonConfig.java  |  4 ++
 .../org/apache/tika/config/loader/TikaLoader.java  | 26 ++++++------
 .../tika/config/loader/FrameworkConfigTest.java    | 49 +++++++++++++---------
 .../apache/tika/config/loader/TikaLoaderTest.java  | 12 +++---
 .../resources/configs/example-tika-config.json     | 11 ++---
 .../resources/configs/test-decoration-config.json  |  6 +--
 .../test-default-parser-with-exclusions.json       |  2 +-
 .../test/resources/configs/test-loader-config.json |  5 +--
 29 files changed, 129 insertions(+), 194 deletions(-)

diff --git 
a/tika-app/src/main/java/org/apache/tika/cli/XmlToJsonConfigConverter.java 
b/tika-app/src/main/java/org/apache/tika/cli/XmlToJsonConfigConverter.java
index fc43b553d5..9be0ee12ee 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/XmlToJsonConfigConverter.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/XmlToJsonConfigConverter.java
@@ -118,7 +118,7 @@ import org.apache.tika.utils.XMLReaderUtils;
  *     },
  *     {
  *       "default-parser": {
- *         "exclude": ["pdf-parser"]
+ *         "_exclude": ["pdf-parser"]
  *       }
  *     }
  *   ]
@@ -257,9 +257,9 @@ public class XmlToJsonConfigConverter {
         for (Map<String, Object> parserEntry : parsersList) {
             if (parserEntry.containsKey("default-parser")) {
                 Map<?, ?> config = (Map<?, ?>) 
parserEntry.get("default-parser");
-                if (config.containsKey("exclude")) {
+                if (config.containsKey("_exclude")) {
                     @SuppressWarnings("unchecked")
-                    List<String> excludes = (List<String>) 
config.get("exclude");
+                    List<String> excludes = (List<String>) 
config.get("_exclude");
                     excludedParsers.addAll(excludes);
                 }
             }
@@ -364,7 +364,7 @@ public class XmlToJsonConfigConverter {
         }
 
         if (excludes != null && !excludes.isEmpty()) {
-            config.put("exclude", excludes);
+            config.put("_exclude", excludes);
         }
 
         Map<String, Object> result = new LinkedHashMap<>();
diff --git 
a/tika-app/src/test/java/org/apache/tika/cli/XmlToJsonConfigConverterTest.java 
b/tika-app/src/test/java/org/apache/tika/cli/XmlToJsonConfigConverterTest.java
index 98671c37bd..6f73f810d3 100644
--- 
a/tika-app/src/test/java/org/apache/tika/cli/XmlToJsonConfigConverterTest.java
+++ 
b/tika-app/src/test/java/org/apache/tika/cli/XmlToJsonConfigConverterTest.java
@@ -92,8 +92,8 @@ public class XmlToJsonConfigConverterTest {
         System.out.println("Generated JSON:");
         System.out.println(json);
 
-        // Verify exclude is at the correct level (not under _decorate)
-        assertTrue(json.contains("\"exclude\""), "Should have exclude array");
+        // Verify the exclusion list is emitted under the underscore-prefixed "_exclude" key
+        assertTrue(json.contains("\"_exclude\""), "Should have _exclude 
array");
         assertFalse(json.contains("\"_decorate\""), "_decorate should not be 
used for parser excludes");
         assertTrue(json.contains("\"jsoup-parser\""), "Should exclude 
jsoup-parser");
         assertTrue(json.contains("\"pdf-parser\""), "Should exclude 
pdf-parser");
@@ -224,7 +224,7 @@ public class XmlToJsonConfigConverterTest {
         System.out.println(json);
 
         // Verify the JSON still contains the exclusions (we don't remove 
them, just inform)
-        assertTrue(json.contains("\"exclude\""), "Should still have exclude 
array");
+        assertTrue(json.contains("\"_exclude\""), "Should still have _exclude 
array");
         assertTrue(json.contains("\"pdf-parser\""), "Should have pdf-parser 
configured");
         assertTrue(json.contains("\"jsoup-parser\""), "Should have 
jsoup-parser configured");
 
diff --git a/tika-app/src/test/resources/configs/tika-config1.json 
b/tika-app/src/test/resources/configs/tika-config1.json
index e4cdbaf96e..1b5f391a97 100644
--- a/tika-app/src/test/resources/configs/tika-config1.json
+++ b/tika-app/src/test/resources/configs/tika-config1.json
@@ -2,16 +2,14 @@
   "parsers": [
     {
       "jsoup-parser": {
-        "_decorate": {
-          "mimeInclude": [
-            "application/vnd.wap.xhtml+xml",
-            "application/x-asp",
-            "application/xhtml+xml",
-            "text/html",
-            "application/xml",
-            "text/xml"
-          ]
-        }
+        "_mime-include": [
+          "application/vnd.wap.xhtml+xml",
+          "application/x-asp",
+          "application/xhtml+xml",
+          "text/html",
+          "application/xml",
+          "text/xml"
+        ]
       }
     }
   ]
diff --git a/tika-app/src/test/resources/configs/tika-config2.json 
b/tika-app/src/test/resources/configs/tika-config2.json
index 0f3cf8ac41..d25f49d852 100644
--- a/tika-app/src/test/resources/configs/tika-config2.json
+++ b/tika-app/src/test/resources/configs/tika-config2.json
@@ -2,24 +2,13 @@
   "parsers": [
     {
       "default-parser": {
-        "_decorate": {
-          "mimeExclude": [
-            "image/jpeg",
-            "application/pdf"
-          ],
-          "parserExclude": [
-            "org.apache.tika.parser.executable.ExecutableParser"
-          ]
-        }
+        "_exclude": ["executable-parser"],
+        "_mime-exclude": ["image/jpeg", "application/pdf"]
       }
     },
     {
       "empty-parser": {
-        "_decorate": {
-          "mimeInclude": [
-            "application/pdf"
-          ]
-        }
+        "_mime-include": ["application/pdf"]
       }
     }
   ]
diff --git 
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
 
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
index 043da2349f..e16f0a9b6b 100644
--- 
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
+++ 
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
@@ -2,14 +2,12 @@
   "parsers": [
     {
       "default-parser": {
-        "_decorate": {
-          "parserExclude": [
-            "org.apache.tika.parser.ocr.TesseractOCRParser",
-            "org.apache.tika.parser.pdf.PDFParser",
-            "org.apache.tika.parser.microsoft.ooxml.OOXMLParser",
-            "org.apache.tika.parser.microsoft.OfficeParser"
-          ]
-        }
+        "_exclude": [
+          "tesseract-ocr-parser",
+          "pdf-parser",
+          "ooxml-parser",
+          "office-parser"
+        ]
       }
     },
     {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-1702-detector-exclude.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-1702-detector-exclude.json
index fe356421d3..80a611f6b5 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-1702-detector-exclude.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-1702-detector-exclude.json
@@ -3,7 +3,7 @@
   "detectors": [
     {
       "default-detector": {
-        "exclude": [
+        "_exclude": [
           "default-zip-container-detector",
           "poifs-container-detector"
         ]
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-1708-detector-default.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-1708-detector-default.json
index 4d76bc86a9..4c49c1e460 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-1708-detector-default.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-1708-detector-default.json
@@ -1,14 +1,17 @@
 {
+  "parsers": [],
   "detectors": [
     {
-      "default-detector": {
-        "exclude": [
+      "default-detector" : {
+        "_exclude": [
           "default-zip-container-detector"
         ]
       }
     }
   ],
-  "translator": {
-    "class": "default-translator"
-  }
+  "translator": [
+    {
+      "default-translator": {}
+    }
+  ]
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-encoding-detector-outside-static-init.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-encoding-detector-outside-static-init.json
index 2c05becdc8..c1818466e3 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-encoding-detector-outside-static-init.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-encoding-detector-outside-static-init.json
@@ -10,7 +10,7 @@
   "encoding-detectors": [
     {
       "default-encoding-detector" : {
-        "exclude":["icu4j-encoding-detector"]
+        "_exclude":["icu4j-encoding-detector"]
       }
     }
   ]
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-exclude-encoding-detector-default.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-exclude-encoding-detector-default.json
index 240924a28c..56327103c5 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-exclude-encoding-detector-default.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-exclude-encoding-detector-default.json
@@ -3,7 +3,7 @@
   "encoding-detectors": [
     {
       "default-encoding-detector": {
-        "exclude": [
+        "_exclude": [
           "html-encoding-detector"
         ]
       }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-no-icu4j-encoding-detector.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-no-icu4j-encoding-detector.json
index 8099326159..b37a45121a 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-no-icu4j-encoding-detector.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-2273-no-icu4j-encoding-detector.json
@@ -2,7 +2,7 @@
   "encoding-detectors": [
     {
       "default-encoding-detector":{
-        "exclude": [
+        "_exclude": [
           "icu4j-encoding-detector"
         ]
       }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/test-default-with-exclusions.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/test-default-with-exclusions.json
index c29e0f4208..5233e290fe 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/test-default-with-exclusions.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/test-default-with-exclusions.json
@@ -2,7 +2,7 @@
   "parsers": [
     {
       "default-parser": {
-        "exclude": ["pdf-parser", "jsoup-parser"]
+        "_exclude": ["pdf-parser", "jsoup-parser"]
       }
     }
   ]
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4424-config.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4424-config.json
index 0175fe3181..82a03978b2 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4424-config.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4424-config.json
@@ -1,11 +1,8 @@
 {
   "detectors": [
     {
-      "_name": "default-detector",
-      "_decorate": {
-        "detectorExclude": [
-          "org.apache.tika.detect.zip.DefaultZipContainerDetector"
-        ]
+      "default-detector": {
+        "_exclude": ["default-zip-container-detector"]
       }
     }
   ]
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
index 9f31bfbc9e..34e5248c7c 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.json
@@ -2,7 +2,7 @@
   "parsers": [
     {
       "default-parser": {
-        "exclude": [
+        "_exclude": [
           "pdf-parser"
         ]
       }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-lib-pst.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-lib-pst.json
index 1396afc7af..da45f42ee8 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-lib-pst.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-lib-pst.json
@@ -2,7 +2,7 @@
   "parsers": [
     {
       "default-parser": {
-        "exclude": [
+        "_exclude": [
           "outlook-pst-parser",
           "pst-mail-item-parser"
         ]
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-exclude.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-exclude.json
index 10101b8536..d25f49d852 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-exclude.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-exclude.json
@@ -2,17 +2,13 @@
   "parsers": [
     {
       "default-parser": {
-        "exclude": ["executable-parser"],
-        "_decorate": {
-          "mimeExclude": ["image/jpeg", "application/pdf"]
-        }
+        "_exclude": ["executable-parser"],
+        "_mime-exclude": ["image/jpeg", "application/pdf"]
       }
     },
     {
       "empty-parser": {
-        "_decorate": {
-          "mimeInclude": ["application/pdf"]
-        }
+        "_mime-include": ["application/pdf"]
       }
     }
   ]
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-excludesub.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-excludesub.json
index aa34ec2fbd..0ec57f490a 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-excludesub.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-excludesub.json
@@ -2,7 +2,7 @@
   "parsers": [
     {
       "default-parser": {
-        "exclude": ["xml-parser", "dc-xml-parser", "fiction-book-parser"]
+        "_exclude": ["xml-parser", "dc-xml-parser", "fiction-book-parser"]
       }
     }
   ]
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/ocr/tesseract-config.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/ocr/tesseract-config.json
index 672584b483..00c67e9ebe 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/ocr/tesseract-config.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/ocr/tesseract-config.json
@@ -2,7 +2,7 @@
   "parsers": [
     {
       "default-parser": {
-        "exclude": ["tesseract-ocr-parser"]
+        "_exclude": ["tesseract-ocr-parser"]
       }
     },
     {
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/DetectorLoader.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/DetectorLoader.java
index 79b0840abd..321cf878e5 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/DetectorLoader.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/DetectorLoader.java
@@ -85,8 +85,8 @@ public class DetectorLoader {
 
                     // Parse exclusions from default-detector config
                     JsonNode configNode = entry.getValue();
-                    if (configNode != null && configNode.has("exclude")) {
-                        JsonNode excludeNode = configNode.get("exclude");
+                    if (configNode != null && configNode.has("_exclude")) {
+                        JsonNode excludeNode = configNode.get("_exclude");
                         if (excludeNode.isArray()) {
                             for (JsonNode excludeName : excludeNode) {
                                 if (excludeName.isTextual()) {
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/EncodingDetectorLoader.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/EncodingDetectorLoader.java
index 426140ff80..66fa71adc8 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/EncodingDetectorLoader.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/EncodingDetectorLoader.java
@@ -85,8 +85,8 @@ public class EncodingDetectorLoader {
 
                     // Parse exclusions from default-encoding-detector config
                     JsonNode configNode = entry.getValue();
-                    if (configNode != null && configNode.has("exclude")) {
-                        JsonNode excludeNode = configNode.get("exclude");
+                    if (configNode != null && configNode.has("_exclude")) {
+                        JsonNode excludeNode = configNode.get("_exclude");
                         if (excludeNode.isArray()) {
                             for (JsonNode excludeName : excludeNode) {
                                 if (excludeName.isTextual()) {
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/FrameworkConfig.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/FrameworkConfig.java
index 573aa75a47..96a101d34a 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/FrameworkConfig.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/FrameworkConfig.java
@@ -31,14 +31,16 @@ import org.apache.tika.config.JsonConfig;
  * Extracts framework-level configuration from component JSON,
  * separating fields prefixed with underscore from component-specific config.
  *
- * <p>Framework fields:
+ * <p>Framework fields (underscore prefix):
  * <ul>
- *   <li>{@code _decorate} - Parser decoration config (mime filtering, 
fallbacks)</li>
+ *   <li>{@code _mime-include} - Only handle these mime types</li>
+ *   <li>{@code _mime-exclude} - Don't handle these mime types</li>
  * </ul>
  */
 public class FrameworkConfig {
 
-    private static final String DECORATE_KEY = "_decorate";
+    private static final String MIME_INCLUDE_KEY = "_mime-include";
+    private static final String MIME_EXCLUDE_KEY = "_mime-exclude";
 
     private final ParserDecoration decoration;
     private final JsonConfig componentConfigJson;
@@ -66,11 +68,13 @@ public class FrameworkConfig {
 
         ObjectNode objNode = (ObjectNode) configNode.deepCopy();
 
-        // Extract decoration (parser-specific)
+        // Extract mime filtering config (framework-level, underscore prefix)
+        List<String> mimeInclude = 
parseStringList(objNode.remove(MIME_INCLUDE_KEY));
+        List<String> mimeExclude = 
parseStringList(objNode.remove(MIME_EXCLUDE_KEY));
+
         ParserDecoration decoration = null;
-        if (objNode.has(DECORATE_KEY)) {
-            JsonNode decorateNode = objNode.remove(DECORATE_KEY);
-            decoration = parseDecoration(decorateNode);
+        if (!mimeInclude.isEmpty() || !mimeExclude.isEmpty()) {
+            decoration = new ParserDecoration(mimeInclude, mimeExclude);
         }
 
         // Remaining fields are component-specific config
@@ -80,22 +84,6 @@ public class FrameworkConfig {
         return new FrameworkConfig(decoration, componentConfigJson);
     }
 
-    private static ParserDecoration parseDecoration(JsonNode decorateNode) {
-        if (decorateNode == null || !decorateNode.isObject()) {
-            return null;
-        }
-
-        List<String> mimeInclude = 
parseStringList(decorateNode.get("mimeInclude"));
-        List<String> mimeExclude = 
parseStringList(decorateNode.get("mimeExclude"));
-        List<String> fallbacks = 
parseStringList(decorateNode.get("fallbacks"));
-
-        if (mimeInclude.isEmpty() && mimeExclude.isEmpty() && 
fallbacks.isEmpty()) {
-            return null;
-        }
-
-        return new ParserDecoration(mimeInclude, mimeExclude, fallbacks);
-    }
-
     private static List<String> parseStringList(JsonNode node) {
         if (node == null) {
             return Collections.emptyList();
@@ -124,18 +112,15 @@ public class FrameworkConfig {
     }
 
     /**
-     * Parser decoration configuration for mime type filtering and fallbacks.
+     * Parser decoration configuration for mime type filtering.
      */
     public static class ParserDecoration {
         private final List<String> mimeInclude;
         private final List<String> mimeExclude;
-        private final List<String> fallbacks;
 
-        public ParserDecoration(List<String> mimeInclude, List<String> 
mimeExclude,
-                                 List<String> fallbacks) {
+        public ParserDecoration(List<String> mimeInclude, List<String> 
mimeExclude) {
             this.mimeInclude = Collections.unmodifiableList(mimeInclude);
             this.mimeExclude = Collections.unmodifiableList(mimeExclude);
-            this.fallbacks = Collections.unmodifiableList(fallbacks);
         }
 
         public List<String> getMimeInclude() {
@@ -146,16 +131,8 @@ public class FrameworkConfig {
             return mimeExclude;
         }
 
-        public List<String> getFallbacks() {
-            return fallbacks;
-        }
-
         public boolean hasFiltering() {
             return !mimeInclude.isEmpty() || !mimeExclude.isEmpty();
         }
-
-        public boolean hasFallbacks() {
-            return !fallbacks.isEmpty();
-        }
     }
 }
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
index 95f0dc6166..aa19032f93 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ParserLoader.java
@@ -42,13 +42,11 @@ import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
 import org.apache.tika.parser.RenderingParser;
-import org.apache.tika.parser.multiple.AbstractMultipleParser.MetadataPolicy;
-import org.apache.tika.parser.multiple.FallbackParser;
 import org.apache.tika.renderer.Renderer;
 import org.apache.tika.utils.ServiceLoaderUtils;
 
 /**
- * Loader for parsers with support for decoration (mime type filtering, 
fallbacks).
+ * Loader for parsers with support for decoration (mime type filtering).
  */
 public class ParserLoader {
 
@@ -112,16 +110,16 @@ public class ParserLoader {
                     // Parse exclusions from default-parser config
                     JsonNode configNode = entry.getValue();
 
-                    // Check for common mistake: using "excludes" instead of 
"exclude"
-                    if (configNode != null && configNode.has("excludes")) {
+                    // Check for common mistake: using "_excludes" instead of 
"_exclude"
+                    if (configNode != null && configNode.has("_excludes")) {
                         throw new TikaConfigException(
-                            "Invalid configuration for default-parser: found 
'excludes' but the correct " +
-                            "field name is 'exclude' (singular). Please change 
'excludes' to 'exclude' " +
+                            "Invalid configuration for default-parser: found 
'_excludes' but the correct " +
+                            "field name is '_exclude' (singular). Please 
change '_excludes' to '_exclude' " +
                             "in your configuration.");
                     }
 
-                    if (configNode != null && configNode.has("exclude")) {
-                        JsonNode excludeNode = configNode.get("exclude");
+                    if (configNode != null && configNode.has("_exclude")) {
+                        JsonNode excludeNode = configNode.get("_exclude");
                         if (excludeNode.isArray()) {
                             for (JsonNode excludeName : excludeNode) {
                                 if (excludeName.isTextual()) {
@@ -198,11 +196,6 @@ public class ParserLoader {
                     if (parsed.decoration.hasFiltering()) {
                         parser = applyMimeFiltering(parser, parsed.decoration);
                     }
-
-                    // Apply fallbacks
-                    if (parsed.decoration.hasFallbacks()) {
-                        parser = applyFallbacks(parser, parsed.decoration, 
parsedConfigs);
-                    }
                 }
 
                 parserList.add(parser);
@@ -342,25 +335,6 @@ public class ParserLoader {
         return parser;
     }
 
-    private Parser applyFallbacks(Parser parser, 
FrameworkConfig.ParserDecoration decoration,
-                                   Map<String, ParsedParserConfig> 
parsedConfigs)
-            throws TikaConfigException {
-
-        List<String> fallbackNames = decoration.getFallbacks();
-        List<Parser> fallbackParsers = new ArrayList<>();
-        fallbackParsers.add(parser); // Primary parser first
-
-        for (String fallbackName : fallbackNames) {
-            ParsedParserConfig fallbackConfig = 
parsedConfigs.get(fallbackName);
-            if (fallbackConfig == null) {
-                throw new TikaConfigException("Unknown fallback parser: " + 
fallbackName);
-            }
-            fallbackParsers.add(fallbackConfig.parser);
-        }
-
-        return new FallbackParser(TikaLoader.getMediaTypeRegistry(), 
MetadataPolicy.KEEP_ALL, fallbackParsers);
-    }
-
     private List<Parser> loadSpiParsers(Set<Class<?>> excludeClasses) {
         List<Parser> result = new ArrayList<>();
         ServiceLoader<Parser> serviceLoader = ServiceLoader.load(Parser.class, 
classLoader);
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
index d90854666d..06da0f3175 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaJsonConfig.java
@@ -69,6 +69,9 @@ import org.apache.tika.exception.TikaConfigException;
  * {
  *   // Core Tika components (validated by TikaLoader)
  *   "parsers": [
+ *     { "pdf-parser": { "_mime-include": ["application/pdf"], "ocrStrategy": 
"AUTO", ... } },
+ *     { "html-parser": { ... } },
+ *     { "default-parser": { "_exclude": ["some-parser"] } },
  *     { "pdf-parser": { "_mime-include": ["application/pdf"], "ocrStrategy": 
"AUTO" } },
  *     "html-parser",                    // String shorthand for no-config 
components
  *     { "default-parser": { "_exclude": ["ocr-parser"] } }
@@ -86,6 +89,7 @@ import org.apache.tika.exception.TikaConfigException;
  * </pre>
  *
  * <p>All components use array format for explicit ordering.
+ * Parsers support decoration via "_mime-include" and "_mime-exclude" fields.
  * Components without configuration can use string shorthand: "component-name"
  * instead of { "component-name": {} }.
  * Parsers support mime filtering via "_mime-include" and "_mime-exclude" 
fields.
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
index e3a4c63a6d..01aa21e0f6 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
@@ -54,21 +54,19 @@ import org.apache.tika.renderer.Renderer;
  * <p>JSON configuration format:
  * <pre>
  * {
- *   "parsers": {
- *     "pdf-parser": {
- *       "_priority": 10,
- *       "_decorate": {
- *         "mimeInclude": ["application/pdf"],
- *         "mimeExclude": ["application/pdf+fdf"],
- *         "fallbacks": ["empty-parser"]
- *       },
- *       "ocrStrategy": "AUTO",
- *       "extractInlineImages": true
+ *   "parsers": [
+ *     {
+ *       "pdf-parser": {
+ *         "_mime-include": ["application/pdf"],
+ *         "_mime-exclude": ["application/pdf+fdf"],
+ *         "ocrStrategy": "AUTO",
+ *         "extractInlineImages": true
+ *       }
  *     }
- *   },
- *   "detectors": {
- *     "mime-magic-detector": { ... }
- *   }
+ *   ],
+ *   "detectors": [
+ *     { "mime-magic-detector": { ... } }
+ *   ]
  * }
  * </pre>
  */
diff --git 
a/tika-serialization/src/test/java/org/apache/tika/config/loader/FrameworkConfigTest.java
 
b/tika-serialization/src/test/java/org/apache/tika/config/loader/FrameworkConfigTest.java
index 0fd8ffaa6c..9a8444bbf4 100644
--- 
a/tika-serialization/src/test/java/org/apache/tika/config/loader/FrameworkConfigTest.java
+++ 
b/tika-serialization/src/test/java/org/apache/tika/config/loader/FrameworkConfigTest.java
@@ -37,11 +37,8 @@ public class FrameworkConfigTest {
     public void testExtractDecoration() throws Exception {
         String json = """
             {
-              "_decorate": {
-                "mimeInclude": ["application/pdf"],
-                "mimeExclude": ["application/pdf+fdf"],
-                "fallbacks": ["backup-parser"]
-              },
+              "_mime-include": ["application/pdf"],
+              "_mime-exclude": ["application/pdf+fdf"],
               "name": "test"
             }
                 """;
@@ -53,7 +50,6 @@ public class FrameworkConfigTest {
 
         FrameworkConfig.ParserDecoration decoration = config.getDecoration();
         assertTrue(decoration.hasFiltering(), "Should have filtering");
-        assertTrue(decoration.hasFallbacks(), "Should have fallbacks");
 
         assertEquals(1, decoration.getMimeInclude().size());
         assertEquals("application/pdf", decoration.getMimeInclude().get(0));
@@ -61,11 +57,10 @@ public class FrameworkConfigTest {
         assertEquals(1, decoration.getMimeExclude().size());
         assertEquals("application/pdf+fdf", decoration.getMimeExclude().get(0));
 
-        assertEquals(1, decoration.getFallbacks().size());
-        assertEquals("backup-parser", decoration.getFallbacks().get(0));
-
-        assertFalse(config.getComponentConfigJson().json().contains("_decorate"),
-                "Component config should not contain _decorate");
+        assertFalse(config.getComponentConfigJson().json().contains("_mime-include"),
+                "Component config should not contain _mime-include");
+        assertFalse(config.getComponentConfigJson().json().contains("_mime-exclude"),
+                "Component config should not contain _mime-exclude");
     }
 
     @Test
@@ -83,10 +78,10 @@ public class FrameworkConfigTest {
     }
 
     @Test
-    public void testEmptyDecoration() throws Exception {
+    public void testMimeIncludeOnly() throws Exception {
         String json = """
             {
-              "_decorate": {},
+              "_mime-include": ["text/plain"],
               "name": "test"
             }
                 """;
@@ -94,17 +89,33 @@ public class FrameworkConfigTest {
 
         FrameworkConfig config = FrameworkConfig.extract(node, MAPPER);
 
-        // Empty decoration should return null
-        assertNull(config.getDecoration(), "Empty decoration should be null");
+        assertNotNull(config.getDecoration(), "Decoration should be present");
+        assertEquals(1, config.getDecoration().getMimeInclude().size());
+        assertTrue(config.getDecoration().getMimeExclude().isEmpty());
+    }
+
+    @Test
+    public void testMimeExcludeOnly() throws Exception {
+        String json = """
+            {
+              "_mime-exclude": ["image/jpeg"],
+              "name": "test"
+            }
+                """;
+        JsonNode node = MAPPER.readTree(json);
+
+        FrameworkConfig config = FrameworkConfig.extract(node, MAPPER);
+
+        assertNotNull(config.getDecoration(), "Decoration should be present");
+        assertTrue(config.getDecoration().getMimeInclude().isEmpty());
+        assertEquals(1, config.getDecoration().getMimeExclude().size());
     }
 
     @Test
     public void testComponentConfigJsonClean() throws Exception {
         String json = """
             {
-              "_decorate": {
-                "mimeInclude": ["text/plain"]
-              },
+              "_mime-include": ["text/plain"],
               "bufferSize": 1024,
               "enabled": true
             }
@@ -116,7 +127,7 @@ public class FrameworkConfigTest {
         String componentJson = config.getComponentConfigJson().json();
 
         // Verify framework fields are removed
-        assertFalse(componentJson.contains("_decorate"), "Should not contain _decorate");
+        assertFalse(componentJson.contains("_mime-include"), "Should not contain _mime-include");
 
         // Verify component fields remain
         assertTrue(componentJson.contains("bufferSize"), "Should contain bufferSize");
diff --git a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
index 435282998a..85e0ed1e18 100644
--- a/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
+++ b/tika-serialization/src/test/java/org/apache/tika/config/loader/TikaLoaderTest.java
@@ -362,12 +362,12 @@ public class TikaLoaderTest {
 
     @Test
     public void testExcludesInsteadOfExcludeThrowsException() throws Exception {
-        // Create a config with the common mistake: "excludes" instead of "exclude"
+        // Create a config with the common mistake: "_excludes" instead of "_exclude"
         String invalidConfig = "{\n" +
                 "  \"parsers\": [\n" +
                 "    {\n" +
                 "      \"default-parser\": {\n" +
-                "        \"excludes\": [\"pdf-parser\"]\n" +
+                "        \"_excludes\": [\"pdf-parser\"]\n" +
                 "      }\n" +
                 "    }\n" +
                 "  ]\n" +
@@ -385,10 +385,10 @@ public class TikaLoaderTest {
                 throw new AssertionError("Expected TikaConfigException to be thrown");
             } catch (org.apache.tika.exception.TikaConfigException e) {
                 // Expected - verify the error message is helpful
-                assertTrue(e.getMessage().contains("excludes"),
-                        "Error message should mention 'excludes'");
-                assertTrue(e.getMessage().contains("exclude"),
-                        "Error message should mention the correct field 'exclude'");
+                assertTrue(e.getMessage().contains("_excludes"),
+                        "Error message should mention '_excludes'");
+                assertTrue(e.getMessage().contains("_exclude"),
+                        "Error message should mention the correct field '_exclude'");
                 assertTrue(e.getMessage().contains("singular"),
                         "Error message should explain it should be singular");
             }
diff --git a/tika-serialization/src/test/resources/configs/example-tika-config.json b/tika-serialization/src/test/resources/configs/example-tika-config.json
index e6810d34bd..acf6125587 100644
--- a/tika-serialization/src/test/resources/configs/example-tika-config.json
+++ b/tika-serialization/src/test/resources/configs/example-tika-config.json
@@ -2,20 +2,15 @@
   "parsers": [
     {
       "pdf-parser": {
-        "_decorate": {
-          "mimeInclude": ["application/pdf"],
-          "mimeExclude": ["application/pdf+fdf"],
-          "fallbacks": ["empty-parser"]
-        },
+        "_mime-include": ["application/pdf"],
+        "_mime-exclude": ["application/pdf+fdf"],
         "ocrStrategy": "AUTO",
         "extractInlineImages": true
       }
     },
     {
       "html-parser": {
-        "_decorate": {
-          "mimeExclude": ["application/xhtml+xml"]
-        },
+        "_mime-exclude": ["application/xhtml+xml"],
         "encoding": "UTF-8"
       }
     },
diff --git a/tika-serialization/src/test/resources/configs/test-decoration-config.json b/tika-serialization/src/test/resources/configs/test-decoration-config.json
index 63e5b169e5..9568a8f47d 100644
--- a/tika-serialization/src/test/resources/configs/test-decoration-config.json
+++ b/tika-serialization/src/test/resources/configs/test-decoration-config.json
@@ -2,10 +2,8 @@
   "parsers": [
     {
       "configurable-test-parser": {
-        "_decorate": {
-          "mimeInclude": ["application/pdf", "text/plain"],
-          "mimeExclude": ["application/pdf+fdf"]
-        },
+        "_mime-include": ["application/pdf", "text/plain"],
+        "_mime-exclude": ["application/pdf+fdf"],
         "name": "filtered-parser",
         "bufferSize": 4096
       }
diff --git a/tika-serialization/src/test/resources/configs/test-default-parser-with-exclusions.json b/tika-serialization/src/test/resources/configs/test-default-parser-with-exclusions.json
index 1d6c1dab9e..e8c90fe201 100644
--- a/tika-serialization/src/test/resources/configs/test-default-parser-with-exclusions.json
+++ b/tika-serialization/src/test/resources/configs/test-default-parser-with-exclusions.json
@@ -9,7 +9,7 @@
     },
     {
       "default-parser": {
-        "exclude": ["minimal-test-parser", "fallback-test-parser"]
+        "_exclude": ["minimal-test-parser", "fallback-test-parser"]
       }
     }
   ]
diff --git a/tika-serialization/src/test/resources/configs/test-loader-config.json b/tika-serialization/src/test/resources/configs/test-loader-config.json
index 1c1db9688b..d270d8f788 100644
--- a/tika-serialization/src/test/resources/configs/test-loader-config.json
+++ b/tika-serialization/src/test/resources/configs/test-loader-config.json
@@ -10,10 +10,7 @@
     },
     {
       "fallback-test-parser": {
-        "_decorate": {
-          "mimeInclude": ["application/test+fallback"],
-          "fallbacks": ["minimal-test-parser"]
-        },
+        "_mime-include": ["application/test+fallback"],
         "message": "primary parser",
         "failOnPurpose": false
       }


Reply via email to