This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 589d1c25b1 TIKA-4636-simplify-embedded-extractor-handling (#2558)
589d1c25b1 is described below

commit 589d1c25b1dcb70c6a77acc9c2a5455b0258b21e
Author: Tim Allison <[email protected]>
AuthorDate: Thu Jan 29 08:03:46 2026 -0500

    TIKA-4636-simplify-embedded-extractor-handling (#2558)
---
 .../test/java/org/apache/tika/cli/TikaCLITest.java |  41 +++++---
 .../ParsingEmbeddedDocumentExtractor.java          |   2 +-
 .../tika/extractor/RUnpackExtractorFactory.java    | 112 --------------------
 ...rFactory.java => StandardExtractorFactory.java} |   7 +-
 .../java/org/apache/tika/io/FilenameUtils.java     |   2 +-
 .../org/apache/tika/parser/AutoDetectParser.java   |   7 +-
 .../apache/tika/parser/AutoDetectParserConfig.java |  17 +--
 .../apache/tika/parser/AutoDetectParserTest.java   |  23 ----
 .../resources/configs/tika-config-no-names.json    |   8 +-
 ...a-config-upcasing-custom-handler-decorator.json |  21 +---
 .../resources/configs/tika-config-with-names.json  |   7 +-
 .../org/apache/tika/async/cli/TikaAsyncCLI.java    |  10 +-
 .../apache/tika/async/cli/AsyncProcessorTest.java  |  12 +--
 .../AbstractEmbeddedDocumentBytesHandler.java      |  49 +++------
 .../BasicEmbeddedDocumentBytesHandler.java         |  57 ----------
 .../EmittingEmbeddedDocumentBytesHandler.java      |  14 +--
 .../pipes/core}/extractor/RUnpackExtractor.java    |  13 ++-
 .../core/extractor/RUnpackExtractorFactory.java    |  17 ++-
 ...dDocumentBytesConfig.java => UnpackConfig.java} | 116 +++++++++++++++++----
 .../apache/tika/pipes/core/server/EmitHandler.java |  12 +--
 .../tika/pipes/core/server/ParseHandler.java       |   8 +-
 .../apache/tika/pipes/core/server/PipesServer.java |  14 +--
 .../apache/tika/pipes/core/server/PipesWorker.java |  34 +++---
 .../core/extractor/UnpackConfigSelectorTest.java   |  33 +++---
 .../core/serialization/JsonFetchEmitTupleTest.java |   2 +-
 tika-pipes/tika-pipes-integration-tests/pom.xml    |  12 +++
 .../pipes/core/DigestingOpenContainersTest.java    |  66 ++++++++++++
 .../apache/tika/pipes/core/PipesServerTest.java    |  97 +----------------
 .../src/test/resources/configs/tika-4533.json      |  19 ++++
 .../resources/configs/tika-config-truncate.json    |  11 +-
 .../resources/test-documents/testLargeOLEDoc.doc   | Bin 0 -> 2077696 bytes
 .../tika/config/loader/ComponentRegistry.java      |   6 +-
 .../org/apache/tika/config/loader/TikaLoader.java  |   2 +
 .../configs/TIKA-4207-embedded-bytes-config.json   |  13 ---
 .../tika/server/core/resource/AsyncResource.java   |  10 +-
 .../apache/tika/server/standard/TikaPipesTest.java |  10 +-
 36 files changed, 370 insertions(+), 514 deletions(-)

diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java 
b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index f09338059a..24886a6726 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -43,6 +43,8 @@ import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -56,6 +58,8 @@ import org.apache.tika.utils.StringUtils;
  */
 public class TikaCLITest {
 
+    private static final Logger LOG = 
LoggerFactory.getLogger(TikaCLITest.class);
+
     static final File TEST_DATA_FILE = new 
File("src/test/resources/test-data");
     static final File CONFIGS_DIR = new File("src/test/resources/configs");
     private final URI testDataURI = TEST_DATA_FILE.toURI();
@@ -271,28 +275,31 @@ public class TikaCLITest {
     public void testRUnpack() throws Exception {
         //TODO -- rework this to use two separate emitters
         //one for bytes and one for json
+        // TODO: 00000001.bin extension may be wrong - see 
~/Desktop/unpack-discussion/mime-todo.txt
         String[] expectedChildren = new String[]{
                 "testPDFPackage.pdf.json",
                 //the first two test that the default single file config is 
working
-                "testPDFPackage.pdf-embed/00000001-embedded-1",
-                "testPDFPackage.pdf-embed/00000002-image0.jpg",
-                "testPDFPackage.pdf-embed/00000003-PDF1.pdf",
-                "testPDFPackage.pdf-embed/00000004-PDF2.pdf"};
+                "testPDFPackage.pdf-embed/00000001.bin",
+                "testPDFPackage.pdf-embed/00000002.jpg",
+                "testPDFPackage.pdf-embed/00000003.pdf",
+                "testPDFPackage.pdf-embed/00000004.pdf"};
         testRecursiveUnpack("testPDFPackage.pdf", expectedChildren, 2);
     }
 
     @Test
     public void testPSTRUnpack() throws Exception {
+        // TODO: The .bin extensions for embedded .msg files are wrong - they 
should be .msg
+        // CONTENT_TYPE is not being set for embedded documents - see 
~/Desktop/unpack-discussion/mime-todo.txt
         String[] expectedChildren = new String[]{"testPST.pst.json",
-                "testPST.pst-embed/00000007-First email.msg",
-                "testPST.pst-embed/00000001-Feature Generators.msg",
-                "testPST.pst-embed/00000008-First email.msg",
-                "testPST.pst-embed/00000004-[jira] [Resolved] (TIKA-1249) 
Vcard files detection.msg",
-                "testPST.pst-embed/00000003-Feature Generators.msg",
-                "testPST.pst-embed/00000002-putstatic%22.msg",
-                "testPST.pst-embed/00000005-[jira] [Commented] (TIKA-1250) 
Process loops infintely processing a CHM file.msg",
-                "testPST.pst-embed/00000009-attachment.docx",
-                "testPST.pst-embed/00000006-[WEBINAR] - %22Introducing 
Couchbase Server 2.5%22.msg"};
+                "testPST.pst-embed/00000007.bin",
+                "testPST.pst-embed/00000001.bin",
+                "testPST.pst-embed/00000008.bin",
+                "testPST.pst-embed/00000004.bin",
+                "testPST.pst-embed/00000003.bin",
+                "testPST.pst-embed/00000002.bin",
+                "testPST.pst-embed/00000005.bin",
+                "testPST.pst-embed/00000009.docx",
+                "testPST.pst-embed/00000006.bin"};
         testRecursiveUnpack("testPST.pst", expectedChildren, 2);
         try (Reader reader = 
Files.newBufferedReader(extractDir.resolve("testPST.pst.json"))) {
             List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
@@ -400,6 +407,14 @@ public class TikaCLITest {
                 .toFile()
                 .list();
         assertNotNull(jsonFile);
+
+        // Debug: log actual files found
+        LOG.info("=== Actual files found ===");
+        for (String name : fileNames) {
+            LOG.info("  {}", name);
+        }
+        LOG.info("=== End actual files ===");
+
         assertEquals(expectedLength, jsonFile.length);
 
         for (String expectedChildName : expectedChildrenFileNames) {
diff --git 
a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
 
b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
index 2c0a9c0f28..2d88fcd445 100644
--- 
a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
+++ 
b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
@@ -177,7 +177,7 @@ public class ParsingEmbeddedDocumentExtractor implements 
EmbeddedDocumentExtract
         }
     }
 
-    void recordException(Exception e, ParseContext context) {
+    protected void recordException(Exception e, ParseContext context) {
         ParseRecord record = context.get(ParseRecord.class);
         if (record == null) {
             return;
diff --git 
a/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractorFactory.java
 
b/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractorFactory.java
deleted file mode 100644
index 858e8e61f7..0000000000
--- 
a/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractorFactory.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.extractor;
-
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.tika.config.TikaComponent;
-import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParseContext;
-
-@TikaComponent(name = "runpack-extractor-factory")
-public class RUnpackExtractorFactory implements 
EmbeddedDocumentByteStoreExtractorFactory {
-
-    public static long DEFAULT_MAX_EMBEDDED_BYTES_FOR_EXTRACTION = 10l * 1024l 
* 1024l * 1024l;
-
-    private Set<String> embeddedBytesIncludeMimeTypes = new HashSet<>();
-    private Set<String> embeddedBytesExcludeMimeTypes = new HashSet<>();
-    private Set<String> embeddedBytesIncludeEmbeddedResourceTypes = new 
HashSet<>();
-    private Set<String> embeddedBytesExcludeEmbeddedResourceTypes = new 
HashSet<>();
-
-    private long maxEmbeddedBytesForExtraction = 
DEFAULT_MAX_EMBEDDED_BYTES_FOR_EXTRACTION;
-
-    public void setEmbeddedBytesIncludeMimeTypes(Set<String> includeMimeTypes) 
{
-        embeddedBytesIncludeMimeTypes = new HashSet<>(includeMimeTypes);
-    }
-
-    public void setEmbeddedBytesExcludeMimeTypes(Set<String> excludeMimeTypes) 
{
-        embeddedBytesExcludeMimeTypes = new HashSet<>(excludeMimeTypes);
-    }
-
-    public void setEmbeddedBytesIncludeEmbeddedResourceTypes(Set<String> 
includeAttachmentTypes) {
-        embeddedBytesIncludeEmbeddedResourceTypes = new 
HashSet<>(includeAttachmentTypes);
-    }
-
-    public void setEmbeddedBytesExcludeEmbeddedResourceTypes(Set<String> 
excludeAttachmentTypes) {
-        embeddedBytesExcludeEmbeddedResourceTypes = new 
HashSet<>(excludeAttachmentTypes);
-    }
-
-    /**
-     * Total number of bytes to write out. A good zip bomb may contain 
petabytes
-     * compressed into a few kb. Make sure that you can't fill up a disk!
-     *
-     * This does not include the container file in the count of bytes written 
out.
-     * This only counts the lengths of the embedded files.
-     *
-     * @param maxEmbeddedBytesForExtraction
-     */
-    public void setMaxEmbeddedBytesForExtraction(long 
maxEmbeddedBytesForExtraction) throws TikaConfigException {
-        if (maxEmbeddedBytesForExtraction < 0) {
-            throw new TikaConfigException("maxEmbeddedBytesForExtraction must 
be >= 0");
-        }
-        this.maxEmbeddedBytesForExtraction = maxEmbeddedBytesForExtraction;
-    }
-
-    public Set<String> getEmbeddedBytesIncludeMimeTypes() {
-        return embeddedBytesIncludeMimeTypes;
-    }
-
-    public Set<String> getEmbeddedBytesExcludeMimeTypes() {
-        return embeddedBytesExcludeMimeTypes;
-    }
-
-    public Set<String> getEmbeddedBytesIncludeEmbeddedResourceTypes() {
-        return embeddedBytesIncludeEmbeddedResourceTypes;
-    }
-
-    public Set<String> getEmbeddedBytesExcludeEmbeddedResourceTypes() {
-        return embeddedBytesExcludeEmbeddedResourceTypes;
-    }
-
-    public long getMaxEmbeddedBytesForExtraction() {
-        return maxEmbeddedBytesForExtraction;
-    }
-
-    @Override
-    public EmbeddedDocumentExtractor newInstance(Metadata metadata, 
ParseContext parseContext) {
-        RUnpackExtractor ex =
-                new RUnpackExtractor(parseContext,
-                        maxEmbeddedBytesForExtraction);
-        ex.setEmbeddedBytesSelector(createEmbeddedBytesSelector());
-        return ex;
-    }
-
-
-    private EmbeddedBytesSelector createEmbeddedBytesSelector() {
-        if (embeddedBytesIncludeMimeTypes.size() == 0 &&
-                embeddedBytesExcludeMimeTypes.size() == 0 &&
-                embeddedBytesIncludeEmbeddedResourceTypes.size() == 0 &&
-                embeddedBytesExcludeEmbeddedResourceTypes.size() == 0) {
-            return EmbeddedBytesSelector.ACCEPT_ALL;
-        }
-        return new BasicEmbeddedBytesSelector(embeddedBytesIncludeMimeTypes,
-                embeddedBytesExcludeMimeTypes, 
embeddedBytesIncludeEmbeddedResourceTypes,
-                embeddedBytesExcludeEmbeddedResourceTypes);
-    }
-}
diff --git 
a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractorFactory.java
 
b/tika-core/src/main/java/org/apache/tika/extractor/StandardExtractorFactory.java
similarity index 82%
copy from 
tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractorFactory.java
copy to 
tika-core/src/main/java/org/apache/tika/extractor/StandardExtractorFactory.java
index 1cc53da2df..87dd18a1ca 100644
--- 
a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractorFactory.java
+++ 
b/tika-core/src/main/java/org/apache/tika/extractor/StandardExtractorFactory.java
@@ -20,9 +20,12 @@ import org.apache.tika.config.TikaComponent;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 
+/**
+ * Standard factory for creating {@link ParsingEmbeddedDocumentExtractor} 
instances.
+ * This is the default embedded document extractor factory in tika-core.
+ */
 @TikaComponent
-public class ParsingEmbeddedDocumentExtractorFactory
-        implements EmbeddedDocumentExtractorFactory {
+public class StandardExtractorFactory implements 
EmbeddedDocumentExtractorFactory {
 
     @Override
     public EmbeddedDocumentExtractor newInstance(Metadata metadata, 
ParseContext parseContext) {
diff --git a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java 
b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
index b2e2f5d878..9b363cb8a4 100644
--- a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
@@ -349,7 +349,7 @@ public class FilenameUtils {
             String ext = MIME_TYPES
                     .forName(mime)
                     .getExtension();
-            if (ext == null) {
+            if (StringUtils.isBlank(ext)) {
                 return ".bin";
             } else {
                 return ext;
diff --git 
a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java 
b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
index 64067cbad4..ae9a33e170 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
@@ -28,7 +28,7 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.exception.ZeroByteFileException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
-import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractorFactory;
+import org.apache.tika.extractor.StandardExtractorFactory;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
@@ -217,10 +217,9 @@ public class AutoDetectParser extends CompositeParser {
         if (d == null) {
             context.set(Detector.class, getDetector());
         }
-        EmbeddedDocumentExtractorFactory edxf =
-                autoDetectParserConfig.getEmbeddedDocumentExtractorFactory();
+        EmbeddedDocumentExtractorFactory edxf = 
context.get(EmbeddedDocumentExtractorFactory.class);
         if (edxf == null) {
-            edxf = new ParsingEmbeddedDocumentExtractorFactory();
+            edxf = new StandardExtractorFactory();
         }
         EmbeddedDocumentExtractor edx = edxf.newInstance(metadata, context);
         context.set(EmbeddedDocumentExtractor.class, edx);
diff --git 
a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java 
b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
index 2ce72443b8..21f08a9191 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
@@ -21,7 +21,6 @@ import java.io.Serializable;
 import org.xml.sax.ContentHandler;
 
 import org.apache.tika.config.TikaComponent;
-import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.sax.ContentHandlerDecoratorFactory;
 
@@ -32,7 +31,7 @@ import org.apache.tika.sax.ContentHandlerDecoratorFactory;
  * via {@link org.apache.tika.config.OutputLimits} in the ParseContext, not 
here.
  * <p>
  * This is a config POJO. It uses standard Jackson deserialization for its
- * primitive fields, but component fields (like 
embeddedDocumentExtractorFactory)
+ * primitive fields, but component fields (like contentHandlerDecoratorFactory)
  * use compact format.
  */
 @TikaComponent(spi = false)
@@ -49,8 +48,6 @@ public class AutoDetectParserConfig implements Serializable {
 
     public static AutoDetectParserConfig DEFAULT = new 
AutoDetectParserConfig();
 
-    private EmbeddedDocumentExtractorFactory embeddedDocumentExtractorFactory 
= null;
-
     private ContentHandlerDecoratorFactory contentHandlerDecoratorFactory =
             NOOP_CONTENT_HANDLER_DECORATOR_FACTORY;
 
@@ -59,15 +56,6 @@ public class AutoDetectParserConfig implements Serializable {
     public AutoDetectParserConfig() {
     }
 
-    public void setEmbeddedDocumentExtractorFactory(
-            EmbeddedDocumentExtractorFactory embeddedDocumentExtractorFactory) 
{
-        this.embeddedDocumentExtractorFactory = 
embeddedDocumentExtractorFactory;
-    }
-
-    public EmbeddedDocumentExtractorFactory 
getEmbeddedDocumentExtractorFactory() {
-        return embeddedDocumentExtractorFactory;
-    }
-
     public void setContentHandlerDecoratorFactory(
             ContentHandlerDecoratorFactory contentHandlerDecoratorFactory) {
         this.contentHandlerDecoratorFactory = contentHandlerDecoratorFactory;
@@ -88,8 +76,7 @@ public class AutoDetectParserConfig implements Serializable {
     @Override
     public String toString() {
         return "AutoDetectParserConfig{" +
-                "embeddedDocumentExtractorFactory=" + 
embeddedDocumentExtractorFactory +
-                ", contentHandlerDecoratorFactory=" + 
contentHandlerDecoratorFactory +
+                "contentHandlerDecoratorFactory=" + 
contentHandlerDecoratorFactory +
                 ", throwOnZeroBytes=" + throwOnZeroBytes + '}';
     }
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
index 01d28b5188..c16a0f825b 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
@@ -19,14 +19,12 @@ package org.apache.tika.parser;
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipOutputStream;
@@ -34,14 +32,12 @@ import java.util.zip.ZipOutputStream;
 import org.junit.jupiter.api.Test;
 import org.xml.sax.ContentHandler;
 
-import org.apache.tika.TikaLoaderHelper;
 import org.apache.tika.TikaTest;
 import org.apache.tika.config.loader.TikaLoader;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.exception.WriteLimitReachedException;
 import org.apache.tika.exception.ZeroByteFileException;
-import org.apache.tika.extractor.RUnpackExtractorFactory;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
@@ -562,23 +558,4 @@ public class AutoDetectParserTest extends TikaTest {
         }
     }
 
-    @Test
-    public void testDigestingOpenContainers() throws Exception {
-        //TIKA-4533 -- this tests both that a very large embedded OLE doc 
doesn't cause a zip bomb
-        //exception AND that the sha for the embedded OLE doc is not the sha 
for a zero-byte file
-        String expectedSha = 
"bbc2057a1ff8fe859a296d2fbb493fc0c3e5796749ba72507c0e13f7a3d81f78";
-        TikaLoader loader = TikaLoaderHelper.getLoader("tika-4533.json");
-        AutoDetectParser autoDetectParser = (AutoDetectParser) 
loader.loadAutoDetectParser();
-        ParseContext parseContext = loader.loadParseContext();
-        //this models what happens in tika-pipes
-        if (autoDetectParser.getAutoDetectParserConfig()
-                    .getEmbeddedDocumentExtractorFactory() == null) {
-            autoDetectParser.getAutoDetectParserConfig()
-                                                     
.setEmbeddedDocumentExtractorFactory(new RUnpackExtractorFactory());
-        }
-        List<Metadata> metadataList = 
getRecursiveMetadata("testLargeOLEDoc.doc", autoDetectParser, parseContext);
-        assertEquals(expectedSha, 
metadataList.get(2).get("X-TIKA:digest:SHA256"));
-        
assertNull(metadataList.get(2).get(TikaCoreProperties.EMBEDDED_EXCEPTION));
-        assertEquals(2049290L, 
Long.parseLong(metadataList.get(2).get(Metadata.CONTENT_LENGTH)));
-    }
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
index 8196d32f10..33fcd5ffd7 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.json
@@ -1,9 +1,9 @@
 {
-  "auto-detect-parser": {
-    "embeddedDocumentExtractorFactory": {
-      "runpack-extractor-factory": {
+  "other-configs": {
+    "embedded-document-extractor-factory": {
+      "standard-extractor-factory": {
+        "writeFileNameToContent": false
       }
     }
   }
 }
-
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
index 65148f0f61..8e9b5b6012 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.json
@@ -1,27 +1,16 @@
 {
   "auto-detect-parser": {
-    "embeddedDocumentExtractorFactory": {
-      "runpack-extractor-factory": {
-        "embeddedBytesIncludeMimeTypes": [
-          "text/pdf"
-        ],
-        "embeddedBytesExcludeMimeTypes": [
-          "rtf/application"
-        ],
-        "embeddedBytesIncludeEmbeddedResourceTypes": [
-          "appended"
-        ],
-        "embeddedBytesExcludeEmbeddedResourceTypes": [
-        ],
-        "maxEmbeddedBytesForExtraction": 10737418240
-      }
-    },
     "contentHandlerDecoratorFactory": 
"upcasing-content-handler-decorator-factory",
     "throwOnZeroBytes": true
   },
   "other-configs": {
     "digester-factory": {
       "commons-digester-factory": {}
+    },
+    "embedded-document-extractor-factory": {
+      "standard-extractor-factory": {
+        "writeFileNameToContent": true
+      }
     }
   }
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
index 0c90785bd1..28f542245b 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.json
@@ -1,7 +1,8 @@
 {
-  "auto-detect-parser": {
-    "embeddedDocumentExtractorFactory": {
-      "runpack-extractor-factory": {
+  "other-configs": {
+    "embedded-document-extractor-factory": {
+      "standard-extractor-factory": {
+        "writeFileNameToContent": true
       }
     }
   }
diff --git 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
index 15586c526c..015917d51a 100644
--- 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
+++ 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
@@ -41,7 +41,7 @@ import org.apache.tika.pipes.api.emitter.EmitKey;
 import org.apache.tika.pipes.api.fetcher.FetchKey;
 import org.apache.tika.pipes.api.pipesiterator.PipesIterator;
 import org.apache.tika.pipes.core.async.AsyncProcessor;
-import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig;
+import org.apache.tika.pipes.core.extractor.UnpackConfig;
 import org.apache.tika.pipes.core.pipesiterator.PipesIteratorManager;
 import org.apache.tika.plugins.ExtensionConfig;
 import org.apache.tika.plugins.TikaPluginManager;
@@ -302,15 +302,15 @@ public class TikaAsyncCLI {
             return;
         }
         ParseContext parseContext = t.getParseContext();
-        EmbeddedDocumentBytesConfig config = new EmbeddedDocumentBytesConfig();
+        UnpackConfig config = new UnpackConfig();
         config.setExtractEmbeddedDocumentBytes(true);
         config.setEmitter(TikaConfigAsyncWriter.EMITTER_NAME);
         config.setIncludeOriginal(false);
-        
config.setSuffixStrategy(EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.DETECTED);
+        config.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
         config.setEmbeddedIdPrefix("-");
         config.setZeroPadName(8);
-        
config.setKeyBaseStrategy(EmbeddedDocumentBytesConfig.KEY_BASE_STRATEGY.CONTAINER_NAME_AS_IS);
-        parseContext.set(EmbeddedDocumentBytesConfig.class, config);
+        config.setKeyBaseStrategy(UnpackConfig.KEY_BASE_STRATEGY.DEFAULT);
+        parseContext.set(UnpackConfig.class, config);
     }
 
     private static void usage(Options options) throws IOException {
diff --git 
a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java
 
b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java
index 6d26b6dd0f..782ea015b7 100644
--- 
a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java
+++ 
b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java
@@ -48,7 +48,7 @@ import org.apache.tika.pipes.api.fetcher.FetchKey;
 import org.apache.tika.pipes.api.pipesiterator.PipesIterator;
 import org.apache.tika.pipes.core.PipesException;
 import org.apache.tika.pipes.core.async.AsyncProcessor;
-import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig;
+import org.apache.tika.pipes.core.extractor.UnpackConfig;
 import org.apache.tika.serialization.JsonMetadataList;
 
 /**
@@ -112,13 +112,13 @@ public class AsyncProcessorTest extends TikaTest {
     public void testRecursiveUnpacking() throws Exception {
         AsyncProcessor processor = 
AsyncProcessor.load(configDir.resolve("tika-config.json"));
 
-        EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig = new 
EmbeddedDocumentBytesConfig(true);
+        UnpackConfig embeddedDocumentBytesConfig = new UnpackConfig(true);
         embeddedDocumentBytesConfig.setIncludeOriginal(true);
         embeddedDocumentBytesConfig.setEmitter("fse-bytes");
-        
embeddedDocumentBytesConfig.setSuffixStrategy(EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.NONE);
+        
embeddedDocumentBytesConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.NONE);
         embeddedDocumentBytesConfig.setEmbeddedIdPrefix("-");
         ParseContext parseContext = new ParseContext();
-        parseContext.set(EmbeddedDocumentBytesConfig.class, 
embeddedDocumentBytesConfig);
+        parseContext.set(UnpackConfig.class, embeddedDocumentBytesConfig);
         FetchEmitTuple t =
                 new FetchEmitTuple("myId-1", new FetchKey("fsf", "mock.xml"),
                         new EmitKey("fse-json", "emit-1"), new Metadata(), 
parseContext, FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT);
@@ -133,10 +133,10 @@ public class AsyncProcessorTest extends TikaTest {
         }
         processor.close();
 
-        String container = 
Files.readString(bytesOutputDir.resolve("emit-1-embed/emit-1-0"));
+        String container = 
Files.readString(bytesOutputDir.resolve("emit-1-embed/0"));
         assertContains("\"dc:creator\">Nikolai Lobachevsky", container);
 
-        String xmlEmbedded = 
Files.readString(bytesOutputDir.resolve("emit-1-embed/emit-1-1"));
+        String xmlEmbedded = 
Files.readString(bytesOutputDir.resolve("emit-1-embed/1"));
         assertContains("name=\"dc:creator\"", xmlEmbedded);
         assertContains(">embeddedAuthor</metadata>", xmlEmbedded);
 
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java
index 5dd27e419b..798b80f625 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java
@@ -33,41 +33,26 @@ public abstract class AbstractEmbeddedDocumentBytesHandler implements EmbeddedDo
     List<Integer> ids = new ArrayList<>();
 
     public String getEmitKey(String containerEmitKey, int embeddedId,
-                             EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig,
+                             UnpackConfig unpackConfig,
                              Metadata metadata) {
-        String embeddedIdString = embeddedDocumentBytesConfig.getZeroPadName() > 0 ?
+        String embeddedIdString = unpackConfig.getZeroPadName() > 0 ?
                 StringUtils.leftPad(Integer.toString(embeddedId),
-                        embeddedDocumentBytesConfig.getZeroPadName(), "0") :
+                        unpackConfig.getZeroPadName(), "0") :
                 Integer.toString(embeddedId);
 
-
         StringBuilder emitKey = new StringBuilder();
-        if (embeddedDocumentBytesConfig.getKeyBaseStrategy() ==
-                EmbeddedDocumentBytesConfig.KEY_BASE_STRATEGY.CONTAINER_NAME_AS_IS) {
-            emitKey.append(containerEmitKey);
-            emitKey.append("-embed");
-            emitKey.append("/");
-            emitKey.append(embeddedIdString).append(embeddedDocumentBytesConfig.getEmbeddedIdPrefix());
-            String fName = FilenameUtils.getSanitizedEmbeddedFileName(metadata, ".bin", 100);
-            if (! StringUtils.isBlank(fName)) {
-                emitKey.append(fName);
-            }
-            return emitKey.toString();
-        } else if (embeddedDocumentBytesConfig.getKeyBaseStrategy() ==
-                EmbeddedDocumentBytesConfig.KEY_BASE_STRATEGY.CONTAINER_NAME_NUMBERED) {
+        if (unpackConfig.getKeyBaseStrategy() == UnpackConfig.KEY_BASE_STRATEGY.DEFAULT) {
+            // Default pattern: {containerKey}-embed/{id}{suffix}
             emitKey.append(containerEmitKey);
-            emitKey.append("-embed");
-            emitKey.append("/")
-                    .append(FilenameUtils.getName(containerEmitKey));
+            emitKey.append("-embed/");
+            emitKey.append(embeddedIdString);
         } else {
-            emitKey.append(embeddedDocumentBytesConfig.getEmitKeyBase());
+            // CUSTOM: use the configured emitKeyBase
+            emitKey.append(unpackConfig.getEmitKeyBase());
+            emitKey.append(unpackConfig.getEmbeddedIdPrefix());
+            emitKey.append(embeddedIdString);
         }
-        //at this point the emit key has the full "file" part, now we
-        //add the embedded id prefix, the embedded id string and then maybe
-        //the file extension
-        emitKey.append(embeddedDocumentBytesConfig.getEmbeddedIdPrefix())
-                    .append(embeddedIdString);
-        appendSuffix(emitKey, metadata, embeddedDocumentBytesConfig);
+        appendSuffix(emitKey, metadata, unpackConfig);
         return emitKey.toString();
     }
 
@@ -81,15 +66,15 @@ public abstract class AbstractEmbeddedDocumentBytesHandler implements EmbeddedDo
         return ids;
     }
 
-    private void appendSuffix(StringBuilder emitKey, Metadata metadata, EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig) {
-        if (embeddedDocumentBytesConfig.getSuffixStrategy().equals(
-                EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.EXISTING)) {
+    private void appendSuffix(StringBuilder emitKey, Metadata metadata, UnpackConfig unpackConfig) {
+        if (unpackConfig.getSuffixStrategy().equals(
+                UnpackConfig.SUFFIX_STRATEGY.EXISTING)) {
             String fName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
             String suffix = FilenameUtils.getSuffixFromPath(fName);
             suffix = suffix.toLowerCase(Locale.US);
             emitKey.append(suffix);
-        } else if (embeddedDocumentBytesConfig.getSuffixStrategy()
-                                              .equals(EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.DETECTED)) {
+        } else if (unpackConfig.getSuffixStrategy()
+                                              .equals(UnpackConfig.SUFFIX_STRATEGY.DETECTED)) {
             emitKey.append(FilenameUtils.calculateExtension(metadata, ".bin"));
         }
     }
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/BasicEmbeddedDocumentBytesHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/BasicEmbeddedDocumentBytesHandler.java
deleted file mode 100644
index 93a4c8ce65..0000000000
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/BasicEmbeddedDocumentBytesHandler.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.pipes.core.extractor;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.io.input.UnsynchronizedBufferedInputStream;
-
-import org.apache.tika.metadata.Metadata;
-
-/**
- * For now, this is an in-memory EmbeddedDocumentBytesHandler that stores
- * all the bytes in memory. Users can retrieve the documents with {@link #getDocument(int)}.
- *
- * We'll need to make this cache to disk at some point if there are many bytes of
- * embedded documents.
- */
-public class BasicEmbeddedDocumentBytesHandler extends AbstractEmbeddedDocumentBytesHandler {
-    private final EmbeddedDocumentBytesConfig config;
-    public BasicEmbeddedDocumentBytesHandler(EmbeddedDocumentBytesConfig config) {
-        this.config = config;
-    }
-    //this won't scale, but let's start fully in memory for now;
-    Map<Integer, byte[]> docBytes = new HashMap<>();
-    @Override
-    public void add(int id, Metadata metadata, InputStream is) throws IOException {
-        super.add(id, metadata, is);
-        docBytes.put(id, IOUtils.toByteArray(is));
-    }
-
-    public InputStream getDocument(int id) throws IOException {
-        return new UnsynchronizedBufferedInputStream.Builder().setByteArray(docBytes.get(id)).get();
-    }
-
-    @Override
-    public void close() throws IOException {
-        //delete tmp dir or whatever here
-    }
-}
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmittingEmbeddedDocumentBytesHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmittingEmbeddedDocumentBytesHandler.java
index 5d74c49ef5..b7e8fd4a69 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmittingEmbeddedDocumentBytesHandler.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmittingEmbeddedDocumentBytesHandler.java
@@ -33,7 +33,7 @@ import org.apache.tika.pipes.core.emitter.TikaEmitterException;
 
 public class EmittingEmbeddedDocumentBytesHandler extends AbstractEmbeddedDocumentBytesHandler {
     private final EmitKey containerEmitKey;
-    private final EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig;
+    private final UnpackConfig unpackConfig;
     private final StreamEmitter emitter;
 
     private static final Metadata METADATA = new Metadata();
@@ -43,15 +43,15 @@ public class EmittingEmbeddedDocumentBytesHandler extends AbstractEmbeddedDocume
                                                 EmitterManager emitterManager) throws TikaException, IOException {
 
         this.containerEmitKey = fetchEmitTuple.getEmitKey();
-        this.embeddedDocumentBytesConfig = fetchEmitTuple.getParseContext().get(EmbeddedDocumentBytesConfig.class);
-        if (this.embeddedDocumentBytesConfig == null) {
-            throw new TikaConfigException("EmbeddedDocumentBytesConfig must not be null!");
+        this.unpackConfig = fetchEmitTuple.getParseContext().get(UnpackConfig.class);
+        if (this.unpackConfig == null) {
+            throw new TikaConfigException("UnpackConfig must not be null!");
         }
         Emitter tmpEmitter =
-                emitterManager.getEmitter(embeddedDocumentBytesConfig.getEmitter());
+                emitterManager.getEmitter(unpackConfig.getEmitter());
         if (! (tmpEmitter instanceof StreamEmitter)) {
             throw new TikaConfigException("Emitter " +
-                    embeddedDocumentBytesConfig.getEmitter()
+                    unpackConfig.getEmitter()
                     + " must implement a StreamEmitter");
         }
         this.emitter = (StreamEmitter) tmpEmitter;
@@ -61,7 +61,7 @@ public class EmittingEmbeddedDocumentBytesHandler extends AbstractEmbeddedDocume
     public void add(int id, Metadata metadata, InputStream inputStream) throws IOException {
         //intentionally do not call super.add, because we want the ids list to be empty
         String emitKey = getEmitKey(containerEmitKey.getEmitKey(),
-                id, embeddedDocumentBytesConfig, metadata);
+                id, unpackConfig, metadata);
         try {
             emitter.emit(emitKey, inputStream, METADATA, PARSE_CONTEXT);
         } catch (TikaEmitterException e) {
diff --git 
a/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/RUnpackExtractor.java
similarity index 94%
rename from 
tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java
rename to 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/RUnpackExtractor.java
index 8c5074843e..356411cf6c 100644
--- a/tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/RUnpackExtractor.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.extractor;
+package org.apache.tika.pipes.core.extractor;
 
 import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
 
@@ -34,6 +34,11 @@ import org.xml.sax.helpers.AttributesImpl;
 import org.apache.tika.exception.CorruptedFileException;
 import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.DefaultEmbeddedStreamTranslator;
+import org.apache.tika.extractor.EmbeddedBytesSelector;
+import org.apache.tika.extractor.EmbeddedDocumentBytesHandler;
+import org.apache.tika.extractor.EmbeddedStreamTranslator;
+import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
 import org.apache.tika.io.BoundedInputStream;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -182,12 +187,6 @@ public class RUnpackExtractor extends 
ParsingEmbeddedDocumentExtractor {
             }
         } catch (IOException e) {
             LOGGER.warn("problem writing out embedded bytes", e);
-            //info in metadata doesn't actually make it back to the metadata 
list
-            //because we're filtering and cloning the metadata at the end of 
the parse
-            //which happens before we try to copy out the files.
-            //TODO fix this
-            //metadata.set(TikaCoreProperties.EMBEDDED_BYTES_EXCEPTION,
-              //      ExceptionUtils.getStackTrace(e));
         }
     }
 
diff --git 
a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractorFactory.java
 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/RUnpackExtractorFactory.java
similarity index 61%
rename from 
tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractorFactory.java
rename to 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/RUnpackExtractorFactory.java
index 1cc53da2df..1e77c2fb94 100644
--- 
a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractorFactory.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/RUnpackExtractorFactory.java
@@ -14,18 +14,25 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.extractor;
+package org.apache.tika.pipes.core.extractor;
 
 import org.apache.tika.config.TikaComponent;
+import org.apache.tika.extractor.EmbeddedDocumentByteStoreExtractorFactory;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 
-@TikaComponent
-public class ParsingEmbeddedDocumentExtractorFactory
-        implements EmbeddedDocumentExtractorFactory {
+@TikaComponent(name = "runpack-extractor-factory")
+public class RUnpackExtractorFactory implements EmbeddedDocumentByteStoreExtractorFactory {
 
     @Override
     public EmbeddedDocumentExtractor newInstance(Metadata metadata, ParseContext parseContext) {
-        return new ParsingEmbeddedDocumentExtractor(parseContext);
+        UnpackConfig config = parseContext.get(UnpackConfig.class);
+        if (config == null) {
+            config = UnpackConfig.SKIP;
+        }
+        RUnpackExtractor ex = new RUnpackExtractor(parseContext, Long.MAX_VALUE);
+        ex.setEmbeddedBytesSelector(config.createEmbeddedBytesSelector());
+        return ex;
     }
 }
diff --git 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmbeddedDocumentBytesConfig.java
 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/UnpackConfig.java
similarity index 58%
rename from 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmbeddedDocumentBytesConfig.java
rename to 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/UnpackConfig.java
index c02b780671..dde5298c71 100644
--- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmbeddedDocumentBytesConfig.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/UnpackConfig.java
@@ -17,12 +17,16 @@
 package org.apache.tika.pipes.core.extractor;
 
 import java.io.Serializable;
+import java.util.HashSet;
 import java.util.Objects;
+import java.util.Set;
 
 import org.apache.tika.config.TikaComponent;
+import org.apache.tika.extractor.BasicEmbeddedBytesSelector;
+import org.apache.tika.extractor.EmbeddedBytesSelector;
 
-@TikaComponent(name = "embedded-document-bytes-config")
-public class EmbeddedDocumentBytesConfig implements Serializable {
+@TikaComponent(name = "unpack-config")
+public class UnpackConfig implements Serializable {
 
     /**
      * Serial version UID
@@ -30,7 +34,7 @@ public class EmbeddedDocumentBytesConfig implements 
Serializable {
     private static final long serialVersionUID = -3861669115439125268L;
 
 
-    public static EmbeddedDocumentBytesConfig SKIP = new 
EmbeddedDocumentBytesConfig(false);
+    public static UnpackConfig SKIP = new UnpackConfig(false);
 
     public enum SUFFIX_STRATEGY {
             NONE, EXISTING, DETECTED;
@@ -48,17 +52,20 @@ public class EmbeddedDocumentBytesConfig implements 
Serializable {
     }
 
     public enum KEY_BASE_STRATEGY {
-        CONTAINER_NAME_NUMBERED,
-        CONTAINER_NAME_AS_IS,
-        CUSTOM_BASE;
+        /**
+         * Default pattern: {containerKey}-embed/{id}{suffix}
+         */
+        DEFAULT,
+        /**
+         * Custom pattern using emitKeyBase
+         */
+        CUSTOM;
 
         public static KEY_BASE_STRATEGY parse(String s) {
-            if (s.equalsIgnoreCase(CONTAINER_NAME_NUMBERED.name())) {
-                return CONTAINER_NAME_NUMBERED;
-            } else if (s.equalsIgnoreCase(CONTAINER_NAME_AS_IS.name())) {
-                return CONTAINER_NAME_AS_IS;
-            } else if (s.equalsIgnoreCase(CUSTOM_BASE.name())) {
-                return CUSTOM_BASE;
+            if (s.equalsIgnoreCase(DEFAULT.name())) {
+                return DEFAULT;
+            } else if (s.equalsIgnoreCase(CUSTOM.name())) {
+                return CUSTOM;
             }
             throw new IllegalArgumentException("can't parse " + s);
         }
@@ -76,26 +83,32 @@ public class EmbeddedDocumentBytesConfig implements 
Serializable {
 
     private boolean includeOriginal = false;
 
-    private KEY_BASE_STRATEGY keyBaseStrategy = 
KEY_BASE_STRATEGY.CONTAINER_NAME_NUMBERED;
+    private KEY_BASE_STRATEGY keyBaseStrategy = KEY_BASE_STRATEGY.DEFAULT;
     //This should be set per file. This allows a custom
     //emit key base that bypasses the algorithmic generation of the emitKey
-    //from the primary json emitKey when keyBase Strategy is CUSTOM_BASE
+    //from the primary json emitKey when keyBase Strategy is CUSTOM
     private String emitKeyBase = "";
 
+    // Filter parameters for embedded bytes selection
+    private Set<String> includeMimeTypes = new HashSet<>();
+    private Set<String> excludeMimeTypes = new HashSet<>();
+    private Set<String> includeEmbeddedResourceTypes = new HashSet<>();
+    private Set<String> excludeEmbeddedResourceTypes = new HashSet<>();
+
     /**
-     * Create an EmbeddedDocumentBytesConfig with
-     * {@link EmbeddedDocumentBytesConfig#extractEmbeddedDocumentBytes}
+     * Create an UnpackConfig with
+     * {@link UnpackConfig#extractEmbeddedDocumentBytes}
      * set to <code>true</code>
      */
-    public EmbeddedDocumentBytesConfig() {
+    public UnpackConfig() {
         this.extractEmbeddedDocumentBytes = true;
     }
 
-    public EmbeddedDocumentBytesConfig(boolean extractEmbeddedDocumentBytes) {
+    public UnpackConfig(boolean extractEmbeddedDocumentBytes) {
         this.extractEmbeddedDocumentBytes = extractEmbeddedDocumentBytes;
     }
 
-    public static EmbeddedDocumentBytesConfig getSKIP() {
+    public static UnpackConfig getSKIP() {
         return SKIP;
     }
 
@@ -171,22 +184,75 @@ public class EmbeddedDocumentBytesConfig implements 
Serializable {
         return emitKeyBase;
     }
 
+    public Set<String> getIncludeMimeTypes() {
+        return includeMimeTypes;
+    }
+
+    public void setIncludeMimeTypes(Set<String> includeMimeTypes) {
+        this.includeMimeTypes = new HashSet<>(includeMimeTypes);
+    }
+
+    public Set<String> getExcludeMimeTypes() {
+        return excludeMimeTypes;
+    }
+
+    public void setExcludeMimeTypes(Set<String> excludeMimeTypes) {
+        this.excludeMimeTypes = new HashSet<>(excludeMimeTypes);
+    }
+
+    public Set<String> getIncludeEmbeddedResourceTypes() {
+        return includeEmbeddedResourceTypes;
+    }
+
+    public void setIncludeEmbeddedResourceTypes(Set<String> 
includeEmbeddedResourceTypes) {
+        this.includeEmbeddedResourceTypes = new 
HashSet<>(includeEmbeddedResourceTypes);
+    }
+
+    public Set<String> getExcludeEmbeddedResourceTypes() {
+        return excludeEmbeddedResourceTypes;
+    }
+
+    public void setExcludeEmbeddedResourceTypes(Set<String> 
excludeEmbeddedResourceTypes) {
+        this.excludeEmbeddedResourceTypes = new 
HashSet<>(excludeEmbeddedResourceTypes);
+    }
+
+    /**
+     * Creates an EmbeddedBytesSelector based on the configured filter 
parameters.
+     *
+     * @return an EmbeddedBytesSelector that will filter embedded documents 
based on
+     *         configured mime types and resource types
+     */
+    public EmbeddedBytesSelector createEmbeddedBytesSelector() {
+        if (includeMimeTypes.isEmpty() && excludeMimeTypes.isEmpty()
+                && includeEmbeddedResourceTypes.isEmpty() && 
excludeEmbeddedResourceTypes.isEmpty()) {
+            return EmbeddedBytesSelector.ACCEPT_ALL;
+        }
+        return new BasicEmbeddedBytesSelector(includeMimeTypes, 
excludeMimeTypes,
+                includeEmbeddedResourceTypes, excludeEmbeddedResourceTypes);
+    }
+
     @Override
     public String toString() {
-        return "EmbeddedDocumentBytesConfig{" + 
"extractEmbeddedDocumentBytes=" + extractEmbeddedDocumentBytes + ", 
zeroPadName=" + zeroPadName + ", suffixStrategy=" +
+        return "UnpackConfig{" + "extractEmbeddedDocumentBytes=" + 
extractEmbeddedDocumentBytes + ", zeroPadName=" + zeroPadName + ", 
suffixStrategy=" +
                 suffixStrategy + ", embeddedIdPrefix='" + embeddedIdPrefix + 
'\'' + ", emitter='" + emitter + '\'' + ", includeOriginal=" + includeOriginal 
+ ", keyBaseStrategy=" +
-                keyBaseStrategy + ", emitKeyBase='" + emitKeyBase + '\'' + '}';
+                keyBaseStrategy + ", emitKeyBase='" + emitKeyBase + '\'' +
+                ", includeMimeTypes=" + includeMimeTypes + ", 
excludeMimeTypes=" + excludeMimeTypes +
+                ", includeEmbeddedResourceTypes=" + 
includeEmbeddedResourceTypes + ", excludeEmbeddedResourceTypes=" + 
excludeEmbeddedResourceTypes + '}';
     }
 
     @Override
     public final boolean equals(Object o) {
-        if (!(o instanceof EmbeddedDocumentBytesConfig config)) {
+        if (!(o instanceof UnpackConfig config)) {
             return false;
         }
 
         return extractEmbeddedDocumentBytes == 
config.extractEmbeddedDocumentBytes && zeroPadName == config.zeroPadName && 
includeOriginal == config.includeOriginal &&
                 suffixStrategy == config.suffixStrategy && 
Objects.equals(embeddedIdPrefix, config.embeddedIdPrefix) && 
Objects.equals(emitter, config.emitter) &&
-                keyBaseStrategy == config.keyBaseStrategy && 
Objects.equals(emitKeyBase, config.emitKeyBase);
+                keyBaseStrategy == config.keyBaseStrategy && 
Objects.equals(emitKeyBase, config.emitKeyBase) &&
+                Objects.equals(includeMimeTypes, config.includeMimeTypes) &&
+                Objects.equals(excludeMimeTypes, config.excludeMimeTypes) &&
+                Objects.equals(includeEmbeddedResourceTypes, 
config.includeEmbeddedResourceTypes) &&
+                Objects.equals(excludeEmbeddedResourceTypes, 
config.excludeEmbeddedResourceTypes);
     }
 
     @Override
@@ -199,6 +265,10 @@ public class EmbeddedDocumentBytesConfig implements 
Serializable {
         result = 31 * result + Boolean.hashCode(includeOriginal);
         result = 31 * result + Objects.hashCode(keyBaseStrategy);
         result = 31 * result + Objects.hashCode(emitKeyBase);
+        result = 31 * result + Objects.hashCode(includeMimeTypes);
+        result = 31 * result + Objects.hashCode(excludeMimeTypes);
+        result = 31 * result + Objects.hashCode(includeEmbeddedResourceTypes);
+        result = 31 * result + Objects.hashCode(excludeEmbeddedResourceTypes);
         return result;
     }
 }
diff --git 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java
 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java
index a11014478c..dddf11c502 100644
--- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java
@@ -41,7 +41,7 @@ import org.apache.tika.pipes.core.EmitStrategyConfig;
 import org.apache.tika.pipes.core.PassbackFilter;
 import org.apache.tika.pipes.core.emitter.EmitDataImpl;
 import org.apache.tika.pipes.core.emitter.EmitterManager;
-import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig;
+import org.apache.tika.pipes.core.extractor.UnpackConfig;
 import org.apache.tika.utils.ExceptionUtils;
 import org.apache.tika.utils.StringUtils;
 
@@ -68,7 +68,7 @@ class EmitHandler {
         //we need to apply the metadata filter after we pull out the stacktrace
         filterMetadata(parseData, parseContext);
         FetchEmitTuple.ON_PARSE_EXCEPTION onParseException = 
t.getOnParseException();
-        EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig = 
parseContext.get(EmbeddedDocumentBytesConfig.class);
+        UnpackConfig unpackConfig = parseContext.get(UnpackConfig.class);
         if (StringUtils.isBlank(stack) ||
                 onParseException == FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT) {
             injectUserMetadata(t.getMetadata(), parseData.getMetadataList());
@@ -78,8 +78,8 @@ class EmitHandler {
                 t.setEmitKey(emitKey);
             }
             EmitDataImpl emitDataTuple = new 
EmitDataImpl(t.getEmitKey().getEmitKey(), parseData.getMetadataList(), stack);
-            if (shouldEmit(embeddedDocumentBytesConfig, parseData, 
emitDataTuple, parseContext)) {
-                return emit(t.getId(), emitKey, 
embeddedDocumentBytesConfig.isExtractEmbeddedDocumentBytes(),
+            if (shouldEmit(unpackConfig, parseData, emitDataTuple, 
parseContext)) {
+                return emit(t.getId(), emitKey, 
unpackConfig.isExtractEmbeddedDocumentBytes(),
                         parseData, stack, parseContext);
             } else {
                 if (StringUtils.isBlank(stack)) {
@@ -153,7 +153,7 @@ class EmitHandler {
     }
 
 
-    private boolean shouldEmit(EmbeddedDocumentBytesConfig 
embeddedDocumentBytesConfig, MetadataListAndEmbeddedBytes parseData,
+    private boolean shouldEmit(UnpackConfig unpackConfig, 
MetadataListAndEmbeddedBytes parseData,
                                EmitDataImpl emitDataTuple, ParseContext 
parseContext) {
         EmitStrategy strategy = emitStrategy;
         long thresholdBytes = directEmitThresholdBytes;
@@ -168,7 +168,7 @@ class EmitHandler {
 
         if (strategy == EmitStrategy.EMIT_ALL) {
             return true;
-        } else if 
(embeddedDocumentBytesConfig.isExtractEmbeddedDocumentBytes() &&
+        } else if (unpackConfig.isExtractEmbeddedDocumentBytes() &&
                 parseData.toBePackagedForStreamEmitter()) {
             return true;
         } else if (strategy == EmitStrategy.PASSBACK_ALL) {
diff --git 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
index e56132f268..6e86502d2b 100644
--- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
@@ -47,7 +47,7 @@ import org.apache.tika.parser.ParseRecord;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.pipes.api.FetchEmitTuple;
 import org.apache.tika.pipes.api.ParseMode;
-import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig;
+import org.apache.tika.pipes.core.extractor.UnpackConfig;
 import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
 import org.apache.tika.sax.ContentHandlerFactory;
 import org.apache.tika.sax.RecursiveParserWrapperHandler;
@@ -143,9 +143,9 @@ class ParseHandler {
         } catch (IOException e) {
             LOG.warn("problem detecting: " + t.getId(), e);
         }
-        EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig = 
parseContext.get(EmbeddedDocumentBytesConfig.class);
-        if (embeddedDocumentBytesConfig != null &&
-                embeddedDocumentBytesConfig.isIncludeOriginal()) {
+        UnpackConfig unpackConfig = parseContext.get(UnpackConfig.class);
+        if (unpackConfig != null &&
+                unpackConfig.isIncludeOriginal()) {
             EmbeddedDocumentBytesHandler embeddedDocumentByteStore = 
parseContext.get(EmbeddedDocumentBytesHandler.class);
             try (InputStream is = Files.newInputStream(tis.getPath())) {
                 embeddedDocumentByteStore.add(0, metadata, is);
diff --git 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
index 5c6e551f50..d5a6c72497 100644
--- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
@@ -55,7 +55,7 @@ import org.apache.tika.config.loader.TikaLoader;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.extractor.RUnpackExtractorFactory;
+import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.filter.MetadataFilter;
 import org.apache.tika.metadata.writefilter.MetadataWriteLimiterFactory;
@@ -71,6 +71,7 @@ import org.apache.tika.pipes.core.PipesConfig;
 import org.apache.tika.pipes.core.config.ConfigStore;
 import org.apache.tika.pipes.core.config.ConfigStoreFactory;
 import org.apache.tika.pipes.core.emitter.EmitterManager;
+import org.apache.tika.pipes.core.extractor.RUnpackExtractorFactory;
 import org.apache.tika.pipes.core.fetcher.FetcherManager;
 import org.apache.tika.pipes.core.serialization.JsonPipesIpc;
 import org.apache.tika.plugins.ExtensionConfig;
@@ -471,12 +472,6 @@ public class PipesServer implements AutoCloseable {
         this.fetcherManager = FetcherManager.load(tikaPluginManager, tikaJsonConfig, true, configStore);
         this.emitterManager = EmitterManager.load(tikaPluginManager, tikaJsonConfig, true, configStore);
         this.autoDetectParser = (AutoDetectParser) tikaLoader.loadAutoDetectParser();
-
-        // If the user hasn't configured an embedded document extractor, set up the
-        // RUnpackExtractorFactory
-        if (autoDetectParser.getAutoDetectParserConfig().getEmbeddedDocumentExtractorFactory() == null) {
-                autoDetectParser.getAutoDetectParserConfig().setEmbeddedDocumentExtractorFactory(new RUnpackExtractorFactory());
-        }
         this.detector = this.autoDetectParser.getDetector();
         this.rMetaParser = new RecursiveParserWrapper(autoDetectParser);
 
@@ -494,6 +489,11 @@ public class PipesServer implements AutoCloseable {
     private ParseContext createMergedParseContext(ParseContext requestContext) throws TikaConfigException {
         // Create fresh context with defaults from tika-config (e.g., DigesterFactory)
         ParseContext mergedContext = tikaLoader.loadParseContext();
+        // If no embedded document extractor factory is configured, use RUnpackExtractorFactory
+        // as the default for pipes scenarios (supports embedded byte extraction)
+        if (mergedContext.get(EmbeddedDocumentExtractorFactory.class) == null) {
+            mergedContext.set(EmbeddedDocumentExtractorFactory.class, new RUnpackExtractorFactory());
+        }
         // Overlay request's values (request takes precedence)
         mergedContext.copyFrom(requestContext);
         return mergedContext;
diff --git 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
index df54ea0042..d2f4d2cff6 100644
--- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
@@ -30,8 +30,6 @@ import 
org.apache.tika.extractor.EmbeddedDocumentByteStoreExtractorFactory;
 import org.apache.tika.extractor.EmbeddedDocumentBytesHandler;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
-import org.apache.tika.extractor.RUnpackExtractor;
-import org.apache.tika.extractor.RUnpackExtractorFactory;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.writefilter.MetadataWriteLimiterFactory;
@@ -41,9 +39,9 @@ import org.apache.tika.pipes.api.FetchEmitTuple;
 import org.apache.tika.pipes.api.PipesResult;
 import org.apache.tika.pipes.core.PipesResults;
 import org.apache.tika.pipes.core.emitter.EmitterManager;
-import org.apache.tika.pipes.core.extractor.BasicEmbeddedDocumentBytesHandler;
-import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig;
 import 
org.apache.tika.pipes.core.extractor.EmittingEmbeddedDocumentBytesHandler;
+import org.apache.tika.pipes.core.extractor.RUnpackExtractor;
+import org.apache.tika.pipes.core.extractor.UnpackConfig;
 import org.apache.tika.utils.ExceptionUtils;
 import org.apache.tika.utils.StringUtils;
 
@@ -152,33 +150,29 @@ class PipesWorker implements Callable<PipesResult> {
             parseContext.set(MetadataWriteLimiterFactory.class, defaultMetadataWriteLimiterFactory);
         }
 
-        EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig = parseContext.get(EmbeddedDocumentBytesConfig.class);
-        if (embeddedDocumentBytesConfig == null) {
+        UnpackConfig unpackConfig = parseContext.get(UnpackConfig.class);
+        if (unpackConfig == null) {
             //make sure there's one here -- or do we make this default in fetchemit tuple?
-            parseContext.set(EmbeddedDocumentBytesConfig.class, EmbeddedDocumentBytesConfig.SKIP);
+            parseContext.set(UnpackConfig.class, UnpackConfig.SKIP);
             return parseContext;
         }
-        EmbeddedDocumentExtractorFactory factory = autoDetectParser
-                .getAutoDetectParserConfig().getEmbeddedDocumentExtractorFactory();
+        EmbeddedDocumentExtractorFactory factory = parseContext.get(EmbeddedDocumentExtractorFactory.class);
         if (factory == null) {
-            parseContext.set(EmbeddedDocumentExtractor.class, new RUnpackExtractor(parseContext,
-                    RUnpackExtractorFactory.DEFAULT_MAX_EMBEDDED_BYTES_FOR_EXTRACTION));
+            parseContext.set(EmbeddedDocumentExtractor.class,
+                    new RUnpackExtractor(parseContext, Long.MAX_VALUE));
         } else {
-            if (! (factory instanceof EmbeddedDocumentByteStoreExtractorFactory)) {
+            if (!(factory instanceof EmbeddedDocumentByteStoreExtractorFactory)) {
                 throw new TikaConfigException("EmbeddedDocumentExtractorFactory must be an " +
-                        "instance of EmbeddedDocumentByteStoreExtractorFactory if you want" +
+                        "instance of EmbeddedDocumentByteStoreExtractorFactory if you want " +
                         "to extract embedded bytes! I see this embedded doc factory: " +
-                        factory.getClass() + "and a request: " +
-                        embeddedDocumentBytesConfig);
+                        factory.getClass() + " and a request: " +
+                        unpackConfig);
             }
         }
-        //TODO: especially clean this up.
-        if (!StringUtils.isBlank(embeddedDocumentBytesConfig.getEmitter())) {
+        // Only set up embedded document bytes handler if an emitter is configured
+        if (!StringUtils.isBlank(unpackConfig.getEmitter())) {
             parseContext.set(EmbeddedDocumentBytesHandler.class,
                     new EmittingEmbeddedDocumentBytesHandler(fetchEmitTuple, emitterManager));
-        } else {
-            parseContext.set(EmbeddedDocumentBytesHandler.class,
-                    new BasicEmbeddedDocumentBytesHandler(embeddedDocumentBytesConfig));
         }
 
         return parseContext;
diff --git 
a/tika-serialization/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java
 
b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/extractor/UnpackConfigSelectorTest.java
similarity index 70%
rename from 
tika-serialization/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java
rename to 
tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/extractor/UnpackConfigSelectorTest.java
index 817a7ab435..685d8d0715 100644
--- 
a/tika-serialization/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java
+++ 
b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/extractor/UnpackConfigSelectorTest.java
@@ -14,36 +14,31 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser;
+package org.apache.tika.pipes.core.extractor;
 
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
+import java.util.Set;
+
 import org.junit.jupiter.api.Test;
 
 import org.apache.tika.TikaTest;
-import org.apache.tika.config.loader.TikaLoader;
 import org.apache.tika.extractor.EmbeddedBytesSelector;
-import org.apache.tika.extractor.RUnpackExtractor;
-import org.apache.tika.extractor.RUnpackExtractorFactory;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.utils.StringUtils;
 
-public class AutoDetectParserConfigTest extends TikaTest {
+public class UnpackConfigSelectorTest extends TikaTest {
 
     @Test
     public void testEmbeddedBytesSelector() throws Exception {
-        TikaLoader loader = TikaLoader.load(getConfigPath(getClass(), 
"TIKA-4207-embedded-bytes-config.json"));
-        AutoDetectParser parser = (AutoDetectParser) 
loader.loadAutoDetectParser();
-        AutoDetectParserConfig config = parser.getAutoDetectParserConfig();
-        RUnpackExtractorFactory f =
-                (RUnpackExtractorFactory) 
config.getEmbeddedDocumentExtractorFactory();
+        UnpackConfig config = new UnpackConfig();
+        config.setIncludeMimeTypes(Set.of("application/pdf", 
"application/rtf", "text/plain"));
+        config.setIncludeEmbeddedResourceTypes(Set.of("ATTACHMENT", "INLINE"));
+
+        EmbeddedBytesSelector selector = config.createEmbeddedBytesSelector();
 
-        Metadata metadata = new Metadata();
-        ParseContext parseContext = new ParseContext();
-        RUnpackExtractor ex = (RUnpackExtractor) f.newInstance(metadata, 
parseContext);
-        EmbeddedBytesSelector selector = ex.getEmbeddedBytesSelector();
         assertFalse(selector.select(getMetadata("", "")));
         assertTrue(selector.select(getMetadata("application/pdf", "")));
         assertTrue(selector.select(getMetadata("application/pdf", 
"ATTACHMENT")));
@@ -52,7 +47,17 @@ public class AutoDetectParserConfigTest extends TikaTest {
 
         assertFalse(selector.select(getMetadata("application/pdf", "MACRO")));
         assertFalse(selector.select(getMetadata("application/docx", "")));
+    }
 
+    @Test
+    public void testAcceptAllWhenNoFilters() {
+        UnpackConfig config = new UnpackConfig();
+        EmbeddedBytesSelector selector = config.createEmbeddedBytesSelector();
+
+        // With no filters, should accept all
+        assertTrue(selector.select(getMetadata("application/pdf", "")));
+        assertTrue(selector.select(getMetadata("application/docx", "MACRO")));
+        assertTrue(selector.select(getMetadata("", "")));
     }
 
     private Metadata getMetadata(String mime, String embeddedResourceType) {
diff --git 
a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTupleTest.java
 
b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTupleTest.java
index 1650e7d00a..499f165dd5 100644
--- 
a/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTupleTest.java
+++ 
b/tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/serialization/JsonFetchEmitTupleTest.java
@@ -86,7 +86,7 @@ public class JsonFetchEmitTupleTest {
     @Test
     public void testBytes() throws Exception {
         // TODO -- add these to the ParseContext:
-        // EmbeddedDocumentBytesConfig bytesConfig = new 
EmbeddedDocumentBytesConfig(true);
+        // UnpackConfig bytesConfig = new UnpackConfig(true);
         // bytesConfig.setEmitter("emitter");
         // parseContext.set(ContentHandlerFactory.class, new 
BasicContentHandlerFactory(
         //     BasicContentHandlerFactory.HANDLER_TYPE.XML, 10000));
diff --git a/tika-pipes/tika-pipes-integration-tests/pom.xml 
b/tika-pipes/tika-pipes-integration-tests/pom.xml
index 56bb2d1225..0d14c9e952 100644
--- a/tika-pipes/tika-pipes-integration-tests/pom.xml
+++ b/tika-pipes/tika-pipes-integration-tests/pom.xml
@@ -81,6 +81,18 @@
       <version>${project.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-parser-digest-commons</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-parsers-standard-package</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
   <build>
     <plugins>
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/DigestingOpenContainersTest.java
 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/DigestingOpenContainersTest.java
new file mode 100644
index 0000000000..1beb9fba75
--- /dev/null
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/DigestingOpenContainersTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.pipes.core;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+import java.nio.file.Paths;
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.config.loader.TikaLoader;
+import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.pipes.core.extractor.RUnpackExtractorFactory;
+
+public class DigestingOpenContainersTest extends TikaTest {
+
+    @Test
+    public void testDigestingOpenContainers() throws Exception {
+        //TIKA-4533 -- this tests both that a very large embedded OLE doc doesn't cause a zip bomb
+        //exception AND that the sha for the embedded OLE doc is not the sha for a zero-byte file
+        String expectedSha = "bbc2057a1ff8fe859a296d2fbb493fc0c3e5796749ba72507c0e13f7a3d81f78";
+        TikaLoader loader = getLoader("tika-4533.json");
+        AutoDetectParser autoDetectParser = (AutoDetectParser) loader.loadAutoDetectParser();
+        ParseContext parseContext = loader.loadParseContext();
+        //this models what happens in tika-pipes
+        if (parseContext.get(EmbeddedDocumentExtractorFactory.class) == null) {
+            parseContext.set(EmbeddedDocumentExtractorFactory.class, new RUnpackExtractorFactory());
+        }
+        List<Metadata> metadataList = getRecursiveMetadata("testLargeOLEDoc.doc",
+                autoDetectParser, parseContext);
+        assertEquals(expectedSha, metadataList.get(2).get("X-TIKA:digest:SHA256"));
+        assertNull(metadataList.get(2).get(TikaCoreProperties.EMBEDDED_EXCEPTION));
+        assertEquals(2049290L, Long.parseLong(metadataList.get(2).get(Metadata.CONTENT_LENGTH)));
+    }
+
+    private TikaLoader getLoader(String config) {
+        try {
+            return TikaLoader.load(Paths.get(getClass()
+                    .getResource("/configs/" + config)
+                    .toURI()));
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+}
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesServerTest.java
 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesServerTest.java
index 621822fd23..c428128b2c 100644
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesServerTest.java
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesServerTest.java
@@ -49,100 +49,5 @@ public class PipesServerTest extends TikaTest {
         
assertEquals("5f3b924303e960ce35d7f705e91d3018dd110a9c3cef0546a91fe013d6dad6fd",
                 parseData.metadataList.get(0).get("X-TIKA:digest:SHA-256"));
     }
-
-    @Test
-    public void testEmbeddedStreamEmitter(@TempDir Path tmp) throws Exception {
-
-        String testDoc = "basic_embedded.xml";
-        Path tikaConfig = PluginsTestHelper.getFileSystemFetcherConfig(tmp);
-        PluginsTestHelper.copyTestFilesToTmpInput(tmp, testDoc);
-
-
-        PipesServer pipesServer = new PipesServer(tikaConfig,
-                UnsynchronizedByteArrayInputStream.builder().setByteArray(new 
byte[0]).get(),
-                new 
PrintStream(UnsynchronizedByteArrayOutputStream.builder().get(), true,
-                        StandardCharsets.UTF_8.name()),
-                -1, 30000, 30000);
-
-        pipesServer.initializeResources();
-        EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig =
-                new EmbeddedDocumentBytesConfig(true);
-        embeddedDocumentBytesConfig.setIncludeOriginal(true);
-        ParseContext parseContext = new ParseContext();
-        parseContext.set(HandlerConfig.class, 
PipesIteratorBaseConfig.DEFAULT_HANDLER_CONFIG);
-        parseContext.set(EmbeddedDocumentBytesConfig.class, 
embeddedDocumentBytesConfig);
-        FetchEmitTuple fetchEmitTuple = new FetchEmitTuple("id",
-                new FetchKey("fs", testDoc),
-                new EmitKey("", ""), new Metadata(), parseContext);
-        TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(tikaConfig);
-        TikaPluginManager pluginManager = 
TikaPluginManager.load(tikaJsonConfig);
-        Fetcher fetcher = FetcherManager.load(pluginManager, 
tikaJsonConfig).getFetcher();
-        PipesServer.MetadataListAndEmbeddedBytes
-                parseData = pipesServer.parseFromTuple(fetchEmitTuple, 
fetcher);
-        assertEquals(2, parseData.metadataList.size());
-
-        byte[] bytes0 =
-                IOUtils.toByteArray(
-                        
((BasicEmbeddedDocumentBytesHandler)parseData.getEmbeddedDocumentBytesHandler())
-                        .getDocument(0));
-        byte[] bytes1 =
-                IOUtils.toByteArray(
-                        
((BasicEmbeddedDocumentBytesHandler)parseData.getEmbeddedDocumentBytesHandler())
-                                .getDocument(1));
-
-        assertContains("is to trigger mock on the embedded",
-                new String(bytes0, StandardCharsets.UTF_8));
-
-        assertContains("embeddedAuthor</metadata>",
-                new String(bytes1, StandardCharsets.UTF_8));
-        
assertEquals("fdaa937c96d1ed010b8d307ccddf9d11c3b48db732a8771eaafe99d59e076d0a",
-                parseData.metadataList.get(0).get("X-TIKA:digest:SHA-256"));
-    }
-
-    @Test
-    public void testEmbeddedStreamEmitterLimitBytes(@TempDir Path tmp) throws 
Exception {
-        String testDoc = "basic_embedded.xml";
-        Path pipesConfig = 
PluginsTestHelper.getFileSystemFetcherConfig("tika-config-truncate.json", tmp);
-        PluginsTestHelper.copyTestFilesToTmpInput(tmp, testDoc);
-
-        PipesServer pipesServer = new PipesServer(pipesConfig,
-                UnsynchronizedByteArrayInputStream.builder().setByteArray(new 
byte[0]).get(),
-                new 
PrintStream(UnsynchronizedByteArrayOutputStream.builder().get(), true,
-                        StandardCharsets.UTF_8.name()),
-                -1, 30000, 30000);
-
-        pipesServer.initializeResources();
-        EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig =
-                new EmbeddedDocumentBytesConfig(true);
-        embeddedDocumentBytesConfig.setIncludeOriginal(true);
-        ParseContext parseContext = new ParseContext();
-        parseContext.set(HandlerConfig.class, 
PipesIteratorBaseConfig.DEFAULT_HANDLER_CONFIG);
-        parseContext.set(EmbeddedDocumentBytesConfig.class, 
embeddedDocumentBytesConfig);
-        FetchEmitTuple fetchEmitTuple = new FetchEmitTuple("id",
-                new FetchKey("fs", testDoc),
-                new EmitKey("", ""), new Metadata(), parseContext);
-
-        TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(pipesConfig);
-        TikaPluginManager pluginManager = 
TikaPluginManager.load(tikaJsonConfig);
-        Fetcher fetcher = FetcherManager.load(pluginManager, 
tikaJsonConfig).getFetcher();
-        PipesServer.MetadataListAndEmbeddedBytes
-                parseData = pipesServer.parseFromTuple(fetchEmitTuple, 
fetcher);
-        assertEquals(2, parseData.metadataList.size());
-
-        byte[] bytes0 =
-                IOUtils.toByteArray(
-                        
((BasicEmbeddedDocumentBytesHandler)parseData.getEmbeddedDocumentBytesHandler())
-                                .getDocument(0));
-        byte[] bytes1 =
-                IOUtils.toByteArray(
-                        
((BasicEmbeddedDocumentBytesHandler)parseData.getEmbeddedDocumentBytesHandler())
-                                .getDocument(1));
-
-        assertContains("is to trigger mock on the embedded",
-                new String(bytes0, StandardCharsets.UTF_8));
-
-        assertEquals(10, bytes1.length);
-        
assertEquals("fdaa937c96d1ed010b8d307ccddf9d11c3b48db732a8771eaafe99d59e076d0a",
-                parseData.metadataList.get(0).get("X-TIKA:digest:SHA-256"));
-    }*/
+    */
 }
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-4533.json
 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-4533.json
new file mode 100644
index 0000000000..b741ae8921
--- /dev/null
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-4533.json
@@ -0,0 +1,19 @@
+{
+  "auto-detect-parser": {
+    "throwOnZeroBytes": false
+  },
+  "other-configs": {
+    "output-limits": {
+      "zipBombRatio": 100,
+      "maxXmlDepth": 100,
+      "maxPackageEntryDepth": 100
+    },
+    "digester-factory": {
+      "commons-digester-factory": {
+        "digests": [
+          { "algorithm": "SHA256" }
+        ]
+      }
+    }
+  }
+}
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
index b02932ebe7..d8acd13939 100644
--- 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
+++ 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-truncate.json
@@ -44,16 +44,17 @@
     }
   },
   "auto-detect-parser": {
-    "embeddedDocumentExtractorFactory": {
-      "runpack-extractor-factory": {
-        "maxEmbeddedBytesForExtraction": 10
-      }
-    },
     "throwOnZeroBytes": false
   },
   "other-configs": {
     "digester-factory": {
       "mock-digester-factory": {}
+    },
+    "embedded-document-extractor-factory": {
+      "runpack-extractor-factory": {
+        "writeFileNameToContent": false,
+        "maxEmbeddedBytesForExtraction": 10
+      }
     }
   },
   "plugin-roots": "PLUGINS_PATHS"
diff --git 
a/tika-pipes/tika-pipes-integration-tests/src/test/resources/test-documents/testLargeOLEDoc.doc
 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/test-documents/testLargeOLEDoc.doc
new file mode 100644
index 0000000000..473eada534
Binary files /dev/null and 
b/tika-pipes/tika-pipes-integration-tests/src/test/resources/test-documents/testLargeOLEDoc.doc
 differ
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java
index 5ecfffecb5..cbd9b932b8 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentRegistry.java
@@ -56,9 +56,9 @@ public class ComponentRegistry {
 
     private static Map<String, String> createBuiltinAliases() {
         Map<String, String> aliases = new HashMap<>();
-        // EmbeddedDocumentBytesConfig is in tika-pipes-core which can't depend on tika-core for @TikaComponent
-        aliases.put("embedded-document-bytes-config",
-                "org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig");
+        // UnpackConfig is in tika-pipes-core which can't depend on tika-core for @TikaComponent
+        aliases.put("unpack-config",
+                "org.apache.tika.pipes.core.extractor.UnpackConfig");
         return Collections.unmodifiableMap(aliases);
     }
 
diff --git 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
index 95d6197598..0277a82e85 100644
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
@@ -41,6 +41,7 @@ import org.apache.tika.detect.Detector;
 import org.apache.tika.detect.EncodingDetector;
 import org.apache.tika.digest.DigesterFactory;
 import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
 import org.apache.tika.language.translate.DefaultTranslator;
 import org.apache.tika.language.translate.Translator;
 import org.apache.tika.metadata.filter.CompositeMetadataFilter;
@@ -410,6 +411,7 @@ public class TikaLoader {
         ParseContext context = new ParseContext();
         loadOne(DigesterFactory.class, context);
         loadOne(MetadataWriteLimiterFactory.class, context);
+        loadOne(EmbeddedDocumentExtractorFactory.class, context);
         loadOne(EmbeddedLimits.class, context);
         loadOne(OutputLimits.class, context);
         loadOne(TimeoutLimits.class, context);
diff --git 
a/tika-serialization/src/test/resources/configs/TIKA-4207-embedded-bytes-config.json
 
b/tika-serialization/src/test/resources/configs/TIKA-4207-embedded-bytes-config.json
deleted file mode 100644
index 5cc734f2be..0000000000
--- 
a/tika-serialization/src/test/resources/configs/TIKA-4207-embedded-bytes-config.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "parsers": [
-    "default-parser"
-  ],
-  "auto-detect-parser": {
-    "embeddedDocumentExtractorFactory": {
-      "runpack-extractor-factory": {
-        "embeddedBytesIncludeMimeTypes": ["application/pdf", 
"application/rtf", "text/plain"],
-        "embeddedBytesIncludeEmbeddedResourceTypes": ["ATTACHMENT", "INLINE"]
-      }
-    }
-  }
-}
diff --git 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java
 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java
index 908fdf867e..ef764c404b 100644
--- 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java
+++ 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java
@@ -48,7 +48,7 @@ import org.apache.tika.pipes.core.async.AsyncProcessor;
 import org.apache.tika.pipes.core.async.OfferLargerThanQueueSize;
 import org.apache.tika.pipes.core.emitter.EmitDataImpl;
 import org.apache.tika.pipes.core.emitter.EmitterManager;
-import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig;
+import org.apache.tika.pipes.core.extractor.UnpackConfig;
 import org.apache.tika.pipes.core.serialization.JsonFetchEmitTupleList;
 import org.apache.tika.plugins.TikaPluginManager;
 
@@ -113,10 +113,10 @@ public class AsyncResource {
                         .getEmitterId());
             }
             ParseContext parseContext = t.getParseContext();
-            EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig = parseContext.get(EmbeddedDocumentBytesConfig.class);
-            if (embeddedDocumentBytesConfig != null && embeddedDocumentBytesConfig.isExtractEmbeddedDocumentBytes() &&
-                    !StringUtils.isAllBlank(embeddedDocumentBytesConfig.getEmitter())) {
-                String bytesEmitter = embeddedDocumentBytesConfig.getEmitter();
+            UnpackConfig unpackConfig = parseContext.get(UnpackConfig.class);
+            if (unpackConfig != null && unpackConfig.isExtractEmbeddedDocumentBytes() &&
+                    !StringUtils.isAllBlank(unpackConfig.getEmitter())) {
+                String bytesEmitter = unpackConfig.getEmitter();
                 if (!emitterManager
                         .getSupported()
                         .contains(bytesEmitter)) {
diff --git 
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java
 
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java
index 28337e5b26..8e69634ff4 100644
--- 
a/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java
+++ 
b/tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java
@@ -59,7 +59,7 @@ import org.apache.tika.pipes.api.FetchEmitTuple;
 import org.apache.tika.pipes.api.ParseMode;
 import org.apache.tika.pipes.api.emitter.EmitKey;
 import org.apache.tika.pipes.api.fetcher.FetchKey;
-import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig;
+import org.apache.tika.pipes.core.extractor.UnpackConfig;
 import org.apache.tika.pipes.core.fetcher.FetcherManager;
 import org.apache.tika.pipes.core.serialization.JsonFetchEmitTuple;
 import org.apache.tika.plugins.TikaPluginManager;
@@ -251,18 +251,20 @@ public class TikaPipesTest extends CXFTestBase {
 
     @Test
     public void testBytes() throws Exception {
-        EmbeddedDocumentBytesConfig config = new 
EmbeddedDocumentBytesConfig(true);
+        UnpackConfig config = new UnpackConfig(true);
         config.setEmitter(EMITTER_BYTES_ID);
         config.setIncludeOriginal(true);
+        config.setKeyBaseStrategy(UnpackConfig.KEY_BASE_STRATEGY.CUSTOM);
+        config.setEmitKeyBase("test_recursive_embedded.docx");
         config.setEmbeddedIdPrefix("-");
         config.setZeroPadName(10);
-        
config.setSuffixStrategy(EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.EXISTING);
+        config.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.EXISTING);
         ParseContext parseContext = new ParseContext();
         // Set default content handler and parse mode
         parseContext.set(ContentHandlerFactory.class,
                 new 
BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1));
         parseContext.set(ParseMode.class, ParseMode.RMETA);
-        parseContext.set(EmbeddedDocumentBytesConfig.class, config);
+        parseContext.set(UnpackConfig.class, config);
         FetchEmitTuple t =
                 new FetchEmitTuple("myId", new FetchKey(FETCHER_ID, 
"test_recursive_embedded.docx"),
                         new EmitKey(EMITTER_JSON_ID, 
"test_recursive_embedded.docx"), new Metadata(), parseContext,


Reply via email to