This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new cb1b9fef1 TIKA-4515 -- add fully recursive extraction (#2363)
cb1b9fef1 is described below

commit cb1b9fef1e7cd59486a32f291da275e5603b2c71
Author: Tim Allison <[email protected]>
AuthorDate: Tue Oct 14 16:25:35 2025 -0400

    TIKA-4515 -- add fully recursive extraction (#2363)
    
    * TIKA-4515 -- add fully recursive extraction
---
 .../main/java/org/apache/tika/cli/AsyncHelper.java |  61 ++++++
 .../src/main/java/org/apache/tika/cli/TikaCLI.java | 107 +++++------
 .../java/org/apache/tika/cli/TikaCLIAsyncTest.java |   2 +-
 .../test/java/org/apache/tika/cli/TikaCLITest.java |  87 ++++++++-
 .../java/org/apache/tika/io/FilenameUtils.java     | 204 ++++++++++++++++++++-
 .../main/java/org/apache/tika/mime/MimeTypes.java  |  33 ++--
 .../java/org/apache/tika/io/FilenameUtilsTest.java | 126 +++++++++++++
 .../tika/pipes/kafka/tests/TikaPipesKafkaTest.java |   2 +-
 .../pipes/opensearch/tests/OpenSearchTest.java     |   2 +-
 .../tika/pipes/s3/tests/S3PipeIntegrationTest.java |   2 +-
 .../pipes/solr/tests/TikaPipesSolrTestBase.java    |   4 +-
 .../apache/tika/async/cli/SimpleAsyncConfig.java   |   9 +-
 .../org/apache/tika/async/cli/TikaAsyncCLI.java    |  84 ++++++++-
 .../tika/async/cli/TikaConfigAsyncWriter.java      |  59 +++---
 .../apache/tika/async/cli/AsyncProcessorTest.java  |   4 +-
 .../tika/async/cli/TikaConfigAsyncWriterTest.java  |   4 +-
 .../AbstractEmbeddedDocumentBytesHandler.java      |  65 ++++++-
 .../extractor/EmbeddedDocumentBytesConfig.java     |  57 ++++--
 18 files changed, 758 insertions(+), 154 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java 
b/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
new file mode 100644
index 000000000..a9cc2330c
--- /dev/null
+++ b/tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.cli;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+
+public class AsyncHelper {
+    public static String[] translateArgs(String[] args) {
+        List<String> argList = new ArrayList<>();
+        if (args.length == 2) {
+            if (args[0].startsWith("-Z")) {
+                argList.add("-Z");
+                argList.add("-i");
+                argList.add(args[1]);
+                argList.add("-o");
+                argList.add(args[1]);
+                return argList.toArray(new String[0]);
+            } else if (args[0].startsWith("-") || args[1].startsWith("-")) {
+                argList.add(args[0]);
+                argList.add(args[1]);
+                return argList.toArray(new String[0]);
+            } else {
+                argList.add("-i");
+                argList.add(args[0]);
+                argList.add("-o");
+                argList.add(args[1]);
+                return argList.toArray(new String[0]);
+            }
+        }
+        if (args.length == 3) {
+            if (args[0].equals("-Z") && ! args[1].startsWith("-") && ! 
args[2].startsWith("-")) {
+                argList.add("-Z");
+                argList.add("-i");
+                argList.add(args[1]);
+                argList.add("-o");
+                argList.add(args[2]);
+                return argList.toArray(new String[0]);
+            }
+        }
+        argList.addAll(Arrays.asList(args));
+        argList.remove("-a");
+        return argList.toArray(new String[0]);
+    }
+}
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java 
b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 96276935b..7706c0f59 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -37,6 +37,8 @@ import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -79,7 +81,7 @@ import org.apache.tika.gui.TikaGUI;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.language.detect.LanguageHandler;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.Property;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.mime.MimeType;
@@ -104,6 +106,7 @@ import org.apache.tika.sax.WriteOutContentHandler;
 import org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler;
 import org.apache.tika.serialization.JsonMetadata;
 import org.apache.tika.serialization.JsonMetadataList;
+import org.apache.tika.utils.StringUtils;
 import org.apache.tika.utils.XMLReaderUtils;
 import org.apache.tika.xmp.XMPMetadata;
 
@@ -112,6 +115,7 @@ import org.apache.tika.xmp.XMPMetadata;
  */
 public class TikaCLI {
     private static final Logger LOG = LoggerFactory.getLogger(TikaCLI.class);
+    private static final Property NORMALIZED_EMBEDDED_NAME = 
Property.externalText("tk:normalized-embedded-name");
 
     private final int MAX_MARK = 20 * 1024 * 1024;//20MB
 
@@ -254,16 +258,35 @@ public class TikaCLI {
     }
 
     private static void async(String[] args) throws Exception {
+        args = AsyncHelper.translateArgs(args);
         String tikaConfigPath = "";
-        String config = "--config=";
-        for (String arg : args) {
-            if (arg.startsWith(config)) {
-                tikaConfigPath = arg.substring(config.length());
-                TikaAsyncCLI.main(new String[]{tikaConfigPath});
-                return;
+        for (int i = 0; i < args.length - 1; i++) {
+            if (args[i].equals("-c")) {
+                tikaConfigPath = args[i + 1];
+                break;
+            }
+        }
+        if (! StringUtils.isBlank(tikaConfigPath)) {
+            TikaAsyncCLI.main(args);
+            return;
+        }
+        Path tmpConfig = null;
+        try {
+            tmpConfig = Files.createTempFile("tika-config-", ".xml");
+            
Files.copy(TikaCLI.class.getResourceAsStream("/tika-config-default-single-file.xml"),
+                    tmpConfig, StandardCopyOption.REPLACE_EXISTING);
+            List<String> argList = new ArrayList<>();
+            for (String arg : args) {
+                argList.add(arg);
+            }
+            argList.add("-c");
+            argList.add(tmpConfig.toAbsolutePath().toString());
+            TikaAsyncCLI.main(argList.toArray(new String[0]));
+        } finally {
+            if (tmpConfig != null) {
+                Files.delete(tmpConfig);
             }
         }
-        TikaAsyncCLI.main(args);
     }
 
     /**
@@ -318,6 +341,7 @@ public class TikaCLI {
     }
 
     private boolean testForAsync(String[] args) {
+
         if (args.length == 2) {
             if (Files.isDirectory(Paths.get(args[0]))) {
                 return true;
@@ -333,6 +357,9 @@ public class TikaCLI {
             if (arg.equals("-o") || arg.startsWith("--output")) {
                 return true;
             }
+            if (arg.equals("-Z")) {
+                return true;
+            }
 
         }
         return false;
@@ -1076,16 +1103,18 @@ public class TikaCLI {
 
         @Override
         public void parseEmbedded(TikaInputStream tis, ContentHandler 
contentHandler, Metadata metadata, boolean outputHtml) throws SAXException, 
IOException {
-
-            MediaType contentType = detector.detect(tis, metadata);
-
-            String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
-            Path outputFile = null;
-            if (name == null) {
-                name = "file_" + count++;
+            String contentType = metadata.get(Metadata.CONTENT_TYPE);
+            if (StringUtils.isBlank(contentType)) {
+                MediaType mediaType = detector.detect(tis, metadata);
+                if (mediaType == null) {
+                    mediaType = MediaType.OCTET_STREAM;
+                }
+                contentType = mediaType.toString();
+                metadata.set(Metadata.CONTENT_TYPE, contentType);
             }
-            outputFile = getOutputFile(name, metadata, contentType);
 
+            Path outputFile = getOutputFile(metadata);
+            String name = metadata.get(NORMALIZED_EMBEDDED_NAME);
 
             Path parent = outputFile.getParent();
             if (parent != null && ! Files.isDirectory(parent)) {
@@ -1110,33 +1139,14 @@ public class TikaCLI {
             }
         }
 
-        private Path getOutputFile(String name, Metadata metadata, MediaType 
contentType) throws IOException {
-            String ext = getExtension(contentType);
-            if (name.indexOf('.') == -1 && contentType != null) {
-                name += ext;
-            }
-
-            String relID = 
metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID);
-            if (relID != null && !name.startsWith(relID)) {
-                name = relID + "_" + name;
-            }
-            //defensively do this so that we don't get an exception
-            //from FilenameUtils.normalize
-            name = name.replaceAll("\u0000", " ");
-            String normalizedName = FilenameUtils.normalize(name);
-
+        private Path getOutputFile(Metadata metadata) throws IOException {
+            String normalizedName = 
org.apache.tika.io.FilenameUtils.getSanitizedEmbeddedFilePath(metadata, ".bin", 
50);
             if (normalizedName == null) {
-                normalizedName = FilenameUtils.getName(name);
+                String ext = 
org.apache.tika.io.FilenameUtils.calculateExtension(metadata, ".bin");
+                normalizedName = "file-" + count++ + ext;
             }
+            metadata.set(NORMALIZED_EMBEDDED_NAME, normalizedName);
 
-            if (normalizedName == null) {
-                normalizedName = "file" + count++ + ext;
-            }
-            //strip off initial C:/ or ~/ or /
-            int prefixLength = FilenameUtils.getPrefixLength(normalizedName);
-            if (prefixLength > -1) {
-                normalizedName = normalizedName.substring(prefixLength);
-            }
             Path outputFile = extractDir.resolve(normalizedName);
             //if file already exists, prepend uuid
             if (Files.exists(outputFile)) {
@@ -1149,23 +1159,6 @@ public class TikaCLI {
             return outputFile;
         }
 
-        private String getExtension(MediaType contentType) {
-            try {
-                String ext = config
-                        .getMimeRepository()
-                        .forName(contentType.toString())
-                        .getExtension();
-                if (ext == null) {
-                    return ".bin";
-                } else {
-                    return ext;
-                }
-            } catch (MimeTypeException e) {
-                LOG.info("bad mime type?", e);
-            }
-            return ".bin";
-
-        }
     }
 
     private class NoDocumentJSONMetHandler extends DefaultHandler {
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java 
b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java
index dfcbad297..096e3ce73 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java
@@ -103,7 +103,7 @@ public class TikaCLIAsyncTest {
 
     @Test
     public void testAsync() throws Exception {
-        String content = getParamOutContent("-a", "--config=" + 
ASYNC_CONFIG.toAbsolutePath());
+        String content = getParamOutContent("-a", "-c", 
ASYNC_CONFIG.toAbsolutePath().toString());
 
         int json = 0;
         for (File f : ASYNC_OUTPUT_DIR
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java 
b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index 2195685d7..79c765a32 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -27,9 +27,17 @@ import java.io.File;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.net.URI;
+import java.nio.file.FileVisitResult;
+import java.nio.file.FileVisitor;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.HashSet;
+import java.util.Set;
 
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -265,6 +273,19 @@ public class TikaCLITest {
         assertTrue(json.contains("Module1"));
     }
 
+    @Test
+    public void testRUnpack() throws Exception {
+        String[] expectedChildren = new String[]{
+                "testPDFPackage.pdf.json",
+                //the first two test that the default single file config is 
working
+                "testPDFPackage.pdf-embed/00000001-embedded-1",
+                "testPDFPackage.pdf-embed/00000002-image0.jpg",
+                "testPDFPackage.pdf-embed/00000003-PDF1.pdf",
+                "testPDFPackage.pdf-embed/00000004-PDF2.pdf"};
+        testRecursiveUnpack("testPDFPackage.pdf", expectedChildren, 2);
+    }
+
+
     /**
      * Tests -l option of the cli
      *
@@ -311,7 +332,7 @@ public class TikaCLITest {
 
     @Test
     public void testExtractSimple() throws Exception {
-        String[] expectedChildren = new String[]{"MBD002B040A.cdx", 
"file_4.png", "MBD002B0FA6.bin", "MBD00262FE3.txt", "file_0.emf"};
+        String[] expectedChildren = new String[]{"MBD002B040A.cdx", 
"file-4.png", "MBD002B0FA6.bin", "MBD00262FE3.txt", "file-0.emf"};
         testExtract("/coffee.xls", expectedChildren, 8);
     }
 
@@ -323,7 +344,7 @@ public class TikaCLITest {
 
     @Test
     public void testExtractRelative() throws Exception {
-        String[] expectedChildren = new String[]{"touch.pl",};
+        String[] expectedChildren = new String[]{"dangerous/dont/touch.pl",};
         testExtract("testZip_relative.zip", expectedChildren);
     }
 
@@ -340,6 +361,60 @@ public class TikaCLITest {
         testExtract("testZip_zeroByte.zip", expectedChildren);
     }
 
+
+    private void testRecursiveUnpack(String targetFile, String[] 
expectedChildrenFileNames) throws Exception {
+        testRecursiveUnpack(targetFile, expectedChildrenFileNames, 
expectedChildrenFileNames.length);
+    }
+
+    private void testRecursiveUnpack(String targetFile, String[] 
expectedChildrenFileNames, int expectedLength) throws Exception {
+        Path input = Paths.get(new URI(resourcePrefix + "/" + targetFile));
+        String[] params = {"-Z",
+                
ProcessUtils.escapeCommandLine(input.toAbsolutePath().toString()),
+                ProcessUtils.escapeCommandLine(extractDir
+                .toAbsolutePath()
+                .toString())};
+
+        TikaCLI.main(params);
+        Set<String> fileNames = getFileNames(extractDir);
+        String[] jsonFile = extractDir
+                .toFile()
+                .list();
+        assertNotNull(jsonFile);
+        assertEquals(expectedLength, jsonFile.length);
+        //assertEquals(fileNames.size(), expectedChildrenFileNames.length);
+
+        for (String expectedChildName : expectedChildrenFileNames) {
+            assertTrue(fileNames.contains(expectedChildName));
+        }
+    }
+
+    private Set<String> getFileNames(Path extractDir) throws IOException {
+        final Set<String> names = new HashSet<>();
+        Files.walkFileTree(extractDir, new FileVisitor<Path>() {
+            @Override
+            public @NotNull FileVisitResult preVisitDirectory(Path path, 
@NotNull BasicFileAttributes basicFileAttributes) throws IOException {
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public @NotNull FileVisitResult visitFile(Path path, @NotNull 
BasicFileAttributes basicFileAttributes) throws IOException {
+                names.add(extractDir.relativize(path).toString());
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public @NotNull FileVisitResult visitFileFailed(Path path, 
@NotNull IOException e) throws IOException {
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public @NotNull FileVisitResult postVisitDirectory(Path path, 
@Nullable IOException e) throws IOException {
+                return FileVisitResult.CONTINUE;
+            }
+        });
+        return names;
+    }
+
     private void testExtract(String targetFile, String[] 
expectedChildrenFileNames) throws Exception {
         testExtract(targetFile, expectedChildrenFileNames, 
expectedChildrenFileNames.length);
     }
@@ -399,10 +474,10 @@ public class TikaCLITest {
         new File("subdir/foo.txt").delete();
         new File("subdir").delete();
         String content = getParamOutContent("-z", "--extract-dir=target", 
resourcePrefix + "testWithSubdirs.zip");
-        assertTrue(content.contains("Extracting 'subdir/foo.txt'"));
+        //assertTrue(content.contains("Extracting 'subdir/foo.txt'"));
         // clean up. TODO: These should be in target.
-        new File("target/subdir/foo.txt").delete();
-        new File("target/subdir").delete();
+        assertTrue(new File("target/subdir/foo.txt").delete());
+        assertTrue(new File("target/subdir").delete());
     }
 
     @Test
@@ -420,7 +495,7 @@ public class TikaCLITest {
         Path jpeg = extractDir.resolve("image0.jpg");
         //tiff isn't extracted without optional image dependency
 //            File tiff = new File(tempFile, "image1.tif");
-        Path jobOptions = extractDir.resolve("Press Quality(1).joboptions");
+        Path jobOptions = extractDir.resolve("Press 
Quality(1).joboptions.txt");
         Path doc = extractDir.resolve("Unit10.doc");
 
         assertExtracted(jpeg, allFiles);
diff --git a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java 
b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
index 5178d3274..de57eda72 100644
--- a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
@@ -18,14 +18,21 @@ package org.apache.tika.io;
 
 import java.util.HashSet;
 import java.util.Locale;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MimeTypeException;
+import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.utils.StringUtils;
 
 
 public class FilenameUtils {
 
-
+    private static final MimeTypes MIME_TYPES = 
TikaConfig.getDefaultConfig().getMimeRepository();
+    private static final Pattern PROTOCOL_PATTERN = 
Pattern.compile("[A-Za-z0-9]{1,10}://+");
     /**
      * Reserved characters
      */
@@ -130,4 +137,199 @@ public class FilenameUtils {
         }
         return StringUtils.EMPTY;
     }
+
+    public static String getSanitizedEmbeddedFileName(Metadata metadata,
+                                                      String defaultExtension, 
int maxLength) {
+        String path = getEmbeddedPath(metadata);
+        //fName could be a full path or null
+        if (StringUtils.isBlank(path)) {
+            return null;
+        }
+        path = path.replaceAll("\u0000", " ");
+        int prefixLength = getPrefixLength(path);
+        if (prefixLength > 0) {
+            path = path.substring(prefixLength);
+        }
+        path = path.replaceAll("[:\\\\]+", "/");
+        String fName = getName(path);
+        fName = normalize(fName);
+        String extension = FilenameUtils.getSuffixFromPath(fName);
+        if (extension.equals(fName)) {
+            return null;
+        }
+        String namePart = null;
+        if (StringUtils.isBlank(extension)) {
+            namePart = fName;
+            extension = calculateExtension(metadata, defaultExtension);
+        } else {
+            namePart = fName.substring(0, fName.length() - extension.length());
+        }
+        if (StringUtils.isBlank(namePart)) {
+            return null;
+        }
+        //remove all initial .
+        namePart = namePart.replaceAll("\\A\\.+", "_");
+        //defense in depth. We shouldn't need this
+        namePart = namePart.replaceAll("(\\.\\.)+", "_");
+        namePart = namePart.replaceAll("[/\\\\]+", "_");
+        namePart = namePart.replaceAll(":+", "_");
+
+        if (StringUtils.isBlank(namePart)) {
+            return null;
+        }
+
+        //if path is > max length, return only the name part
+        if (namePart.length() > maxLength) {
+            return namePart.substring(0, maxLength - extension.length() - 3) + 
"..." + extension;
+        }
+        return namePart + extension;
+
+    }
+
+    /**
+     * This tries to sanitize dangerous user generated embedded file paths.
+     * If trusting these paths for writing files, users should run checks to 
make
+     * sure that the generated file path does not zipslip out of the target 
directory.
+     *
+     * @param metadata
+     * @param defaultExtension
+     * @param maxLength
+     * @return
+     */
+    public static String getSanitizedEmbeddedFilePath(Metadata metadata,
+                                                      String defaultExtension, 
int maxLength) {
+        String path = getEmbeddedPath(metadata);
+        //fName could be a full path or null
+        if (StringUtils.isBlank(path)) {
+            return null;
+        }
+        path = path.replaceAll("\u0000", " ");
+        int prefixLength = getPrefixLength(path);
+        if (prefixLength > 0) {
+            path = path.substring(prefixLength);
+        }
+        path = path.replaceAll("\\\\", "/");
+        path = removeProtocol(path);
+        path = path.replaceAll(":+", "/");
+        path = path.replaceAll("/+", "/");
+        path = normalize(path);
+        path = path.replaceAll("\\.{2,}", ".");
+        path = path.replaceAll("\\./", "/");
+        if (path.isBlank()) {
+            return null;
+        }
+        path = path.replaceAll("\\A/+", "");
+        path = path.replaceAll("/+\\Z", "");
+        String fName = getName(path);
+        if (StringUtils.isBlank(fName)) {
+            return null;
+        }
+        String relPath = "";
+        if (path.length() > fName.length()) {
+            relPath = path.substring(0, path.length() - fName.length() - 1);
+        }
+        String extension = FilenameUtils.getSuffixFromPath(fName);
+        if (extension.equals(path)) {
+            return extension;
+        }
+        String namePart = null;
+        if (StringUtils.isBlank(extension)) {
+            namePart = path;
+            extension = calculateExtension(metadata, defaultExtension);
+        } else {
+            namePart = fName.substring(0, fName.length() - extension.length());
+        }
+        if (StringUtils.isBlank(namePart)) {
+            return null;
+        }
+        //remove all initial .
+        namePart = namePart.replaceAll("\\A\\.+", "_");
+        //defense in depth. We shouldn't need this
+        namePart = namePart.replaceAll("\\.{2,}", ".");
+        namePart = namePart.replaceAll("[/\\\\]+", "_");
+
+        if (StringUtils.isBlank(namePart)) {
+            return null;
+        }
+        String retPath = StringUtils.isBlank(relPath) ? namePart + extension : 
relPath + "/" + namePart + extension;
+
+        //if path is > max length, return only the name part
+        if (retPath.length() > maxLength) {
+            if (namePart.length() > maxLength) {
+                return namePart.substring(0, maxLength - extension.length() - 
3) + "..." + extension;
+            }
+            return namePart + extension;
+        }
+        return retPath;
+    }
+
+    private static int getPrefixLength(String path) {
+        int prefixLength = 
org.apache.commons.io.FilenameUtils.getPrefixLength(path);
+        if (prefixLength > 0) {
+            return prefixLength;
+        }
+        if (path.length() == 2 && path.charAt(0) >= 'A' && path.charAt(0) <= 
'Z' && path.charAt(1) == ':') {
+            return 2;
+        }
+        return 0;
+    }
+
+    private static String removeProtocol(String path) {
+        Matcher m = PROTOCOL_PATTERN.matcher(path);
+        int last = -1;
+        while (m.find()) {
+            last = m.end();
+        }
+        if (last > -1) {
+            return path.substring(last);
+        }
+        return path;
+    }
+
+    private static String getEmbeddedPath(Metadata metadata) {
+        //potentially look for other values in embedded path or original file 
name, etc...
+        //maybe different fallback order?
+        String path = metadata.get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH);
+        if (! StringUtils.isBlank(path)) {
+            return path;
+        }
+        path = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
+        if (! StringUtils.isBlank(path)) {
+            return path;
+        }
+        path = metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID);
+        if (! StringUtils.isBlank(path)) {
+            return path;
+        }
+        return metadata.get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME);
+    }
+
+    /**
+     * Calculate the extension based on the {@link Metadata#CONTENT_TYPE} 
value.
+     * On parse exception or null value, return the default value.
+     *
+     * @param metadata
+     * @param defaultValue
+     * @return the extension based on the mime type, including the initial "."
+     */
+    public static String calculateExtension(Metadata metadata, String 
defaultValue) {
+        String mime = metadata.get(Metadata.CONTENT_TYPE);
+        if (mime == null) {
+            return defaultValue;
+        }
+        try {
+            String ext = MIME_TYPES
+                    .forName(mime)
+                    .getExtension();
+            if (ext == null) {
+                return ".bin";
+            } else {
+                return ext;
+            }
+        } catch (MimeTypeException e) {
+            //swallow
+        }
+        return ".bin";
+    }
+
 }
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java 
b/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
index f961e8ee0..e146e8e0f 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
@@ -347,26 +347,25 @@ public final class MimeTypes implements Detector, 
Serializable {
      */
     public MimeType forName(String name) throws MimeTypeException {
         MediaType type = MediaType.parse(name);
-        if (type != null) {
-            MediaType normalisedType = registry.normalize(type);
-            MimeType mime = types.get(normalisedType);
-
-            if (mime == null) {
-                synchronized (this) {
-                    // Double check it didn't already get added while
-                    //  we were waiting for the lock
-                    mime = types.get(normalisedType);
-                    if (mime == null) {
-                        mime = new MimeType(type);
-                        add(mime);
-                        types.put(type, mime);
-                    }
+        if (type == null) {
+            throw new MimeTypeException("Invalid media type name: " + name);
+        }
+        MediaType normalisedType = registry.normalize(type);
+        MimeType mime = types.get(normalisedType);
+
+        if (mime == null) {
+            synchronized (this) {
+                // Double check it didn't already get added while
+                //  we were waiting for the lock
+                mime = types.get(normalisedType);
+                if (mime == null) {
+                    mime = new MimeType(type);
+                    add(mime);
+                    types.put(type, mime);
                 }
             }
-            return mime;
-        } else {
-            throw new MimeTypeException("Invalid media type name: " + name);
         }
+        return mime;
     }
 
     /**
diff --git a/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java 
b/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
index 39f0ae757..c3abd4134 100644
--- a/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
+++ b/tika-core/src/test/java/org/apache/tika/io/FilenameUtilsTest.java
@@ -18,11 +18,14 @@
 package org.apache.tika.io;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 
 import org.junit.jupiter.api.Test;
 
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.utils.StringUtils;
 
 public class FilenameUtilsTest {
@@ -113,4 +116,127 @@ public class FilenameUtilsTest {
         assertEquals(expected, FilenameUtils.getName(path));
     }
 
+    @Test
+    public void testEmbeddedFileNames() throws Exception {
+        String n = "the quick brown fox.docx";
+        assertEquals(n, sanitizeFilename(n));
+        assertEquals(n, sanitizeFilename(n.substring(0, n.length() - 5),
+                
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"));
+
+        assertEquals(n, sanitizeFilename("the quick\u0000brown fox.docx"));
+        assertEquals(n, sanitizeFilename(n.substring(0, n.length() - 5),
+                
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"));
+
+        assertEquals("the quick brown fox.bin", 
sanitizeFilename(n.substring(0, n.length() - 5)));
+        assertEquals("brown fox.docx", sanitizeFilename("the quick..\\brown 
fox.docx"));
+        assertEquals("brown fox.docx", sanitizeFilename("the 
quick..\\/\\/\\brown fox.docx"));
+        assertEquals("brown fox.docx", sanitizeFilename("the quick../brown 
fox.docx"));
+        assertEquals("_brown fox.docx", sanitizeFilename("the quick../..brown 
fox.docx"));
+        assertEquals("brown_ fox.docx", sanitizeFilename("the quick../brown.. 
fox.docx"));
+        assertEquals("brown_. fox.docx", sanitizeFilename("the 
quick../brown... fox.docx"));
+        assertEquals("brown_ fox.docx", sanitizeFilename("the 
quick../brown.... fox.docx"));
+        assertEquals("_brown fox.docx", sanitizeFilename("...brown fox.docx"));
+        assertEquals("_brown fox.docx", sanitizeFilename("....brown 
fox.docx"));
+        assertEquals("_brown fox.docx", sanitizeFilename(".brown fox.docx"));
+        assertEquals("abcdefghijklmnopqrstuvwxyz_abcdefghijklmno....docx", 
sanitizeFilename(
+                
"abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz.docx"));
+
+        assertEquals("the quick brown fox.xlsx", sanitizeFilename("C:\\the 
quick brown fox.xlsx"));
+        assertEquals("the quick brown fox.xlsx", sanitizeFilename("/the quick 
brown fox.xlsx"));
+        assertEquals("the quick brown fox.xlsx", sanitizeFilename("~/the quick 
brown fox.xlsx"));
+        assertEquals("the quick brown fox.xlsx", sanitizeFilename("https://the 
quick brown fox.xlsx"));
+        assertEquals("the quick brown fox.xlsx", 
sanitizeFilename("https://tika.apache.org/the quick brown fox.xlsx"));
+        assertEquals("the quick brown fox.xlsx", 
sanitizeFilename("file:///tika.apache.org/the quick brown fox.xlsx"));
+
+        assertEquals("brown fox.xlsx", sanitizeFilename("a:/the quick:brown 
fox.xlsx"));
+        assertEquals("_the quick brown fox.xlsx", 
sanitizeFilename("C:\\a/b/c/..the quick brown fox.xlsx"));
+        assertEquals("_the quick brown fox.xlsx", 
sanitizeFilename("~/a/b/c/.the quick brown fox.xlsx"));
+
+        assertEquals("_.docx", sanitizeFilename("..................docx"));
+        assertEquals("_.docx", sanitizeFilename("..docx"));
+        assertNull(sanitizeFilename(".docx"));
+        assertNull(sanitizeFilename(""));
+        assertNull(sanitizeFilename(null));
+        assertNull(sanitizeFilename("/"));
+        assertNull(sanitizeFilename("~/"));
+        assertNull(sanitizeFilename("C:"));
+        assertNull(sanitizeFilename("C:/"));
+        assertNull(sanitizeFilename("C:\\"));
+
+    }
+
+    @Test
+    public void testEmbeddedFilePaths() throws Exception {
+        String n = "the quick brown fox.docx";
+        /*assertEquals(n, sanitizePath(n));
+        assertEquals(n, sanitizePath(n.substring(0, n.length() - 5),
+                
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"));
+        assertEquals(n, sanitizeFilename("the quick\u0000brown fox.docx"));
+
+        assertEquals("the quick brown fox.bin", sanitizePath(n.substring(0, 
n.length() - 5)));
+        assertEquals("the quick/brown fox.docx", sanitizePath("the 
quick..\\brown fox.docx"));
+        assertEquals("the quick/brown fox.docx", sanitizePath("the 
quick..\\/\\/\\brown fox.docx"));
+        assertEquals("the quick/brown fox.docx", sanitizePath("the 
quick../brown fox.docx"));
+        assertEquals("the quick/_brown fox.docx", sanitizePath("the 
quick../..brown fox.docx"));
+        assertEquals("the quick/brown. fox.docx", sanitizePath("the 
quick../brown.. fox.docx"));
+        assertEquals("the quick/brown. fox.docx", sanitizePath("the 
quick../brown... fox.docx"));
+        assertEquals("the quick/brown. fox.docx", sanitizePath("the 
quick../brown.... fox.docx"));
+        assertEquals("_brown fox.docx", sanitizePath("...brown fox.docx"));
+        assertEquals("_brown fox.docx", sanitizePath("....brown fox.docx"));
+        assertEquals("_brown fox.docx", sanitizePath(".brown fox.docx"));
+        assertEquals("abcdefghijklmnopqrstuvwxyz_abcdefghijklmno....docx", 
sanitizePath(
+                
"abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz.docx"));
+
+        assertEquals("the quick brown fox.xlsx", sanitizePath("C:\\the quick 
brown fox.xlsx"));
+        assertEquals("the quick brown fox.xlsx", sanitizePath("/the quick 
brown fox.xlsx"));
+        assertEquals("the quick brown fox.xlsx", sanitizePath("~/the quick 
brown fox.xlsx"));
+        assertEquals("the quick brown fox.xlsx", sanitizePath("https://the 
quick brown fox.xlsx"));
+        assertEquals("tika.apache.org/the quick brown fox.xlsx", 
sanitizePath("https://tika.apache.org/the quick brown fox.xlsx"));
+        assertEquals("tika.apache.org/the quick brown fox.xlsx", 
sanitizePath("file:///tika.apache.org/the quick brown fox.xlsx"));
+
+        assertEquals("the quick/brown fox.xlsx", sanitizePath("a:/the 
quick:brown fox.xlsx"));
+        assertEquals("a/b/c/_the quick brown fox.xlsx", 
sanitizePath("C:\\a/b/c/..the quick brown fox.xlsx"));
+        assertEquals("a/b/c/_the quick brown fox.xlsx", 
sanitizePath("~/a/b/c/.the quick brown fox.xlsx"));
+
+        assertEquals(".docx", sanitizePath("..................docx"));
+        assertEquals(".docx", sanitizePath("..docx"));
+        assertEquals(".docx", sanitizePath(".docx"));
+        assertNull(sanitizePath(""));
+        assertNull(sanitizePath(null));
+        assertNull(sanitizePath("/"));
+        assertNull(sanitizePath("~/"));*/
+        assertNull(sanitizePath("C:"));
+        assertNull(sanitizePath("C:/"));
+        assertNull(sanitizePath("C:\\"));
+
+    }
+
+    private String sanitizePath(String name) {
+        return FilenameUtils.getSanitizedEmbeddedFilePath(getMetadata(name), 
".bin", 50);
+    }
+
+    private String sanitizePath(String name, String mimeType) {
+        return FilenameUtils.getSanitizedEmbeddedFilePath(getMetadata(name, 
mimeType), ".bin", 50);
+    }
+
+    private String sanitizeFilename(String name, String mimeType) {
+        return FilenameUtils.getSanitizedEmbeddedFileName(getMetadata(name, 
mimeType), ".bin", 50);
+    }
+
+    private String sanitizeFilename(String name) {
+        return FilenameUtils.getSanitizedEmbeddedFileName(getMetadata(name), 
".bin", 50);
+    }
+
+    private Metadata getMetadata(String name, String contentType) {
+        Metadata metadata = getMetadata(name);
+        metadata.set(Metadata.CONTENT_TYPE, contentType);
+        return metadata;
+    }
+
+    private Metadata getMetadata(String name) {
+        Metadata metadata = new Metadata();
+        metadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_PATH, name);
+        return metadata;
+    }
+
 }
diff --git 
a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/java/org/apache/tika/pipes/kafka/tests/TikaPipesKafkaTest.java
 
b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/java/org/apache/tika/pipes/kafka/tests/TikaPipesKafkaTest.java
index efc8786b0..f54b2a6f6 100644
--- 
a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/java/org/apache/tika/pipes/kafka/tests/TikaPipesKafkaTest.java
+++ 
b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/java/org/apache/tika/pipes/kafka/tests/TikaPipesKafkaTest.java
@@ -183,7 +183,7 @@ public class TikaPipesKafkaTest {
                         createTikaConfigXml(tikaConfigFile, log4jPropFile, 
tikaConfigTemplateXml);
 
                 FileUtils.writeStringToFile(tikaConfigFile, tikaConfigXml, 
StandardCharsets.UTF_8);
-                TikaCLI.main(new String[]{"-a", "--config=" + 
tikaConfigFile.getAbsolutePath()});
+                TikaCLI.main(new String[]{"-a", "-c", 
tikaConfigFile.getAbsolutePath()});
             } catch (Exception e) {
                 throw new RuntimeException(e);
             }
diff --git 
a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/OpenSearchTest.java
 
b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/OpenSearchTest.java
index b0d882f15..9923a320a 100644
--- 
a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/OpenSearchTest.java
+++ 
b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/java/org/apache/tika/pipes/opensearch/tests/OpenSearchTest.java
@@ -420,7 +420,7 @@ public class OpenSearchTest {
         Path tikaConfigFile = getTikaConfigFile(attachmentStrategy, 
updateStrategy, parseMode,
                 endpoint, pipesDirectory, testDocDirectory);
 
-        TikaCLI.main(new String[]{"-a", "--config=" + 
tikaConfigFile.toAbsolutePath().toString()});
+        TikaCLI.main(new String[]{"-a", "-c",  
tikaConfigFile.toAbsolutePath().toString()});
 
         //refresh to make sure the content is searchable
         JsonResponse refresh = client.getJson(endpoint + "/_refresh");
diff --git 
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/S3PipeIntegrationTest.java
 
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/S3PipeIntegrationTest.java
index c59b0d699..cb2188932 100644
--- 
a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/S3PipeIntegrationTest.java
+++ 
b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/java/org/apache/tika/pipes/s3/tests/S3PipeIntegrationTest.java
@@ -133,7 +133,7 @@ class S3PipeIntegrationTest {
                     createTikaConfigXml(tikaConfigFile, log4jPropFile, 
tikaConfigTemplateXml);
 
             FileUtils.writeStringToFile(tikaConfigFile, tikaConfigXml, 
StandardCharsets.UTF_8);
-            TikaCLI.main(new String[]{"-a", "--config=" + 
tikaConfigFile.getAbsolutePath()});
+            TikaCLI.main(new String[]{"-a", "-c", 
tikaConfigFile.getAbsolutePath()});
         } catch (Exception e) {
             throw new RuntimeException(e);
         }
diff --git 
a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java
 
b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java
index 6121ee166..e7a3cf649 100644
--- 
a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java
+++ 
b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/java/org/apache/tika/pipes/solr/tests/TikaPipesSolrTestBase.java
@@ -223,7 +223,7 @@ public abstract class TikaPipesSolrTestBase {
                         SolrEmitter.UpdateStrategy.ADD, 
SolrEmitter.AttachmentStrategy.PARENT_CHILD,
                         HandlerConfig.PARSE_MODE.RMETA);
         FileUtils.writeStringToFile(tikaConfigFile, tikaConfigXml, 
StandardCharsets.UTF_8);
-        TikaCLI.main(new String[]{"-a", "--config=" + 
tikaConfigFile.getAbsolutePath()});
+        TikaCLI.main(new String[]{"-a", "-c", 
tikaConfigFile.getAbsolutePath()});
 
         try (SolrClient solrClient = new 
Http2SolrClient.Builder(solrEndpoint).build()) {
             solrClient.commit(collection, true, true);
@@ -257,7 +257,7 @@ public abstract class TikaPipesSolrTestBase {
                         HandlerConfig.PARSE_MODE.RMETA);
         FileUtils.writeStringToFile(tikaConfigFile, tikaConfigXml, 
StandardCharsets.UTF_8);
 
-        TikaCLI.main(new String[]{"-a", "--config=" + 
tikaConfigFile.getAbsolutePath()});
+        TikaCLI.main(new String[]{"-a", "-c", 
tikaConfigFile.getAbsolutePath()});
 
         try (SolrClient solrClient = new 
Http2SolrClient.Builder(solrEndpoint).build()) {
             solrClient.commit(collection, true, true);
diff --git 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/SimpleAsyncConfig.java
 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/SimpleAsyncConfig.java
index 603f80e3d..0c3987165 100644
--- 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/SimpleAsyncConfig.java
+++ 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/SimpleAsyncConfig.java
@@ -25,9 +25,11 @@ class SimpleAsyncConfig {
     private String xmx;
     private String fileList;
     private String tikaConfig;//path to the tikaConfig file to be used in the 
forked process
+    private boolean extractBytes;
 
     //TODO -- switch to a builder
-    public SimpleAsyncConfig(String inputDir, String outputDir, Integer 
numClients, Long timeoutMs, String xmx, String fileList, String tikaConfig) {
+    public SimpleAsyncConfig(String inputDir, String outputDir, Integer 
numClients, Long timeoutMs, String xmx, String fileList,
+                             String tikaConfig, boolean extractBytes) {
         this.inputDir = inputDir;
         this.outputDir = outputDir;
         this.numClients = numClients;
@@ -35,6 +37,7 @@ class SimpleAsyncConfig {
         this.xmx = xmx;
         this.fileList = fileList;
         this.tikaConfig = tikaConfig;
+        this.extractBytes = extractBytes;
     }
 
     public String getInputDir() {
@@ -64,4 +67,8 @@ class SimpleAsyncConfig {
     public String getTikaConfig() {
         return tikaConfig;
     }
+
+    public boolean isExtractBytes() {
+        return extractBytes;
+    }
 }
diff --git 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
index 2a87a4b1a..fe4377213 100644
--- 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
+++ 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
@@ -30,9 +30,15 @@ import org.apache.commons.cli.ParseException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.pipes.core.FetchEmitTuple;
 import org.apache.tika.pipes.core.async.AsyncProcessor;
+import org.apache.tika.pipes.core.emitter.EmitKey;
+import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig;
+import org.apache.tika.pipes.core.fetcher.FetchKey;
 import org.apache.tika.pipes.core.pipesiterator.PipesIterator;
+import org.apache.tika.utils.StringUtils;
 
 public class TikaAsyncCLI {
 
@@ -51,6 +57,7 @@ public class TikaAsyncCLI {
         options.addOption("l", "fileList", true, "file list");
         options.addOption("c", "config", true, "tikaConfig to inherit from -- 
" +
                 "commandline options will not overwrite existing iterators, 
emitters, fetchers and async");
+        options.addOption("Z", "unzip", false, "extract raw bytes from 
attachments");
 
         return options;
     }
@@ -58,15 +65,21 @@ public class TikaAsyncCLI {
     public static void main(String[] args) throws Exception {
         if (args.length == 0) {
             usage(getOptions());
-        } else if (args.length == 1) {
-            processWithTikaConfig(Paths.get(args[0]));
         } else {
             processCommandLine(args);
         }
     }
 
     private static void processCommandLine(String[] args) throws Exception {
+        if (args.length == 1) {
+            processWithTikaConfig(PipesIterator.build(Paths.get(args[0])), 
Paths.get(args[0]), false);
+            return;
 
+        }
+        if (args.length == 2 && args[0].equals("-c")) {
+            processWithTikaConfig(PipesIterator.build(Paths.get(args[1])), 
Paths.get(args[1]), false);
+            return;
+        }
         SimpleAsyncConfig simpleAsyncConfig = parseCommandLine(args);
 
         Path tikaConfig = null;
@@ -74,7 +87,8 @@ public class TikaAsyncCLI {
             tikaConfig = Files.createTempFile("tika-async-tmp-", ".xml");
             TikaConfigAsyncWriter tikaConfigAsyncWriter = new 
TikaConfigAsyncWriter(simpleAsyncConfig);
             tikaConfigAsyncWriter.write(tikaConfig);
-            processWithTikaConfig(tikaConfig);
+            PipesIterator pipesIterator = buildPipesIterator(tikaConfig, 
simpleAsyncConfig);
+            processWithTikaConfig(pipesIterator, tikaConfig, 
simpleAsyncConfig.isExtractBytes());
         } finally {
             if (tikaConfig != null) {
                 Files.delete(tikaConfig);
@@ -82,10 +96,23 @@ public class TikaAsyncCLI {
         }
     }
 
+    private static PipesIterator buildPipesIterator(Path tikaConfig, 
SimpleAsyncConfig simpleAsyncConfig) throws TikaConfigException, IOException {
+        String inputDirString = simpleAsyncConfig.getInputDir();
+        if (StringUtils.isBlank(inputDirString)) {
+            return PipesIterator.build(tikaConfig);
+        }
+        Path p = Paths.get(simpleAsyncConfig.getInputDir());
+        if (Files.isRegularFile(p)) {
+            return new SingleFilePipesIterator(p.getFileName().toString(), 
simpleAsyncConfig.isExtractBytes());
+        }
+        return PipesIterator.build(tikaConfig);
+    }
+
     //not private for testing purposes
     static SimpleAsyncConfig parseCommandLine(String[] args) throws 
ParseException, IOException {
         if (args.length == 2 && ! args[0].startsWith("-")) {
-            return new SimpleAsyncConfig(args[0], args[1], null, null, null, 
null, null);
+            return new SimpleAsyncConfig(args[0], args[1], null,
+                    null, null, null, null, false);
         }
 
         Options options = getOptions();
@@ -103,6 +130,7 @@ public class TikaAsyncCLI {
         Integer numClients = null;
         String fileList = null;
         String tikaConfig = null;
+        boolean extractBytes = false;
         if (line.hasOption("i")) {
             inputDir = line.getOptionValue("i");
         }
@@ -121,21 +149,24 @@ public class TikaAsyncCLI {
         if (line.hasOption("l")) {
             fileList = line.getOptionValue("l");
         }
-
         if (line.hasOption("c")) {
             tikaConfig = line.getOptionValue("c");
         }
+        if (line.hasOption("Z")) {
+            extractBytes = true;
+        }
+
         return new SimpleAsyncConfig(inputDir, outputDir,
-                numClients, timeoutMs, xmx, fileList, tikaConfig);
+                numClients, timeoutMs, xmx, fileList, tikaConfig, 
extractBytes);
     }
 
 
-    private static void processWithTikaConfig(Path tikaConfigPath) throws 
Exception {
-        PipesIterator pipesIterator = PipesIterator.build(tikaConfigPath);
+    private static void processWithTikaConfig(PipesIterator pipesIterator, 
Path tikaConfigPath, boolean extractBytes) throws Exception {
         long start = System.currentTimeMillis();
         try (AsyncProcessor processor = new AsyncProcessor(tikaConfigPath, 
pipesIterator)) {
 
             for (FetchEmitTuple t : pipesIterator) {
+                configureExtractBytes(t, extractBytes);
                 boolean offered = processor.offer(t, TIMEOUT_MS);
                 if (!offered) {
                     throw new TimeoutException("timed out waiting to add a 
fetch emit tuple");
@@ -155,12 +186,47 @@ public class TikaAsyncCLI {
         }
     }
 
+    private static void configureExtractBytes(FetchEmitTuple t, boolean 
extractBytes) {
+        if (! extractBytes) {
+            return;
+        }
+        ParseContext parseContext = t.getParseContext();
+        EmbeddedDocumentBytesConfig config = new EmbeddedDocumentBytesConfig();
+        config.setExtractEmbeddedDocumentBytes(true);
+        config.setEmitter(TikaConfigAsyncWriter.EMITTER_NAME);
+        config.setIncludeOriginal(false);
+        
config.setSuffixStrategy(EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.DETECTED);
+        config.setEmbeddedIdPrefix("-");
+        config.setZeroPadName(8);
+        
config.setKeyBaseStrategy(EmbeddedDocumentBytesConfig.KEY_BASE_STRATEGY.CONTAINER_NAME_AS_IS);
+        parseContext.set(EmbeddedDocumentBytesConfig.class, config);
+    }
+
     private static void usage(Options options) throws IOException {
         System.out.println("Two primary options:");
         System.out.println("\t1. Specify a tika-config.xml on the commandline 
that includes the definitions for async");
         System.out.println("\t2. Commandline:");
         org.apache.commons.cli.help.HelpFormatter helpFormatter = 
org.apache.commons.cli.help.HelpFormatter.builder().get();
-        helpFormatter.printHelp("tikaAsynCli", null, options, null, true);
+        helpFormatter.printHelp("tikaAsyncCli", null, options, null, true);
         System.exit(1);
     }
+
+    private static class SingleFilePipesIterator extends PipesIterator {
+        private final String fName;
+        private final boolean extractBytes;
+        public SingleFilePipesIterator(String string, boolean extractBytes) {
+            super();
+            this.fName = string;
+            this.extractBytes = extractBytes;
+        }
+
+        @Override
+        protected void enqueue() throws IOException, TimeoutException, 
InterruptedException {
+            FetchEmitTuple t = new FetchEmitTuple("0",
+                    new FetchKey(TikaConfigAsyncWriter.FETCHER_NAME, fName),
+                    new EmitKey(TikaConfigAsyncWriter.EMITTER_NAME, fName)
+                    );
+            tryToAdd(t);
+        }
+    }
 }
diff --git 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaConfigAsyncWriter.java
 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaConfigAsyncWriter.java
index 5ff8f5d46..7452a5877 100644
--- 
a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaConfigAsyncWriter.java
+++ 
b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaConfigAsyncWriter.java
@@ -49,8 +49,8 @@ class TikaConfigAsyncWriter {
 
     private static final Logger LOG = 
LoggerFactory.getLogger(TikaAsyncCLI.class);
 
-    private static final String FETCHER_NAME = "fsf";
-    private static final String EMITTER_NAME = "fse";
+    static final String FETCHER_NAME = "fsf";
+    static final String EMITTER_NAME = "fse";
 
     private final SimpleAsyncConfig simpleAsyncConfig;
 
@@ -82,10 +82,21 @@ class TikaConfigAsyncWriter {
             properties = document.createElement("properties");
             document.appendChild(properties);
         }
-        writePipesIterator(document, properties);
-        writeFetchers(document, properties);
-        writeEmitters(document, properties);
-        writeAsync(document, properties);
+        Path baseInput = Paths.get(simpleAsyncConfig.getInputDir());
+        Path baseOutput = Paths.get(simpleAsyncConfig.getOutputDir());
+        if (Files.isRegularFile(baseInput)) {
+            if (baseInput.equals(baseOutput)) {
+                baseInput = baseInput.getParent();
+                baseOutput = baseInput;
+            } else {
+                baseInput = baseInput.getParent();
+            }
+        }
+
+        writePipesIterator(document, properties, baseInput);
+        writeFetchers(document, properties, baseInput);
+        writeEmitters(document, properties, baseOutput);
+        writeAsync(document, properties, output);
         Transformer transformer = TransformerFactory
                 .newInstance().newTransformer();
         transformer.setOutputProperty(OutputKeys.INDENT, "yes");
@@ -98,39 +109,38 @@ class TikaConfigAsyncWriter {
 
     }
 
-    private void writePipesIterator(Document document, Element properties) {
+    private void writePipesIterator(Document document, Element properties, 
Path baseInput) {
         Element pipesIterator = findChild("pipesIterator", properties);
         if (pipesIterator != null) {
             LOG.info("pipesIterator already exists in tika-config. Not 
overwriting with commandline");
             return;
         }
         if (! StringUtils.isBlank(simpleAsyncConfig.getFileList())) {
-            writeFileListIterator(document, properties);
+            writeFileListIterator(document, properties, baseInput);
         } else {
-            writeFileSystemIterator(document, properties);
+            writeFileSystemIterator(document, properties, baseInput);
         }
     }
 
-    private void writeFileSystemIterator(Document document, Element 
properties) {
+    private void writeFileSystemIterator(Document document, Element 
properties, Path baseInput) {
         Element pipesIterator = createAndGetElement(document, properties, 
"pipesIterator",
                 "class", 
"org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator");
-        appendTextElement(document, pipesIterator, "basePath",
-                
Paths.get(simpleAsyncConfig.getInputDir()).toAbsolutePath().toString());
+        appendTextElement(document, pipesIterator, "basePath", 
baseInput.toAbsolutePath().toString());
         appendTextElement(document, pipesIterator, "fetcherName", 
FETCHER_NAME);
         appendTextElement(document, pipesIterator, "emitterName", 
EMITTER_NAME);
     }
 
-    private void writeFileListIterator(Document document, Element properties) {
+    private void writeFileListIterator(Document document, Element properties, 
Path baseInput) {
         Element pipesIterator = createAndGetElement(document, properties, 
"pipesIterator",
                 "class", 
"org.apache.tika.pipes.pipesiterator.filelist.FileListPipesIterator");
         appendTextElement(document, pipesIterator, "fetcherName", 
FETCHER_NAME);
         appendTextElement(document, pipesIterator, "emitterName", 
EMITTER_NAME);
         appendTextElement(document, pipesIterator, "fileList",
-                
Paths.get(simpleAsyncConfig.getFileList()).toAbsolutePath().toString());
+                baseInput.toAbsolutePath().toString());
         appendTextElement(document, pipesIterator, "hasHeader", "false");
     }
 
-    private void writeEmitters(Document document, Element properties) {
+    private void writeEmitters(Document document, Element properties, Path 
baseOutput) {
         Element emitters = findChild("emitters", properties);
         if (emitters != null) {
             LOG.info("emitters already exist in tika-config. Not overwriting 
with commandline");
@@ -141,11 +151,10 @@ class TikaConfigAsyncWriter {
         Element emitter = createAndGetElement( document, emitters, "emitter",
                 "class", "org.apache.tika.pipes.emitter.fs.FileSystemEmitter");
         appendTextElement(document, emitter, "name", EMITTER_NAME);
-        appendTextElement(document, emitter, "basePath",
-                
Paths.get(simpleAsyncConfig.getOutputDir()).toAbsolutePath().toString());
+        appendTextElement(document, emitter, "basePath", 
baseOutput.toAbsolutePath().toString());
     }
 
-    private void writeFetchers(Document document, Element properties) {
+    private void writeFetchers(Document document, Element properties, Path 
baseInput) {
         Element fetchers = findChild("fetchers", properties);
         if (fetchers != null) {
             LOG.info("fetchers already exist in tika-config. Not overwriting 
with commandline");
@@ -157,16 +166,13 @@ class TikaConfigAsyncWriter {
                 "class", "org.apache.tika.pipes.fetcher.fs.FileSystemFetcher");
         appendTextElement(document, fetcher, "name", FETCHER_NAME);
         if (!StringUtils.isBlank(simpleAsyncConfig.getInputDir())) {
-            appendTextElement(document, fetcher, "basePath", Paths
-                    .get(simpleAsyncConfig.getInputDir())
-                    .toAbsolutePath()
-                    .toString());
+            appendTextElement(document, fetcher, "basePath", 
baseInput.toAbsolutePath().toString());
         } else {
             appendTextElement(document, fetcher, "basePath", "");
         }
     }
 
-    private void writeAsync(Document document, Element properties) {
+    private void writeAsync(Document document, Element properties, Path 
thisTikaConfig) {
         Element async = findChild("async", properties);
         if (async != null) {
             LOG.info("async already exists in tika-config. Not overwriting 
with commandline");
@@ -190,10 +196,9 @@ class TikaConfigAsyncWriter {
         if (simpleAsyncConfig.getTimeoutMs() != null) {
             appendTextElement(document, async, "timeoutMillis", 
Long.toString(simpleAsyncConfig.getTimeoutMs()));
         }
-        if (simpleAsyncConfig.getTikaConfig() != null) {
-            Path p = Paths.get(simpleAsyncConfig.getTikaConfig());
-            appendTextElement(document, async, "tikaConfig", 
p.toAbsolutePath().toString());
-        }
+        appendTextElement(document, async, "tikaConfig", 
thisTikaConfig.toAbsolutePath().toString());
+
+        appendTextElement(document, async, "maxForEmitBatchBytes", "0");
     }
 
     private static  void appendTextElement(Document document, Element parent, 
String itemName, String text, String... attrs) {
diff --git 
a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java
 
b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java
index 482e20ea2..b59790d3a 100644
--- 
a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java
+++ 
b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java
@@ -123,10 +123,10 @@ public class AsyncProcessorTest extends TikaTest {
         }
         processor.close();
 
-        String container = 
Files.readString(bytesDir.resolve("emit-1/emit-1-0"));
+        String container = 
Files.readString(bytesDir.resolve("emit-1-embed/emit-1-0"));
         assertContains("\"dc:creator\">Nikolai Lobachevsky", container);
 
-        String xmlEmbedded = 
Files.readString(bytesDir.resolve("emit-1/emit-1-1"));
+        String xmlEmbedded = 
Files.readString(bytesDir.resolve("emit-1-embed/emit-1-1"));
         assertContains("name=\"dc:creator\"", xmlEmbedded);
         assertContains(">embeddedAuthor</metadata>", xmlEmbedded);
 
diff --git 
a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java
 
b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java
index bd5457ee4..adafdafd6 100644
--- 
a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java
+++ 
b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaConfigAsyncWriterTest.java
@@ -42,7 +42,7 @@ public class TikaConfigAsyncWriterTest {
     public void testBasic(@TempDir Path dir) throws Exception {
         Path p = 
Paths.get(TikaConfigAsyncWriter.class.getResource("/configs/TIKA-4508-parsers.xml").toURI());
         SimpleAsyncConfig simpleAsyncConfig = new SimpleAsyncConfig("input", 
"output", 4,
-                10000L, "-Xmx1g", null, p.toAbsolutePath().toString());
+                10000L, "-Xmx1g", null, p.toAbsolutePath().toString(), false);
         Path target = dir.resolve("combined.xml");
         TikaConfigAsyncWriter writer = new 
TikaConfigAsyncWriter(simpleAsyncConfig);
         writer.write(target);
@@ -56,7 +56,7 @@ public class TikaConfigAsyncWriterTest {
     public void testDontOverwriteEmitters(@TempDir Path dir) throws Exception {
         Path p = 
Paths.get(TikaConfigAsyncWriter.class.getResource("/configs/TIKA-4508-emitters.xml").toURI());
         SimpleAsyncConfig simpleAsyncConfig = new SimpleAsyncConfig("input", 
"output", 4,
-                10000L, "-Xmx1g", null, p.toAbsolutePath().toString());
+                10000L, "-Xmx1g", null, p.toAbsolutePath().toString(), false);
         Path target = dir.resolve("combined.xml");
         TikaConfigAsyncWriter writer = new 
TikaConfigAsyncWriter(simpleAsyncConfig);
         writer.write(target);
diff --git 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java
 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java
index 3348eb720..80ff66984 100644
--- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/AbstractEmbeddedDocumentBytesHandler.java
@@ -18,19 +18,26 @@ package org.apache.tika.pipes.core.extractor;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;
 
+import org.apache.tika.config.TikaConfig;
 import org.apache.tika.extractor.EmbeddedDocumentBytesHandler;
 import org.apache.tika.io.FilenameUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MimeTypeException;
+import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.pipes.core.extractor.EmbeddedDocumentBytesConfig;
 import org.apache.tika.utils.StringUtils;
 
 public abstract class AbstractEmbeddedDocumentBytesHandler implements 
EmbeddedDocumentBytesHandler {
 
+    private static final MimeTypes MIME_TYPES = 
TikaConfig.getDefaultConfig().getMimeRepository();
+
     List<Integer> ids = new ArrayList<>();
 
     public String getEmitKey(String containerEmitKey, int embeddedId,
@@ -43,8 +50,24 @@ public abstract class AbstractEmbeddedDocumentBytesHandler 
implements EmbeddedDo
 
 
         StringBuilder emitKey = new StringBuilder();
-        if (StringUtils.isBlank(embeddedDocumentBytesConfig.getEmitKeyBase())) 
{
+        if (embeddedDocumentBytesConfig.getKeyBaseStrategy() ==
+                
EmbeddedDocumentBytesConfig.KEY_BASE_STRATEGY.CONTAINER_NAME_AS_IS) {
+            emitKey.append(containerEmitKey);
+            emitKey.append("-embed");
+            emitKey.append("/");
+            
emitKey.append(embeddedIdString).append(embeddedDocumentBytesConfig.getEmbeddedIdPrefix());
+            Path p = 
Paths.get(metadata.get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH));
+            String fName = p.getFileName().toString();
+            emitKey.append(fName);
+            if (! fName.contains(".")) {
+                appendSuffix(emitKey, metadata, embeddedDocumentBytesConfig);
+            }
+
+            return emitKey.toString();
+        } else if (embeddedDocumentBytesConfig.getKeyBaseStrategy() ==
+                
EmbeddedDocumentBytesConfig.KEY_BASE_STRATEGY.CONTAINER_NAME_NUMBERED) {
             emitKey.append(containerEmitKey);
+            emitKey.append("-embed");
             emitKey.append("/")
                     .append(FilenameUtils.getName(containerEmitKey));
         } else {
@@ -55,14 +78,7 @@ public abstract class AbstractEmbeddedDocumentBytesHandler 
implements EmbeddedDo
         //the file extension
         emitKey.append(embeddedDocumentBytesConfig.getEmbeddedIdPrefix())
                     .append(embeddedIdString);
-
-        if (embeddedDocumentBytesConfig.getSuffixStrategy().equals(
-                EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.EXISTING)) {
-            String fName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
-            String suffix = FilenameUtils.getSuffixFromPath(fName);
-            suffix = suffix.toLowerCase(Locale.US);
-            emitKey.append(suffix);
-        }
+        appendSuffix(emitKey, metadata, embeddedDocumentBytesConfig);
         return emitKey.toString();
     }
 
@@ -75,4 +91,35 @@ public abstract class AbstractEmbeddedDocumentBytesHandler 
implements EmbeddedDo
     public List<Integer> getIds() {
         return ids;
     }
+
+    private void appendSuffix(StringBuilder emitKey, Metadata metadata, 
EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig) {
+        if (embeddedDocumentBytesConfig.getSuffixStrategy().equals(
+                EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.EXISTING)) {
+            String fName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
+            String suffix = FilenameUtils.getSuffixFromPath(fName);
+            suffix = suffix.toLowerCase(Locale.US);
+            emitKey.append(suffix);
+        } else if (embeddedDocumentBytesConfig.getSuffixStrategy()
+                                              
.equals(EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.DETECTED)) {
+            emitKey.append(getExtension(metadata));
+        }
+    }
+
+    private String getExtension(Metadata metadata) {
+        String mime = metadata.get(Metadata.CONTENT_TYPE);
+        try {
+            String ext = MIME_TYPES
+                    .forName(mime)
+                    .getExtension();
+            if (ext == null) {
+                return ".bin";
+            } else {
+                return ext;
+            }
+        } catch (MimeTypeException e) {
+            //swallow
+        }
+        return ".bin";
+
+    }
 }
diff --git 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmbeddedDocumentBytesConfig.java
 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmbeddedDocumentBytesConfig.java
index dca605da7..6a449b5bf 100644
--- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmbeddedDocumentBytesConfig.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmbeddedDocumentBytesConfig.java
@@ -43,6 +43,23 @@ public class EmbeddedDocumentBytesConfig implements 
Serializable {
             throw new IllegalArgumentException("can't parse " + s);
         }
     }
+
+    public enum KEY_BASE_STRATEGY {
+        CONTAINER_NAME_NUMBERED,
+        CONTAINER_NAME_AS_IS,
+        CUSTOM_BASE;
+
+        public static KEY_BASE_STRATEGY parse(String s) {
+            if (s.equalsIgnoreCase(CONTAINER_NAME_NUMBERED.name())) {
+                return CONTAINER_NAME_NUMBERED;
+            } else if (s.equalsIgnoreCase(CONTAINER_NAME_AS_IS.name())) {
+                return CONTAINER_NAME_AS_IS;
+            } else if (s.equalsIgnoreCase(CUSTOM_BASE.name())) {
+                return CUSTOM_BASE;
+            }
+            throw new IllegalArgumentException("can't parse " + s);
+        }
+    }
     //for our current custom serialization, this can't be final. :(
     private boolean extractEmbeddedDocumentBytes;
 
@@ -56,9 +73,10 @@ public class EmbeddedDocumentBytesConfig implements 
Serializable {
 
     private boolean includeOriginal = false;
 
+    private KEY_BASE_STRATEGY keyBaseStrategy = 
KEY_BASE_STRATEGY.CONTAINER_NAME_NUMBERED;
     //This should be set per file. This allows a custom
     //emit key base that bypasses the algorithmic generation of the emitKey
-    //from the primary json emitKey
+    //from the primary json emitKey when keyBase Strategy is CUSTOM_BASE
     private String emitKeyBase = "";
 
     /**
@@ -94,6 +112,10 @@ public class EmbeddedDocumentBytesConfig implements 
Serializable {
         return suffixStrategy;
     }
 
+    public KEY_BASE_STRATEGY getKeyBaseStrategy() {
+        return keyBaseStrategy;
+    }
+
     public String getEmbeddedIdPrefix() {
         return embeddedIdPrefix;
     }
@@ -118,6 +140,14 @@ public class EmbeddedDocumentBytesConfig implements 
Serializable {
         setSuffixStrategy(SUFFIX_STRATEGY.valueOf(suffixStrategy));
     }
 
+    public void setKeyBaseStrategy(KEY_BASE_STRATEGY keyBaseStrategy) {
+        this.keyBaseStrategy = keyBaseStrategy;
+    }
+
+    public void setKeyBaseStrategy(String keyBaseStrategy) {
+        setKeyBaseStrategy(KEY_BASE_STRATEGY.valueOf(keyBaseStrategy));
+    }
+
     public void setEmbeddedIdPrefix(String embeddedIdPrefix) {
         this.embeddedIdPrefix = embeddedIdPrefix;
     }
@@ -140,28 +170,20 @@ public class EmbeddedDocumentBytesConfig implements 
Serializable {
 
     @Override
     public String toString() {
-        return "EmbeddedDocumentBytesConfig{" + 
"extractEmbeddedDocumentBytes=" + extractEmbeddedDocumentBytes + ", 
zeroPadName=" +
-                zeroPadName + ", suffixStrategy=" +
-                suffixStrategy + ", embeddedIdPrefix='" + embeddedIdPrefix + 
'\'' + ", emitter='" + emitter + '\'' +
-                ", includeOriginal=" + includeOriginal + ", emitKeyBase='" +
-                emitKeyBase + '\'' + '}';
+        return "EmbeddedDocumentBytesConfig{" + 
"extractEmbeddedDocumentBytes=" + extractEmbeddedDocumentBytes + ", 
zeroPadName=" + zeroPadName + ", suffixStrategy=" +
+                suffixStrategy + ", embeddedIdPrefix='" + embeddedIdPrefix + 
'\'' + ", emitter='" + emitter + '\'' + ", includeOriginal=" + includeOriginal 
+ ", keyBaseStrategy=" +
+                keyBaseStrategy + ", emitKeyBase='" + emitKeyBase + '\'' + '}';
     }
 
     @Override
-    public boolean equals(Object o) {
-        if (this == o) {
-            return true;
-        }
-        if (o == null || getClass() != o.getClass()) {
+    public final boolean equals(Object o) {
+        if (!(o instanceof EmbeddedDocumentBytesConfig config)) {
             return false;
         }
 
-        EmbeddedDocumentBytesConfig that = (EmbeddedDocumentBytesConfig) o;
-        return extractEmbeddedDocumentBytes == 
that.extractEmbeddedDocumentBytes && zeroPadName == that.zeroPadName
-                && includeOriginal == that.includeOriginal &&
-                suffixStrategy == that.suffixStrategy && 
Objects.equals(embeddedIdPrefix, that.embeddedIdPrefix)
-                && Objects.equals(emitter, that.emitter) &&
-                Objects.equals(emitKeyBase, that.emitKeyBase);
+        return extractEmbeddedDocumentBytes == 
config.extractEmbeddedDocumentBytes && zeroPadName == config.zeroPadName && 
includeOriginal == config.includeOriginal &&
+                suffixStrategy == config.suffixStrategy && 
Objects.equals(embeddedIdPrefix, config.embeddedIdPrefix) && 
Objects.equals(emitter, config.emitter) &&
+                keyBaseStrategy == config.keyBaseStrategy && 
Objects.equals(emitKeyBase, config.emitKeyBase);
     }
 
     @Override
@@ -172,6 +194,7 @@ public class EmbeddedDocumentBytesConfig implements 
Serializable {
         result = 31 * result + Objects.hashCode(embeddedIdPrefix);
         result = 31 * result + Objects.hashCode(emitter);
         result = 31 * result + Boolean.hashCode(includeOriginal);
+        result = 31 * result + Objects.hashCode(keyBaseStrategy);
         result = 31 * result + Objects.hashCode(emitKeyBase);
         return result;
     }

Reply via email to