This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4626
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 006efccb319edc6aeb8893ad9d9b8057e9b0edb4
Author: tallison <[email protected]>
AuthorDate: Wed Jan 21 06:37:49 2026 -0500

    TIKA-4626 - swap in tika-pipes for /tika and /rmeta endpoints
---
 .../org/apache/tika/pipes/core/PipesClient.java    |   3 +
 .../tika/pipes/core/server/ParseHandler.java       |   8 +
 .../apache/tika/pipes/core/server/PipesServer.java |   2 +
 .../apache/tika/pipes/core/server/PipesWorker.java |  12 +-
 .../apache/tika/server/core/TikaServerProcess.java |  12 +-
 .../server/core/resource/PipesParsingHelper.java   | 117 +++-----
 .../core/resource/RecursiveMetadataResource.java   |  38 +--
 .../org/apache/tika/server/core/CXFTestBase.java   |  59 +++-
 .../server/core/benchmark/TikaServerBenchmark.java | 312 ++++++++++-----------
 tika-server/tika-server-standard/pom.xml           |   2 +-
 10 files changed, 269 insertions(+), 296 deletions(-)

diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesClient.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesClient.java
index e79c35ccfe..053856bbeb 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesClient.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PipesClient.java
@@ -423,7 +423,9 @@ public class PipesClient implements Closeable {
         int len = serverTuple.input.readInt();
         byte[] bytes = new byte[len];
         serverTuple.input.readFully(bytes);
+
         writeAck();
+
         try (ObjectInputStream objectInputStream = new 
ObjectInputStream(UnsynchronizedByteArrayInputStream
                 .builder()
                 .setByteArray(bytes)
@@ -492,6 +494,7 @@ public class PipesClient implements Closeable {
             }
         }
         socket.setSoTimeout((int) pipesConfig.getSocketTimeoutMs());
+        socket.setTcpNoDelay(true); // Disable Nagle's algorithm to avoid ~40ms delays on small writes
         serverTuple = new ServerTuple(process, serverSocket, socket, new 
DataInputStream(socket.getInputStream()),
                 new DataOutputStream(socket.getOutputStream()), tmpDir);
         waitForStartup();
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
index af3e75f50a..a28b2c15dc 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ParseHandler.java
@@ -34,6 +34,7 @@ import org.apache.tika.digest.Digester;
 import org.apache.tika.digest.SkipContainerDocumentDigest;
 import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.WriteLimitReachedException;
 import org.apache.tika.extractor.DocumentSelector;
 import org.apache.tika.extractor.EmbeddedDocumentBytesHandler;
 import org.apache.tika.io.TikaInputStream;
@@ -221,11 +222,15 @@ class ParseHandler {
         //queue better be empty. we deserve an exception if not
         intermediateResult.add(metadata);
         countDownLatch.await();
+        boolean writeLimitReached = false;
         try {
             autoDetectParser.parse(stream, handler, metadata, parseContext);
         } catch (SAXException e) {
             containerException = ExceptionUtils.getStackTrace(e);
             LOG.warn("sax problem:" + fetchEmitTuple.getId(), e);
+            if (WriteLimitReachedException.isWriteLimitReached(e)) {
+                writeLimitReached = true;
+            }
         } catch (EncryptedDocumentException e) {
             containerException = ExceptionUtils.getStackTrace(e);
             LOG.warn("encrypted document:" + fetchEmitTuple.getId(), e);
@@ -240,6 +245,9 @@ class ParseHandler {
             if (containerException != null) {
                 metadata.add(TikaCoreProperties.CONTAINER_EXCEPTION, 
containerException);
             }
+            if (writeLimitReached) {
+                metadata.set(TikaCoreProperties.WRITE_LIMIT_REACHED, true);
+            }
             if (LOG.isTraceEnabled()) {
                 LOG.trace("timer -- parse only time: {} ms", 
System.currentTimeMillis() - start);
             }
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
index 66d5d9ae50..a97e8557a9 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
@@ -165,6 +165,7 @@ public class PipesServer implements AutoCloseable {
             LOG.debug("pipesClientId={}: connecting to client on port={}", 
pipesClientId, port);
             Socket socket = new Socket();
             socket.connect(new 
InetSocketAddress(InetAddress.getLoopbackAddress(), port), 
PipesClient.SOCKET_CONNECT_TIMEOUT_MS);
+            socket.setTcpNoDelay(true); // Disable Nagle's algorithm to avoid ~40ms delays on small writes
 
             DataInputStream dis = new DataInputStream(socket.getInputStream());
             DataOutputStream dos = new 
DataOutputStream(socket.getOutputStream());
@@ -443,6 +444,7 @@ public class PipesServer implements AutoCloseable {
             int length = input.readInt();
             byte[] bytes = new byte[length];
             input.readFully(bytes);
+
             try (ObjectInputStream objectInputStream = new ObjectInputStream(
                     
UnsynchronizedByteArrayInputStream.builder().setByteArray(bytes).get())) {
                 return (FetchEmitTuple) objectInputStream.readObject();
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
index b779388127..d8315cfd9f 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
@@ -18,8 +18,6 @@ package org.apache.tika.pipes.core.server;
 
 import java.io.Closeable;
 import java.io.IOException;
-import java.time.Duration;
-import java.time.Instant;
 import java.util.List;
 import java.util.concurrent.Callable;
 
@@ -71,23 +69,15 @@ class PipesWorker implements Callable<PipesResult> {
 
     @Override
     public PipesResult call() throws Exception {
-        Instant start = Instant.now();
-
-        if (LOG.isTraceEnabled()) {
-            LOG.trace("timer -- got fetcher: {}ms", Duration.between(start, 
Instant.now()).toMillis());
-        }
-        start = Instant.now();
         MetadataListAndEmbeddedBytes parseData = null;
         try {
             //this can be null if there is a fetch exception
             ParseDataOrPipesResult parseDataResult = parseFromTuple();
+
             if (parseDataResult.pipesResult != null) {
                 return parseDataResult.pipesResult;
             }
 
-            if (LOG.isTraceEnabled()) {
-                LOG.trace("timer -- to parse: {} ms", Duration.between(start, 
Instant.now()).toMillis());
-            }
             parseData = parseDataResult.parseDataResult;
 
             if (parseData == null || 
metadataIsEmpty(parseData.getMetadataList())) {
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
index 0b7cd9cc5f..4edf08bc9a 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerProcess.java
@@ -420,7 +420,7 @@ public class TikaServerProcess {
      * Initializes the PipesParsingHelper for pipes-based parsing with process 
isolation.
      * <p>
      * The PipesParser will be configured with PASSBACK_ALL emit strategy so 
that
-     * parsed content is returned directly instead of being emitted to an 
external emitter.
+     * parsed results are returned through the socket connection.
      * <p>
      * If no config file is provided, a minimal default configuration will be 
created.
      * The plugin-roots will default to a "plugins" directory at the same 
level as the server jar.
@@ -430,25 +430,23 @@ public class TikaServerProcess {
      * @throws Exception if pipes initialization fails
      */
     private static PipesParsingHelper initPipesParsingHelper(TikaServerConfig 
tikaServerConfig) throws Exception {
-        TikaJsonConfig tikaJsonConfig;
+        // Load or create config
         Path configPath;
-
         if (tikaServerConfig.hasConfigFile()) {
             configPath = tikaServerConfig.getConfigPath();
-            tikaJsonConfig = TikaJsonConfig.load(configPath);
         } else {
-            // Create minimal config - will use defaults
             configPath = createDefaultConfig();
-            tikaJsonConfig = TikaJsonConfig.load(configPath);
         }
 
+        TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(configPath);
+
         // Load or create PipesConfig with defaults
         PipesConfig pipesConfig = tikaJsonConfig.deserialize("pipes", 
PipesConfig.class);
         if (pipesConfig == null) {
             pipesConfig = new PipesConfig();
         }
 
-        // Force PASSBACK_ALL strategy so results are returned to us (not 
emitted)
+        // Use PASSBACK_ALL strategy: results are returned through the socket
         pipesConfig.setEmitStrategy(new 
EmitStrategyConfig(EmitStrategy.PASSBACK_ALL));
 
         // Create PipesParser
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java
index 290a4fc0b1..51f85d72ec 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java
@@ -40,8 +40,6 @@ import org.apache.tika.pipes.api.PipesResult;
 import org.apache.tika.pipes.api.emitter.EmitData;
 import org.apache.tika.pipes.api.emitter.EmitKey;
 import org.apache.tika.pipes.api.fetcher.FetchKey;
-import org.apache.tika.pipes.core.EmitStrategy;
-import org.apache.tika.pipes.core.EmitStrategyConfig;
 import org.apache.tika.pipes.core.PipesConfig;
 import org.apache.tika.pipes.core.PipesException;
 import org.apache.tika.pipes.core.PipesParser;
@@ -76,22 +74,22 @@ public class PipesParsingHelper {
 
     private final PipesParser pipesParser;
     private final PipesConfig pipesConfig;
-    private final Path tempDirectory;
+    private final Path inputTempDirectory;
 
     public PipesParsingHelper(PipesParser pipesParser, PipesConfig 
pipesConfig) {
         this.pipesParser = pipesParser;
         this.pipesConfig = pipesConfig;
 
-        // Determine temp directory
+        // Determine input temp directory
         String configTempDir = pipesConfig.getTempDirectory();
         if (configTempDir != null && !configTempDir.isBlank()) {
-            this.tempDirectory = Paths.get(configTempDir);
-            if (!Files.isDirectory(this.tempDirectory)) {
+            this.inputTempDirectory = Paths.get(configTempDir);
+            if (!Files.isDirectory(this.inputTempDirectory)) {
                 throw new IllegalArgumentException(
                         "Configured tempDirectory does not exist or is not a 
directory: " + configTempDir);
             }
         } else {
-            this.tempDirectory = null; // Use system default
+            this.inputTempDirectory = null; // Use system default
         }
     }
 
@@ -108,29 +106,29 @@ public class PipesParsingHelper {
      */
     public List<Metadata> parse(InputStream inputStream, Metadata metadata,
                                  ParseContext parseContext, ParseMode 
parseMode) throws IOException {
-        Path tempFile = null;
+        Path inputTempFile = null;
+        String requestId = UUID.randomUUID().toString();
+
         try {
             // Write input stream to temp file
-            tempFile = createTempFile();
-            Files.copy(inputStream, tempFile, 
StandardCopyOption.REPLACE_EXISTING);
+            inputTempFile = createInputTempFile();
+            Files.copy(inputStream, inputTempFile, 
StandardCopyOption.REPLACE_EXISTING);
 
             // Set parse mode in context
             parseContext.set(ParseMode.class, parseMode);
 
-            // Set emit strategy override to PASSBACK_ALL - we want results 
returned, not emitted
-            parseContext.set(EmitStrategyConfig.class, new 
EmitStrategyConfig(EmitStrategy.PASSBACK_ALL));
+            // Create FetchEmitTuple - use NO_EMIT since we're using 
PASSBACK_ALL
+            FetchKey fetchKey = new FetchKey(DEFAULT_FETCHER_ID, 
inputTempFile.toAbsolutePath().toString());
 
-            // Create FetchEmitTuple
-            FetchKey fetchKey = new FetchKey(DEFAULT_FETCHER_ID, 
tempFile.toAbsolutePath().toString());
             FetchEmitTuple tuple = new FetchEmitTuple(
-                    UUID.randomUUID().toString(),
+                    requestId,
                     fetchKey,
                     EmitKey.NO_EMIT,
                     metadata,
                     parseContext
             );
 
-            // Execute parse via pipes
+            // Execute parse via pipes - results will be passed back through 
socket
             PipesResult result = pipesParser.parse(tuple);
 
             // Process result
@@ -142,20 +140,19 @@ public class PipesParsingHelper {
         } catch (PipesException e) {
             throw new TikaServerParseException(e);
         } finally {
-            // Clean up temp file
-            if (tempFile != null) {
+            // Clean up input temp file
+            if (inputTempFile != null) {
                 try {
-                    Files.deleteIfExists(tempFile);
+                    Files.deleteIfExists(inputTempFile);
                 } catch (IOException e) {
-                    LOG.warn("Failed to delete temp file: {}", tempFile, e);
+                    LOG.warn("Failed to delete input temp file: {}", 
inputTempFile, e);
                 }
             }
         }
     }
 
     /**
-     * Processes the PipesResult and extracts metadata list.
-     * Throws appropriate exceptions for error states.
+     * Processes the PipesResult and returns the metadata list.
      */
     private List<Metadata> processResult(PipesResult result) {
         if (result.isProcessCrash()) {
@@ -183,66 +180,22 @@ public class PipesParsingHelper {
                     Response.Status.INTERNAL_SERVER_ERROR);
         }
 
-        // Success cases
+        // Get metadata from result
         EmitData emitData = result.emitData();
-        if (emitData == null) {
-            LOG.debug("Parse returned null emitData, status: {}", 
result.status());
-            // Check if there's an exception message in the result
-            String message = result.message();
-            if (message != null && !message.isEmpty()) {
-                // Create metadata with exception info
-                Metadata metadata = new Metadata();
-                metadata.add(TikaCoreProperties.CONTAINER_EXCEPTION, message);
-                return Collections.singletonList(metadata);
-            }
-            return Collections.emptyList();
+        if (emitData != null && emitData.getMetadataList() != null) {
+            return emitData.getMetadataList();
         }
 
-        List<Metadata> metadataList = emitData.getMetadataList();
-        if (metadataList == null) {
-            return Collections.emptyList();
+        // Empty result
+        LOG.debug("Parse returned empty result, status: {}", result.status());
+        String message = result.message();
+        if (message != null && !message.isEmpty()) {
+            Metadata errorMetadata = new Metadata();
+            errorMetadata.add(TikaCoreProperties.CONTAINER_EXCEPTION, message);
+            return Collections.singletonList(errorMetadata);
         }
 
-        // Handle parse success with exception - always add exception info to 
metadata
-        // This includes PARSE_SUCCESS_WITH_EXCEPTION, 
EMIT_SUCCESS_PARSE_EXCEPTION, EMIT_SUCCESS_PASSBACK
-        String stackTrace = emitData.getContainerStackTrace();
-        boolean hasException = stackTrace != null && !stackTrace.isEmpty();
-
-        if (hasException && !metadataList.isEmpty()) {
-            // Check if this was a WriteLimitReached exception and set the flag
-            checkWriteLimitReached(metadataList, stackTrace);
-            // Add the stack trace to the metadata if not already set by pipes
-            Metadata firstMetadata = metadataList.get(0);
-            if (firstMetadata.get(TikaCoreProperties.CONTAINER_EXCEPTION) == 
null) {
-                firstMetadata.set(TikaCoreProperties.CONTAINER_EXCEPTION, 
stackTrace);
-            }
-        }
-
-        return metadataList;
-    }
-
-    /**
-     * Checks if the parse result was due to write limit being reached.
-     * This is a "soft" exception that should still return HTTP 200.
-     * If detected from stack trace but not in metadata, sets the metadata 
flag.
-     */
-    private boolean checkWriteLimitReached(List<Metadata> metadataList, String 
stackTrace) {
-        if (metadataList.isEmpty()) {
-            return false;
-        }
-        Metadata metadata = metadataList.get(0);
-        // Check metadata flag (set by RecursiveParserWrapper or 
CompositeParser)
-        String flagValue = 
metadata.get(TikaCoreProperties.WRITE_LIMIT_REACHED);
-        if ("true".equals(flagValue)) {
-            return true;
-        }
-        // Also check stack trace for WriteLimitReachedException
-        if (stackTrace != null && 
stackTrace.contains("WriteLimitReachedException")) {
-            // Set the metadata flag if not already set (for consistency)
-            metadata.set(TikaCoreProperties.WRITE_LIMIT_REACHED, "true");
-            return true;
-        }
-        return false;
+        return Collections.emptyList();
     }
 
     /**
@@ -265,13 +218,13 @@ public class PipesParsingHelper {
     }
 
     /**
-     * Creates a temp file in the configured temp directory.
+     * Creates a temp file for input in the configured temp directory.
      */
-    private Path createTempFile() throws IOException {
-        if (tempDirectory != null) {
-            return Files.createTempFile(tempDirectory, "tika-server-", ".tmp");
+    private Path createInputTempFile() throws IOException {
+        if (inputTempDirectory != null) {
+            return Files.createTempFile(inputTempDirectory, 
"tika-server-input-", ".tmp");
         } else {
-            return Files.createTempFile("tika-server-", ".tmp");
+            return Files.createTempFile("tika-server-input-", ".tmp");
         }
     }
 
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
index 2ea92c72cb..eda085354b 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
@@ -17,7 +17,6 @@
 package org.apache.tika.server.core.resource;
 
 import static org.apache.tika.server.core.resource.TikaResource.fillMetadata;
-import static org.apache.tika.server.core.resource.TikaResource.getTikaLoader;
 import static org.apache.tika.server.core.resource.TikaResource.getWriteLimit;
 import static 
org.apache.tika.server.core.resource.TikaResource.setupContentHandlerFactory;
 import static 
org.apache.tika.server.core.resource.TikaResource.setupContentHandlerFactoryIfNeeded;
@@ -42,7 +41,6 @@ import org.slf4j.LoggerFactory;
 
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.filter.MetadataFilter;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.pipes.api.ParseMode;
 import org.apache.tika.sax.BasicContentHandlerFactory;
@@ -55,7 +53,12 @@ public class RecursiveMetadataResource {
     protected static final BasicContentHandlerFactory.HANDLER_TYPE 
DEFAULT_HANDLER_TYPE = BasicContentHandlerFactory.HANDLER_TYPE.XML;
     private static final Logger LOG = 
LoggerFactory.getLogger(RecursiveMetadataResource.class);
 
-    public static List<Metadata> parseMetadata(TikaInputStream tis, Metadata 
metadata, MultivaluedMap<String, String> httpHeaders,
+    /**
+     * Parses content and returns metadata list.
+     * Metadata filtering is done in the child process, so no filtering needed 
here.
+     */
+    public static List<Metadata> parseMetadata(TikaInputStream tis, Metadata 
metadata,
+                                               MultivaluedMap<String, String> 
httpHeaders,
                                                ServerHandlerConfig 
handlerConfig)
             throws Exception {
 
@@ -68,10 +71,8 @@ public class RecursiveMetadataResource {
         setupContentHandlerFactory(context, handlerConfig.type().toString(), 
handlerConfig.writeLimit(),
                 handlerConfig.throwOnWriteLimitReached());
 
-        List<Metadata> metadataList = TikaResource.parseWithPipes(tis, 
metadata, context, ParseMode.RMETA);
-        MetadataFilter metadataFilter = context.get(MetadataFilter.class, 
getTikaLoader().loadMetadataFilters());
-        metadataFilter.filter(metadataList);
-        return metadataList;
+        // Filtering is done in child process, no need to filter again
+        return TikaResource.parseWithPipes(tis, metadata, context, 
ParseMode.RMETA);
     }
 
     static ServerHandlerConfig buildHandlerConfig(MultivaluedMap<String, 
String> httpHeaders, String handlerTypeName, ParseMode parseMode) {
@@ -113,10 +114,9 @@ public class RecursiveMetadataResource {
     @Path("form{" + HANDLER_TYPE_PARAM + " : (\\w+)?}")
     public Response getMetadataFromMultipart(Attachment att, 
@PathParam(HANDLER_TYPE_PARAM) String handlerTypeName) throws Exception {
         try (TikaInputStream tis = 
TikaInputStream.get(att.getObject(InputStream.class))) {
-            return Response
-                    .ok(parseMetadataToMetadataList(tis, new Metadata(), 
att.getHeaders(),
-                            buildHandlerConfig(att.getHeaders(), 
handlerTypeName, ParseMode.RMETA)))
-                    .build();
+            List<Metadata> metadataList = parseMetadata(tis, new Metadata(), 
att.getHeaders(),
+                    buildHandlerConfig(att.getHeaders(), handlerTypeName, 
ParseMode.RMETA));
+            return Response.ok(new MetadataList(metadataList)).build();
         }
     }
 
@@ -153,9 +153,8 @@ public class RecursiveMetadataResource {
         setupContentHandlerFactoryIfNeeded(context, 
handlerConfig.type().toString(),
                 handlerConfig.writeLimit(), 
handlerConfig.throwOnWriteLimitReached());
 
+        // Filtering is done in child process, no need to filter again
         List<Metadata> metadataList = TikaResource.parseWithPipes(tis, 
metadata, context, ParseMode.RMETA);
-        MetadataFilter metadataFilter = context.get(MetadataFilter.class, 
getTikaLoader().loadMetadataFilters());
-        metadataFilter.filter(metadataList);
         return new MetadataList(metadataList);
     }
 
@@ -188,16 +187,9 @@ public class RecursiveMetadataResource {
     public Response getMetadata(InputStream is, @Context HttpHeaders 
httpHeaders, @PathParam(HANDLER_TYPE_PARAM) String handlerTypeName) throws 
Exception {
         Metadata metadata = new Metadata();
         try (TikaInputStream tis = TikaInputStream.get(is)) {
-            return Response
-                    .ok(parseMetadataToMetadataList(tis, metadata, 
httpHeaders.getRequestHeaders(),
-                            
buildHandlerConfig(httpHeaders.getRequestHeaders(), handlerTypeName, 
ParseMode.RMETA)))
-                    .build();
+            List<Metadata> metadataList = parseMetadata(tis, metadata, 
httpHeaders.getRequestHeaders(),
+                    buildHandlerConfig(httpHeaders.getRequestHeaders(), 
handlerTypeName, ParseMode.RMETA));
+            return Response.ok(new MetadataList(metadataList)).build();
         }
     }
-
-    private MetadataList parseMetadataToMetadataList(TikaInputStream tis, 
Metadata metadata,
-                                                     MultivaluedMap<String, 
String> httpHeaders, ServerHandlerConfig handlerConfig)
-            throws Exception {
-        return new MetadataList(parseMetadata(tis, metadata, httpHeaders, 
handlerConfig));
-    }
 }
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
index 1a1b9cacfe..d73c002546 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/CXFTestBase.java
@@ -181,16 +181,18 @@ public abstract class CXFTestBase {
     public void setUp() throws Exception {
         Path tmp = Files.createTempFile("tika-server-test-", ".json");
         try {
+            // Copy tika config to temp file first
+            Files.copy(getTikaConfigInputStream(), tmp, 
StandardCopyOption.REPLACE_EXISTING);
+
             InputStream pipesConfigInputStream = getPipesConfigInputStream();
             if (pipesConfigInputStream != null) {
-                this.pipesConfigPath = 
Files.createTempFile("tika-server-pipes-", ".json");
-                Files.copy(pipesConfigInputStream, this.pipesConfigPath, 
StandardCopyOption.REPLACE_EXISTING);
+                // Test provided its own pipes config - merge in PASSBACK_ALL 
emit strategy
+                this.pipesConfigPath = 
mergePassbackAllStrategy(pipesConfigInputStream);
             } else {
-                // Create a default pipes config for tests
-                this.pipesConfigPath = createDefaultTestConfig();
+                // Create a default pipes config, merging metadata-filters 
from tika config
+                this.pipesConfigPath = createDefaultTestConfig(tmp);
             }
 
-            Files.copy(getTikaConfigInputStream(), tmp, 
StandardCopyOption.REPLACE_EXISTING);
             this.tika = TikaLoader.load(tmp);
 
             // Initialize PipesParsingHelper for pipes-based parsing
@@ -231,12 +233,53 @@ public abstract class CXFTestBase {
         server = sf.create();
     }
 
+    /**
+     * Merges PASSBACK_ALL emit strategy into a pipes config.
+     * This ensures the child process uses PASSBACK_ALL regardless of what's 
in the config file.
+     */
+    private Path mergePassbackAllStrategy(InputStream pipesConfigInputStream) 
throws IOException {
+        ObjectMapper mapper = new ObjectMapper();
+        com.fasterxml.jackson.databind.node.ObjectNode root = 
(com.fasterxml.jackson.databind.node.ObjectNode) 
mapper.readTree(pipesConfigInputStream);
+
+        // Get or create pipes section
+        com.fasterxml.jackson.databind.node.ObjectNode pipes = 
(com.fasterxml.jackson.databind.node.ObjectNode) root.get("pipes");
+        if (pipes == null) {
+            pipes = mapper.createObjectNode();
+            root.set("pipes", pipes);
+        }
+
+        // Set emit strategy to PASSBACK_ALL
+        com.fasterxml.jackson.databind.node.ObjectNode emitStrategy = 
mapper.createObjectNode();
+        emitStrategy.put("type", "PASSBACK_ALL");
+        pipes.set("emitStrategy", emitStrategy);
+
+        Path tempConfig = Files.createTempFile("tika-server-pipes-", ".json");
+        mapper.writerWithDefaultPrettyPrinter().writeValue(tempConfig.toFile(), root);
+        return tempConfig;
+    }
+
     /**
      * Creates a default test config with pipes configuration.
+     * If the tika config contains metadata-filters, they are merged into the 
pipes config.
+     *
+     * @param tikaConfigPath path to the tika config (may contain 
metadata-filters)
      */
-    private Path createDefaultTestConfig() throws IOException {
+    private Path createDefaultTestConfig(Path tikaConfigPath) throws 
IOException {
         Path pluginsDir = Paths.get("target/plugins").toAbsolutePath();
 
+        // Read tika config to check for metadata-filters
+        String metadataFiltersJson = "";
+        try {
+            ObjectMapper mapper = new ObjectMapper();
+            JsonNode tikaConfig = mapper.readTree(tikaConfigPath.toFile());
+            JsonNode metadataFilters = tikaConfig.get("metadata-filters");
+            if (metadataFilters != null && !metadataFilters.isEmpty()) {
+                metadataFiltersJson = ",\n              \"metadata-filters\": 
" + mapper.writeValueAsString(metadataFilters);
+            }
+        } catch (Exception e) {
+            LOG.debug("Could not read metadata-filters from tika config: {}", 
e.getMessage());
+        }
+
         String configJson = String.format(Locale.ROOT, """
             {
               "fetchers": {
@@ -250,9 +293,9 @@ public abstract class CXFTestBase {
                 "numClients": 2,
                 "timeoutMillis": 60000
               },
-              "plugin-roots": "%s"
+              "plugin-roots": "%s"%s
             }
-            """, pluginsDir.toString().replace("\\", "/"));
+            """, pluginsDir.toString().replace("\\", "/"), 
metadataFiltersJson);
 
         Path tempConfig = Files.createTempFile("tika-test-default-config-", 
".json");
         Files.writeString(tempConfig, configJson);
diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/benchmark/TikaServerBenchmark.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/benchmark/TikaServerBenchmark.java
index bbaed1080b..795a22d09f 100644
--- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/benchmark/TikaServerBenchmark.java
+++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/benchmark/TikaServerBenchmark.java
@@ -60,8 +60,8 @@ import java.util.concurrent.atomic.AtomicInteger;
  *   --async              Async mode: all requests sent immediately (stress 
test)
  *
  *   Size mode options:
- *   --small-kb=N         Size of small files in KB (default: 1)
- *   --large-kb=N         Size of large files in KB (default: 100)
+ *   --small-times=N      Number of paragraph repetitions for small output 
(default: 10)
+ *   --large-times=N      Number of paragraph repetitions for large output 
(default: 1000)
  *
  *   Sleep mode options:
  *   --short-ms=N         Short sleep duration in ms (default: 10)
@@ -70,24 +70,33 @@ import java.util.concurrent.atomic.AtomicInteger;
  */
 public class TikaServerBenchmark {
 
-    private static final String MOCK_XML_SIZE_TEMPLATE = """
-            <?xml version="1.0" encoding="UTF-8" ?>
-            <mock>
-                <metadata action="add" name="author">Benchmark Test</metadata>
-                <metadata action="add" name="title">Performance Test 
Document</metadata>
-                <write element="p">%s</write>
-            </mock>
-            """;
+    // Template with both sleep (parse time) and output size (times)
+    // Format args: sleepMs, times
+    // Padding added to avoid zip bomb detection (need >10KB input for 1MB 
output at 100:1 ratio)
+    private static final String MOCK_XML_TEMPLATE;
 
-    private static final String MOCK_XML_SLEEP_TEMPLATE = """
+    static {
+        StringBuilder sb = new StringBuilder();
+        sb.append("""
             <?xml version="1.0" encoding="UTF-8" ?>
             <mock>
                 <metadata action="add" name="author">Benchmark Test</metadata>
-                <metadata action="add" name="title">Sleep Test 
Document</metadata>
+                <metadata action="add" name="title">Performance Test 
Document</metadata>
                 <hang millis="%d" heavy="false" interruptible="false" />
-                <write element="p">Test content after sleep</write>
+                <write element="p" times="%d">Lorem ipsum dolor sit amet, 
consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore.</write>
+                <!-- Padding to increase input size and avoid zip bomb 
detection:
+            """);
+        // Add ~12KB of padding (120 lines of 100 chars each)
+        String paddingLine = "PADDING: Lorem ipsum dolor sit amet, consectetur 
adipiscing elit, sed do eiusmod tempor incididunt.\n";
+        for (int i = 0; i < 120; i++) {
+            sb.append(paddingLine);
+        }
+        sb.append("""
+                -->
             </mock>
-            """;
+            """);
+        MOCK_XML_TEMPLATE = sb.toString();
+    }
 
     private final String baseUrl;
     private final String endpoint;
@@ -95,14 +104,13 @@ public class TikaServerBenchmark {
     private final int count;
     private final int warmupCount;
     private final int repeats;
-    private final String mode;
     private final boolean syncMode;
 
-    // Size mode params
-    private final int smallSizeKb;
-    private final int largeSizeKb;
+    // Output size params (times = number of paragraph repetitions)
+    private final int smallTimes;
+    private final int largeTimes;
 
-    // Sleep mode params
+    // Parse time params
     private final int shortSleepMs;
     private final int longSleepMs;
 
@@ -110,22 +118,24 @@ public class TikaServerBenchmark {
     private final ExecutorService httpExecutor;
     private final ExecutorService taskExecutor;
 
-    private byte[] smallContent;
-    private byte[] largeContent;
+    // 2x2 matrix: [short/long sleep] x [small/large output]
+    private byte[] shortSmallContent;  // short parse, small output
+    private byte[] shortLargeContent;  // short parse, large output
+    private byte[] longSmallContent;   // long parse, small output
+    private byte[] longLargeContent;   // long parse, large output
 
     public TikaServerBenchmark(String baseUrl, String endpoint, int threads, 
int count,
-                               int warmupCount, int repeats, String mode, 
boolean syncMode,
-                               int smallSizeKb, int largeSizeKb, int 
shortSleepMs, int longSleepMs) {
+                               int warmupCount, int repeats, boolean syncMode,
+                               int smallTimes, int largeTimes, int 
shortSleepMs, int longSleepMs) {
         this.baseUrl = baseUrl;
         this.endpoint = endpoint;
         this.threads = threads;
         this.count = count;
         this.warmupCount = warmupCount;
         this.repeats = repeats;
-        this.mode = mode;
         this.syncMode = syncMode;
-        this.smallSizeKb = smallSizeKb;
-        this.largeSizeKb = largeSizeKb;
+        this.smallTimes = smallTimes;
+        this.largeTimes = largeTimes;
         this.shortSleepMs = shortSleepMs;
         this.longSleepMs = longSleepMs;
 
@@ -143,54 +153,33 @@ public class TikaServerBenchmark {
     }
 
     private void generateTestContent() {
-        if ("sleep".equals(mode)) {
-            smallContent = generateSleepMockXml(shortSleepMs);
-            largeContent = generateSleepMockXml(longSleepMs);
-        } else {
-            smallContent = generateSizeMockXml(smallSizeKb * 1024);
-            largeContent = generateSizeMockXml(largeSizeKb * 1024);
-        }
-    }
-
-    private byte[] generateSizeMockXml(int targetSizeBytes) {
-        StringBuilder content = new StringBuilder();
-        String baseText = "Lorem ipsum dolor sit amet, consectetur adipiscing 
elit. " +
-                "Sed do eiusmod tempor incididunt ut labore et dolore magna 
aliqua. " +
-                "Ut enim ad minim veniam, quis nostrud exercitation ullamco 
laboris. ";
-
-        while (content.length() < targetSizeBytes) {
-            content.append(baseText);
-        }
-
-        String xml = String.format(Locale.ROOT, MOCK_XML_SIZE_TEMPLATE,
-                content.substring(0, Math.min(content.length(), 
targetSizeBytes)));
-        return xml.getBytes(StandardCharsets.UTF_8);
+        // 2x2 matrix of test content
+        shortSmallContent = generateMockXml(shortSleepMs, smallTimes);
+        shortLargeContent = generateMockXml(shortSleepMs, largeTimes);
+        longSmallContent = generateMockXml(longSleepMs, smallTimes);
+        longLargeContent = generateMockXml(longSleepMs, largeTimes);
     }
 
-    private byte[] generateSleepMockXml(int sleepMs) {
-        String xml = String.format(Locale.ROOT, MOCK_XML_SLEEP_TEMPLATE, 
sleepMs);
+    private byte[] generateMockXml(int sleepMs, int times) {
+        String xml = String.format(Locale.ROOT, MOCK_XML_TEMPLATE, sleepMs, 
times);
         return xml.getBytes(StandardCharsets.UTF_8);
     }
 
     public void run() throws Exception {
         System.out.println("=".repeat(70));
-        System.out.println("Tika Server Performance Benchmark");
+        System.out.println("Tika Server Performance Benchmark (2x2 Matrix)");
         System.out.println("=".repeat(70));
         System.out.println();
         System.out.printf(Locale.ROOT, "Target URL:    %s%s%n", baseUrl, 
endpoint);
         System.out.printf(Locale.ROOT, "Threads:       %d%n", threads);
         System.out.printf(Locale.ROOT, "Requests/test: %d%n", count);
         System.out.printf(Locale.ROOT, "Repeats:       %d%n", repeats);
-        System.out.printf(Locale.ROOT, "Mode:          %s%n", mode);
         System.out.printf(Locale.ROOT, "Request mode:  %s%n", syncMode ? "sync 
(realistic)" : "async (stress test)");
-
-        if ("sleep".equals(mode)) {
-            System.out.printf(Locale.ROOT, "Short sleep:   %d ms%n", 
shortSleepMs);
-            System.out.printf(Locale.ROOT, "Long sleep:    %d ms%n", 
longSleepMs);
-        } else {
-            System.out.printf(Locale.ROOT, "Small size:    %d KB%n", 
smallSizeKb);
-            System.out.printf(Locale.ROOT, "Large size:    %d KB%n", 
largeSizeKb);
-        }
+        System.out.println();
+        System.out.println("Test Matrix:");
+        System.out.printf(Locale.ROOT, "  Parse time:  short=%dms, 
long=%dms%n", shortSleepMs, longSleepMs);
+        System.out.printf(Locale.ROOT, "  Output size: small=%d times (~%dKB), 
large=%d times (~%dKB)%n",
+                smallTimes, smallTimes * 100 / 1024, largeTimes, largeTimes * 
100 / 1024);
         System.out.println();
 
         // Check server is reachable
@@ -201,31 +190,34 @@ public class TikaServerBenchmark {
         }
         System.out.println("Server is reachable.");
 
-        // Verify MockParser is being used (only for sleep mode)
-        if ("sleep".equals(mode)) {
-            if (!verifyMockParserInUse()) {
-                System.err.println("ERROR: MockParser is NOT being used by the 
server!");
-                System.err.println("The tika-core test jar must be on the 
server's classpath.");
-                System.err.println("If using java -jar, the test jar must be 
in the manifest Class-Path.");
-                System.err.println("Try running with: java -cp 
'tika-server.jar:lib/*' org.apache.tika.server.core.TikaServerCli");
-                System.exit(1);
-            }
-            System.out.println("MockParser verified - sleep mode will work 
correctly.");
+        // Verify MockParser is being used
+        if (!verifyMockParserInUse()) {
+            System.err.println("ERROR: MockParser is NOT being used by the 
server!");
+            System.err.println("The tika-core test jar must be on the server's 
classpath.");
+            System.err.println("If using java -jar, the test jar must be in 
the manifest Class-Path.");
+            System.err.println("Try running with: java -cp 
'tika-server.jar:lib/*' org.apache.tika.server.core.TikaServerCli");
+            System.exit(1);
         }
+        System.out.println("MockParser verified.");
         System.out.println();
 
         // Warmup
         System.out.printf(Locale.ROOT, "Warming up with %d requests...%n", 
warmupCount);
-        runBenchmark(smallContent, warmupCount, "warmup", getSmallLabel());
+        runBenchmark(shortSmallContent, warmupCount, "warmup", "warmup");
         System.out.println("Warmup complete.");
         System.out.println();
 
-        String firstLabel = getSmallLabel();
-        String secondLabel = getLargeLabel();
+        // Labels for the 2x2 matrix
+        String shortSmallLabel = String.format(Locale.ROOT, 
"short-%dms/small-%d", shortSleepMs, smallTimes);
+        String shortLargeLabel = String.format(Locale.ROOT, 
"short-%dms/large-%d", shortSleepMs, largeTimes);
+        String longSmallLabel = String.format(Locale.ROOT, 
"long-%dms/small-%d", longSleepMs, smallTimes);
+        String longLargeLabel = String.format(Locale.ROOT, 
"long-%dms/large-%d", longSleepMs, largeTimes);
 
         // Collect results across all repeats
-        List<BenchmarkResult> firstResults = new ArrayList<>();
-        List<BenchmarkResult> secondResults = new ArrayList<>();
+        List<BenchmarkResult> shortSmallResults = new ArrayList<>();
+        List<BenchmarkResult> shortLargeResults = new ArrayList<>();
+        List<BenchmarkResult> longSmallResults = new ArrayList<>();
+        List<BenchmarkResult> longLargeResults = new ArrayList<>();
 
         // Per-benchmark warmup count (10 requests per thread)
         int perBenchmarkWarmup = threads * 10;
@@ -238,67 +230,74 @@ public class TikaServerBenchmark {
                 System.out.println("*".repeat(70));
             }
 
-            // First test (small/short)
-            System.out.println("-".repeat(70));
-            System.out.printf(Locale.ROOT, "Running %s benchmark (%d 
requests)%n", firstLabel.toUpperCase(Locale.ROOT), count);
-            System.out.println("-".repeat(70));
-            // Warmup for this benchmark (10 requests per thread, not counted)
-            System.out.printf(Locale.ROOT, "  Per-benchmark warmup (%d 
requests)...%n", perBenchmarkWarmup);
-            runBenchmark(smallContent, perBenchmarkWarmup, "warmup", 
firstLabel);
-            BenchmarkResult firstResult = runBenchmark(smallContent, count, 
"first", firstLabel);
-            firstResults.add(firstResult);
-            printResults(firstResult, firstLabel);
-            System.out.println();
-
-            // Second test (large/long)
-            System.out.println("-".repeat(70));
-            System.out.printf(Locale.ROOT, "Running %s benchmark (%d 
requests)%n", secondLabel.toUpperCase(Locale.ROOT), count);
-            System.out.println("-".repeat(70));
-            // Warmup for this benchmark (10 requests per thread, not counted)
-            System.out.printf(Locale.ROOT, "  Per-benchmark warmup (%d 
requests)...%n", perBenchmarkWarmup);
-            runBenchmark(largeContent, perBenchmarkWarmup, "warmup", 
secondLabel);
-            BenchmarkResult secondResult = runBenchmark(largeContent, count, 
"second", secondLabel);
-            secondResults.add(secondResult);
-            printResults(secondResult, secondLabel);
+            // Test 1: short parse, small output
+            shortSmallResults.add(runSingleBenchmark(shortSmallContent, 
perBenchmarkWarmup, shortSmallLabel));
+
+            // Test 2: short parse, large output
+            shortLargeResults.add(runSingleBenchmark(shortLargeContent, 
perBenchmarkWarmup, shortLargeLabel));
+
+            // Test 3: long parse, small output
+            longSmallResults.add(runSingleBenchmark(longSmallContent, 
perBenchmarkWarmup, longSmallLabel));
+
+            // Test 4: long parse, large output
+            longLargeResults.add(runSingleBenchmark(longLargeContent, 
perBenchmarkWarmup, longLargeLabel));
         }
 
         // Calculate aggregated results
-        BenchmarkResult firstAgg = aggregateResults(firstResults);
-        BenchmarkResult secondAgg = aggregateResults(secondResults);
+        BenchmarkResult shortSmallAgg = aggregateResults(shortSmallResults);
+        BenchmarkResult shortLargeAgg = aggregateResults(shortLargeResults);
+        BenchmarkResult longSmallAgg = aggregateResults(longSmallResults);
+        BenchmarkResult longLargeAgg = aggregateResults(longLargeResults);
 
-        // Summary
+        // Summary - 2x2 Matrix format
         System.out.println();
-        System.out.println("=".repeat(70));
+        System.out.println("=".repeat(90));
         if (repeats > 1) {
             System.out.printf(Locale.ROOT, "SUMMARY (averaged over %d 
repeats)%n", repeats);
         } else {
             System.out.println("SUMMARY");
         }
-        System.out.println("=".repeat(70));
-        System.out.printf(Locale.ROOT, "%-20s %18s %18s%n", "Metric", 
firstLabel, secondLabel);
-        System.out.println("-".repeat(70));
-        System.out.printf(Locale.ROOT, "%-20s %18.2f %18.2f%n", "Throughput 
(req/s)", firstAgg.throughput, secondAgg.throughput);
-        System.out.printf(Locale.ROOT, "%-20s %18.2f %18.2f%n", "Avg Latency 
(ms)", firstAgg.avgLatencyMs, secondAgg.avgLatencyMs);
-        System.out.printf(Locale.ROOT, "%-20s %18.2f %18.2f%n", "P50 Latency 
(ms)", firstAgg.p50LatencyMs, secondAgg.p50LatencyMs);
-        System.out.printf(Locale.ROOT, "%-20s %18.2f %18.2f%n", "P95 Latency 
(ms)", firstAgg.p95LatencyMs, secondAgg.p95LatencyMs);
-        System.out.printf(Locale.ROOT, "%-20s %18.2f %18.2f%n", "P99 Latency 
(ms)", firstAgg.p99LatencyMs, secondAgg.p99LatencyMs);
-        System.out.printf(Locale.ROOT, "%-20s %18d %18d%n", "Success Count", 
firstAgg.successCount, secondAgg.successCount);
-        System.out.printf(Locale.ROOT, "%-20s %18d %18d%n", "Error Count", 
firstAgg.errorCount, secondAgg.errorCount);
-        System.out.println("=".repeat(70));
+        System.out.println("=".repeat(90));
+
+        // Throughput matrix
+        System.out.println();
+        System.out.println("THROUGHPUT (req/s):");
+        System.out.printf(Locale.ROOT, "%-20s %20s %20s%n", "", "small-" + 
smallTimes, "large-" + largeTimes);
+        System.out.printf(Locale.ROOT, "%-20s %20.2f %20.2f%n", "short-" + 
shortSleepMs + "ms", shortSmallAgg.throughput, shortLargeAgg.throughput);
+        System.out.printf(Locale.ROOT, "%-20s %20.2f %20.2f%n", "long-" + 
longSleepMs + "ms", longSmallAgg.throughput, longLargeAgg.throughput);
+
+        // Latency matrix
+        System.out.println();
+        System.out.println("AVG LATENCY (ms):");
+        System.out.printf(Locale.ROOT, "%-20s %20s %20s%n", "", "small-" + 
smallTimes, "large-" + largeTimes);
+        System.out.printf(Locale.ROOT, "%-20s %20.2f %20.2f%n", "short-" + 
shortSleepMs + "ms", shortSmallAgg.avgLatencyMs, shortLargeAgg.avgLatencyMs);
+        System.out.printf(Locale.ROOT, "%-20s %20.2f %20.2f%n", "long-" + 
longSleepMs + "ms", longSmallAgg.avgLatencyMs, longLargeAgg.avgLatencyMs);
 
-        // Output CSV-friendly line for easy comparison
+        // P95 Latency matrix
         System.out.println();
-        System.out.println("CSV format (for comparison):");
-        System.out.printf(Locale.ROOT, 
"mode,threads,repeats,%s_throughput,%s_p50,%s_p95,%s_throughput,%s_p50,%s_p95%n",
-                firstLabel, firstLabel, firstLabel, secondLabel, secondLabel, 
secondLabel);
-        System.out.printf(Locale.ROOT, 
"%s,%d,%d,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f%n",
-                mode, threads, repeats,
-                firstAgg.throughput, firstAgg.p50LatencyMs, 
firstAgg.p95LatencyMs,
-                secondAgg.throughput, secondAgg.p50LatencyMs, 
secondAgg.p95LatencyMs);
+        System.out.println("P95 LATENCY (ms):");
+        System.out.printf(Locale.ROOT, "%-20s %20s %20s%n", "", "small-" + 
smallTimes, "large-" + largeTimes);
+        System.out.printf(Locale.ROOT, "%-20s %20.2f %20.2f%n", "short-" + 
shortSleepMs + "ms", shortSmallAgg.p95LatencyMs, shortLargeAgg.p95LatencyMs);
+        System.out.printf(Locale.ROOT, "%-20s %20.2f %20.2f%n", "long-" + 
longSleepMs + "ms", longSmallAgg.p95LatencyMs, longLargeAgg.p95LatencyMs);
+
+        System.out.println();
+        System.out.println("=".repeat(90));
 
         shutdown();
     }
 
+    private BenchmarkResult runSingleBenchmark(byte[] content, int 
perBenchmarkWarmup, String label) throws Exception {
+        System.out.println("-".repeat(70));
+        System.out.printf(Locale.ROOT, "Running %s benchmark (%d requests)%n", 
label.toUpperCase(Locale.ROOT), count);
+        System.out.println("-".repeat(70));
+        System.out.printf(Locale.ROOT, "  Per-benchmark warmup (%d 
requests)...%n", perBenchmarkWarmup);
+        runBenchmark(content, perBenchmarkWarmup, "warmup", label);
+        BenchmarkResult result = runBenchmark(content, count, "test", label);
+        printResults(result, label);
+        System.out.println();
+        return result;
+    }
+
     private BenchmarkResult aggregateResults(List<BenchmarkResult> results) {
         if (results.size() == 1) {
             return results.get(0);
@@ -314,14 +313,6 @@ public class TikaServerBenchmark {
         return new BenchmarkResult(avgThroughput, avgLatency, avgP50, avgP95, 
avgP99, avgMax, totalSuccess, totalErrors);
     }
 
-    private String getSmallLabel() {
-        return "sleep".equals(mode) ? "short-sleep" : "small-files";
-    }
-
-    private String getLargeLabel() {
-        return "sleep".equals(mode) ? "long-sleep" : "large-files";
-    }
-
     private boolean checkServerHealth() {
         try {
             HttpRequest request = HttpRequest.newBuilder()
@@ -348,6 +339,7 @@ public class TikaServerBenchmark {
                     .uri(URI.create(baseUrl + "/rmeta"))
                     .header("Content-Type", "application/mock+xml")
                     .header("Accept", "application/json")
+                    .header("writeLimit", "-1")
                     .PUT(HttpRequest.BodyPublishers.ofString(testXml))
                     .timeout(Duration.ofSeconds(10))
                     .build();
@@ -391,8 +383,8 @@ public class TikaServerBenchmark {
         AtomicInteger errorCount = new AtomicInteger(0);
         AtomicInteger completedCount = new AtomicInteger(0);
 
-        // Calculate appropriate timeout based on content
-        int timeoutSeconds = "sleep".equals(mode) ? Math.max(60, longSleepMs / 
1000 + 30) : 60;
+        // Calculate appropriate timeout based on longest possible sleep time
+        int timeoutSeconds = Math.max(60, longSleepMs / 1000 + 30);
 
         // Divide requests among threads
         int requestsPerThread = requestCount / threads;
@@ -451,8 +443,8 @@ public class TikaServerBenchmark {
         AtomicInteger errorCount = new AtomicInteger(0);
         AtomicInteger completedCount = new AtomicInteger(0);
 
-        // Calculate appropriate timeout based on content
-        int timeoutSeconds = "sleep".equals(mode) ? Math.max(60, longSleepMs / 
1000 + 30) : 60;
+        // Calculate appropriate timeout based on longest possible sleep time
+        int timeoutSeconds = Math.max(60, longSleepMs / 1000 + 30);
 
         long startTime = System.nanoTime();
 
@@ -608,15 +600,14 @@ public class TikaServerBenchmark {
         String url = "http://localhost:9998";
         String endpoint = "/tika";
         int threads = 4;
-        int count = 1000;
+        int count = 100;
         int warmup = 100;
         int repeats = 1;
-        String mode = "size";
         boolean syncMode = true; // default to sync (realistic)
-        int smallKb = 1;
-        int largeKb = 100;
+        int smallTimes = 10;
+        int largeTimes = 10000;
         int shortMs = 10;
-        int longMs = 5000;
+        int longMs = 500;
 
         for (String arg : args) {
             if (arg.startsWith("--url=")) {
@@ -631,16 +622,14 @@ public class TikaServerBenchmark {
                 warmup = Integer.parseInt(arg.substring(9));
             } else if (arg.startsWith("--repeats=")) {
                 repeats = Integer.parseInt(arg.substring(10));
-            } else if (arg.startsWith("--mode=")) {
-                mode = arg.substring(7);
             } else if (arg.equals("--sync")) {
                 syncMode = true;
             } else if (arg.equals("--async")) {
                 syncMode = false;
-            } else if (arg.startsWith("--small-kb=")) {
-                smallKb = Integer.parseInt(arg.substring(11));
-            } else if (arg.startsWith("--large-kb=")) {
-                largeKb = Integer.parseInt(arg.substring(11));
+            } else if (arg.startsWith("--small-times=")) {
+                smallTimes = Integer.parseInt(arg.substring(14));
+            } else if (arg.startsWith("--large-times=")) {
+                largeTimes = Integer.parseInt(arg.substring(14));
             } else if (arg.startsWith("--short-ms=")) {
                 shortMs = Integer.parseInt(arg.substring(11));
             } else if (arg.startsWith("--long-ms=")) {
@@ -651,13 +640,8 @@ public class TikaServerBenchmark {
             }
         }
 
-        if (!mode.equals("size") && !mode.equals("sleep")) {
-            System.err.println("Invalid mode: " + mode + ". Must be 'size' or 
'sleep'.");
-            System.exit(1);
-        }
-
         TikaServerBenchmark benchmark = new TikaServerBenchmark(
-                url, endpoint, threads, count, warmup, repeats, mode, 
syncMode, smallKb, largeKb, shortMs, longMs);
+                url, endpoint, threads, count, warmup, repeats, syncMode, 
smallTimes, largeTimes, shortMs, longMs);
 
         try {
             benchmark.run();
@@ -669,7 +653,9 @@ public class TikaServerBenchmark {
     }
 
     private static void printHelp() {
-        System.out.println("Tika Server Performance Benchmark");
+        System.out.println("Tika Server Performance Benchmark (2x2 Matrix)");
+        System.out.println();
+        System.out.println("Runs a 2x2 matrix of tests: [short/long parse 
time] x [small/large output]");
         System.out.println();
         System.out.println("Usage: java TikaServerBenchmark [options]");
         System.out.println();
@@ -677,30 +663,28 @@ public class TikaServerBenchmark {
         System.out.println("  --url=URL          Base URL of tika-server 
(default: http://localhost:9998)");
         System.out.println("  --endpoint=PATH    Endpoint to test: /tika or 
/rmeta (default: /tika)");
         System.out.println("  --threads=N        Number of client threads 
(default: 4)");
-        System.out.println("  --count=N          Number of requests per test 
(default: 1000)");
+        System.out.println("  --count=N          Number of requests per test 
(default: 100)");
         System.out.println("  --warmup=N         Number of initial warmup 
requests (default: 100)");
         System.out.println("  --repeats=N        Number of times to repeat the 
benchmark (default: 1)");
-        System.out.println("  --mode=MODE        Test mode: 'size' or 'sleep' 
(default: size)");
         System.out.println("  --sync             Synchronous: each thread 
waits for response before next request (default)");
         System.out.println("  --async            Asynchronous: all requests 
sent immediately (stress test)");
         System.out.println();
-        System.out.println("Size mode options (tests I/O throughput):");
-        System.out.println("  --small-kb=N       Size of small files in KB 
(default: 1)");
-        System.out.println("  --large-kb=N       Size of large files in KB 
(default: 100)");
+        System.out.println("Parse time options:");
+        System.out.println("  --short-ms=N       Short parse/sleep duration in 
ms (default: 10)");
+        System.out.println("  --long-ms=N        Long parse/sleep duration in 
ms (default: 500)");
         System.out.println();
-        System.out.println("Sleep mode options (tests process forking 
overhead):");
-        System.out.println("  --short-ms=N       Short sleep duration in ms 
(default: 10)");
-        System.out.println("  --long-ms=N        Long sleep duration in ms 
(default: 5000)");
+        System.out.println("Output size options:");
+        System.out.println("  --small-times=N    Paragraph repetitions for 
small output (default: 10, ~1KB)");
+        System.out.println("  --large-times=N    Paragraph repetitions for 
large output (default: 10000, ~1MB)");
         System.out.println();
-        System.out.println("Note: Each benchmark also runs a per-benchmark 
warmup of 10*threads requests");
-        System.out.println("      that is not counted towards the 
statistics.");
+        System.out.println("Note: Each of the 4 benchmarks runs a 
per-benchmark warmup of 10*threads requests.");
         System.out.println();
         System.out.println("Examples:");
-        System.out.println("  # Realistic test with 4 threads (default sync 
mode)");
-        System.out.println("  java TikaServerBenchmark --mode=sleep 
--threads=4 --short-ms=100 --long-ms=1000");
+        System.out.println("  # Default 2x2 matrix test");
+        System.out.println("  java TikaServerBenchmark --threads=4");
         System.out.println();
-        System.out.println("  # Stress test with async mode");
-        System.out.println("  java TikaServerBenchmark --mode=sleep 
--threads=4 --async --count=500");
+        System.out.println("  # Custom parse times and output sizes");
+        System.out.println("  java TikaServerBenchmark --short-ms=10 
--long-ms=1000 --small-times=10 --large-times=5000");
         System.out.println();
         System.out.println("  # Test /rmeta endpoint with 3 repeats for more 
stable results");
         System.out.println("  java TikaServerBenchmark --endpoint=/rmeta 
--mode=size --repeats=3");
diff --git a/tika-server/tika-server-standard/pom.xml 
b/tika-server/tika-server-standard/pom.xml
index 2cfb43d7ec..82db4d689a 100644
--- a/tika-server/tika-server-standard/pom.xml
+++ b/tika-server/tika-server-standard/pom.xml
@@ -125,7 +125,7 @@
         <executions>
           <execution>
             <id>unpack-plugins</id>
-            <phase>prepare-package</phase>
+            <phase>process-test-resources</phase>
             <goals>
               <goal>unpack</goal>
             </goals>

Reply via email to