This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch TIKA-4334
in repository https://gitbox.apache.org/repos/asf/tika.git

commit e46345a43000a399ecf22588c7cccc3fa32c01a3
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Tue Sep 30 12:31:09 2025 -0500

    TIKA-4334: replace pipes and server with newer apis - add eval and detector controllers
---
 tika-server/tika-server-spring/pom.xml             |  13 +-
 .../tika/server/config/TikaConfigLoader.java       |  53 +++
 .../tika/server/controller/DetectorController.java |  42 ++-
 .../tika/server/controller/EvalController.java     | 215 ++++++++++-
 .../org/apache/tika/server/util/TikaResource.java  | 119 ------
 .../main/resources/api/tika-server-openapi.yaml    |  43 ++-
 .../apache/tika/server/IntegrationTestBase.java    |   4 +-
 .../DetectorControllerIntegrationTest.java         |  42 ++-
 .../controller/EvalControllerIntegrationTest.java  | 403 +++++++++++++++++++++
 .../src/test/resources/test-tika-config.xml        |  38 ++
 10 files changed, 801 insertions(+), 171 deletions(-)

diff --git a/tika-server/tika-server-spring/pom.xml 
b/tika-server/tika-server-spring/pom.xml
index 9db1c1884..5fbd2c67b 100644
--- a/tika-server/tika-server-spring/pom.xml
+++ b/tika-server/tika-server-spring/pom.xml
@@ -29,7 +29,7 @@
   </parent>
   
   <artifactId>tika-server-spring</artifactId>
-  <name>Apache Tika Spring Boot Server</name>
+  <name>Apache Tika Spring Web Service</name>
   <description>Apache Tika Server implemented with Spring Boot and OpenAPI 
3.0</description>
 
   <dependencyManagement>
@@ -94,6 +94,17 @@
       <version>${project.parent.version}</version>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-eval-core</artifactId>
+      <version>${project.parent.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-langdetect-opennlp</artifactId>
+      <version>${project.parent.version}</version>
+    </dependency>
 
     <dependency>
       <groupId>org.apache.tika</groupId>
diff --git 
a/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/config/TikaConfigLoader.java
 
b/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/config/TikaConfigLoader.java
new file mode 100644
index 000000000..d388855ff
--- /dev/null
+++ 
b/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/config/TikaConfigLoader.java
@@ -0,0 +1,53 @@
+/*
+ *
+ *  * Licensed to the Apache Software Foundation (ASF) under one or more
+ *  * contributor license agreements.  See the NOTICE file distributed with
+ *  * this work for additional information regarding copyright ownership.
+ *  * The ASF licenses this file to You under the Apache License, Version 2.0
+ *  * (the "License"); you may not use this file except in compliance with
+ *  * the License.  You may obtain a copy of the License at
+ *  *
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *
+ *
+ */
+
+package org.apache.tika.server.config;
+
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.core.env.Environment;
+
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+
+@Configuration
+public class TikaConfigLoader {
+    private final Environment environment;
+
+    @Autowired
+    public TikaConfigLoader(Environment environment) {
+        this.environment = environment;
+    }
+
+    @Bean
+    public TikaConfig tikaConfig() throws TikaException {
+        String tikaConfig = environment.getProperty("tika.config");
+        if (StringUtils.isNotBlank(tikaConfig)) {
+            try {
+                return new 
TikaConfig(getClass().getClassLoader().getResourceAsStream(tikaConfig));
+            } catch (Exception e) {
+                throw new TikaException("Could not load tika.config profile", 
e);
+            }
+        }
+        return TikaConfig.getDefaultConfig();
+    }
+}
diff --git 
a/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/controller/DetectorController.java
 
b/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/controller/DetectorController.java
index dd1422de6..b4f62d083 100644
--- 
a/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/controller/DetectorController.java
+++ 
b/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/controller/DetectorController.java
@@ -18,23 +18,25 @@ package org.apache.tika.server.controller;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Optional;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.core.io.Resource;
 import org.springframework.http.HttpStatus;
 import org.springframework.http.ResponseEntity;
 import org.springframework.web.bind.annotation.RestController;
+import org.springframework.web.context.request.NativeWebRequest;
 
+import org.apache.tika.config.TikaConfig;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.ParseContext;
 import org.apache.tika.server.api.DetectorResourceApi;
 import org.apache.tika.server.component.ServerStatus;
-import org.apache.tika.server.util.TikaResource;
 
 /**
  * Controller for MIME/media type detection using the default detector.
@@ -42,45 +44,51 @@ import org.apache.tika.server.util.TikaResource;
  */
 @RestController
 public class DetectorController implements DetectorResourceApi {
-    
     private static final Logger LOG = 
LoggerFactory.getLogger(DetectorController.class);
     private final ServerStatus serverStatus;
+    private final TikaConfig tikaConfig;
+
+    @Value("${tika.detector.taskTimeoutMillis:30000}")
+    private long timeoutMillis;
+
 
     @Autowired
-    public DetectorController(ServerStatus serverStatus) {
+    public DetectorController(ServerStatus serverStatus, TikaConfig 
tikaConfig) {
         this.serverStatus = serverStatus;
+        this.tikaConfig = tikaConfig;
+    }
+
+    @Override
+    public Optional<NativeWebRequest> getRequest() {
+        return DetectorResourceApi.super.getRequest();
     }
 
     @Override
-    public ResponseEntity<String> putStream(Resource body) {
+    public ResponseEntity<String> putStream(Resource body, String 
contentDisposition) {
         if (body == null) {
             return ResponseEntity.badRequest().body("No document provided");
         }
-        
+
         Metadata metadata = new Metadata();
         String filename = body.getFilename();
         LOG.info("Detecting media type for Filename: {}", filename);
-        
+
         if (filename != null) {
             metadata.add(TikaCoreProperties.RESOURCE_NAME_KEY, filename);
         }
-        
-        ParseContext parseContext = new ParseContext();
-        long timeoutMillis = TikaResource.getTaskTimeout(parseContext);
+
         long taskId = serverStatus.start(ServerStatus.TASK.DETECT, filename, 
timeoutMillis);
 
         try (InputStream is = body.getInputStream();
              TikaInputStream tis = TikaInputStream.get(is)) {
-            
-            String mediaType = TikaResource
-                    .getConfig()
+
+            String mediaType = tikaConfig
                     .getDetector()
                     .detect(tis, metadata)
                     .toString();
-            
+
             LOG.info("Detected media type: {} for file: {}", mediaType, 
filename);
             return ResponseEntity.ok(mediaType);
-            
         } catch (IOException e) {
             LOG.warn("Unable to detect MIME type for file. Reason: {} ({})", 
e.getMessage(), filename, e);
             return ResponseEntity.ok(MediaType.OCTET_STREAM.toString());
@@ -88,11 +96,11 @@ public class DetectorController implements 
DetectorResourceApi {
             LOG.error("OOM while detecting: ({})", filename, e);
             serverStatus.setStatus(ServerStatus.STATUS.ERROR);
             return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
-                    .body("Out of memory error during detection");
+                                 .body("Out of memory error during detection");
         } catch (Throwable e) {
             LOG.error("Exception while detecting: ({})", filename, e);
             return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
-                    .body("Error during MIME type detection: " + 
e.getMessage());
+                                 .body("Error during MIME type detection: " + 
e.getMessage());
         } finally {
             serverStatus.complete(taskId);
         }
diff --git 
a/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/controller/EvalController.java
 
b/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/controller/EvalController.java
index 0e39c28a0..cc0ed5238 100644
--- 
a/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/controller/EvalController.java
+++ 
b/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/controller/EvalController.java
@@ -20,30 +20,231 @@
 
 package org.apache.tika.server.controller;
 
-import java.util.Optional;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.http.HttpStatus;
 import org.springframework.http.ResponseEntity;
-import org.springframework.web.context.request.NativeWebRequest;
+import org.springframework.stereotype.Controller;
 
+import org.apache.tika.eval.core.langid.LanguageIDWrapper;
+import org.apache.tika.eval.core.metadata.TikaEvalMetadataFilter;
+import org.apache.tika.eval.core.textstats.BasicTokenCountStatsCalculator;
+import org.apache.tika.eval.core.textstats.CommonTokens;
+import org.apache.tika.eval.core.textstats.CompositeTextStatsCalculator;
+import org.apache.tika.eval.core.textstats.TextStatsCalculator;
+import org.apache.tika.eval.core.tokens.CommonTokenResult;
+import org.apache.tika.eval.core.tokens.ContrastStatistics;
+import org.apache.tika.eval.core.tokens.TokenContraster;
+import org.apache.tika.eval.core.tokens.TokenCounts;
+import org.apache.tika.language.detect.LanguageResult;
+import org.apache.tika.metadata.Property;
 import org.apache.tika.server.api.EvalResourceApi;
+import org.apache.tika.server.component.ServerStatus;
 import org.apache.tika.server.model.PutEvalCompare200Response;
 import org.apache.tika.server.model.PutEvalCompareRequest;
 import org.apache.tika.server.model.PutEvalProfile200Response;
 import org.apache.tika.server.model.PutEvalProfileRequest;
+import org.apache.tika.utils.StringUtils;
 
+@Controller
 public class EvalController implements EvalResourceApi {
-    @Override
-    public Optional<NativeWebRequest> getRequest() {
-        return EvalResourceApi.super.getRequest();
+
+    public static final long DEFAULT_TIMEOUT_MILLIS = 60000;
+
+    public static final Property DICE = Property.externalReal(
+            TikaEvalMetadataFilter.TIKA_EVAL_NS + "dice");
+
+    public static final Property OVERLAP = Property.externalReal(
+            TikaEvalMetadataFilter.TIKA_EVAL_NS + "overlap");
+
+    static CompositeTextStatsCalculator TEXT_STATS_CALCULATOR;
+
+    static {
+        List<TextStatsCalculator> calcs = new ArrayList<>();
+        calcs.add(new BasicTokenCountStatsCalculator());
+        calcs.add(new CommonTokens());
+        TEXT_STATS_CALCULATOR = new CompositeTextStatsCalculator(calcs);
     }
 
+    @Autowired
+    private ServerStatus serverStatus;
+
     @Override
     public ResponseEntity<PutEvalCompare200Response> 
putEvalCompare(PutEvalCompareRequest putEvalCompareRequest) {
-        return EvalResourceApi.super.putEvalCompare(putEvalCompareRequest);
+        try {
+            String id = putEvalCompareRequest.getId();
+            String textA = putEvalCompareRequest.getTextA();
+            String textB = putEvalCompareRequest.getTextB();
+            long timeoutMillis = putEvalCompareRequest.getTimeoutMillis() != 
null ?
+                putEvalCompareRequest.getTimeoutMillis() : 
DEFAULT_TIMEOUT_MILLIS;
+
+            Map<String, Object> result = compareText(id, textA, textB, 
timeoutMillis);
+
+            PutEvalCompare200Response response = new 
PutEvalCompare200Response();
+            mapResultToCompareResponse(result, response);
+
+            return new ResponseEntity<>(response, HttpStatus.OK);
+        } catch (Exception e) {
+            return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
+        }
     }
 
     @Override
     public ResponseEntity<PutEvalProfile200Response> 
putEvalProfile(PutEvalProfileRequest putEvalProfileRequest) {
-        return EvalResourceApi.super.putEvalProfile(putEvalProfileRequest);
+        try {
+            String id = putEvalProfileRequest.getId();
+            String text = putEvalProfileRequest.getText();
+            long timeoutMillis = putEvalProfileRequest.getTimeoutMillis() != 
null ?
+                putEvalProfileRequest.getTimeoutMillis() : 
DEFAULT_TIMEOUT_MILLIS;
+
+            Map<String, Object> result = profile(id, text, timeoutMillis);
+
+            PutEvalProfile200Response response = new 
PutEvalProfile200Response();
+            mapResultToProfileResponse(result, response);
+
+            return new ResponseEntity<>(response, HttpStatus.OK);
+        } catch (Exception e) {
+            return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
+        }
+    }
+
+    private Map<String, Object> compareText(String id, String textA, String 
textB, long timeoutMillis) {
+        Map<String, Object> stats = new HashMap<>();
+        long taskId = serverStatus.start(ServerStatus.TASK.EVAL, id, 
timeoutMillis);
+        try {
+            TokenCounts tokensA = profile("A", textA, stats);
+            TokenCounts tokensB = profile("B", textB, stats);
+            TokenContraster tokenContraster = new TokenContraster();
+            ContrastStatistics contrastStatistics =
+                    tokenContraster.calculateContrastStatistics(tokensA, 
tokensB);
+            reportContrastStats(contrastStatistics, stats);
+        } finally {
+            serverStatus.complete(taskId);
+        }
+        return stats;
+    }
+
+    private Map<String, Object> profile(String id, String text, long 
timeoutMillis) {
+        Map<String, Object> stats = new HashMap<>();
+        long taskId = serverStatus.start(ServerStatus.TASK.EVAL, id, 
timeoutMillis);
+        try {
+            profile(StringUtils.EMPTY, text, stats);
+        } finally {
+            serverStatus.complete(taskId);
+        }
+        return stats;
+    }
+
+    private TokenCounts profile(String suffix, String content, Map<String, 
Object> stats) {
+        Map<Class, Object> results = TEXT_STATS_CALCULATOR.calculate(content);
+
+        TokenCounts tokenCounts = (TokenCounts) 
results.get(BasicTokenCountStatsCalculator.class);
+        stats.put("tika-eval:numTokens" + suffix, 
tokenCounts.getTotalTokens());
+        stats.put("tika-eval:numUniqueTokens" + suffix, 
tokenCounts.getTotalUniqueTokens());
+
+        //common token results
+        CommonTokenResult commonTokenResult = (CommonTokenResult) 
results.get(CommonTokens.class);
+        stats.put("tika-eval:numAlphaTokens" + suffix, 
commonTokenResult.getAlphabeticTokens());
+        stats.put("tika-eval:numUniqueAlphaTokens" + suffix, 
commonTokenResult.getUniqueAlphabeticTokens());
+        if (commonTokenResult.getAlphabeticTokens() > 0) {
+            stats.put("tika-eval:oov" + suffix, commonTokenResult.getOOV());
+        } else {
+            stats.put("tika-eval:oov" + suffix, -1.0f);
+        }
+
+        //languages
+        List<LanguageResult> probabilities =
+                (List<LanguageResult>) results.get(LanguageIDWrapper.class);
+        if (probabilities.size() > 0) {
+            stats.put("tika-eval:lang" + suffix, 
probabilities.get(0).getLanguage());
+            stats.put("tika-eval:langConfidence" + suffix, 
probabilities.get(0).getRawScore());
+        }
+        return tokenCounts;
+    }
+
+    private void reportContrastStats(ContrastStatistics contrastStatistics,
+                                     Map<String, Object> stats) {
+        stats.put("tika-eval:dice", contrastStatistics.getDiceCoefficient());
+        stats.put("tika-eval:overlap", contrastStatistics.getOverlap());
+        //TODO, add topNMore, topNUnique
+    }
+
+    private void mapResultToCompareResponse(Map<String, Object> result, 
PutEvalCompare200Response response) {
+        if (result.get("tika-eval:dice") != null) {
+            response.setTikaEvalColonDice(((Number) 
result.get("tika-eval:dice")).floatValue());
+        }
+        if (result.get("tika-eval:overlap") != null) {
+            response.setTikaEvalColonOverlap(((Number) 
result.get("tika-eval:overlap")).floatValue());
+        }
+        if (result.get("tika-eval:numTokensA") != null) {
+            response.setTikaEvalColonNumTokensA(((Number) 
result.get("tika-eval:numTokensA")).intValue());
+        }
+        if (result.get("tika-eval:numTokensB") != null) {
+            response.setTikaEvalColonNumTokensB(((Number) 
result.get("tika-eval:numTokensB")).intValue());
+        }
+        if (result.get("tika-eval:numUniqueTokensA") != null) {
+            response.setTikaEvalColonNumUniqueTokensA(((Number) 
result.get("tika-eval:numUniqueTokensA")).intValue());
+        }
+        if (result.get("tika-eval:numUniqueTokensB") != null) {
+            response.setTikaEvalColonNumUniqueTokensB(((Number) 
result.get("tika-eval:numUniqueTokensB")).intValue());
+        }
+        if (result.get("tika-eval:numAlphaTokensA") != null) {
+            response.setTikaEvalColonNumAlphaTokensA(((Number) 
result.get("tika-eval:numAlphaTokensA")).intValue());
+        }
+        if (result.get("tika-eval:numAlphaTokensB") != null) {
+            response.setTikaEvalColonNumAlphaTokensB(((Number) 
result.get("tika-eval:numAlphaTokensB")).intValue());
+        }
+        if (result.get("tika-eval:numUniqueAlphaTokensA") != null) {
+            response.setTikaEvalColonNumUniqueAlphaTokensA(((Number) 
result.get("tika-eval:numUniqueAlphaTokensA")).intValue());
+        }
+        if (result.get("tika-eval:numUniqueAlphaTokensB") != null) {
+            response.setTikaEvalColonNumUniqueAlphaTokensB(((Number) 
result.get("tika-eval:numUniqueAlphaTokensB")).intValue());
+        }
+        if (result.get("tika-eval:oovA") != null) {
+            response.setTikaEvalColonOovA(((Number) 
result.get("tika-eval:oovA")).floatValue());
+        }
+        if (result.get("tika-eval:oovB") != null) {
+            response.setTikaEvalColonOovB(((Number) 
result.get("tika-eval:oovB")).floatValue());
+        }
+        if (result.get("tika-eval:langA") != null) {
+            response.setTikaEvalColonLangA((String) 
result.get("tika-eval:langA"));
+        }
+        if (result.get("tika-eval:langB") != null) {
+            response.setTikaEvalColonLangB((String) 
result.get("tika-eval:langB"));
+        }
+        if (result.get("tika-eval:langConfidenceA") != null) {
+            response.setTikaEvalColonLangConfidenceA(((Number) 
result.get("tika-eval:langConfidenceA")).floatValue());
+        }
+        if (result.get("tika-eval:langConfidenceB") != null) {
+            response.setTikaEvalColonLangConfidenceB(((Number) 
result.get("tika-eval:langConfidenceB")).floatValue());
+        }
+    }
+
+    private void mapResultToProfileResponse(Map<String, Object> result, 
PutEvalProfile200Response response) {
+        if (result.get("tika-eval:numTokens") != null) {
+            response.setTikaEvalColonNumTokens(((Number) 
result.get("tika-eval:numTokens")).intValue());
+        }
+        if (result.get("tika-eval:numUniqueTokens") != null) {
+            response.setTikaEvalColonNumUniqueTokens(((Number) 
result.get("tika-eval:numUniqueTokens")).intValue());
+        }
+        if (result.get("tika-eval:numAlphaTokens") != null) {
+            response.setTikaEvalColonNumAlphaTokens(((Number) 
result.get("tika-eval:numAlphaTokens")).intValue());
+        }
+        if (result.get("tika-eval:numUniqueAlphaTokens") != null) {
+            response.setTikaEvalColonNumUniqueAlphaTokens(((Number) 
result.get("tika-eval:numUniqueAlphaTokens")).intValue());
+        }
+        if (result.get("tika-eval:oov") != null) {
+            response.setTikaEvalColonOov(((Number) 
result.get("tika-eval:oov")).floatValue());
+        }
+        if (result.get("tika-eval:lang") != null) {
+            response.setTikaEvalColonLang((String) 
result.get("tika-eval:lang"));
+        }
+        if (result.get("tika-eval:langConfidence") != null) {
+            response.setTikaEvalColonLangConfidence(((Number) 
result.get("tika-eval:langConfidence")).floatValue());
+        }
     }
 }
diff --git 
a/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/util/TikaResource.java
 
b/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/util/TikaResource.java
deleted file mode 100644
index d810b14bb..000000000
--- 
a/tika-server/tika-server-spring/src/main/java/org/apache/tika/server/util/TikaResource.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- *
- *  * Licensed to the Apache Software Foundation (ASF) under one or more
- *  * contributor license agreements.  See the NOTICE file distributed with
- *  * this work for additional information regarding copyright ownership.
- *  * The ASF licenses this file to You under the Apache License, Version 2.0
- *  * (the "License"); you may not use this file except in compliance with
- *  * the License.  You may obtain a copy of the License at
- *  *
- *  *     http://www.apache.org/licenses/LICENSE-2.0
- *  *
- *  * Unless required by applicable law or agreed to in writing, software
- *  * distributed under the License is distributed on an "AS IS" BASIS,
- *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  * See the License for the specific language governing permissions and
- *  * limitations under the License.
- *
- *
- */
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.server.util;
-
-import org.springframework.stereotype.Component;
-
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.detect.Detector;
-import org.apache.tika.parser.ParseContext;
-
-/**
- * Utility class providing access to Tika configuration and common operations.
- * This class serves as a bridge between the Spring controllers and Tika core 
functionality.
- */
-@Component
-public class TikaResource {
-    
-    private static final long DEFAULT_TASK_TIMEOUT_MILLIS = 300000; // 5 
minutes
-    private static TikaConfig tikaConfig;
-    
-    static {
-        try {
-            // Initialize with default Tika configuration
-            tikaConfig = TikaConfig.getDefaultConfig();
-        } catch (Exception e) {
-            throw new RuntimeException("Failed to initialize TikaConfig", e);
-        }
-    }
-    
-    /**
-     * Get the task timeout from ParseContext, or return default if not 
configured.
-     * 
-     * @param parseContext The parse context which may contain timeout 
configuration
-     * @return Timeout in milliseconds
-     */
-    public static long getTaskTimeout(ParseContext parseContext) {
-        // Check if timeout is configured in parse context
-        if (parseContext != null) {
-            // Look for timeout configuration - this could be expanded based 
on actual Tika implementation
-            Object timeout = parseContext.get(Object.class); // Placeholder - 
actual implementation would vary
-            if (timeout instanceof Long) {
-                return (Long) timeout;
-            }
-        }
-        return DEFAULT_TASK_TIMEOUT_MILLIS;
-    }
-    
-    /**
-     * Get the Tika configuration instance.
-     * 
-     * @return TikaConfig instance
-     */
-    public static TikaConfig getConfig() {
-        return tikaConfig;
-    }
-    
-    /**
-     * Get the detector from the Tika configuration.
-     * 
-     * @return Detector instance
-     */
-    public static Detector getDetector() {
-        return tikaConfig.getDetector();
-    }
-    
-    /**
-     * Set a custom TikaConfig (useful for testing or custom configurations).
-     * 
-     * @param config The TikaConfig to use
-     */
-    public static void setConfig(TikaConfig config) {
-        tikaConfig = config;
-    }
-    
-    /**
-     * Reset to default configuration.
-     */
-    public static void resetToDefault() {
-        try {
-            tikaConfig = TikaConfig.getDefaultConfig();
-        } catch (Exception e) {
-            throw new RuntimeException("Failed to reset to default 
TikaConfig", e);
-        }
-    }
-}
diff --git 
a/tika-server/tika-server-spring/src/main/resources/api/tika-server-openapi.yaml
 
b/tika-server/tika-server-spring/src/main/resources/api/tika-server-openapi.yaml
index 6a2d738b4..eee596a91 100644
--- 
a/tika-server/tika-server-spring/src/main/resources/api/tika-server-openapi.yaml
+++ 
b/tika-server/tika-server-spring/src/main/resources/api/tika-server-openapi.yaml
@@ -177,11 +177,20 @@ paths:
       operationId: put_stream
       requestBody:
         content:
-          '*/*':
+          application/octet-stream:
             schema:
               type: string
               format: binary
         required: true
+      parameters:
+        - in: header
+          name: Content-Disposition
+          description: |
+            Optional header to provide a filename hint to Tika for more 
accurate detection.
+            Example: `attachment; filename="document.pdf"`
+          schema:
+            type: string
+          required: false
       responses:
         '200':
           content:
@@ -1284,12 +1293,34 @@ paths:
                   "tika-eval:numAlphaTokensB":
                     type: integer
                     description: Number of alphabetic tokens in text B
-                  "tika-eval:languageA":
+                  "tika-eval:numUniqueAlphaTokensA":
+                    type: integer
+                    description: Number of unique alphabetic tokens in text A
+                  "tika-eval:numUniqueAlphaTokensB":
+                    type: integer
+                    description: Number of unique alphabetic tokens in text B
+                  "tika-eval:oovA":
+                    type: number
+                    format: float
+                    description: Out-of-vocabulary ratio for text A
+                  "tika-eval:oovB":
+                    type: number
+                    format: float
+                    description: Out-of-vocabulary ratio for text B
+                  "tika-eval:langA":
                     type: string
                     description: Detected language for text A
-                  "tika-eval:languageB":
+                  "tika-eval:langB":
                     type: string
                     description: Detected language for text B
+                  "tika-eval:langConfidenceA":
+                    type: number
+                    format: float
+                    description: Language detection confidence for text A
+                  "tika-eval:langConfidenceB":
+                    type: number
+                    format: float
+                    description: Language detection confidence for text B
           description: If successful, this operation returns HTTP status code 
200 with comparison statistics and similarity metrics.
         '500':
           description: An error occurred processing the call.
@@ -1342,15 +1373,15 @@ paths:
                   "tika-eval:numUniqueAlphaTokens":
                     type: integer
                     description: Number of unique alphabetic tokens
-                  "tika-eval:outOfVocabulary":
+                  "tika-eval:oov":
                     type: number
                     format: float
                     description: Out-of-vocabulary ratio (-1.0 if no 
alphabetic tokens)
-                  "tika-eval:language":
+                  "tika-eval:lang":
                     type: string
                     description: Detected language code
                     example: "en"
-                  "tika-eval:languageConfidence":
+                  "tika-eval:langConfidence":
                     type: number
                     format: float
                     description: Language detection confidence score
diff --git 
a/tika-server/tika-server-spring/src/test/java/org/apache/tika/server/IntegrationTestBase.java
 
b/tika-server/tika-server-spring/src/test/java/org/apache/tika/server/IntegrationTestBase.java
index e8ebfc607..c030e97eb 100644
--- 
a/tika-server/tika-server-spring/src/test/java/org/apache/tika/server/IntegrationTestBase.java
+++ 
b/tika-server/tika-server-spring/src/test/java/org/apache/tika/server/IntegrationTestBase.java
@@ -29,7 +29,7 @@ import org.springframework.web.context.WebApplicationContext;
  * and setup for Spring Boot tests with a random port.
  */
 @SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
-@TestPropertySource(properties = 
{"tika.config.path=classpath:test-tika-config.xml"})
+@TestPropertySource(properties = {"tika.config=test-tika-config.xml"})
 public abstract class IntegrationTestBase {
 
     @LocalServerPort
@@ -45,4 +45,4 @@ public abstract class IntegrationTestBase {
     protected String getBaseUrl() {
         return "http://localhost:" + port;
     }
-}
\ No newline at end of file
+}
diff --git 
a/tika-server/tika-server-spring/src/test/java/org/apache/tika/server/controller/DetectorControllerIntegrationTest.java
 
b/tika-server/tika-server-spring/src/test/java/org/apache/tika/server/controller/DetectorControllerIntegrationTest.java
index 3a2cca8e9..015c732e9 100644
--- 
a/tika-server/tika-server-spring/src/test/java/org/apache/tika/server/controller/DetectorControllerIntegrationTest.java
+++ 
b/tika-server/tika-server-spring/src/test/java/org/apache/tika/server/controller/DetectorControllerIntegrationTest.java
@@ -27,8 +27,6 @@ import java.nio.file.Paths;
 
 import org.junit.jupiter.api.Test;
 import org.springframework.http.MediaType;
-import org.springframework.mock.web.MockMultipartFile;
-import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
 
 import org.apache.tika.server.IntegrationTestBase;
 
@@ -55,7 +53,8 @@ public class DetectorControllerIntegrationTest extends 
IntegrationTestBase {
 
         mockMvc.perform(put("/detect/stream")
                 .contentType(MediaType.APPLICATION_OCTET_STREAM)
-                .content(jsonContent))
+                .content(jsonContent)
+                .header("Content-Disposition", "attachment; 
filename=\"test.json\""))
                 .andExpect(status().isOk())
                 .andExpect(content().string("application/json"));
     }
@@ -101,11 +100,12 @@ public class DetectorControllerIntegrationTest extends 
IntegrationTestBase {
     public void testDetectEmptyContent() throws Exception {
         byte[] emptyContent = new byte[0];
 
+        // Empty content is treated as missing request body by Spring, so expect 400
         mockMvc.perform(put("/detect/stream")
                 .contentType(MediaType.APPLICATION_OCTET_STREAM)
-                .content(emptyContent))
-                .andExpect(status().isOk())
-                .andExpect(content().string("application/octet-stream"));
+                .content(emptyContent)
+                .header("Content-Disposition", "attachment; 
filename=\"empty.txt\""))
+                .andExpect(status().isBadRequest());
     }
 
     @Test
@@ -126,9 +126,10 @@ public class DetectorControllerIntegrationTest extends 
IntegrationTestBase {
         byte[] textContent = "This is a test 
file.".getBytes(StandardCharsets.UTF_8);
 
         // Test that filename hints help with detection
-        mockMvc.perform(MockMvcRequestBuilders.multipart("/detect/stream")
-                .file(new MockMultipartFile("file", "test.txt",
-                      MediaType.TEXT_PLAIN_VALUE, textContent)))
+        mockMvc.perform(put("/detect/stream")
+                .contentType(MediaType.APPLICATION_OCTET_STREAM)
+                .content(textContent)
+                .header("Content-Disposition", "attachment; 
filename=\"test.txt\""))
                 .andExpect(status().isOk())
                 .andExpect(content().string("text/plain"));
     }
@@ -139,9 +140,10 @@ public class DetectorControllerIntegrationTest extends 
IntegrationTestBase {
 
         mockMvc.perform(put("/detect/stream")
                 .contentType(MediaType.APPLICATION_OCTET_STREAM)
-                .content(jsContent))
+                .content(jsContent)
+                .header("Content-Disposition", "attachment; 
filename=\"test.js\""))
                 .andExpect(status().isOk())
-                .andExpect(content().string("application/javascript"));
+                .andExpect(content().string("text/javascript"));
     }
 
     @Test
@@ -150,7 +152,8 @@ public class DetectorControllerIntegrationTest extends 
IntegrationTestBase {
 
         mockMvc.perform(put("/detect/stream")
                 .contentType(MediaType.APPLICATION_OCTET_STREAM)
-                .content(cssContent))
+                .content(cssContent)
+                .header("Content-Disposition", "attachment; 
filename=\"style.css\""))
                 .andExpect(status().isOk())
                 .andExpect(content().string("text/css"));
     }
@@ -175,8 +178,7 @@ public class DetectorControllerIntegrationTest extends 
IntegrationTestBase {
     public void testDetectNullContent() throws Exception {
         // Test with no content - should return bad request
         mockMvc.perform(put("/detect/stream"))
-                .andExpect(status().isBadRequest())
-                .andExpect(content().string("No document provided"));
+                .andExpect(status().isBadRequest());
     }
 
     @Test
@@ -238,10 +240,10 @@ public class DetectorControllerIntegrationTest extends 
IntegrationTestBase {
 
         mockMvc.perform(put("/detect/stream")
                 .contentType(MediaType.APPLICATION_OCTET_STREAM)
-                .content(markdownContent))
+                .content(markdownContent)
+                .header("Content-Disposition", "attachment; 
filename=\"test.md\""))
                 .andExpect(status().isOk())
-                // Markdown might be detected as text/plain since it's 
text-based
-                .andExpect(content().string("text/plain"));
+                .andExpect(content().string("text/x-web-markdown"));
     }
 
     @Test
@@ -251,7 +253,8 @@ public class DetectorControllerIntegrationTest extends 
IntegrationTestBase {
 
         mockMvc.perform(put("/detect/stream")
                 .contentType(MediaType.APPLICATION_OCTET_STREAM)
-                .content(csvContent))
+                .content(csvContent)
+                .header("Content-Disposition", "attachment; 
filename=\"test.csv\""))
                 .andExpect(status().isOk())
                 .andExpect(content().string("text/csv"));
     }
@@ -311,7 +314,8 @@ public class DetectorControllerIntegrationTest extends 
IntegrationTestBase {
 
         mockMvc.perform(put("/detect/stream")
                 .contentType(MediaType.APPLICATION_OCTET_STREAM)
-                .content(jsonContent))
+                .content(jsonContent)
+                .header("Content-Disposition", "attachment; 
filename=\"test.json\""))
                 .andExpect(status().isOk())
                 .andExpect(content().string("application/json"));
     }
diff --git 
a/tika-server/tika-server-spring/src/test/java/org/apache/tika/server/controller/EvalControllerIntegrationTest.java
 
b/tika-server/tika-server-spring/src/test/java/org/apache/tika/server/controller/EvalControllerIntegrationTest.java
new file mode 100644
index 000000000..c0720a6f0
--- /dev/null
+++ 
b/tika-server/tika-server-spring/src/test/java/org/apache/tika/server/controller/EvalControllerIntegrationTest.java
@@ -0,0 +1,403 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.controller;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.put;
+import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
+
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.junit.jupiter.api.Test;
+import org.springframework.http.MediaType;
+import org.springframework.test.web.servlet.MvcResult;
+
+import org.apache.tika.server.IntegrationTestBase;
+
+/**
+ * Integration tests for EvalController.
+ * Tests text profiling and comparison endpoints using TikaEval framework.
+ */
+public class EvalControllerIntegrationTest extends IntegrationTestBase {
+
+    private final ObjectMapper objectMapper = new ObjectMapper();
+
+    @Test
+    public void testBasicProfile() throws Exception {
+        Map<String, String> request = new HashMap<>();
+        request.put("id", "1");
+        request.put("text", "the quick brown fox jumped qwertyuiop");
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        MvcResult result = mockMvc.perform(put("/eval/profile")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk())
+                .andReturn();
+
+        String responseBody = result.getResponse().getContentAsString();
+        Map<String, Object> results = objectMapper.readValue(responseBody,
+            new TypeReference<Map<String, Object>>() {});
+
+        // Verify token count - based on original test expectations
+        Integer numTokens = (Integer) results.get("tika-eval:numTokens");
+        assertEquals(6, numTokens.intValue());
+
+        Object oovObj = results.get("tika-eval:oov");
+        Double oov = ((Number) oovObj).doubleValue();
+        assertEquals(0.166, oov, 0.01);
+
+        // Verify language detection
+        String language = (String) results.get("tika-eval:lang");
+        assertNotNull(language);
+    }
+
+    @Test
+    public void testBasicCompare() throws Exception {
+        Map<String, String> request = new HashMap<>();
+        request.put("id", "1");
+        request.put("textA", "the quick brown fox jumped qwertyuiop");
+        request.put("textB", "the the the fast brown dog jumped qwertyuiop");
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        MvcResult result = mockMvc.perform(put("/eval/compare")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk())
+                .andReturn();
+
+        String responseBody = result.getResponse().getContentAsString();
+        Map<String, Object> results = objectMapper.readValue(responseBody,
+            new TypeReference<Map<String, Object>>() {});
+
+        // Verify text A statistics - based on original test expectations
+        Integer numTokensA = (Integer) results.get("tika-eval:numTokensA");
+        assertEquals(6, numTokensA.intValue());
+
+        Object oovAObj = results.get("tika-eval:oovA");
+        Double oovA = ((Number) oovAObj).doubleValue();
+        assertEquals(0.166, oovA, 0.01);
+
+        String languageA = (String) results.get("tika-eval:langA");
+        assertNotNull(languageA);
+
+        // Verify similarity metrics - based on original test expectations
+        Object diceObj = results.get("tika-eval:dice");
+        Double dice = ((Number) diceObj).doubleValue();
+        assertEquals(0.666, dice, 0.01);
+
+        Object overlapObj = results.get("tika-eval:overlap");
+        Double overlap = ((Number) overlapObj).doubleValue();
+        assertEquals(0.571, overlap, 0.01);
+    }
+
+    @Test
+    public void testProfileWithTimeout() throws Exception {
+        Map<String, Object> request = new HashMap<>();
+        request.put("id", "timeout-test");
+        request.put("text", "short text for testing timeout functionality");
+        request.put("timeoutMillis", 30000);
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        MvcResult result = mockMvc.perform(put("/eval/profile")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk())
+                .andReturn();
+
+        String responseBody = result.getResponse().getContentAsString();
+        Map<String, Object> results = objectMapper.readValue(responseBody,
+            new TypeReference<Map<String, Object>>() {});
+
+        // Verify basic statistics for the text
+        assertNotNull(results.get("tika-eval:numTokens"));
+        assertNotNull(results.get("tika-eval:numUniqueTokens"));
+    }
+
+    @Test
+    public void testCompareWithTimeout() throws Exception {
+        Map<String, Object> request = new HashMap<>();
+        request.put("id", "compare-timeout-test");
+        request.put("textA", "hello world");
+        request.put("textB", "hello universe");
+        request.put("timeoutMillis", 30000);
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        MvcResult result = mockMvc.perform(put("/eval/compare")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk())
+                .andReturn();
+
+        String responseBody = result.getResponse().getContentAsString();
+        Map<String, Object> results = objectMapper.readValue(responseBody,
+            new TypeReference<Map<String, Object>>() {});
+
+        // Verify both texts have been processed
+        assertNotNull(results.get("tika-eval:numTokensA"));
+        assertNotNull(results.get("tika-eval:numTokensB"));
+
+        // Verify similarity metrics are present
+        assertNotNull(results.get("tika-eval:dice"));
+        assertNotNull(results.get("tika-eval:overlap"));
+    }
+
+    @Test
+    public void testProfileEmptyText() throws Exception {
+        Map<String, String> request = new HashMap<>();
+        request.put("id", "empty-test");
+        request.put("text", "");
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        MvcResult result = mockMvc.perform(put("/eval/profile")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk())
+                .andReturn();
+
+        String responseBody = result.getResponse().getContentAsString();
+        Map<String, Object> results = objectMapper.readValue(responseBody,
+            new TypeReference<Map<String, Object>>() {});
+
+        // Empty text should have zero tokens
+        assertEquals(0, (Integer) results.get("tika-eval:numTokens"));
+        assertEquals(0, (Integer) results.get("tika-eval:numUniqueTokens"));
+        assertEquals(0, (Integer) results.get("tika-eval:numAlphaTokens"));
+    }
+
+    /**
+     * Profiles a text built from 100 generated sentences and checks that more than
+     * 100 tokens are reported and that a unique-token count is present.
+     */
+    @Test
+    public void testProfileLongText() throws Exception {
+        // Create a longer text sample for testing
+        StringBuilder longText = new StringBuilder();
+        for (int i = 0; i < 100; i++) {
+            longText.append("This is sentence number ").append(i).append(". ");
+        }
+
+        Map<String, String> request = new HashMap<>();
+        request.put("id", "long-text-test");
+        request.put("text", longText.toString());
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        MvcResult result = mockMvc.perform(put("/eval/profile")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk())
+                .andReturn();
+
+        String responseBody = result.getResponse().getContentAsString();
+        Map<String, Object> results = objectMapper.readValue(responseBody,
+            new TypeReference<Map<String, Object>>() {});
+
+        // Verify we get reasonable statistics for long text
+        Integer numTokens = (Integer) results.get("tika-eval:numTokens");
+        assertNotNull(numTokens);
+        // Use a JUnit assertion rather than the Java `assert` keyword, which is
+        // silently skipped when the JVM runs without -ea.
+        assertTrue(numTokens > 100, "expected more than 100 tokens but got " + numTokens);
+
+        assertNotNull(results.get("tika-eval:numUniqueTokens"));
+    }
+
+    @Test
+    public void testCompareIdenticalTexts() throws Exception {
+        Map<String, String> request = new HashMap<>();
+        request.put("id", "identical-test");
+        request.put("textA", "the quick brown fox");
+        request.put("textB", "the quick brown fox");
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        MvcResult result = mockMvc.perform(put("/eval/compare")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk())
+                .andReturn();
+
+        String responseBody = result.getResponse().getContentAsString();
+        Map<String, Object> results = objectMapper.readValue(responseBody,
+            new TypeReference<Map<String, Object>>() {});
+
+        // Identical texts should have high similarity scores
+        Double dice = (Double) results.get("tika-eval:dice");
+        assertNotNull(dice);
+        // Dice coefficient should be close to 1.0 for identical texts
+        assert(dice > 0.9);
+
+        // Token counts should be identical
+        assertEquals(results.get("tika-eval:numTokensA"), 
results.get("tika-eval:numTokensB"));
+    }
+
+    @Test
+    public void testCompareCompletelyDifferentTexts() throws Exception {
+        Map<String, String> request = new HashMap<>();
+        request.put("id", "different-test");
+        request.put("textA", "apple banana cherry");
+        request.put("textB", "dog elephant frog");
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        MvcResult result = mockMvc.perform(put("/eval/compare")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk())
+                .andReturn();
+
+        String responseBody = result.getResponse().getContentAsString();
+        Map<String, Object> results = objectMapper.readValue(responseBody,
+            new TypeReference<Map<String, Object>>() {});
+
+        // Completely different texts should have low similarity scores
+        Double dice = (Double) results.get("tika-eval:dice");
+        assertNotNull(dice);
+        // Dice coefficient should be close to 0.0 for completely different 
texts
+        assert(dice < 0.1);
+
+        Double overlap = (Double) results.get("tika-eval:overlap");
+        assertNotNull(overlap);
+        assert(overlap < 0.1);
+    }
+
+    @Test
+    public void testInvalidJsonRequest() throws Exception {
+        String invalidJson = "{invalid json}";
+
+        mockMvc.perform(put("/eval/profile")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(invalidJson.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isBadRequest());
+    }
+
+    @Test
+    public void testMissingRequiredFields() throws Exception {
+        Map<String, String> request = new HashMap<>();
+        request.put("id", "missing-text");
+        // Missing "text" field
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        mockMvc.perform(put("/eval/profile")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isBadRequest());
+    }
+
+    @Test
+    public void testMissingRequiredFieldsCompare() throws Exception {
+        Map<String, String> request = new HashMap<>();
+        request.put("id", "missing-text");
+        request.put("textA", "some text");
+        // Missing "textB" field
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        mockMvc.perform(put("/eval/compare")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isBadRequest());
+    }
+
+    @Test
+    public void testMultipleProfileRequests() throws Exception {
+        // Test that the server can handle multiple concurrent profile requests
+        Map<String, String> request1 = new HashMap<>();
+        request1.put("id", "multi-test-1");
+        request1.put("text", "First test text for profiling.");
+
+        Map<String, String> request2 = new HashMap<>();
+        request2.put("id", "multi-test-2");
+        request2.put("text", "Second test text for profiling analysis.");
+
+        Map<String, String> request3 = new HashMap<>();
+        request3.put("id", "multi-test-3");
+        request3.put("text", "Third test text with different content for 
evaluation.");
+
+        String jsonRequest1 = objectMapper.writeValueAsString(request1);
+        String jsonRequest2 = objectMapper.writeValueAsString(request2);
+        String jsonRequest3 = objectMapper.writeValueAsString(request3);
+
+        // Execute multiple requests to test server status tracking
+        mockMvc.perform(put("/eval/profile")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest1.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk());
+
+        mockMvc.perform(put("/eval/profile")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest2.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk());
+
+        mockMvc.perform(put("/eval/profile")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest3.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk());
+    }
+
+    @Test
+    public void testSpecialCharactersInText() throws Exception {
+        Map<String, String> request = new HashMap<>();
+        request.put("id", "special-chars-test");
+        request.put("text", "Hello, world! This text contains special 
characters: @#$%^&*()_+{}|:<>?");
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        MvcResult result = mockMvc.perform(put("/eval/profile")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk())
+                .andReturn();
+
+        String responseBody = result.getResponse().getContentAsString();
+        Map<String, Object> results = objectMapper.readValue(responseBody,
+            new TypeReference<Map<String, Object>>() {});
+
+        // Should handle special characters gracefully
+        assertNotNull(results.get("tika-eval:numTokens"));
+        assertNotNull(results.get("tika-eval:numAlphaTokens"));
+    }
+
+    @Test
+    public void testUnicodeText() throws Exception {
+        Map<String, String> request = new HashMap<>();
+        request.put("id", "unicode-test");
+        request.put("text", "Héllo wørld! This is tëst tëxt with ūnïcōdē 
characters: 你好世界 🌍");
+
+        String jsonRequest = objectMapper.writeValueAsString(request);
+
+        MvcResult result = mockMvc.perform(put("/eval/profile")
+                .contentType(MediaType.APPLICATION_JSON)
+                .content(jsonRequest.getBytes(StandardCharsets.UTF_8)))
+                .andExpect(status().isOk())
+                .andReturn();
+
+        String responseBody = result.getResponse().getContentAsString();
+        Map<String, Object> results = objectMapper.readValue(responseBody,
+            new TypeReference<Map<String, Object>>() {});
+
+        // Should handle Unicode characters gracefully
+        assertNotNull(results.get("tika-eval:numTokens"));
+    }
+}
diff --git 
a/tika-server/tika-server-spring/src/test/resources/test-tika-config.xml 
b/tika-server/tika-server-spring/src/test/resources/test-tika-config.xml
new file mode 100644
index 000000000..2e2597ad4
--- /dev/null
+++ b/tika-server/tika-server-spring/src/test/resources/test-tika-config.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <!-- Basic configuration for integration tests -->
+  <parsers>
+    <parser class="org.apache.tika.parser.DefaultParser"/>
+  </parsers>
+  
+  <detectors>
+    <detector class="org.apache.tika.detect.DefaultDetector"/>
+    <detector class="org.apache.tika.mime.MimeTypes"/>
+  </detectors>
+  
+  <!-- Server-specific configuration -->
+  <service-loader>
+    <dynamic>true</dynamic>
+  </service-loader>
+  
+  <!-- Metadata configuration -->
+  <metadataFilter class="org.apache.tika.metadata.filter.MetadataFilter">
+    <!-- Allow all metadata for tests -->
+  </metadataFilter>
+</properties>

Reply via email to