This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new b385819af TIKA-4510: Fix nondeterministic failures in LanguageResourceTest (#2360)
b385819af is described below
commit b385819af53e5bc6c580b56876d9d62a098bdb4f
Author: Luca Dai <[email protected]>
AuthorDate: Thu Oct 9 02:47:53 2025 -0500
TIKA-4510: Fix nondeterministic failures in LanguageResourceTest (#2360)
Co-authored-by: LucaD <[email protected]>
---
.../server/core/resource/LanguageResource.java | 37 ++++++++++++++++------
1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/LanguageResource.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/LanguageResource.java
index 8e2a2b1df..495b91335 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/LanguageResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/LanguageResource.java
@@ -38,27 +38,45 @@ import org.apache.tika.language.detect.LanguageResult;
public class LanguageResource {
private static final Logger LOG = LoggerFactory.getLogger(LanguageResource.class);
+ // TIKA-4510: handle @PUT and @POST separately to avoid nondeterministic failures
@PUT
+ @Path("/stream")
+ @Consumes("*/*")
+ @Produces("text/plain")
+ public String detectPutStream(final InputStream is) throws IOException {
+ return detectStream(is);
+ }
+
@POST
@Path("/stream")
@Consumes("*/*")
@Produces("text/plain")
- public String detect(final InputStream is) throws IOException {
- String fileTxt = IOUtils.toString(is, UTF_8);
- LanguageResult language = new OptimaizeLangDetector()
- .loadModels()
- .detect(fileTxt);
- String detectedLang = language.getLanguage();
- LOG.info("Detecting language for incoming resource: [{}]", detectedLang);
- return detectedLang;
+ public String detectPostStream(final InputStream is) throws IOException {
+ return detectStream(is);
}
@PUT
+ @Path("/string")
+ @Consumes("*/*")
+ @Produces("text/plain")
+ public String detectPutString(final String string) throws IOException {
+ return detectString(string);
+ }
+
@POST
@Path("/string")
@Consumes("*/*")
@Produces("text/plain")
- public String detect(final String string) throws IOException {
+ public String detectPostString(final String string) throws IOException {
+ return detectString(string);
+ }
+
+ private String detectStream(InputStream is) throws IOException {
+ String fileTxt = IOUtils.toString(is, UTF_8);
+ return detectString(fileTxt);
+ }
+
+ private String detectString(String string) throws IOException {
LanguageResult language = new OptimaizeLangDetector()
.loadModels()
.detect(string);
@@ -66,5 +84,4 @@ public class LanguageResource {
LOG.info("Detecting language for incoming resource: [{}]", detectedLang);
return detectedLang;
}
-
}