This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git


The following commit(s) were added to refs/heads/master by this push:
     new 4181de38b2 JAMES-2456 Add a reactor timeout for Tika requests (#2437)
4181de38b2 is described below

commit 4181de38b23cad9a12c64498ea5db49427453e5e
Author: Trần Hồng Quân <55171818+quantranhong1...@users.noreply.github.com>
AuthorDate: Mon Oct 7 19:13:02 2024 +0700

    JAMES-2456 Add a reactor timeout for Tika requests (#2437)
---
 docs/modules/servers/partials/configure/tika.adoc  |  2 +-
 .../json/MessageToOpenSearchJsonTest.java          | 27 +++++++
 .../nonTextualWithoutAttachmentTextContent.json    | 83 ++++++++++++++++++++++
 .../james/mailbox/tika/TikaHttpClientImpl.java     |  3 +-
 .../apache/james/mailbox/tika/TikaContainer.java   |  6 +-
 .../james/mailbox/tika/TikaTextExtractorTest.java  | 17 +++++
 6 files changed, 132 insertions(+), 6 deletions(-)

diff --git a/docs/modules/servers/partials/configure/tika.adoc b/docs/modules/servers/partials/configure/tika.adoc
index 4e2ae16662..c12b693c34 100644
--- a/docs/modules/servers/partials/configure/tika.adoc
+++ b/docs/modules/servers/partials/configure/tika.adoc
@@ -26,7 +26,7 @@ Defaults to false.
 | Port of your tika server. The default value is 9998
 
 | tika.timeoutInMillis
-| Timeout when issuing request to the tika server. The default value is 3 seconds.
+| Timeout when issuing request to the tika server. The default value is 30 seconds.
 
 | tika.cache.eviction.period
 | A cache is used to avoid, when possible, query Tika multiple time for the same attachments.
diff --git a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java
index 3ef2e058d1..0908728920 100644
--- a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java
+++ b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java
@@ -484,6 +484,33 @@ class MessageToOpenSearchJsonTest {
                 
ClassLoaderUtils.getSystemResourceAsString("eml/nonTextual.json", 
StandardCharsets.UTF_8));
     }
 
+    @Test
+    void 
convertMessageShouldSucceedWithAttachmentTextContentBeNullWhenTikaReadTimeout() 
throws Exception {
+        textExtractor = new TikaTextExtractor(new RecordingMetricFactory(), 
new TikaHttpClientImpl(TikaConfiguration.builder()
+            .host(tika.getIp())
+            .port(tika.getPort())
+            .timeoutInMillis(1)
+            .build()));
+
+        MessageToOpenSearchJson messageToOpenSearchJson = new 
MessageToOpenSearchJson(
+            textExtractor,
+            ZoneId.of("Europe/Paris"),
+            IndexAttachments.YES, IndexHeaders.YES);
+        MailboxMessage spamMail = new SimpleMailboxMessage(MESSAGE_ID, 
THREAD_ID, date,
+            SIZE,
+            BODY_START_OCTET,
+            new 
ByteContent(IOUtils.toByteArray(ClassLoaderUtils.getSystemResourceAsSharedStream("eml/nonTextual.eml"))),
+            new Flags(),
+            propertyBuilder.build(),
+            MAILBOX_ID);
+        spamMail.setUid(UID);
+        spamMail.setModSeq(MOD_SEQ);
+
+        assertThatJson(messageToOpenSearchJson.convertToJson(spamMail).block())
+            .when(IGNORING_ARRAY_ORDER)
+            
.isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/nonTextualWithoutAttachmentTextContent.json",
 StandardCharsets.UTF_8));
+    }
+
     @Test
     void convertToJsonWithoutAttachmentShouldConvertEmailBoby() throws 
IOException {
         // Given
diff --git 
a/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json
 
b/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json
new file mode 100644
index 0000000000..cf5be90ccb
--- /dev/null
+++ 
b/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json
@@ -0,0 +1,83 @@
+{
+  "attachments": [
+    {
+      "mediaType":"application",
+      "subtype":"vnd.oasis.opendocument.text",
+      "fileName":"toto.odt","fileExtension":"odt",
+      "contentDisposition":"attachment",
+      "textContent": null
+    }
+  ],
+  "bcc":[],
+  "htmlBody":null,
+  "textBody":"This mail have a non textual attachment !\r\n",
+  "cc":[],
+  "date":"2015-06-07T00:00:00+0200",
+  "saveDate": null,
+  "from":[{
+    "name":"Benoit Tellier",
+    "address":"btell...@linagora.com",
+    "domain":"linagora"
+  }],
+  "hasAttachment":false,
+  "headers": [{
+    "name": "return-path",
+    "value": "<btell...@linagora.com>"
+  }, {
+    "name": "received",
+    "value": "from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53])\t 
by imap (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA;\t Thu, 18 Jun 
2015 12:43:28 +0200"
+  }, {
+    "name": "x-sieve",
+    "value": "CMU Sieve 2.2"
+  }, {
+    "name": "received",
+    "value": "from [10.75.9.154] (unknown [92.103.166.6])\t(using TLSv1 with 
cipher DHE-RSA-AES128-SHA (128/128 bits))\t(No client certificate 
requested)\tby alderaan.linagora.com (Postfix) with ESMTPSA id 0EB1078A\tfor 
<btell...@linagora.com>; Thu, 18 Jun 2015 12:43:28 +0200 (CEST)"
+  }, {
+    "name": "to",
+    "value": "btell...@linagora.com"
+  }, {
+    "name": "from",
+    "value": "Benoit Tellier <btell...@linagora.com>"
+  }, {
+    "name": "subject",
+    "value": "Test message"
+  }, {
+    "name": "message-id",
+    "value": "<5582a0ce.4020...@linagora.com>"
+  }, {
+    "name": "date",
+    "value": "Thu, 18 Jun 2015 12:43:26 +0200"
+  }, {
+    "name": "user-agent",
+    "value": "Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 
Thunderbird/38.0.1"
+  }, {
+    "name": "mime-version",
+    "value": "1.0"
+  }, {
+    "name": "content-type",
+    "value": "multipart/mixed; 
boundary=\"------------030000010109090603040500\""
+  }],
+  "mailboxId":"18",
+  "mediaType":"plain",
+  "messageId":"184",
+  "threadId": "184",
+  "modSeq":42,
+  "sentDate":"2015-06-18T12:43:26+0200",
+  "size":25,
+  "subject":["Test message"],
+  "subtype":"text",
+  "to":[{
+    "name":null,
+    "address":"btell...@linagora.com",
+    "domain":"linagora"
+  }],
+  "uid":25,
+  "userFlags":[],
+  "mimeMessageID":"<5582a0ce.4020...@linagora.com>",
+  "isAnswered":false,
+  "isDeleted":false,
+  "isDraft":false,
+  "isFlagged":false,
+  "isRecent":false,
+  "isUnread":true
+}
diff --git 
a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClientImpl.java
 
b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClientImpl.java
index a3227cbf6a..ca7f1a4ce9 100644
--- 
a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClientImpl.java
+++ 
b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaHttpClientImpl.java
@@ -83,8 +83,9 @@ public class TikaHttpClientImpl implements TikaHttpClient {
                     return Mono.empty();
                 }
             })
+            .timeout(Duration.ofMillis(tikaConfiguration.getTimeoutInMillis()))
             .onErrorResume(e -> {
-                LOGGER.warn("Failing to call Tika for content type {}", contentType, e);
+                LOGGER.warn("Failing to call Tika for content type '{}'", contentType.asString(), e);
                 return Mono.empty();
             });
     }
diff --git 
a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaContainer.java 
b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaContainer.java
index b0e6a42346..fe9a66a739 100644
--- 
a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaContainer.java
+++ 
b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaContainer.java
@@ -18,9 +18,10 @@
  ****************************************************************/
 package org.apache.james.mailbox.tika;
 
+import static 
org.apache.james.mailbox.tika.TikaConfiguration.DEFAULT_TIMEOUT_IN_MS;
+
 import java.time.Duration;
 import java.util.UUID;
-import java.util.concurrent.TimeUnit;
 
 import org.apache.james.util.docker.DockerContainer;
 import org.apache.james.util.docker.Images;
@@ -28,12 +29,9 @@ import org.apache.james.util.docker.RateLimiters;
 import org.junit.rules.ExternalResource;
 import org.testcontainers.containers.wait.strategy.Wait;
 
-import com.google.common.primitives.Ints;
-
 public class TikaContainer extends ExternalResource {
     
     private static final int DEFAULT_TIKA_PORT = 9998;
-    private static final int DEFAULT_TIMEOUT_IN_MS = 
Ints.checkedCast(TimeUnit.MINUTES.toMillis(3));
 
     private final DockerContainer tika;
 
diff --git 
a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java
 
b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java
index 6c0f76ac0f..150859fd13 100644
--- 
a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java
+++ 
b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java
@@ -26,6 +26,7 @@ import java.io.ByteArrayInputStream;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
+import java.util.concurrent.TimeoutException;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.james.mailbox.extractor.ParsedContent;
@@ -76,6 +77,22 @@ class TikaTextExtractorTest {
             .contains("This is some awesome text text.");
     }
 
+    @Test
+    void shouldReturnEmptyParsedContentWhenTimeoutTriggered() throws Exception 
{
+        textExtractor = new TikaTextExtractor(new RecordingMetricFactory(), 
new TikaHttpClientImpl(TikaConfiguration.builder()
+            .host(tika.getIp())
+            .port(tika.getPort())
+            .timeoutInMillis(1)
+            .build()));
+
+        InputStream inputStream = 
ClassLoader.getSystemResourceAsStream("documents/writter.docx");
+
+        assertThat(textExtractor.extractContentReactive(inputStream,
+                
ContentType.of("application/vnd.openxmlformats-officedocument.wordprocessingml.document"))
+            .block())
+            .isEqualTo(ParsedContent.empty());
+    }
+
     @Test
     void textMicrosoftWorldTest() throws Exception {
         InputStream inputStream = 
ClassLoader.getSystemResourceAsStream("documents/writter.docx");


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org
For additional commands, e-mail: notifications-h...@james.apache.org

Reply via email to