This is an automated email from the ASF dual-hosted git repository.

rcordier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git


The following commit(s) were added to refs/heads/master by this push:
     new ab68ea48fd JAMES-4077 [SearchSnippet] Highlight OpenSearch implementation (#2447)
ab68ea48fd is described below

commit ab68ea48fd460fdc4491b8a3372ed00ba7e97c2f
Author: vttran <vtt...@linagora.com>
AuthorDate: Mon Oct 14 11:35:07 2024 +0700

    JAMES-4077 [SearchSnippet] Highlight OpenSearch implementation (#2447)
---
 .../backends/opensearch/search/ScrolledSearch.java |   2 +-
 .../searchhighligt/SearchHighLighterContract.java  |  64 ++++--
 .../lucene/search/LuceneSearchHighlighter.java     |   3 +
 .../OpenSearchListeningMessageSearchIndex.java     |   5 +-
 .../opensearch/json/JsonMessageConstants.java      |   1 -
 .../search/OpenSearchSearchHighlighter.java        |  91 ++++++++
 .../opensearch/search/OpenSearchSearcher.java      |  42 +++-
 .../search/OpenSearchSearchHighlighterTest.java    | 244 +++++++++++++++++++++
 8 files changed, 423 insertions(+), 29 deletions(-)

diff --git a/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java b/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java
index 3a5d22d1ce..8f9ccf209d 100644
--- a/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java
+++ b/backends-common/opensearch/src/main/java/org/apache/james/backends/opensearch/search/ScrolledSearch.java
@@ -72,7 +72,7 @@ public class ScrolledSearch {
         }
 
         Consumer<ScrollResponse<ObjectNode>> onResponse = searchResponse -> {
-            scrollId.set(Optional.of(searchResponse.scrollId()));
+            scrollId.set(Optional.ofNullable(searchResponse.scrollId()));
             sink.next(searchResponse);
 
             if (searchResponse.hits().hits().isEmpty()) {
diff --git a/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java b/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
index e95f5f8d06..e273e95e9c 100644
--- a/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
+++ b/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
@@ -33,6 +33,7 @@ import org.apache.james.mailbox.MessageManager;
 import org.apache.james.mailbox.exception.MailboxException;
 import org.apache.james.mailbox.model.ComposedMessageId;
 import org.apache.james.mailbox.model.MailboxId;
+import org.apache.james.mailbox.model.MessageId;
 import org.apache.james.mailbox.model.MultimailboxesSearchQuery;
 import org.apache.james.mailbox.model.SearchQuery;
 import org.apache.james.mime4j.dom.Message;
@@ -64,14 +65,14 @@ public interface SearchHighLighterContract {
         ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Matthieu for your help")
+                    .setSubject("Hallo, Thx Matthieu for your help")
                     .setBody("append contentA to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
         ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Alex for your help")
+                    .setSubject("Hallo, Thx Alex for your help")
                     .setBody("append contentB to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
@@ -90,7 +91,7 @@ public interface SearchHighLighterContract {
         assertThat(searchSnippets).hasSize(1);
         assertSoftly(softly -> {
             
softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId());
-            
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo!
 Thx <mark>Matthieu</mark> for your help");
+            
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo,
 Thx <mark>Matthieu</mark> for your help");
         });
     }
 
@@ -102,14 +103,14 @@ public interface SearchHighLighterContract {
         ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Matthieu for your help")
+                    .setSubject("Hallo, Thx Matthieu for your help")
                     .setBody("append contentA to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
         ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Alex for your help")
+                    .setSubject("Hallo, Thx Alex for your help")
                     .setBody("append contentB to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
@@ -139,14 +140,14 @@ public interface SearchHighLighterContract {
         ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Matthieu for your help")
+                    .setSubject("Hallo, Thx Matthieu for your help")
                     .setBody("append contentA to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
         ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Alex for your help")
+                    .setSubject("Hallo, Thx Alex for your help")
                     .setBody("append contentB to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
@@ -172,14 +173,14 @@ public interface SearchHighLighterContract {
         ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Naruto for your help")
+                    .setSubject("Hallo, Thx Naruto for your help")
                     .setBody("append Naruto to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
         ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Alex for your help")
+                    .setSubject("Hallo, Thx Alex for your help")
                     .setBody("append contentB to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
@@ -200,7 +201,7 @@ public interface SearchHighLighterContract {
         assertSoftly(softly -> {
             
softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId());
             
softly.assertThat(searchSnippets.getFirst().highlightedBody()).contains("append 
<mark>Naruto</mark> to inbox");
-            
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo!
 Thx <mark>Naruto</mark> for your help");
+            
softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo,
 Thx <mark>Naruto</mark> for your help");
         });
     }
 
@@ -245,7 +246,7 @@ public interface SearchHighLighterContract {
         ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Naruto Itachi for your help")
+                    .setSubject("Hallo, Thx Naruto Itachi for your help")
                     .setBody("append Naruto Itachi to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
@@ -319,7 +320,7 @@ public interface SearchHighLighterContract {
         ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Matthieu for your help")
+                    .setSubject("Hallo, Thx Matthieu for your help")
                     .setBody("append contentA to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
@@ -344,7 +345,7 @@ public interface SearchHighLighterContract {
         ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Matthieu for your help")
+                    .setSubject("Hallo, Thx Matthieu for your help")
                     .setBody("append contentA to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
@@ -364,7 +365,7 @@ public interface SearchHighLighterContract {
 
         // Then highlightSearch should return username1 entry
         assertThat(searchSnippets).hasSize(1);
-        
assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo! Thx 
<mark>Matthieu</mark> for your help");
+        
assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo, Thx 
<mark>Matthieu</mark> for your help");
     }
 
     @Test
@@ -374,14 +375,14 @@ public interface SearchHighLighterContract {
         ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Naruto for your help")
+                    .setSubject("Hallo, Thx Naruto for your help")
                     .setBody("append Naruto to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
         ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Alex for your help")
+                    .setSubject("Hallo, Thx Alex for your help")
                     .setBody("append contentB to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
@@ -402,8 +403,8 @@ public interface SearchHighLighterContract {
         assertThat(searchSnippets.stream()
             .map(SearchSnippet::highlightedSubject)
             .toList())
-            .containsExactlyInAnyOrder(Optional.of("Hallo! Thx 
<mark>Naruto</mark> for your help"),
-                Optional.of("Hallo! Thx <mark>Alex</mark> for your help"));
+            .containsExactlyInAnyOrder(Optional.of("Hallo, Thx 
<mark>Naruto</mark> for your help"),
+                Optional.of("Hallo, Thx <mark>Alex</mark> for your help"));
     }
 
     @Test
@@ -413,14 +414,14 @@ public interface SearchHighLighterContract {
         ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Naruto for your help - Sasuke for 
your help")
+                    .setSubject("Hallo, Thx Naruto for your help - Sasuke for 
your help")
                     .setBody("append Naruto to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
         ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
                 Message.Builder.of()
                     .setTo("to@james.local")
-                    .setSubject("Hallo! Thx Alex for your help")
+                    .setSubject("Hallo, Thx Alex for your help")
                     .setBody("append contentB to inbox", 
StandardCharsets.UTF_8)),
             session).getId();
 
@@ -441,6 +442,27 @@ public interface SearchHighLighterContract {
         assertThat(searchSnippets.stream()
             .map(SearchSnippet::highlightedSubject)
             .toList())
-            .containsExactlyInAnyOrder(Optional.of("Hallo! Thx 
<mark>Naruto</mark> for your help - <mark>Sasuke</mark> for your help"));
+            .containsExactlyInAnyOrder(Optional.of("Hallo, Thx 
<mark>Naruto</mark> for your help - <mark>Sasuke</mark> for your help"));
+    }
+
+    @Test
+    default void highLightSearchShouldReturnEmptyWhenMessageIdsIsEmpty() throws Exception {
+        MailboxSession session = session(USERNAME1);
+        ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
+                Message.Builder.of()
+                    .setTo("to@james.local")
+                    .setSubject("Hallo, Thx Naruto Itachi for your help")
+                    .setBody("append Naruto Itachi to inbox", StandardCharsets.UTF_8)),
+            session).getId();
+
+        verifyMessageWasIndexed(1);
+
+        List<MessageId> messageIdsSearch = List.of();
+
+        assertThat(Flux.from(testee().highlightSearch(messageIdsSearch, MultimailboxesSearchQuery.from(SearchQuery.of(SearchQuery.bodyContains("Naruto Itachi")))
+                .inMailboxes(List.of(m1.getMailboxId()))
+                .build(), session))
+            .collectList()
+            .block()).hasSize(0);
     }
 }
diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
index 6a4facb1de..ee1fdf6edc 100644
--- a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
+++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
@@ -98,6 +98,9 @@ public class LuceneSearchHighlighter implements SearchHighlighter {
 
     @Override
     public Flux<SearchSnippet> highlightSearch(List<MessageId> messageIds, MultimailboxesSearchQuery expression, MailboxSession session) {
+        if (messageIds.isEmpty()) {
+            return Flux.empty();
+        }
         return storeMailboxManager.getInMailboxIds(expression, session)
             .collectList()
             .flatMapMany(inMailboxIdsAccessible -> highlightSearch(inMailboxIdsAccessible, expression.getSearchQuery(), messageIds));
diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java
index 547245de05..9de1fedf31 100644
--- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java
+++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/events/OpenSearchListeningMessageSearchIndex.java
@@ -28,6 +28,7 @@ import static org.apache.james.mailbox.opensearch.json.JsonMessageConstants.IS_U
 import static org.apache.james.mailbox.opensearch.json.JsonMessageConstants.MAILBOX_ID;
 import static org.apache.james.mailbox.opensearch.json.JsonMessageConstants.MESSAGE_ID;
 import static org.apache.james.mailbox.opensearch.json.JsonMessageConstants.UID;
+import static org.apache.james.mailbox.opensearch.search.OpenSearchSearcher.SEARCH_HIGHLIGHT;
 
 import java.util.Collection;
 import java.util.EnumSet;
@@ -324,7 +325,7 @@ public class OpenSearchListeningMessageSearchIndex extends ListeningMessageSearc
         Preconditions.checkArgument(session != null, "'session' is mandatory");
         Optional<Integer> noLimit = Optional.empty();
 
-        return searcher.search(ImmutableList.of(mailbox.getMailboxId()), searchQuery, noLimit, UID_FIELD)
+        return searcher.search(ImmutableList.of(mailbox.getMailboxId()), searchQuery, noLimit, UID_FIELD, !SEARCH_HIGHLIGHT)
             .handle(this::extractUidFromHit);
     }
     
@@ -336,7 +337,7 @@ public class OpenSearchListeningMessageSearchIndex extends ListeningMessageSearc
             return Flux.empty();
         }
 
-        return searcher.search(mailboxIds, searchQuery, Optional.empty(), MESSAGE_ID_FIELD)
+        return searcher.search(mailboxIds, searchQuery, Optional.empty(), MESSAGE_ID_FIELD, !SEARCH_HIGHLIGHT)
             .handle(this::extractMessageIdFromHit)
             .distinct()
             .take(limit);
diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java
index 65cef55261..bb99f9ff85 100644
--- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java
+++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/JsonMessageConstants.java
@@ -45,7 +45,6 @@ public interface JsonMessageConstants {
     String SENT_DATE = "sentDate";
     String SAVE_DATE = "saveDate";
     String ATTACHMENTS = "attachments";
-    String TEXT = "text";
     String MIME_MESSAGE_ID = "mimeMessageID";
     String USER = "user";
 
diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighter.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighter.java
new file mode 100644
index 0000000000..9f676b3a7e
--- /dev/null
+++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighter.java
@@ -0,0 +1,91 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.opensearch.search;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import org.apache.james.mailbox.MailboxSession;
+import org.apache.james.mailbox.model.MailboxId;
+import org.apache.james.mailbox.model.MessageId;
+import org.apache.james.mailbox.model.MultimailboxesSearchQuery;
+import org.apache.james.mailbox.model.SearchQuery;
+import org.apache.james.mailbox.opensearch.json.JsonMessageConstants;
+import org.apache.james.mailbox.searchhighligt.SearchHighlighter;
+import org.apache.james.mailbox.searchhighligt.SearchSnippet;
+import org.apache.james.mailbox.store.StoreMailboxManager;
+import org.opensearch.client.opensearch.core.search.Hit;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import reactor.core.publisher.Flux;
+
+public class OpenSearchSearchHighlighter implements SearchHighlighter {
+    public static final String ATTACHMENT_TEXT_CONTENT_FIELD = JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.TEXT_CONTENT;
+    public static final List<String> SNIPPET_FIELDS = List.of(
+        JsonMessageConstants.MESSAGE_ID,
+        JsonMessageConstants.SUBJECT,
+        JsonMessageConstants.TEXT_BODY,
+        ATTACHMENT_TEXT_CONTENT_FIELD);
+
+    private final OpenSearchSearcher openSearchSearcher;
+    private final StoreMailboxManager storeMailboxManager;
+    private final MessageId.Factory messageIdFactory;
+
+    public OpenSearchSearchHighlighter(OpenSearchSearcher openSearchSearcher, StoreMailboxManager storeMailboxManager, MessageId.Factory messageIdFactory) {
+        this.openSearchSearcher = openSearchSearcher;
+        this.storeMailboxManager = storeMailboxManager;
+        this.messageIdFactory = messageIdFactory;
+    }
+
+    @Override
+    public Flux<SearchSnippet> highlightSearch(List<MessageId> messageIds, MultimailboxesSearchQuery expression, MailboxSession session) {
+        if (messageIds.isEmpty()) {
+            return Flux.empty();
+        }
+
+        return storeMailboxManager.getInMailboxIds(expression, session)
+            .collectList()
+            .flatMapMany(mailboxIds -> highlightSearch(mailboxIds, expression.getSearchQuery(), messageIds.size()));
+    }
+
+    private Flux<SearchSnippet> highlightSearch(List<MailboxId> mailboxIds, SearchQuery query, int limit) {
+        return openSearchSearcher.search(mailboxIds, query, Optional.of(limit), SNIPPET_FIELDS, OpenSearchSearcher.SEARCH_HIGHLIGHT)
+            .map(this::buildSearchSnippet);
+    }
+
+    private SearchSnippet buildSearchSnippet(Hit<ObjectNode> searchResult) {
+        MessageId messageId  = Optional.ofNullable(searchResult.fields().get(JsonMessageConstants.MESSAGE_ID))
+            .map(jsonData -> jsonData.toJson().asJsonArray().getString(0))
+            .map(messageIdFactory::fromString)
+            .orElseThrow(() -> new IllegalStateException("Can not extract MessageID for search result: " + searchResult.id()));
+
+        Map<String, List<String>> highlightHit = searchResult.highlight();
+
+        Optional<String> highlightedSubject =  Optional.ofNullable(highlightHit.get(JsonMessageConstants.SUBJECT))
+            .map(List::getFirst);
+        Optional<String> highlightedTextBody = Optional.ofNullable(highlightHit.get(JsonMessageConstants.TEXT_BODY))
+            .or(() -> Optional.ofNullable(highlightHit.get(ATTACHMENT_TEXT_CONTENT_FIELD)))
+            .map(List::getFirst);
+
+        return new SearchSnippet(messageId, highlightedSubject, highlightedTextBody);
+    }
+}
diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java
index 81b16e3ab3..09f1524de4 100644
--- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java
+++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearcher.java
@@ -19,6 +19,8 @@
 
 package org.apache.james.mailbox.opensearch.search;
 
+import static org.apache.james.mailbox.opensearch.search.OpenSearchSearchHighlighter.ATTACHMENT_TEXT_CONTENT_FIELD;
+
 import java.util.Collection;
 import java.util.List;
 import java.util.Optional;
@@ -31,11 +33,15 @@ import org.apache.james.backends.opensearch.RoutingKey;
 import org.apache.james.backends.opensearch.search.ScrolledSearch;
 import org.apache.james.mailbox.model.MailboxId;
 import org.apache.james.mailbox.model.SearchQuery;
+import org.apache.james.mailbox.opensearch.json.JsonMessageConstants;
 import org.apache.james.mailbox.opensearch.query.QueryConverter;
 import org.apache.james.mailbox.opensearch.query.SortConverter;
+import org.apache.james.mailbox.searchhighligt.SearchHighlighterConfiguration;
 import org.opensearch.client.opensearch._types.SortOptions;
 import org.opensearch.client.opensearch._types.Time;
 import org.opensearch.client.opensearch.core.SearchRequest;
+import org.opensearch.client.opensearch.core.search.Highlight;
+import org.opensearch.client.opensearch.core.search.HighlightField;
 import org.opensearch.client.opensearch.core.search.Hit;
 
 import com.fasterxml.jackson.databind.node.ObjectNode;
@@ -44,6 +50,7 @@ import reactor.core.publisher.Flux;
 
 public class OpenSearchSearcher {
     public static final int DEFAULT_SEARCH_SIZE = 100;
+    public static final boolean SEARCH_HIGHLIGHT = true;
     private static final Time TIMEOUT = new Time.Builder().time("1m").build();
     private static final int MAX_ROUTING_KEY = 5;
 
@@ -52,24 +59,47 @@ public class OpenSearchSearcher {
     private final int size;
     private final AliasName aliasName;
     private final RoutingKey.Factory<MailboxId> routingKeyFactory;
+    private final Highlight highlightQuery;
 
     public OpenSearchSearcher(ReactorOpenSearchClient client, QueryConverter queryConverter, int size,
                               ReadAliasName aliasName, RoutingKey.Factory<MailboxId> routingKeyFactory) {
+        this(client, queryConverter, size, aliasName, routingKeyFactory, SearchHighlighterConfiguration.DEFAULT);
+    }
+
+    public OpenSearchSearcher(ReactorOpenSearchClient client, QueryConverter queryConverter, int size,
+                              ReadAliasName aliasName, RoutingKey.Factory<MailboxId> routingKeyFactory,
+                              SearchHighlighterConfiguration searchHighlighterConfiguration) {
         this.client = client;
         this.queryConverter = queryConverter;
         this.size = size;
         this.aliasName = aliasName;
         this.routingKeyFactory = routingKeyFactory;
+
+        HighlightField highlightField = new HighlightField.Builder()
+            .forceSource(true)
+            .preTags(searchHighlighterConfiguration.preTagFormatter())
+            .postTags(searchHighlighterConfiguration.postTagFormatter())
+            .fragmentSize(searchHighlighterConfiguration.fragmentSize())
+            .numberOfFragments(1)
+            .build();
+
+        this.highlightQuery = new Highlight.Builder()
+            .fields(JsonMessageConstants.SUBJECT, highlightField)
+            .fields(JsonMessageConstants.TEXT_BODY, highlightField)
+            .fields(ATTACHMENT_TEXT_CONTENT_FIELD, highlightField)
+            .build();
     }
 
     public Flux<Hit<ObjectNode>> search(Collection<MailboxId> mailboxIds, SearchQuery query,
-                                        Optional<Integer> limit, List<String> fields) {
-        SearchRequest searchRequest = prepareSearch(mailboxIds, query, limit, fields);
+                                        Optional<Integer> limit, List<String> fields,
+                                        boolean searchHighlight) {
+        SearchRequest searchRequest = prepareSearch(mailboxIds, query, limit, fields, searchHighlight);
         return new ScrolledSearch(client, searchRequest)
             .searchHits();
     }
 
-    private SearchRequest prepareSearch(Collection<MailboxId> mailboxIds, SearchQuery query, Optional<Integer> limit, List<String> fields) {
+    private SearchRequest prepareSearch(Collection<MailboxId> mailboxIds, SearchQuery query,
+                                        Optional<Integer> limit, List<String> fields, boolean highlight) {
         List<SortOptions> sorts = query.getSorts()
             .stream()
             .flatMap(SortConverter::convertSort)
@@ -84,6 +114,10 @@ public class OpenSearchSearcher {
             .storedFields(fields)
             .sort(sorts);
 
+        if (highlight) {
+            request.highlight(highlightQuery);
+        }
+
         return toRoutingKey(mailboxIds)
             .map(request::routing)
             .orElse(request)
@@ -101,7 +135,7 @@ public class OpenSearchSearcher {
     }
 
     private int computeRequiredSize(Optional<Integer> limit) {
-        return limit.map(value -> Math.min(value.intValue(), size))
+        return limit.map(value -> Math.min(value, size))
             .orElse(size);
     }
 
diff --git a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighterTest.java b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighterTest.java
new file mode 100644
index 0000000000..a0d73e19ba
--- /dev/null
+++ b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/search/OpenSearchSearchHighlighterTest.java
@@ -0,0 +1,244 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+package org.apache.james.mailbox.opensearch.search;
+
+import static 
org.apache.james.mailbox.opensearch.search.OpenSearchSearcherTest.SEARCH_SIZE;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.awaitility.Durations.ONE_HUNDRED_MILLISECONDS;
+import static org.junit.jupiter.api.Assumptions.assumeTrue;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.time.ZoneId;
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.james.backends.opensearch.DockerOpenSearchExtension;
+import org.apache.james.backends.opensearch.IndexName;
+import org.apache.james.backends.opensearch.OpenSearchIndexer;
+import org.apache.james.backends.opensearch.ReactorOpenSearchClient;
+import org.apache.james.backends.opensearch.ReadAliasName;
+import org.apache.james.backends.opensearch.WriteAliasName;
+import org.apache.james.core.Username;
+import org.apache.james.mailbox.MailboxManager;
+import org.apache.james.mailbox.MailboxSession;
+import org.apache.james.mailbox.MessageManager;
+import org.apache.james.mailbox.inmemory.InMemoryMessageId;
+import org.apache.james.mailbox.inmemory.manager.InMemoryIntegrationResources;
+import org.apache.james.mailbox.model.ComposedMessageId;
+import org.apache.james.mailbox.model.Mailbox;
+import org.apache.james.mailbox.model.MailboxACL;
+import org.apache.james.mailbox.model.MailboxId;
+import org.apache.james.mailbox.model.MailboxPath;
+import org.apache.james.mailbox.model.MessageId;
+import org.apache.james.mailbox.model.MultimailboxesSearchQuery;
+import org.apache.james.mailbox.model.SearchQuery;
+import org.apache.james.mailbox.opensearch.IndexAttachments;
+import org.apache.james.mailbox.opensearch.IndexHeaders;
+import org.apache.james.mailbox.opensearch.MailboxIdRoutingKeyFactory;
+import org.apache.james.mailbox.opensearch.MailboxIndexCreationUtil;
+import org.apache.james.mailbox.opensearch.OpenSearchMailboxConfiguration;
+import 
org.apache.james.mailbox.opensearch.events.OpenSearchListeningMessageSearchIndex;
+import org.apache.james.mailbox.opensearch.json.MessageToOpenSearchJson;
+import org.apache.james.mailbox.opensearch.query.CriterionConverter;
+import org.apache.james.mailbox.opensearch.query.QueryConverter;
+import org.apache.james.mailbox.searchhighligt.SearchHighLighterContract;
+import org.apache.james.mailbox.searchhighligt.SearchHighlighter;
+import org.apache.james.mailbox.searchhighligt.SearchSnippet;
+import org.apache.james.mailbox.store.StoreMailboxManager;
+import org.apache.james.mailbox.store.StoreMessageManager;
+import org.apache.james.mailbox.store.search.MessageSearchIndex;
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.apache.james.mailbox.tika.TikaExtension;
+import org.apache.james.mailbox.tika.TikaHttpClientImpl;
+import org.apache.james.mailbox.tika.TikaTextExtractor;
+import org.apache.james.metrics.tests.RecordingMetricFactory;
+import org.apache.james.mime4j.dom.Message;
+import org.apache.james.util.ClassLoaderUtils;
+import org.awaitility.Awaitility;
+import org.awaitility.Durations;
+import org.awaitility.core.ConditionFactory;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.RegisterExtension;
+
+import com.github.fge.lambdas.Throwing;
+import com.google.common.collect.ImmutableSet;
+
+import reactor.core.publisher.Flux;
+
+/**
+ * Runs the shared {@link SearchHighLighterContract} against {@link OpenSearchSearchHighlighter}.
+ *
+ * <p>Each test builds an in-memory mailbox manager whose listening search index writes to the
+ * OpenSearch instance exposed by {@link DockerOpenSearchExtension}. A Tika-backed text extractor
+ * is handed to the JSON serializer, which is configured with {@code IndexAttachments.YES}.
+ */
+public class OpenSearchSearchHighlighterTest implements SearchHighLighterContract {
+    /** Awaitility template: first poll after 100 ms, then one poll every 100 ms. */
+    private static final ConditionFactory CALMLY_AWAIT = Awaitility
+        .with().pollInterval(ONE_HUNDRED_MILLISECONDS)
+        .and().pollDelay(ONE_HUNDRED_MILLISECONDS)
+        .await();
+
+    private MessageSearchIndex messageSearchIndex;
+    private StoreMailboxManager storeMailboxManager;
+    private StoreMessageManager inboxMessageManager;
+    private OpenSearchSearchHighlighter testee;
+
+    @RegisterExtension
+    static TikaExtension tika = new TikaExtension();
+
+    // NOTE(review): no cleanup strategy is requested; presumably acceptable because setUp()
+    // generates fresh random index and alias names for every test - confirm.
+    @RegisterExtension
+    static DockerOpenSearchExtension openSearch = new DockerOpenSearchExtension(DockerOpenSearchExtension.CleanupStrategy.NONE);
+    static ReactorOpenSearchClient client;
+    static TikaTextExtractor textExtractor;
+
+    /**
+     * Creates, once for the whole class, the OpenSearch client and the Tika text extractor
+     * shared by all tests.
+     */
+    @BeforeAll
+    static void setUpAll() throws Exception {
+        client = openSearch.getDockerOpenSearch().clientProvider().get();
+        textExtractor = new TikaTextExtractor(new RecordingMetricFactory(),
+            new TikaHttpClientImpl(TikaConfiguration.builder()
+                .host(tika.getIp())
+                .port(tika.getPort())
+                .timeoutInMillis(tika.getTimeoutInMillis())
+                .build()));
+    }
+
+    /** Closes the shared OpenSearch client once every test has run. */
+    @AfterAll
+    static void tearDown() throws IOException {
+        client.close();
+    }
+
+    /**
+     * Prepares an index with random index/alias names, wires an in-memory mailbox manager to
+     * it, creates the inbox of {@code USERNAME1} and instantiates the highlighter under test.
+     */
+    @BeforeEach
+    public void setUp() throws Exception {
+        // Random names: every test method works on its own index and aliases.
+        WriteAliasName writeAliasName = new WriteAliasName(UUID.randomUUID().toString());
+        ReadAliasName readAliasName = new ReadAliasName(UUID.randomUUID().toString());
+        IndexName indexName = new IndexName(UUID.randomUUID().toString());
+        MailboxIndexCreationUtil.prepareClient(
+            client, readAliasName, writeAliasName, indexName,
+            openSearch.getDockerOpenSearch().configuration());
+
+        MailboxIdRoutingKeyFactory routingKeyFactory = new MailboxIdRoutingKeyFactory();
+        OpenSearchMailboxConfiguration openSearchMailboxConfiguration = OpenSearchMailboxConfiguration.builder()
+            .optimiseMoves(false)
+            .textFuzzinessSearch(false)
+            .build();
+        final MessageId.Factory messageIdFactory = new InMemoryMessageId.Factory();
+
+        // The same searcher instance backs both the search index and the highlighter.
+        OpenSearchSearcher openSearchSearcher = new OpenSearchSearcher(client,
+            new QueryConverter(new CriterionConverter(openSearchMailboxConfiguration)), SEARCH_SIZE,
+            readAliasName, routingKeyFactory);
+
+        InMemoryIntegrationResources resources = InMemoryIntegrationResources.builder()
+            .preProvisionnedFakeAuthenticator()
+            .fakeAuthorizator()
+            .inVmEventBus()
+            .defaultAnnotationLimits()
+            .defaultMessageParser()
+            .listeningSearchIndex(preInstanciationStage -> new OpenSearchListeningMessageSearchIndex(
+                preInstanciationStage.getMapperFactory(),
+                ImmutableSet.of(),
+                new OpenSearchIndexer(client,
+                    writeAliasName),
+                openSearchSearcher,
+                new MessageToOpenSearchJson(textExtractor, ZoneId.of("Europe/Paris"), IndexAttachments.YES, IndexHeaders.YES),
+                preInstanciationStage.getSessionProvider(), routingKeyFactory, messageIdFactory,
+                openSearchMailboxConfiguration, new RecordingMetricFactory()))
+            .noPreDeletionHooks()
+            .storeQuotaManager()
+            .build();
+
+        storeMailboxManager = resources.getMailboxManager();
+        messageSearchIndex = resources.getSearchIndex();
+        MailboxSession session = storeMailboxManager.createSystemSession(USERNAME1);
+        MailboxPath inboxPath = MailboxPath.inbox(USERNAME1);
+        storeMailboxManager.createMailbox(inboxPath, session);
+        inboxMessageManager = (StoreMessageManager) storeMailboxManager.getMailbox(inboxPath, session);
+
+        testee = new OpenSearchSearchHighlighter(openSearchSearcher, storeMailboxManager, messageIdFactory);
+    }
+
+    /** Returns the highlighter instance built by {@link #setUp()}. */
+    @Override
+    public SearchHighlighter testee() {
+        return testee;
+    }
+
+    /** Opens a system session for {@code username} on the in-memory mailbox manager. */
+    @Override
+    public MailboxSession session(Username username) {
+        return storeMailboxManager.createSystemSession(username);
+    }
+
+    /**
+     * Appends a message to the inbox created in {@link #setUp()}; checked exceptions are
+     * rethrown through {@code Throwing.supplier}.
+     */
+    @Override
+    public MessageManager.AppendResult appendMessage(MessageManager.AppendCommand appendCommand, MailboxSession session) {
+        return Throwing.supplier(() -> inboxMessageManager.appendMessage(appendCommand, session)).get();
+    }
+
+    /**
+     * Creates a new mailbox with a randomly generated name and returns its id.
+     *
+     * @param username user whose session is used to create the mailbox
+     * @return the id of the freshly created mailbox
+     */
+    @Override
+    public MailboxId randomMailboxId(Username username) {
+        // A UUID yields a distinct, printable name on every call. The former
+        // new String(new byte[8]) always produced the same eight NUL characters, so two calls
+        // within one test targeted the very same mailbox path.
+        String random = UUID.randomUUID().toString();
+        // NOTE(review): the path is built for USERNAME1 while the session belongs to
+        // `username`; confirm whether the path should follow `username` as well.
+        return Throwing.supplier(() -> storeMailboxManager.createMailbox(MailboxPath.forUser(USERNAME1, random), session(username)).get()).get();
+    }
+
+    /**
+     * Grants {@code MailboxACL.FULL_RIGHTS} to {@code delegated}, acting with the session of
+     * {@code owner}.
+     */
+    @Override
+    public void applyRightsCommand(MailboxId mailboxId, Username owner, Username delegated) {
+        // NOTE(review): `mailboxId` is not used; the rights are always applied to the inbox
+        // created in setUp(). Confirm that the contract only ever passes the inbox id.
+        Mailbox mailbox = inboxMessageManager.getMailboxEntity();
+        Throwing.runnable(() -> storeMailboxManager.applyRightsCommand(mailbox.generateAssociatedPath(),
+            MailboxACL.command().forUser(delegated).rights(MailboxACL.FULL_RIGHTS).asAddition(),
+            session(owner))).run();
+    }
+
+    /**
+     * Waits up to ten seconds until an unfiltered search of the inbox, run with the session of
+     * {@code USERNAME1}, returns exactly {@code indexedMessageCount} results.
+     */
+    @Override
+    public void verifyMessageWasIndexed(int indexedMessageCount) {
+        CALMLY_AWAIT.atMost(Durations.TEN_SECONDS)
+            .untilAsserted(() -> assertThat(messageSearchIndex.search(session(USERNAME1), inboxMessageManager.getMailboxEntity(), SearchQuery.of()).toStream().count())
+                .isEqualTo(indexedMessageCount));
+    }
+
+    /**
+     * Searching a keyword that appears only in an attachment must produce a single snippet
+     * whose highlighted body comes from the attachment text.
+     */
+    @Test
+    void shouldHighlightAttachmentTextContentWhenTextBodyDoesNotMatch() throws Exception {
+        assumeTrue(storeMailboxManager.getSupportedSearchCapabilities().contains(MailboxManager.SearchCapabilities.Attachment));
+        MailboxSession session = session(USERNAME1);
+
+        ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
+                Message.Builder.of()
+                    .setTo("to@james.local")
+                    .setSubject("Hallo, Thx Matthieu for your help")
+                    .setBody("append contentA to inbox", StandardCharsets.UTF_8)),
+            session).getId();
+
+        // m2 has an attachment with text content: "This is a beautiful banana"
+        ComposedMessageId m2 = inboxMessageManager.appendMessage(
+            MessageManager.AppendCommand.builder()
+                .build(ClassLoaderUtils.getSystemResourceAsSharedStream("eml/emailWithTextAttachment.eml")),
+            session).getId();
+
+        verifyMessageWasIndexed(2);
+
+        String keywordSearch = "beautiful";
+        MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of(
+                new SearchQuery.ConjunctionCriterion(SearchQuery.Conjunction.OR,
+                    List.of(SearchQuery.bodyContains(keywordSearch),
+                        SearchQuery.attachmentContains(keywordSearch)))))
+            .inMailboxes(List.of(m1.getMailboxId(), m2.getMailboxId()))
+            .build();
+
+        List<SearchSnippet> searchSnippets = Flux.from(testee().highlightSearch(List.of(m1.getMessageId(), m2.getMessageId()), multiMailboxSearch, session))
+            .collectList()
+            .block();
+
+        assertThat(searchSnippets).hasSize(1);
+        assertThat(searchSnippets.getFirst().highlightedBody()).contains("This is a <mark>beautiful</mark> banana.");
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org
For additional commands, e-mail: notifications-h...@james.apache.org


Reply via email to