This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git


The following commit(s) were added to refs/heads/master by this push:
     new a6f9f32d8c JAMES-4100 Improve Search Snippet display (#2583)
a6f9f32d8c is described below

commit a6f9f32d8c0a63f262b62391d3ec1d15510ad29b
Author: hungphan227 <45198168+hungphan...@users.noreply.github.com>
AuthorDate: Wed Jan 22 15:18:30 2025 +0700

    JAMES-4100 Improve Search Snippet display (#2583)
    
    Co-authored-by: hung phan <hp...@linagora.com>
---
 .../searchhighligt/SearchHighLighterContract.java  | 39 ++++++++++++++++++++++
 .../lucene/search/LuceneIndexableDocument.java     |  2 +-
 .../lucene/search/LuceneSearchHighlighter.java     |  2 ++
 .../mailbox/opensearch/json/IndexableMessage.java  |  2 +-
 .../james/mailbox/store/search/SearchUtil.java     | 17 ++++++++++
 .../contract/SearchSnippetGetMethodContract.scala  |  6 ++--
 6 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java b/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
index 2181140be9..998b8f2ac2 100644
--- a/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
+++ b/mailbox/api/src/test/java/org/apache/james/mailbox/searchhighligt/SearchHighLighterContract.java
@@ -532,4 +532,43 @@ public interface SearchHighLighterContract {
             
softly.assertThat(searchSnippets.getFirst().highlightedBody().get()).contains("<mark>barcamp</mark>");
         });
     }
+
+    @Test
+    default void highlightSearchShouldShortenGreaterThanCharacters() throws 
Exception {
+        MailboxSession session = session(USERNAME1);
+
+        // Given m1,m2 with m1 has body containing the searched word (contentA)
+        ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
+                Message.Builder.of()
+                    .setTo("to@james.local")
+                    .setSubject("Hallo, Thx Matthieu for your help")
+                    .setBody("Start \n>>>>>>>>>> append contentA to > inbox 
\n>>>>>> End",
+                        StandardCharsets.UTF_8)),
+            session).getId();
+
+        ComposedMessageId m2 = appendMessage(MessageManager.AppendCommand.from(
+                Message.Builder.of()
+                    .setTo("to@james.local")
+                    .setSubject("Hallo, Thx Alex for your help")
+                    .setBody("append contentB to inbox", 
StandardCharsets.UTF_8)),
+            session).getId();
+
+        verifyMessageWasIndexed(2);
+
+        // When searching for the word (contentA) in the body
+        MultimailboxesSearchQuery multiMailboxSearch = 
MultimailboxesSearchQuery.from(SearchQuery.of(
+                SearchQuery.bodyContains("contentA")))
+            .inMailboxes(List.of(m1.getMailboxId(), m2.getMailboxId()))
+            .build();
+
+        // Then highlightSearch should return the SearchSnippet with the 
highlightedBody containing the word (contentA)
+        List<SearchSnippet> searchSnippets = 
Flux.from(testee().highlightSearch(List.of(m1.getMessageId(), 
m2.getMessageId()), multiMailboxSearch, session))
+            .collectList()
+            .block();
+        assertThat(searchSnippets).hasSize(1);
+        assertSoftly(softly -> {
+            
softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId());
+            
softly.assertThat(searchSnippets.getFirst().highlightedBody().get()).isEqualTo("Start
 \n append <mark>contentA</mark> to &gt; inbox \n End");
+        });
+    }
 }
diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
index bee20308d2..4f14d14c3f 100644
--- a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
+++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
@@ -195,7 +195,7 @@ public class LuceneIndexableDocument {
         doc.add(new TextField(BCC_FIELD, 
uppercase(EMailers.from(headerCollection.getBccAddressSet()).serialize()), 
Field.Store.YES));
 
         // index body
-        Optional<String> bodyText = mimePartExtracted.locateFirstTextBody();
+        Optional<String> bodyText = 
mimePartExtracted.locateFirstTextBody().map(SearchUtil::removeGreaterThanCharactersAtBeginningOfLine);
         Optional<String> bodyHtml = mimePartExtracted.locateFirstHtmlBody();
 
         bodyText.or(() -> bodyHtml)
diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
index c398865df8..8a25d922c4 100644
--- a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
+++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
@@ -57,6 +57,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.highlight.Formatter;
 import org.apache.lucene.search.highlight.Highlighter;
 import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
 import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
 
@@ -126,6 +127,7 @@ public class LuceneSearchHighlighter implements SearchHighlighter {
         Query query = buildQueryFromSearchQuery(searchQuery);
         QueryScorer scorer = new QueryScorer(query);
         Highlighter highlighter = new Highlighter(formatter, scorer);
+        highlighter.setEncoder(new SimpleHTMLEncoder());
         highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 
configuration.fragmentSize()));
         return highlighter;
     }
diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
index 722d8b8988..8bfaf646f8 100644
--- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
+++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
@@ -135,7 +135,7 @@ public class IndexableMessage {
                 .asMimePart(textExtractor)
                 .map(parsingResult -> {
 
-                    Optional<String> bodyText = 
parsingResult.locateFirstTextBody();
+                    Optional<String> bodyText = 
parsingResult.locateFirstTextBody().map(SearchUtil::removeGreaterThanCharactersAtBeginningOfLine);
                     Optional<String> bodyHtml = 
parsingResult.locateFirstHtmlBody();
 
                     boolean hasAttachment = 
MessageAttachmentMetadata.hasNonInlinedAttachment(message.getAttachments());
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
index 8c7686f60c..0a90b132b6 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
@@ -473,5 +473,22 @@ public class SearchUtil {
         };
     }
 
+    public static String removeGreaterThanCharactersAtBeginningOfLine(String 
text) {
+        StringBuilder result = new StringBuilder();
+        boolean isNewLine = false;
 
+        for (int i = 0; i < text.length(); i++) {
+            char current = text.charAt(i);
+
+            if (current == '\n') {
+                isNewLine = true;
+                result.append(current);
+            } else if (!isNewLine || current != '>') {
+                result.append(current);
+                isNewLine = false;
+            }
+        }
+
+        return result.toString();
+    }
 }
diff --git a/server/protocols/jmap-rfc-8621-integration-tests/jmap-rfc-8621-integration-tests-common/src/main/scala/org/apache/james/jmap/rfc8621/contract/SearchSnippetGetMethodContract.scala b/server/protocols/jmap-rfc-8621-integration-tests/jmap-rfc-8621-integration-tests-common/src/main/scala/org/apache/james/jmap/rfc8621/contract/SearchSnippetGetMethodContract.scala
index 7246e6a80e..17cd4d3cf9 100644
--- a/server/protocols/jmap-rfc-8621-integration-tests/jmap-rfc-8621-integration-tests-common/src/main/scala/org/apache/james/jmap/rfc8621/contract/SearchSnippetGetMethodContract.scala
+++ b/server/protocols/jmap-rfc-8621-integration-tests/jmap-rfc-8621-integration-tests-common/src/main/scala/org/apache/james/jmap/rfc8621/contract/SearchSnippetGetMethodContract.scala
@@ -529,12 +529,12 @@ trait SearchSnippetGetMethodContract {
              |    "list": [
              |      {
              |        "emailId": "${messageId1.serialize}",
-             |        "subject": "Weekly report - <mark>vttran</mark> 
27/02-03/03/2023",
+             |        "subject": "Weekly report - <mark>vttran</mark> 
27&#x2F;02-03&#x2F;03&#x2F;2023",
              |        "preview": null
              |      },
              |      {
              |        "emailId": "${messageId2.serialize}",
-             |        "subject": "Weekly report - <mark>vttran</mark> 
19/08-23/08/2024",
+             |        "subject": "Weekly report - <mark>vttran</mark> 
19&#x2F;08-23&#x2F;08&#x2F;2024",
              |        "preview": null
              |      }
              |    ],
@@ -610,7 +610,7 @@ trait SearchSnippetGetMethodContract {
              |    "list": [
              |      {
              |        "emailId": "${messageId1.serialize}",
-             |        "subject": "Weekly report - <mark>vttran</mark> 
27/02-03/03/2023",
+             |        "subject": "Weekly report - <mark>vttran</mark> 
27&#x2F;02-03&#x2F;03&#x2F;2023",
              |        "preview": null
              |      },
              |      {


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org
For additional commands, e-mail: notifications-h...@james.apache.org

Reply via email to