This is an automated email from the ASF dual-hosted git repository.

rcordier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git

commit 956d5cc8b134785bb319417eb3796e5cdab8e408
Author: TungTV <vtt...@linagora.com>
AuthorDate: Mon Nov 4 08:29:24 2024 +0700

    JAMES-4082 Adapt Lucene search - support full capability
---
 mailbox/lucene/pom.xml                             | 11 ++++
 .../lucene/search/LuceneIndexableDocument.java     |  2 +-
 .../lucene/search/LuceneMessageSearchIndex.java    | 67 +++++++++++++---------
 .../lucene/search/LuceneSearchHighlighter.java     | 18 +++++-
 .../search/LuceneFSSearchHighLighterTest.java      |  3 +-
 .../search/LuceneMailboxMessageFlagSearchTest.java |  3 +-
 .../LuceneMailboxMessageSearchIndexTest.java       |  3 +-
 .../search/LuceneMemorySearchHighLighterTest.java  | 58 ++++++++++++++++++-
 .../search/LuceneMessageSearchIndexTest.java       | 25 +++++++-
 .../lucenesearch/host/LuceneSearchHostSystem.java  |  3 +-
 10 files changed, 156 insertions(+), 37 deletions(-)

diff --git a/mailbox/lucene/pom.xml b/mailbox/lucene/pom.xml
index 484e20e302..7844c9709d 100644
--- a/mailbox/lucene/pom.xml
+++ b/mailbox/lucene/pom.xml
@@ -63,6 +63,17 @@
             <type>test-jar</type>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>${james.groupId}</groupId>
+            <artifactId>apache-james-mailbox-tika</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>${james.groupId}</groupId>
+            <artifactId>apache-james-mailbox-tika</artifactId>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
         <dependency>
             <groupId>${james.groupId}</groupId>
             <artifactId>event-bus-api</artifactId>
diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
index 1cb006783b..bee20308d2 100644
--- a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
+++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
@@ -234,7 +234,7 @@ public class LuceneIndexableDocument {
                     .orElse(sysFlag.toString()), Field.Store.YES)));
 
         Arrays.stream(messageFlags.getUserFlags())
-            .forEach(userFlag -> doc.add(new StringField(FLAGS_FIELD, 
userFlag, Field.Store.YES)));
+            .forEach(userFlag -> doc.add(new StringField(FLAGS_FIELD, 
lowercase(userFlag), Field.Store.YES)));
 
         // if no flags are there we just use a empty field
         if (messageFlags.getSystemFlags().length == 0 && 
messageFlags.getUserFlags().length == 0) {
diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java
index 12b47a3209..81afadae54 100644
--- a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java
+++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java
@@ -18,6 +18,9 @@
  ****************************************************************/
 package org.apache.james.mailbox.lucene.search;
 
+import static 
org.apache.james.mailbox.lucene.search.DocumentFieldConstants.ATTACHMENT_FILE_NAME_FIELD;
+import static 
org.apache.james.mailbox.lucene.search.DocumentFieldConstants.ATTACHMENT_TEXT_CONTENT_FIELD;
+
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.Charset;
@@ -47,6 +50,7 @@ import org.apache.james.mailbox.MessageUid;
 import org.apache.james.mailbox.SessionProvider;
 import org.apache.james.mailbox.exception.MailboxException;
 import org.apache.james.mailbox.exception.UnsupportedSearchException;
+import org.apache.james.mailbox.extractor.TextExtractor;
 import org.apache.james.mailbox.model.Mailbox;
 import org.apache.james.mailbox.model.MailboxId;
 import org.apache.james.mailbox.model.MessageAttachmentMetadata;
@@ -394,6 +398,7 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
 
     private final MailboxId.Factory mailboxIdFactory;
     private final MessageId.Factory messageIdFactory;
+    private final LuceneIndexableDocument indexableDocument;
 
     @VisibleForTesting
     final IndexWriter writer;
@@ -401,7 +406,7 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
 
     private int maxQueryResults = DEFAULT_MAX_QUERY_RESULTS;
 
-    private boolean suffixMatch = false;
+    private boolean suffixMatch = true;
 
     @Inject
     public LuceneMessageSearchIndex(
@@ -409,8 +414,9 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
         MailboxId.Factory mailboxIdFactory,
         Directory directory,
         MessageId.Factory messageIdFactory,
-        SessionProvider sessionProvider) throws IOException {
-        this(factory, mailboxIdFactory, directory, false, messageIdFactory, 
sessionProvider);
+        SessionProvider sessionProvider,
+        TextExtractor textExtractor) throws IOException {
+        this(factory, mailboxIdFactory, directory, false, messageIdFactory, 
sessionProvider, textExtractor);
     }
 
     public LuceneMessageSearchIndex(
@@ -419,10 +425,12 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
             Directory directory,
             boolean dropIndexOnStart,
             MessageId.Factory messageIdFactory,
-            SessionProvider sessionProvider) throws IOException {
+            SessionProvider sessionProvider,
+            TextExtractor textExtractor) throws IOException {
         super(factory, ImmutableSet.of(), sessionProvider);
         this.mailboxIdFactory = mailboxIdFactory;
         this.messageIdFactory = messageIdFactory;
+        this.indexableDocument = new LuceneIndexableDocument(textExtractor);
         this.directory = directory;
         try {
             this.writer = new IndexWriter(this.directory, 
createConfig(LenientImapSearchAnalyzer.INSTANCE, dropIndexOnStart));
@@ -445,8 +453,13 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
 
     @Override
     public EnumSet<SearchCapabilities> 
getSupportedCapabilities(EnumSet<MailboxManager.MessageCapabilities> 
messageCapabilities) {
-        return EnumSet.of(SearchCapabilities.MultimailboxSearch);
-
+        return EnumSet.of(SearchCapabilities.MultimailboxSearch,
+            SearchCapabilities.PartialEmailMatch,
+            SearchCapabilities.Text,
+            SearchCapabilities.FullText,
+            SearchCapabilities.AttachmentFileName,
+            SearchCapabilities.Attachment,
+            SearchCapabilities.HighlightSearch);
     }
 
     /**
@@ -1041,19 +1054,20 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
     /**
      * Return a {@link Query} which is build based on the given {@link 
SearchQuery.TextCriterion}
      */
-    private Query createTextQuery(SearchQuery.TextCriterion crit) throws 
UnsupportedSearchException {
+    private Query createTextQuery(SearchQuery.TextCriterion crit) {
         String value = crit.getOperator().getValue().toUpperCase(Locale.US);
-        switch (crit.getType()) {
-        case BODY:
-            return createTermQuery(BODY_FIELD, value);
-        case FULL:
-            BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
-            queryBuilder.add(createTermQuery(BODY_FIELD, value), 
BooleanClause.Occur.SHOULD);
-            queryBuilder.add(createTermQuery(HEADERS_FIELD,value), 
BooleanClause.Occur.SHOULD);
-            return queryBuilder.build();
-        default:
-            throw new UnsupportedSearchException();
-        }
+        return switch (crit.getType()) {
+            case BODY -> createTermQuery(BODY_FIELD, value);
+            case ATTACHMENTS -> createTermQuery(ATTACHMENT_TEXT_CONTENT_FIELD, 
value);
+            case ATTACHMENT_FILE_NAME -> 
createTermQuery(ATTACHMENT_FILE_NAME_FIELD, value);
+            case FULL -> {
+                BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+                queryBuilder.add(createTermQuery(BODY_FIELD, value), 
BooleanClause.Occur.SHOULD);
+                queryBuilder.add(createTermQuery(HEADERS_FIELD, value), 
BooleanClause.Occur.SHOULD);
+                
queryBuilder.add(createTermQuery(ATTACHMENT_TEXT_CONTENT_FIELD, value), 
BooleanClause.Occur.SHOULD);
+                yield queryBuilder.build();
+            }
+        };
     }
 
     /**
@@ -1120,7 +1134,7 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
         } else if (criterion instanceof AttachmentCriterion crit) {
             return createAttachmentQuery(crit.getOperator().isSet());
         } else if (criterion instanceof CustomFlagCriterion crit) {
-            return createFlagQuery(crit.getFlag(), crit.getOperator().isSet(), 
inMailboxes, recentUids);
+            return createFlagQuery(crit.getFlag().toLowerCase(Locale.US), 
crit.getOperator().isSet(), inMailboxes, recentUids);
         } else if (criterion instanceof SearchQuery.TextCriterion crit) {
             return createTextQuery(crit);
         } else if (criterion instanceof SearchQuery.AllCriterion) {
@@ -1141,15 +1155,12 @@ public class LuceneMessageSearchIndex extends 
ListeningMessageSearchIndex {
 
     @Override
     public Mono<Void> add(MailboxSession session, Mailbox mailbox, 
MailboxMessage membership) {
-        return Mono.fromRunnable(Throwing.runnable(() -> {
-            Document doc = createMessageDocument(session, membership);
-            Document flagsDoc = createFlagsDocument(membership);
-
-            log.trace("Adding document: uid:'{}' with flags: {}", 
doc.get("uid"), flagsDoc);
-
-            writer.addDocument(doc);
-            writer.addDocument(flagsDoc);
-        }));
+        return indexableDocument.createMessageDocument(membership, session)
+            .flatMap(document -> Mono.fromRunnable(Throwing.runnable(() -> {
+                writer.addDocument(document);
+                
writer.addDocument(indexableDocument.createFlagsDocument(membership));
+            })))
+            .then();
     }
 
     @Override
diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
index ee1fdf6edc..7a6804988d 100644
--- a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
+++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java
@@ -19,15 +19,18 @@
 
 package org.apache.james.mailbox.lucene.search;
 
+import static 
org.apache.james.mailbox.lucene.search.DocumentFieldConstants.ATTACHMENT_TEXT_CONTENT_FIELD;
 import static 
org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex.BODY_FIELD;
 import static 
org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex.MESSAGE_ID_FIELD;
 import static 
org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex.SUBJECT_FIELD;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 import java.util.Locale;
 import java.util.Optional;
+import java.util.stream.Stream;
 
 import org.apache.james.mailbox.MailboxSession;
 import org.apache.james.mailbox.model.MailboxId;
@@ -44,6 +47,7 @@ import org.apache.james.mailbox.store.StoreMailboxManager;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
@@ -62,6 +66,7 @@ import reactor.core.publisher.Mono;
 import reactor.core.scheduler.Schedulers;
 
 public class LuceneSearchHighlighter implements SearchHighlighter {
+
     private static Analyzer defaultAnalyzer() {
         return new StandardAnalyzer();
     }
@@ -156,8 +161,9 @@ public class LuceneSearchHighlighter implements 
SearchHighlighter {
 
     private SearchSnippet buildSearchSnippet(Document doc, SearchQuery 
searchQuery) throws IOException, InvalidTokenOffsetsException {
         MessageId messageId = 
messageIdFactory.fromString(doc.get(MESSAGE_ID_FIELD));
-        String highlightedSubject = getHighlightedSubject(doc, searchQuery);
-        String highlightedBody = getHighlightedBody(doc, searchQuery);
+        Optional<String> highlightedSubject = 
Optional.ofNullable(getHighlightedSubject(doc, searchQuery));
+        Optional<String> highlightedBody = 
Optional.ofNullable(getHighlightedBody(doc, searchQuery))
+            .or(() -> getHighlightAttachmentTextBody(doc, searchQuery));
 
         return new SearchSnippet(messageId, highlightedSubject, 
highlightedBody);
     }
@@ -169,4 +175,12 @@ public class LuceneSearchHighlighter implements 
SearchHighlighter {
     private String getHighlightedBody(Document doc, SearchQuery searchQuery) 
throws IOException, InvalidTokenOffsetsException {
         return highlighter(searchQuery).getBestFragment(analyzer, BODY_FIELD, 
doc.get(BODY_FIELD));
     }
+
+    private Optional<String> getHighlightAttachmentTextBody(Document doc, 
SearchQuery searchQuery) {
+        Highlighter highlighter = highlighter(searchQuery);
+        return 
Stream.ofNullable(doc.getFields(ATTACHMENT_TEXT_CONTENT_FIELD)).flatMap(Arrays::stream)
+            .map(IndexableField::stringValue)
+            .map(Throwing.function(contentType -> 
highlighter.getBestFragment(analyzer, ATTACHMENT_TEXT_CONTENT_FIELD, 
contentType)))
+            .findFirst();
+    }
 }
diff --git a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneFSSearchHighLighterTest.java b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneFSSearchHighLighterTest.java
index ea43b4b87a..850efeeb95 100644
--- a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneFSSearchHighLighterTest.java
+++ b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneFSSearchHighLighterTest.java
@@ -40,6 +40,7 @@ import 
org.apache.james.mailbox.searchhighligt.SearchHighlighter;
 import org.apache.james.mailbox.searchhighligt.SearchHighlighterConfiguration;
 import org.apache.james.mailbox.store.StoreMailboxManager;
 import org.apache.james.mailbox.store.StoreMessageManager;
+import org.apache.james.mailbox.store.extractor.JsoupTextExtractor;
 import org.apache.james.mailbox.store.search.MessageSearchIndex;
 import org.apache.lucene.store.FSDirectory;
 import org.junit.jupiter.api.BeforeEach;
@@ -65,7 +66,7 @@ class LuceneFSSearchHighLighterTest implements 
SearchHighLighterContract {
             .listeningSearchIndex(Throwing.function(preInstanciationStage -> 
new LuceneMessageSearchIndex(
                 preInstanciationStage.getMapperFactory(), new 
InMemoryId.Factory(), FSDirectory.open(tempPath),
                 messageIdFactory,
-                preInstanciationStage.getSessionProvider())))
+                preInstanciationStage.getSessionProvider(), new 
JsoupTextExtractor())))
             .noPreDeletionHooks()
             .storeQuotaManager()
             .build();
diff --git a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageFlagSearchTest.java b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageFlagSearchTest.java
index 76669104c2..eca27dae24 100644
--- a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageFlagSearchTest.java
+++ b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageFlagSearchTest.java
@@ -47,6 +47,7 @@ import org.apache.james.mailbox.model.MessageRange;
 import org.apache.james.mailbox.model.SearchQuery;
 import org.apache.james.mailbox.store.StoreMailboxManager;
 import org.apache.james.mailbox.store.StoreMessageManager;
+import org.apache.james.mailbox.store.extractor.JsoupTextExtractor;
 import org.apache.james.mailbox.store.search.MessageSearchIndex;
 import org.apache.james.utils.UpdatableTickingClock;
 import org.apache.lucene.document.Document;
@@ -156,7 +157,7 @@ class LuceneMailboxMessageFlagSearchTest {
                 .listeningSearchIndex(Throwing.function(preInstanciationStage 
-> new LuceneMessageSearchIndex(
                         preInstanciationStage.getMapperFactory(), new 
InMemoryId.Factory(), new ByteBuffersDirectory(),
                         new InMemoryMessageId.Factory(),
-                        preInstanciationStage.getSessionProvider())))
+                        preInstanciationStage.getSessionProvider(), new 
JsoupTextExtractor())))
                 .noPreDeletionHooks()
                 .storeQuotaManager()
                 .build();
diff --git a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageSearchIndexTest.java b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageSearchIndexTest.java
index 58d82d02f7..fb2ff37c3c 100644
--- a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageSearchIndexTest.java
+++ b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageSearchIndexTest.java
@@ -52,6 +52,7 @@ import org.apache.james.mailbox.model.TestMessageId;
 import org.apache.james.mailbox.model.UidValidity;
 import org.apache.james.mailbox.model.UpdatedFlags;
 import org.apache.james.mailbox.store.MessageBuilder;
+import org.apache.james.mailbox.store.extractor.JsoupTextExtractor;
 import org.apache.james.mailbox.store.search.ListeningMessageSearchIndex;
 import 
org.apache.james.mailbox.store.search.ListeningMessageSearchIndexContract;
 import org.apache.lucene.store.ByteBuffersDirectory;
@@ -112,7 +113,7 @@ class LuceneMailboxMessageSearchIndexTest {
         id3 = factory.generate();
         id4 = factory.generate();
         id5 = factory.generate();
-        index = new LuceneMessageSearchIndex(null, new TestId.Factory(), new 
ByteBuffersDirectory(), true, factory, null);
+        index = new LuceneMessageSearchIndex(null, new TestId.Factory(), new 
ByteBuffersDirectory(), true, factory, null, new JsoupTextExtractor());
         index.setEnableSuffixMatch(true);
         Map<String, String> headersSubject = new HashMap<>();
         headersSubject.put("Subject", "test (fwd)");
diff --git a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighLighterTest.java b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighLighterTest.java
index ca97c12441..23af88f0b8 100644
--- a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighLighterTest.java
+++ b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighLighterTest.java
@@ -19,31 +19,45 @@
 package org.apache.james.mailbox.lucene.search;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assumptions.assumeTrue;
+
+import java.nio.charset.StandardCharsets;
+import java.util.List;
 
 import org.apache.james.core.Username;
+import org.apache.james.mailbox.MailboxManager;
 import org.apache.james.mailbox.MailboxSession;
 import org.apache.james.mailbox.MessageManager;
 import org.apache.james.mailbox.exception.MailboxException;
 import org.apache.james.mailbox.inmemory.InMemoryId;
 import org.apache.james.mailbox.inmemory.InMemoryMessageId;
 import org.apache.james.mailbox.inmemory.manager.InMemoryIntegrationResources;
+import org.apache.james.mailbox.model.ComposedMessageId;
 import org.apache.james.mailbox.model.Mailbox;
 import org.apache.james.mailbox.model.MailboxACL;
 import org.apache.james.mailbox.model.MailboxId;
 import org.apache.james.mailbox.model.MailboxPath;
 import org.apache.james.mailbox.model.MessageId;
+import org.apache.james.mailbox.model.MultimailboxesSearchQuery;
 import org.apache.james.mailbox.model.SearchQuery;
 import org.apache.james.mailbox.searchhighligt.SearchHighLighterContract;
 import org.apache.james.mailbox.searchhighligt.SearchHighlighter;
 import org.apache.james.mailbox.searchhighligt.SearchHighlighterConfiguration;
+import org.apache.james.mailbox.searchhighligt.SearchSnippet;
 import org.apache.james.mailbox.store.StoreMailboxManager;
 import org.apache.james.mailbox.store.StoreMessageManager;
+import org.apache.james.mailbox.store.extractor.JsoupTextExtractor;
 import org.apache.james.mailbox.store.search.MessageSearchIndex;
+import org.apache.james.mime4j.dom.Message;
+import org.apache.james.util.ClassLoaderUtils;
 import org.apache.lucene.store.ByteBuffersDirectory;
 import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import com.github.fge.lambdas.Throwing;
 
+import reactor.core.publisher.Flux;
+
 class LuceneMemorySearchHighLighterTest implements SearchHighLighterContract {
     private MessageSearchIndex messageSearchIndex;
     private StoreMailboxManager storeMailboxManager;
@@ -62,7 +76,7 @@ class LuceneMemorySearchHighLighterTest implements 
SearchHighLighterContract {
             .listeningSearchIndex(Throwing.function(preInstanciationStage -> 
new LuceneMessageSearchIndex(
                 preInstanciationStage.getMapperFactory(), new 
InMemoryId.Factory(), new ByteBuffersDirectory(),
                 messageIdFactory,
-                preInstanciationStage.getSessionProvider())))
+                preInstanciationStage.getSessionProvider(), new 
JsoupTextExtractor())))
             .noPreDeletionHooks()
             .storeQuotaManager()
             .build();
@@ -112,4 +126,46 @@ class LuceneMemorySearchHighLighterTest implements 
SearchHighLighterContract {
             
MailboxACL.command().forUser(delegated).rights(MailboxACL.FULL_RIGHTS).asAddition(),
             session(owner))).run();
     }
+
+
+    @Test
+    void shouldHighlightAttachmentTextContentWhenTextBodyDoesNotMatch() throws 
Exception {
+        
assumeTrue(storeMailboxManager.getSupportedSearchCapabilities().contains(MailboxManager.SearchCapabilities.Attachment));
+        MailboxSession session = session(USERNAME1);
+
+        ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from(
+                Message.Builder.of()
+                    .setTo("to@james.local")
+                    .setSubject("Hallo, Thx Matthieu for your help")
+                    .setBody("append contentA to inbox", 
StandardCharsets.UTF_8)),
+            session).getId();
+
+        // m2 has an attachment with text content: "This is a beautiful banana"
+        ComposedMessageId m2 = inboxMessageManager.appendMessage(
+            MessageManager.AppendCommand.builder()
+                
.build(ClassLoaderUtils.getSystemResourceAsSharedStream("eml/emailWithTextAttachment.eml")),
+            session).getId();
+
+        verifyMessageWasIndexed(2);
+
+        String keywordSearch = "beautiful";
+        MultimailboxesSearchQuery multiMailboxSearch = 
MultimailboxesSearchQuery.from(SearchQuery.of(
+                new 
SearchQuery.ConjunctionCriterion(SearchQuery.Conjunction.OR,
+                    List.of(SearchQuery.bodyContains(keywordSearch),
+                        SearchQuery.attachmentContains(keywordSearch)))))
+            .inMailboxes(List.of(m1.getMailboxId(), m2.getMailboxId()))
+            .build();
+
+        List<SearchSnippet> searchSnippets = 
Flux.from(testee().highlightSearch(List.of(m1.getMessageId(), 
m2.getMessageId()), multiMailboxSearch, session))
+            .collectList()
+            .block();
+
+        assertThat(searchSnippets).hasSize(1);
+
+        assertThat(searchSnippets.getFirst().highlightedBody())
+            .isPresent()
+            .satisfies(highlightedBody -> {
+                assertThat(highlightedBody.get()).contains("This is a 
<mark>beautiful</mark> banana");
+            });
+    }
 }
diff --git a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java
index 8046ba8ca3..fcb8b2d347 100644
--- a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java
+++ b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java
@@ -19,8 +19,10 @@
 
 package org.apache.james.mailbox.lucene.search;
 
+import java.net.URISyntaxException;
 import java.util.List;
 
+import org.apache.james.mailbox.extractor.TextExtractor;
 import org.apache.james.mailbox.inmemory.InMemoryId;
 import org.apache.james.mailbox.inmemory.InMemoryMessageId;
 import org.apache.james.mailbox.inmemory.manager.InMemoryIntegrationResources;
@@ -28,13 +30,34 @@ import org.apache.james.mailbox.model.MailboxId;
 import org.apache.james.mailbox.model.MessageId;
 import org.apache.james.mailbox.model.SearchQuery;
 import org.apache.james.mailbox.store.search.AbstractMessageSearchIndexTest;
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.apache.james.mailbox.tika.TikaExtension;
+import org.apache.james.mailbox.tika.TikaHttpClientImpl;
+import org.apache.james.mailbox.tika.TikaTextExtractor;
+import org.apache.james.metrics.tests.RecordingMetricFactory;
 import org.apache.lucene.store.ByteBuffersDirectory;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.RegisterExtension;
 
 import com.github.fge.lambdas.Throwing;
 
 class LuceneMessageSearchIndexTest extends AbstractMessageSearchIndexTest {
+    @RegisterExtension
+    static TikaExtension tika = new TikaExtension();
+
+    static TextExtractor textExtractor;
+
+    @BeforeAll
+    static void setUpClass() throws URISyntaxException {
+        textExtractor = new TikaTextExtractor(new RecordingMetricFactory(),
+            new TikaHttpClientImpl(TikaConfiguration.builder()
+                .host(tika.getIp())
+                .port(tika.getPort())
+                .timeoutInMillis(tika.getTimeoutInMillis())
+                .build()));
+    }
 
     @Override
     protected void awaitMessageCount(List<MailboxId> mailboxIds, SearchQuery 
query, long messageCount) {
@@ -51,7 +74,7 @@ class LuceneMessageSearchIndexTest extends 
AbstractMessageSearchIndexTest {
             .listeningSearchIndex(Throwing.function(preInstanciationStage -> 
new LuceneMessageSearchIndex(
                 preInstanciationStage.getMapperFactory(), new 
InMemoryId.Factory(), new ByteBuffersDirectory(),
                 new InMemoryMessageId.Factory(),
-                preInstanciationStage.getSessionProvider())))
+                preInstanciationStage.getSessionProvider(), textExtractor)))
             .noPreDeletionHooks()
             .storeQuotaManager()
             .build();
diff --git a/mpt/impl/imap-mailbox/lucenesearch/src/test/java/org/apache/james/mpt/imapmailbox/lucenesearch/host/LuceneSearchHostSystem.java b/mpt/impl/imap-mailbox/lucenesearch/src/test/java/org/apache/james/mpt/imapmailbox/lucenesearch/host/LuceneSearchHostSystem.java
index ee82414000..cc37e7db84 100644
--- a/mpt/impl/imap-mailbox/lucenesearch/src/test/java/org/apache/james/mpt/imapmailbox/lucenesearch/host/LuceneSearchHostSystem.java
+++ b/mpt/impl/imap-mailbox/lucenesearch/src/test/java/org/apache/james/mpt/imapmailbox/lucenesearch/host/LuceneSearchHostSystem.java
@@ -34,6 +34,7 @@ import org.apache.james.mailbox.inmemory.InMemoryMessageId;
 import org.apache.james.mailbox.inmemory.manager.InMemoryIntegrationResources;
 import org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex;
 import org.apache.james.mailbox.store.StoreSubscriptionManager;
+import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
 import org.apache.james.mailbox.store.quota.NoQuotaManager;
 import org.apache.james.metrics.logger.DefaultMetricFactory;
 import org.apache.james.mpt.api.ImapFeatures;
@@ -66,7 +67,7 @@ public class LuceneSearchHostSystem extends 
JamesImapHostSystem {
             .listeningSearchIndex(Throwing.function(preInstanciationStage -> 
new LuceneMessageSearchIndex(
                 preInstanciationStage.getMapperFactory(), new 
InMemoryId.Factory(), new ByteBuffersDirectory(),
                 new InMemoryMessageId.Factory(),
-                preInstanciationStage.getSessionProvider())))
+                preInstanciationStage.getSessionProvider(), new 
DefaultTextExtractor())))
             .noPreDeletionHooks()
             .storeQuotaManager()
             .build();


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org
For additional commands, e-mail: notifications-h...@james.apache.org

Reply via email to