This is an automated email from the ASF dual-hosted git repository. rcordier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 956d5cc8b134785bb319417eb3796e5cdab8e408 Author: TungTV <vtt...@linagora.com> AuthorDate: Mon Nov 4 08:29:24 2024 +0700 JAMES-4082 Adapt Lucene search - support full capability --- mailbox/lucene/pom.xml | 11 ++++ .../lucene/search/LuceneIndexableDocument.java | 2 +- .../lucene/search/LuceneMessageSearchIndex.java | 67 +++++++++++++--------- .../lucene/search/LuceneSearchHighlighter.java | 18 +++++- .../search/LuceneFSSearchHighLighterTest.java | 3 +- .../search/LuceneMailboxMessageFlagSearchTest.java | 3 +- .../LuceneMailboxMessageSearchIndexTest.java | 3 +- .../search/LuceneMemorySearchHighLighterTest.java | 58 ++++++++++++++++++- .../search/LuceneMessageSearchIndexTest.java | 25 +++++++- .../lucenesearch/host/LuceneSearchHostSystem.java | 3 +- 10 files changed, 156 insertions(+), 37 deletions(-) diff --git a/mailbox/lucene/pom.xml b/mailbox/lucene/pom.xml index 484e20e302..7844c9709d 100644 --- a/mailbox/lucene/pom.xml +++ b/mailbox/lucene/pom.xml @@ -63,6 +63,17 @@ <type>test-jar</type> <scope>test</scope> </dependency> + <dependency> + <groupId>${james.groupId}</groupId> + <artifactId>apache-james-mailbox-tika</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>${james.groupId}</groupId> + <artifactId>apache-james-mailbox-tika</artifactId> + <type>test-jar</type> + <scope>test</scope> + </dependency> <dependency> <groupId>${james.groupId}</groupId> <artifactId>event-bus-api</artifactId> diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java index 1cb006783b..bee20308d2 100644 --- a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java +++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java @@ -234,7 +234,7 @@ public class LuceneIndexableDocument { .orElse(sysFlag.toString()), Field.Store.YES))); Arrays.stream(messageFlags.getUserFlags()) - .forEach(userFlag -> doc.add(new StringField(FLAGS_FIELD, userFlag, Field.Store.YES))); + .forEach(userFlag -> doc.add(new StringField(FLAGS_FIELD, lowercase(userFlag), Field.Store.YES))); // if no flags are there we just use a empty field if (messageFlags.getSystemFlags().length == 0 && messageFlags.getUserFlags().length == 0) { diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java index 12b47a3209..81afadae54 100644 --- a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java +++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java @@ -18,6 +18,9 @@ ****************************************************************/ package org.apache.james.mailbox.lucene.search; +import static org.apache.james.mailbox.lucene.search.DocumentFieldConstants.ATTACHMENT_FILE_NAME_FIELD; +import static org.apache.james.mailbox.lucene.search.DocumentFieldConstants.ATTACHMENT_TEXT_CONTENT_FIELD; + import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; @@ -47,6 +50,7 @@ import org.apache.james.mailbox.MessageUid; import org.apache.james.mailbox.SessionProvider; import org.apache.james.mailbox.exception.MailboxException; import org.apache.james.mailbox.exception.UnsupportedSearchException; +import org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mailbox.model.Mailbox; import org.apache.james.mailbox.model.MailboxId; import org.apache.james.mailbox.model.MessageAttachmentMetadata; @@ -394,6 +398,7 @@ public class LuceneMessageSearchIndex extends ListeningMessageSearchIndex { private final MailboxId.Factory mailboxIdFactory; private final MessageId.Factory messageIdFactory; + private final LuceneIndexableDocument indexableDocument; @VisibleForTesting final IndexWriter writer; @@ -401,7 +406,7 @@ public class LuceneMessageSearchIndex extends ListeningMessageSearchIndex { private int maxQueryResults = DEFAULT_MAX_QUERY_RESULTS; - private boolean suffixMatch = false; + private boolean suffixMatch = true; @Inject public LuceneMessageSearchIndex( @@ -409,8 +414,9 @@ public class LuceneMessageSearchIndex extends ListeningMessageSearchIndex { MailboxId.Factory mailboxIdFactory, Directory directory, MessageId.Factory messageIdFactory, - SessionProvider sessionProvider) throws IOException { - this(factory, mailboxIdFactory, directory, false, messageIdFactory, sessionProvider); + SessionProvider sessionProvider, + TextExtractor textExtractor) throws IOException { + this(factory, mailboxIdFactory, directory, false, messageIdFactory, sessionProvider, textExtractor); } public LuceneMessageSearchIndex( @@ -419,10 +425,12 @@ public class LuceneMessageSearchIndex extends ListeningMessageSearchIndex { Directory directory, boolean dropIndexOnStart, MessageId.Factory messageIdFactory, - SessionProvider sessionProvider) throws IOException { + SessionProvider sessionProvider, + TextExtractor textExtractor) throws IOException { super(factory, ImmutableSet.of(), sessionProvider); this.mailboxIdFactory = mailboxIdFactory; this.messageIdFactory = messageIdFactory; + this.indexableDocument = new LuceneIndexableDocument(textExtractor); this.directory = directory; try { this.writer = new IndexWriter(this.directory, createConfig(LenientImapSearchAnalyzer.INSTANCE, dropIndexOnStart)); @@ -445,8 +453,13 @@ public class LuceneMessageSearchIndex extends ListeningMessageSearchIndex { @Override public EnumSet<SearchCapabilities> getSupportedCapabilities(EnumSet<MailboxManager.MessageCapabilities> messageCapabilities) { - return EnumSet.of(SearchCapabilities.MultimailboxSearch); - + return EnumSet.of(SearchCapabilities.MultimailboxSearch, + SearchCapabilities.PartialEmailMatch, + SearchCapabilities.Text, + SearchCapabilities.FullText, + SearchCapabilities.AttachmentFileName, + SearchCapabilities.Attachment, + SearchCapabilities.HighlightSearch); } /** @@ -1041,19 +1054,20 @@ public class LuceneMessageSearchIndex extends ListeningMessageSearchIndex { /** * Return a {@link Query} which is build based on the given {@link SearchQuery.TextCriterion} */ - private Query createTextQuery(SearchQuery.TextCriterion crit) throws UnsupportedSearchException { + private Query createTextQuery(SearchQuery.TextCriterion crit) { String value = crit.getOperator().getValue().toUpperCase(Locale.US); - switch (crit.getType()) { - case BODY: - return createTermQuery(BODY_FIELD, value); - case FULL: - BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); - queryBuilder.add(createTermQuery(BODY_FIELD, value), BooleanClause.Occur.SHOULD); - queryBuilder.add(createTermQuery(HEADERS_FIELD,value), BooleanClause.Occur.SHOULD); - return queryBuilder.build(); - default: - throw new UnsupportedSearchException(); - } + return switch (crit.getType()) { + case BODY -> createTermQuery(BODY_FIELD, value); + case ATTACHMENTS -> createTermQuery(ATTACHMENT_TEXT_CONTENT_FIELD, value); + case ATTACHMENT_FILE_NAME -> createTermQuery(ATTACHMENT_FILE_NAME_FIELD, value); + case FULL -> { + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + queryBuilder.add(createTermQuery(BODY_FIELD, value), BooleanClause.Occur.SHOULD); + queryBuilder.add(createTermQuery(HEADERS_FIELD, value), BooleanClause.Occur.SHOULD); + queryBuilder.add(createTermQuery(ATTACHMENT_TEXT_CONTENT_FIELD, value), BooleanClause.Occur.SHOULD); + yield queryBuilder.build(); + } + }; } /** @@ -1120,7 +1134,7 @@ public class LuceneMessageSearchIndex extends ListeningMessageSearchIndex { } else if (criterion instanceof AttachmentCriterion crit) { return createAttachmentQuery(crit.getOperator().isSet()); } else if (criterion instanceof CustomFlagCriterion crit) { - return createFlagQuery(crit.getFlag(), crit.getOperator().isSet(), inMailboxes, recentUids); + return createFlagQuery(crit.getFlag().toLowerCase(Locale.US), crit.getOperator().isSet(), inMailboxes, recentUids); } else if (criterion instanceof SearchQuery.TextCriterion crit) { return createTextQuery(crit); } else if (criterion instanceof SearchQuery.AllCriterion) { @@ -1141,15 +1155,12 @@ public class LuceneMessageSearchIndex extends ListeningMessageSearchIndex { @Override public Mono<Void> add(MailboxSession session, Mailbox mailbox, MailboxMessage membership) { - return Mono.fromRunnable(Throwing.runnable(() -> { - Document doc = createMessageDocument(session, membership); - Document flagsDoc = createFlagsDocument(membership); - - log.trace("Adding document: uid:'{}' with flags: {}", doc.get("uid"), flagsDoc); - - writer.addDocument(doc); - writer.addDocument(flagsDoc); - })); + return indexableDocument.createMessageDocument(membership, session) + .flatMap(document -> Mono.fromRunnable(Throwing.runnable(() -> { + writer.addDocument(document); + writer.addDocument(indexableDocument.createFlagsDocument(membership)); + }))) + .then(); } @Override diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java index ee1fdf6edc..7a6804988d 100644 --- a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java +++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneSearchHighlighter.java @@ -19,15 +19,18 @@ package org.apache.james.mailbox.lucene.search; +import static org.apache.james.mailbox.lucene.search.DocumentFieldConstants.ATTACHMENT_TEXT_CONTENT_FIELD; import static org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex.BODY_FIELD; import static org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex.MESSAGE_ID_FIELD; import static org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex.SUBJECT_FIELD; import java.io.IOException; +import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Locale; import java.util.Optional; +import java.util.stream.Stream; import org.apache.james.mailbox.MailboxSession; import org.apache.james.mailbox.model.MailboxId; @@ -44,6 +47,7 @@ import org.apache.james.mailbox.store.StoreMailboxManager; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -62,6 +66,7 @@ import reactor.core.publisher.Mono; import reactor.core.scheduler.Schedulers; public class LuceneSearchHighlighter implements SearchHighlighter { + private static Analyzer defaultAnalyzer() { return new StandardAnalyzer(); } @@ -156,8 +161,9 @@ public class LuceneSearchHighlighter implements SearchHighlighter { private SearchSnippet buildSearchSnippet(Document doc, SearchQuery searchQuery) throws IOException, InvalidTokenOffsetsException { MessageId messageId = messageIdFactory.fromString(doc.get(MESSAGE_ID_FIELD)); - String highlightedSubject = getHighlightedSubject(doc, searchQuery); - String highlightedBody = getHighlightedBody(doc, searchQuery); + Optional<String> highlightedSubject = Optional.ofNullable(getHighlightedSubject(doc, searchQuery)); + Optional<String> highlightedBody = Optional.ofNullable(getHighlightedBody(doc, searchQuery)) + .or(() -> getHighlightAttachmentTextBody(doc, searchQuery)); return new SearchSnippet(messageId, highlightedSubject, highlightedBody); } @@ -169,4 +175,12 @@ public class LuceneSearchHighlighter implements SearchHighlighter { private String getHighlightedBody(Document doc, SearchQuery searchQuery) throws IOException, InvalidTokenOffsetsException { return highlighter(searchQuery).getBestFragment(analyzer, BODY_FIELD, doc.get(BODY_FIELD)); } + + private Optional<String> getHighlightAttachmentTextBody(Document doc, SearchQuery searchQuery) { + Highlighter highlighter = highlighter(searchQuery); + return Stream.ofNullable(doc.getFields(ATTACHMENT_TEXT_CONTENT_FIELD)).flatMap(Arrays::stream) + .map(IndexableField::stringValue) + .map(Throwing.function(contentType -> highlighter.getBestFragment(analyzer, ATTACHMENT_TEXT_CONTENT_FIELD, contentType))) + .findFirst(); + } } diff --git a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneFSSearchHighLighterTest.java b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneFSSearchHighLighterTest.java index ea43b4b87a..850efeeb95 100644 --- a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneFSSearchHighLighterTest.java +++ b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneFSSearchHighLighterTest.java @@ -40,6 +40,7 @@ import org.apache.james.mailbox.searchhighligt.SearchHighlighter; import org.apache.james.mailbox.searchhighligt.SearchHighlighterConfiguration; import org.apache.james.mailbox.store.StoreMailboxManager; import org.apache.james.mailbox.store.StoreMessageManager; +import org.apache.james.mailbox.store.extractor.JsoupTextExtractor; import org.apache.james.mailbox.store.search.MessageSearchIndex; import org.apache.lucene.store.FSDirectory; import org.junit.jupiter.api.BeforeEach; @@ -65,7 +66,7 @@ class LuceneFSSearchHighLighterTest implements SearchHighLighterContract { .listeningSearchIndex(Throwing.function(preInstanciationStage -> new LuceneMessageSearchIndex( preInstanciationStage.getMapperFactory(), new InMemoryId.Factory(), FSDirectory.open(tempPath), messageIdFactory, - preInstanciationStage.getSessionProvider()))) + preInstanciationStage.getSessionProvider(), new JsoupTextExtractor()))) .noPreDeletionHooks() .storeQuotaManager() .build(); diff --git a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageFlagSearchTest.java b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageFlagSearchTest.java index 76669104c2..eca27dae24 100644 --- a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageFlagSearchTest.java +++ b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageFlagSearchTest.java @@ -47,6 +47,7 @@ import org.apache.james.mailbox.model.MessageRange; import org.apache.james.mailbox.model.SearchQuery; import org.apache.james.mailbox.store.StoreMailboxManager; import org.apache.james.mailbox.store.StoreMessageManager; +import org.apache.james.mailbox.store.extractor.JsoupTextExtractor; import org.apache.james.mailbox.store.search.MessageSearchIndex; import org.apache.james.utils.UpdatableTickingClock; import org.apache.lucene.document.Document; @@ -156,7 +157,7 @@ class LuceneMailboxMessageFlagSearchTest { .listeningSearchIndex(Throwing.function(preInstanciationStage -> new LuceneMessageSearchIndex( preInstanciationStage.getMapperFactory(), new InMemoryId.Factory(), new ByteBuffersDirectory(), new InMemoryMessageId.Factory(), - preInstanciationStage.getSessionProvider()))) + preInstanciationStage.getSessionProvider(), new JsoupTextExtractor()))) .noPreDeletionHooks() .storeQuotaManager() .build(); diff --git a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageSearchIndexTest.java b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageSearchIndexTest.java index 58d82d02f7..fb2ff37c3c 100644 --- a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageSearchIndexTest.java +++ b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMailboxMessageSearchIndexTest.java @@ -52,6 +52,7 @@ import org.apache.james.mailbox.model.TestMessageId; import org.apache.james.mailbox.model.UidValidity; import org.apache.james.mailbox.model.UpdatedFlags; import org.apache.james.mailbox.store.MessageBuilder; +import org.apache.james.mailbox.store.extractor.JsoupTextExtractor; import org.apache.james.mailbox.store.search.ListeningMessageSearchIndex; import org.apache.james.mailbox.store.search.ListeningMessageSearchIndexContract; import org.apache.lucene.store.ByteBuffersDirectory; @@ -112,7 +113,7 @@ class LuceneMailboxMessageSearchIndexTest { id3 = factory.generate(); id4 = factory.generate(); id5 = factory.generate(); - index = new LuceneMessageSearchIndex(null, new TestId.Factory(), new ByteBuffersDirectory(), true, factory, null); + index = new LuceneMessageSearchIndex(null, new TestId.Factory(), new ByteBuffersDirectory(), true, factory, null, new JsoupTextExtractor()); index.setEnableSuffixMatch(true); Map<String, String> headersSubject = new HashMap<>(); headersSubject.put("Subject", "test (fwd)"); diff --git a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighLighterTest.java b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighLighterTest.java index ca97c12441..23af88f0b8 100644 --- a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighLighterTest.java +++ b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighLighterTest.java @@ -19,31 +19,45 @@ package org.apache.james.mailbox.lucene.search; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +import java.nio.charset.StandardCharsets; +import java.util.List; import org.apache.james.core.Username; +import org.apache.james.mailbox.MailboxManager; import org.apache.james.mailbox.MailboxSession; import org.apache.james.mailbox.MessageManager; import org.apache.james.mailbox.exception.MailboxException; import org.apache.james.mailbox.inmemory.InMemoryId; import org.apache.james.mailbox.inmemory.InMemoryMessageId; import org.apache.james.mailbox.inmemory.manager.InMemoryIntegrationResources; +import org.apache.james.mailbox.model.ComposedMessageId; import org.apache.james.mailbox.model.Mailbox; import org.apache.james.mailbox.model.MailboxACL; import org.apache.james.mailbox.model.MailboxId; import org.apache.james.mailbox.model.MailboxPath; import org.apache.james.mailbox.model.MessageId; +import org.apache.james.mailbox.model.MultimailboxesSearchQuery; import org.apache.james.mailbox.model.SearchQuery; import org.apache.james.mailbox.searchhighligt.SearchHighLighterContract; import org.apache.james.mailbox.searchhighligt.SearchHighlighter; import org.apache.james.mailbox.searchhighligt.SearchHighlighterConfiguration; +import org.apache.james.mailbox.searchhighligt.SearchSnippet; import org.apache.james.mailbox.store.StoreMailboxManager; import org.apache.james.mailbox.store.StoreMessageManager; +import org.apache.james.mailbox.store.extractor.JsoupTextExtractor; import org.apache.james.mailbox.store.search.MessageSearchIndex; +import org.apache.james.mime4j.dom.Message; +import org.apache.james.util.ClassLoaderUtils; import org.apache.lucene.store.ByteBuffersDirectory; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import com.github.fge.lambdas.Throwing; +import reactor.core.publisher.Flux; + class LuceneMemorySearchHighLighterTest implements SearchHighLighterContract { private MessageSearchIndex messageSearchIndex; private StoreMailboxManager storeMailboxManager; @@ -62,7 +76,7 @@ class LuceneMemorySearchHighLighterTest implements SearchHighLighterContract { .listeningSearchIndex(Throwing.function(preInstanciationStage -> new LuceneMessageSearchIndex( preInstanciationStage.getMapperFactory(), new InMemoryId.Factory(), new ByteBuffersDirectory(), messageIdFactory, - preInstanciationStage.getSessionProvider()))) + preInstanciationStage.getSessionProvider(), new JsoupTextExtractor()))) .noPreDeletionHooks() .storeQuotaManager() .build(); @@ -112,4 +126,46 @@ class LuceneMemorySearchHighLighterTest implements SearchHighLighterContract { MailboxACL.command().forUser(delegated).rights(MailboxACL.FULL_RIGHTS).asAddition(), session(owner))).run(); } + + + @Test + void shouldHighlightAttachmentTextContentWhenTextBodyDoesNotMatch() throws Exception { + assumeTrue(storeMailboxManager.getSupportedSearchCapabilities().contains(MailboxManager.SearchCapabilities.Attachment)); + MailboxSession session = session(USERNAME1); + + ComposedMessageId m1 = appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo, Thx Matthieu for your help") + .setBody("append contentA to inbox", StandardCharsets.UTF_8)), + session).getId(); + + // m2 has an attachment with text content: "This is a beautiful banana" + ComposedMessageId m2 = inboxMessageManager.appendMessage( + MessageManager.AppendCommand.builder() + .build(ClassLoaderUtils.getSystemResourceAsSharedStream("eml/emailWithTextAttachment.eml")), + session).getId(); + + verifyMessageWasIndexed(2); + + String keywordSearch = "beautiful"; + MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of( + new SearchQuery.ConjunctionCriterion(SearchQuery.Conjunction.OR, + List.of(SearchQuery.bodyContains(keywordSearch), + SearchQuery.attachmentContains(keywordSearch))))) + .inMailboxes(List.of(m1.getMailboxId(), m2.getMailboxId())) + .build(); + + List<SearchSnippet> searchSnippets = Flux.from(testee().highlightSearch(List.of(m1.getMessageId(), m2.getMessageId()), multiMailboxSearch, session)) + .collectList() + .block(); + + assertThat(searchSnippets).hasSize(1); + + assertThat(searchSnippets.getFirst().highlightedBody()) + .isPresent() + .satisfies(highlightedBody -> { + assertThat(highlightedBody.get()).contains("This is a <mark>beautiful</mark> banana"); + }); + } } diff --git a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java index 8046ba8ca3..fcb8b2d347 100644 --- a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java +++ b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java @@ -19,8 +19,10 @@ package org.apache.james.mailbox.lucene.search; +import java.net.URISyntaxException; import java.util.List; +import org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mailbox.inmemory.InMemoryId; import org.apache.james.mailbox.inmemory.InMemoryMessageId; import org.apache.james.mailbox.inmemory.manager.InMemoryIntegrationResources; @@ -28,13 +30,34 @@ import org.apache.james.mailbox.model.MailboxId; import org.apache.james.mailbox.model.MessageId; import org.apache.james.mailbox.model.SearchQuery; import org.apache.james.mailbox.store.search.AbstractMessageSearchIndexTest; +import org.apache.james.mailbox.tika.TikaConfiguration; +import org.apache.james.mailbox.tika.TikaExtension; +import org.apache.james.mailbox.tika.TikaHttpClientImpl; +import org.apache.james.mailbox.tika.TikaTextExtractor; +import org.apache.james.metrics.tests.RecordingMetricFactory; import org.apache.lucene.store.ByteBuffersDirectory; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; import com.github.fge.lambdas.Throwing; class LuceneMessageSearchIndexTest extends AbstractMessageSearchIndexTest { + @RegisterExtension + static TikaExtension tika = new TikaExtension(); + + static TextExtractor textExtractor; + + @BeforeAll + static void setUpClass() throws URISyntaxException { + textExtractor = new TikaTextExtractor(new RecordingMetricFactory(), + new TikaHttpClientImpl(TikaConfiguration.builder() + .host(tika.getIp()) + .port(tika.getPort()) + .timeoutInMillis(tika.getTimeoutInMillis()) + .build())); + } @Override protected void awaitMessageCount(List<MailboxId> mailboxIds, SearchQuery query, long messageCount) { @@ -51,7 +74,7 @@ class LuceneMessageSearchIndexTest extends AbstractMessageSearchIndexTest { .listeningSearchIndex(Throwing.function(preInstanciationStage -> new LuceneMessageSearchIndex( preInstanciationStage.getMapperFactory(), new InMemoryId.Factory(), new ByteBuffersDirectory(), new InMemoryMessageId.Factory(), - preInstanciationStage.getSessionProvider()))) + preInstanciationStage.getSessionProvider(), textExtractor))) .noPreDeletionHooks() .storeQuotaManager() .build(); diff --git a/mpt/impl/imap-mailbox/lucenesearch/src/test/java/org/apache/james/mpt/imapmailbox/lucenesearch/host/LuceneSearchHostSystem.java b/mpt/impl/imap-mailbox/lucenesearch/src/test/java/org/apache/james/mpt/imapmailbox/lucenesearch/host/LuceneSearchHostSystem.java index ee82414000..cc37e7db84 100644 --- a/mpt/impl/imap-mailbox/lucenesearch/src/test/java/org/apache/james/mpt/imapmailbox/lucenesearch/host/LuceneSearchHostSystem.java +++ b/mpt/impl/imap-mailbox/lucenesearch/src/test/java/org/apache/james/mpt/imapmailbox/lucenesearch/host/LuceneSearchHostSystem.java @@ -34,6 +34,7 @@ import org.apache.james.mailbox.inmemory.InMemoryMessageId; import org.apache.james.mailbox.inmemory.manager.InMemoryIntegrationResources; import org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex; import org.apache.james.mailbox.store.StoreSubscriptionManager; +import org.apache.james.mailbox.store.extractor.DefaultTextExtractor; import org.apache.james.mailbox.store.quota.NoQuotaManager; import org.apache.james.metrics.logger.DefaultMetricFactory; import org.apache.james.mpt.api.ImapFeatures; @@ -66,7 +67,7 @@ public class LuceneSearchHostSystem extends JamesImapHostSystem { .listeningSearchIndex(Throwing.function(preInstanciationStage -> new LuceneMessageSearchIndex( preInstanciationStage.getMapperFactory(), new InMemoryId.Factory(), new ByteBuffersDirectory(), new InMemoryMessageId.Factory(), - preInstanciationStage.getSessionProvider()))) + preInstanciationStage.getSessionProvider(), new DefaultTextExtractor()))) .noPreDeletionHooks() .storeQuotaManager() .build(); --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org For additional commands, e-mail: notifications-h...@james.apache.org