This is an automated email from the ASF dual-hosted git repository. rcordier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 81d9d6c37bf1ee4a2d426c5bf2217639148ec6eb Author: Tung Tran <vtt...@linagora.com> AuthorDate: Fri Oct 4 12:37:19 2024 +0700 JAMES-4077 [SearchSnippet] Search Highlight - Lucene memory implementation --- .../mailbox/searchhighligt/SearchHighlighter.java | 32 ++ .../SearchHighlighterConfiguration.java | 41 ++ .../mailbox/searchhighligt/SearchSnippet.java | 33 ++ mailbox/lucene/pom.xml | 11 +- .../search/LuceneMemorySearchHighlighter.java | 169 +++++++ .../search/LuceneMemorySearchHighLightTest.java | 491 +++++++++++++++++++++ .../james/mailbox/store/StoreMailboxManager.java | 2 +- 7 files changed, 777 insertions(+), 2 deletions(-) diff --git a/mailbox/api/src/main/java/org/apache/james/mailbox/searchhighligt/SearchHighlighter.java b/mailbox/api/src/main/java/org/apache/james/mailbox/searchhighligt/SearchHighlighter.java new file mode 100644 index 0000000000..803942feaf --- /dev/null +++ b/mailbox/api/src/main/java/org/apache/james/mailbox/searchhighligt/SearchHighlighter.java @@ -0,0 +1,32 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. 
* + ****************************************************************/ + +package org.apache.james.mailbox.searchhighligt; + +import java.util.List; + +import org.apache.james.mailbox.MailboxSession; +import org.apache.james.mailbox.model.MessageId; +import org.apache.james.mailbox.model.MultimailboxesSearchQuery; +import org.reactivestreams.Publisher; + +/** Contract for producing {@link SearchSnippet}s for a search: implementations receive the message ids of interest, the multi-mailbox search query and the caller's session. */ public interface SearchHighlighter { + + /** Returns a {@link Publisher} of {@link SearchSnippet} for the given message ids, search expression and session. */ Publisher<SearchSnippet> highlightSearch(List<MessageId> messageIds, MultimailboxesSearchQuery expression, MailboxSession session); +} diff --git a/mailbox/api/src/main/java/org/apache/james/mailbox/searchhighligt/SearchHighlighterConfiguration.java b/mailbox/api/src/main/java/org/apache/james/mailbox/searchhighligt/SearchHighlighterConfiguration.java new file mode 100644 index 0000000000..139fd4f46f --- /dev/null +++ b/mailbox/api/src/main/java/org/apache/james/mailbox/searchhighligt/SearchHighlighterConfiguration.java @@ -0,0 +1,41 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License.
* + ****************************************************************/ + +package org.apache.james.mailbox.searchhighligt; + +import com.google.common.base.Preconditions; + +/** Settings for search highlighting: the tag strings emitted before and after a highlighted term, and the fragment size. Defaults are the mark tags and a fragment size of 100. */ public record SearchHighlighterConfiguration(String preTagFormatter, + String postTagFormatter, + int fragmentSize) { + + public static final String PRE_TAG_FORMATTER_DEFAULT = "<mark>"; + public static final String POST_TAG_FORMATTER_DEFAULT = "</mark>"; + public static final int FRAGMENT_SIZE_DEFAULT = 100; + + /** Configuration built from the three default constants above. */ public static final SearchHighlighterConfiguration DEFAULT = new SearchHighlighterConfiguration(PRE_TAG_FORMATTER_DEFAULT, POST_TAG_FORMATTER_DEFAULT, FRAGMENT_SIZE_DEFAULT); + + /** Canonical constructor: rejects (via Preconditions.checkArgument) a fragmentSize that is not within 1..255, the upper bound citing rfc8621#section-5 in its message; the tag strings are stored as given, without validation. */ public SearchHighlighterConfiguration(String preTagFormatter, String postTagFormatter, int fragmentSize) { + Preconditions.checkArgument(fragmentSize > 0, "fragmentSize should be positive"); + Preconditions.checkArgument(fragmentSize <= 255, "fragmentSize should be less than 256 (rfc8621#section-5)"); + this.preTagFormatter = preTagFormatter; + this.postTagFormatter = postTagFormatter; + this.fragmentSize = fragmentSize; + } +} diff --git a/mailbox/api/src/main/java/org/apache/james/mailbox/searchhighligt/SearchSnippet.java b/mailbox/api/src/main/java/org/apache/james/mailbox/searchhighligt/SearchSnippet.java new file mode 100644 index 0000000000..f0d7e55890 --- /dev/null +++ b/mailbox/api/src/main/java/org/apache/james/mailbox/searchhighligt/SearchSnippet.java @@ -0,0 +1,33 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License.
You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.mailbox.searchhighligt; + +import java.util.Optional; + +import org.apache.james.mailbox.model.MessageId; + +/** Highlight result for one message: its id plus an optional highlighted subject and an optional highlighted body. */ public record SearchSnippet(MessageId messageId, + Optional<String> highlightedSubject, + Optional<String> highlightedBody) { + + /** Convenience constructor taking possibly-null strings; each is wrapped with Optional.ofNullable, so a null becomes an empty Optional. */ public SearchSnippet(MessageId messageId, String highlightedSubject, String highlightedBody) { + this(messageId, Optional.ofNullable(highlightedSubject), Optional.ofNullable(highlightedBody)); + } +} \ No newline at end of file diff --git a/mailbox/lucene/pom.xml b/mailbox/lucene/pom.xml index b765477e1b..484e20e302 100644 --- a/mailbox/lucene/pom.xml +++ b/mailbox/lucene/pom.xml @@ -111,12 +111,21 @@ <artifactId>lucene-core</artifactId> <version>${lucene.version}</version> </dependency> + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-highlighter</artifactId> + <version>${lucene.version}</version> + </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-misc</artifactId> <version>${lucene.version}</version> </dependency> - + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-queryparser</artifactId> + <version>${lucene.version}</version> + </dependency> <dependency> <groupId>org.awaitility</groupId> <artifactId>awaitility</artifactId> diff --git a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighlighter.java
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighlighter.java new file mode 100644 index 0000000000..82997cfa2a --- /dev/null +++ b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighlighter.java @@ -0,0 +1,169 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. 
* + ****************************************************************/ + +package org.apache.james.mailbox.lucene.search; + +import static org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex.BODY_FIELD; +import static org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex.MESSAGE_ID_FIELD; +import static org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex.SUBJECT_FIELD; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Locale; +import java.util.Optional; + +import org.apache.james.mailbox.MailboxSession; +import org.apache.james.mailbox.model.MailboxId; +import org.apache.james.mailbox.model.MessageId; +import org.apache.james.mailbox.model.MultimailboxesSearchQuery; +import org.apache.james.mailbox.model.SearchQuery; +import org.apache.james.mailbox.model.SearchQuery.ConjunctionCriterion; +import org.apache.james.mailbox.model.SearchQuery.SubjectCriterion; +import org.apache.james.mailbox.model.SearchQuery.TextCriterion; +import org.apache.james.mailbox.searchhighligt.SearchHighlighter; +import org.apache.james.mailbox.searchhighligt.SearchHighlighterConfiguration; +import org.apache.james.mailbox.searchhighligt.SearchSnippet; +import org.apache.james.mailbox.store.StoreMailboxManager; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.highlight.Formatter; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import 
org.apache.lucene.search.highlight.SimpleSpanFragmenter; + +import com.github.fge.lambdas.Throwing; + +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; +import reactor.core.scheduler.Schedulers; + +/** {@link SearchHighlighter} that fetches matching documents from a {@link LuceneMessageSearchIndex} and highlights their subject and body fields with the Lucene {@link Highlighter}. */ public class LuceneMemorySearchHighlighter implements SearchHighlighter { + /** Analyzer used when the caller does not supply one: a new {@link StandardAnalyzer}. */ private static Analyzer defaultAnalyzer() { + return new StandardAnalyzer(); + } + + private final LuceneMessageSearchIndex luceneMessageSearchIndex; + private final Analyzer analyzer; + private final Formatter formatter; + private final MessageId.Factory messageIdFactory; + private final SearchHighlighterConfiguration configuration; + private final StoreMailboxManager storeMailboxManager; + + /** Creates the highlighter with an explicit analyzer; the highlight formatter is a {@link SimpleHTMLFormatter} built from the configured pre and post tags. */ public LuceneMemorySearchHighlighter(LuceneMessageSearchIndex luceneMessageSearchIndex, + SearchHighlighterConfiguration searchHighlighterConfiguration, + MessageId.Factory messageIdFactory, StoreMailboxManager storeMailboxManager, + Analyzer analyzer) { + this.luceneMessageSearchIndex = luceneMessageSearchIndex; + this.messageIdFactory = messageIdFactory; + this.analyzer = analyzer; + this.configuration = searchHighlighterConfiguration; + this.storeMailboxManager = storeMailboxManager; + this.formatter = new SimpleHTMLFormatter(searchHighlighterConfiguration.preTagFormatter(), searchHighlighterConfiguration.postTagFormatter()); + } + + /** Same as the five-argument constructor, using the analyzer returned by defaultAnalyzer(). */ public LuceneMemorySearchHighlighter(LuceneMessageSearchIndex luceneMessageSearchIndex, + SearchHighlighterConfiguration searchHighlighterConfiguration, + MessageId.Factory messageIdFactory, + StoreMailboxManager storeMailboxManager) { + this(luceneMessageSearchIndex, + searchHighlighterConfiguration, + messageIdFactory, + storeMailboxManager, + defaultAnalyzer()); + } + + /** Resolves the mailbox ids for the expression and session through storeMailboxManager.getInMailboxIds, collects them, then highlights the documents found in those mailboxes for the requested message ids. */ @Override + public Flux<SearchSnippet> highlightSearch(List<MessageId> messageIds, MultimailboxesSearchQuery expression, MailboxSession session) { + return storeMailboxManager.getInMailboxIds(expression, session) + .collectList() + .flatMapMany(inMailboxIdsAccessible ->
highlightSearch(inMailboxIdsAccessible, expression.getSearchQuery(), messageIds)); + } + + /** Calls searchDocument with messageIds.size() as the limit, builds a snippet per returned document and keeps only snippets whose message id is in messageIds; subscribed on the bounded-elastic scheduler. NOTE(review): the limit is applied before the messageIds filter, so a requested message may be missed if other matching documents fill the limit; confirm against searchDocument. */ private Flux<SearchSnippet> highlightSearch(Collection<MailboxId> mailboxIds, SearchQuery searchQuery, List<MessageId> messageIds) { + int limit = messageIds.size(); + return Mono.fromCallable(() -> luceneMessageSearchIndex.searchDocument(mailboxIds, searchQuery, limit)).flatMapMany(Flux::fromIterable) + .map(document -> Throwing.supplier(() -> buildSearchSnippet(document, searchQuery)).get()) + .filter(searchSnippet -> messageIds.contains(searchSnippet.messageId())) + .subscribeOn(Schedulers.boundedElastic()); + } + + /** Creates a new {@link Highlighter} for the search query: scored by a {@link QueryScorer} over the derived Lucene query and fragmented by a {@link SimpleSpanFragmenter} sized with configuration.fragmentSize(). */ private Highlighter highlighter(SearchQuery searchQuery) { + Query query = buildQueryFromSearchQuery(searchQuery); + QueryScorer scorer = new QueryScorer(query); + Highlighter highlighter = new Highlighter(formatter, scorer); + highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, configuration.fragmentSize())); + return highlighter; + } + + /** Combines the queries derived from each top-level criterion as SHOULD clauses of one BooleanQuery; criteria that yield no query are skipped. */ private Query buildQueryFromSearchQuery(SearchQuery searchQuery) { + BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + + searchQuery.getCriteria().stream() + .map(this::buildQueryFromCriterion) + .flatMap(Optional::stream) + .forEach(query -> queryBuilder.add(query, BooleanClause.Occur.SHOULD)); + + return queryBuilder.build(); + } + + /** Maps a text criterion to a BODY_FIELD query and a subject criterion to a SUBJECT_FIELD query (value lower-cased with Locale.US); recurses into conjunctions other than NOR, adding the children as SHOULD clauses; returns empty for every other criterion. */ private Optional<Query> buildQueryFromCriterion(SearchQuery.Criterion criterion) { + if (criterion instanceof TextCriterion textCriterion) { + return Optional.of(buildQuery(BODY_FIELD, textCriterion.getOperator().getValue().toLowerCase(Locale.US))); + } else if (criterion instanceof SubjectCriterion subjectCriterion) { + return Optional.of(buildQuery(SUBJECT_FIELD, subjectCriterion.getSubject().toLowerCase(Locale.US))); + } else if (criterion instanceof ConjunctionCriterion conjunctionCriterion && !conjunctionCriterion.getType().equals(SearchQuery.Conjunction.NOR)) { + BooleanQuery.Builder conQuery = new BooleanQuery.Builder(); +
conjunctionCriterion.getCriteria().stream() + .map(this::buildQueryFromCriterion) + .flatMap(Optional::stream) + .forEach(query -> conQuery.add(query, BooleanClause.Occur.SHOULD)); + return Optional.of(conQuery.build()); + } + return Optional.empty(); + } + + /** Parses the value against the field with the classic {@link QueryParser}. The value is escaped first so that query-syntax characters typed by the user are matched literally instead of being interpreted as operators or failing to parse. */ private Query buildQuery(String field, String queryValue) { + QueryParser parser = new QueryParser(field, analyzer); + return Throwing.supplier(() -> parser.parse(QueryParser.escape(queryValue))).get(); + } + + /** Builds the snippet for one document: the message id parsed from MESSAGE_ID_FIELD plus the highlighted subject and body fragments, either of which may be null when the highlighter returns no fragment. */ private SearchSnippet buildSearchSnippet(Document doc, SearchQuery searchQuery) throws IOException, InvalidTokenOffsetsException { + MessageId messageId = messageIdFactory.fromString(doc.get(MESSAGE_ID_FIELD)); + String highlightedSubject = getHighlightedSubject(doc, searchQuery); + String highlightedBody = getHighlightedBody(doc, searchQuery); + + return new SearchSnippet(messageId, highlightedSubject, highlightedBody); + } + + /** Best highlighted fragment of the document's SUBJECT_FIELD value. NOTE(review): doc.get may return null when the field is absent; confirm the index always stores it. */ private String getHighlightedSubject(Document doc, SearchQuery searchQuery) throws InvalidTokenOffsetsException, IOException { + return highlighter(searchQuery).getBestFragment(analyzer, SUBJECT_FIELD, doc.get(SUBJECT_FIELD)); + } + + /** Best highlighted fragment of the document's BODY_FIELD value. NOTE(review): doc.get may return null when the field is absent; confirm the index always stores it. */ private String getHighlightedBody(Document doc, SearchQuery searchQuery) throws IOException, InvalidTokenOffsetsException { + return highlighter(searchQuery).getBestFragment(analyzer, BODY_FIELD, doc.get(BODY_FIELD)); + } +} diff --git a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighLightTest.java b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighLightTest.java new file mode 100644 index 0000000000..f4e0232bf8 --- /dev/null +++ b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMemorySearchHighLightTest.java @@ -0,0 +1,491 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements.
See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ +package org.apache.james.mailbox.lucene.search; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.SoftAssertions.assertSoftly; + +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; + +import org.apache.james.core.Username; +import org.apache.james.events.EventBus; +import org.apache.james.mailbox.MailboxSession; +import org.apache.james.mailbox.MessageIdManager; +import org.apache.james.mailbox.MessageManager; +import org.apache.james.mailbox.inmemory.InMemoryId; +import org.apache.james.mailbox.inmemory.InMemoryMessageId; +import org.apache.james.mailbox.inmemory.manager.InMemoryIntegrationResources; +import org.apache.james.mailbox.model.ComposedMessageId; +import org.apache.james.mailbox.model.Mailbox; +import org.apache.james.mailbox.model.MailboxACL; +import org.apache.james.mailbox.model.MailboxId; +import org.apache.james.mailbox.model.MailboxPath; +import org.apache.james.mailbox.model.MessageId; +import org.apache.james.mailbox.model.MultimailboxesSearchQuery; +import org.apache.james.mailbox.model.SearchQuery; +import 
org.apache.james.mailbox.searchhighligt.SearchHighlighterConfiguration; +import org.apache.james.mailbox.searchhighligt.SearchSnippet; +import org.apache.james.mailbox.store.StoreMailboxManager; +import org.apache.james.mailbox.store.StoreMessageManager; +import org.apache.james.mailbox.store.search.MessageSearchIndex; +import org.apache.james.mime4j.dom.Message; +import org.apache.james.utils.UpdatableTickingClock; +import org.apache.lucene.store.ByteBuffersDirectory; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.github.fge.lambdas.Throwing; + +class LuceneMemorySearchHighLightTest { + private static final Username USERNAME1 = Username.of("username1"); + protected MessageSearchIndex messageSearchIndex; + protected StoreMailboxManager storeMailboxManager; + protected MessageIdManager messageIdManager; + protected EventBus eventBus; + protected MessageId.Factory messageIdFactory; + protected UpdatableTickingClock clock; + private StoreMessageManager inboxMessageManager; + private MailboxSession session; + private LuceneMemorySearchHighlighter testee; + + private Mailbox mailbox; + + @BeforeEach + public void setUp() throws Exception { + messageIdFactory = new InMemoryMessageId.Factory(); + InMemoryIntegrationResources resources = InMemoryIntegrationResources.builder() + .preProvisionnedFakeAuthenticator() + .fakeAuthorizator() + .inVmEventBus() + .defaultAnnotationLimits() + .defaultMessageParser() + .listeningSearchIndex(Throwing.function(preInstanciationStage -> new LuceneMessageSearchIndex( + preInstanciationStage.getMapperFactory(), new InMemoryId.Factory(), new ByteBuffersDirectory(), + messageIdFactory, + preInstanciationStage.getSessionProvider()))) + .noPreDeletionHooks() + .storeQuotaManager() + .build(); + storeMailboxManager = resources.getMailboxManager(); + messageIdManager = resources.getMessageIdManager(); + messageSearchIndex = resources.getSearchIndex(); + eventBus = resources.getEventBus(); + 
messageIdFactory = new InMemoryMessageId.Factory(); + + clock = (UpdatableTickingClock) storeMailboxManager.getClock(); + testee = new LuceneMemorySearchHighlighter(((LuceneMessageSearchIndex) messageSearchIndex), + SearchHighlighterConfiguration.DEFAULT, + messageIdFactory, storeMailboxManager); + + session = storeMailboxManager.createSystemSession(USERNAME1); + MailboxPath inboxPath = MailboxPath.inbox(USERNAME1); + storeMailboxManager.createMailbox(inboxPath, session); + inboxMessageManager = (StoreMessageManager) storeMailboxManager.getMailbox(inboxPath, session); + mailbox = inboxMessageManager.getMailboxEntity(); + } + + @Test + void highlightSearchShouldReturnHighLightedSubjectWhenMatched() throws Exception { + // Given m1,m2 with m1 has subject containing the searched word (Matthieu) + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! Thx Matthieu for your help") + .setBody("append contentA to inbox", StandardCharsets.UTF_8)), + session).getId(); + + ComposedMessageId m2 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! 
Thx Alex for your help") + .setBody("append contentB to inbox", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(2); + + // When searching for the word (Matthieu) in the subject + MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of(SearchQuery.subject("Matthieu"))) + .inMailboxes(List.of(mailbox.getMailboxId())) + .build(); + + List<SearchSnippet> searchSnippets = testee.highlightSearch(List.of(m1.getMessageId(), m2.getMessageId()), multiMailboxSearch, session) + .collectList() + .block(); + + // Then highlightSearch should return the SearchSnippet with the highlightedSubject containing the word (Matthieu) + assertThat(searchSnippets).hasSize(1); + assertSoftly(softly -> { + softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId()); + softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo! Thx <mark>Matthieu</mark> for your help"); + }); + } + + @Test + void highlightSearchShouldReturnHighlightedBodyWhenMatched() throws Exception { + // Given m1,m2 with m1 has body containing the searched word (contentA) + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! Thx Matthieu for your help") + .setBody("append contentA to inbox", StandardCharsets.UTF_8)), + session).getId(); + + ComposedMessageId m2 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! 
Thx Alex for your help") + .setBody("append contentB to inbox", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(2); + + // When searching for the word (contentA) in the body + MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of( + SearchQuery.bodyContains("contentA"))) + .inMailboxes(List.of(mailbox.getMailboxId())) + .build(); + + // Then highlightSearch should return the SearchSnippet with the highlightedBody containing the word (contentA) + List<SearchSnippet> searchSnippets = testee.highlightSearch(List.of(m1.getMessageId(), m2.getMessageId()), multiMailboxSearch, session) + .collectList() + .block(); + assertThat(searchSnippets).hasSize(1); + assertSoftly(softly -> { + softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId()); + softly.assertThat(searchSnippets.getFirst().highlightedBody()).contains("append <mark>contentA</mark> to inbox"); + }); + } + + @Test + void searchBothSubjectAndBodyHighLightShouldReturnEmptyWhenNotMatched() throws Exception { + // Given m1,m2 with m1,m2 has both body+subject not containing the searched word (contentC) + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! Thx Matthieu for your help") + .setBody("append contentA to inbox", StandardCharsets.UTF_8)), + session).getId(); + + ComposedMessageId m2 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! 
Thx Alex for your help") + .setBody("append contentB to inbox", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(2); + + // When searching for the word (contentC) in both subject and body + MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of( + SearchQuery.bodyContains("contentC"), + SearchQuery.subject("contentC"))) + .inMailboxes(List.of(mailbox.getMailboxId())) + .build(); + + // Then highlightSearch should return an empty + assertThat(testee.highlightSearch(List.of(m1.getMessageId(), m2.getMessageId()), multiMailboxSearch, session) + .collectList() + .block()).isEmpty(); + } + + @Test + void searchBothSubjectAndBodyHighLightShouldReturnEntryWhenMatched() throws Exception { + // Given m1,m2 with m1 has body + subject containing the searched word (Naruto) + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! Thx Naruto for your help") + .setBody("append Naruto to inbox", StandardCharsets.UTF_8)), + session).getId(); + + ComposedMessageId m2 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! 
Thx Alex for your help") + .setBody("append contentB to inbox", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(2); + + // When searching for the word (Naruto) in both subject and body + MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of( + SearchQuery.bodyContains("Naruto"), + SearchQuery.subject("Naruto"))) + .inMailboxes(List.of(mailbox.getMailboxId())) + .build(); + + // Then highlightSearch should return the SearchSnippet with the highlightedBody/highlightedSubject containing the word (Naruto) + List<SearchSnippet> searchSnippets = testee.highlightSearch(List.of(m1.getMessageId(), m2.getMessageId()),multiMailboxSearch, session) + .collectList() + .block(); + assertThat(searchSnippets).hasSize(1); + assertSoftly(softly -> { + softly.assertThat(searchSnippets.getFirst().messageId()).isEqualTo(m1.getMessageId()); + softly.assertThat(searchSnippets.getFirst().highlightedBody()).contains("append <mark>Naruto</mark> to inbox"); + softly.assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo! 
Thx <mark>Naruto</mark> for your help"); + }); + } + + @Test + void highlightSearchShouldReturnMultipleResultsWhenMultipleMatches() throws Exception { + // Given m1,m2 with m1,m2 has subject containing the searched word (WeeklyReport) + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Tran Van Tung WeeklyReport 04/10/2024") + .setBody("The weekly report has been in attachment1", StandardCharsets.UTF_8)), + session).getId(); + + ComposedMessageId m2 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Tran Van Tung WeeklyReport 11/10/2024") + .setBody("The weekly report has been in attachment2", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(2); + + // When searching for the word (WeeklyReport) in the subject + MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of(SearchQuery.subject("WeeklyReport"))) + .inMailboxes(List.of(mailbox.getMailboxId())) + .build(); + + // Then highlightSearch should return the SearchSnippet with the highlightedSubject containing the word (WeeklyReport) + List<SearchSnippet> searchSnippets = testee.highlightSearch(List.of(m1.getMessageId(), m2.getMessageId()),multiMailboxSearch, session) + .collectList() + .block(); + assertThat(searchSnippets).hasSize(2); + assertThat(searchSnippets.stream().map(SearchSnippet::highlightedSubject)) + .allSatisfy(highlightedSubject -> assertThat(highlightedSubject.get()).contains("Tran Van Tung <mark>WeeklyReport</mark>")); + } + + @Test + void highlightSearchShouldReturnCorrectFormatWhenSearchTwoWords() throws Exception { + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + 
Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! Thx Naruto Itachi for your help") + .setBody("append Naruto Itachi to inbox", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(1); + + // When searching for the word (Naruto and Itachi) in the subject + MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of(SearchQuery.bodyContains("Naruto Itachi"))) + .inMailboxes(List.of(mailbox.getMailboxId())) + .build(); + + // Then highlightSearch should return the SearchSnippet with the highlightedSubject containing the word (Naruto) (and) (Itachi) + List<SearchSnippet> searchSnippets = testee.highlightSearch(List.of(m1.getMessageId()),multiMailboxSearch, session) + .collectList() + .block(); + assertThat(searchSnippets).hasSize(1); + assertThat(searchSnippets.getFirst().highlightedBody()).contains("append <mark>Naruto</mark> <mark>Itachi</mark> to inbox"); + } + + @Test + void highlightSearchShouldReturnEmptyResultsWhenKeywordNoMatch() throws Exception { + // Given m1 that has both subject + body not containing the searched word (Vietnam) + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Tran Van Tung WeeklyReport 04/10/2024") + .setBody("The weekly report has been in attachment1", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(1); + + // When searching for the word (Vietnam) in the subject + MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of(SearchQuery.subject("Vietnam"))) + .inMailboxes(List.of(mailbox.getMailboxId())) + .build(); + + // Then highlightSearch should return an 
empty list + assertThat(testee.highlightSearch(List.of(m1.getMessageId()),multiMailboxSearch, session) + .collectList() + .block()).isEmpty(); + } + + @Test + void highlightSearchShouldReturnEmptyResultsWhenMailboxIdNoMatch() throws Exception { + // Given message m1 of mailbox Mailbox.inbox(username1) + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Tran Van Tung WeeklyReport 04/10/2024") + .setBody("The weekly report has been in attachment1", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(1); + + // When searching for the word (WeeklyReport) in the subject but in another mailbox + MailboxId randomMailboxId = storeMailboxManager.createMailbox(MailboxPath.forUser(USERNAME1, "random1"), session).get(); + + // Then highlightSearch should return an empty list + assertThat(testee.highlightSearch(List.of(m1.getMessageId()), + MultimailboxesSearchQuery.from(SearchQuery.of(SearchQuery.subject("WeeklyReport"))) + .inMailboxes(List.of(randomMailboxId)).build(), session) + .collectList() + .block()).isEmpty(); + } + + @Test + void highlightSearchShouldNotReturnEntryWhenDoesNotAccessible() throws Exception { + // Given messages of username1 + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! 
Thx Matthieu for your help") + .setBody("append contentA to inbox", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(1); + + // When searching for the word (Matthieu) in the subject, but the mailbox is not accessible + MailboxSession notAccessible = storeMailboxManager.createSystemSession(Username.of("notAccessible")); + List<SearchSnippet> searchSnippets = testee.highlightSearch(List.of(m1.getMessageId()),MultimailboxesSearchQuery.from(SearchQuery.of(SearchQuery.subject("Matthieu"))) + .inMailboxes(List.of(mailbox.getMailboxId())) + .build(), notAccessible) + .collectList() + .block(); + + // Then highlightSearch should not return username1 entry + assertThat(searchSnippets).hasSize(0); + } + + @Test + void highlightSearchShouldReturnEntryWhenHasAccessible() throws Exception { + // Given messages of username1 + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! 
Thx Matthieu for your help") + .setBody("append contentA to inbox", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(1); + + // Set right for delegated1 to access username1 mailbox + Username delegated1 = Username.of("delegated"); + MailboxSession delegated1Session = storeMailboxManager.createSystemSession(delegated1); + storeMailboxManager.applyRightsCommand(mailbox.generateAssociatedPath(), + MailboxACL.command().forUser(delegated1).rights(MailboxACL.FULL_RIGHTS).asAddition(), + session); + + // When searching for the word (Matthieu) in the subject + List<SearchSnippet> searchSnippets = testee.highlightSearch(List.of(m1.getMessageId()), MultimailboxesSearchQuery.from(SearchQuery.of(SearchQuery.subject("Matthieu"))) + .inMailboxes(List.of(mailbox.getMailboxId())) + .build(), delegated1Session) + .collectList() + .block(); + + // Then highlightSearch should return username1 entry + assertThat(searchSnippets).hasSize(1); + assertThat(searchSnippets.getFirst().highlightedSubject()).contains("Hallo! Thx <mark>Matthieu</mark> for your help"); + } + + @Test + void highLightSearchShouldSupportConjunctionCriterionInMultiMessage() throws Exception { + // Given m1,m2 + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! Thx Naruto for your help") + .setBody("append Naruto to inbox", StandardCharsets.UTF_8)), + session).getId(); + + ComposedMessageId m2 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! 
Thx Alex for your help") + .setBody("append contentB to inbox", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(2); + + // When searching for the word (Naruto) or (Alex) in the subject + MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of( + new SearchQuery.ConjunctionCriterion(SearchQuery.Conjunction.OR, + List.of(SearchQuery.subject("Naruto"), SearchQuery.subject("Alex"))))) + .inMailboxes(List.of(mailbox.getMailboxId())) + .build(); + + // Then highlightSearch should return the SearchSnippet with the highlightedSubject containing the word (Naruto) or (Alex) + List<SearchSnippet> searchSnippets = testee.highlightSearch(List.of(m1.getMessageId(), m2.getMessageId()), multiMailboxSearch, session) + .collectList() + .block(); + assertThat(searchSnippets).hasSize(2); + assertThat(searchSnippets.stream() + .map(SearchSnippet::highlightedSubject) + .toList()) + .containsExactlyInAnyOrder(Optional.of("Hallo! Thx <mark>Naruto</mark> for your help"), + Optional.of("Hallo! Thx <mark>Alex</mark> for your help")); + } + + @Test + void highLightSearchShouldSupportConjunctionCriterionInSingleMessage() throws Exception { + // Given m1,m2 + ComposedMessageId m1 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! Thx Naruto for your help - Sasuke for your help") + .setBody("append Naruto to inbox", StandardCharsets.UTF_8)), + session).getId(); + + ComposedMessageId m2 = inboxMessageManager.appendMessage(MessageManager.AppendCommand.from( + Message.Builder.of() + .setTo("to@james.local") + .setSubject("Hallo! 
Thx Alex for your help") + .setBody("append contentB to inbox", StandardCharsets.UTF_8)), + session).getId(); + + // Verify that the messages are indexed + assertThat(messageSearchIndex.search(session, mailbox, SearchQuery.of()).toStream().count()).isEqualTo(2); + + // When searching for the word (Naruto) or (Sasuke) in the subject + MultimailboxesSearchQuery multiMailboxSearch = MultimailboxesSearchQuery.from(SearchQuery.of( + new SearchQuery.ConjunctionCriterion(SearchQuery.Conjunction.OR, + List.of(SearchQuery.subject("Naruto"), SearchQuery.subject("Sasuke"))))) + .inMailboxes(List.of(mailbox.getMailboxId())) + .build(); + + // Then highlightSearch should return the SearchSnippet with the highlightedSubject containing the word (Naruto) or (Sasuke) + List<SearchSnippet> searchSnippets = testee.highlightSearch(List.of(m1.getMessageId(), m2.getMessageId()), multiMailboxSearch, session) + .collectList() + .block(); + assertThat(searchSnippets).hasSize(1); + assertThat(searchSnippets.stream() + .map(SearchSnippet::highlightedSubject) + .toList()) + .containsExactlyInAnyOrder(Optional.of("Hallo! 
Thx <mark>Naruto</mark> for your help - <mark>Sasuke</mark> for your help")); + } +} diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/StoreMailboxManager.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/StoreMailboxManager.java index e922c0b83e..c3415c0491 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/StoreMailboxManager.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/StoreMailboxManager.java @@ -883,7 +883,7 @@ public class StoreMailboxManager implements MailboxManager { return threadIdGuessingAlgorithm.getMessageIdsInThread(threadId, session); } - private Flux<MailboxId> getInMailboxIds(MultimailboxesSearchQuery expression, MailboxSession session) { + public Flux<MailboxId> getInMailboxIds(MultimailboxesSearchQuery expression, MailboxSession session) { if (expression.getInMailboxes().isEmpty()) { return accessibleMailboxIds(expression.getNamespace(), Right.Read, session); } else { --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org For additional commands, e-mail: notifications-h...@james.apache.org