This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
The following commit(s) were added to refs/heads/master by this push:
new 955c610e88 JAMES-4166 Implement collapseThreads support for Lucene
(#2935)
955c610e88 is described below
commit 955c610e88cdb7e7d91e0d64e69167c78bf1420d
Author: Trần Hồng Quân <[email protected]>
AuthorDate: Fri Feb 6 20:12:25 2026 +0700
JAMES-4166 Implement collapseThreads support for Lucene (#2935)
---
mailbox/lucene/pom.xml | 5 ++
.../lucene/search/LuceneIndexableDocument.java | 6 +-
.../lucene/search/LuceneMessageSearchIndex.java | 70 ++++++++++++++++++----
.../search/LuceneMessageSearchIndexTest.java | 5 ++
.../memory/MemoryEmailQueryMethodNoViewTest.java | 12 ----
.../rfc8621/memory/MemoryEmailQueryMethodTest.java | 12 ----
upgrade-instructions.md | 13 ++++
7 files changed, 86 insertions(+), 37 deletions(-)
diff --git a/mailbox/lucene/pom.xml b/mailbox/lucene/pom.xml
index 95df49120f..9a1784159a 100644
--- a/mailbox/lucene/pom.xml
+++ b/mailbox/lucene/pom.xml
@@ -122,6 +122,11 @@
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-grouping</artifactId>
+ <version>${lucene.version}</version>
+ </dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
diff --git
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
index 4f14d14c3f..1f45d594a1 100644
---
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
+++
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneIndexableDocument.java
@@ -88,6 +88,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
@@ -143,7 +144,10 @@ public class LuceneIndexableDocument {
Optional.ofNullable(SearchUtil.getSerializedMessageIdIfSupportedByUnderlyingStorageOrNull(message))
.ifPresent(serializedMessageId -> doc.add(new
StringField(MESSAGE_ID_FIELD, serializedMessageId, Field.Store.YES)));
Optional.ofNullable(SearchUtil.getSerializedThreadIdIfSupportedByUnderlyingStorageOrNull(message))
- .ifPresent(serializedThreadId -> doc.add(new
StringField(THREAD_ID_FIELD, serializedThreadId, Field.Store.YES)));
+ .ifPresent(serializedThreadId -> {
+ doc.add(new StringField(THREAD_ID_FIELD, serializedThreadId,
Field.Store.YES));
+ doc.add(new SortedDocValuesField(THREAD_ID_FIELD, new
BytesRef(serializedThreadId)));
+ });
HeaderCollection headerCollection =
mimePartExtracted.getHeaderCollection();
diff --git
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java
index 60fc932240..b9298d7013 100644
---
a/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java
+++
b/mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java
@@ -131,7 +131,12 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.grouping.GroupDocs;
+import org.apache.lucene.search.grouping.GroupingSearch;
+import org.apache.lucene.search.grouping.TermGroupSelector;
+import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -308,6 +313,13 @@ public class LuceneMessageSearchIndex extends
ListeningMessageSearchIndex {
return Flux.empty();
}
+ if (searchQuery.shouldCollapseThreads()) {
+ return searchCollapseThreads(mailboxIds, searchQuery,
searchOptions);
+ }
+ return searchWithoutCollapseThreads(mailboxIds, searchQuery,
searchOptions);
+ }
+
+ private Flux<MessageId> searchWithoutCollapseThreads(Collection<MailboxId>
mailboxIds, SearchQuery searchQuery, SearchOptions searchOptions) throws
MailboxException {
long requestedLimit =
Math.addExact(searchOptions.offset().getOffset(),
searchOptions.limit().getLimit().orElseThrow());
return Flux.fromIterable(searchMultimap(mailboxIds, searchQuery)
@@ -320,6 +332,36 @@ public class LuceneMessageSearchIndex extends
ListeningMessageSearchIndex {
.collect(ImmutableList.toImmutableList()));
}
+ private Flux<MessageId> searchCollapseThreads(Collection<MailboxId>
mailboxIds, SearchQuery searchQuery, SearchOptions searchOptions) throws
MailboxException {
+ Query query = buildQuery(mailboxIds, searchQuery);
+
+ try (IndexReader reader = DirectoryReader.open(writer)) {
+ IndexSearcher searcher = new IndexSearcher(reader);
+
+ GroupingSearch groupingSearch = new GroupingSearch(new
TermGroupSelector(THREAD_ID_FIELD));
+ Sort sort = createSort(searchQuery.getSorts());
+ groupingSearch.setGroupSort(sort);
+ groupingSearch.setSortWithinGroup(sort);
+ // get the first message of each thread group
+ groupingSearch.setGroupDocsOffset(0);
+ groupingSearch.setGroupDocsLimit(1);
+
+ int groupOffset =
Math.toIntExact(searchOptions.offset().getOffset());
+ int topNGroups =
Math.toIntExact(searchOptions.limit().getLimit().orElseThrow());
+
+ TopGroups<BytesRef> topGroups = groupingSearch.search(searcher,
query, groupOffset, topNGroups);
+ List<MessageId> result = new ArrayList<>(topGroups.groups.length);
+ for (GroupDocs<BytesRef> group : topGroups.groups) {
+ ScoreDoc[] scoreDocs = group.scoreDocs();
+ Document document =
searcher.storedFields().document(scoreDocs[0].doc);
+
documentToSearchResult(document).getMessageId().ifPresent(result::add);
+ }
+ return Flux.fromIterable(result);
+ } catch (IOException e) {
+ throw new MailboxException("Unable to search the mailbox", e);
+ }
+ }
+
private List<SearchResult> searchMultimap(Collection<MailboxId>
mailboxIds, SearchQuery searchQuery) throws MailboxException {
return searchDocument(mailboxIds, searchQuery, maxQueryResults)
.stream()
@@ -336,22 +378,12 @@ public class LuceneMessageSearchIndex extends
ListeningMessageSearchIndex {
}
public List<Document> searchDocument(Collection<MailboxId> mailboxIds,
SearchQuery searchQuery, int maxQueryResults) throws MailboxException {
- Query inMailboxes = buildQueryFromMailboxes(mailboxIds);
-
try (IndexReader reader = DirectoryReader.open(writer)) {
IndexSearcher searcher = new IndexSearcher(reader);
- BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
- queryBuilder.add(inMailboxes, BooleanClause.Occur.MUST);
- // Not return flags documents
- queryBuilder.add(new PrefixQuery(new Term(FLAGS_FIELD, "")),
BooleanClause.Occur.MUST_NOT);
-
- List<Criterion> crits = searchQuery.getCriteria();
- for (Criterion crit : crits) {
- queryBuilder.add(createQuery(crit, inMailboxes,
searchQuery.getRecentMessageUids()), BooleanClause.Occur.MUST);
- }
+ Query query = buildQuery(mailboxIds, searchQuery);
// query for all the documents sorted as specified in the
SearchQuery
- TopDocs docs = searcher.search(queryBuilder.build(),
maxQueryResults, createSort(searchQuery.getSorts()));
+ TopDocs docs = searcher.search(query, maxQueryResults,
createSort(searchQuery.getSorts()));
return Stream.of(docs.scoreDocs)
.map(Throwing.function(sDoc ->
searcher.storedFields().document(sDoc.doc)))
@@ -370,6 +402,20 @@ public class LuceneMessageSearchIndex extends
ListeningMessageSearchIndex {
return queryBuilder.build();
}
+ private Query buildQuery(Collection<MailboxId> mailboxIds, SearchQuery
searchQuery) throws MailboxException {
+ Query inMailboxes = buildQueryFromMailboxes(mailboxIds);
+ BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+ queryBuilder.add(inMailboxes, BooleanClause.Occur.MUST);
+ // Not return flags documents
+ queryBuilder.add(new PrefixQuery(new Term(FLAGS_FIELD, "")),
BooleanClause.Occur.MUST_NOT);
+
+ List<Criterion> crits = searchQuery.getCriteria();
+ for (Criterion crit : crits) {
+ queryBuilder.add(createQuery(crit, inMailboxes,
searchQuery.getRecentMessageUids()), BooleanClause.Occur.MUST);
+ }
+ return queryBuilder.build();
+ }
+
private String toSentDateField(DateResolution res) {
return switch (res) {
case Year -> SENT_DATE_FIELD_YEAR_RESOLUTION;
diff --git
a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java
b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java
index fcb8b2d347..1c59cb4d93 100644
---
a/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java
+++
b/mailbox/lucene/src/test/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndexTest.java
@@ -96,6 +96,11 @@ class LuceneMessageSearchIndexTest extends
AbstractMessageSearchIndexTest {
return InMemoryMessageId.of(1000);
}
+ @Override
+ protected boolean supportsCollapseThreads() {
+ return true;
+ }
+
@Disabled("JAMES-1799: ignoring failing test after generalizing OpenSearch
test suite to other mailbox search backends")
@Override
public void uidShouldreturnEveryThing() {
diff --git
a/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodNoViewTest.java
b/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodNoViewTest.java
index 2587a843ef..49141a8c7b 100644
---
a/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodNoViewTest.java
+++
b/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodNoViewTest.java
@@ -102,16 +102,4 @@ public class MemoryEmailQueryMethodNoViewTest implements
EmailQueryMethodContrac
public void
inMailboxBeforeSortedByReceivedAtShouldCollapseThreads(GuiceJamesServer server)
{
}
- @Test
- @Override
- @Disabled("JAMES-4166 collapseThreads does not support Lucene
implementation yet")
- public void collapseThreadsShouldApplyOnSearchIndexPath(GuiceJamesServer
server) {
- }
-
- @Test
- @Override
- @Disabled("JAMES-4166 collapseThreads does not support Lucene
implementation yet")
- public void
collapseThreadsShouldApplyPaginationOnCollapsedResults(GuiceJamesServer server)
{
- }
-
}
diff --git
a/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodTest.java
b/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodTest.java
index 970107e19c..f5152dd222 100644
---
a/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodTest.java
+++
b/server/protocols/jmap-rfc-8621-integration-tests/memory-jmap-rfc-8621-integration-tests/src/test/java/org/apache/james/jmap/rfc8621/memory/MemoryEmailQueryMethodTest.java
@@ -52,16 +52,4 @@ public class MemoryEmailQueryMethodTest extends MemoryBase
implements EmailQuery
EmailQueryMethodContract.super.shouldListMailsReceivedAfterADate(server);
}
- @Test
- @Override
- @Disabled("JAMES-4166 collapseThreads does not support Lucene
implementation yet")
- public void collapseThreadsShouldApplyOnSearchIndexPath(GuiceJamesServer
server) {
- }
-
- @Test
- @Override
- @Disabled("JAMES-4166 collapseThreads does not support Lucene
implementation yet")
- public void
collapseThreadsShouldApplyPaginationOnCollapsedResults(GuiceJamesServer server)
{
- }
-
}
diff --git a/upgrade-instructions.md b/upgrade-instructions.md
index 145b04d820..cc79b2954e 100644
--- a/upgrade-instructions.md
+++ b/upgrade-instructions.md
@@ -17,6 +17,19 @@ Changes to apply between 3.9.x and 3.10.0 will be reported
here.
Change list:
- [Adding thread_id column to Cassandra email_query_view_sent_at and email_query_view_received_at tables](#adding-thread_id-column-to-cassandra-email_query_view_sent_at-and-email_query_view_received_at-tables)
- [Adding thread_id column to Postgresql email_query_view table](#adding-thread_id-column-to-postgresql-email_query_view-table)
+ - [Lucene mailbox index schema update for collapseThreads support](#lucene-mailbox-index-schema-update-for-collapsethreads-support)
+
+### Lucene mailbox index schema update for collapseThreads support
+
+Date: 06/02/2026
+
+Concerned products: James apps relying on Lucene as the search index
+
+JIRA: https://issues.apache.org/jira/browse/JAMES-4166
+
+James now requires the `threadId` field to be indexed as a SortedDocValuesField to support `collapseThreads` on mailbox search.
+
+After upgrading, you need to rebuild the Lucene mailbox index by [reindexing all mails](https://james.apache.org/server/manage-webadmin.html#ReIndexing_all_mails).
### Adding thread_id column to Cassandra email_query_view_sent_at and email_query_view_received_at tables
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]