This is an automated email from the ASF dual-hosted git repository.
rcordier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
The following commit(s) were added to refs/heads/master by this push:
new 35a3bc4463 Limit the subject length for indexation to not go over
Lucene raw indexation max size limit
35a3bc4463 is described below
commit 35a3bc44632d09d6cb6cf6dc68f65d0e04101ba3
Author: Rene Cordier <[email protected]>
AuthorDate: Mon Nov 3 15:30:35 2025 +0700
Limit the subject length for indexation to not go over Lucene raw
indexation max size limit
---
.../mailbox/opensearch/json/IndexableMessage.java | 9 +++++++-
.../opensearch/OpenSearchIntegrationTest.java | 27 ++++++++++++++++++++++
.../james/mailbox/store/search/SearchUtil.java | 16 +++++++++++++
3 files changed, 51 insertions(+), 1 deletion(-)
diff --git
a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
index cb2bd7cae7..fdd78aa3b5 100644
---
a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
+++
b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java
@@ -27,6 +27,7 @@ import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Optional;
+import java.util.Set;
import java.util.stream.Collectors;
import org.apache.james.mailbox.ModSeq;
@@ -145,7 +146,7 @@ public class IndexableMessage {
ZonedDateTime internalDate =
getSanitizedInternalDate(message, zoneId);
List<HeaderCollection.Header> headers =
headerCollection.getHeaders();
- Subjects subjects =
Subjects.from(headerCollection.getSubjectSet());
+ Subjects subjects =
Subjects.from(limitSubjectsLength(headerCollection.getSubjectSet()));
EMailers from =
EMailers.from(headerCollection.getFromAddressSet());
EMailers to =
EMailers.from(headerCollection.getToAddressSet());
EMailers cc =
EMailers.from(headerCollection.getCcAddressSet());
@@ -226,6 +227,12 @@ public class IndexableMessage {
return ImmutableList.of();
}
}
+
+ private Set<String> limitSubjectsLength(Set<String> subjects) { // Applies SearchUtil.truncateSubjectField to every subject and gathers the results into a new Set.
+ return subjects.stream()
+ .map(SearchUtil::truncateSubjectField)
+ .collect(Collectors.toSet()); // subjects that become equal once truncated end up as a single entry
+ }
}
public static Builder builder() {
diff --git
a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java
b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java
index d44a2b8ba6..4a3182dafd 100644
---
a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java
+++
b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/OpenSearchIntegrationTest.java
@@ -306,6 +306,33 @@ class OpenSearchIntegrationTest extends
AbstractMessageSearchIndexTest {
.containsExactly(composedMessageId.getUid());
}
+ @Test
+ void tooLongSubjectShouldNotMakeIndexingFail() throws Exception { // Checks that a message with an oversized subject is still indexed and can be found by a search.
+ MailboxPath mailboxPath = MailboxPath.forUser(USERNAME, INBOX);
+ MailboxSession session = MailboxSessionUtil.create(USERNAME);
+ MessageManager messageManager =
storeMailboxManager.getMailbox(mailboxPath, session);
+
+ String recipient = "[email protected]";
+ ComposedMessageId composedMessageId =
messageManager.appendMessage(MessageManager.AppendCommand.from(
+ Message.Builder.of()
+ .setTo(recipient)
+ .setSubject(Strings.repeat("0123456789", 5000)) // 10 chars x 5000 = 50,000 ASCII chars, well above the 32766-byte limit used by SearchUtil
+ .setBody("0123456789", StandardCharsets.UTF_8)),
+ session).getId();
+
+ CALMLY_AWAIT.atMost(Durations.TEN_SECONDS) // poll for up to ten seconds until a match-all query reports the expected hit count
+ .untilAsserted(() -> assertThat(client.search(
+ new SearchRequest.Builder()
+ .index(indexName.getValue())
+ .query(QueryBuilders.matchAll().build().toQuery())
+ .build())
+ .block()
+ .hits().total().value()).isEqualTo(14)); // NOTE(review): 14 is presumably the fixture messages plus the one appended above - confirm against the parent test setup
+
+
assertThat(Flux.from(messageManager.search(SearchQuery.of(SearchQuery.address(SearchQuery.AddressType.To,
recipient)), session)).toStream())
+ .containsExactly(composedMessageId.getUid()); // the To-address search must return exactly the uid of the message appended above
+ }
+
@Test
void fieldsExceedingLuceneLimitShouldNotBeIgnored() throws Exception {
MailboxPath mailboxPath = MailboxPath.forUser(USERNAME, INBOX);
diff --git
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
index 0a90b132b6..b45f2bf903 100644
---
a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
+++
b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
@@ -18,6 +18,8 @@
****************************************************************/
package org.apache.james.mailbox.store.search;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
@@ -52,6 +54,7 @@ public class SearchUtil {
private static final char OPEN_SQUARE_BRACKED = '[';
private static final char CLOSE_SQUARE_BRACKED = ']';
private static final char COLON = ':';
+ private static final int MAX_RAW_BYTES = 32766;
/**
* Return the DISPLAY ADDRESS for the given {@link Mailbox}.
@@ -491,4 +494,17 @@ public class SearchUtil {
return result.toString();
}
+
+    /**
+     * Limits a subject so that its UTF-8 encoding never exceeds {@code MAX_RAW_BYTES} bytes.
+     *
+     * <p>The cut is moved back to the nearest character boundary. Cutting in the middle of a
+     * multi-byte sequence would make the decoder substitute U+FFFD, which takes 3 bytes once
+     * re-encoded and could push the result back over the limit.
+     *
+     * @param subject the subject to limit, may be {@code null}
+     * @return {@code null} when {@code subject} is {@code null}; {@code subject} itself when its
+     *         UTF-8 form already fits; otherwise a truncated copy, cut at a character boundary,
+     *         whose UTF-8 form is at most {@code MAX_RAW_BYTES} bytes long
+     */
+    public static String truncateSubjectField(String subject) {
+        if (subject == null) {
+            return null;
+        }
+
+        byte[] subjectAsBytes = subject.getBytes(StandardCharsets.UTF_8);
+        if (subjectAsBytes.length <= MAX_RAW_BYTES) {
+            return subject;
+        }
+
+        // subjectAsBytes[end] is the first byte that gets dropped. While it is a UTF-8
+        // continuation byte (10xxxxxx) the cut would split a character, so step back
+        // until the dropped byte is the lead byte of a sequence.
+        int end = MAX_RAW_BYTES;
+        while (end > 0 && (subjectAsBytes[end] & 0xC0) == 0x80) {
+            end--;
+        }
+
+        return new String(Arrays.copyOf(subjectAsBytes, end), StandardCharsets.UTF_8);
+    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]