This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
The following commit(s) were added to refs/heads/master by this push: new e857226d8a [ENHANCEMENT] Indexing: infer hasAttachment without calling MailboxMe… (#2777) e857226d8a is described below commit e857226d8a72da6d1e2090eecc6190b671e32f09 Author: Benoit TELLIER <btell...@linagora.com> AuthorDate: Fri Jul 25 21:53:35 2025 +0200 [ENHANCEMENT] Indexing: infer hasAttachment without calling MailboxMe… (#2777) --- .../mailbox/opensearch/json/IndexableMessage.java | 4 ++-- .../opensearch/json/IndexableMessageTest.java | 15 +----------- .../src/test/resources/eml/alternative.json | 2 +- .../src/test/resources/eml/inlined-mixed.json | 2 +- .../james/mailbox/store/search/mime/MimePart.java | 27 ++++++++++++++++++---- .../search/mime/MimePartContainerBuilder.java | 3 +++ .../mailbox/store/search/mime/MimePartParser.java | 2 ++ .../search/mime/RootMimePartContainerBuilder.java | 7 ++++++ .../store/src/test/resources/eml/james-3901.json | 2 +- .../store/src/test/resources/eml/nonTextual.json | 2 +- .../nonTextualWithoutAttachmentTextContent.json | 2 +- mailbox/store/src/test/resources/eml/spamMail.json | 2 +- 12 files changed, 43 insertions(+), 27 deletions(-) diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java index 8bfaf646f8..cb2bd7cae7 100644 --- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java +++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/IndexableMessage.java @@ -31,7 +31,6 @@ import java.util.stream.Collectors; import org.apache.james.mailbox.ModSeq; import org.apache.james.mailbox.extractor.TextExtractor; -import org.apache.james.mailbox.model.MessageAttachmentMetadata; import org.apache.james.mailbox.opensearch.IndexAttachments; import org.apache.james.mailbox.opensearch.IndexBody; import org.apache.james.mailbox.opensearch.IndexHeaders; @@ -138,8 +137,9 @@ public class IndexableMessage { Optional<String> bodyText = parsingResult.locateFirstTextBody().map(SearchUtil::removeGreaterThanCharactersAtBeginningOfLine); Optional<String> bodyHtml = parsingResult.locateFirstHtmlBody(); - boolean hasAttachment = MessageAttachmentMetadata.hasNonInlinedAttachment(message.getAttachments()); List<MimePart> attachments = setFlattenedAttachments(parsingResult, indexAttachments); + boolean hasAttachment = attachments.stream() + .anyMatch(mimePart -> !mimePart.isInlinedWithCid() && mimePart.getContentDisposition().isPresent()); HeaderCollection headerCollection = parsingResult.getHeaderCollection(); ZonedDateTime internalDate = getSanitizedInternalDate(message, zoneId); diff --git a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java index 3836ae127f..726f9585b4 100644 --- a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java +++ b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/IndexableMessageTest.java @@ -37,10 +37,7 @@ import org.apache.james.mailbox.ModSeq; import org.apache.james.mailbox.extractor.ParsedContent; import org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mailbox.inmemory.InMemoryMessageId; -import org.apache.james.mailbox.model.AttachmentMetadata; -import org.apache.james.mailbox.model.MessageAttachmentMetadata; import org.apache.james.mailbox.model.MessageId; -import org.apache.james.mailbox.model.StringBackedAttachmentId; import org.apache.james.mailbox.model.TestId; import org.apache.james.mailbox.model.ThreadId; import org.apache.james.mailbox.opensearch.IndexAttachments; @@ -90,21 +87,11 @@ class IndexableMessageTest { when(mailboxMessage.getMessageId()) .thenReturn(messageId); when(mailboxMessage.getFullContent()) - .thenReturn(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml")); + .thenReturn(ClassLoader.getSystemResourceAsStream("eml/emailWithTextAttachment.eml")); when(mailboxMessage.createFlags()) .thenReturn(new Flags()); when(mailboxMessage.getUid()) .thenReturn(MESSAGE_UID); - when(mailboxMessage.getAttachments()) - .thenReturn(ImmutableList.of(MessageAttachmentMetadata.builder() - .attachment(AttachmentMetadata.builder() - .messageId(messageId) - .attachmentId(StringBackedAttachmentId.from("1")) - .type("text/plain") - .size(36) - .build()) - .isInline(false) - .build())); // When IndexableMessage indexableMessage = IndexableMessage.builder() diff --git a/mailbox/opensearch/src/test/resources/eml/alternative.json b/mailbox/opensearch/src/test/resources/eml/alternative.json index 86a63fc739..5cc8f5c0c3 100644 --- a/mailbox/opensearch/src/test/resources/eml/alternative.json +++ b/mailbox/opensearch/src/test/resources/eml/alternative.json @@ -15,7 +15,7 @@ "cc":[], "date":"2015-06-07T00:00:00+0200", "from":[{"name":"Benoit TELLIER","address":"btell...@linagora.com","domain":"linagora"}], - "hasAttachment":false, + "hasAttachment":true, "headers":[ {"name":"mime-version","value":"1.0"}, {"name":"subject","value":"Test"}, diff --git a/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json b/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json index b7408ee3db..04055bf8e4 100644 --- a/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json +++ b/mailbox/opensearch/src/test/resources/eml/inlined-mixed.json @@ -25,7 +25,7 @@ "address": "b...@domain.tld", "domain":"domain" }], - "hasAttachment": false, + "hasAttachment": true, "headers": [{ "name": "date", "value": "Wed, 26 Jan 2022 12:21:37 +0100" diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePart.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePart.java index 6e4732cc6e..3f1bb33c15 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePart.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePart.java @@ -32,6 +32,7 @@ import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.IOUtils; import org.apache.james.mailbox.extractor.ParsedContent; import org.apache.james.mailbox.extractor.TextExtractor; +import org.apache.james.mailbox.model.Cid; import org.apache.james.mailbox.model.ContentType; import org.apache.james.mailbox.model.ContentType.MediaType; import org.apache.james.mailbox.model.ContentType.SubType; @@ -58,6 +59,7 @@ public class MimePart { private Optional<String> fileExtension; private Optional<String> contentDisposition; private Optional<Charset> charset; + private Optional<Cid> cid; private Predicate<ContentType> shouldCaryOverContent; private Builder(Predicate<ContentType> shouldCaryOverContent) { @@ -71,6 +73,7 @@ public class MimePart { this.fileExtension = Optional.empty(); this.contentDisposition = Optional.empty(); this.charset = Optional.empty(); + this.cid = Optional.empty(); } @Override @@ -132,6 +135,12 @@ public class MimePart { } } + @Override + public MimePartContainerBuilder addCid(Cid cid) { + this.cid = Optional.ofNullable(cid); + return this; + } + @Override public ParsedMimePart build() { final Optional<ContentType> contentType = computeContentType(); @@ -145,7 +154,7 @@ public class MimePart { fileName, fileExtension, contentDisposition, - children); + cid, children); } } @@ -159,12 +168,13 @@ public class MimePart { private final Optional<String> fileName; private final Optional<String> fileExtension; private final Optional<String> contentDisposition; + private final Optional<Cid> cid; private final List<ParsedMimePart> attachments; public ParsedMimePart(HeaderCollection headerCollection, Optional<InputStream> bodyContent, Optional<Charset> charset, Optional<MediaType> mediaType, Optional<SubType> subType, Optional<ContentType> contentType, Optional<String> fileName, Optional<String> fileExtension, - Optional<String> contentDisposition, List<ParsedMimePart> attachments) { + Optional<String> contentDisposition, Optional<Cid> cid, List<ParsedMimePart> attachments) { this.headerCollection = headerCollection; this.mediaType = mediaType; this.subType = subType; @@ -172,6 +182,7 @@ public class MimePart { this.fileName = fileName; this.fileExtension = fileExtension; this.contentDisposition = contentDisposition; + this.cid = cid; this.attachments = attachments; this.charset = charset; @@ -190,7 +201,7 @@ public class MimePart { return Mono.just(Optional.empty()); }) .map(text -> new MimePart(headerCollection, text.flatMap(ParsedContent::getTextualContent), - mediaType, subType, fileName, fileExtension, contentDisposition, attachments))); + mediaType, subType, fileName, fileExtension, contentDisposition, cid, attachments))); } private Mono<ParsedContent> extractText(TextExtractor textExtractor) { @@ -232,21 +243,27 @@ public class MimePart { private final Optional<String> fileName; private final Optional<String> fileExtension; private final Optional<String> contentDisposition; + private final Optional<Cid> cid; private final List<MimePart> attachments; private MimePart(HeaderCollection headerCollection, Optional<String> bodyTextContent, Optional<MediaType> mediaType, - Optional<SubType> subType, Optional<String> fileName, Optional<String> fileExtension, - Optional<String> contentDisposition, List<MimePart> attachments) { + Optional<SubType> subType, Optional<String> fileName, Optional<String> fileExtension, + Optional<String> contentDisposition, Optional<Cid> cid, List<MimePart> attachments) { this.headerCollection = headerCollection; this.mediaType = mediaType; this.subType = subType; this.fileName = fileName; this.fileExtension = fileExtension; this.contentDisposition = contentDisposition; + this.cid = cid; this.attachments = attachments; this.bodyTextContent = bodyTextContent; } + public boolean isInlinedWithCid() { + return contentDisposition.map("inline"::equalsIgnoreCase).orElse(false) && cid.isPresent(); + } + public List<MimePart> getAttachments() { return attachments; } diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartContainerBuilder.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartContainerBuilder.java index f3dc788f24..415a058910 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartContainerBuilder.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartContainerBuilder.java @@ -22,6 +22,7 @@ package org.apache.james.mailbox.store.search.mime; import java.io.InputStream; import java.nio.charset.Charset; +import org.apache.james.mailbox.model.Cid; import org.apache.james.mailbox.model.ContentType.MediaType; import org.apache.james.mailbox.model.ContentType.SubType; import org.apache.james.mime4j.stream.Field; @@ -46,4 +47,6 @@ public interface MimePartContainerBuilder { MimePartContainerBuilder addContentDisposition(String contentDisposition); + MimePartContainerBuilder addCid(Cid cid); + } diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java index 3dbd62c917..c8f8148032 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/MimePartParser.java @@ -27,6 +27,7 @@ import java.util.LinkedList; import java.util.Optional; import org.apache.james.mailbox.extractor.TextExtractor; +import org.apache.james.mailbox.model.Cid; import org.apache.james.mailbox.model.ContentType.MediaType; import org.apache.james.mailbox.model.ContentType.SubType; import org.apache.james.mime4j.MimeException; @@ -130,6 +131,7 @@ public class MimePartParser { .map(SubType::of) .ifPresent(currentlyBuildMimePart::addSubType); currentlyBuildMimePart.addContentDisposition(descriptor.getContentDispositionType()); + Optional.ofNullable(descriptor.getContentId()).flatMap(v -> Cid.parser().relaxed().unwrap().parse(v)).ifPresent(currentlyBuildMimePart::addCid); Optional.ofNullable(descriptor.getContentDispositionFilename()) .or(() -> Optional.ofNullable(descriptor.getContentTypeParameters().get("name"))) diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/RootMimePartContainerBuilder.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/RootMimePartContainerBuilder.java index e087c59fbb..54c5203ac2 100644 --- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/RootMimePartContainerBuilder.java +++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/mime/RootMimePartContainerBuilder.java @@ -22,6 +22,7 @@ package org.apache.james.mailbox.store.search.mime; import java.io.InputStream; import java.nio.charset.Charset; +import org.apache.james.mailbox.model.Cid; import org.apache.james.mailbox.model.ContentType.MediaType; import org.apache.james.mailbox.model.ContentType.SubType; import org.apache.james.mime4j.stream.Field; @@ -85,6 +86,12 @@ public class RootMimePartContainerBuilder implements MimePartContainerBuilder { return this; } + @Override + public MimePartContainerBuilder addCid(Cid cid) { + LOGGER.warn("Trying to add content disposition to the Root MimePart container"); + return this; + } + @Override public MimePartContainerBuilder charset(Charset charset) { LOGGER.warn("Trying to add content charset to the Root MimePart container"); diff --git a/mailbox/store/src/test/resources/eml/james-3901.json b/mailbox/store/src/test/resources/eml/james-3901.json index 421b11279a..c54055fa81 100644 --- a/mailbox/store/src/test/resources/eml/james-3901.json +++ b/mailbox/store/src/test/resources/eml/james-3901.json @@ -12,7 +12,7 @@ "cc":[], "date":"2015-06-07T00:00:00+0200", "from":[{"name":null,"address":"Drew Fossum","domain":null}], - "hasAttachment":false, + "hasAttachment":true, "headers":[ {"name":"return-path","value":"<firstname1.surna...@upn.integration-open-paas.org>"}, {"name":"received","value":"from 10.2.0.0 (EHLO 617) ([10.2.0.0]) by smtp.upn.integration-open-paas.org (JAMES SMTP Server ) with ESMTP ID -489272706 for <firstname1.surna...@upn.integration-open-paas.org>; Sat, 28 Nov 2020 10:49:24 +0000 (GMT)"}, diff --git a/mailbox/store/src/test/resources/eml/nonTextual.json b/mailbox/store/src/test/resources/eml/nonTextual.json index b57359eff6..85ef80c4e6 100644 --- a/mailbox/store/src/test/resources/eml/nonTextual.json +++ b/mailbox/store/src/test/resources/eml/nonTextual.json @@ -19,7 +19,7 @@ "address":"btell...@linagora.com", "domain":"linagora" }], - "hasAttachment":false, + "hasAttachment":true, "headers": [{ "name": "return-path", "value": "<btell...@linagora.com>" diff --git a/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json b/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json index cf5be90ccb..255bdb2063 100644 --- a/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json +++ b/mailbox/store/src/test/resources/eml/nonTextualWithoutAttachmentTextContent.json @@ -19,7 +19,7 @@ "address":"btell...@linagora.com", "domain":"linagora" }], - "hasAttachment":false, + "hasAttachment":true, "headers": [{ "name": "return-path", "value": "<btell...@linagora.com>" diff --git a/mailbox/store/src/test/resources/eml/spamMail.json b/mailbox/store/src/test/resources/eml/spamMail.json index 468665807e..d1e212770d 100644 --- a/mailbox/store/src/test/resources/eml/spamMail.json +++ b/mailbox/store/src/test/resources/eml/spamMail.json @@ -152,6 +152,6 @@ "isDraft": false, "isFlagged": false, "isRecent": false, - "hasAttachment": false, + "hasAttachment": true, "isUnread": true } --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@james.apache.org For additional commands, e-mail: notifications-h...@james.apache.org