Repository: james-project Updated Branches: refs/heads/master 96aee2c6b -> d44eeb13e
JAMES-1833 MessageContentExtractor should have improved fallback In case a mail was received with inlined body and text part without Content-ID alone, they were returned as attachment and empty body. James should have a best effort strategy to find bodies. Now : - James checks for bodies without Content-disposition (text/plain and text/html) - In case it can not locate one, it checks for inlined text without CID (error behaviour) In last case CID express intent for the text to be displayed in an other part. Thus, it can not be displayed as a body. Project: http://git-wip-us.apache.org/repos/asf/james-project/repo Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/21b69533 Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/21b69533 Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/21b69533 Branch: refs/heads/master Commit: 21b69533e7fb768f8a5444caf7e912aed5c8908d Parents: 96aee2c Author: Benoit Tellier <[email protected]> Authored: Wed Oct 12 17:18:19 2016 +0200 Committer: Benoit Tellier <[email protected]> Committed: Fri Oct 14 08:59:00 2016 +0200 ---------------------------------------------------------------------- .../jmap/model/MessageContentExtractor.java | 36 ++++-- .../jmap/model/MessageContentExtractorTest.java | 122 +++++++++++++++++++ 2 files changed, 149 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/james-project/blob/21b69533/server/protocols/jmap/src/main/java/org/apache/james/jmap/model/MessageContentExtractor.java ---------------------------------------------------------------------- diff --git a/server/protocols/jmap/src/main/java/org/apache/james/jmap/model/MessageContentExtractor.java b/server/protocols/jmap/src/main/java/org/apache/james/jmap/model/MessageContentExtractor.java index 384b8a5..626e5fc 100644 --- a/server/protocols/jmap/src/main/java/org/apache/james/jmap/model/MessageContentExtractor.java +++ b/server/protocols/jmap/src/main/java/org/apache/james/jmap/model/MessageContentExtractor.java @@ -22,6 +22,9 @@ package org.apache.james.jmap.model; import java.io.IOException; import java.util.Objects; import java.util.Optional; +import java.util.function.Predicate; + +import javax.mail.internet.MimeMessage; import org.apache.commons.io.IOUtils; import org.apache.james.mime4j.dom.Body; @@ -33,7 +36,9 @@ import com.github.fge.lambdas.Throwing; import com.github.fge.lambdas.functions.ThrowingFunction; public class MessageContentExtractor { - + + public static final String CONTENT_ID = "Content-ID"; + public MessageContent extract(org.apache.james.mime4j.dom.Message message) throws IOException { Body body = message.getBody(); if (body instanceof TextBody) { @@ -100,21 +105,34 @@ public class MessageContentExtractor { } private Optional<String> getFirstMatchingTextBody(Multipart multipart, String mimeType) throws IOException { + Optional<String> firstMatchingTextBody = getFirstMatchingTextBody(multipart, mimeType, this::isNotAttachment); + if (firstMatchingTextBody.isPresent()) { + return firstMatchingTextBody; + } + Optional<String> fallBackInlinedBodyWithoutCid = getFirstMatchingTextBody(multipart, mimeType, this::isInlinedWithoutCid); + return fallBackInlinedBodyWithoutCid; + } + + private Optional<String> getFirstMatchingTextBody(Multipart multipart, String mimeType, Predicate<Entity> condition) { return multipart.getBodyParts() - .stream() - .filter(part -> mimeType.equals(part.getMimeType())) - .filter(this::isNotAttachment) - .map(Entity::getBody) - .filter(TextBody.class::isInstance) - .map(TextBody.class::cast) - .findFirst() - .map(Throwing.function(this::asString).sneakyThrow()); + .stream() + .filter(part -> mimeType.equals(part.getMimeType())) + .filter(condition) + .map(Entity::getBody) + .filter(TextBody.class::isInstance) + .map(TextBody.class::cast) + .findFirst() + .map(Throwing.function(this::asString).sneakyThrow()); } private boolean isNotAttachment(Entity part) { return part.getDispositionType() == null; } + private boolean isInlinedWithoutCid(Entity part) { + return part.getDispositionType().equals(MimeMessage.INLINE) && part.getHeader().getField(CONTENT_ID) == null; + } + public static class MessageContent { private final Optional<String> textBody; private final Optional<String> htmlBody; http://git-wip-us.apache.org/repos/asf/james-project/blob/21b69533/server/protocols/jmap/src/test/java/org/apache/james/jmap/model/MessageContentExtractorTest.java ---------------------------------------------------------------------- diff --git a/server/protocols/jmap/src/test/java/org/apache/james/jmap/model/MessageContentExtractorTest.java b/server/protocols/jmap/src/test/java/org/apache/james/jmap/model/MessageContentExtractorTest.java index d6eb03c..c015a3b 100644 --- a/server/protocols/jmap/src/test/java/org/apache/james/jmap/model/MessageContentExtractorTest.java +++ b/server/protocols/jmap/src/test/java/org/apache/james/jmap/model/MessageContentExtractorTest.java @@ -22,14 +22,20 @@ import static org.assertj.core.api.Assertions.assertThat; import java.io.IOException; +import javax.mail.internet.MimeMessage; + import org.apache.james.jmap.model.MessageContentExtractor.MessageContent; import org.apache.james.mime4j.dom.Message; import org.apache.james.mime4j.dom.Multipart; +import org.apache.james.mime4j.field.Fields; import org.apache.james.mime4j.message.BasicBodyFactory; import org.apache.james.mime4j.message.BodyPart; import org.apache.james.mime4j.message.BodyPartBuilder; +import org.apache.james.mime4j.message.HeaderImpl; import org.apache.james.mime4j.message.MessageBuilder; import org.apache.james.mime4j.message.MultipartBuilder; +import org.apache.james.mime4j.stream.Field; +import org.apache.james.mime4j.util.ByteSequence; import org.junit.Before; import org.junit.Test; @@ -40,6 +46,23 @@ public class MessageContentExtractorTest { private static final String TEXT_CONTENT = "text content"; private static final String HTML_CONTENT = "<b>html</b> content"; private static final String ATTACHMENT_CONTENT = "attachment content"; + private static final String ANY_VALUE = "anyValue"; + private static final Field CONTENT_ID_FIELD = new Field() { + @Override + public String getName() { + return MessageContentExtractor.CONTENT_ID; + } + + @Override + public String getBody() { + return ANY_VALUE; + } + + @Override + public ByteSequence getRaw() { + return ByteSequence.EMPTY; + } + }; private MessageContentExtractor testee; @@ -218,4 +241,103 @@ public class MessageContentExtractorTest { MessageContent actual = testee.extract(message); assertThat(actual.getHtmlBody()).contains(HTML_CONTENT); } + + @Test + public void extractShouldRetrieveHtmlBodyWithOneInlinedHTMLAttachmentWithoutCid() throws IOException { + //Given + BodyPart inlinedHTMLPart = BodyPartBuilder.create() + .setBody(HTML_CONTENT, "html", Charsets.UTF_8) + .build(); + HeaderImpl inlinedHeader = new HeaderImpl(); + inlinedHeader.addField(Fields.contentDisposition(MimeMessage.INLINE)); + inlinedHeader.addField(Fields.contentType("text/html; charset=utf-8")); + inlinedHTMLPart.setHeader(inlinedHeader); + Multipart multipartAlternative = MultipartBuilder.create("alternative") + .addBodyPart(inlinedHTMLPart) + .build(); + Message message = MessageBuilder.create() + .setBody(multipartAlternative) + .build(); + + //When + MessageContent actual = testee.extract(message); + + //Then + assertThat(actual.getHtmlBody()).contains(HTML_CONTENT); + } + + @Test + public void extractShouldNotRetrieveHtmlBodyWithOneInlinedHTMLAttachmentWithCid() throws IOException { + //Given + BodyPart inlinedHTMLPart = BodyPartBuilder.create() + .setBody(HTML_CONTENT, "html", Charsets.UTF_8) + .build(); + HeaderImpl inlinedHeader = new HeaderImpl(); + inlinedHeader.addField(Fields.contentDisposition(MimeMessage.INLINE)); + inlinedHeader.addField(Fields.contentType("text/html; charset=utf-8")); + inlinedHeader.addField(CONTENT_ID_FIELD); + inlinedHTMLPart.setHeader(inlinedHeader); + Multipart multipartAlternative = MultipartBuilder.create("alternative") + .addBodyPart(inlinedHTMLPart) + .build(); + Message message = MessageBuilder.create() + .setBody(multipartAlternative) + .build(); + + //When + MessageContent actual = testee.extract(message); + + //Then + assertThat(actual.getHtmlBody()).isEmpty(); + } + + + @Test + public void extractShouldRetrieveTextBodyWithOneInlinedTextAttachmentWithoutCid() throws IOException { + //Given + BodyPart inlinedTextPart = BodyPartBuilder.create() + .setBody(TEXT_CONTENT, "text", Charsets.UTF_8) + .build(); + HeaderImpl inlinedHeader = new HeaderImpl(); + inlinedHeader.addField(Fields.contentDisposition(MimeMessage.INLINE)); + inlinedHeader.addField(Fields.contentType("text/plain; charset=utf-8")); + inlinedTextPart.setHeader(inlinedHeader); + Multipart multipartAlternative = MultipartBuilder.create("alternative") + .addBodyPart(inlinedTextPart) + .build(); + Message message = MessageBuilder.create() + .setBody(multipartAlternative) + .build(); + + //When + MessageContent actual = testee.extract(message); + + //Then + assertThat(actual.getTextBody()).contains(TEXT_CONTENT); + } + + @Test + public void extractShouldNotRetrieveTextBodyWithOneInlinedTextAttachmentWithCid() throws IOException { + //Given + BodyPart inlinedTextPart = BodyPartBuilder.create() + .setBody(TEXT_CONTENT, "text", Charsets.UTF_8) + .build(); + HeaderImpl inlinedHeader = new HeaderImpl(); + inlinedHeader.addField(Fields.contentDisposition(MimeMessage.INLINE)); + inlinedHeader.addField(Fields.contentType("text/plain; charset=utf-8")); + inlinedHeader.addField(CONTENT_ID_FIELD); + inlinedTextPart.setHeader(inlinedHeader); + Multipart multipartAlternative = MultipartBuilder.create("alternative") + .addBodyPart(inlinedTextPart) + .build(); + Message message = MessageBuilder.create() + .setBody(multipartAlternative) + .build(); + + //When + MessageContent actual = testee.extract(message); + + //Then + assertThat(actual.getTextBody()).isEmpty(); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
