Repository: james-project
Updated Branches:
  refs/heads/master 96aee2c6b -> d44eeb13e


JAMES-1833 MessageContentExtractor should have improved fallback

In case a mail was received with inlined body and text part without Content-ID 
alone, they were returned as attachment and empty body.

James should have a best effort strategy to find bodies.

Now :
 - James checks for bodies without Content-disposition (text/plain and 
text/html)
 - In case it can not locate one, it checks for inlined text without CID (error 
behaviour)

 In last case CID express intent for the text to be displayed in an other part. 
Thus, it can not be displayed as a body.


Project: http://git-wip-us.apache.org/repos/asf/james-project/repo
Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/21b69533
Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/21b69533
Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/21b69533

Branch: refs/heads/master
Commit: 21b69533e7fb768f8a5444caf7e912aed5c8908d
Parents: 96aee2c
Author: Benoit Tellier <btell...@linagora.com>
Authored: Wed Oct 12 17:18:19 2016 +0200
Committer: Benoit Tellier <btell...@linagora.com>
Committed: Fri Oct 14 08:59:00 2016 +0200

----------------------------------------------------------------------
 .../jmap/model/MessageContentExtractor.java     |  36 ++++--
 .../jmap/model/MessageContentExtractorTest.java | 122 +++++++++++++++++++
 2 files changed, 149 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/james-project/blob/21b69533/server/protocols/jmap/src/main/java/org/apache/james/jmap/model/MessageContentExtractor.java
----------------------------------------------------------------------
diff --git 
a/server/protocols/jmap/src/main/java/org/apache/james/jmap/model/MessageContentExtractor.java
 
b/server/protocols/jmap/src/main/java/org/apache/james/jmap/model/MessageContentExtractor.java
index 384b8a5..626e5fc 100644
--- 
a/server/protocols/jmap/src/main/java/org/apache/james/jmap/model/MessageContentExtractor.java
+++ 
b/server/protocols/jmap/src/main/java/org/apache/james/jmap/model/MessageContentExtractor.java
@@ -22,6 +22,9 @@ package org.apache.james.jmap.model;
 import java.io.IOException;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.function.Predicate;
+
+import javax.mail.internet.MimeMessage;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.james.mime4j.dom.Body;
@@ -33,7 +36,9 @@ import com.github.fge.lambdas.Throwing;
 import com.github.fge.lambdas.functions.ThrowingFunction;
 
 public class MessageContentExtractor {
-    
+
+    public static final String CONTENT_ID = "Content-ID";
+
     public MessageContent extract(org.apache.james.mime4j.dom.Message message) 
throws IOException {
         Body body = message.getBody();
         if (body instanceof TextBody) {
@@ -100,21 +105,34 @@ public class MessageContentExtractor {
     }
 
     private Optional<String> getFirstMatchingTextBody(Multipart multipart, 
String mimeType) throws IOException {
+        Optional<String> firstMatchingTextBody = 
getFirstMatchingTextBody(multipart, mimeType, this::isNotAttachment);
+        if (firstMatchingTextBody.isPresent()) {
+            return firstMatchingTextBody;
+        }
+        Optional<String> fallBackInlinedBodyWithoutCid = 
getFirstMatchingTextBody(multipart, mimeType, this::isInlinedWithoutCid);
+        return fallBackInlinedBodyWithoutCid;
+    }
+
+    private Optional<String> getFirstMatchingTextBody(Multipart multipart, 
String mimeType, Predicate<Entity> condition) {
         return multipart.getBodyParts()
-                .stream()
-                .filter(part -> mimeType.equals(part.getMimeType()))
-                .filter(this::isNotAttachment)
-                .map(Entity::getBody)
-                .filter(TextBody.class::isInstance)
-                .map(TextBody.class::cast)
-                .findFirst()
-                .map(Throwing.function(this::asString).sneakyThrow());
+            .stream()
+            .filter(part -> mimeType.equals(part.getMimeType()))
+            .filter(condition)
+            .map(Entity::getBody)
+            .filter(TextBody.class::isInstance)
+            .map(TextBody.class::cast)
+            .findFirst()
+            .map(Throwing.function(this::asString).sneakyThrow());
     }
 
     private boolean isNotAttachment(Entity part) {
         return part.getDispositionType() == null;
     }
 
+    private boolean isInlinedWithoutCid(Entity part) {
+        return part.getDispositionType().equals(MimeMessage.INLINE) && 
part.getHeader().getField(CONTENT_ID) == null;
+    }
+
     public static class MessageContent {
         private final Optional<String> textBody;
         private final Optional<String> htmlBody;

http://git-wip-us.apache.org/repos/asf/james-project/blob/21b69533/server/protocols/jmap/src/test/java/org/apache/james/jmap/model/MessageContentExtractorTest.java
----------------------------------------------------------------------
diff --git 
a/server/protocols/jmap/src/test/java/org/apache/james/jmap/model/MessageContentExtractorTest.java
 
b/server/protocols/jmap/src/test/java/org/apache/james/jmap/model/MessageContentExtractorTest.java
index d6eb03c..c015a3b 100644
--- 
a/server/protocols/jmap/src/test/java/org/apache/james/jmap/model/MessageContentExtractorTest.java
+++ 
b/server/protocols/jmap/src/test/java/org/apache/james/jmap/model/MessageContentExtractorTest.java
@@ -22,14 +22,20 @@ import static org.assertj.core.api.Assertions.assertThat;
 
 import java.io.IOException;
 
+import javax.mail.internet.MimeMessage;
+
 import org.apache.james.jmap.model.MessageContentExtractor.MessageContent;
 import org.apache.james.mime4j.dom.Message;
 import org.apache.james.mime4j.dom.Multipart;
+import org.apache.james.mime4j.field.Fields;
 import org.apache.james.mime4j.message.BasicBodyFactory;
 import org.apache.james.mime4j.message.BodyPart;
 import org.apache.james.mime4j.message.BodyPartBuilder;
+import org.apache.james.mime4j.message.HeaderImpl;
 import org.apache.james.mime4j.message.MessageBuilder;
 import org.apache.james.mime4j.message.MultipartBuilder;
+import org.apache.james.mime4j.stream.Field;
+import org.apache.james.mime4j.util.ByteSequence;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -40,6 +46,23 @@ public class MessageContentExtractorTest {
     private static final String TEXT_CONTENT = "text content";
     private static final String HTML_CONTENT = "<b>html</b> content";
     private static final String ATTACHMENT_CONTENT = "attachment content";
+    private static final String ANY_VALUE = "anyValue";
+    private static final Field CONTENT_ID_FIELD = new Field() {
+        @Override
+        public String getName() {
+            return MessageContentExtractor.CONTENT_ID;
+        }
+
+        @Override
+        public String getBody() {
+            return ANY_VALUE;
+        }
+
+        @Override
+        public ByteSequence getRaw() {
+            return ByteSequence.EMPTY;
+        }
+    };
 
     private MessageContentExtractor testee;
 
@@ -218,4 +241,103 @@ public class MessageContentExtractorTest {
         MessageContent actual = testee.extract(message);
         assertThat(actual.getHtmlBody()).contains(HTML_CONTENT);
     }
+
+    @Test
+    public void 
extractShouldRetrieveHtmlBodyWithOneInlinedHTMLAttachmentWithoutCid() throws 
IOException {
+        //Given
+        BodyPart inlinedHTMLPart = BodyPartBuilder.create()
+            .setBody(HTML_CONTENT, "html", Charsets.UTF_8)
+            .build();
+        HeaderImpl inlinedHeader = new HeaderImpl();
+        inlinedHeader.addField(Fields.contentDisposition(MimeMessage.INLINE));
+        inlinedHeader.addField(Fields.contentType("text/html; charset=utf-8"));
+        inlinedHTMLPart.setHeader(inlinedHeader);
+        Multipart multipartAlternative = MultipartBuilder.create("alternative")
+            .addBodyPart(inlinedHTMLPart)
+            .build();
+        Message message = MessageBuilder.create()
+            .setBody(multipartAlternative)
+            .build();
+
+        //When
+        MessageContent actual = testee.extract(message);
+
+        //Then
+        assertThat(actual.getHtmlBody()).contains(HTML_CONTENT);
+    }
+
+    @Test
+    public void 
extractShouldNotRetrieveHtmlBodyWithOneInlinedHTMLAttachmentWithCid() throws 
IOException {
+        //Given
+        BodyPart inlinedHTMLPart = BodyPartBuilder.create()
+            .setBody(HTML_CONTENT, "html", Charsets.UTF_8)
+            .build();
+        HeaderImpl inlinedHeader = new HeaderImpl();
+        inlinedHeader.addField(Fields.contentDisposition(MimeMessage.INLINE));
+        inlinedHeader.addField(Fields.contentType("text/html; charset=utf-8"));
+        inlinedHeader.addField(CONTENT_ID_FIELD);
+        inlinedHTMLPart.setHeader(inlinedHeader);
+        Multipart multipartAlternative = MultipartBuilder.create("alternative")
+            .addBodyPart(inlinedHTMLPart)
+            .build();
+        Message message = MessageBuilder.create()
+            .setBody(multipartAlternative)
+            .build();
+
+        //When
+        MessageContent actual = testee.extract(message);
+
+        //Then
+        assertThat(actual.getHtmlBody()).isEmpty();
+    }
+
+
+    @Test
+    public void 
extractShouldRetrieveTextBodyWithOneInlinedTextAttachmentWithoutCid() throws 
IOException {
+        //Given
+        BodyPart inlinedTextPart = BodyPartBuilder.create()
+            .setBody(TEXT_CONTENT, "text", Charsets.UTF_8)
+            .build();
+        HeaderImpl inlinedHeader = new HeaderImpl();
+        inlinedHeader.addField(Fields.contentDisposition(MimeMessage.INLINE));
+        inlinedHeader.addField(Fields.contentType("text/plain; 
charset=utf-8"));
+        inlinedTextPart.setHeader(inlinedHeader);
+        Multipart multipartAlternative = MultipartBuilder.create("alternative")
+            .addBodyPart(inlinedTextPart)
+            .build();
+        Message message = MessageBuilder.create()
+            .setBody(multipartAlternative)
+            .build();
+
+        //When
+        MessageContent actual = testee.extract(message);
+
+        //Then
+        assertThat(actual.getTextBody()).contains(TEXT_CONTENT);
+    }
+
+    @Test
+    public void 
extractShouldNotRetrieveTextBodyWithOneInlinedTextAttachmentWithCid() throws 
IOException {
+        //Given
+        BodyPart inlinedTextPart = BodyPartBuilder.create()
+            .setBody(TEXT_CONTENT, "text", Charsets.UTF_8)
+            .build();
+        HeaderImpl inlinedHeader = new HeaderImpl();
+        inlinedHeader.addField(Fields.contentDisposition(MimeMessage.INLINE));
+        inlinedHeader.addField(Fields.contentType("text/plain; 
charset=utf-8"));
+        inlinedHeader.addField(CONTENT_ID_FIELD);
+        inlinedTextPart.setHeader(inlinedHeader);
+        Multipart multipartAlternative = MultipartBuilder.create("alternative")
+            .addBodyPart(inlinedTextPart)
+            .build();
+        Message message = MessageBuilder.create()
+            .setBody(multipartAlternative)
+            .build();
+
+        //When
+        MessageContent actual = testee.extract(message);
+
+        //Then
+        assertThat(actual.getTextBody()).isEmpty();
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscr...@james.apache.org
For additional commands, e-mail: server-dev-h...@james.apache.org

Reply via email to