Repository: james-project Updated Branches: refs/heads/master bf13f1e6d -> b25ee7ffe
MAILBOX-336 Only using tika for non textual attachments Note that the content of nonTextual.json needed to be updated but the structure of the JSON did not change. I encountered some really strange bugs and spent my morning trying to understand why two similar JSON strings were reported as different while being printed the same way by the JSON assertion library. Project: http://git-wip-us.apache.org/repos/asf/james-project/repo Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/b25ee7ff Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/b25ee7ff Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/b25ee7ff Branch: refs/heads/master Commit: b25ee7ffeb2ca57e9806a67378a3b71883833e3e Parents: bf13f1e Author: benwa <[email protected]> Authored: Wed May 23 12:16:35 2018 +0700 Committer: benwa <[email protected]> Committed: Wed May 23 14:18:00 2018 +0700 ---------------------------------------------------------------------- .../mailbox/elasticsearch/json/MimePart.java | 31 ++++- .../json/MimePartContainerBuilder.java | 3 + .../elasticsearch/json/MimePartParser.java | 13 +- .../json/RootMimePartContainerBuilder.java | 7 + .../json/IndexableMessageTest.java | 9 +- .../json/MessageToElasticSearchJsonTest.java | 70 +++++----- .../resources/eml/emailWith3Attachments.eml | 4 +- .../src/test/resources/eml/nonTextual.json | 135 ++++++------------- .../org/apache/james/util/ClassLoaderUtils.java | 15 ++- 9 files changed, 148 insertions(+), 139 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/james-project/blob/b25ee7ff/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java 
b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java index 11f246c..bb5c98f 100644 --- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java @@ -20,11 +20,14 @@ package org.apache.james.mailbox.elasticsearch.json; import java.io.InputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Optional; import java.util.stream.Stream; import org.apache.commons.io.FilenameUtils; +import org.apache.commons.io.IOUtils; import org.apache.james.mailbox.extractor.ParsedContent; import org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mailbox.store.extractor.DefaultTextExtractor; @@ -35,6 +38,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; @@ -50,6 +54,7 @@ public class MimePart { private Optional<String> fileName; private Optional<String> fileExtension; private Optional<String> contentDisposition; + private Optional<Charset> charset; private TextExtractor textExtractor; private Builder() { @@ -61,6 +66,7 @@ public class MimePart { this.fileName = Optional.empty(); this.fileExtension = Optional.empty(); this.contentDisposition = Optional.empty(); + this.charset = Optional.empty(); this.textExtractor = new DefaultTextExtractor(); } @@ -115,6 +121,12 @@ public class MimePart { } @Override + public MimePartContainerBuilder charset(Charset charset) { + this.charset = Optional.of(charset); + return this; + } + + @Override public MimePart build() { Optional<ParsedContent> parsedContent = parseContent(textExtractor); return new MimePart( @@ 
-131,9 +143,7 @@ public class MimePart { private Optional<ParsedContent> parseContent(TextExtractor textExtractor) { if (bodyContent.isPresent()) { try { - return Optional.of(textExtractor.extractContent( - bodyContent.get(), - computeContentType().orElse(null))); + return Optional.of(extractText(textExtractor, bodyContent.get())); } catch (Throwable e) { LOGGER.warn("Failed parsing attachment", e); } @@ -141,6 +151,21 @@ public class MimePart { return Optional.empty(); } + private ParsedContent extractText(TextExtractor textExtractor, InputStream bodyContent) throws Exception { + if (isTextBody()) { + return new ParsedContent( + IOUtils.toString(bodyContent, charset.orElse(StandardCharsets.UTF_8)), + ImmutableMap.of()); + } + return textExtractor.extractContent( + bodyContent, + computeContentType().orElse(null)); + } + + private Boolean isTextBody() { + return mediaType.map("text"::equals).orElse(false); + } + private Optional<String> computeContentType() { if (mediaType.isPresent() && subType.isPresent()) { return Optional.of(mediaType.get() + "/" + subType.get()); http://git-wip-us.apache.org/repos/asf/james-project/blob/b25ee7ff/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java index b56d303..4a71d21 100644 --- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java @@ -20,6 +20,7 @@ package org.apache.james.mailbox.elasticsearch.json; import java.io.InputStream; +import java.nio.charset.Charset; import 
org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mime4j.stream.Field; @@ -38,6 +39,8 @@ public interface MimePartContainerBuilder { MimePartContainerBuilder addFileName(String fileName); + MimePartContainerBuilder charset(Charset charset); + MimePartContainerBuilder addMediaType(String mediaType); MimePartContainerBuilder addSubType(String subType); http://git-wip-us.apache.org/repos/asf/james-project/blob/b25ee7ff/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java index 9582661..7cd6e3a 100644 --- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java @@ -20,8 +20,10 @@ package org.apache.james.mailbox.elasticsearch.json; import java.io.IOException; +import java.nio.charset.Charset; import java.util.Deque; import java.util.LinkedList; +import java.util.Optional; import org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mailbox.store.mail.model.Message; @@ -61,7 +63,7 @@ public class MimePartParser { return result; } - private void processMimePart(MimeTokenStream stream, EntityState state) throws IOException { + private void processMimePart(MimeTokenStream stream, EntityState state) { switch (state) { case T_START_MULTIPART: case T_START_MESSAGE: @@ -106,17 +108,22 @@ public class MimePartParser { } } - private void manageBodyExtraction(MimeTokenStream stream) throws IOException { + private void manageBodyExtraction(MimeTokenStream stream) { extractMimePartBodyDescription(stream); 
currentlyBuildMimePart.addBodyContent(stream.getDecodedInputStream()); } private void extractMimePartBodyDescription(MimeTokenStream stream) { - final MaximalBodyDescriptor descriptor = (MaximalBodyDescriptor) stream.getBodyDescriptor(); + MaximalBodyDescriptor descriptor = (MaximalBodyDescriptor) stream.getBodyDescriptor(); + currentlyBuildMimePart.addMediaType(descriptor.getMediaType()) .addSubType(descriptor.getSubType()) .addContentDisposition(descriptor.getContentDispositionType()) .addFileName(descriptor.getContentDispositionFilename()); + + Optional.ofNullable(descriptor.getCharset()) + .map(Charset::forName) + .ifPresent(currentlyBuildMimePart::charset); } } http://git-wip-us.apache.org/repos/asf/james-project/blob/b25ee7ff/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java index b0718c2..81e58be 100644 --- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java @@ -20,6 +20,7 @@ package org.apache.james.mailbox.elasticsearch.json; import java.io.InputStream; +import java.nio.charset.Charset; import org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mime4j.stream.Field; @@ -86,4 +87,10 @@ public class RootMimePartContainerBuilder implements MimePartContainerBuilder { LOGGER.warn("Trying to add content disposition to the Root MimePart container"); return this; } + + @Override + public MimePartContainerBuilder charset(Charset charset) { + LOGGER.warn("Trying to add content charset to the 
Root MimePart container"); + return this; + } } http://git-wip-us.apache.org/repos/asf/james-project/blob/b25ee7ff/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java index fe1eff8..557b620 100644 --- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java +++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java @@ -27,6 +27,7 @@ import static org.mockito.Mockito.when; import java.io.ByteArrayInputStream; import java.io.IOException; import java.time.ZoneId; +import java.util.Optional; import javax.mail.Flags; @@ -470,8 +471,12 @@ public class IndexableMessageTest { .build(); // Then - assertThat(indexableMessage.getText()).contains("first attachment content"); - assertThat(indexableMessage.getText()).contains("third attachment content"); + assertThat(indexableMessage.getAttachments()) + .extracting(MimePart::getTextualBody) + .contains(Optional.of("first attachment content")); + assertThat(indexableMessage.getAttachments()) + .extracting(MimePart::getTextualBody) + .contains(Optional.of("third attachment content")); } @Test http://git-wip-us.apache.org/repos/asf/james-project/blob/b25ee7ff/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java 
index f4a4642..3aa2de4 100644 --- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java +++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java @@ -25,7 +25,6 @@ import static net.javacrumbs.jsonunit.fluent.JsonFluentAssert.assertThatJson; import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.time.ZoneId; import java.util.Date; @@ -33,7 +32,6 @@ import java.util.Date; import javax.mail.Flags; import javax.mail.util.SharedByteArrayInputStream; -import org.apache.commons.io.IOUtils; import org.apache.james.mailbox.FlagsBuilder; import org.apache.james.mailbox.MailboxSession.User; import org.apache.james.mailbox.MessageUid; @@ -68,7 +66,7 @@ public class MessageToElasticSearchJsonTest { public static final MessageId MESSAGE_ID = TestMessageId.of(184L); public static final long MOD_SEQ = 42L; public static final MessageUid UID = MessageUid.of(25); - public static final Charset CHARSET = StandardCharsets.UTF_8; + public static final MockMailboxSession MAILBOX_SESSION = new MockMailboxSession("username"); private TextExtractor textExtractor; @@ -95,7 +93,7 @@ public class MessageToElasticSearchJsonTest { } @Test - public void convertToJsonShouldThrowWhenNoUser() throws Exception { + public void convertToJsonShouldThrowWhenNoUser() { MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson( new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.YES); @@ -122,15 +120,15 @@ public class MessageToElasticSearchJsonTest { date, SIZE, BODY_START_OCTET, - new SharedByteArrayInputStream(ClassLoaderUtils.getSystemResourceAsByteArray("eml/spamMail.eml")), + ClassLoaderUtils.getSystemResourceAsSharedStream("eml/spamMail.eml"), new Flags(), propertyBuilder, MAILBOX_ID); 
spamMail.setUid(UID); spamMail.setModSeq(MOD_SEQ); - assertThatJson(messageToElasticSearchJson.convertToJson(spamMail, ImmutableList.of(new MockMailboxSession("username").getUser()))) + assertThatJson(messageToElasticSearchJson.convertToJson(spamMail, ImmutableList.of(MAILBOX_SESSION.getUser()))) .when(IGNORING_ARRAY_ORDER) - .isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("eml/spamMail.json"), CHARSET)); + .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/spamMail.json")); } @Test @@ -142,15 +140,15 @@ public class MessageToElasticSearchJsonTest { date, SIZE, BODY_START_OCTET, - new SharedByteArrayInputStream(ClassLoaderUtils.getSystemResourceAsByteArray("eml/htmlMail.eml")), + ClassLoaderUtils.getSystemResourceAsSharedStream("eml/htmlMail.eml"), new FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.SEEN).add("social", "pocket-money").build(), propertyBuilder, MAILBOX_ID); htmlMail.setModSeq(MOD_SEQ); htmlMail.setUid(UID); - assertThatJson(messageToElasticSearchJson.convertToJson(htmlMail, ImmutableList.of(new MockMailboxSession("username").getUser()))) + assertThatJson(messageToElasticSearchJson.convertToJson(htmlMail, ImmutableList.of(MAILBOX_SESSION.getUser()))) .when(IGNORING_ARRAY_ORDER) - .isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("eml/htmlMail.json"), CHARSET)); + .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/htmlMail.json")); } @Test @@ -162,15 +160,15 @@ public class MessageToElasticSearchJsonTest { date, SIZE, BODY_START_OCTET, - new SharedByteArrayInputStream(ClassLoaderUtils.getSystemResourceAsByteArray("eml/pgpSignedMail.eml")), + ClassLoaderUtils.getSystemResourceAsSharedStream("eml/pgpSignedMail.eml"), new FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.SEEN).add("debian", "security").build(), propertyBuilder, MAILBOX_ID); pgpSignedMail.setModSeq(MOD_SEQ); pgpSignedMail.setUid(UID); - assertThatJson(messageToElasticSearchJson.convertToJson(pgpSignedMail, ImmutableList.of(new 
MockMailboxSession("username").getUser()))) + assertThatJson(messageToElasticSearchJson.convertToJson(pgpSignedMail, ImmutableList.of(MAILBOX_SESSION.getUser()))) .when(IGNORING_ARRAY_ORDER) - .isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("eml/pgpSignedMail.json"), CHARSET)); + .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/pgpSignedMail.json")); } @Test @@ -182,7 +180,7 @@ public class MessageToElasticSearchJsonTest { date, SIZE, BODY_START_OCTET, - new SharedByteArrayInputStream(ClassLoaderUtils.getSystemResourceAsByteArray("eml/mail.eml")), + ClassLoaderUtils.getSystemResourceAsSharedStream("eml/mail.eml"), new FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.SEEN).add("debian", "security").build(), propertyBuilder, MAILBOX_ID); @@ -191,7 +189,7 @@ public class MessageToElasticSearchJsonTest { assertThatJson(messageToElasticSearchJson.convertToJson(mail, ImmutableList.of(new MockMailboxSession("user1").getUser(), new MockMailboxSession("user2").getUser()))) .when(IGNORING_ARRAY_ORDER).when(IGNORING_VALUES) - .isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("eml/mail.json"), CHARSET)); + .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/mail.json")); } @Test @@ -203,15 +201,15 @@ public class MessageToElasticSearchJsonTest { date, SIZE, BODY_START_OCTET, - new SharedByteArrayInputStream(ClassLoaderUtils.getSystemResourceAsByteArray("eml/recursiveMail.eml")), + ClassLoaderUtils.getSystemResourceAsSharedStream("eml/recursiveMail.eml"), new FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.SEEN).add("debian", "security").build(), propertyBuilder, MAILBOX_ID); recursiveMail.setModSeq(MOD_SEQ); recursiveMail.setUid(UID); - assertThatJson(messageToElasticSearchJson.convertToJson(recursiveMail, ImmutableList.of(new MockMailboxSession("username").getUser()))) + assertThatJson(messageToElasticSearchJson.convertToJson(recursiveMail, ImmutableList.of(MAILBOX_SESSION.getUser()))) .when(IGNORING_ARRAY_ORDER).when(IGNORING_VALUES) - 
.isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("eml/recursiveMail.json"), CHARSET)); + .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/recursiveMail.json")); } @Test @@ -223,16 +221,16 @@ public class MessageToElasticSearchJsonTest { null, SIZE, BODY_START_OCTET, - new SharedByteArrayInputStream(ClassLoaderUtils.getSystemResourceAsByteArray("eml/recursiveMail.eml")), + ClassLoaderUtils.getSystemResourceAsSharedStream("eml/recursiveMail.eml"), new FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.SEEN).add("debian", "security").build(), propertyBuilder, MAILBOX_ID); mailWithNoInternalDate.setModSeq(MOD_SEQ); mailWithNoInternalDate.setUid(UID); - assertThatJson(messageToElasticSearchJson.convertToJson(mailWithNoInternalDate, ImmutableList.of(new MockMailboxSession("username").getUser()))) + assertThatJson(messageToElasticSearchJson.convertToJson(mailWithNoInternalDate, ImmutableList.of(MAILBOX_SESSION.getUser()))) .when(IGNORING_ARRAY_ORDER) .when(IGNORING_VALUES) - .isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("eml/recursiveMail.json"), CHARSET)); + .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/recursiveMail.json")); } @Test @@ -242,7 +240,7 @@ public class MessageToElasticSearchJsonTest { null, SIZE, BODY_START_OCTET, - new SharedByteArrayInputStream(ClassLoaderUtils.getSystemResourceAsByteArray("eml/recursiveMail.eml")), + ClassLoaderUtils.getSystemResourceAsSharedStream("eml/recursiveMail.eml"), new FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.SEEN).add("debian", "security").build(), propertyBuilder, MAILBOX_ID); @@ -254,13 +252,13 @@ public class MessageToElasticSearchJsonTest { new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.YES); - String convertToJson = messageToElasticSearchJson.convertToJson(mailWithNoInternalDate, ImmutableList.of(new MockMailboxSession("username").getUser())); + String convertToJson = messageToElasticSearchJson.convertToJson(mailWithNoInternalDate, 
ImmutableList.of(MAILBOX_SESSION.getUser())); // Then assertThatJson(convertToJson) .when(IGNORING_ARRAY_ORDER) .when(IGNORING_VALUES) - .isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("eml/recursiveMail.json"), CHARSET)); + .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/recursiveMail.json")); } @Test @@ -270,7 +268,7 @@ public class MessageToElasticSearchJsonTest { null, SIZE, BODY_START_OCTET, - new SharedByteArrayInputStream(ClassLoaderUtils.getSystemResourceAsByteArray("eml/recursiveMail.eml")), + ClassLoaderUtils.getSystemResourceAsSharedStream("eml/recursiveMail.eml"), new FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.SEEN).add("debian", "security").build(), propertyBuilder, MAILBOX_ID); @@ -282,13 +280,13 @@ public class MessageToElasticSearchJsonTest { new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.NO); - String convertToJson = messageToElasticSearchJson.convertToJson(mailWithNoInternalDate, ImmutableList.of(new MockMailboxSession("username").getUser())); + String convertToJson = messageToElasticSearchJson.convertToJson(mailWithNoInternalDate, ImmutableList.of(MAILBOX_SESSION.getUser())); // Then assertThatJson(convertToJson) .when(IGNORING_ARRAY_ORDER) .when(IGNORING_VALUES) - .isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("eml/recursiveMailWithoutAttachments.json"), CHARSET)); + .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/recursiveMailWithoutAttachments.json")); } @Test(expected = NullPointerException.class) @@ -301,7 +299,7 @@ public class MessageToElasticSearchJsonTest { mailWithNoMailboxId = new SimpleMailboxMessage(MESSAGE_ID, date, SIZE, BODY_START_OCTET, - new SharedByteArrayInputStream(ClassLoaderUtils.getSystemResourceAsByteArray("eml/recursiveMail.eml")), + ClassLoaderUtils.getSystemResourceAsSharedStream("eml/recursiveMail.eml"), new FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.SEEN).add("debian", "security").build(), propertyBuilder, null); @@ -310,7 +308,7 @@ 
public class MessageToElasticSearchJsonTest { } catch (Exception exception) { throw Throwables.propagate(exception); } - messageToElasticSearchJson.convertToJson(mailWithNoMailboxId, ImmutableList.of(new MockMailboxSession("username").getUser())); + messageToElasticSearchJson.convertToJson(mailWithNoMailboxId, ImmutableList.of(MAILBOX_SESSION.getUser())); } @Test @@ -351,15 +349,17 @@ public class MessageToElasticSearchJsonTest { MailboxMessage spamMail = new SimpleMailboxMessage(MESSAGE_ID, date, SIZE, BODY_START_OCTET, - new SharedByteArrayInputStream(ClassLoaderUtils.getSystemResourceAsByteArray("eml/nonTextual.eml")), + ClassLoaderUtils.getSystemResourceAsSharedStream("eml/nonTextual.eml"), new Flags(), propertyBuilder, MAILBOX_ID); spamMail.setUid(UID); spamMail.setModSeq(MOD_SEQ); - assertThatJson(messageToElasticSearchJson.convertToJson(spamMail, ImmutableList.of(new MockMailboxSession("username").getUser()))) + + assertThatJson(messageToElasticSearchJson.convertToJson(spamMail, ImmutableList.of(MAILBOX_SESSION.getUser()))) .when(IGNORING_ARRAY_ORDER) - .isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("eml/nonTextual.json"), CHARSET)); + .isEqualTo( + ClassLoaderUtils.getSystemResourceAsString("eml/nonTextual.json", StandardCharsets.UTF_8)); } @Test @@ -369,7 +369,7 @@ public class MessageToElasticSearchJsonTest { null, SIZE, BODY_START_OCTET, - new SharedByteArrayInputStream(ClassLoaderUtils.getSystemResourceAsByteArray("eml/emailWithNonIndexableAttachment.eml")), + ClassLoaderUtils.getSystemResourceAsSharedStream("eml/emailWithNonIndexableAttachment.eml"), new FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.SEEN).add("debian", "security").build(), propertyBuilder, MAILBOX_ID); @@ -381,12 +381,12 @@ public class MessageToElasticSearchJsonTest { new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.NO); - String convertToJsonWithoutAttachment = messageToElasticSearchJson.convertToJsonWithoutAttachment(message, 
ImmutableList.of(new MockMailboxSession("username").getUser())); + String convertToJsonWithoutAttachment = messageToElasticSearchJson.convertToJsonWithoutAttachment(message, ImmutableList.of(MAILBOX_SESSION.getUser())); // Then assertThatJson(convertToJsonWithoutAttachment) .when(IGNORING_ARRAY_ORDER) .when(IGNORING_VALUES) - .isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("eml/emailWithNonIndexableAttachmentWithoutAttachment.json"), CHARSET)); + .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/emailWithNonIndexableAttachmentWithoutAttachment.json")); } } http://git-wip-us.apache.org/repos/asf/james-project/blob/b25ee7ff/mailbox/elasticsearch/src/test/resources/eml/emailWith3Attachments.eml ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/test/resources/eml/emailWith3Attachments.eml b/mailbox/elasticsearch/src/test/resources/eml/emailWith3Attachments.eml index e4b7185..c7eedfb 100644 --- a/mailbox/elasticsearch/src/test/resources/eml/emailWith3Attachments.eml +++ b/mailbox/elasticsearch/src/test/resources/eml/emailWith3Attachments.eml @@ -30,11 +30,11 @@ Content-Disposition: attachment; UEsDBBQAAAgAAGJVK0pexjIMJwAAACcAAAAIAAAAbWltZXR5cGVhcHBsaWNhdGlvbi92bmQu dC54bWxQSwUGAAAAABEAEQBwBAAAjyUAAAAA --------------36566F1E9D791340FFB75FF8 -Content-Type: text/html; charset=UTF-8; +Content-Type: application/vnd.oasis.opendocument.text; name="attachment2-nonIndexableAttachment.html" Content-Transfer-Encoding: base64 Content-Disposition: attachment; - filename="attachment2-nonIndexableAttachment.html" + filename="attachment2-nonIndexableAttachment.odt" PCFET0NUWVBFIGh0bWw+CjxodG1sIGNsYXNzPSJtb3ppbGxhIiBsYW5nPSJlbiI+PGhlYWQ+ CI+PC9kaXY+PC9kaXY+PC9ib2R5PjwvaHRtbD4= http://git-wip-us.apache.org/repos/asf/james-project/blob/b25ee7ff/mailbox/store/src/test/resources/eml/nonTextual.json ---------------------------------------------------------------------- diff --git 
a/mailbox/store/src/test/resources/eml/nonTextual.json b/mailbox/store/src/test/resources/eml/nonTextual.json index 1170d5a..ccf1a4c 100644 --- a/mailbox/store/src/test/resources/eml/nonTextual.json +++ b/mailbox/store/src/test/resources/eml/nonTextual.json @@ -1,84 +1,5 @@ { - "messageId":"184", - "uid":25, - "mailboxId":"18", - "modSeq":42, - "size":25, - "date":"2015-06-07T00:00:00+0200", - "mediaType":"plain", - "subtype":"text", - "userFlags":[], - "mimeMessageID": "<[email protected]>", - "headers":{ - "date":[ - "Thu, 18 Jun 2015 12:43:26 +0200" - ], - "mime-version":[ - "1.0" - ], - "x-sieve":[ - "CMU Sieve 2.2" - ], - "return-path":[ - "<[email protected]>" - ], - "subject":[ - "Test message" - ], - "message-id":[ - "<[email protected]>" - ], - "received":[ - "from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53])\t by imap (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA;\t Thu, 18 Jun 2015 12:43:28 +0200","from [10.75.9.154] (unknown [92.103.166.6])\t(using TLSv1 with cipher DHE-RSA-AES128-SHA (128/128 bits))\t(No client certificate requested)\tby alderaan.linagora.com (Postfix) with ESMTPSA id 0EB1078A\tfor <[email protected]>; Thu, 18 Jun 2015 12:43:28 +0200 (CEST)" - ], - "from":[ - "Benoit Tellier <[email protected]>" - ], - "content-type":[ - "multipart/mixed; boundary=\"------------030000010109090603040500\"" - ], - "to":[ - "[email protected]" - ], - "user-agent":[ - "Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.0.1" - ] - }, - "from":[ - { - "name":"Benoit Tellier", - "address":"[email protected]" - } - ], - "to":[ - { - "name":"[email protected]", - "address":"[email protected]" - } - ], - "cc":[], - "bcc":[], - "replyTo":[], - "subject":["Test message"], - "sentDate":"2015-06-18T12:43:26+0200", - "properties":[ - { - "namespace":"http://james.apache.org/rfc2045/Content-Type", - "localName":"type", - "value":"plain" - }, - { - "namespace":"http://james.apache.org/rfc2045/Content-Type", - 
"localName":"subtype", - "value":"text" - }, - { - "namespace":"http://james.apache.org/rfc2045", - "localName":"Content-Description", - "value":"An e-mail" - } - ], - "attachments":[ + "attachments": [ { "mediaType":"text", "subtype":"plain", @@ -89,32 +10,62 @@ "content-transfer-encoding":["7bit"], "content-type":["text/plain; charset=utf-8"] }, - "textContent":"This mail have a non textual attachment !\r\n\n" + "textContent":"This mail have a non textual attachment !\r\n" }, { "mediaType":"application", "subtype":"vnd.oasis.opendocument.text", - "fileName":"toto.odt", - "fileExtension":"odt", + "fileName":"toto.odt","fileExtension":"odt", "contentDisposition":"attachment", "headers":{ "content-transfer-encoding":["base64"], "content-disposition":["attachment; filename=\"toto.odt\""], "content-type":["application/vnd.oasis.opendocument.text; name=\"toto.odt\""] }, - "textContent":"Awesome document provided for text extraction !\n"} + "textContent":"Awesome document provided for text extraction !\n" + } ], - "textBody":"This mail have a non textual attachment !\r\n\n", - "htmlBody": null, + "bcc":[], + "htmlBody":null, + "textBody":"This mail have a non textual attachment !\r\n", + "cc":[], + "date":"2015-06-07T00:00:00+0200", + "from":[{"name":"Benoit Tellier","address":"[email protected]"}], + "hasAttachment":false, + "headers":{ + "date":["Thu, 18 Jun 2015 12:43:26 +0200"], + "mime-version":["1.0"], + "x-sieve":["CMU Sieve 2.2"], + "return-path":["<[email protected]>"], + "subject":["Test message"], + "message-id":["<[email protected]>"], + "received":["from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53])\t by imap (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA;\t Thu, 18 Jun 2015 12:43:28 +0200","from [10.75.9.154] (unknown [92.103.166.6])\t(using TLSv1 with cipher DHE-RSA-AES128-SHA (128/128 bits))\t(No client certificate requested)\tby alderaan.linagora.com (Postfix) with ESMTPSA id 0EB1078A\tfor <[email protected]>; Thu, 18 Jun 2015 12:43:28 
+0200 (CEST)"], + "from":["Benoit Tellier <[email protected]>"], + "content-type":["multipart/mixed; boundary=\"------------030000010109090603040500\""], + "to":["[email protected]"], + "user-agent":["Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.0.1"]}, + "mailboxId":"18", + "mediaType":"plain", + "messageId":"184", + "modSeq":42, + "properties":[ + {"namespace":"http://james.apache.org/rfc2045/Content-Type","localName":"type","value":"plain"}, + {"namespace":"http://james.apache.org/rfc2045/Content-Type","localName":"subtype","value":"text"}, + {"namespace":"http://james.apache.org/rfc2045","localName":"Content-Description","value":"An e-mail"}], + "replyTo":[], + "sentDate":"2015-06-18T12:43:26+0200", + "size":25,"subject":["Test message"], + "subtype":"text", + "text":"Benoit Tellier [email protected] [email protected] [email protected] Test message This mail have a non textual attachment !\r\n", + "to":[{"name":"[email protected]","address":"[email protected]"}], + "uid":25, + "userFlags":[], + "users":["username"], + "mimeMessageID":"<[email protected]>", "isAnswered":false, "isDeleted":false, "isDraft":false, "isFlagged":false, "isRecent":false, - "hasAttachment":false, - "isUnread":true, - "users": [ - "username" - ], - "text": "Benoit Tellier [email protected] [email protected] [email protected] Test message This mail have a non textual attachment !\r\n\n" + "isUnread":true } http://git-wip-us.apache.org/repos/asf/james-project/blob/b25ee7ff/server/container/util-java8/src/main/java/org/apache/james/util/ClassLoaderUtils.java ---------------------------------------------------------------------- diff --git a/server/container/util-java8/src/main/java/org/apache/james/util/ClassLoaderUtils.java b/server/container/util-java8/src/main/java/org/apache/james/util/ClassLoaderUtils.java index 89574b6..68d9b08 100644 --- a/server/container/util-java8/src/main/java/org/apache/james/util/ClassLoaderUtils.java +++ 
b/server/container/util-java8/src/main/java/org/apache/james/util/ClassLoaderUtils.java @@ -20,19 +20,26 @@ package org.apache.james.util; import java.io.IOException; +import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import javax.mail.util.SharedByteArrayInputStream; + import org.apache.commons.io.IOUtils; public class ClassLoaderUtils { - public static String getSystemResourceAsString(String filename) { + public static String getSystemResourceAsString(String filename, Charset charset) { try { - return IOUtils.toString(ClassLoader.getSystemResourceAsStream(filename), StandardCharsets.US_ASCII); + return IOUtils.toString(ClassLoader.getSystemResourceAsStream(filename), charset); } catch (IOException e) { throw new RuntimeException(e); } } + public static String getSystemResourceAsString(String filename) { + return getSystemResourceAsString(filename, StandardCharsets.US_ASCII); + } + public static byte[] getSystemResourceAsByteArray(String filename) { try { return IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream(filename)); @@ -40,4 +47,8 @@ public class ClassLoaderUtils { throw new RuntimeException(e); } } + + public static SharedByteArrayInputStream getSystemResourceAsSharedStream(String filename) { + return new SharedByteArrayInputStream(getSystemResourceAsByteArray(filename)); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
