Author: btellier
Date: Mon Jun 29 08:45:43 2015
New Revision: 1688146

URL: http://svn.apache.org/r1688146
Log:
MAILBOX-245 Use text extractor on JSON generation

Added:
    james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json
Modified:
    james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
    james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java
    james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java
    james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
    james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java
    james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
    james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java
    james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
    james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json
    james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json

Modified: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java?rev=1688146&r1=1688145&r2=1688146&view=diff
============================================================================== --- james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java (original) +++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java Mon Jun 29 08:45:43 2015 @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation. import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.collect.Multimap; +import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor; import org.apache.james.mailbox.elasticsearch.query.DateResolutionFormater; import org.apache.james.mailbox.store.mail.model.MailboxId; import org.apache.james.mailbox.store.mail.model.Message; @@ -41,11 +42,11 @@ import java.util.stream.Collectors; public class IndexableMessage { - public static IndexableMessage from(Message<? extends MailboxId> message) { + public static IndexableMessage from(Message<? 
extends MailboxId> message, TextExtractor textExtractor) { Preconditions.checkNotNull(message.getMailboxId()); IndexableMessage indexableMessage = new IndexableMessage(); try { - MimePart parsingResult = new MimePartParser(message).parse(); + MimePart parsingResult = new MimePartParser(message, textExtractor).parse(); indexableMessage.bodyText = parsingResult.locateFirstTextualBody(); indexableMessage.setFlattenedAttachments(parsingResult); indexableMessage.copyHeaderFields(parsingResult.getHeaderCollection(), getSanitizedInternalDate(message)); Modified: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java?rev=1688146&r1=1688145&r2=1688146&view=diff ============================================================================== --- james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java (original) +++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/JsonMessageConstants.java Mon Jun 29 08:45:43 2015 @@ -66,6 +66,7 @@ public interface JsonMessageConstants { String CONTENT_DISPOSITION = "contentDisposition"; String FILENAME = "fileName"; String FILE_EXTENSION = "fileExtension"; + String FILE_METADATA = "fileMetadata"; } interface Property { Modified: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java?rev=1688146&r1=1688145&r2=1688146&view=diff ============================================================================== --- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java (original) +++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJson.java Mon Jun 29 08:45:43 2015 @@ -26,21 +26,24 @@ import com.fasterxml.jackson.databind.Ob import com.fasterxml.jackson.datatype.guava.GuavaModule; import com.fasterxml.jackson.datatype.jdk8.Jdk8Module; import com.google.common.base.Preconditions; +import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor; import org.apache.james.mailbox.store.mail.model.Message; public class MessageToElasticSearchJson { - private ObjectMapper mapper; + private final ObjectMapper mapper; + private final TextExtractor textExtractor; - public MessageToElasticSearchJson() { - mapper = new ObjectMapper(); - mapper.registerModule(new GuavaModule()); - mapper.registerModule(new Jdk8Module()); + public MessageToElasticSearchJson(TextExtractor textExtractor) { + this.textExtractor = textExtractor; + this.mapper = new ObjectMapper(); + this.mapper.registerModule(new GuavaModule()); + this.mapper.registerModule(new Jdk8Module()); } public String convertToJson(Message<?> message) throws JsonProcessingException { Preconditions.checkNotNull(message); - return mapper.writeValueAsString(IndexableMessage.from(message)); + return mapper.writeValueAsString(IndexableMessage.from(message, textExtractor)); } public String getUpdatedJsonMessagePart(Flags flags, long modSeq) throws JsonProcessingException { Modified: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java?rev=1688146&r1=1688145&r2=1688146&view=diff ============================================================================== --- 
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java (original) +++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java Mon Jun 29 08:45:43 2015 @@ -21,15 +21,18 @@ package org.apache.james.mailbox.elastic import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Multimap; import org.apache.commons.io.FilenameUtils; -import org.apache.commons.io.IOUtils; +import org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor; +import org.apache.james.mailbox.elasticsearch.json.extractor.ParsedContent; +import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor; import org.apache.james.mime4j.stream.Field; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.io.InputStream; import java.util.List; import java.util.Optional; @@ -47,7 +50,7 @@ public class MimePart { private Optional<String> fileName; private Optional<String> fileExtension; private Optional<String> contentDisposition; - + private TextExtractor textExtractor; private Builder() { children = Lists.newArrayList(); @@ -58,6 +61,7 @@ public class MimePart { this.fileName = Optional.empty(); this.fileExtension = Optional.empty(); this.contentDisposition = Optional.empty(); + this.textExtractor = new DefaultTextExtractor(); } @Override @@ -104,35 +108,50 @@ public class MimePart { } @Override + public MimePartContainerBuilder using(TextExtractor textExtractor) { + Preconditions.checkArgument(textExtractor != null, "Provided text extractor should not be null"); + this.textExtractor = textExtractor; + return this; + } + + @Override public MimePart build() { + Optional<ParsedContent> parsedContent = 
parseContent(textExtractor); return new MimePart( - headerCollectionBuilder.build(), - decodeContent(), - mediaType, - subType, - fileName, - fileExtension, - contentDisposition, - children + headerCollectionBuilder.build(), + parsedContent.map(ParsedContent::getTextualContent) + .orElse(Optional.empty()) + , + mediaType, + subType, + fileName, + fileExtension, + contentDisposition, + children, + parsedContent.map(ParsedContent::getMetadata) + .orElse(ImmutableMultimap.<String, String>builder().build()) ); } - private boolean isTextualMimePart() { - return mediaType.isPresent() - && mediaType.get().equalsIgnoreCase("text"); - } - - private Optional<String> decodeContent() { - if (bodyContent.isPresent() && isTextualMimePart()) { + private Optional<ParsedContent> parseContent(TextExtractor textExtractor) { + if (bodyContent.isPresent()) { try { - return Optional.of(IOUtils.toString(bodyContent.get())); - } catch (IOException e) { - LOGGER.warn("Can not decode body content", e); + return Optional.of(textExtractor.extractContent(bodyContent.get(), computeContentType(), fileName)); + } catch (Exception e) { + LOGGER.warn("Failed parsing attachment", e); } } return Optional.empty(); } + private Optional<String> computeContentType() { + if (mediaType.isPresent() && subType.isPresent()) { + return Optional.of(mediaType.get() + "/" + subType.get()); + } else { + return Optional.empty(); + } + } + } public static Builder builder() { @@ -149,10 +168,11 @@ public class MimePart { private final Optional<String> fileExtension; private final Optional<String> contentDisposition; private final List<MimePart> attachments; + private final ImmutableMultimap<String, String> metadata; private MimePart(HeaderCollection headerCollection, Optional<String> bodyTextContent, Optional<String> mediaType, Optional<String> subType, Optional<String> fileName, Optional<String> fileExtension, - Optional<String> contentDisposition, List<MimePart> attachments) { + Optional<String> contentDisposition, 
List<MimePart> attachments, Multimap<String, String> metadata) { this.headerCollection = headerCollection; this.mediaType = mediaType; this.subType = subType; @@ -161,6 +181,7 @@ public class MimePart { this.contentDisposition = contentDisposition; this.attachments = attachments; this.bodyTextContent = bodyTextContent; + this.metadata = ImmutableMultimap.copyOf(metadata); } @JsonIgnore @@ -208,6 +229,11 @@ public class MimePart { return bodyTextContent; } + @JsonProperty(JsonMessageConstants.Attachment.FILE_METADATA) + public ImmutableMultimap<String, String> getMetadata() { + return metadata; + } + @JsonIgnore public Optional<String> locateFirstTextualBody() { return Stream.concat( Modified: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java?rev=1688146&r1=1688145&r2=1688146&view=diff ============================================================================== --- james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java (original) +++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartContainerBuilder.java Mon Jun 29 08:45:43 2015 @@ -19,6 +19,7 @@ package org.apache.james.mailbox.elasticsearch.json; +import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor; import org.apache.james.mime4j.stream.Field; import java.io.InputStream; @@ -27,6 +28,8 @@ public interface MimePartContainerBuilde MimePart build(); + MimePartContainerBuilder using(TextExtractor textExtractor); + MimePartContainerBuilder addToHeaders(Field field); MimePartContainerBuilder addBodyContent(InputStream bodyContent); Modified: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java?rev=1688146&r1=1688145&r2=1688146&view=diff ============================================================================== --- james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java (original) +++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java Mon Jun 29 08:45:43 2015 @@ -20,6 +20,7 @@ package org.apache.james.mailbox.elasticsearch.json; import com.google.common.base.Preconditions; +import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor; import org.apache.james.mailbox.store.mail.model.MailboxId; import org.apache.james.mailbox.store.mail.model.Message; import org.apache.james.mime4j.MimeException; @@ -36,13 +37,15 @@ import java.util.LinkedList; public class MimePartParser { private final Message<? extends MailboxId> message; + private final TextExtractor textExtractor; private final MimeTokenStream stream; private final Deque<MimePartContainerBuilder> builderStack; private MimePart result; private MimePartContainerBuilder currentlyBuildMimePart; - public MimePartParser(Message<? extends MailboxId> message) { + public MimePartParser(Message<? 
extends MailboxId> message, TextExtractor textExtractor) { this.message = message; + this.textExtractor = textExtractor; this.builderStack = new LinkedList<>(); this.currentlyBuildMimePart = new RootMimePartContainerBuilder(); this.stream = new MimeTokenStream( @@ -94,7 +97,7 @@ public class MimePartParser { } private void closeMimePart() { - MimePart bodyMimePart = currentlyBuildMimePart.build(); + MimePart bodyMimePart = currentlyBuildMimePart.using(textExtractor).build(); if (!builderStack.isEmpty()) { builderStack.peek().addChild(bodyMimePart); } else { Modified: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java?rev=1688146&r1=1688145&r2=1688146&view=diff ============================================================================== --- james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java (original) +++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/RootMimePartContainerBuilder.java Mon Jun 29 08:45:43 2015 @@ -19,6 +19,7 @@ package org.apache.james.mailbox.elasticsearch.json; +import org.apache.james.mailbox.elasticsearch.json.extractor.TextExtractor; import org.apache.james.mime4j.stream.Field; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,6 +37,10 @@ public class RootMimePartContainerBuilde return rootMimePart; } + @Override public MimePartContainerBuilder using(TextExtractor textExtractor) { + return this; + } + @Override public MimePartContainerBuilder addToHeaders(Field field) { LOGGER.warn("Trying to add headers to the Root MimePart container"); Modified: james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java?rev=1688146&r1=1688145&r2=1688146&view=diff ============================================================================== --- james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java (original) +++ james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java Mon Jun 29 08:45:43 2015 @@ -30,6 +30,7 @@ import org.apache.james.mailbox.acl.Simp import org.apache.james.mailbox.acl.UnionMailboxACLResolver; import org.apache.james.mailbox.elasticsearch.events.ElasticSearchListeningMessageSearchIndex; import org.apache.james.mailbox.elasticsearch.json.MessageToElasticSearchJson; +import org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor; import org.apache.james.mailbox.elasticsearch.query.CriterionConverter; import org.apache.james.mailbox.elasticsearch.query.QueryConverter; import org.apache.james.mailbox.elasticsearch.search.ElasticSearchSearcher; @@ -158,10 +159,10 @@ public class ElasticSearchIntegrationTes IndexCreationFactory.createIndex(new TestingClientProvider(embeddedElasticSearch.getNode())) ); MailboxSessionMapperFactory<InMemoryId> mapperFactory = new InMemoryMailboxSessionMapperFactory(); - elasticSearchListeningMessageSearchIndex = new ElasticSearchListeningMessageSearchIndex<InMemoryId>(mapperFactory, + elasticSearchListeningMessageSearchIndex = new ElasticSearchListeningMessageSearchIndex<>(mapperFactory, new ElasticSearchIndexer(clientProvider), - new ElasticSearchSearcher<InMemoryId>(clientProvider, new QueryConverter(new CriterionConverter())), - new MessageToElasticSearchJson()); + new ElasticSearchSearcher<>(clientProvider, new QueryConverter(new CriterionConverter())), + new MessageToElasticSearchJson(new DefaultTextExtractor())); storeMailboxManager = new 
StoreMailboxManager<>( mapperFactory, new MockAuthenticator(), Modified: james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java?rev=1688146&r1=1688145&r2=1688146&view=diff ============================================================================== --- james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java (original) +++ james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java Mon Jun 29 08:45:43 2015 @@ -22,6 +22,8 @@ package org.apache.james.mailbox.elastic import com.google.common.base.Throwables; import org.apache.commons.io.IOUtils; import org.apache.james.mailbox.FlagsBuilder; +import org.apache.james.mailbox.elasticsearch.json.extractor.DefaultTextExtractor; +import org.apache.james.mailbox.elasticsearch.json.extractor.TikaTextExtractor; import org.apache.james.mailbox.store.TestId; import org.apache.james.mailbox.store.mail.model.Message; import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder; @@ -53,11 +55,9 @@ public class MessageToElasticSearchJsonT private Date date; private PropertyBuilder propertyBuilder; - private MessageToElasticSearchJson messageToElasticSearchJson; @Before public void setUp() throws Exception { - messageToElasticSearchJson = new MessageToElasticSearchJson(); date = formatter.parse("07-06-2015"); propertyBuilder = new PropertyBuilder(); propertyBuilder.setMediaType("plain"); @@ -68,6 +68,7 @@ public class MessageToElasticSearchJsonT @Test public void spamEmailShouldBeWellConvertedToJson() throws IOException { + MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(new DefaultTextExtractor()); Message<TestId> 
spamMail = new SimpleMessage<>(date, SIZE, BODY_START_OCTET, @@ -83,6 +84,7 @@ public class MessageToElasticSearchJsonT @Test public void htmlEmailShouldBeWellConvertedToJson() throws IOException { + MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(new DefaultTextExtractor()); Message<TestId> htmlMail = new SimpleMessage<>(date, SIZE, BODY_START_OCTET, @@ -99,6 +101,7 @@ public class MessageToElasticSearchJsonT @Test public void pgpSignedEmailShouldBeWellConvertedToJson() throws IOException { + MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(new DefaultTextExtractor()); Message<TestId> pgpSignedMail = new SimpleMessage<>(date, SIZE, BODY_START_OCTET, @@ -115,6 +118,7 @@ public class MessageToElasticSearchJsonT @Test public void simpleEmailShouldBeWellConvertedToJson() throws IOException { + MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(new DefaultTextExtractor()); Message<TestId> mail = new SimpleMessage<>(date, SIZE, BODY_START_OCTET, @@ -131,6 +135,7 @@ public class MessageToElasticSearchJsonT @Test public void recursiveEmailShouldBeWellConvertedToJson() throws IOException { + MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(new DefaultTextExtractor()); Message<TestId> recursiveMail = new SimpleMessage<>(date, SIZE, BODY_START_OCTET, @@ -147,6 +152,7 @@ public class MessageToElasticSearchJsonT @Test public void emailWithNoInternalDateShouldUseNowDate() throws IOException { + MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(new DefaultTextExtractor()); Message<TestId> mailWithNoInternalDate = new SimpleMessage<>(null, SIZE, BODY_START_OCTET, @@ -164,6 +170,7 @@ public class MessageToElasticSearchJsonT @Test(expected = NullPointerException.class) public void emailWithNoMailboxIdShouldThrow() throws IOException { + MessageToElasticSearchJson messageToElasticSearchJson = new 
MessageToElasticSearchJson(new DefaultTextExtractor()); Message<TestId> mailWithNoMailboxId; try { mailWithNoMailboxId = new SimpleMessage<>(date, @@ -183,19 +190,38 @@ public class MessageToElasticSearchJsonT @Test public void getUpdatedJsonMessagePartShouldBehaveWellOnEmptyFlags() throws Exception { + MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(new DefaultTextExtractor()); assertThatJson(messageToElasticSearchJson.getUpdatedJsonMessagePart(new Flags(), MOD_SEQ)) .isEqualTo("{\"modSeq\":42,\"isAnswered\":false,\"isDeleted\":false,\"isDraft\":false,\"isFlagged\":false,\"isRecent\":false,\"userFlags\":[],\"isUnread\":true}"); } @Test public void getUpdatedJsonMessagePartShouldBehaveWellOnNonEmptyFlags() throws Exception { + MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(new DefaultTextExtractor()); assertThatJson(messageToElasticSearchJson.getUpdatedJsonMessagePart(new FlagsBuilder().add(Flags.Flag.DELETED, Flags.Flag.FLAGGED).add("user").build(), MOD_SEQ)) .isEqualTo("{\"modSeq\":42,\"isAnswered\":false,\"isDeleted\":true,\"isDraft\":false,\"isFlagged\":true,\"isRecent\":false,\"userFlags\":[\"user\"],\"isUnread\":true}"); } @Test(expected = NullPointerException.class) public void getUpdatedJsonMessagePartShouldThrowIfFlagsIsNull() throws Exception { + MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(new DefaultTextExtractor()); messageToElasticSearchJson.getUpdatedJsonMessagePart(null, MOD_SEQ); } + @Test + public void spamEmailShouldBeWellConvertedToJsonWithApacheTika() throws IOException { + MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(new TikaTextExtractor()); + Message<TestId> spamMail = new SimpleMessage<>(date, + SIZE, + BODY_START_OCTET, + new SharedByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("documents/nonTextual.eml"))), + new Flags(), + propertyBuilder, + 
MAILBOX_ID); + spamMail.setModSeq(MOD_SEQ); + assertThatJson(messageToElasticSearchJson.convertToJson(spamMail)) + .when(IGNORING_ARRAY_ORDER) + .isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("documents/nonTextual.json"))); + } + } Added: james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java?rev=1688146&view=auto ============================================================================== --- james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java (added) +++ james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonUsingTika.java Mon Jun 29 08:45:43 2015 @@ -0,0 +1,79 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. 
* + ****************************************************************/ + +package org.apache.james.mailbox.elasticsearch.json; + +import javax.mail.Flags; +import javax.mail.util.SharedByteArrayInputStream; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; + +import org.apache.commons.io.IOUtils; +import org.apache.james.mailbox.elasticsearch.json.extractor.TikaTextExtractor; +import org.apache.james.mailbox.store.TestId; +import org.apache.james.mailbox.store.mail.model.Message; +import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder; +import org.apache.james.mailbox.store.mail.model.impl.SimpleMessage; +import org.junit.Before; +import org.junit.Test; +import static net.javacrumbs.jsonunit.core.Option.IGNORING_ARRAY_ORDER; +import static net.javacrumbs.jsonunit.fluent.JsonFluentAssert.assertThatJson; + +public class MessageToElasticSearchJsonUsingTika { + + public static final int SIZE = 25; + public static final int BODY_START_OCTET = 100; + public static final TestId MAILBOX_ID = TestId.of(18L); + public static final long MOD_SEQ = 42L; + public static final long UID = 25L; + + private SimpleDateFormat formatter = new SimpleDateFormat("dd-MM-yyyy"); + + private Date date; + private PropertyBuilder propertyBuilder; + private MessageToElasticSearchJson messageToElasticSearchJson; + + @Before + public void setUp() throws Exception { + messageToElasticSearchJson = new MessageToElasticSearchJson(new TikaTextExtractor()); + date = formatter.parse("07-06-2015"); + propertyBuilder = new PropertyBuilder(); + propertyBuilder.setMediaType("plain"); + propertyBuilder.setSubType("text"); + propertyBuilder.setTextualLineCount(18L); + propertyBuilder.setContentDescription("An e-mail"); + } + + @Test + public void spamEmailShouldBeWellConvertedToJson() throws IOException { + Message<TestId> spamMail = new SimpleMessage<>(date, + SIZE, + BODY_START_OCTET, + new 
SharedByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("documents/nonTextual.eml"))), + new Flags(), + propertyBuilder, + MAILBOX_ID); + spamMail.setModSeq(MOD_SEQ); + assertThatJson(messageToElasticSearchJson.convertToJson(spamMail)) + .when(IGNORING_ARRAY_ORDER) + .isEqualTo(IOUtils.toString(ClassLoader.getSystemResource("documents/nonTextual.json"))); + } + +} Modified: james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json?rev=1688146&r1=1688145&r2=1688146&view=diff ============================================================================== --- james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json (original) +++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/htmlMail.json Mon Jun 29 08:45:43 2015 @@ -121,6 +121,7 @@ "subtype":"plain", "fileName":null, "fileExtension":null, + "fileMetadata":{}, "contentDisposition":null, "headers":{ "content-transfer-encoding":[ @@ -136,6 +137,7 @@ "mediaType":"text", "subtype":"html", "fileName":null, + "fileMetadata":{}, "fileExtension":null, "contentDisposition":null, "headers":{ Added: james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml?rev=1688146&view=auto ============================================================================== --- james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml (added) +++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.eml Mon Jun 29 08:45:43 2015 @@ -0,0 +1,196 @@ +Return-Path: <btell...@linagora.com> +Received: from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53]) + by imap (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA; + Thu, 18 Jun 2015 12:43:28 +0200 +X-Sieve: CMU Sieve 2.2 +Received: 
from [10.75.9.154] (unknown [92.103.166.6]) + (using TLSv1 with cipher DHE-RSA-AES128-SHA (128/128 bits)) + (No client certificate requested) + by alderaan.linagora.com (Postfix) with ESMTPSA id 0EB1078A + for <btell...@linagora.com>; Thu, 18 Jun 2015 12:43:28 +0200 (CEST) +To: btell...@linagora.com +From: Benoit Tellier <btell...@linagora.com> +Subject: Test message +Message-ID: <5582a0ce.4020...@linagora.com> +Date: Thu, 18 Jun 2015 12:43:26 +0200 +User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 + Thunderbird/38.0.1 +MIME-Version: 1.0 +Content-Type: multipart/mixed; + boundary="------------030000010109090603040500" + +This is a multi-part message in MIME format. +--------------030000010109090603040500 +Content-Type: text/plain; charset=utf-8 +Content-Transfer-Encoding: 7bit + +This mail have a non textual attachment ! + +--------------030000010109090603040500 +Content-Type: application/vnd.oasis.opendocument.text; + name="toto.odt" +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; + filename="toto.odt" + +UEsDBBQAAAgAAGNV0kZexjIMJwAAACcAAAAIAAAAbWltZXR5cGVhcHBsaWNhdGlvbi92bmQu +b2FzaXMub3BlbmRvY3VtZW50LnRleHRQSwMEFAAACAAAY1XSRnPWCvM4AwAAOAMAABgAAABU +aHVtYm5haWxzL3RodW1ibmFpbC5wbmeJUE5HDQoaCgAAAA1JSERSAAAAxgAAAQAIAwAAAN+D ++XIAAAEyUExURXJoXXN0dXZ4e3h3eHl7fnx8gXqCh32IgXuJnoB7fI+Fa4+IbIODe4iEe5+Q +eISDhICFiISJhIaMiY2Gg4uJgomMjIGJkIuOkY6Uj4SRm4+WkomZnJGKgJGUjJuTh5uWj5GV +kZGVnJaYkpWbmpmVlZmblJ2en4OYqZSYoJSfqpyeoI6gs5SipJygopKhsZapvKeajKGemKqd +k6KjnaqilKmrn7SllL2tnaampaarraquo6msrqmusKq2urSwqLu5tLy5uaO2yKu5w77EtrXI +1sC5p8+8rcHGt83BtdfGttnKus3NysLO3svP0tbRzdrQy9rYztrb2tvg5Nru9+bf3Ozg0PLj +yfLl0PXo0/vt0uTn7ujn5+jp7Ovw6OL2/en4/fLr5/336vP7/v7+/gAAAP///4uCFGkAAAHB +SURBVHja7dPbThMBEAbgYgsaQmkEVhfbxR5YDKUIslBUWuTogZI0HGxq0gBt3/8dIE00ek+C +gW8uZv6Zu+9iUsNHUSkMDAwMDAwMDAwMDAyMJ8o4Of137+z/vfV+h7Nh+a6X+8t/rqN2MxjF +y9H4fv1wjN1c5eOXdOpdshTNFQuvTrbGk2pzIq6/DJbGnk+udOY/ZYNqsBcnM4WpSqVbquxk 
+e7nCRPZ4pZ8Oxhe+vZh+3RzbqC1uzlxvXz0Y4+fpbC7qlVqZ8moUhfH5j8/PkupBMahHYb4V +Ti7/mm+EH1a/HkXFIG7n33RrpUY4DOOzjeR9v9SefXtYy2YOovX84mZ00R78t7/RWfPiGBgY +GBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgY +GBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgY +GBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgY +GBgYGBgYGBgYGBgYGBj3VrfW1XHYvlYYzQAAAABJRU5ErkJgglBLAwQUAAgICABjVdJGAAAA +AAAAAAAAAAAACwAAAGNvbnRlbnQueG1spVfNjts2EL73KVQFyE1La50AWWXtIEAQoMAuUHS3 +Ra5cirLZSqRKUpb9Nn2WPlmGpEVTtuVl4oNliPPNzMfh/FD3n7ZNnWyoVEzwRZrfzNKEciJK +xleL9M/nr9mH9NPyl3tRVYzQohSkayjXGRFcw38C2lwVTrpIO8kLgRVTBccNVYUmhWgpH7SK +EF1YX25F6V0drW7BobamWx2rbLAjXfwS79mCQ+1S4j5W2WAhqKF6JWKVt6rOKgFRb1qs2RGL +bc34P4t0rXVbINT3/U0/vxFyhfK7uztkpZ4w8bi2k7VFlQTRmhpnCuU3ORqwDdU4lp/BhpR4 +17xQGR0arPHJqarNKjojNquJ0JA1ltG5YcHj452X8cc7L0PdBuv1xJl8QI8gtI/Hh0MuyCbW +l8GOQkUka6O36dChvhDCUzUKrkAt3dvZ7B1y7wG6vwjvJdNUBnByEU5wTXzERXMuaIDLESAy +ujFp6hPfBEJNKNwiJ/ZgVU6a/vb48ETWtMEHMHsdnDGuNOaHyEhzCJM7fY8kbYXUPjBVfMOE +07r13Na6qafL3UgH6EqW5Vko0JkjKH0ovGzDaP8mHXXyywlxd5QQti2+pmJBYd+8qJDPkMH4 +MoYUOTR5ufJzqBIdh03A7NoHkG5bKpkR4dqqFSMLYdbX4idM7mdXYGHU0Bmth47ht3TWjBBZ +oyCBoFBEWwTa4/kgm22cOVMcoqyOLR41CqLUXJ/Lh+c/kJFlZjzCANh7Cq4Ft+lyuAO4/qGQ +X6jgLpBVmNCspKRWy3vXy/1y4t4N70X6VVL6hLnK0wTa9gBqWL07yNJkZMEIsxXlsF/oJKpn +SqXospcHBuPHHmLyBGrVGWdvcSvUx2OgW71IQIoG8xGiZZpAs99gyWyW/wA5t9vXuQEugpqL +zRXUvtC/8V/dZVoBJobSTmnaXMMpyIqfS5goBmgqmffruNNw7pqRzNrxWW6fI76/597XnmaL +JV5J3K4HASyYS7R9yZzWE0ySEssyHQybUsxaqGUqNaMq8Z25gKKEJgOjqJqRlzwNJN5RdoQx +Gwz4TpN/PiFvePwQqfc5JacO0WQc94IXUe78i/GzvLeXdUX/7eCLxJ/H6WJil0qm2hrvMtFp +uPHSrIa7Asw+6H1W7Lb3W113SruiMhyvMvY85M51Vkx8rzXyxX1c2LBPR611KmHaQbIuP+99 +t5ifAiAhlj1VoqHJMCETSIANK2mZwHyxGgn8JCYmqv//9+tAAewNdNpDArijRaNTRxOfl8vv +UEsHCLswXn60AwAAnw4AAFBLAwQUAAgICABjVdJGAAAAAAAAAAAAAAAADAAAAHNldHRpbmdz +LnhtbL1aW3PaOBR+31+R4T0lIZcmTJKOIaWlpYEB0sz2TdgH0CLreCQ5wL/fIxloFnBLjbVP +DL6cTzqX71zkuw+LWJy8gtIc5X3l/N1Z5QRkiBGXk/vK87B1elP58PDXHY7HPIR6hGEagzSn 
+GoyhR/QJvS51Pbt9X0mVrCPTXNcli0HXTVjHBOT6tfrbp+sOLLuyEFzO7itTY5J6tTqfz9/N +L96hmlTPb29vq+7u+tEQ5ZhPDoXKnn4LhYgbIPtCthgHVjs7u6xm/ysnq0W+UU2t8rDWw3r7 +D3crgOznlBuIrW5OVpft0u4rBFl/5TDfaK2y773/vvOdng8UsCEmlfUds0zojkA5qTyc3VV3 +RRwutgNj40PuC4/MdK/gy9rFxdVxwj8Dn0z3L/v8/e3tRTHpgynO+xCRj0FzyuQE9BbCCFEA +k5UHo1IohtGWDYVzDd8wgjzpYyb0weJPY5acchnBAqJdZe13MPcOhYZaHqbydrS1VG0Ut8q2 +vlwrbspc36udvb+9Li43L1RqV2fnRaVqPhJQfrA4saWHtpPaz40RG4EFY2Qlu4HGYJwbgAV9 +4gdiPCRR2942RWWOIqQOW2JqmijSWG4HdVnSG4iz0qJ6Vy8tFhpU+9d+U3DxbT0AAaGBqKXo +QoGV77n4llzybq/4av8DlBsPz6bZhVQxQ7n5T9JqjxjMtNjiaXfjK3arHigkkBKNw99j/uO8 +y8lvIikThYdk5MT32AQaLJxNFKZym+jLAnFcZJG87UKhTsiVPUSfkz8gjxDwBUe5GzgW4WOc +mOUvVXQEwiAdGUUM0hJLHciQnBKiwP3zABYkiaCdKDZRLJl+Y2rWQhUzM8SnNB6BC67yUYds +1KUaneAmXHqQ/5GWHg2Wmm60KCJ9KM5B+BL+M8ataayqHhWbO7YsH+xZjqhMnzEqFTY294Gz +MvpY4NwHsWDSoWZgkLCQ1j+Y0i5musWVNvZy+YCDmAnRZInugQophRIZXF/74pvfWb/4NqjO +ErQJcIzWBCGsw5UP09c86iNuUz5tzTYQF5c31wW7zGe5IMIK4Qco/LgwHWCRH8pqCp4EmnpN +S82g1sT8ouhRReTsqjIfYWOBE4h6PDSp8oLwiE9oyJWtfOtp3dE/uittevMA1kEW9clKKMXS +g/j2RJJZiGoodUYNwSwJoLIU0GQiTIWr/XzpsA9U1NoACoxRFpf41KYIX3hfUm34eGl3p1+4 +ofQtUyYals19MRGpNJz5SnvPGmz1AWoIC/NCBUnirfzQfbC+8Er9fJsanHwbFee8IHLZe5WR +hji0GdZSrAcad7L7OP8KsD2eKMkwXRE5B6CqDajnCr3a/00m92P+wNjxUGZ5gtwUwLotO1z7 +CNhAUN3jNEiNSZPJEET5bpDZyWflTk5NaRaUZOI3ubb4LloCmTGuHsXYtQh+2izlKuw117iC +zkdG0h02AvG4OuLwRcx9sIcekDsUODqnbmrpLGra0qebbZiAahEITQ81t4nbD5jTn2NQXxMP +UFZp9qCLNJeNIHOm9FM+mZ5SmYcitRsu6NrsFb5nR2Bd2RSofRRyTYof2s0jo0hlGpoYx2xn +JnX4eG5LnP0dYKrC4gO/bmrs8VCHAkP8zUFE2qfHNlFSj/OzdOlKcl3y2/9N83uG8a69KjjO +XB2uUV9tKADJk77CTr1O4NeXDS6ZWh5iENsE7m8Ar2pXNzclTBT7w44vfgA1AJPuFFd/qIJA +8IkkqhkYTNak5mUYYo1mGdRHOpCUaUwDFXm7vyJtVT07Wl6hBGZgmPJQqW+6egqlhMjXEmfp +Z1tu0kp5Tdm49dZtvNWXt0OIBFRLYbw3IMrgNzKHCVKDz0nEjIfZXctmA58ATk+feUTp3uYD +L1XlVyrCA82Z7KUyNKmvwUZWHNEmetT5wxQFRb0vmE+2BftFa3nEtNYsqbp7wkcYs1R46e+i +7GC2Ox5r8DMdcc5qA28IcSK8OO43ZqYNymk29btc9auW5eii+KjzXGpFZplK8o9yC35B4pZn +BwSeTkFtff5J4IhtukK7mSI+k3smX9355q2a9zXgw79QSwcIGlhvBQQGAABPKAAAUEsDBBQA 
+CAgIAGNV0kYAAAAAAAAAAAAAAAAIAAAAbWV0YS54bWyNk82OmzAUhfd9CsTMFoyBScACRuqi +q6laqanUXeTYd4inYCPbDOnb10BImSSLLjn3Oz7HPxTPp7bx3kEboWTp4zDyPZBMcSHr0v+5 ++xJk/nP1qVCvr4IB4Yr1LUgbtGCp56zSkHlU+r2WRFEjDJG0BUMsI6oDuVjImiZT0KycGiF/ +l/7R2o4gNAxDOCSh0jXCeZ6jabqgnF24rtfNRHGGoIExwSAcYrSwY8P/LTWy60pKqUvQiM+l +p7g4ilI0fy90rTlv7m3AsQlyDamlwbuA4cH3zttfHXjsV8vpjjWqYirDNFDriMCZoYoj/BRE +mwBnOxyTFJP4KcT5NsnzbLMp0B1HwRm5Z01ItA3TLM/yJMdZgRZsTgUurLv4gPd6Wqv6vsNf +0/jHOeJm/NHF/rAGTBVf0Wd5ZmuQ4MxKVy/ioOHbtHGUhmmYhPHji5D9af8r2+w3qbcC9p1W +b8AsSqM2evzci4YHS8y/FeeEyxM11pU0VjBv0i09NBAw1Utb+u6mJ1G0tL4R1WHMula7FYkv +mqa1pt3xejAozRdte9bY0dHMgl4G6TKR7taGo7BgOspcyA2IfVQV6MMrQff+yOovUEsHCM3d +MirAAQAAzwMAAFBLAwQUAAgICABjVdJGAAAAAAAAAAAAAAAACgAAAHN0eWxlcy54bWztWluv +2zYSft9fYajovsmSfDm+NCcFtrvFFki6wCbb14KWKIsNJQok5Ut+/Q5JUaJlyUfJyRqFsXk4 +gTkfh8OPM8MhqTc/nnI6OWAuCCuevWgaehNcxCwhxf7Z+8/Hn/219+Pbv7xhaUpivE1YXOW4 +kL6QZ4rFBDoXYmuEz17Fiy1DgohtgXIstjLeshIXttPWRW/1UKZFKxvbXYPd3hKf5NjOCnvR +F+3Gj6zBbu+Eo+PYzgoLnLrdUza280lQP2V+zPISSdKx4kRJ8enZy6Qst0FwPB6nx/mU8X0Q +bTabQEsbg+MGV1acalQSB5hiNZgIomkUWGyOJRprn8K6JhVVvsN8NDVIoqtVFYf9aI847Aeo +iTPER/uGBl8u7zwZv7zzxO2bI5kNrMk6eA9C/ef9u9YXeD52LIW9oCrmpBw9TYN2+zPGGlNV +BxOg2txZGC4C89tBH2/Cj5xIzB14fBMeIxo3jLO8jzTARQEgfHxQbmrRXE16UPMy4LhkXDaG +pOMTFLAza8IrkzkdDi8ltdA9T5JeKJgzDyDUwNH9A8HH77yLzHl7ATadBdBp6KUuGuTmqZsd +ojBQmCZsYEnapMr3TdpPWVXAJGCrqAnEpxJzokSI6m7bCw2ul1H2FSrrvcLRcBHeQsxlH98f +/x0oma/SPSS0Wouzy828t3ZLSxlsZymKsZ/gmIq3b0wqapon5rcy7tn7mWP8ARUi8iaQdSwo +J/TcyrzJhQYl9Pe4gElBIIgjEcILbo/yjkD21JxMPkC3tGewv6KSiR+6QNN60wDOclRcIEoi +Y8hVB8SJdpovMM7M9mXbADfCNMPNK0z7O/4D/VbdNsvBjDHpLCTOX2OT4xVf5zCjLAiGnLlu +N+WatTTBKapoXcRZzbVRe47KjMSexda//ZJD1uCSQNGnJiIkZ58wbLuUwT7/3XzxtEQLb6Ky +yDYllDaS1WyTxuDBKdseQZXPSqljsGC++l13ERlK2NEHawWW/unZC6dRtI5I0Ss/X8sl7N0+ +lDrYFyWKodDyM8bJZ6ayiEFHi1vog5pb3IOF3WG03itsn9aabgrTORKZ+aZ8TREVji+ViCPN +/AXvWqTwPqokU4OAg5EEMwNFtMyQHUDbseMYQWkIi0ViaSVqf1bG5SyB7pT7cnfhXqRIsNoU +VZnvzsYaaW2ELQY8iJVC+d+w2Q1c2X01m0pgoKFQi6sHr91G8gpfGDWUGMGttFyQzyCPZqXU 
+bRQV+wrtoSnluiGGbUZycJy//aNhCEuoU/xPmBd6dr1j+lAtoKKbWlqMGtdionC6LBuarQlW ++jmzktoWK/jp1+tRVTlL8WkgR+hBG4iZdGfQRpqR7rCN6JdfvXbpLnLCmETRrLV302mB++xc +ZrjQy+ZTlCRAurZFJwFKctKYP9K3y6qIZWUUqiQCs4R5wyK+7PzWaf2EQBYo1CDhdLGZLdvQ +vIyPEths4/L/TvwndGLXx7DNUl3H4zhHpPDVIc963+wKVFYi60BeESGmBnfSJcWu75jrhB3j +KiCUs8E2AZ5DUSmUK792YJ+zY2dwaOmE5ieMS1+yPZaZOq+r0HtpYHdA49AfIJASxBNvMEPY +xaNICDAPgqgNqWt9/8QocYJ5UB00NFdRfr8phYpXF/ARGn6fhb/vWHLuM+ulXJYjDokGKCv1 +nv70pBNHK9gxKdUhNpyG67mWmVNLDC4P/1eIdrfTZhl0NVDoagDRIzqLl9LOQE7RgWZTyv++ +Ju9mr0UbyKMTUD1qjfryQt32HCqWrbw1eij79Vp/O5HVpowCWXsb8KDFDWLY5jZ5KqvbsH0h +XPtCAHbDkqKzEyQTV/yaEPzq6BoMrM1qNjawdCGQYbLPpNpjwu/H0/QOCoSvmf+NDEOVyhEx +feGRs1EeGX1Dl7RXFON4+gnpE903dBUghqMvzMQ2274qE+vXAXN3rU8+4lKinclebYfD2Xmg +nDNtqgcUtnB8hEP26EU3rNXSns53copx9ZtjcSOvbR7tVb/A+fP0DX2KaH03feobrf6fPY71 +ZFglzdH+ivh/GYnXAVJ8wLSGGzpUA0yrqbSq3FcPEwgSbcOzSnh11y7RrogJostvWDkdk9je +TaAdiMFx9oW6D+5T24HUunVjCrU0O+LE351N6oUi2HMGb46CdnyVK+Z1EtGrSvQlyLPnt+11 +cqE4lRbeMtw7XZAOMTmS49mDcbwY4HjRz/HiHhzPH4zj5QDHy36Ol/fgePFgHD8NcPzUz/HT +PThePhjHqwGOV/0cr+7B8dODcbwe4Hjdz/H6HhyvHozjzQDHm36ON/fgeP1QHEe9DEd9/Eb3 +YHfzWOxOB/id9jM8vQvHUfhgJM8GSJ71kzx7JcmXIpf5gkks4CxZpGRf1VfGjcCvD9UpY1L9 +7luEqJ6reWs/IFqpp7a60XYUzuT1Q5vbx5y11Uuc0me/F1LzHW8hLpIhA0m/gVa9YqS1oG+Y +wZsC85WCfh/YPDkPi3301FpaGtTS1jJSxFx/mKnKOucjD62t/bZDPdqATjj6W4G95NjDcqMz +LO/FCf99mUdeD6hzhaUlR5Ko7xjXzblBtzYXqtHg5GrtQJ70GSfqa656mRmXHBHpXd/hrdar +zhGwvcO7ljUl35WEG+takTHm5ncPxh/9HJ2a2akL6PbbnhogcGnVGWrCaRiuHB7su7K/w0CH +7qBB8826B4RS9Xrbi2lD8tkTjJLmRgslf1RCGr8w3mLaOcR1bdRs+X37YGq+wwn1P8/9VqNv +5e2EM4zUs6T+EbgsOI3XilofvXbKWpAj0ehoRqsblaabL4uuzY4zO7HRUR/0f7f+9r9QSwcI +XUUkGToIAAD3LgAAUEsDBBQACAgIAGNV0kYAAAAAAAAAAAAAAAAMAAAAbWFuaWZlc3QucmRm +zZPNboMwEITvPIVlzthALwUFcijKuWqfwDWGWAUv8poS3r6Ok1ZRpKrqn9TjrkYz3460m+1h +HMiLsqjBVDRjKSXKSGi16Ss6uy65pds62ti2Kx+aHfFqg6WfKrp3bio5X5aFLTcMbM+zoih4 +mvM8T7wiwdU4cUgMxrSOCAkejUJp9eR8GjnO4glmV1F066CQefcgPYvdOqmgsgphtlK9h7Yg +kYFAjQlMyoR0gxy6TkvFM5bzUTnBoe3ix2C904OiPGDwK47P2N6IDKblXuC9sO5cg998lWh6 
+7mN6ddPF8d8jlGCcMu5P6rs7ef/n/i7P/xnir7R2RGxAzqNn+pDntPIfVUevUEsHCLT3aNIF +AQAAgwMAAFBLAwQUAAAIAABjVdJGAAAAAAAAAAAAAAAAGgAAAENvbmZpZ3VyYXRpb25zMi9w +b3B1cG1lbnUvUEsDBBQAAAgAAGNV0kYAAAAAAAAAAAAAAAAaAAAAQ29uZmlndXJhdGlvbnMy +L3N0YXR1c2Jhci9QSwMEFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAABDb25maWd1cmF0aW9u +czIvdG9vbGJhci9QSwMEFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAABDb25maWd1cmF0aW9u +czIvbWVudWJhci9QSwMEFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAABDb25maWd1cmF0aW9u +czIvZmxvYXRlci9QSwMEFAAICAgAY1XSRgAAAAAAAAAAAAAAACcAAABDb25maWd1cmF0aW9u +czIvYWNjZWxlcmF0b3IvY3VycmVudC54bWwDAFBLBwgAAAAAAgAAAAAAAABQSwMEFAAACAAA +Y1XSRgAAAAAAAAAAAAAAABoAAABDb25maWd1cmF0aW9uczIvdG9vbHBhbmVsL1BLAwQUAAAI +AABjVdJGAAAAAAAAAAAAAAAAHAAAAENvbmZpZ3VyYXRpb25zMi9wcm9ncmVzc2Jhci9QSwME +FAAACAAAY1XSRgAAAAAAAAAAAAAAAB8AAABDb25maWd1cmF0aW9uczIvaW1hZ2VzL0JpdG1h +cHMvUEsDBBQACAgIAGNV0kYAAAAAAAAAAAAAAAAVAAAATUVUQS1JTkYvbWFuaWZlc3QueG1s +tZTBbsMgDIbvfYqI6xTYeppQ0h4q7Qm6B2DESZHARGCq9u1HqrXJNGVqtO5mY/P/nzBQbU/O +FkcI0Xis2Qt/ZgWg9o3Brmbv+7fylW03q8opNC1EktegyPsw3tKapYDSq2iiROUgStLS94CN +18kBkvzeLy9Ot2wCsGabVTH6tcZCmfeH89jdJmvLXtGhZmJOZFx20BhV0rmHmqm+t0Yrym3i +iA2/APMpJyc4ERNLGPaH5D5QGRsFXUPeYzfDYJzqQAz1RS7aIw18+RxnhAdyMZQX6UYgysOO +Dxd2QOrxtHS28A+sX2s8NO0dVyd3PS322HlsTZfCRSKuhdIaLOTUB6FTCL8P929edz6HmHBA +4MlwPVUYzCvx4w/YfAJQSwcIHYDzWRwBAAA+BAAAUEsBAhQAFAAACAAAY1XSRl7GMgwnAAAA +JwAAAAgAAAAAAAAAAAAAAAAAAAAAAG1pbWV0eXBlUEsBAhQAFAAACAAAY1XSRnPWCvM4AwAA +OAMAABgAAAAAAAAAAAAAAAAATQAAAFRodW1ibmFpbHMvdGh1bWJuYWlsLnBuZ1BLAQIUABQA +CAgIAGNV0ka7MF5+tAMAAJ8OAAALAAAAAAAAAAAAAAAAALsDAABjb250ZW50LnhtbFBLAQIU +ABQACAgIAGNV0kYaWG8FBAYAAE8oAAAMAAAAAAAAAAAAAAAAAKgHAABzZXR0aW5ncy54bWxQ +SwECFAAUAAgICABjVdJGzd0yKsABAADPAwAACAAAAAAAAAAAAAAAAADmDQAAbWV0YS54bWxQ +SwECFAAUAAgICABjVdJGXUUkGToIAAD3LgAACgAAAAAAAAAAAAAAAADcDwAAc3R5bGVzLnht +bFBLAQIUABQACAgIAGNV0ka092jSBQEAAIMDAAAMAAAAAAAAAAAAAAAAAE4YAABtYW5pZmVz +dC5yZGZQSwECFAAUAAAIAABjVdJGAAAAAAAAAAAAAAAAGgAAAAAAAAAAAAAAAACNGQAAQ29u +ZmlndXJhdGlvbnMyL3BvcHVwbWVudS9QSwECFAAUAAAIAABjVdJGAAAAAAAAAAAAAAAAGgAA 
+AAAAAAAAAAAAAADFGQAAQ29uZmlndXJhdGlvbnMyL3N0YXR1c2Jhci9QSwECFAAUAAAIAABj +VdJGAAAAAAAAAAAAAAAAGAAAAAAAAAAAAAAAAAD9GQAAQ29uZmlndXJhdGlvbnMyL3Rvb2xi +YXIvUEsBAhQAFAAACAAAY1XSRgAAAAAAAAAAAAAAABgAAAAAAAAAAAAAAAAAMxoAAENvbmZp +Z3VyYXRpb25zMi9tZW51YmFyL1BLAQIUABQAAAgAAGNV0kYAAAAAAAAAAAAAAAAYAAAAAAAA +AAAAAAAAAGkaAABDb25maWd1cmF0aW9uczIvZmxvYXRlci9QSwECFAAUAAgICABjVdJGAAAA +AAIAAAAAAAAAJwAAAAAAAAAAAAAAAACfGgAAQ29uZmlndXJhdGlvbnMyL2FjY2VsZXJhdG9y +L2N1cnJlbnQueG1sUEsBAhQAFAAACAAAY1XSRgAAAAAAAAAAAAAAABoAAAAAAAAAAAAAAAAA +9hoAAENvbmZpZ3VyYXRpb25zMi90b29scGFuZWwvUEsBAhQAFAAACAAAY1XSRgAAAAAAAAAA +AAAAABwAAAAAAAAAAAAAAAAALhsAAENvbmZpZ3VyYXRpb25zMi9wcm9ncmVzc2Jhci9QSwEC +FAAUAAAIAABjVdJGAAAAAAAAAAAAAAAAHwAAAAAAAAAAAAAAAABoGwAAQ29uZmlndXJhdGlv +bnMyL2ltYWdlcy9CaXRtYXBzL1BLAQIUABQACAgIAGNV0kYdgPNZHAEAAD4EAAAVAAAAAAAA +AAAAAAAAAKUbAABNRVRBLUlORi9tYW5pZmVzdC54bWxQSwUGAAAAABEAEQBwBAAABB0AAAAA + +--------------030000010109090603040500-- Added: james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json?rev=1688146&view=auto ============================================================================== --- james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json (added) +++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/nonTextual.json Mon Jun 29 08:45:43 2015 @@ -0,0 +1,156 @@ +{ + "id":0, + "mailboxId":"18", + "modSeq":42, + "size":25, + "date":"2015-06-07T00:00:00+0200", + "mediaType":"plain", + "subtype":"text", + "userFlags":[], + "headers":{ + "date":[ + "Thu, 18 Jun 2015 12:43:26 +0200" + ], + "mime-version":[ + "1.0" + ], + "x-sieve":[ + "CMU Sieve 2.2" + ], + "return-path":[ + "<btell...@linagora.com>" + ], + "subject":[ + "Test message" + ], + "message-id":[ + "<5582a0ce.4020...@linagora.com>" + ], + "received":[ + "from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53])\t by imap (Cyrus 
v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA;\t Thu, 18 Jun 2015 12:43:28 +0200","from [10.75.9.154] (unknown [92.103.166.6])\t(using TLSv1 with cipher DHE-RSA-AES128-SHA (128/128 bits))\t(No client certificate requested)\tby alderaan.linagora.com (Postfix) with ESMTPSA id 0EB1078A\tfor <btell...@linagora.com>; Thu, 18 Jun 2015 12:43:28 +0200 (CEST)" + ], + "from":[ + "Benoit Tellier <btell...@linagora.com>" + ], + "content-type":[ + "multipart/mixed; boundary=\"------------030000010109090603040500\"" + ], + "to":[ + "btell...@linagora.com" + ], + "user-agent":[ + "Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.0.1" + ] + }, + "from":[ + { + "name":"Benoit Tellier", + "address":"btell...@linagora.com" + } + ], + "to":[ + { + "name":"btell...@linagora.com", + "address":"btell...@linagora.com" + } + ], + "cc":[], + "bcc":[], + "subject":["Test message"], + "sentDate":"2015-06-18T12:43:26+0200", + "properties":[ + { + "namespace":"http://james.apache.org/rfc2045/Content-Type", + "localName":"type", + "value":"plain" + }, + { + "namespace":"http://james.apache.org/rfc2045/Content-Type", + "localName":"subtype", + "value":"text" + }, + { + "namespace":"http://james.apache.org/rfc2045", + "localName":"Content-Description", + "value":"An e-mail" + } + ], + "attachments":[ + { + "mediaType":"text", + "subtype":"plain", + "fileName":null, + "fileExtension":null, + "contentDisposition":null, + "fileMetadata":{ + "X-Parsed-By":["org.apache.tika.parser.DefaultParser","org.apache.tika.parser.txt.TXTParser"], + "Content-Encoding":["ISO-8859-1"], + "Content-Type":["text/plain; charset=ISO-8859-1"] + }, + "headers":{ + "content-transfer-encoding":["7bit"], + "content-type":["text/plain; charset=utf-8"] + }, + "textContent":"This mail have a non textual attachment !\n\n" + }, + { + "mediaType":"application", + "subtype":"vnd.oasis.opendocument.text", + "fileName":"toto.odt", + "fileExtension":"odt", + "contentDisposition":"attachment", + "fileMetadata":{ + 
"date":["2015-06-18T12:43:07.489893918"], + "meta:paragraph-count":["1"], + "meta:word-count":["7"], + "Table-Count":["0"], + "generator":["LibreOffice/4.4.3.2$Linux_X86_64 LibreOffice_project/40m0$Build-2"], + "Word-Count":["7"], + "dcterms:created":["2015-06-18T12:41:25.197399866"], + "dcterms:modified":["2015-06-18T12:43:07.489893918"], + "Last-Modified":["2015-06-18T12:43:07.489893918"], + "nbPara":["1"], + "Last-Save-Date":["2015-06-18T12:43:07.489893918"], + "meta:object-count":["0"], + "meta:character-count":["47"], + "Paragraph-Count":["1"], + "nbImg":["0"], + "meta:save-date":["2015-06-18T12:43:07.489893918"], + "modified":["2015-06-18T12:43:07.489893918"], + "Edit-Time":["PT1M42S"], + "meta:image-count":["0"], + "Image-Count":["0"], + "nbCharacter":["47"], + "nbObject":["0"], + "nbPage":["1"], + "Object-Count":["0"], + "nbWord":["7"], + "Content-Type":["application/vnd.oasis.opendocument.text"], + "X-Parsed-By":["org.apache.tika.parser.DefaultParser","org.apache.tika.parser.odf.OpenDocumentParser"], + "meta:creation-date":["2015-06-18T12:41:25.197399866"], + "meta:table-count":["0"], + "Creation-Date":["2015-06-18T12:41:25.197399866"], + "xmpTPg:NPages":["1"], + "resourceName":["toto.odt"], + "Character Count":["47"], + "editing-cycles":["2"], + "Page-Count":["1"], + "nbTab":["0"], + "meta:page-count":["1"] + }, + "headers":{ + "content-transfer-encoding":["base64"], + "content-disposition":["attachment; filename=\"toto.odt\""], + "content-type":["application/vnd.oasis.opendocument.text; name=\"toto.odt\""] + }, + "textContent":"Awesome document provided for text extraction !\n"} + ], + "textBody":"This mail have a non textual attachment !\n\n", + "isAnswered":false, + "isDeleted":false, + "isDraft":false, + "isFlagged":false, + "isRecent":false, + "hasAttachment":true, + "isUnread":true +} Modified: james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json URL: 
http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json?rev=1688146&r1=1688145&r2=1688146&view=diff ============================================================================== --- james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json (original) +++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/recursiveMail.json Mon Jun 29 08:45:43 2015 @@ -95,6 +95,7 @@ "textContent": "Forward as attachment !\n\n\n", "mediaType": "text", "subtype": "plain", + "fileMetadata":{}, "fileName": null, "fileExtension": null, "contentDisposition": null, @@ -111,6 +112,7 @@ "textContent": null, "mediaType": null, "subtype": null, + "fileMetadata":{}, "fileName": null, "fileExtension": null, "contentDisposition": null, @@ -131,6 +133,7 @@ "mediaType": null, "subtype": null, "fileName": null, + "fileMetadata":{}, "fileExtension": null, "contentDisposition": null, "headers": { @@ -176,6 +179,7 @@ "subtype": "plain", "fileName": null, "fileExtension": null, + "fileMetadata":{}, "contentDisposition": null, "headers": { "content-transfer-encoding": [ @@ -188,6 +192,7 @@ }, { "textContent": null, + "fileMetadata":{}, "mediaType": "application", "subtype": "vnd.oasis.opendocument.spreadsheet", "fileName": "Sprint-2015-05-18.ods", Modified: james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json?rev=1688146&r1=1688145&r2=1688146&view=diff ============================================================================== --- james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json (original) +++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json Mon Jun 29 08:45:43 2015 @@ -127,6 +127,7 @@ "mediaType": "text", "subtype": "plain", "fileName": null, + "fileMetadata":{}, "fileExtension": null, "contentDisposition": "inline", 
"headers": { @@ -147,6 +148,7 @@ "subtype": "rfc822-headers", "fileName": "header", "fileExtension": "", + "fileMetadata":{}, "contentDisposition": "inline", "headers": { "content-disposition": [ @@ -168,6 +170,7 @@ "mediaType": "text", "subtype": "plain", "fileName": null, + "fileMetadata":{}, "fileExtension": null, "contentDisposition": "inline", "headers": { --------------------------------------------------------------------- To unsubscribe, e-mail: server-dev-unsubscr...@james.apache.org For additional commands, e-mail: server-dev-h...@james.apache.org