JAMES-1934 implements hasAttachment search for ES
Project: http://git-wip-us.apache.org/repos/asf/james-project/repo Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/c1357bca Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/c1357bca Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/c1357bca Branch: refs/heads/master Commit: c1357bca59a99a5e422068782fef24f5195b86e8 Parents: 59cfd84 Author: Luc DUZAN <[email protected]> Authored: Thu Feb 9 12:56:53 2017 +0100 Committer: Benoit Tellier <[email protected]> Committed: Wed Feb 15 06:59:45 2017 +0700 ---------------------------------------------------------------------- .../elasticsearch/MailboxMappingFactory.java | 4 + .../elasticsearch/json/IndexableMessage.java | 24 ++++-- .../json/IndexableMessageWithMessageId.java | 6 +- .../elasticsearch/query/CriterionConverter.java | 6 ++ .../json/IndexableMessageTest.java | 80 +++++++++++++++++++- .../src/test/resources/eml/Toto.eml | 41 ---------- .../store/src/test/resources/eml/htmlMail.json | 2 +- .../src/test/resources/eml/nonTextual.json | 2 +- .../store/src/test/resources/eml/spamMail.json | 2 +- 9 files changed, 111 insertions(+), 56 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/MailboxMappingFactory.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/MailboxMappingFactory.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/MailboxMappingFactory.java index aaa0009..7c54b40 100644 --- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/MailboxMappingFactory.java +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/MailboxMappingFactory.java @@ -212,6 +212,10 @@ public class MailboxMappingFactory { 
.endObject() .endObject() + .startObject(JsonMessageConstants.HAS_ATTACHMENT) + .field(NodeMappingFactory.TYPE, NodeMappingFactory.BOOLEAN) + .endObject() + .startObject(JsonMessageConstants.TEXT) .field(NodeMappingFactory.TYPE, NodeMappingFactory.STRING) .field(NodeMappingFactory.ANALYZER, NodeMappingFactory.SNOWBALL) http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java index 770f558..6150b19 100644 --- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java @@ -34,6 +34,8 @@ import org.apache.james.mailbox.elasticsearch.query.DateResolutionFormater; import org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mailbox.store.mail.model.MailboxMessage; import org.apache.james.mailbox.store.mail.model.Property; +import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder; +import org.apache.james.mailbox.store.mail.model.impl.SimpleProperty; import org.apache.james.mime4j.MimeException; import com.fasterxml.jackson.annotation.JsonProperty; @@ -46,6 +48,8 @@ import com.google.common.collect.Multimap; public class IndexableMessage { + public static final SimpleProperty HAS_ATTACHMENT_PROPERTY = new SimpleProperty(PropertyBuilder.JAMES_INTERNALS, PropertyBuilder.HAS_ATTACHMENT, "true"); + public static IndexableMessage from(MailboxMessage message, List<User> users, TextExtractor textExtractor, ZoneId zoneId, IndexAttachments indexAttachments) { @@ -57,6 +61,9 @@ public class 
IndexableMessage { indexableMessage.users = users.stream().map(User::getUserName).collect(Guavate.toImmutableList()); indexableMessage.bodyText = parsingResult.locateFirstTextBody(); indexableMessage.bodyHtml = parsingResult.locateFirstHtmlBody(); + indexableMessage.hasAttachment = message.getProperties() + .stream() + .anyMatch(property -> property.equals(HAS_ATTACHMENT_PROPERTY)); indexableMessage.setFlattenedAttachments(parsingResult, indexAttachments); indexableMessage.copyHeaderFields(parsingResult.getHeaderCollection(), getSanitizedInternalDate(message, zoneId)); indexableMessage.generateText(); @@ -68,11 +75,13 @@ public class IndexableMessage { } private void setFlattenedAttachments(MimePart parsingResult, IndexAttachments indexAttachments) { - if (indexAttachments.equals(IndexAttachments.YES)) { - attachments = parsingResult.getAttachmentsStream() - .collect(Collectors.toList()); + List<MimePart> mimeparts = parsingResult.getAttachmentsStream() + .collect(Guavate.toImmutableList()); + + if (IndexAttachments.YES.equals(indexAttachments)) { + this.attachments = mimeparts; } else { - attachments = ImmutableList.of(); + this.attachments = ImmutableList.of(); } } @@ -134,6 +143,7 @@ public class IndexableMessage { private String date; private String mediaType; private String subType; + private boolean hasAttachment; private boolean isUnRead; private boolean isRecent; private boolean isFlagged; @@ -155,11 +165,12 @@ public class IndexableMessage { private Optional<String> bodyHtml; private String text; + public IndexableMessage(long uid, String mailboxId, List<String> users, long modSeq, long size, String date, String mediaType, String subType, boolean isUnRead, boolean isRecent, boolean isFlagged, boolean isDeleted, boolean isDraft, boolean isAnswered, String[] userFlags, Multimap<String, String> headers, EMailers from, EMailers to, EMailers cc, EMailers bcc, EMailers replyTo, Subjects subjects, String sentDate, List<Property> properties, - List<MimePart> 
attachments, Optional<String> bodyText, Optional<String> bodyHtml, String text) { + List<MimePart> attachments, boolean hasAttachment, Optional<String> bodyText, Optional<String> bodyHtml, String text) { this.uid = uid; this.mailboxId = mailboxId; this.users = users; @@ -185,6 +196,7 @@ public class IndexableMessage { this.sentDate = sentDate; this.properties = properties; this.attachments = attachments; + this.hasAttachment = hasAttachment; this.bodyText = bodyText; this.bodyHtml = bodyHtml; this.text = text; @@ -330,7 +342,7 @@ public class IndexableMessage { @JsonProperty(JsonMessageConstants.HAS_ATTACHMENT) public boolean getHasAttachment() { - return attachments.size() > 0; + return hasAttachment; } @JsonProperty(JsonMessageConstants.TEXT) http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageWithMessageId.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageWithMessageId.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageWithMessageId.java index 8f376dc..2578580 100644 --- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageWithMessageId.java +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageWithMessageId.java @@ -43,7 +43,7 @@ public class IndexableMessageWithMessageId extends IndexableMessage { indexableMessage.isDeleted(), indexableMessage.isDraft(), indexableMessage.isAnswered(), indexableMessage.getUserFlags(), indexableMessage.getHeaders(), indexableMessage.getFrom(), indexableMessage.getTo(), indexableMessage.getCc(), indexableMessage.getBcc(), indexableMessage.getReplyTo(), indexableMessage.getSubjects(), indexableMessage.getSentDate(), 
indexableMessage.getProperties(), - indexableMessage.getAttachments(), indexableMessage.getBodyText(), indexableMessage.getBodyHtml(), indexableMessage.getText(), + indexableMessage.getAttachments(), indexableMessage.getHasAttachment(), indexableMessage.getBodyText(), indexableMessage.getBodyHtml(), indexableMessage.getText(), message.getMessageId().serialize()); } @@ -53,10 +53,10 @@ public class IndexableMessageWithMessageId extends IndexableMessage { String mediaType, String subType, boolean isUnRead, boolean isRecent, boolean isFlagged, boolean isDeleted, boolean isDraft, boolean isAnswered, String[] userFlags, Multimap<String, String> headers, EMailers from, EMailers to, EMailers cc, EMailers bcc, EMailers replyTo, Subjects subjects, - String sentDate, List<Property> properties, List<MimePart> attachments, Optional<String> bodyText, + String sentDate, List<Property> properties, List<MimePart> attachments, boolean hasAttachments, Optional<String> bodyText, Optional<String> bodyHtml, String text, String messageId) { super(uid, mailboxId, users, modSeq, size, date, mediaType, subType, isUnRead, isRecent, isFlagged, isDeleted, - isDraft, isAnswered, userFlags, headers, from, to, cc, bcc, replyTo, subjects, sentDate, properties, attachments, + isDraft, isAnswered, userFlags, headers, from, to, cc, bcc, replyTo, subjects, sentDate, properties, attachments, hasAttachments, bodyText, bodyHtml, text); this.messageId = messageId; } http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java index 3904a92..dcc4ff6 100644 --- 
a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java +++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java @@ -78,6 +78,8 @@ public class CriterionConverter { registerCriterionConverter(SearchQuery.InternalDateCriterion.class, criterion -> dateRangeFilter(JsonMessageConstants.DATE, criterion.getOperator())); + + registerCriterionConverter(SearchQuery.AttachmentCriterion.class, this::convertAttachmentCriterion); } @SuppressWarnings("unchecked") @@ -115,6 +117,10 @@ public class CriterionConverter { return criterionConverterMap.get(criterion.getClass()).apply(criterion); } + private QueryBuilder convertAttachmentCriterion(SearchQuery.AttachmentCriterion criterion) { + return termQuery(JsonMessageConstants.HAS_ATTACHMENT, criterion.getOperator().isSet()); + } + private QueryBuilder convertCustomFlagCriterion(SearchQuery.CustomFlagCriterion criterion) { QueryBuilder termQueryBuilder = termQuery(JsonMessageConstants.USER_FLAGS, criterion.getFlag()); if (criterion.getOperator().isSet()) { http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java index cf98e6e..a66f326 100644 --- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java +++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java @@ -20,11 +20,12 @@ package org.apache.james.mailbox.elasticsearch.json; import static org.assertj.core.api.Assertions.assertThat; -import static 
org.mockito.Mockito.any; +import static org.mockito.Matchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import java.io.ByteArrayInputStream; +import java.io.IOException; import java.time.ZoneId; import javax.mail.Flags; @@ -38,6 +39,8 @@ import org.apache.james.mailbox.mock.MockMailboxSession; import org.apache.james.mailbox.model.TestId; import org.apache.james.mailbox.store.extractor.DefaultTextExtractor; import org.apache.james.mailbox.store.mail.model.MailboxMessage; +import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder; +import org.apache.james.mailbox.store.mail.model.impl.SimpleProperty; import org.apache.james.mailbox.tika.extractor.TikaTextExtractor; import org.junit.Test; @@ -209,6 +212,77 @@ public class IndexableMessageTest { } @Test + public void hasAttachmentsShouldReturnTrueWhenPropertyIsPresentAndTrue() throws IOException { + //Given + MailboxMessage mailboxMessage = mock(MailboxMessage.class); + TestId mailboxId = TestId.of(1); + when(mailboxMessage.getMailboxId()) + .thenReturn(mailboxId); + when(mailboxMessage.getFullContent()) + .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml")))); + when(mailboxMessage.createFlags()) + .thenReturn(new Flags()); + when(mailboxMessage.getUid()) + .thenReturn(MESSAGE_UID); + when(mailboxMessage.getProperties()).thenReturn(ImmutableList.of(IndexableMessage.HAS_ATTACHMENT_PROPERTY)); + + // When + IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.YES); + + // Then + assertThat(indexableMessage.getHasAttachment()).isTrue(); + } + + @Test + public void hasAttachmentsShouldReturnFalseWhenPropertyIsPresentButFalse() throws IOException { + //Given + MailboxMessage mailboxMessage = mock(MailboxMessage.class); + TestId mailboxId = 
TestId.of(1); + when(mailboxMessage.getMailboxId()) + .thenReturn(mailboxId); + when(mailboxMessage.getFullContent()) + .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml")))); + when(mailboxMessage.createFlags()) + .thenReturn(new Flags()); + when(mailboxMessage.getUid()) + .thenReturn(MESSAGE_UID); + when(mailboxMessage.getProperties()) + .thenReturn(ImmutableList.of(new SimpleProperty(PropertyBuilder.JAMES_INTERNALS, PropertyBuilder.HAS_ATTACHMENT, "false"))); + + // When + IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.YES); + + // Then + assertThat(indexableMessage.getHasAttachment()).isFalse(); + } + + @Test + public void hasAttachmentsShouldReturnFalseWhenPropertyIsAbsent() throws IOException { + //Given + MailboxMessage mailboxMessage = mock(MailboxMessage.class); + TestId mailboxId = TestId.of(1); + when(mailboxMessage.getMailboxId()) + .thenReturn(mailboxId); + when(mailboxMessage.getFullContent()) + .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml")))); + when(mailboxMessage.createFlags()) + .thenReturn(new Flags()); + when(mailboxMessage.getUid()) + .thenReturn(MESSAGE_UID); + when(mailboxMessage.getProperties()) + .thenReturn(ImmutableList.of()); + + // When + IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()), + new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.YES); + + // Then + assertThat(indexableMessage.getHasAttachment()).isFalse(); + } + + @Test public void attachmentsShouldNotBeenIndexedWhenAsked() throws Exception { //Given MailboxMessage mailboxMessage = mock(MailboxMessage.class); @@ -216,7 +290,7 @@ public class IndexableMessageTest { 
when(mailboxMessage.getMailboxId()) .thenReturn(mailboxId); when(mailboxMessage.getFullContent()) - .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/Toto.eml")))); + .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml")))); when(mailboxMessage.createFlags()) .thenReturn(new Flags()); when(mailboxMessage.getUid()) @@ -238,7 +312,7 @@ public class IndexableMessageTest { when(mailboxMessage.getMailboxId()) .thenReturn(mailboxId); when(mailboxMessage.getFullContent()) - .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/Toto.eml")))); + .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/emailWith3Attachments.eml")))); when(mailboxMessage.createFlags()) .thenReturn(new Flags()); when(mailboxMessage.getUid()) http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/test/resources/eml/Toto.eml ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/test/resources/eml/Toto.eml b/mailbox/elasticsearch/src/test/resources/eml/Toto.eml deleted file mode 100644 index ab2de03..0000000 --- a/mailbox/elasticsearch/src/test/resources/eml/Toto.eml +++ /dev/null @@ -1,41 +0,0 @@ -Return-Path: <[email protected]> -Received: from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53]) - by imap (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA; - Tue, 29 Nov 2016 13:57:56 +0100 -X-Sieve: CMU Sieve 2.2 -Received: from [10.69.0.146] (mne69-10-88-173-78-196.fbx.proxad.net [88.173.78.196]) - (using TLSv1 with cipher DHE-RSA-AES128-SHA (128/128 bits)) - (No client certificate requested) - by alderaan.linagora.com (Postfix) with ESMTPSA id CB0233783 - for <[email protected]>; Tue, 29 Nov 2016 13:57:56 +0100 (CET) -To: Laura ROYET <[email protected]> -From: Laura Royet <[email 
protected]> -Subject: Toto -Message-ID: <[email protected]> -Date: Tue, 29 Nov 2016 13:57:56 +0100 -User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 - Thunderbird/45.5.0 -MIME-Version: 1.0 -Content-Type: multipart/mixed; - boundary="------------3F646081DC313215FD6847F4" - -This is a multi-part message in MIME format. ---------------3F646081DC313215FD6847F4 -Content-Type: text/plain; charset=utf-8; format=flowed -Content-Transfer-Encoding: 7bit - - - --- -Laura Royet - - ---------------3F646081DC313215FD6847F4 -Content-Type: text/plain; charset=UTF-8; - name="Toto.txt" -Content-Transfer-Encoding: base64 -Content-Disposition: attachment; - filename="Toto.txt" - -VG90bwpDb3B5cmlnaHQgwqkgMjAxNiBMSU5BR09SQSAKQ0MgQlktU0EsIEdOVSBGREwK ---------------3F646081DC313215FD6847F4-- http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/store/src/test/resources/eml/htmlMail.json ---------------------------------------------------------------------- diff --git a/mailbox/store/src/test/resources/eml/htmlMail.json b/mailbox/store/src/test/resources/eml/htmlMail.json index 1655f5e..a8998ef 100644 --- a/mailbox/store/src/test/resources/eml/htmlMail.json +++ b/mailbox/store/src/test/resources/eml/htmlMail.json @@ -159,7 +159,7 @@ "isAnswered":false, "isFlagged":false, "isRecent":false, - "hasAttachment":true, + "hasAttachment":false, "isUnread":false, "users": [ "username" http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/store/src/test/resources/eml/nonTextual.json ---------------------------------------------------------------------- diff --git a/mailbox/store/src/test/resources/eml/nonTextual.json b/mailbox/store/src/test/resources/eml/nonTextual.json index 9d1aa9c..e44e2d8 100644 --- a/mailbox/store/src/test/resources/eml/nonTextual.json +++ b/mailbox/store/src/test/resources/eml/nonTextual.json @@ -154,7 +154,7 @@ "isDraft":false, "isFlagged":false, "isRecent":false, - "hasAttachment":true, + "hasAttachment":false, 
"isUnread":true, "users": [ "username" http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/store/src/test/resources/eml/spamMail.json ---------------------------------------------------------------------- diff --git a/mailbox/store/src/test/resources/eml/spamMail.json b/mailbox/store/src/test/resources/eml/spamMail.json index b71a5ce..388aa2d 100644 --- a/mailbox/store/src/test/resources/eml/spamMail.json +++ b/mailbox/store/src/test/resources/eml/spamMail.json @@ -198,7 +198,7 @@ "isDraft": false, "isFlagged": false, "isRecent": false, - "hasAttachment": true, + "hasAttachment": false, "isUnread": true, "users": [ "username" --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
