This is an automated email from the ASF dual-hosted git repository.

rcordier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git

commit ef87f69290a724e9f6065173f91ad051e6268d02
Author: Benoit Tellier <[email protected]>
AuthorDate: Tue Feb 4 16:27:35 2020 +0700

    MAILBOX-395 ElasticSearch indexing should not fail upon invalid charset
---
 .../mailbox/elasticsearch/json/MimePartParser.java | 17 +++++++---
 .../json/MessageToElasticSearchJsonTest.java       | 22 ++++++++++++
 .../src/test/resources/eml/invalidCharset.eml      | 10 ++++++
 .../src/test/resources/eml/invalidCharset.json     | 39 ++++++++++++++++++++++
 4 files changed, 84 insertions(+), 4 deletions(-)

diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
index 7cd6e3a..a87ae44 100644
--- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
+++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePartParser.java
@@ -33,10 +33,13 @@ import org.apache.james.mime4j.message.MaximalBodyDescriptor;
 import org.apache.james.mime4j.stream.EntityState;
 import org.apache.james.mime4j.stream.MimeConfig;
 import org.apache.james.mime4j.stream.MimeTokenStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.google.common.base.Preconditions;
 
 public class MimePartParser {
+    private static final Logger LOGGER = LoggerFactory.getLogger(MimePartParser.class);
 
     private final Message message;
     private final TextExtractor textExtractor;
@@ -120,10 +123,16 @@ public class MimePartParser {
             .addSubType(descriptor.getSubType())
             .addContentDisposition(descriptor.getContentDispositionType())
             .addFileName(descriptor.getContentDispositionFilename());
-
-        Optional.ofNullable(descriptor.getCharset())
-            .map(Charset::forName)
-            .ifPresent(currentlyBuildMimePart::charset);
+        extractCharset(descriptor);
     }
 
+    private void extractCharset(MaximalBodyDescriptor descriptor) {
+        try {
+            Optional.ofNullable(descriptor.getCharset())
+                .map(Charset::forName)
+                .ifPresent(currentlyBuildMimePart::charset);
+        } catch (Exception e) {
+            LOGGER.info("Failed parsing charset", e);
+        }
+    }
 }
diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
index 8586c1a..7c7f5ba 100644
--- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
+++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java
@@ -110,6 +110,28 @@ class MessageToElasticSearchJsonTest {
     }
 
     @Test
+    void invalidCharsetShouldBeWellConvertedToJson() throws IOException {
+        MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(
+            new DefaultTextExtractor(),
+            ZoneId.of("Europe/Paris"), IndexAttachments.YES);
+        MailboxMessage spamMail = new SimpleMailboxMessage(MESSAGE_ID,
+                date,
+                SIZE,
+                BODY_START_OCTET,
+                ClassLoaderUtils.getSystemResourceAsSharedStream("eml/invalidCharset.eml"),
+                new Flags(),
+                propertyBuilder,
+                MAILBOX_ID);
+        spamMail.setUid(UID);
+        spamMail.setModSeq(MOD_SEQ);
+
+        String actual = messageToElasticSearchJson.convertToJson(spamMail, ImmutableList.of(USERNAME));
+        assertThatJson(actual)
+            .when(IGNORING_ARRAY_ORDER)
+            .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/invalidCharset.json"));
+    }
+
+    @Test
     void htmlEmailShouldBeWellConvertedToJson() throws IOException {
         MessageToElasticSearchJson messageToElasticSearchJson = new MessageToElasticSearchJson(
             new DefaultTextExtractor(),
diff --git a/mailbox/store/src/test/resources/eml/invalidCharset.eml b/mailbox/store/src/test/resources/eml/invalidCharset.eml
new file mode 100644
index 0000000..62bc3fb
--- /dev/null
+++ b/mailbox/store/src/test/resources/eml/invalidCharset.eml
@@ -0,0 +1,10 @@
+To: Antoine DUPRAT <[email protected]>
+From: Antoine DUPRAT <[email protected]>
+Subject: Inline attachment
+Message-ID: <[email protected]>
+Date: Tue, 5 Jul 2016 11:47:46 +0200
+MIME-Version: 1.0
+Content-Type: text/plain; charset=%invalid; format=flowed
+Content-Transfer-Encoding: 7bit
+
+This is an inline attachment: Cheers!
\ No newline at end of file
diff --git a/mailbox/store/src/test/resources/eml/invalidCharset.json b/mailbox/store/src/test/resources/eml/invalidCharset.json
new file mode 100644
index 0000000..eed4184
--- /dev/null
+++ b/mailbox/store/src/test/resources/eml/invalidCharset.json
@@ -0,0 +1,39 @@
+{
+  "attachments":[],
+  "bcc":[],
+  "htmlBody":null,
+  "textBody":"This is an inline attachment: Cheers!",
+  "cc":[],
+  "date":"2015-06-07T00:00:00+0200",
+  "from":[{"name":"Antoine DUPRAT","address":"[email protected]"}],
+  "hasAttachment":false,
+  "headers":[
+    {"name":"to","value":"Antoine DUPRAT <[email protected]>"},
+    {"name":"from","value":"Antoine DUPRAT <[email protected]>"},
+    {"name":"subject","value":"Inline attachment"},
+    {"name":"message-id","value":"<[email protected]>"},
+    {"name":"date","value":"Tue, 5 Jul 2016 11:47:46 +0200"},
+    {"name":"mime-version","value":"1.0"},
+    {"name":"content-type","value":"text/plain; charset=%invalid; format=flowed"},
+    {"name":"content-transfer-encoding","value":"7bit"}
+  ],
+  "mailboxId":"18",
+  "mediaType":"plain",
+  "messageId":"184",
+  "modSeq":42,
+  "sentDate":"2016-07-05T11:47:46+0200",
+  "size":25,
+  "subject":["Inline attachment"],
+  "subtype":"text",
+  "text":"Antoine DUPRAT [email protected] Antoine DUPRAT [email protected] Inline attachment This is an inline attachment: Cheers!",
+  "to":[{"name":"Antoine DUPRAT","address":"[email protected]"}],
+  "uid":25,
+  "userFlags":[],
+  "mimeMessageID":"<[email protected]>",
+  "isAnswered":false,
+  "isDeleted":false,
+  "isDraft":false,
+  "isFlagged":false,
+  "isRecent":false,
+  "isUnread":true
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to