Author: olegk
Date: Fri Oct 31 10:07:00 2014
New Revision: 1635743

URL: http://svn.apache.org/r1635743
Log:
MIME4J-218: Content-Type fallback character set
contributed by Wolfgang Fahl <wf at bitplan.com>

Added:
    
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java
   (with props)
Modified:
    
james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/stream/DefaultFieldBuilder.java
    
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/BasicBodyFactory.java
    
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/DefaultMessageBuilder.java
    
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/MessageBuilder.java
    
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/BodyPartBuilderTest.java
    
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/EntityImplTest.java
    
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/MessageBuilderTest.java

Modified: 
james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/stream/DefaultFieldBuilder.java
URL: 
http://svn.apache.org/viewvc/james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/stream/DefaultFieldBuilder.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- 
james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/stream/DefaultFieldBuilder.java
 (original)
+++ 
james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/stream/DefaultFieldBuilder.java
 Fri Oct 31 10:07:00 2014
@@ -60,7 +60,7 @@ public class DefaultFieldBuilder impleme
         }
         int len = line.length();
         if (this.maxlen > 0 && this.buf.length() + len >= this.maxlen) {
-            throw new MaxHeaderLengthLimitException("Maximum header length 
limit exceeded");
+            throw new MaxHeaderLengthLimitException("Maximum header length 
limit (" + this.maxlen + ") exceeded");
         }
         this.buf.append(line.buffer(), 0, line.length());
     }

Modified: 
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/BasicBodyFactory.java
URL: 
http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/BasicBodyFactory.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- 
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/BasicBodyFactory.java
 (original)
+++ 
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/BasicBodyFactory.java
 Fri Oct 31 10:07:00 2014
@@ -26,6 +26,7 @@ import java.io.Reader;
 import java.io.StringReader;
 import java.io.UnsupportedEncodingException;
 import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException;
 
 import org.apache.james.mime4j.Charsets;
@@ -42,12 +43,44 @@ public class BasicBodyFactory implements
 
     public static final BasicBodyFactory INSTANCE = new BasicBodyFactory();
 
-    private static Charset resolveCharset(final String mimeCharset) throws 
UnsupportedEncodingException {
-        try {
-            return mimeCharset != null ? Charset.forName(mimeCharset) : null;
-        } catch (UnsupportedCharsetException ex) {
-            throw new UnsupportedEncodingException(mimeCharset);
+    private final boolean lenient;
+
+    public BasicBodyFactory() {
+        this(true);
+    }
+
+    public BasicBodyFactory(final boolean lenient) {
+        this.lenient = lenient;
+    }
+
+    /**
+     * select the Charset for the given mimeCharset string
+     * 
+     *  if you need support for non standard or invalid mimeCharset 
specifications
+     *  you might want to create your own derived BodyFactory extending 
BasicBodyFactory and
+     *  overriding this method as suggested by:
+     *    https://issues.apache.org/jira/browse/MIME4J-218
+     *  
+     *  the default behavior is lenient, invalid mimeCharset specifications 
will return the defaultCharset
+     * 
+     *  @param mimeCharset - the string specification for a Charset e.g. 
"UTF-8"
+     *  @throws UnsupportedEncodingException if the mimeCharset is invalid and this factory is not lenient
+     */ 
+    protected Charset resolveCharset(final String mimeCharset) throws 
UnsupportedEncodingException {
+        if (mimeCharset != null) {
+            try {
+                return Charset.forName(mimeCharset);
+            } catch (UnsupportedCharsetException ex) {
+                if (!lenient) {
+                    throw new UnsupportedEncodingException(mimeCharset);
+                }
+            } catch (IllegalCharsetNameException ex) {
+                if (!lenient) {
+                    throw new UnsupportedEncodingException(mimeCharset);
+                }
+            }
         }
+        return Charset.defaultCharset();
     }
 
     public TextBody textBody(final String text, final String mimeCharset) 
throws UnsupportedEncodingException {

Modified: 
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/DefaultMessageBuilder.java
URL: 
http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/DefaultMessageBuilder.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- 
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/DefaultMessageBuilder.java
 (original)
+++ 
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/DefaultMessageBuilder.java
 Fri Oct 31 10:07:00 2014
@@ -302,7 +302,7 @@ public class DefaultMessageBuilder imple
             BodyDescriptorBuilder bdb = bodyDescBuilder != null ? 
bodyDescBuilder :
                 new DefaultBodyDescriptorBuilder(null, fieldParser != null ? 
fieldParser :
                     strict ? DefaultFieldParser.getParser() : 
LenientFieldParser.getParser(), mon);
-            BodyFactory bf = bodyFactory != null ? bodyFactory : new 
BasicBodyFactory();
+            BodyFactory bf = bodyFactory != null ? bodyFactory : new 
BasicBodyFactory(!strict);
             MimeStreamParser parser = new MimeStreamParser(cfg, mon, bdb);
             parser.setContentHandler(new ParserStreamContentHandler(message, 
bf));
             parser.setContentDecoding(contentDecoding);

Modified: 
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/MessageBuilder.java
URL: 
http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/MessageBuilder.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- 
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/MessageBuilder.java
 (original)
+++ 
james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/MessageBuilder.java
 Fri Oct 31 10:07:00 2014
@@ -309,7 +309,7 @@ public class MessageBuilder extends Abst
     /**
      * Sets binary content of this message with the given MIME type.
      *
-     * @param body
+     * @param bin
      *            the body.
      * @param mimeType
      *            the MIME media type of the specified body
@@ -898,7 +898,7 @@ public class MessageBuilder extends Abst
         BodyDescriptorBuilder currentBodyDescBuilder = bodyDescBuilder != null 
? bodyDescBuilder :
                 new DefaultBodyDescriptorBuilder(null, fieldParser != null ? 
fieldParser :
                         strict ? DefaultFieldParser.getParser() : 
LenientFieldParser.getParser(), currentMonitor);
-        BodyFactory currentBodyFactory = bodyFactory != null ? bodyFactory : 
new BasicBodyFactory();
+        BodyFactory currentBodyFactory = bodyFactory != null ? bodyFactory : 
new BasicBodyFactory(!strict);
         MimeStreamParser parser = new MimeStreamParser(currentConfig, 
currentMonitor, currentBodyDescBuilder);
 
         Message message = new MessageImpl();

Added: 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java
URL: 
http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java?rev=1635743&view=auto
==============================================================================
--- 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java
 (added)
+++ 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java
 Fri Oct 31 10:07:00 2014
@@ -0,0 +1,108 @@
+package org.apache.james.mime4j.dom;
+
+import java.io.ByteArrayInputStream;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.james.mime4j.message.BasicBodyFactory;
+import org.apache.james.mime4j.message.DefaultMessageBuilder;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * check that the Charset handling of BasicBodyFactory can be influenced with
+ * the boolean lenient flag
+ * 
+ * @author wf
+ *
+ */
+public class MessageCharsetLenientTest {
+
+       /**
+        * set up a message with an invalid charset
+        * 
+        * @throws Exception
+        */
+       @Test
+       public void testLenientCharsetHandling() throws Exception {
+               // this list of invalidCharsets is taken from parsing a sample 
of some 1/4 million e-mails
+               // so all of them showed up in real-world e-mails
+               String invalidCharsets[] = {
+                               "%CHARSET",
+                               "'iso-8859-1'",
+                               "'utf-8'",
+                               "0",
+                               "238",
+                               "DEFAULT_CHARSET",
+                               "DIN_66003",
+                               "ISO 8859-1",
+                               "None",
+                               "Standard",
+                               "UTF-7",
+                               "X-CTEXT",
+                               "X-UNKNOWN",
+                               "\\iso-8859-1\"",
+                               "\\us-ascii\"",
+                               "ansi_x3.110-1983",
+                               "charset=us-ascii",
+                               "en",
+                               "iso-0-250-250-250-25-0-25",
+                               "iso-10646",
+                               "iso-1149-1",
+                               "iso-2191-1",
+                               "iso-3817-4",
+                               "iso-4736-8",
+                               "iso-5266-7",
+                               "iso-5666-3",
+                               "iso-5978-6",
+                               "iso-6558-5",
+                               "iso-7708-8",
+                               "iso-8085-5",
+                               "iso-8589-0",
+                               "iso-8814-4",
+                               "iso-8859-1 name=FAQ.htm",
+                               "iso-8859-16",
+                               "iso-8859-1?",
+                               "iso-8859-8-i",
+                               "iso-9284-4",
+                               "latin-iso8859-1",
+                               "unicode-1-1-utf-7",
+                               "unknown-8bit",
+                               "utf-7",
+                               "windows-1250 reply-type=original",
+                               "windows-1252 <!DOCTYPE HTML PUBLIC -//W3C//DTD 
HTML 4.01 Transitional//EN>",
+                               "x-user-defined", " {$RND_CHARSET$}" };
+               
+               // check with lenient charset handling on and off
+               boolean[] lenientstates = { true, false };
+               // create the message builder
+               DefaultMessageBuilder builder = new DefaultMessageBuilder();
+               // count how many Exception hits we got
+               int invalidCount=0;
+               // test in both states
+               for (boolean lenient : lenientstates) {
+                       // set how lenient we are
+            builder.setBodyFactory(new BasicBodyFactory(lenient));
+                       // check the list of invalid Charsets
+                       for (String invalidCharset : invalidCharsets) {
+                               // create a message with the charset 
+                               String charsetContent = "Subject: my 
subject\r\n"
+                                               + "Content-Type: text/plain; 
charset=" + invalidCharset + "\r\n"
+                                               + "Strange charset isn't it?\r" 
+ "\r\n";
+        // try parsing it
+                               try {
+                                       Message message = 
builder.parseMessage(new ByteArrayInputStream(
+                                                       
charsetContent.getBytes("UTF-8")));
+                                       // check some message attribute
+                                       Assert.assertEquals("text/plain", 
message.getMimeType());
+                                       // if we get here we had a lenient mode 
- in non lenient an exception would have been thrown
+                                       
Assert.assertTrue("Charset:"+invalidCharset+" should not be allowed when 
lenient is "+lenient,lenient);
+                               } catch (UnsupportedEncodingException ex) {
+                                       
Assert.assertFalse("Charset:"+invalidCharset+" should not throw an exception 
when lenient is "+lenient,lenient);
+                                       invalidCount++;
+                               }
+                       }
+               } // for
+               Assert.assertEquals(invalidCharsets.length,invalidCount);
+       }
+
+}

Propchange: 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java
------------------------------------------------------------------------------
    svn:keywords = Date Revision

Propchange: 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/BodyPartBuilderTest.java
URL: 
http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/BodyPartBuilderTest.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/BodyPartBuilderTest.java
 (original)
+++ 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/BodyPartBuilderTest.java
 Fri Oct 31 10:07:00 2014
@@ -20,13 +20,10 @@
 package org.apache.james.mime4j.message;
 
 import java.io.InputStream;
-import java.util.List;
 
 import org.apache.james.mime4j.Charsets;
 import org.apache.james.mime4j.dom.Body;
-import org.apache.james.mime4j.dom.TextBody;
 import org.apache.james.mime4j.dom.field.ContentTypeField;
-import org.apache.james.mime4j.stream.Field;
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.Mockito;

Modified: 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/EntityImplTest.java
URL: 
http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/EntityImplTest.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/EntityImplTest.java
 (original)
+++ 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/EntityImplTest.java
 Fri Oct 31 10:07:00 2014
@@ -23,9 +23,6 @@ import org.apache.james.mime4j.dom.Body;
 import org.apache.james.mime4j.dom.Entity;
 import org.apache.james.mime4j.dom.Header;
 import org.apache.james.mime4j.field.DefaultFieldParser;
-import org.apache.james.mime4j.message.BasicBodyFactory;
-import org.apache.james.mime4j.message.BodyPart;
-import org.apache.james.mime4j.message.HeaderImpl;
 import org.junit.Assert;
 import org.junit.Test;
 

Modified: 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/MessageBuilderTest.java
URL: 
http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/MessageBuilderTest.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/MessageBuilderTest.java
 (original)
+++ 
james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/MessageBuilderTest.java
 Fri Oct 31 10:07:00 2014
@@ -23,7 +23,6 @@ import java.io.InputStream;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.Date;
-import java.util.List;
 import java.util.TimeZone;
 
 import org.apache.james.mime4j.Charsets;
@@ -39,7 +38,6 @@ import org.apache.james.mime4j.dom.field
 import org.apache.james.mime4j.field.DefaultFieldParser;
 import org.apache.james.mime4j.field.Fields;
 import org.apache.james.mime4j.field.address.AddressBuilder;
-import org.apache.james.mime4j.stream.Field;
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.Mockito;


Reply via email to