This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-mime4j.git


The following commit(s) were added to refs/heads/master by this push:
     new 0ee7cc83 MIME4J-331 Add support for RFC 6532, and test it. (#114)
0ee7cc83 is described below

commit 0ee7cc830ed449d8c44c0e5e2398a79fa2f5ff89
Author: Arnt Gulbrandsen <a...@gulbrandsen.priv.no>
AuthorDate: Tue Mar 18 15:05:09 2025 +0100

    MIME4J-331 Add support for RFC 6532, and test it. (#114)
    
    * Add support for RFC 6532, and test it.
    * Test long headers explicitly
    * Extend the 653x test with unencoded UTF8 Subject.
---
 .../apache/james/mime4j/stream/RawFieldParser.java |  4 +-
 .../james/mime4j/parser/MimeStreamParserTest.java  | 51 ++++++++++++++++++++++
 .../apache/james/mime4j/parser/TestHandler.java    |  3 +-
 .../james/mime4j/stream/RawFieldParserTest.java    | 30 +++++++++++++
 .../apache/james/mime4j/stream/RawFieldTest.java   |  3 ++
 .../mime4j/field/address/LenientAddressParser.java |  2 +-
 .../mime4j/field/address/AddressListParser.jjt     |  2 +
 .../field/address/DefaultAddressBuilderTest.java   |  4 ++
 .../field/address/LenientAddressBuilderTest.java   |  5 +++
 9 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
index 8437927f..b39cad33 100644
--- a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
+++ b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
@@ -323,6 +323,7 @@ public class RawFieldParser {
      */
     public void copyContent(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters,
             final StringBuilder dst) {
+        ByteArrayBuffer dstRaw = new ByteArrayBuffer(80);
         int pos = cursor.getPos();
         int indexFrom = cursor.getPos();
         int indexTo = cursor.getUpperBound();
@@ -333,10 +334,11 @@ public class RawFieldParser {
                 break;
             } else {
                 pos++;
-                dst.append(current);
+                dstRaw.append(current);
             }
         }
         cursor.updatePos(pos);
+        dst.append(ContentUtil.decode(StandardCharsets.UTF_8, dstRaw));
     }
 
     /**
diff --git a/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java b/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java
index e8a3f689..74e2c36e 100644
--- a/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java
+++ b/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java
@@ -21,6 +21,7 @@ package org.apache.james.mime4j.parser;
 
 import org.apache.james.mime4j.stream.BodyDescriptor;
 import org.apache.james.mime4j.stream.Field;
+import org.apache.james.mime4j.stream.MimeConfig;
 import org.apache.james.mime4j.util.ByteSequence;
 import org.apache.james.mime4j.util.ContentUtil;
 import org.junit.Assert;
@@ -29,6 +30,7 @@ import org.junit.Test;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.Charset;
 import java.util.LinkedList;
 
 public class MimeStreamParserTest {
@@ -430,6 +432,55 @@ public class MimeStreamParserTest {
         Assert.assertEquals(expected, result);
     }
 
+    @Test
+    public void testRfc6531() throws Exception {
+        MimeStreamParser parser = new MimeStreamParser(MimeConfig.STRICT);
+        parser.setContentDecoding(true);
+        TestHandler handler = new TestHandler();
+        parser.setContentHandler(handler);
+
+        String msg = "Subject: Naïve Subject\r\n"
+                + "From: foo@ø.example\r\n"
+                + "To: ø@example.com\r\n"
+                + "Content-Type: text/plain; charset=utf-8\r\n"
+                + "\r\n"
+                + "This sentence ends with the letter x.\r\n";
+        String expected = "<message>\r\n"
+                + "<header>\r\n"
+                + "<field>\r\n"
+                + "Subject: Naïve Subject"
+                + "</field>\r\n"
+                + "<field>\r\n"
+                + "From: foo@ø.example"
+                + "</field>\r\n"
+                + "<field>\r\n"
+                + "To: ø@example.com"
+                + "</field>\r\n"
+                + "<field>\r\n"
+                + "Content-Type: text/plain; charset=utf-8"
+                + "</field>\r\n"
+                + "</header>\r\n"
+                + "<body>\r\n"
+                + "This sentence ends with the letter x.\r\n"
+                + "</body>\r\n"
+                + "</message>\r\n";
+
+        // Dot the ı's and check that the ø is present in the message
+        // as its UTF8 encoding, 0xC3 0xB8. If the test uses anything
+        // else, then passing the test doesn't imply correctness.
+        byte[] msgAsUtf8 = msg.getBytes(Charset.forName("utf8"));
+        int i = 0;
+        while(i+1 < msgAsUtf8.length &&
+              (msgAsUtf8[i] != 0xC3 || msgAsUtf8[i+1] != 0xB8))
+              i++;
+        Assert.assertTrue(i < msgAsUtf8.length);
+
+        parser.parse(new ByteArrayInputStream(msgAsUtf8));
+        String result = handler.sb.toString();
+
+        Assert.assertEquals(expected, result);
+    }
+
     protected String decode(ByteSequence byteSequence) {
         return ContentUtil.decode(byteSequence);
     }
diff --git a/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java b/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java
index f0ccd4dd..449a342b 100644
--- a/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java
+++ b/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java
@@ -22,6 +22,7 @@ package org.apache.james.mime4j.parser;
 import java.io.IOException;
 import java.io.InputStream;
 
+import org.apache.james.mime4j.Charsets;
 import org.apache.james.mime4j.parser.ContentHandler;
 import org.apache.james.mime4j.stream.BodyDescriptor;
 import org.apache.james.mime4j.stream.Field;
@@ -94,7 +95,7 @@ class TestHandler implements ContentHandler {
         sb.append("<header>\r\n");
     }
     public void field(Field field) {
-        sb.append("<field>\r\n").append(escape(ContentUtil.decode(field.getRaw()))).append("</field>\r\n");
+        sb.append("<field>\r\n").append(escape(ContentUtil.decode(Charsets.UTF_8, field.getRaw()))).append("</field>\r\n");
     }
     public void endHeader() {
         sb.append("</header>\r\n");
diff --git a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java
index ca41198e..dd3e189a 100644
--- a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java
+++ b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java
@@ -75,6 +75,25 @@ public class RawFieldParserTest {
         Assert.assertTrue(cursor.atEnd());
     }
 
+
+    @Test
+    public void testUtf8StringParsing() throws Exception {
+        String s = "grå \"rød\"";
+        ByteSequence raw = ContentUtil.encode(s);
+        ParserCursor cursor = new ParserCursor(0, 2 + s.length());
+
+        StringBuilder strbuf1 = new StringBuilder();
+        parser.copyContent(raw, cursor, RawFieldParser.INIT_BITSET(':'), strbuf1);
+        Assert.assertFalse(cursor.atEnd());
+        Assert.assertEquals("grå", strbuf1.toString());
+
+        parser.skipWhiteSpace(raw, cursor);
+
+        StringBuilder strbuf2 = new StringBuilder();
+        parser.copyQuotedContent(raw, cursor, strbuf2);
+        Assert.assertEquals("rød", strbuf2.toString());
+    }
+
     @Test
     public void testTokenParsingWithQuotedPairs() throws Exception {
         String s = "raw: \"\\\"some\\stuff\\\\\"";
@@ -228,6 +247,17 @@ public class RawFieldParserTest {
         }
     }
 
+    @Test
+    public void testLongString() throws Exception {
+        String body = "01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";
+        String s = "raw: " + body;
+        ByteSequence raw = ContentUtil.encode(s);
+
+        RawField rawField = parser.parseField(raw);
+        Assert.assertEquals("raw", rawField.getName());
+        Assert.assertEquals(body, rawField.getBody());
+    }
+
     @Test
     public void testParsingInvalidSyntax2() throws Exception {
         String s = "raw    \t \t";
diff --git a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java
index 90d85134..dbf0a3b7 100644
--- a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java
+++ b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java
@@ -38,6 +38,9 @@ public class RawFieldTest {
         Assert.assertEquals("raw", field.getName());
         Assert.assertEquals("stuff;  more stuff", field.getBody());
         Assert.assertEquals(s, field.toString());
+        raw = ContentUtil.encode("To: ø@ø.example");
+        field = new RawField(raw, 3, "To", null);
+        Assert.assertEquals("ø@ø.example", field.getBody());
     }
 
     @Test
diff --git a/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java b/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java
index 99f130ae..63edc860 100644
--- a/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java
+++ b/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java
@@ -217,7 +217,7 @@ public class LenientAddressParser implements AddressParser {
 
     public Mailbox parseMailbox(final CharSequence text) {
         ByteSequence raw = ContentUtil.encode(text);
-        ParserCursor cursor = new ParserCursor(0, text.length());
+        ParserCursor cursor = new ParserCursor(0, raw.length());
         return parseMailbox(raw, cursor, null);
     }
 
diff --git a/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt b/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt
index 51feff11..6d6ebe34 100644
--- a/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt
+++ b/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt
@@ -243,7 +243,9 @@ TOKEN :
 {
        < #ALPHA: ["a" - "z", "A" - "Z"] >
 |      < #DIGIT: ["0" - "9"] >
+|      < UTF8NONASCII: ["\u0080" - "\uFFFF"] >
 |      < #ATEXT: ( <ALPHA> | <DIGIT>
+                         | <UTF8NONASCII>
                          | "!" | "#" | "$" | "%"
                          | "&" | "'" | "*" | "+"
                          | "-" | "/" | "=" | "?"
diff --git a/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java b/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java
index 0302d9f5..d5a46715 100644
--- a/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java
+++ b/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java
@@ -67,6 +67,10 @@ public class DefaultAddressBuilderTest {
         Assert.assertEquals("Hans M\374ller", mailbox5.getName());
         Assert.assertEquals("hans.mueller@acme.org", mailbox5.getAddress());
 
+        // UTF8 should be allowed in atoms too now
+        Mailbox mailbox6 = parser.parseMailbox(
+                "<dr.müller@dr-müller-lüdenscheid.de>");
+        Assert.assertEquals("dr.müller@dr-müller-lüdenscheid.de", mailbox6.getAddress());
     }
 
     @Test
diff --git a/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java b/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java
index a9d59642..b3fdeefc 100644
--- a/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java
+++ b/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java
@@ -231,6 +231,11 @@ public class LenientAddressBuilderTest {
                 "\"Hans M\374ller\" <hans.mueller@acme.org>");
         Assert.assertEquals("Hans M\374ller", mailbox5.getName());
         Assert.assertEquals("hans.mueller@acme.org", mailbox5.getAddress());
+
+        // UTF8 should be allowed in atoms too now
+        Mailbox mailbox6 = parser.parseMailbox(
+                "<dr.müller@dr-müller-lüdenscheid.de>");
+        Assert.assertEquals("dr.müller@dr-müller-lüdenscheid.de", mailbox6.getAddress());
     }
 
     @Test


---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org

Reply via email to