This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-mime4j.git
The following commit(s) were added to refs/heads/master by this push: new 0ee7cc83 MIME4J-331 Add support for RFC 6532, and test it. (#114) 0ee7cc83 is described below commit 0ee7cc830ed449d8c44c0e5e2398a79fa2f5ff89 Author: Arnt Gulbrandsen <a...@gulbrandsen.priv.no> AuthorDate: Tue Mar 18 15:05:09 2025 +0100 MIME4J-331 Add support for RFC 6532, and test it. (#114) * Add support for RFC 6532, and test it. * Test long headers explicitly * Extend the 653x test with unencoded UTF8 Subject. --- .../apache/james/mime4j/stream/RawFieldParser.java | 4 +- .../james/mime4j/parser/MimeStreamParserTest.java | 51 ++++++++++++++++++++++ .../apache/james/mime4j/parser/TestHandler.java | 3 +- .../james/mime4j/stream/RawFieldParserTest.java | 30 +++++++++++++ .../apache/james/mime4j/stream/RawFieldTest.java | 3 ++ .../mime4j/field/address/LenientAddressParser.java | 2 +- .../mime4j/field/address/AddressListParser.jjt | 2 + .../field/address/DefaultAddressBuilderTest.java | 4 ++ .../field/address/LenientAddressBuilderTest.java | 5 +++ 9 files changed, 101 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java index 8437927f..b39cad33 100644 --- a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java +++ b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java @@ -323,6 +323,7 @@ public class RawFieldParser { */ public void copyContent(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters, final StringBuilder dst) { + ByteArrayBuffer dstRaw = new ByteArrayBuffer(80); int pos = cursor.getPos(); int indexFrom = cursor.getPos(); int indexTo = cursor.getUpperBound(); @@ -333,10 +334,11 @@ public class RawFieldParser { break; } else { pos++; - dst.append(current); + dstRaw.append(current); } } cursor.updatePos(pos); + dst.append(ContentUtil.decode(StandardCharsets.UTF_8, dstRaw)); } /** diff --git 
a/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java b/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java index e8a3f689..74e2c36e 100644 --- a/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java +++ b/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java @@ -21,6 +21,7 @@ package org.apache.james.mime4j.parser; import org.apache.james.mime4j.stream.BodyDescriptor; import org.apache.james.mime4j.stream.Field; +import org.apache.james.mime4j.stream.MimeConfig; import org.apache.james.mime4j.util.ByteSequence; import org.apache.james.mime4j.util.ContentUtil; import org.junit.Assert; @@ -29,6 +30,7 @@ import org.junit.Test; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.Charset; import java.util.LinkedList; public class MimeStreamParserTest { @@ -430,6 +432,55 @@ public class MimeStreamParserTest { Assert.assertEquals(expected, result); } + @Test + public void testRfc6531() throws Exception { + MimeStreamParser parser = new MimeStreamParser(MimeConfig.STRICT); + parser.setContentDecoding(true); + TestHandler handler = new TestHandler(); + parser.setContentHandler(handler); + + String msg = "Subject: Naïve Subject\r\n" + + "From: foo@ø.example\r\n" + + "To: ø@example.com\r\n" + + "Content-Type: text/plain; charset=utf-8\r\n" + + "\r\n" + + "This sentence ends with the letter x.\r\n"; + String expected = "<message>\r\n" + + "<header>\r\n" + + "<field>\r\n" + + "Subject: Naïve Subject" + + "</field>\r\n" + + "<field>\r\n" + + "From: foo@ø.example" + + "</field>\r\n" + + "<field>\r\n" + + "To: ø@example.com" + + "</field>\r\n" + + "<field>\r\n" + + "Content-Type: text/plain; charset=utf-8" + + "</field>\r\n" + + "</header>\r\n" + + "<body>\r\n" + + "This sentence ends with the letter x.\r\n" + + "</body>\r\n" + + "</message>\r\n"; + + // Dot the ı's and check that the ø is present in the message + // as its 
UTF8 encoding, 0xC3 0xB8. If the test uses anything + // else, then passing the test doesn't imply correctness. + byte[] msgAsUtf8 = msg.getBytes(Charset.forName("utf8")); + int i = 0; + while(i+1 < msgAsUtf8.length && + (msgAsUtf8[i] != 0xC3 || msgAsUtf8[i+1] != 0xB8)) + i++; + Assert.assertTrue(i < msgAsUtf8.length); + + parser.parse(new ByteArrayInputStream(msgAsUtf8)); + String result = handler.sb.toString(); + + Assert.assertEquals(expected, result); + } + protected String decode(ByteSequence byteSequence) { return ContentUtil.decode(byteSequence); } diff --git a/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java b/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java index f0ccd4dd..449a342b 100644 --- a/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java +++ b/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java @@ -22,6 +22,7 @@ package org.apache.james.mime4j.parser; import java.io.IOException; import java.io.InputStream; +import org.apache.james.mime4j.Charsets; import org.apache.james.mime4j.parser.ContentHandler; import org.apache.james.mime4j.stream.BodyDescriptor; import org.apache.james.mime4j.stream.Field; @@ -94,7 +95,7 @@ class TestHandler implements ContentHandler { sb.append("<header>\r\n"); } public void field(Field field) { - sb.append("<field>\r\n").append(escape(ContentUtil.decode(field.getRaw()))).append("</field>\r\n"); + sb.append("<field>\r\n").append(escape(ContentUtil.decode(Charsets.UTF_8, field.getRaw()))).append("</field>\r\n"); } public void endHeader() { sb.append("</header>\r\n"); diff --git a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java index ca41198e..dd3e189a 100644 --- a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java +++ b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java @@ -75,6 +75,25 @@ public class 
RawFieldParserTest { Assert.assertTrue(cursor.atEnd()); } + + @Test + public void testUtf8StringParsing() throws Exception { + String s = "grå \"rød\""; + ByteSequence raw = ContentUtil.encode(s); + ParserCursor cursor = new ParserCursor(0, 2 + s.length()); + + StringBuilder strbuf1 = new StringBuilder(); + parser.copyContent(raw, cursor, RawFieldParser.INIT_BITSET(':'), strbuf1); + Assert.assertFalse(cursor.atEnd()); + Assert.assertEquals("grå", strbuf1.toString()); + + parser.skipWhiteSpace(raw, cursor); + + StringBuilder strbuf2 = new StringBuilder(); + parser.copyQuotedContent(raw, cursor, strbuf2); + Assert.assertEquals("rød", strbuf2.toString()); + } + @Test public void testTokenParsingWithQuotedPairs() throws Exception { String s = "raw: \"\\\"some\\stuff\\\\\""; @@ -228,6 +247,17 @@ public class RawFieldParserTest { } } + @Test + public void testLongString() throws Exception { + String body = "01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + String s = "raw: " + body; + ByteSequence raw = ContentUtil.encode(s); + + RawField rawField = parser.parseField(raw); + Assert.assertEquals("raw", rawField.getName()); + Assert.assertEquals(body, rawField.getBody()); + } + @Test public void testParsingInvalidSyntax2() throws Exception { String s = "raw \t \t"; diff --git a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java index 90d85134..dbf0a3b7 100644 --- a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java +++ b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java @@ -38,6 +38,9 @@ public class RawFieldTest { Assert.assertEquals("raw", field.getName()); Assert.assertEquals("stuff; more stuff", field.getBody()); Assert.assertEquals(s, field.toString()); + raw = ContentUtil.encode("To: ø@ø.example"); + field = new RawField(raw, 3, "To", null); + 
Assert.assertEquals("ø@ø.example", field.getBody()); } @Test diff --git a/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java b/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java index 99f130ae..63edc860 100644 --- a/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java +++ b/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java @@ -217,7 +217,7 @@ public class LenientAddressParser implements AddressParser { public Mailbox parseMailbox(final CharSequence text) { ByteSequence raw = ContentUtil.encode(text); - ParserCursor cursor = new ParserCursor(0, text.length()); + ParserCursor cursor = new ParserCursor(0, raw.length()); return parseMailbox(raw, cursor, null); } diff --git a/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt b/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt index 51feff11..6d6ebe34 100644 --- a/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt +++ b/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt @@ -243,7 +243,9 @@ TOKEN : { < #ALPHA: ["a" - "z", "A" - "Z"] > | < #DIGIT: ["0" - "9"] > +| < UTF8NONASCII: ["\u0080" - "\uFFFF"] > | < #ATEXT: ( <ALPHA> | <DIGIT> + | <UTF8NONASCII> | "!" | "#" | "$" | "%" | "&" | "'" | "*" | "+" | "-" | "/" | "=" | "?" 
diff --git a/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java b/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java index 0302d9f5..d5a46715 100644 --- a/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java +++ b/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java @@ -67,6 +67,10 @@ public class DefaultAddressBuilderTest { Assert.assertEquals("Hans M\374ller", mailbox5.getName()); Assert.assertEquals("hans.mueller@acme.org", mailbox5.getAddress()); + // UTF8 should be allowed in atoms too now + Mailbox mailbox6 = parser.parseMailbox( + "<dr.müller@dr-müller-lüdenscheid.de>"); + Assert.assertEquals("dr.müller@dr-müller-lüdenscheid.de", mailbox6.getAddress()); } @Test diff --git a/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java b/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java index a9d59642..b3fdeefc 100644 --- a/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java +++ b/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java @@ -231,6 +231,11 @@ public class LenientAddressBuilderTest { "\"Hans M\374ller\" <hans.mueller@acme.org>"); Assert.assertEquals("Hans M\374ller", mailbox5.getName()); Assert.assertEquals("hans.mueller@acme.org", mailbox5.getAddress()); + + // UTF8 should be allowed in atoms too now + Mailbox mailbox6 = parser.parseMailbox( + "<dr.müller@dr-müller-lüdenscheid.de>"); + Assert.assertEquals("dr.müller@dr-müller-lüdenscheid.de", mailbox6.getAddress()); } @Test --------------------------------------------------------------------- To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org For additional commands, e-mail: server-dev-help@james.apache.org