This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-mime4j.git
The following commit(s) were added to refs/heads/master by this push:
new 0ee7cc83 MIME4J-331 Add support for RFC 6532, and test it. (#114)
0ee7cc83 is described below
commit 0ee7cc830ed449d8c44c0e5e2398a79fa2f5ff89
Author: Arnt Gulbrandsen <[email protected]>
AuthorDate: Tue Mar 18 15:05:09 2025 +0100
MIME4J-331 Add support for RFC 6532, and test it. (#114)
* Add support for RFC 6532, and test it.
* Test long headers explicitly
* Extend the 653x test with unencoded UTF8 Subject.
---
.../apache/james/mime4j/stream/RawFieldParser.java | 4 +-
.../james/mime4j/parser/MimeStreamParserTest.java | 51 ++++++++++++++++++++++
.../apache/james/mime4j/parser/TestHandler.java | 3 +-
.../james/mime4j/stream/RawFieldParserTest.java | 30 +++++++++++++
.../apache/james/mime4j/stream/RawFieldTest.java | 3 ++
.../mime4j/field/address/LenientAddressParser.java | 2 +-
.../mime4j/field/address/AddressListParser.jjt | 2 +
.../field/address/DefaultAddressBuilderTest.java | 4 ++
.../field/address/LenientAddressBuilderTest.java | 5 +++
9 files changed, 101 insertions(+), 3 deletions(-)
diff --git
a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
index 8437927f..b39cad33 100644
--- a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
+++ b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
@@ -323,6 +323,7 @@ public class RawFieldParser {
*/
public void copyContent(final ByteSequence buf, final ParserCursor cursor,
final BitSet delimiters,
final StringBuilder dst) {
+ ByteArrayBuffer dstRaw = new ByteArrayBuffer(80);
int pos = cursor.getPos();
int indexFrom = cursor.getPos();
int indexTo = cursor.getUpperBound();
@@ -333,10 +334,11 @@ public class RawFieldParser {
break;
} else {
pos++;
- dst.append(current);
+ dstRaw.append(current);
}
}
cursor.updatePos(pos);
+ dst.append(ContentUtil.decode(StandardCharsets.UTF_8, dstRaw));
}
/**
diff --git
a/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java
b/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java
index e8a3f689..74e2c36e 100644
---
a/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java
+++
b/core/src/test/java/org/apache/james/mime4j/parser/MimeStreamParserTest.java
@@ -21,6 +21,7 @@ package org.apache.james.mime4j.parser;
import org.apache.james.mime4j.stream.BodyDescriptor;
import org.apache.james.mime4j.stream.Field;
+import org.apache.james.mime4j.stream.MimeConfig;
import org.apache.james.mime4j.util.ByteSequence;
import org.apache.james.mime4j.util.ContentUtil;
import org.junit.Assert;
@@ -29,6 +30,7 @@ import org.junit.Test;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.Charset;
import java.util.LinkedList;
public class MimeStreamParserTest {
@@ -430,6 +432,55 @@ public class MimeStreamParserTest {
Assert.assertEquals(expected, result);
}
+ @Test
+ public void testRfc6531() throws Exception {
+ MimeStreamParser parser = new MimeStreamParser(MimeConfig.STRICT);
+ parser.setContentDecoding(true);
+ TestHandler handler = new TestHandler();
+ parser.setContentHandler(handler);
+
+ String msg = "Subject: Naïve Subject\r\n"
+ + "From: foo@ø.example\r\n"
+ + "To: ø@example.com\r\n"
+ + "Content-Type: text/plain; charset=utf-8\r\n"
+ + "\r\n"
+ + "This sentence ends with the letter x.\r\n";
+ String expected = "<message>\r\n"
+ + "<header>\r\n"
+ + "<field>\r\n"
+ + "Subject: Naïve Subject"
+ + "</field>\r\n"
+ + "<field>\r\n"
+ + "From: foo@ø.example"
+ + "</field>\r\n"
+ + "<field>\r\n"
+ + "To: ø@example.com"
+ + "</field>\r\n"
+ + "<field>\r\n"
+ + "Content-Type: text/plain; charset=utf-8"
+ + "</field>\r\n"
+ + "</header>\r\n"
+ + "<body>\r\n"
+ + "This sentence ends with the letter x.\r\n"
+ + "</body>\r\n"
+ + "</message>\r\n";
+
+ // Dot the ı's: verify the ø reaches the parser as its UTF-8 encoding,
+ // 0xC3 0xB8. Java bytes are signed, so mask with 0xFF before comparing;
+ // otherwise the scan can never match and the assertion below is vacuous.
+ byte[] msgAsUtf8 = msg.getBytes(Charset.forName("utf8"));
+ int i = 0;
+ while(i+1 < msgAsUtf8.length &&
+ ((msgAsUtf8[i] & 0xFF) != 0xC3 || (msgAsUtf8[i+1] & 0xFF) != 0xB8))
+ i++;
+ Assert.assertTrue(i+1 < msgAsUtf8.length);
+
+ parser.parse(new ByteArrayInputStream(msgAsUtf8));
+ String result = handler.sb.toString();
+
+ Assert.assertEquals(expected, result);
+ }
+
protected String decode(ByteSequence byteSequence) {
return ContentUtil.decode(byteSequence);
}
diff --git a/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java
b/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java
index f0ccd4dd..449a342b 100644
--- a/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java
+++ b/core/src/test/java/org/apache/james/mime4j/parser/TestHandler.java
@@ -22,6 +22,7 @@ package org.apache.james.mime4j.parser;
import java.io.IOException;
import java.io.InputStream;
+import org.apache.james.mime4j.Charsets;
import org.apache.james.mime4j.parser.ContentHandler;
import org.apache.james.mime4j.stream.BodyDescriptor;
import org.apache.james.mime4j.stream.Field;
@@ -94,7 +95,7 @@ class TestHandler implements ContentHandler {
sb.append("<header>\r\n");
}
public void field(Field field) {
-
sb.append("<field>\r\n").append(escape(ContentUtil.decode(field.getRaw()))).append("</field>\r\n");
+
sb.append("<field>\r\n").append(escape(ContentUtil.decode(Charsets.UTF_8,
field.getRaw()))).append("</field>\r\n");
}
public void endHeader() {
sb.append("</header>\r\n");
diff --git
a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java
b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java
index ca41198e..dd3e189a 100644
--- a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java
+++ b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldParserTest.java
@@ -75,6 +75,25 @@ public class RawFieldParserTest {
Assert.assertTrue(cursor.atEnd());
}
+
+ @Test
+ public void testUtf8StringParsing() throws Exception {
+ String s = "grå \"rød\"";
+ ByteSequence raw = ContentUtil.encode(s);
+ // Bound the cursor by the encoded byte length, not the char count: å and ø take more than one byte each.
+ ParserCursor cursor = new ParserCursor(0, raw.length());
+
+ StringBuilder strbuf1 = new StringBuilder();
+ parser.copyContent(raw, cursor, RawFieldParser.INIT_BITSET(':'),
strbuf1);
+ Assert.assertFalse(cursor.atEnd());
+ Assert.assertEquals("grå", strbuf1.toString());
+
+ parser.skipWhiteSpace(raw, cursor);
+
+ StringBuilder strbuf2 = new StringBuilder();
+ parser.copyQuotedContent(raw, cursor, strbuf2);
+ Assert.assertEquals("rød", strbuf2.toString());
+ }
+
@Test
public void testTokenParsingWithQuotedPairs() throws Exception {
String s = "raw: \"\\\"some\\stuff\\\\\"";
@@ -228,6 +247,17 @@ public class RawFieldParserTest {
}
}
+ @Test
+ public void testLongString() throws Exception {
+ String body =
"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";
+ String s = "raw: " + body;
+ ByteSequence raw = ContentUtil.encode(s);
+
+ RawField rawField = parser.parseField(raw);
+ Assert.assertEquals("raw", rawField.getName());
+ Assert.assertEquals(body, rawField.getBody());
+ }
+
@Test
public void testParsingInvalidSyntax2() throws Exception {
String s = "raw \t \t";
diff --git
a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java
b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java
index 90d85134..dbf0a3b7 100644
--- a/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java
+++ b/core/src/test/java/org/apache/james/mime4j/stream/RawFieldTest.java
@@ -38,6 +38,9 @@ public class RawFieldTest {
Assert.assertEquals("raw", field.getName());
Assert.assertEquals("stuff; more stuff", field.getBody());
Assert.assertEquals(s, field.toString());
+ raw = ContentUtil.encode("To: ø@ø.example");
+ field = new RawField(raw, 3, "To", null);
+ Assert.assertEquals("ø@ø.example", field.getBody());
}
@Test
diff --git
a/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java
b/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java
index 99f130ae..63edc860 100644
---
a/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java
+++
b/dom/src/main/java/org/apache/james/mime4j/field/address/LenientAddressParser.java
@@ -217,7 +217,7 @@ public class LenientAddressParser implements AddressParser {
public Mailbox parseMailbox(final CharSequence text) {
ByteSequence raw = ContentUtil.encode(text);
- ParserCursor cursor = new ParserCursor(0, text.length());
+ ParserCursor cursor = new ParserCursor(0, raw.length());
return parseMailbox(raw, cursor, null);
}
diff --git
a/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt
b/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt
index 51feff11..6d6ebe34 100644
---
a/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt
+++
b/dom/src/main/jjtree/org/apache/james/mime4j/field/address/AddressListParser.jjt
@@ -243,7 +243,9 @@ TOKEN :
{
< #ALPHA: ["a" - "z", "A" - "Z"] >
| < #DIGIT: ["0" - "9"] >
+| < UTF8NONASCII: ["\u0080" - "\uFFFF"] >
| < #ATEXT: ( <ALPHA> | <DIGIT>
+ | <UTF8NONASCII>
| "!" | "#" | "$" | "%"
| "&" | "'" | "*" | "+"
| "-" | "/" | "=" | "?"
diff --git
a/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java
b/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java
index 0302d9f5..d5a46715 100644
---
a/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java
+++
b/dom/src/test/java/org/apache/james/mime4j/field/address/DefaultAddressBuilderTest.java
@@ -67,6 +67,10 @@ public class DefaultAddressBuilderTest {
Assert.assertEquals("Hans M\374ller", mailbox5.getName());
Assert.assertEquals("[email protected]", mailbox5.getAddress());
+ // UTF8 should be allowed in atoms too now
+ Mailbox mailbox6 = parser.parseMailbox(
+ "<dr.müller@dr-müller-lüdenscheid.de>");
+ Assert.assertEquals("dr.müller@dr-müller-lüdenscheid.de",
mailbox6.getAddress());
}
@Test
diff --git
a/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java
b/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java
index a9d59642..b3fdeefc 100644
---
a/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java
+++
b/dom/src/test/java/org/apache/james/mime4j/field/address/LenientAddressBuilderTest.java
@@ -231,6 +231,11 @@ public class LenientAddressBuilderTest {
"\"Hans M\374ller\" <[email protected]>");
Assert.assertEquals("Hans M\374ller", mailbox5.getName());
Assert.assertEquals("[email protected]", mailbox5.getAddress());
+
+ // UTF8 should be allowed in atoms too now
+ Mailbox mailbox6 = parser.parseMailbox(
+ "<dr.müller@dr-müller-lüdenscheid.de>");
+ Assert.assertEquals("dr.müller@dr-müller-lüdenscheid.de",
mailbox6.getAddress());
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]