This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-mime4j.git
commit d5ddd879a7fd4e1e1e7c9edb30850a43031fa7b3 Author: Shamil Vakhitov <sha...@bgerp.org> AuthorDate: Thu Aug 19 13:25:26 2021 +0500 Content-Disposition filename Q and UTF-8 encoded. --- .../apache/james/mime4j/stream/RawFieldParser.java | 46 ++++++++++++++++++++-- .../org/apache/james/mime4j/util/CharsetUtil.java | 17 ++++++++ .../field/LenientContentDispositionFieldTest.java | 34 +++++++++++++++- 3 files changed, 92 insertions(+), 5 deletions(-) diff --git a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java index bb6448b6..e2c099dc 100644 --- a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java +++ b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java @@ -19,11 +19,15 @@ package org.apache.james.mime4j.stream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.BitSet; import java.util.List; import org.apache.james.mime4j.MimeException; +import org.apache.james.mime4j.codec.DecodeMonitor; +import org.apache.james.mime4j.codec.DecoderUtil; +import org.apache.james.mime4j.util.ByteArrayBuffer; import org.apache.james.mime4j.util.ByteSequence; import org.apache.james.mime4j.util.CharsetUtil; import org.apache.james.mime4j.util.ContentUtil; @@ -191,6 +195,12 @@ public class RawFieldParser { * is not delimited by any character. */ public String parseValue(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters) { + if (!CharsetUtil.isASCII(buf)) { + String value = parseUtf8Filename(buf); + if (value != null) + return value; + } + StringBuilder dst = new StringBuilder(); boolean whitespace = false; while (!cursor.atEnd()) { @@ -219,6 +229,25 @@ public class RawFieldParser { return dst.toString(); } + /** + * Special case for parsing {@code filename} attribute in nonstandard encoding like: + * {@code Content-Disposition: attachment; filename="УПД ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ "СТАНЦИЯ ВИРТУАЛЬНАЯ" 01-05-21.pdf"} + * + * @param buf field raw. + * @return filename value or {@code null}. + */ + private String parseUtf8Filename(ByteSequence buf) { + final String value = new String(buf.toByteArray(), StandardCharsets.UTF_8); + + final String prefix = "filename=\""; + final int pos = value.indexOf(prefix); + if (pos > 0) { + return value.substring(pos + prefix.length(), value.length() - 1); + } + + return null; + } + /** * Skips semantically insignificant whitespace characters and moves the cursor to the closest * non-whitespace character. @@ -384,14 +413,17 @@ public class RawFieldParser { } pos++; indexFrom++; + + ByteArrayBuffer dstRaw = new ByteArrayBuffer(200); + boolean escaped = false; for (int i = indexFrom; i < indexTo; i++, pos++) { current = (char) (buf.byteAt(i) & 0xff); if (escaped) { if (current != '\"' && current != '\\') { - dst.append('\\'); + dstRaw.append('\\'); } - dst.append(current); + dstRaw.append(current); escaped = false; } else { if (current == '\"') { @@ -401,10 +433,18 @@ public class RawFieldParser { if (current == '\\') { escaped = true; } else if (current != '\r' && current != '\n') { - dst.append(current); + dstRaw.append(current); } } } + + String decoded = ContentUtil.decode(dstRaw); + if (decoded.startsWith("=?")) { + decoded = DecoderUtil.decodeEncodedWords(decoded, DecodeMonitor.STRICT); + } + + dst.append(decoded); + cursor.updatePos(pos); } diff --git a/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java b/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java index 3504d29f..0a9c983c 100644 --- a/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java +++ b/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java @@ -56,6 +56,23 @@ public class CharsetUtil { return (0xFF80 & ch) == 0; } + /** + * Returns <code>true</code> if the specified byte array consists entirely of + * US ASCII characters. + * + * @param raw + * byte array to test. + * @return <code>true</code> if the specified string consists entirely of + * US ASCII characters, <code>false</code> otherwise. + */ + public static boolean isASCII(ByteSequence raw) { + for (int i = 0; i < raw.length(); i++) { + if (!isASCII((char) (raw.byteAt(i) & 0xff))) + return false; + } + return true; + } + /** * Returns <code>true</code> if the specified string consists entirely of * US ASCII characters. diff --git a/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java b/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java index 1031da71..e3ca1c8c 100644 --- a/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java +++ b/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java @@ -19,17 +19,19 @@ package org.apache.james.mime4j.field; +import java.nio.charset.StandardCharsets; +import java.util.Date; + import org.apache.james.mime4j.MimeException; import org.apache.james.mime4j.dom.field.ContentDispositionField; import org.apache.james.mime4j.stream.RawField; import org.apache.james.mime4j.stream.RawFieldParser; +import org.apache.james.mime4j.util.ByteArrayBuffer; import org.apache.james.mime4j.util.ByteSequence; import org.apache.james.mime4j.util.ContentUtil; import org.junit.Assert; import org.junit.Test; -import java.util.Date; - public class LenientContentDispositionFieldTest { static ContentDispositionField parse(final String s) throws MimeException { @@ -38,6 +40,11 @@ public class LenientContentDispositionFieldTest { return ContentDispositionFieldLenientImpl.PARSER.parse(rawField, null); } + static ContentDispositionField parse(final byte[] raw) throws MimeException { + RawField rawField = RawFieldParser.DEFAULT.parseField(new ByteArrayBuffer(raw, true)); + return ContentDispositionFieldLenientImpl.PARSER.parse(rawField, null); + } + @Test public void testDispositionTypeWithSemiColonNoParams() throws Exception { ContentDispositionField f = parse("Content-Disposition: inline;"); @@ -111,6 +118,29 @@ public class LenientContentDispositionFieldTest { Assert.assertNull(f.getFilename()); } + @Test + public void testGetFilenameEncoded() throws Exception { + byte[] data = ("Content-Disposition: attachment;\n" + + " FileName=\"=?WINDOWS-1251?Q?3244659=5F=C0=EA=F2_=E7=E0_=C8=FE=EB=FC_?=\n" + + " =?WINDOWS-1251?Q?2020.pdf?=\"") + .getBytes(StandardCharsets.UTF_8); + + ContentDispositionField f = parse(data); + + Assert.assertEquals("WINDOWS-1251 Q encoded filename", "3244659_Акт за Июль 2020.pdf", f.getFilename()); + } + + @Test + public void testGetFilenameUtf8() throws Exception { + byte[] data = + "Content-Disposition: attachment; filename=\"УПД ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" 01-05-21.pdf\"" + .getBytes(StandardCharsets.UTF_8); + + ContentDispositionField f = parse(data); + + Assert.assertEquals("UTF8 encoded filename", "УПД ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" 01-05-21.pdf", f.getFilename()); + } + @Test public void testGetCreationDate() throws Exception { ContentDispositionField f = parse("Content-Disposition: inline; " --------------------------------------------------------------------- To unsubscribe, e-mail: server-dev-unsubscr...@james.apache.org For additional commands, e-mail: server-dev-h...@james.apache.org