This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-mime4j.git
commit cab37c370b01fc39b588b92e556d2dc233f7407c Author: Tyler Rockwood <[email protected]> AuthorDate: Fri Mar 26 07:55:32 2021 -1000 MIME4J-295 Allow for overriding the specified charset when decoding q or b encoded words. It is very common to receive mislabeled charsets for windows-1252, which usually end up being ISO-8859-1. This is a minimal change that allows our email servers to not have to rewrite q-encoding, but doesn't change any default behaviors within Mime4J. --- .../org/apache/james/mime4j/codec/DecoderUtil.java | 61 +++++++++++++++++----- .../apache/james/mime4j/codec/DecoderUtilTest.java | 9 ++++ 2 files changed, 57 insertions(+), 13 deletions(-) diff --git a/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java b/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java index ab4c9ee..60aa31d 100644 --- a/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java +++ b/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java @@ -22,6 +22,8 @@ package org.apache.james.mime4j.codec; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; +import java.util.Collections; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -140,7 +142,7 @@ public class DecoderUtil { * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing) */ public static String decodeEncodedWords(String body, DecodeMonitor monitor) throws IllegalArgumentException { - return decodeEncodedWords(body, monitor, null); + return decodeEncodedWords(body, monitor, null, Collections.emptyMap()); } /** @@ -155,7 +157,7 @@ public class DecoderUtil { * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing) */ public static String decodeEncodedWords(String body, Charset fallback) throws IllegalArgumentException { - return decodeEncodedWords(body, null, fallback); + return decodeEncodedWords(body, null, 
fallback, Collections.emptyMap()); } /** @@ -172,6 +174,26 @@ public class DecoderUtil { */ public static String decodeEncodedWords(String body, DecodeMonitor monitor, Charset fallback) throws IllegalArgumentException { + return decodeEncodedWords(body, monitor, fallback, Collections.emptyMap()); + } + + /** + * Decodes a string containing encoded words as defined by RFC 2047. Encoded + * words have the form =?charset?enc?encoded-text?= where enc is either 'Q' + * or 'q' for quoted-printable and 'B' or 'b' for base64. Using fallback + * charset if charset in encoded words is invalid. Additionally, the found charset + * will be overridden if a corresponding mapping is found. + * + * @param body the string to decode + * @param monitor the DecodeMonitor to be used. + * @param fallback the fallback Charset to be used. + * @param charsetOverrides the Charsets to override and their replacements. Must not be null. + * @return the decoded string. + * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing) + */ + public static String decodeEncodedWords(String body, DecodeMonitor monitor, Charset fallback, + Map<Charset, Charset> charsetOverrides) + throws IllegalArgumentException { int tailIndex = 0; boolean lastMatchValid = false; @@ -187,7 +209,7 @@ public class DecoderUtil { return ""; String decoded; - decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor, fallback); + decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor, fallback, charsetOverrides); if (decoded == null) { sb.append(separator); sb.append(matcher.group(0)); @@ -211,17 +233,18 @@ public class DecoderUtil { } // return null on error - private static String tryDecodeEncodedWord(final String mimeCharset, - final String encoding, final String encodedText, final DecodeMonitor monitor, final Charset fallback) { - Charset charset = CharsetUtil.lookup(mimeCharset); + private static String tryDecodeEncodedWord( + final String 
mimeCharset, + final String encoding, + final String encodedText, + final DecodeMonitor monitor, + final Charset fallback, + final Map<Charset, Charset> charsetOverrides) { + Charset charset = lookupCharset(mimeCharset, fallback, charsetOverrides); if (charset == null) { - if(fallback == null) { - monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded", - "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset"); - return null; - } else { - charset = fallback; - } + monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded", + "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset"); + return null; } if (encodedText.length() == 0) { @@ -252,6 +275,18 @@ public class DecoderUtil { } } + private static Charset lookupCharset( + final String mimeCharset, + final Charset fallback, + final Map<Charset, Charset> charsetOverrides) { + Charset charset = CharsetUtil.lookup(mimeCharset); + if (charset == null) { + return fallback; + } + Charset override = charsetOverrides.get(charset); + return override != null ? override : charset; + } + private static void monitor(DecodeMonitor monitor, String mimeCharset, String encoding, String encodedText, String dropDesc, String... strings) throws IllegalArgumentException { if (monitor.isListening()) { diff --git a/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java b/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java index a33dd3c..6a76d99 100644 --- a/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java +++ b/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java @@ -24,6 +24,7 @@ import org.junit.Test; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; +import java.util.HashMap; public class DecoderUtilTest { @@ -157,4 +158,12 @@ public class DecoderUtilTest { // Bug detected on June 7, 2005. Decoding the following string caused OutOfMemoryError. 
Assert.assertEquals("=3?!!\\=?\"!g6P\"!Xp:\"!", DecoderUtil.decodeEncodedWords("=3?!!\\=?\"!g6P\"!Xp:\"!")); } + + @Test + public void testAllowsForOverriddingCharsets() { + HashMap<Charset, Charset> overrides = new HashMap<Charset, Charset>(); + overrides.put(Charset.forName("ISO-8859-1"), Charset.forName("WINDOWS-1252")); + String decoded = DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?You=92re_a_winner?=", DecodeMonitor.SILENT, null, overrides); + Assert.assertEquals("You’re a winner", decoded); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
