Author: olegk
Date: Wed Sep 25 08:15:22 2013
New Revision: 1526130

URL: http://svn.apache.org/r1526130
Log:
MIME4J-211: Add an optional fallback charset argument to DecoderUtil.decodeEncodedWords
Contributed by TzeKai Lee <chikei at gmail.com>

Modified: james/mime4j/branches/apache-mime4j-0.7/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java james/mime4j/branches/apache-mime4j-0.7/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java Modified: james/mime4j/branches/apache-mime4j-0.7/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java URL: http://svn.apache.org/viewvc/james/mime4j/branches/apache-mime4j-0.7/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java?rev=1526130&r1=1526129&r2=1526130&view=diff ============================================================================== --- james/mime4j/branches/apache-mime4j-0.7/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java (original) +++ james/mime4j/branches/apache-mime4j-0.7/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java Wed Sep 25 08:15:22 2013 @@ -142,6 +142,38 @@ public class DecoderUtil { * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing) */ public static String decodeEncodedWords(String body, DecodeMonitor monitor) throws IllegalArgumentException { + return decodeEncodedWords(body, monitor, null); + } + + /** + * Decodes a string containing encoded words as defined by RFC 2047. Encoded + * words have the form =?charset?enc?encoded-text?= where enc is either 'Q' + * or 'q' for quoted-printable and 'B' or 'b' for base64. Using fallback + * charset if charset in encoded words is invalid. + * + * @param body the string to decode + * @param fallback the fallback Charset to be used. + * @return the decoded string. + * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing) + */ + public static String decodeEncodedWords(String body, Charset fallback) throws IllegalArgumentException { + return decodeEncodedWords(body, null, fallback); + } + + /** + * Decodes a string containing encoded words as defined by RFC 2047. 
Encoded + * words have the form =?charset?enc?encoded-text?= where enc is either 'Q' + * or 'q' for quoted-printable and 'B' or 'b' for base64. Using fallback + * charset if charset in encoded words is invalid. + * + * @param body the string to decode + * @param monitor the DecodeMonitor to be used. + * @param fallback the fallback Charset to be used. + * @return the decoded string. + * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing) + */ + public static String decodeEncodedWords(String body, DecodeMonitor monitor, Charset fallback) + throws IllegalArgumentException { int tailIndex = 0; boolean lastMatchValid = false; @@ -154,7 +186,7 @@ public class DecoderUtil { String encodedText = matcher.group(4); String decoded = null; - decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor); + decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor, fallback); if (decoded == null) { sb.append(matcher.group(0)); } else { @@ -178,12 +210,16 @@ public class DecoderUtil { // return null on error private static String tryDecodeEncodedWord(final String mimeCharset, - final String encoding, final String encodedText, final DecodeMonitor monitor) { + final String encoding, final String encodedText, final DecodeMonitor monitor, final Charset fallback) { Charset charset = CharsetUtil.lookup(mimeCharset); if (charset == null) { - monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded", - "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset"); - return null; + if(fallback == null) { + monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded", + "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset"); + return null; + } else { + charset = fallback; + } } if (encodedText.length() == 0) { Modified: james/mime4j/branches/apache-mime4j-0.7/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java URL: 
http://svn.apache.org/viewvc/james/mime4j/branches/apache-mime4j-0.7/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java?rev=1526130&r1=1526129&r2=1526130&view=diff ============================================================================== --- james/mime4j/branches/apache-mime4j-0.7/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java (original) +++ james/mime4j/branches/apache-mime4j-0.7/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java Wed Sep 25 08:15:22 2013 @@ -20,6 +20,7 @@ package org.apache.james.mime4j.codec; import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; import junit.framework.TestCase; @@ -67,6 +68,18 @@ public class DecoderUtilTest extends Tes + "\u30B8\u30CD\u30B9\u306E\u6C7A\u5B9A\u7248\u3067\u3059\uFF01", dec); } + public void testDecodeJapaneseEncodedWordsWithFallback(){ + String enc = "=?random?B?GyRCTCQbKEobJEI+NRsoShskQkJ6GyhKGyRCOS0bKEo=?= " + + "=?garbage?B?GyRCOXAbKEobJEIiKBsoShskQiU1GyhKGyRCJSQbKEo=?= " + + "=?charset?B?GyRCJUkbKEobJEIlUxsoShskQiU4GyhKGyRCJU0bKEo=?= " + + "=?name?B?GyRCJTkbKEobJEIkThsoShskQjdoGyhKGyRCRGobKEo=?= " + + "=?trash?B?GyRCSEcbKEobJEIkRxsoShskQiQ5GyhKGyRCISobKEo=?="; + + String dec = DecoderUtil.decodeEncodedWords(enc, Charset.forName("ISO-2022-JP")); + assertEquals("\u672A\u627F\u8AFE\u5E83\u544A\u203B\u30B5\u30A4\u30C9\u30D3" + + "\u30B8\u30CD\u30B9\u306E\u6C7A\u5B9A\u7248\u3067\u3059\uFF01", dec); + } + public void testInvalidEncodedWordsAreIgnored() { assertEquals("=?iso8859-1?Q?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?Q?=")); assertEquals("=?iso8859-1?b?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?b?="));