Author: ggregory Date: Mon Mar 19 20:21:26 2012 New Revision: 1302639 URL: http://svn.apache.org/viewvc?rev=1302639&view=rev Log: CODEC-121 will be for 2.0.
Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java?rev=1302639&r1=1302638&r2=1302639&view=diff ============================================================================== --- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java (original) +++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java Mon Mar 19 20:21:26 2012 @@ -42,10 +42,20 @@ import org.apache.commons.codec.binary.S * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping * gateway. * </p> - * + * + * <p> + * Note: + * </p> + * <p> + * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec + * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec + * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy + * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec. + * </p> + * * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One: * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a> - * + * * @author Apache Software Foundation * @since 1.3 * @version $Id$ @@ -66,14 +76,6 @@ public class QuotedPrintableCodec implem private static final byte TAB = 9; private static final byte SPACE = 32; - - private static final byte CR = 13; - - private static final byte LF = 10; - - /** Safe line length for quoted printable encoded text. */ - private static final int SAFE_LENGTH = 73; - // Static initializer for printable chars collection static { // alpha characters @@ -109,78 +111,26 @@ public class QuotedPrintableCodec implem * Encodes byte into its quoted-printable representation. * * @param b - * byte to encode + * byte to encode * @param buffer - * the buffer to write to - * @return The number of bytes written to the <code>buffer</code> + * the buffer to write to */ - private static final int encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) { + private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) { buffer.write(ESCAPE_CHAR); char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16)); buffer.write(hex1); buffer.write(hex2); - return 3; - } - - /** - * Return the byte at position <code>index</code> of the byte array and - * make sure it is unsigned. - * - * @param index - * position in the array - * @param bytes - * the byte array - * @return the unsigned octet at position <code>index</code> from the array - */ - private static int getUnsignedOctet(final int index, final byte[] bytes) { - int b = bytes[index]; - if (b < 0) { - b = 256 + b; - } - return b; - } - - /** - * Write a byte to the buffer. - * - * @param b - * byte to write - * @param encode - * indicates whether the octet shall be encoded - * @param buffer - * the buffer to write to - * @return the number of bytes that have been written to the buffer - */ - private static int encodeByte(final int b, final boolean encode, - final ByteArrayOutputStream buffer) { - if (encode) { - return encodeQuotedPrintable(b, buffer); - } else { - buffer.write(b); - return 1; - } - } - - /** - * Checks whether the given byte is whitespace. - * - * @param b - * byte to be checked - * @return <code>true</code> if the byte is either a space or tab character - */ - private static boolean isWhitespace(final int b) { - return b == SPACE || b == TAB; } /** * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. - * + * * <p> - * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) - * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text. + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521 and is suitable for encoding binary data and unformatted text. * </p> - * + * * @param printable * bitset of characters deemed quoted-printable * @param bytes @@ -195,59 +145,29 @@ public class QuotedPrintableCodec implem printable = PRINTABLE_CHARS; } ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - int pos = 1; - // encode up to buffer.length - 3, the last three octets will be treated - // separately for simplification of note #3 - for (int i = 0; i < bytes.length - 3; i++) { - int b = getUnsignedOctet(i, bytes); - if (pos < SAFE_LENGTH) { - // up to this length it is safe to add any byte, encoded or not - pos += encodeByte(b, !printable.get(b), buffer); + for (byte c : bytes) { + int b = c; + if (b < 0) { + b = 256 + b; + } + if (printable.get(b)) { + buffer.write(b); } else { - // rule #3: whitespace at the end of a line *must* be encoded - encodeByte(b, !printable.get(b) || isWhitespace(b), buffer); - - // rule #5: soft line break - buffer.write(ESCAPE_CHAR); - buffer.write(CR); - buffer.write(LF); - pos = 1; + encodeQuotedPrintable(b, buffer); } } - - // rule #3: whitespace at the end of a line *must* be encoded - // if we would do a soft break line after this octet, encode whitespace - int b = getUnsignedOctet(bytes.length - 3, bytes); - boolean encode = !printable.get(b) || (isWhitespace(b) && pos > SAFE_LENGTH - 5); - pos += encodeByte(b, encode, buffer); - - // note #3: '=' *must not* be the ultimate or penultimate character - // simplification: if < 6 bytes left, do a soft line break as we may need - // exactly 6 bytes space for the last 2 bytes - if (pos > SAFE_LENGTH - 2) { - buffer.write(ESCAPE_CHAR); - buffer.write(CR); - buffer.write(LF); - } - for (int i = bytes.length - 2; i < bytes.length; i++) { - b = getUnsignedOctet(i, bytes); - // rule #3: trailing whitespace shall be encoded - encode = !printable.get(b) || (i > bytes.length - 2 && isWhitespace(b)); - encodeByte(b, encode, buffer); - } - return buffer.toByteArray(); } /** - * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are - * converted back to their original representation. - * + * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted + * back to their original representation. + * * <p> - * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as - * defined in RFC 1521. + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521. * </p> - * + * * @param bytes * array of quoted-printable characters * @return array of original bytes @@ -260,21 +180,16 @@ public class QuotedPrintableCodec implem } ByteArrayOutputStream buffer = new ByteArrayOutputStream(); for (int i = 0; i < bytes.length; i++) { - final int b = bytes[i]; + int b = bytes[i]; if (b == ESCAPE_CHAR) { try { - // if the next octet is a CR we have found a soft line break - if (bytes[++i] == CR) { - continue; - } - int u = Utils.digit16(bytes[i]); + int u = Utils.digit16(bytes[++i]); int l = Utils.digit16(bytes[++i]); buffer.write((char) ((u << 4) + l)); } catch (ArrayIndexOutOfBoundsException e) { throw new DecoderException("Invalid quoted-printable encoding", e); } - } else if (b != CR && b != LF) { - // every other octet is appended except for CR & LF + } else { buffer.write(b); } } @@ -285,8 +200,8 @@ public class QuotedPrintableCodec implem * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. * * <p> - * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) - * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text. + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521 and is suitable for encoding binary data and unformatted text. * </p> * * @param bytes @@ -302,8 +217,8 @@ public class QuotedPrintableCodec implem * back to their original representation. * * <p> - * This function fully implements the quoted-printable encoding specification (rule #1 through rule #2) - * as defined in RFC 1521. + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521. * </p> * * @param bytes @@ -320,8 +235,8 @@ public class QuotedPrintableCodec implem * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped. * * <p> - * This function fully implements the quoted-printable encoding specification (rule #1 through rule #2) - * as defined in RFC 1521 and is suitable for encoding binary data. + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521 and is suitable for encoding binary data. * </p> * * @param pString @@ -450,8 +365,8 @@ public class QuotedPrintableCodec implem * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. * * <p> - * This function fully implements the quoted-printable encoding specification (rule #1 through rule #2) - * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text. + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521 and is suitable for encoding binary data and unformatted text. * </p> * * @param pString Modified: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java?rev=1302639&r1=1302638&r2=1302639&view=diff ============================================================================== --- commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java (original) +++ commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java Mon Mar 19 20:21:26 2012 @@ -24,6 +24,7 @@ import static org.junit.Assert.fail; import org.apache.commons.codec.CharEncoding; import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.EncoderException; +import org.junit.Ignore; import org.junit.Test; /** @@ -263,92 +264,30 @@ public class QuotedPrintableCodecTest { } @Test + @Ignore + /** + * The QuotedPrintableCodec documentation states that this is not supported. + * + * @throws Exception + * @see <a href="https://issues.apache.org/jira/browse/CODEC-121">CODEC-121</a> + */ public void testSoftLineBreakDecode() throws Exception { - String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics " + - "is the most beautiful branch of philosophy."; - String expected = "If you believe that truth=beauty, then surely mathematics " + - "is the most beautiful branch of philosophy."; - - QuotedPrintableCodec qpcodec = new QuotedPrintableCodec(); - assertEquals(expected, qpcodec.decode(qpdata)); - - String encoded = qpcodec.encode(expected); - assertEquals(expected, qpcodec.decode(encoded)); + String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics is the most beautiful branch of philosophy."; + String expected = "If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy."; + assertEquals(expected, new QuotedPrintableCodec().decode(qpdata)); } @Test + @Ignore + /** + * The QuotedPrintableCodec documentation states that this is not supported. + * + * @throws Exception + * @see <a href="https://issues.apache.org/jira/browse/CODEC-121">CODEC-121</a> + */ public void testSoftLineBreakEncode() throws Exception { - String qpdata = "If you believe that truth=3Dbeauty, then surely mathematics is the most " + - "b=\r\neautiful branch of philosophy."; - String expected = "If you believe that truth=beauty, then surely mathematics is the most " + - "beautiful branch of philosophy."; - - QuotedPrintableCodec qpcodec = new QuotedPrintableCodec(); - assertEquals(qpdata, qpcodec.encode(expected)); - - String decoded = qpcodec.decode(qpdata); - assertEquals(qpdata, qpcodec.encode(decoded)); - } - - @Test - public void testSkipNotEncodedCRLF() throws Exception { - String qpdata = "CRLF in an\n encoded text should be=20=\r\n\rskipped in the\r decoding."; - String expected = "CRLF in an encoded text should be skipped in the decoding."; - - QuotedPrintableCodec qpcodec = new QuotedPrintableCodec(); - assertEquals(expected, qpcodec.decode(qpdata)); - - String encoded = qpcodec.encode(expected); - assertEquals(expected, qpcodec.decode(encoded)); - } - - @Test - public void testTrailingSpecial() throws Exception { - final QuotedPrintableCodec qpcodec = new QuotedPrintableCodec(); - - String plain ="This is a example of a quoted-printable text file. This might contain sp=cial chars."; - String expected = "This is a example of a quoted-printable text file. This might contain sp=3D=\r\ncial chars."; - assertEquals(expected, qpcodec.encode(plain)); - - plain ="This is a example of a quoted-printable text file. This might contain ta\tbs as well."; - expected = "This is a example of a quoted-printable text file. This might contain ta=09=\r\nbs as well."; - assertEquals(expected, qpcodec.encode(plain)); + String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics is the most beautiful branch of philosophy."; + String expected = "If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy."; + assertEquals(qpdata, new QuotedPrintableCodec().encode(expected)); } - - @Test - public void testUltimateSoftBreak() throws Exception { - final QuotedPrintableCodec qpcodec = new QuotedPrintableCodec(); - - String plain ="This is a example of a quoted-printable text file. There is no end to it\t"; - String expected = "This is a example of a quoted-printable text file. There is no end to i=\r\nt=09"; - - assertEquals(expected, qpcodec.encode(plain)); - - plain ="This is a example of a quoted-printable text file. There is no end to it "; - expected = "This is a example of a quoted-printable text file. There is no end to i=\r\nt=20"; - - assertEquals(expected, qpcodec.encode(plain)); - - // whitespace before soft break - plain ="This is a example of a quoted-printable text file. There is no end to "; - expected = "This is a example of a quoted-printable text file. There is no end to=20=\r\n =20"; - - assertEquals(expected, qpcodec.encode(plain)); - - // non-printable character before soft break - plain ="This is a example of a quoted-printable text file. There is no end to= "; - expected = "This is a example of a quoted-printable text file. There is no end to=3D=\r\n =20"; - - assertEquals(expected, qpcodec.encode(plain)); - } - - @Test - public void testFinalBytes() throws Exception { - // whitespace, but does not need to be encoded - String plain ="This is a example of a quoted=printable text file. There is no tt"; - String expected = "This is a example of a quoted=3Dprintable text file. There is no tt"; - - assertEquals(expected, new QuotedPrintableCodec().encode(plain)); - } - }