dlr 2002/11/01 14:06:10
Modified: src/java/org/apache/xmlrpc Base64.java
src/test/org/apache/xmlrpc Base64Test.java
Log:
* Base64.java
Added a new discardWhitespace(byte[]) function which is called at
the beginning of decode(byte[]) to perform pre-processing on its
arguments. Filtering whitespace in the body of decode() would be both
more memory and CPU-efficient, but I'm not comfortable enough with
the code to make that invasive of a change. I'm curious what
the Tomcat folks are doing here these days.
I noticed that encode() isn't line wrapping at 76 characters --
should we log this as a problem? What effect is this going to have
on our interop?
* Base64Test.java
Renamed the mis-named testWriter() to testBase64(). Implemented
more tests for encoding/decoding using output from Perl's
MIME::Base64 module.
http://issues.apache.org/bugzilla/show_bug.cgi?id=9931
Revision Changes Path
1.4 +75 -4 xml-rpc/src/java/org/apache/xmlrpc/Base64.java
Index: Base64.java
===================================================================
RCS file: /home/cvs/xml-rpc/src/java/org/apache/xmlrpc/Base64.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -u -r1.3 -r1.4
--- Base64.java 20 Mar 2002 15:11:03 -0000 1.3
+++ Base64.java 1 Nov 2002 22:06:10 -0000 1.4
@@ -63,6 +63,9 @@
*
*/
+import java.util.Enumeration;
+import java.util.Vector;
+
/**
* This class provides encode/decode for RFC 2045 Base64 as defined by
* RFC 2045, N. Freed and N. Borenstein. <a
@@ -71,7 +74,8 @@
* Internet Message Bodies. Reference 1996
*
* @author Jeffrey Rodriguez
- * @version $Id$
+ * @author Daniel Rall
+ * @since 1.2
*/
public final class Base64
{
@@ -254,6 +258,10 @@
*/
public static byte[] decode( byte[] base64Data )
{
+ // RFC 2045 suggests line wrapping at (no more than) 76
+ // characters -- we may have embedded whitespace.
+ base64Data = discardWhitespace(base64Data);
+
// handle the edge case, so we don't have to worry about it later
if(base64Data.length == 0) { return new byte[0]; }
@@ -316,5 +324,68 @@
encodedIndex += 3;
}
return decodedData;
+ }
+
+ /**
+ * Strips space, tab, CR and LF bytes from a base-64 encoded block.
+ *
+ * @param data The base-64 encoded data to discard the whitespace
+ * from (never modified; returned as-is if it has no whitespace).
+ * @return The data, less whitespace (see RFC 2045).
+ */
+ static byte[] discardWhitespace(byte[] data)
+ {
+ // Pass 1: record each run of whitespace as a {start, end} region.
+ int nbrToDiscard = 0; // total whitespace bytes found
+ Vector discardRegions = new Vector(); // holds int[2]: start (inclusive), end (exclusive)
+ boolean discarding = false; // true while inside a whitespace run
+ for (int i = 0; i < data.length; i++)
+ {
+ switch (data[i])
+ {
+ case (byte) ' ':
+ case (byte) '\n':
+ case (byte) '\r':
+ case (byte) '\t':
+ if (!discarding)
+ {
+ int[] region = { i, data.length }; // end stays data.length if the run reaches the end of input
+ discardRegions.addElement(region);
+ discarding = true;
+ }
+ nbrToDiscard++;
+ break;
+
+ default:
+ if (discarding)
+ {
+ // First non-whitespace byte closes the open region.
+ ((int []) discardRegions.lastElement())[1] = i;
+ discarding = false;
+ }
+ }
+ }
+
+ if (nbrToDiscard > 0)
+ {
+ // Pass 2: copy the bytes between regions into a smaller array.
+ byte[] groomed = new byte[data.length - nbrToDiscard];
+ int srcOffset = 0; // next unread position in data
+ int destOffset = 0; // next write position in groomed
+ int[] region = null; // assigned in the loop; nbrToDiscard > 0 means at least one region exists
+ Enumeration enum = discardRegions.elements(); // NOTE(review): "enum" is a reserved word from Java 5 on -- rename before building at a newer source level
+ while (enum.hasMoreElements())
+ {
+ region = (int []) enum.nextElement();
+ int len = region[0] - srcOffset; // bytes to keep ahead of this region
+ System.arraycopy(data, srcOffset, groomed, destOffset, len);
+ destOffset += len;
+ srcOffset = region[1]; // resume reading just past the whitespace
+ }
+ System.arraycopy(data, srcOffset, groomed, destOffset,
+ data.length - region[1]); // tail after the last region (0 bytes if the input ends in whitespace)
+ data = groomed;
+ }
+ return data;
}
1.7 +30 -1 xml-rpc/src/test/org/apache/xmlrpc/Base64Test.java
Index: Base64Test.java
===================================================================
RCS file: /home/cvs/xml-rpc/src/test/org/apache/xmlrpc/Base64Test.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -u -r1.6 -r1.7
--- Base64Test.java 27 Sep 2002 23:40:42 -0000 1.6
+++ Base64Test.java 1 Nov 2002 22:06:10 -0000 1.7
@@ -75,6 +75,29 @@
"foo bar\nbaz"
};
+ private static final String UNENCODED =
+ "This module provides functions to encode and decode\n" +
+ "strings into the Base64 encoding specified in RFC 2045 -\n" +
+ "MIME (Multipurpose Internet Mail Extensions). The Base64\n" +
+ "encoding is designed to represent arbitrary sequences of\n" +
+ "octets in a form that need not be humanly readable. A\n" +
+ "65-character subset ([A-Za-z0-9+/=]) of US-ASCII is used,\n" +
+ "enabling 6 bits to be represented per printable character.";
+
+ /**
+ * The string <code>UNENCODED</code> after being encoded by Perl's
+ * MIME::Base64 module.
+ */
+ private static final String ENCODED =
+
"VGhpcyBtb2R1bGUgcHJvdmlkZXMgZnVuY3Rpb25zIHRvIGVuY29kZSBhbmQgZGVjb2RlCnN0cmlu\n" +
+
"Z3MgaW50byB0aGUgQmFzZTY0IGVuY29kaW5nIHNwZWNpZmllZCBpbiBSRkMgMjA0NSAtCk1JTUUg\n" +
+
"KE11bHRpcHVycG9zZSBJbnRlcm5ldCBNYWlsIEV4dGVuc2lvbnMpLiBUaGUgQmFzZTY0CmVuY29k\n" +
+
"aW5nIGlzIGRlc2lnbmVkIHRvIHJlcHJlc2VudCBhcmJpdHJhcnkgc2VxdWVuY2VzIG9mCm9jdGV0\n" +
+
"cyBpbiBhIGZvcm0gdGhhdCBuZWVkIG5vdCBiZSBodW1hbmx5IHJlYWRhYmxlLiBBCjY1LWNoYXJh\n" +
+
"Y3RlciBzdWJzZXQgKFtBLVphLXowLTkrLz1dKSBvZiBVUy1BU0NJSSBpcyB1c2VkLAplbmFibGlu\n" +
+ "ZyA2IGJpdHMgdG8gYmUgcmVwcmVzZW50ZWQgcGVyIHByaW50YWJsZSBjaGFyYWN0ZXIu";
+
+
/**
* Constructor
*/
@@ -91,7 +114,7 @@
return new TestSuite(Base64Test.class);
}
- public void testWriter()
+ public void testBase64()
throws Exception
{
try
@@ -107,6 +130,12 @@
assertEquals(raw, decoded);
assertEquals(TEST_DATA[i], new String(decoded));
}
+
+ // FIXME: The Base64.encode() function doesn't wrap at 76 chars.
+ //assertEquals(Base64.encode(UNENCODED.getBytes()),
+ // ENCODED.getBytes());
+ assertEquals(UNENCODED.getBytes(),
+ Base64.decode(ENCODED.getBytes()));
}
catch (Exception e)
{