Author: ggregory Date: Wed Mar 28 14:22:17 2012 New Revision: 1306366 URL: http://svn.apache.org/viewvc?rev=1306366&view=rev Log: [CODEC-136] Use Charset objects when possible, create Charsets for required character encodings.
Added: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/Charsets.java commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/CharsetsTest.java Modified: commons/proper/codec/trunk/pom.xml commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/StringUtils.java Modified: commons/proper/codec/trunk/pom.xml URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/pom.xml?rev=1306366&r1=1306365&r2=1306366&view=diff ============================================================================== --- commons/proper/codec/trunk/pom.xml (original) +++ commons/proper/codec/trunk/pom.xml Wed Mar 28 14:22:17 2012 @@ -196,8 +196,8 @@ limitations under the License. </dependency> </dependencies> <properties> - <maven.compile.source>1.5</maven.compile.source> - <maven.compile.target>1.5</maven.compile.target> + <maven.compile.source>1.6</maven.compile.source> + <maven.compile.target>1.6</maven.compile.target> <commons.componentid>codec</commons.componentid> <commons.release.version>1.7</commons.release.version> <!-- The RC version used in the staging repository URL. --> Added: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/Charsets.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/Charsets.java?rev=1306366&view=auto ============================================================================== --- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/Charsets.java (added) +++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/Charsets.java Wed Mar 28 14:22:17 2012 @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.codec; + +import java.nio.charset.Charset; + +/** + * Charsets required of every implementation of the Java platform. + * + * From the Java documentation <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard + * charsets</a>: + * <p> + * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the + * release documentation for your implementation to see if any other encodings are supported. Consult the release + * documentation for your implementation to see if any other encodings are supported. </cite> + * </p> + * + * <ul> + * <li><code>US-ASCII</code><br/> + * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li> + * <li><code>ISO-8859-1</code><br/> + * ISO Latin Alphabet No. 1, a.k.a. 
ISO-LATIN-1.</li> + * <li><code>UTF-8</code><br/> + * Eight-bit Unicode Transformation Format.</li> + * <li><code>UTF-16BE</code><br/> + * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li> + * <li><code>UTF-16LE</code><br/> + * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li> + * <li><code>UTF-16</code><br/> + * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order + * accepted on input, big-endian used on output.)</li> + * </ul> + * + * This perhaps would best belong in the Commons Lang project. Even if a similar class is defined in Commons Lang, it is + * not foreseen that Commons Codec would be made to depend on Commons Lang. + * + * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + * @author Apache Software Foundation + * @since 1.7 + * @version $Id: CharEncoding.java 1173287 2011-09-20 18:16:19Z ggregory $ + */ +public class Charsets { + // + // This class should only contain Charset instances for required encodings. This guarantees that it will load correctly and + // without delay on all Java platforms. + // + /** + * <p> ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final Charset ISO_8859_1 = Charset.forName(CharEncoding.ISO_8859_1); + + /** + * <p> + * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. 
+ * </p> + * + * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final Charset US_ASCII = Charset.forName(CharEncoding.US_ASCII); + + /** + * <p> + * Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark + * (either order accepted on input, big-endian used on output) + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final Charset UTF_16 = Charset.forName(CharEncoding.UTF_16); + + /** + * <p> + * Sixteen-bit Unicode Transformation Format, big-endian byte order. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final Charset UTF_16BE = Charset.forName(CharEncoding.UTF_16BE); + + /** + * <p> + * Sixteen-bit Unicode Transformation Format, little-endian byte order. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. + * </p> + * + * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final Charset UTF_16LE = Charset.forName(CharEncoding.UTF_16LE); + + /** + * <p> + * Eight-bit Unicode Transformation Format. + * </p> + * <p> + * Every implementation of the Java platform is required to support this character encoding. 
+ * </p> + * + * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> + */ + public static final Charset UTF_8 = Charset.forName(CharEncoding.UTF_8); +} Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/StringUtils.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/StringUtils.java?rev=1306366&r1=1306365&r2=1306366&view=diff ============================================================================== --- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/StringUtils.java (original) +++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/StringUtils.java Wed Mar 28 14:22:17 2012 @@ -18,8 +18,10 @@ package org.apache.commons.codec.binary; import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; import org.apache.commons.codec.CharEncoding; +import org.apache.commons.codec.Charsets; /** * Converts String to and from bytes using the encodings required by the Java specification. These encodings are specified in <a @@ -34,35 +36,86 @@ import org.apache.commons.codec.CharEnco public class StringUtils { /** + * Calls {@link String#getBytes(Charset)} + * + * @param string + * The string to encode (if null, return null). + * @param charset + * The {@link Charset} to encode the {@code String} + * @return the encoded bytes + */ + private static byte[] getBytes(String string, Charset charset) { + if (string == null) { + return null; + } + return string.getBytes(charset); + } + + /** * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new * byte array. 
* * @param string * the String to encode, may be <code>null</code> * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> - * @throws IllegalStateException - * Thrown when the charset is missing, which should be never according the the Java specification. + * @throws NullPointerException + * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is + * required by the Java platform specification. + * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException * @see <a href="http://download.oracle.com/javase/1.5.0/docs/api/java/nio/charset/Charset.html">Standard charsets</a> * @see #getBytesUnchecked(String, String) */ public static byte[] getBytesIso8859_1(String string) { - return StringUtils.getBytesUnchecked(string, CharEncoding.ISO_8859_1); + return getBytes(string, Charsets.ISO_8859_1); } + /** - * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte + * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte * array. + * <p> + * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which + * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. + * </p> * * @param string * the String to encode, may be <code>null</code> + * @param charsetName + * The name of a required {@link java.nio.charset.Charset} * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> * @throws IllegalStateException - * Thrown when the charset is missing, which should be never according the the Java specification. + * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a + * required charset name. 
+ * @see CharEncoding + * @see String#getBytes(String) + */ + public static byte[] getBytesUnchecked(String string, String charsetName) { + if (string == null) { + return null; + } + try { + return string.getBytes(charsetName); + } catch (UnsupportedEncodingException e) { + throw StringUtils.newIllegalStateException(charsetName, e); + } + } + + /** + * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte + * array. + * + * @param string + * the String to encode, may be <code>null</code> + * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> + * @throws NullPointerException + * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is + * required by the Java platform specification. + * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException * @see <a href="http://download.oracle.com/javase/1.5.0/docs/api/java/nio/charset/Charset.html">Standard charsets</a> * @see #getBytesUnchecked(String, String) */ public static byte[] getBytesUsAscii(String string) { - return StringUtils.getBytesUnchecked(string, CharEncoding.US_ASCII); + return getBytes(string, Charsets.US_ASCII); } /** @@ -72,13 +125,15 @@ public class StringUtils { * @param string * the String to encode, may be <code>null</code> * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> - * @throws IllegalStateException - * Thrown when the charset is missing, which should be never according the the Java specification. + * @throws NullPointerException + * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is + * required by the Java platform specification. 
+ * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException * @see <a href="http://download.oracle.com/javase/1.5.0/docs/api/java/nio/charset/Charset.html">Standard charsets</a> * @see #getBytesUnchecked(String, String) */ public static byte[] getBytesUtf16(String string) { - return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16); + return getBytes(string, Charsets.UTF_16); } /** @@ -88,13 +143,15 @@ public class StringUtils { * @param string * the String to encode, may be <code>null</code> * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> - * @throws IllegalStateException - * Thrown when the charset is missing, which should be never according the the Java specification. + * @throws NullPointerException + * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is + * required by the Java platform specification. + * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException * @see <a href="http://download.oracle.com/javase/1.5.0/docs/api/java/nio/charset/Charset.html">Standard charsets</a> * @see #getBytesUnchecked(String, String) */ public static byte[] getBytesUtf16Be(String string) { - return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16BE); + return getBytes(string, Charsets.UTF_16BE); } /** @@ -104,13 +161,15 @@ public class StringUtils { * @param string * the String to encode, may be <code>null</code> * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> - * @throws IllegalStateException - * Thrown when the charset is missing, which should be never according the the Java specification. + * @throws NullPointerException + * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is + * required by the Java platform specification. 
+ * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException * @see <a href="http://download.oracle.com/javase/1.5.0/docs/api/java/nio/charset/Charset.html">Standard charsets</a> * @see #getBytesUnchecked(String, String) */ public static byte[] getBytesUtf16Le(String string) { - return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16LE); + return getBytes(string, Charsets.UTF_16LE); } /** @@ -120,47 +179,36 @@ public class StringUtils { * @param string * the String to encode, may be <code>null</code> * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> - * @throws IllegalStateException - * Thrown when the charset is missing, which should be never according the the Java specification. + * @throws NullPointerException + * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is + * required by the Java platform specification. + * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException * @see <a href="http://download.oracle.com/javase/1.5.0/docs/api/java/nio/charset/Charset.html">Standard charsets</a> * @see #getBytesUnchecked(String, String) */ public static byte[] getBytesUtf8(String string) { - return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_8); + return getBytes(string, Charsets.UTF_8); + } + + private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) { + return new IllegalStateException(charsetName + ": " + e); } /** - * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte - * array. - * <p> - * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which - * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 
- * </p> + * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset. * - * @param string - * the String to encode, may be <code>null</code> - * @param charsetName - * The name of a required {@link java.nio.charset.Charset} - * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> - * @throws IllegalStateException - * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a - * required charset name. - * @see CharEncoding - * @see String#getBytes(String) + * @param bytes + * The bytes to be decoded into characters, may be <code>null</code> + * @param charset + * The {@link Charset} used to decode the bytes + * @return A new <code>String</code> decoded from the specified array of bytes using the given charset, + * or <code>null</code> if the input byte array was <code>null</code>. + * @throws NullPointerException + * Thrown if the given charset is <code>null</code> while the byte array is not, which should never happen for the + * charsets defined in {@link Charsets} since they are required by the Java platform specification. */ - public static byte[] getBytesUnchecked(String string, String charsetName) { - if (string == null) { - return null; - } - try { - return string.getBytes(charsetName); - } catch (UnsupportedEncodingException e) { - throw StringUtils.newIllegalStateException(charsetName, e); - } - } - - private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) { - return new IllegalStateException(charsetName + ": " + e); + private static String newString(byte[] bytes, Charset charset) { + return bytes == null ? null : new String(bytes, charset); } /** @@ -198,14 +246,15 @@ public class StringUtils { * * @param bytes * The bytes to be decoded into characters, may be <code>null</code> - * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, - * or <code>null</code> if the input byte array was <code>null</code>. 
- * @throws IllegalStateException - * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the - * charset is required. + * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or + * <code>null</code> if the input byte array was <code>null</code>. + * @throws NullPointerException + * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is + * required by the Java platform specification. + * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException */ public static String newStringIso8859_1(byte[] bytes) { - return StringUtils.newString(bytes, CharEncoding.ISO_8859_1); + return new String(bytes, Charsets.ISO_8859_1); } /** @@ -215,12 +264,13 @@ public class StringUtils { * The bytes to be decoded into characters * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset, * or <code>null</code> if the input byte array was <code>null</code>. - * @throws IllegalStateException - * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the - * charset is required. + * @throws NullPointerException + * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is + * required by the Java platform specification. + * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException */ public static String newStringUsAscii(byte[] bytes) { - return StringUtils.newString(bytes, CharEncoding.US_ASCII); + return new String(bytes, Charsets.US_ASCII); } /** @@ -230,12 +280,13 @@ public class StringUtils { * The bytes to be decoded into characters * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset * or <code>null</code> if the input byte array was <code>null</code>. 
- * @throws IllegalStateException - * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the - * charset is required. + * @throws NullPointerException + * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is + * required by the Java platform specification. + * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException */ public static String newStringUtf16(byte[] bytes) { - return StringUtils.newString(bytes, CharEncoding.UTF_16); + return new String(bytes, Charsets.UTF_16); } /** @@ -245,12 +296,13 @@ public class StringUtils { * The bytes to be decoded into characters * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset, * or <code>null</code> if the input byte array was <code>null</code>. - * @throws IllegalStateException - * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the - * charset is required. + * @throws NullPointerException + * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is + * required by the Java platform specification. + * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException */ public static String newStringUtf16Be(byte[] bytes) { - return StringUtils.newString(bytes, CharEncoding.UTF_16BE); + return new String(bytes, Charsets.UTF_16BE); } /** @@ -260,12 +312,13 @@ public class StringUtils { * The bytes to be decoded into characters * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset, * or <code>null</code> if the input byte array was <code>null</code>. - * @throws IllegalStateException - * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the - * charset is required. 
+ * @throws NullPointerException + * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is + * required by the Java platform specification. + * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException */ public static String newStringUtf16Le(byte[] bytes) { - return StringUtils.newString(bytes, CharEncoding.UTF_16LE); + return new String(bytes, Charsets.UTF_16LE); } /** @@ -275,12 +328,13 @@ public class StringUtils { * The bytes to be decoded into characters * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset, * or <code>null</code> if the input byte array was <code>null</code>. - * @throws IllegalStateException - * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the - * charset is required. + * @throws NullPointerException + * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is + * required by the Java platform specification. + * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException */ public static String newStringUtf8(byte[] bytes) { - return StringUtils.newString(bytes, CharEncoding.UTF_8); + return newString(bytes, Charsets.UTF_8); } } Added: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/CharsetsTest.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/CharsetsTest.java?rev=1306366&view=auto ============================================================================== --- commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/CharsetsTest.java (added) +++ commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/CharsetsTest.java Wed Mar 28 14:22:17 2012 @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec; + +import junit.framework.Assert; + +import org.junit.Test; + +/** + * Sanity checks for {@link Charsets}. + * + * @version $Id: CharEncodingTest.java 1298985 2012-03-09 19:12:49Z ggregory $ + */ +public class CharsetsTest { + + @Test + public void testIso8859_1() { + Assert.assertEquals("ISO-8859-1", Charsets.ISO_8859_1.name()); + } + + @Test + public void testUsAscii() { + Assert.assertEquals("US-ASCII", Charsets.US_ASCII.name()); + } + + @Test + public void testUtf16() { + Assert.assertEquals("UTF-16", Charsets.UTF_16.name()); + } + + @Test + public void testUtf16Be() { + Assert.assertEquals("UTF-16BE", Charsets.UTF_16BE.name()); + } + + @Test + public void testUtf16Le() { + Assert.assertEquals("UTF-16LE", Charsets.UTF_16LE.name()); + } + + @Test + public void testUtf8() { + Assert.assertEquals("UTF-8", Charsets.UTF_8.name()); + } + +}