HBASE-16658 Optimize UTF8 string/byte conversions (binlijin)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/6624c676 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/6624c676 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/6624c676 Branch: refs/heads/hbase-14439 Commit: 6624c676fe7043a95b47f84db4b21df0728ce359 Parents: 6682120 Author: tedyu <yuzhih...@gmail.com> Authored: Tue Sep 20 13:08:04 2016 -0700 Committer: tedyu <yuzhih...@gmail.com> Committed: Tue Sep 20 13:08:04 2016 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hbase/util/Bytes.java | 38 +++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/6624c676/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java ---------------------------------------------------------------------- diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index 41292a5..2d7d3f6 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -24,10 +24,12 @@ import static com.google.common.base.Preconditions.checkPositionIndex; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.security.SecureRandom; import java.util.Arrays; import java.util.Collection; @@ -35,15 +37,13 @@ import java.util.Comparator; import java.util.Iterator; import java.util.List; -import com.google.protobuf.ByteString; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.WritableComparator; import org.apache.hadoop.io.WritableUtils; @@ -52,6 +52,7 @@ import sun.misc.Unsafe; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; +import com.google.protobuf.ByteString; /** * Utility class that handles byte arrays, conversions to/from other types, @@ -73,6 +74,10 @@ public class Bytes implements Comparable<Bytes> { /** When we encode strings, we always specify UTF8 encoding */ private static final Charset UTF8_CHARSET = Charset.forName(UTF8_ENCODING); + // Using the charset canonical name for String/byte[] conversions is much + // more efficient due to use of cached encoders/decoders. + private static final String UTF8_CSN = StandardCharsets.UTF_8.name(); + //HConstants.EMPTY_BYTE_ARRAY should be updated if this changed private static final byte [] EMPTY_BYTE_ARRAY = new byte [0]; @@ -563,7 +568,7 @@ public class Bytes implements Comparable<Bytes> { * @param off offset into array * @return String made from <code>b</code> or null */ - public static String toString(final byte [] b, int off) { + public static String toString(final byte[] b, int off) { if (b == null) { return null; } @@ -571,7 +576,12 @@ public class Bytes implements Comparable<Bytes> { if (len <= 0) { return ""; } - return new String(b, off, len, UTF8_CHARSET); + try { + return new String(b, off, len, UTF8_CSN); + } catch (UnsupportedEncodingException e) { + // should never happen! + throw new IllegalArgumentException("UTF8 encoding is not supported", e); + } } /** @@ -583,14 +593,19 @@ public class Bytes implements Comparable<Bytes> { * @param len length of utf-8 sequence * @return String made from <code>b</code> or null */ - public static String toString(final byte [] b, int off, int len) { + public static String toString(final byte[] b, int off, int len) { if (b == null) { return null; } if (len == 0) { return ""; } - return new String(b, off, len, UTF8_CHARSET); + try { + return new String(b, off, len, UTF8_CSN); + } catch (UnsupportedEncodingException e) { + // should never happen! + throw new IllegalArgumentException("UTF8 encoding is not supported", e); + } } /** @@ -715,7 +730,12 @@ public class Bytes implements Comparable<Bytes> { * @return the byte array */ public static byte[] toBytes(String s) { - return s.getBytes(UTF8_CHARSET); + try { + return s.getBytes(UTF8_CSN); + } catch (UnsupportedEncodingException e) { + // should never happen! + throw new IllegalArgumentException("UTF8 decoding is not supported", e); + } } /**