Author: liyin Date: Wed Apr 2 20:49:19 2014 New Revision: 1584166 URL: http://svn.apache.org/r1584166 Log: [HBASE-10759] Testing the performance of Guava comparator.
Author: manukranthk Summary: Adding guava comparator into the Bytes.compareTo function which is very fundamental to HBase compare functions and comparators. Test Plan: Added a unit test which suggests that the guava comparator give slight improvement. Reviewers: rshroff, liyintang, daviddeng Reviewed By: daviddeng CC: hbase-eng@, platform-diffs@lists Differential Revision: https://phabricator.fb.com/D785376 Task ID: 2268161 Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Bytes.java hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1584166&r1=1584165&r2=1584166&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java (original) +++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java Wed Apr 2 20:49:19 2014 @@ -1062,6 +1062,10 @@ public final class HConstants { "hbase.client.use.conf.from.server"; public static final boolean DEFAULT_USE_CONF_FROM_SERVER = true; + public static final String USE_GUAVA_BYTES_COMPARISION = + "hbase.regionserver.use.guava.bytes.comparision"; + public static boolean DEFAULT_USE_GUAVA_BYTES_COMPARISION = false; + private HConstants() { // Can't be instantiated with this constructor. } Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1584166&r1=1584165&r2=1584166&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original) +++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Wed Apr 2 20:49:19 2014 @@ -517,6 +517,11 @@ public class HRegionServer implements HR int maxPreloadThreads = conf.getInt(HConstants.MAX_PRELOAD_THREAD_COUNT, HConstants.DEFAULT_MAX_PRELOAD_THREAD_COUNT); PreloadThreadPool.constructPreloaderThreadPool(corePreloadThreads, maxPreloadThreads); + + // Configure use of Guava bytes comparator. + Bytes.useGuavaBytesComparision = conf.getBoolean( + HConstants.USE_GUAVA_BYTES_COMPARISION, + HConstants.DEFAULT_USE_GUAVA_BYTES_COMPARISION); } /** Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Bytes.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Bytes.java?rev=1584166&r1=1584165&r2=1584166&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Bytes.java (original) +++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/Bytes.java Wed Apr 2 20:49:19 2014 @@ -23,8 +23,12 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.lang.reflect.Field; import java.math.BigInteger; import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.security.AccessController; +import java.security.PrivilegedAction; import java.util.Arrays; import java.util.Comparator; import java.util.Iterator; @@ -41,6 +45,8 @@ import org.apache.thrift.protocol.TProto import org.apache.thrift.transport.TMemoryBuffer; import org.apache.thrift.transport.TMemoryInputTransport; +import sun.misc.Unsafe; + import com.facebook.nifty.header.protocol.TFacebookCompactProtocol; import com.facebook.swift.codec.ThriftCodec; @@ -970,6 +976,9 @@ public class Bytes { return compareTo(left, 0, left.length, right, 0, right.length); } + public static boolean useGuavaBytesComparision = + HConstants.DEFAULT_USE_GUAVA_BYTES_COMPARISION; + /** * Lexographically compare two arrays. * @@ -983,17 +992,218 @@ public class Bytes { */ public static int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2) { - // Bring WritableComparator code local - int end1 = offset1 + length1; - int end2 = offset2 + length2; - for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) { - int a = (buffer1[i] & 0xff); - int b = (buffer2[j] & 0xff); - if (a != b) { - return a - b; + if (!useGuavaBytesComparision || (length1 < LONG_BYTES_X_2) + || (length2 < LONG_BYTES_X_2)) { + // Bring WritableComparator code local + int end1 = offset1 + length1; + int end2 = offset2 + length2; + for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) { + int a = (buffer1[i] & 0xff); + int b = (buffer2[j] & 0xff); + if (a != b) { + return a - b; + } + } + return length1 - length2; + } else { + return LexicographicalComparerHolder.BEST_COMPARER.compareTo( + buffer1, offset1, length1, buffer2, offset2, length2); + } + } + + /** + * The number of bytes required to represent a primitive {@code long} + * value. + */ + public static final int LONG_BYTES = Long.SIZE / Byte.SIZE; + public static final int LONG_BYTES_X_2 = LONG_BYTES * 2; + + interface Comparer<T> { + abstract public int compareTo(T buffer1, int offset1, int length1, + T buffer2, int offset2, int length2); + } + + static Comparer<byte[]> lexicographicalComparerJavaImpl() { + return LexicographicalComparerHolder.PureJavaComparer.INSTANCE; + } + + /** + * Provides a lexicographical comparer implementation; either a Java + * implementation or a faster implementation based on {@link Unsafe}. + * + * <p>Uses reflection to gracefully fall back to the Java implementation if + * {@code Unsafe} isn't available. + */ + static class LexicographicalComparerHolder { + static final String UNSAFE_COMPARER_NAME = + LexicographicalComparerHolder.class.getName() + "$UnsafeComparer"; + + static final Comparer<byte[]> BEST_COMPARER = getBestComparer(); + /** + * Returns the Unsafe-using Comparer, or falls back to the pure-Java + * implementation if unable to do so. + */ + static Comparer<byte[]> getBestComparer() { + try { + Class<?> theClass = Class.forName(UNSAFE_COMPARER_NAME); + + // yes, UnsafeComparer does implement Comparer<byte[]> + @SuppressWarnings("unchecked") + Comparer<byte[]> comparer = + (Comparer<byte[]>) theClass.getEnumConstants()[0]; + return comparer; + } catch (Throwable t) { // ensure we really catch *everything* + return lexicographicalComparerJavaImpl(); + } + } + + enum PureJavaComparer implements Comparer<byte[]> { + INSTANCE; + + @Override + public int compareTo(byte[] buffer1, int offset1, int length1, + byte[] buffer2, int offset2, int length2) { + // Short circuit equal case + if (buffer1 == buffer2 && + offset1 == offset2 && + length1 == length2) { + return 0; + } + // Bring WritableComparator code local + int end1 = offset1 + length1; + int end2 = offset2 + length2; + for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) { + int a = (buffer1[i] & 0xff); + int b = (buffer2[j] & 0xff); + if (a != b) { + return a - b; + } + } + return length1 - length2; + } + } + + enum UnsafeComparer implements Comparer<byte[]> { + INSTANCE; + + static final Unsafe theUnsafe; + + /** The offset to the first element in a byte array. */ + static final int BYTE_ARRAY_BASE_OFFSET; + + static { + theUnsafe = (Unsafe) AccessController.doPrivileged( + new PrivilegedAction<Object>() { + @Override + public Object run() { + try { + Field f = Unsafe.class.getDeclaredField("theUnsafe"); + f.setAccessible(true); + return f.get(null); + } catch (NoSuchFieldException e) { + // It doesn't matter what we throw; + // it's swallowed in getBestComparer(). + throw new Error(); + } catch (IllegalAccessException e) { + throw new Error(); + } + } + }); + + BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class); + + // sanity check - this should never fail + if (theUnsafe.arrayIndexScale(byte[].class) != 1) { + throw new AssertionError(); + } + } + + static final boolean littleEndian = + ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN); + + /** + * Returns true if x1 is less than x2, when both values are treated as + * unsigned. + */ + static boolean lessThanUnsigned(long x1, long x2) { + return (x1 + Long.MIN_VALUE) < (x2 + Long.MIN_VALUE); + } + + /** + * Lexicographically compare two arrays. + * + * @param buffer1 left operand + * @param buffer2 right operand + * @param offset1 Where to start comparing in the left buffer + * @param offset2 Where to start comparing in the right buffer + * @param length1 How much to compare from the left buffer + * @param length2 How much to compare from the right buffer + * @return 0 if equal, < 0 if left is less than right, etc. + */ + @Override + public int compareTo(byte[] buffer1, int offset1, int length1, + byte[] buffer2, int offset2, int length2) { + // Short circuit equal case + if (buffer1 == buffer2 && + offset1 == offset2 && + length1 == length2) { + return 0; + } + int minLength = Math.min(length1, length2); + int minWords = minLength / LONG_BYTES; + int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET; + int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET; + + /* + * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a + * time is no slower than comparing 4 bytes at a time even on 32-bit. + * On the other hand, it is substantially faster on 64-bit. + */ + for (int i = 0; i < minWords * LONG_BYTES; i += LONG_BYTES) { + long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i); + long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i); + long diff = lw ^ rw; + + if (diff != 0) { + if (!littleEndian) { + return lessThanUnsigned(lw, rw) ? -1 : 1; + } + + // Use binary search + int n = 0; + int y; + int x = (int) diff; + if (x == 0) { + x = (int) (diff >>> 32); + n = 32; + } + + y = x << 16; + if (y == 0) { + n += 16; + } else { + x = y; + } + + y = x << 8; + if (y == 0) { + n += 8; + } + return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL)); + } + } + + // The epilogue to cover the last (minLength % 8) elements. + for (int i = minWords * LONG_BYTES; i < minLength; i++) { + int a = (buffer1[offset1 + i] & 0xff); + int b = (buffer2[offset2 + i] & 0xff); + if (a != b) { + return a - b; + } + } + return length1 - length2; } } - return length1 - length2; } /** Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java?rev=1584166&r1=1584165&r2=1584166&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java (original) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java Wed Apr 2 20:49:19 2014 @@ -24,7 +24,10 @@ import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; +import java.util.Random; import junit.framework.TestCase; @@ -289,4 +292,80 @@ public class TestBytes extends TestCase Assert.assertArrayEquals("appendToTail(a, 0, 6)", Bytes.appendToTail(a, 2, (byte) 6), new byte[] { 1, 2, 6, 6 }); } + + /** + * The rows in a given region follow the following pattern: + * [PREFIX BYTES][ID BYTES] + * + * @param numRows : number of rows to return in the list. + * @param prefixLength : length of common prefix. + * @param length : length of the rest of the row. + * @return : List of rows generated + */ + private List<byte[]> getRowsRandom(int numRows, int prefixLength, int length) { + Random r = new Random(); + byte[] prefixBytes = new byte[prefixLength]; + r.nextBytes(prefixBytes); + List<byte[]> list = new ArrayList<byte[]>(); + for (int i=0; i<numRows; i++) { + byte[] b = new byte[length]; + r.nextBytes(b); + byte[] finalBytes = new byte[prefixLength + length]; + Bytes.putBytes(finalBytes, 0, prefixBytes, 0, prefixLength); + Bytes.putBytes(finalBytes, prefixLength, b, 0, b.length); + list.add(finalBytes); + } + return list; + } + + public void testComparatorPerfRandom() { + // The rows in a given region follow the following pattern: + // [PREFIX BYTES][ID BYTES] + // With long prefixes, the comparison using guava is faster. + // With fewer common bytes, the guava comparison is slower. + for (int PREFIX = 50; PREFIX >= 0; PREFIX -= 5) { + int ID = 100; + int numRows = 10000; + List<byte[]> list = getRowsRandom(numRows, PREFIX, ID); + + // Correctness + for (int i=0; i<numRows; i++) { + for (int j=0; j<numRows; j++) { + Bytes.useGuavaBytesComparision = true; + int bg = Bytes.compareTo(list.get(i), list.get(j)); + Bytes.useGuavaBytesComparision = false; + int bs = Bytes.compareTo(list.get(i), list.get(j)); + assertTrue(bg == bs); + } + } + + // Comparing the Bytes + boolean[] bools = new boolean[]{true, false}; + long[] timeNs = new long[2]; + + for (int idx = 0; idx < 2; idx++) { + Bytes.useGuavaBytesComparision = bools[idx]; + long st = System.nanoTime(); + for (int i=0; i<numRows; i++) { + for (int j=0; j<numRows; j++) { + Bytes.compareTo(list.get(i), list.get(j)); + } + } + long en = System.nanoTime(); + timeNs[idx] += (en - st); + } + double gain = (timeNs[1] - timeNs[0]) / ((double)timeNs[1]); + System.out.println("Prefix : " + PREFIX + ", gain : " + gain * 100 + " "); + if (PREFIX > 20) { + assertTrue(gain > 0.1); + assertTrue(timeNs[1] > timeNs[0]); + } else if (PREFIX < 10) { + assertTrue(gain < -0.1); + if (PREFIX == 0) { + assertTrue(gain < -0.5); + } + assertTrue(timeNs[1] < timeNs[0]); + } + } + } }
