This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new dae078e5bc3 HBASE-28025 Enhance ByteBufferUtils.findCommonPrefix to
compare 8 bytes each time (#5354)
dae078e5bc3 is described below
commit dae078e5bc342012b49cd066027eb53ae9a21280
Author: jbewing <[email protected]>
AuthorDate: Sun Aug 20 02:53:22 2023 -0400
HBASE-28025 Enhance ByteBufferUtils.findCommonPrefix to compare 8 bytes
each time (#5354)
Signed-off-by: Duo Zhang <[email protected]>
---
.../apache/hadoop/hbase/util/ByteBufferUtils.java | 185 +++++++++++++++++----
.../java/org/apache/hadoop/hbase/util/Bytes.java | 107 +++++++++++-
.../hadoop/hbase/util/TestByteBufferUtils.java | 31 ++++
.../org/apache/hadoop/hbase/util/TestBytes.java | 43 +++++
4 files changed, 329 insertions(+), 37 deletions(-)
diff --git
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
index a5a5c5105db..054de74d7d1 100644
---
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
+++
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
@@ -80,6 +80,14 @@ public final class ByteBufferUtils {
abstract int putLong(ByteBuffer buffer, int index, long val);
}
+ static abstract class CommonPrefixer {
+ abstract int findCommonPrefix(ByteBuffer left, int leftOffset, int
leftLength, byte[] right,
+ int rightOffset, int rightLength);
+
+ abstract int findCommonPrefix(ByteBuffer left, int leftOffset, int
leftLength, ByteBuffer right,
+ int rightOffset, int rightLength);
+ }
+
static class ComparerHolder {
static final String UNSAFE_COMPARER_NAME = ComparerHolder.class.getName()
+ "$UnsafeComparer";
@@ -322,6 +330,111 @@ public final class ByteBufferUtils {
}
}
+ static class CommonPrefixerHolder {
+ static final String UNSAFE_COMMON_PREFIXER_NAME =
+ CommonPrefixerHolder.class.getName() + "$UnsafeCommonPrefixer";
+
+ static final CommonPrefixer BEST_COMMON_PREFIXER = getBestCommonPrefixer();
+
+ static CommonPrefixer getBestCommonPrefixer() {
+ try {
+ Class<? extends CommonPrefixer> theClass =
+
Class.forName(UNSAFE_COMMON_PREFIXER_NAME).asSubclass(CommonPrefixer.class);
+
+ return theClass.getConstructor().newInstance();
+ } catch (Throwable t) { // ensure we really catch *everything*
+ return PureJavaCommonPrefixer.INSTANCE;
+ }
+ }
+
+ static final class PureJavaCommonPrefixer extends CommonPrefixer {
+ static final PureJavaCommonPrefixer INSTANCE = new
PureJavaCommonPrefixer();
+
+ private PureJavaCommonPrefixer() {
+ }
+
+ @Override
+ public int findCommonPrefix(ByteBuffer left, int leftOffset, int
leftLength, byte[] right,
+ int rightOffset, int rightLength) {
+ int length = Math.min(leftLength, rightLength);
+ int result = 0;
+
+ while (
+ result < length
+ && ByteBufferUtils.toByte(left, leftOffset + result) ==
right[rightOffset + result]
+ ) {
+ result++;
+ }
+
+ return result;
+ }
+
+ @Override
+ int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength,
ByteBuffer right,
+ int rightOffset, int rightLength) {
+ int length = Math.min(leftLength, rightLength);
+ int result = 0;
+
+ while (
+ result < length && ByteBufferUtils.toByte(left, leftOffset + result)
+ == ByteBufferUtils.toByte(right, rightOffset + result)
+ ) {
+ result++;
+ }
+
+ return result;
+ }
+ }
+
+ static final class UnsafeCommonPrefixer extends CommonPrefixer {
+
+ static {
+ if (!UNSAFE_UNALIGNED) {
+ throw new Error();
+ }
+ }
+
+ public UnsafeCommonPrefixer() {
+ }
+
+ @Override
+ public int findCommonPrefix(ByteBuffer left, int leftOffset, int
leftLength, byte[] right,
+ int rightOffset, int rightLength) {
+ long offset1Adj;
+ Object refObj1 = null;
+ if (left.isDirect()) {
+ offset1Adj = leftOffset + UnsafeAccess.directBufferAddress(left);
+ } else {
+ offset1Adj = leftOffset + left.arrayOffset() +
UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
+ refObj1 = left.array();
+ }
+ return findCommonPrefixUnsafe(refObj1, offset1Adj, leftLength, right,
+ rightOffset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET, rightLength);
+ }
+
+ @Override
+ public int findCommonPrefix(ByteBuffer left, int leftOffset, int
leftLength, ByteBuffer right,
+ int rightOffset, int rightLength) {
+ long offset1Adj, offset2Adj;
+ Object refObj1 = null, refObj2 = null;
+ if (left.isDirect()) {
+ offset1Adj = leftOffset + UnsafeAccess.directBufferAddress(left);
+ } else {
+ offset1Adj = leftOffset + left.arrayOffset() +
UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
+ refObj1 = left.array();
+ }
+ if (right.isDirect()) {
+ offset2Adj = rightOffset + UnsafeAccess.directBufferAddress(right);
+ } else {
+ offset2Adj = rightOffset + right.arrayOffset() +
UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
+ refObj2 = right.array();
+ }
+ return findCommonPrefixUnsafe(refObj1, offset1Adj, leftLength,
refObj2, offset2Adj,
+ rightLength);
+ }
+ }
+ }
+
/**
* Similar to {@link WritableUtils#writeVLong(java.io.DataOutput, long)},
but writes to a
* {@link ByteBuffer}.
@@ -744,14 +857,7 @@ public final class ByteBufferUtils {
*/
public static int findCommonPrefix(byte[] left, int leftOffset, int
leftLength, byte[] right,
int rightOffset, int rightLength) {
- int length = Math.min(leftLength, rightLength);
- int result = 0;
-
- while (result < length && left[leftOffset + result] == right[rightOffset +
result]) {
- result++;
- }
-
- return result;
+ return Bytes.findCommonPrefix(left, right, leftLength, rightLength,
leftOffset, rightOffset);
}
/**
@@ -765,17 +871,8 @@ public final class ByteBufferUtils {
*/
public static int findCommonPrefix(ByteBuffer left, int leftOffset, int
leftLength,
ByteBuffer right, int rightOffset, int rightLength) {
- int length = Math.min(leftLength, rightLength);
- int result = 0;
-
- while (
- result < length && ByteBufferUtils.toByte(left, leftOffset + result)
- == ByteBufferUtils.toByte(right, rightOffset + result)
- ) {
- result++;
- }
-
- return result;
+ return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left,
leftOffset, leftLength,
+ right, rightOffset, rightLength);
}
/**
@@ -789,17 +886,8 @@ public final class ByteBufferUtils {
*/
public static int findCommonPrefix(ByteBuffer left, int leftOffset, int
leftLength, byte[] right,
int rightOffset, int rightLength) {
- int length = Math.min(leftLength, rightLength);
- int result = 0;
-
- while (
- result < length
- && ByteBufferUtils.toByte(left, leftOffset + result) ==
right[rightOffset + result]
- ) {
- result++;
- }
-
- return result;
+ return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left,
leftOffset, leftLength,
+ right, rightOffset, rightLength);
}
/**
@@ -972,6 +1060,43 @@ public final class ByteBufferUtils {
return l1 - l2;
}
+ static int findCommonPrefixUnsafe(Object left, long leftOffset, int
leftLength, Object right,
+ long rightOffset, int rightLength) {
+ final int stride = 8;
+ final int minLength = Math.min(leftLength, rightLength);
+ int strideLimit = minLength & ~(stride - 1);
+ int result = 0;
+ int i;
+
+ for (i = 0; i < strideLimit; i += stride) {
+ long lw = HBasePlatformDependent.getLong(left, leftOffset + (long) i);
+ long rw = HBasePlatformDependent.getLong(right, rightOffset + (long) i);
+
+ if (lw != rw) {
+ if (!UnsafeAccess.LITTLE_ENDIAN) {
+ return result + (Long.numberOfLeadingZeros(lw ^ rw) /
Bytes.SIZEOF_LONG);
+ } else {
+ return result + (Long.numberOfTrailingZeros(lw ^ rw) /
Bytes.SIZEOF_LONG);
+ }
+ } else {
+ result += Bytes.SIZEOF_LONG;
+ }
+ }
+
+ // The epilogue to cover the last (minLength % stride) elements.
+ for (; i < minLength; i++) {
+ byte il = HBasePlatformDependent.getByte(left, leftOffset + i);
+ byte ir = HBasePlatformDependent.getByte(right, rightOffset + i);
+ if (il != ir) {
+ return result;
+ } else {
+ result++;
+ }
+ }
+
+ return result;
+ }
+
/**
* Reads a short value at the given buffer's offset.
* @param buffer input byte buffer to read
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
index 0203cc390fe..96b3dbd4a8a 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
@@ -1179,6 +1179,11 @@ public class Bytes implements Comparable<Bytes> {
}
+ static abstract class CommonPrefixer {
+ abstract int findCommonPrefix(byte[] left, int leftOffset, int leftLength,
byte[] right,
+ int rightOffset, int rightLength);
+ }
+
static Comparer<byte[]> lexicographicalComparerJavaImpl() {
return LexicographicalComparerHolder.PureJavaComparer.INSTANCE;
}
@@ -1453,6 +1458,99 @@ public class Bytes implements Comparable<Bytes> {
}
}
+ static class CommonPrefixerHolder {
+ static final String UNSAFE_COMMON_PREFIXER_NAME =
+ CommonPrefixerHolder.class.getName() + "$UnsafeCommonPrefixer";
+
+ static final CommonPrefixer BEST_COMMON_PREFIXER = getBestCommonPrefixer();
+
+ static CommonPrefixer getBestCommonPrefixer() {
+ try {
+ Class<? extends CommonPrefixer> theClass =
+
Class.forName(UNSAFE_COMMON_PREFIXER_NAME).asSubclass(CommonPrefixer.class);
+
+ return theClass.getConstructor().newInstance();
+ } catch (Throwable t) { // ensure we really catch *everything*
+ return CommonPrefixerHolder.PureJavaCommonPrefixer.INSTANCE;
+ }
+ }
+
+ static final class PureJavaCommonPrefixer extends CommonPrefixer {
+ static final PureJavaCommonPrefixer INSTANCE = new
PureJavaCommonPrefixer();
+
+ private PureJavaCommonPrefixer() {
+ }
+
+ @Override
+ public int findCommonPrefix(byte[] left, int leftOffset, int leftLength,
byte[] right,
+ int rightOffset, int rightLength) {
+ int length = Math.min(leftLength, rightLength);
+ int result = 0;
+
+ while (result < length && left[leftOffset + result] ==
right[rightOffset + result]) {
+ result++;
+ }
+ return result;
+ }
+ }
+
+ static final class UnsafeCommonPrefixer extends CommonPrefixer {
+
+ static {
+ if (!UNSAFE_UNALIGNED) {
+ throw new Error();
+ }
+
+ // sanity check - this should never fail
+ if (HBasePlatformDependent.arrayIndexScale(byte[].class) != 1) {
+ throw new AssertionError();
+ }
+ }
+
+ public UnsafeCommonPrefixer() {
+ }
+
+ @Override
+ public int findCommonPrefix(byte[] left, int leftOffset, int leftLength,
byte[] right,
+ int rightOffset, int rightLength) {
+ final int stride = 8;
+ final int minLength = Math.min(leftLength, rightLength);
+ int strideLimit = minLength & ~(stride - 1);
+ final long leftOffsetAdj = leftOffset +
UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
+ final long rightOffsetAdj = rightOffset +
UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
+ int result = 0;
+ int i;
+
+ for (i = 0; i < strideLimit; i += stride) {
+ long lw = HBasePlatformDependent.getLong(left, leftOffsetAdj + i);
+ long rw = HBasePlatformDependent.getLong(right, rightOffsetAdj + i);
+ if (lw != rw) {
+ if (!UnsafeAccess.LITTLE_ENDIAN) {
+ return result + (Long.numberOfLeadingZeros(lw ^ rw) /
Bytes.SIZEOF_LONG);
+ } else {
+ return result + (Long.numberOfTrailingZeros(lw ^ rw) /
Bytes.SIZEOF_LONG);
+ }
+ } else {
+ result += Bytes.SIZEOF_LONG;
+ }
+ }
+
+ // The epilogue to cover the last (minLength % stride) elements.
+ for (; i < minLength; i++) {
+ int il = (left[leftOffset + i]);
+ int ir = (right[rightOffset + i]);
+ if (il != ir) {
+ return result;
+ } else {
+ result++;
+ }
+ }
+
+ return result;
+ }
+ }
+ }
+
/**
* Lexicographically determine the equality of two arrays.
* @param left left operand
@@ -2429,12 +2527,7 @@ public class Bytes implements Comparable<Bytes> {
public static int findCommonPrefix(byte[] left, byte[] right, int
leftLength, int rightLength,
int leftOffset, int rightOffset) {
- int length = Math.min(leftLength, rightLength);
- int result = 0;
-
- while (result < length && left[leftOffset + result] == right[rightOffset +
result]) {
- result++;
- }
- return result;
+ return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left,
leftOffset, leftLength,
+ right, rightOffset, rightLength);
}
}
diff --git
a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java
index c824e01e425..eabfed2042c 100644
---
a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java
+++
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java
@@ -606,6 +606,37 @@ public class TestByteBufferUtils {
assertTrue(ByteBufferUtils.equals(bb, 0, a.length, a, 0, a.length));
}
+ @Test
+ public void testFindCommonPrefix() {
+ ByteBuffer bb1 = ByteBuffer.allocate(135);
+ ByteBuffer bb2 = ByteBuffer.allocate(135);
+ ByteBuffer bb3 = ByteBuffer.allocateDirect(135);
+ byte[] b = new byte[71];
+
+ fillBB(bb1, (byte) 5);
+ fillBB(bb2, (byte) 5);
+ fillBB(bb3, (byte) 5);
+ fillArray(b, (byte) 5);
+
+ assertEquals(135,
+ ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0,
bb2.remaining()));
+ assertEquals(71, ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(),
b, 0, b.length));
+ assertEquals(135,
+ ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb3, 0,
bb3.remaining()));
+ assertEquals(71, ByteBufferUtils.findCommonPrefix(bb3, 0, bb3.remaining(),
b, 0, b.length));
+
+ b[13] = 9;
+ assertEquals(13, ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(),
b, 0, b.length));
+
+ bb2.put(134, (byte) 6);
+ assertEquals(134,
+ ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0,
bb2.remaining()));
+
+ bb2.put(6, (byte) 4);
+ assertEquals(6,
+ ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0,
bb2.remaining()));
+ }
+
private static void fillBB(ByteBuffer bb, byte b) {
for (int i = bb.position(); i < bb.limit(); i++) {
bb.put(i, b);
diff --git
a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java
index 14be2f4cc37..b7434895998 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java
@@ -585,4 +585,47 @@ public class TestBytes {
assertArrayEquals(testData, result);
}
}
+
+ @Test
+ public void testFindCommonPrefix() throws Exception {
+ testFindCommonPrefix(false);
+ }
+
+ @Test
+ public void testFindCommonPrefixUnsafe() throws Exception {
+ testFindCommonPrefix(true);
+ }
+
+ private static void testFindCommonPrefix(boolean unsafe) throws Exception {
+ setUnsafe(unsafe);
+ try {
+ // tests for common prefixes less than 8 bytes in length (i.e. using
non-vectorized path)
+ byte[] hello = Bytes.toBytes("hello");
+ byte[] helloWorld = Bytes.toBytes("helloworld");
+
+ assertEquals(5,
+ Bytes.findCommonPrefix(hello, helloWorld, hello.length,
helloWorld.length, 0, 0));
+ assertEquals(5, Bytes.findCommonPrefix(hello, hello, hello.length,
hello.length, 0, 0));
+ assertEquals(3,
+ Bytes.findCommonPrefix(hello, hello, hello.length - 2, hello.length -
2, 2, 2));
+ assertEquals(0, Bytes.findCommonPrefix(hello, hello, 0, 0, 0, 0));
+
+ // tests for common prefixes greater than 8 bytes in length which may
use the vectorized path
+ byte[] hellohello = Bytes.toBytes("hellohello");
+ byte[] hellohellohi = Bytes.toBytes("hellohellohi");
+
+ assertEquals(10, Bytes.findCommonPrefix(hellohello, hellohellohi,
hellohello.length,
+ hellohellohi.length, 0, 0));
+ assertEquals(10, Bytes.findCommonPrefix(hellohellohi, hellohello,
hellohellohi.length,
+ hellohello.length, 0, 0));
+ assertEquals(10,
+ Bytes.findCommonPrefix(hellohello, hellohello, hellohello.length,
hellohello.length, 0, 0));
+
+ hellohello[2] = 0;
+ assertEquals(2, Bytes.findCommonPrefix(hellohello, hellohellohi,
hellohello.length,
+ hellohellohi.length, 0, 0));
+ } finally {
+ setUnsafe(HBasePlatformDependent.unaligned());
+ }
+ }
}