This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new fbd258f2d0c HBASE-28025 Enhance ByteBufferUtils.findCommonPrefix to 
compare 8 bytes each time (#5354)
fbd258f2d0c is described below

commit fbd258f2d0c4df8b5d07f7cfc684784ad8861f94
Author: jbewing <jbew...@live.com>
AuthorDate: Sun Aug 20 02:53:22 2023 -0400

    HBASE-28025 Enhance ByteBufferUtils.findCommonPrefix to compare 8 bytes 
each time (#5354)
    
    Signed-off-by: Duo Zhang <zhang...@apache.org>
    (cherry picked from commit dae078e5bc342012b49cd066027eb53ae9a21280)
---
 .../apache/hadoop/hbase/util/ByteBufferUtils.java  | 185 +++++++++++++++++----
 .../java/org/apache/hadoop/hbase/util/Bytes.java   | 107 +++++++++++-
 .../hadoop/hbase/util/TestByteBufferUtils.java     |  31 ++++
 .../org/apache/hadoop/hbase/util/TestBytes.java    |  43 +++++
 4 files changed, 329 insertions(+), 37 deletions(-)

diff --git 
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
index 7bd6aac06fb..6746323e98c 100644
--- 
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
+++ 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
@@ -83,6 +83,14 @@ public final class ByteBufferUtils {
     abstract int putLong(ByteBuffer buffer, int index, long val);
   }
 
+  static abstract class CommonPrefixer {
+    abstract int findCommonPrefix(ByteBuffer left, int leftOffset, int 
leftLength, byte[] right,
+      int rightOffset, int rightLength);
+
+    abstract int findCommonPrefix(ByteBuffer left, int leftOffset, int 
leftLength, ByteBuffer right,
+      int rightOffset, int rightLength);
+  }
+
   static class ComparerHolder {
     static final String UNSAFE_COMPARER_NAME = ComparerHolder.class.getName() 
+ "$UnsafeComparer";
 
@@ -325,6 +333,111 @@ public final class ByteBufferUtils {
     }
   }
 
+  static class CommonPrefixerHolder {
+    static final String UNSAFE_COMMON_PREFIXER_NAME =
+      CommonPrefixerHolder.class.getName() + "$UnsafeCommonPrefixer";
+
+    static final CommonPrefixer BEST_COMMON_PREFIXER = getBestCommonPrefixer();
+
+    static CommonPrefixer getBestCommonPrefixer() {
+      try {
+        Class<? extends CommonPrefixer> theClass =
+          
Class.forName(UNSAFE_COMMON_PREFIXER_NAME).asSubclass(CommonPrefixer.class);
+
+        return theClass.getConstructor().newInstance();
+      } catch (Throwable t) { // ensure we really catch *everything*
+        return PureJavaCommonPrefixer.INSTANCE;
+      }
+    }
+
+    static final class PureJavaCommonPrefixer extends CommonPrefixer {
+      static final PureJavaCommonPrefixer INSTANCE = new 
PureJavaCommonPrefixer();
+
+      private PureJavaCommonPrefixer() {
+      }
+
+      @Override
+      public int findCommonPrefix(ByteBuffer left, int leftOffset, int 
leftLength, byte[] right,
+        int rightOffset, int rightLength) {
+        int length = Math.min(leftLength, rightLength);
+        int result = 0;
+
+        while (
+          result < length
+            && ByteBufferUtils.toByte(left, leftOffset + result) == 
right[rightOffset + result]
+        ) {
+          result++;
+        }
+
+        return result;
+      }
+
+      @Override
+      int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, 
ByteBuffer right,
+        int rightOffset, int rightLength) {
+        int length = Math.min(leftLength, rightLength);
+        int result = 0;
+
+        while (
+          result < length && ByteBufferUtils.toByte(left, leftOffset + result)
+              == ByteBufferUtils.toByte(right, rightOffset + result)
+        ) {
+          result++;
+        }
+
+        return result;
+      }
+    }
+
+    static final class UnsafeCommonPrefixer extends CommonPrefixer {
+
+      static {
+        if (!UNSAFE_UNALIGNED) {
+          throw new Error();
+        }
+      }
+
+      public UnsafeCommonPrefixer() {
+      }
+
+      @Override
+      public int findCommonPrefix(ByteBuffer left, int leftOffset, int 
leftLength, byte[] right,
+        int rightOffset, int rightLength) {
+        long offset1Adj;
+        Object refObj1 = null;
+        if (left.isDirect()) {
+          offset1Adj = leftOffset + UnsafeAccess.directBufferAddress(left);
+        } else {
+          offset1Adj = leftOffset + left.arrayOffset() + 
UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
+          refObj1 = left.array();
+        }
+        return findCommonPrefixUnsafe(refObj1, offset1Adj, leftLength, right,
+          rightOffset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET, rightLength);
+      }
+
+      @Override
+      public int findCommonPrefix(ByteBuffer left, int leftOffset, int 
leftLength, ByteBuffer right,
+        int rightOffset, int rightLength) {
+        long offset1Adj, offset2Adj;
+        Object refObj1 = null, refObj2 = null;
+        if (left.isDirect()) {
+          offset1Adj = leftOffset + UnsafeAccess.directBufferAddress(left);
+        } else {
+          offset1Adj = leftOffset + left.arrayOffset() + 
UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
+          refObj1 = left.array();
+        }
+        if (right.isDirect()) {
+          offset2Adj = rightOffset + UnsafeAccess.directBufferAddress(right);
+        } else {
+          offset2Adj = rightOffset + right.arrayOffset() + 
UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
+          refObj2 = right.array();
+        }
+        return findCommonPrefixUnsafe(refObj1, offset1Adj, leftLength, 
refObj2, offset2Adj,
+          rightLength);
+      }
+    }
+  }
+
   /**
    * Similar to {@link WritableUtils#writeVLong(java.io.DataOutput, long)}, 
but writes to a
    * {@link ByteBuffer}.
@@ -769,14 +882,7 @@ public final class ByteBufferUtils {
    */
   public static int findCommonPrefix(byte[] left, int leftOffset, int 
leftLength, byte[] right,
     int rightOffset, int rightLength) {
-    int length = Math.min(leftLength, rightLength);
-    int result = 0;
-
-    while (result < length && left[leftOffset + result] == right[rightOffset + 
result]) {
-      result++;
-    }
-
-    return result;
+    return Bytes.findCommonPrefix(left, right, leftLength, rightLength, 
leftOffset, rightOffset);
   }
 
   /**
@@ -790,17 +896,8 @@ public final class ByteBufferUtils {
    */
   public static int findCommonPrefix(ByteBuffer left, int leftOffset, int 
leftLength,
     ByteBuffer right, int rightOffset, int rightLength) {
-    int length = Math.min(leftLength, rightLength);
-    int result = 0;
-
-    while (
-      result < length && ByteBufferUtils.toByte(left, leftOffset + result)
-          == ByteBufferUtils.toByte(right, rightOffset + result)
-    ) {
-      result++;
-    }
-
-    return result;
+    return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, 
leftOffset, leftLength,
+      right, rightOffset, rightLength);
   }
 
   /**
@@ -814,17 +911,8 @@ public final class ByteBufferUtils {
    */
   public static int findCommonPrefix(ByteBuffer left, int leftOffset, int 
leftLength, byte[] right,
     int rightOffset, int rightLength) {
-    int length = Math.min(leftLength, rightLength);
-    int result = 0;
-
-    while (
-      result < length
-        && ByteBufferUtils.toByte(left, leftOffset + result) == 
right[rightOffset + result]
-    ) {
-      result++;
-    }
-
-    return result;
+    return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, 
leftOffset, leftLength,
+      right, rightOffset, rightLength);
   }
 
   /**
@@ -997,6 +1085,43 @@ public final class ByteBufferUtils {
     return l1 - l2;
   }
 
+  static int findCommonPrefixUnsafe(Object left, long leftOffset, int 
leftLength, Object right,
+    long rightOffset, int rightLength) {
+    final int stride = 8;
+    final int minLength = Math.min(leftLength, rightLength);
+    int strideLimit = minLength & ~(stride - 1);
+    int result = 0;
+    int i;
+
+    for (i = 0; i < strideLimit; i += stride) {
+      long lw = HBasePlatformDependent.getLong(left, leftOffset + (long) i);
+      long rw = HBasePlatformDependent.getLong(right, rightOffset + (long) i);
+
+      if (lw != rw) {
+        if (!UnsafeAccess.LITTLE_ENDIAN) {
+          return result + (Long.numberOfLeadingZeros(lw ^ rw) / 
Bytes.SIZEOF_LONG);
+        } else {
+          return result + (Long.numberOfTrailingZeros(lw ^ rw) / 
Bytes.SIZEOF_LONG);
+        }
+      } else {
+        result += Bytes.SIZEOF_LONG;
+      }
+    }
+
+    // The epilogue to cover the last (minLength % stride) elements.
+    for (; i < minLength; i++) {
+      byte il = HBasePlatformDependent.getByte(left, leftOffset + i);
+      byte ir = HBasePlatformDependent.getByte(right, rightOffset + i);
+      if (il != ir) {
+        return result;
+      } else {
+        result++;
+      }
+    }
+
+    return result;
+  }
+
   /**
    * Reads a short value at the given buffer's offset.
    * @param buffer input byte buffer to read
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
index afa222ddc41..1c5da4ed034 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
@@ -1305,6 +1305,11 @@ public class Bytes implements Comparable<Bytes> {
 
   }
 
+  static abstract class CommonPrefixer {
+    abstract int findCommonPrefix(byte[] left, int leftOffset, int leftLength, 
byte[] right,
+      int rightOffset, int rightLength);
+  }
+
   @InterfaceAudience.Private
   static Comparer<byte[]> lexicographicalComparerJavaImpl() {
     return LexicographicalComparerHolder.PureJavaComparer.INSTANCE;
@@ -1582,6 +1587,99 @@ public class Bytes implements Comparable<Bytes> {
     }
   }
 
+  static class CommonPrefixerHolder {
+    static final String UNSAFE_COMMON_PREFIXER_NAME =
+      CommonPrefixerHolder.class.getName() + "$UnsafeCommonPrefixer";
+
+    static final CommonPrefixer BEST_COMMON_PREFIXER = getBestCommonPrefixer();
+
+    static CommonPrefixer getBestCommonPrefixer() {
+      try {
+        Class<? extends CommonPrefixer> theClass =
+          
Class.forName(UNSAFE_COMMON_PREFIXER_NAME).asSubclass(CommonPrefixer.class);
+
+        return theClass.getConstructor().newInstance();
+      } catch (Throwable t) { // ensure we really catch *everything*
+        return CommonPrefixerHolder.PureJavaCommonPrefixer.INSTANCE;
+      }
+    }
+
+    static final class PureJavaCommonPrefixer extends CommonPrefixer {
+      static final PureJavaCommonPrefixer INSTANCE = new 
PureJavaCommonPrefixer();
+
+      private PureJavaCommonPrefixer() {
+      }
+
+      @Override
+      public int findCommonPrefix(byte[] left, int leftOffset, int leftLength, 
byte[] right,
+        int rightOffset, int rightLength) {
+        int length = Math.min(leftLength, rightLength);
+        int result = 0;
+
+        while (result < length && left[leftOffset + result] == 
right[rightOffset + result]) {
+          result++;
+        }
+        return result;
+      }
+    }
+
+    static final class UnsafeCommonPrefixer extends CommonPrefixer {
+
+      static {
+        if (!UNSAFE_UNALIGNED) {
+          throw new Error();
+        }
+
+        // sanity check - this should never fail
+        if (HBasePlatformDependent.arrayIndexScale(byte[].class) != 1) {
+          throw new AssertionError();
+        }
+      }
+
+      public UnsafeCommonPrefixer() {
+      }
+
+      @Override
+      public int findCommonPrefix(byte[] left, int leftOffset, int leftLength, 
byte[] right,
+        int rightOffset, int rightLength) {
+        final int stride = 8;
+        final int minLength = Math.min(leftLength, rightLength);
+        int strideLimit = minLength & ~(stride - 1);
+        final long leftOffsetAdj = leftOffset + 
UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
+        final long rightOffsetAdj = rightOffset + 
UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
+        int result = 0;
+        int i;
+
+        for (i = 0; i < strideLimit; i += stride) {
+          long lw = HBasePlatformDependent.getLong(left, leftOffsetAdj + i);
+          long rw = HBasePlatformDependent.getLong(right, rightOffsetAdj + i);
+          if (lw != rw) {
+            if (!UnsafeAccess.LITTLE_ENDIAN) {
+              return result + (Long.numberOfLeadingZeros(lw ^ rw) / 
Bytes.SIZEOF_LONG);
+            } else {
+              return result + (Long.numberOfTrailingZeros(lw ^ rw) / 
Bytes.SIZEOF_LONG);
+            }
+          } else {
+            result += Bytes.SIZEOF_LONG;
+          }
+        }
+
+        // The epilogue to cover the last (minLength % stride) elements.
+        for (; i < minLength; i++) {
+          int il = (left[leftOffset + i]);
+          int ir = (right[rightOffset + i]);
+          if (il != ir) {
+            return result;
+          } else {
+            result++;
+          }
+        }
+
+        return result;
+      }
+    }
+  }
+
   /**
    * Lexicographically determine the equality of two arrays.
    * @param left  left operand
@@ -2618,12 +2716,7 @@ public class Bytes implements Comparable<Bytes> {
 
   public static int findCommonPrefix(byte[] left, byte[] right, int 
leftLength, int rightLength,
     int leftOffset, int rightOffset) {
-    int length = Math.min(leftLength, rightLength);
-    int result = 0;
-
-    while (result < length && left[leftOffset + result] == right[rightOffset + 
result]) {
-      result++;
-    }
-    return result;
+    return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, 
leftOffset, leftLength,
+      right, rightOffset, rightLength);
   }
 }
diff --git 
a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java
 
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java
index 1be4e29ebd2..7d38690da77 100644
--- 
a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java
+++ 
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java
@@ -606,6 +606,37 @@ public class TestByteBufferUtils {
     assertTrue(ByteBufferUtils.equals(bb, 0, a.length, a, 0, a.length));
   }
 
+  @Test
+  public void testFindCommonPrefix() {
+    ByteBuffer bb1 = ByteBuffer.allocate(135);
+    ByteBuffer bb2 = ByteBuffer.allocate(135);
+    ByteBuffer bb3 = ByteBuffer.allocateDirect(135);
+    byte[] b = new byte[71];
+
+    fillBB(bb1, (byte) 5);
+    fillBB(bb2, (byte) 5);
+    fillBB(bb3, (byte) 5);
+    fillArray(b, (byte) 5);
+
+    assertEquals(135,
+      ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, 
bb2.remaining()));
+    assertEquals(71, ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), 
b, 0, b.length));
+    assertEquals(135,
+      ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb3, 0, 
bb3.remaining()));
+    assertEquals(71, ByteBufferUtils.findCommonPrefix(bb3, 0, bb3.remaining(), 
b, 0, b.length));
+
+    b[13] = 9;
+    assertEquals(13, ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), 
b, 0, b.length));
+
+    bb2.put(134, (byte) 6);
+    assertEquals(134,
+      ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, 
bb2.remaining()));
+
+    bb2.put(6, (byte) 4);
+    assertEquals(6,
+      ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, 
bb2.remaining()));
+  }
+
   private static void fillBB(ByteBuffer bb, byte b) {
     for (int i = bb.position(); i < bb.limit(); i++) {
       bb.put(i, b);
diff --git 
a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java 
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java
index 14be2f4cc37..b7434895998 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java
@@ -585,4 +585,47 @@ public class TestBytes {
       assertArrayEquals(testData, result);
     }
   }
+
+  @Test
+  public void testFindCommonPrefix() throws Exception {
+    testFindCommonPrefix(false);
+  }
+
+  @Test
+  public void testFindCommonPrefixUnsafe() throws Exception {
+    testFindCommonPrefix(true);
+  }
+
+  private static void testFindCommonPrefix(boolean unsafe) throws Exception {
+    setUnsafe(unsafe);
+    try {
+      // tests for common prefixes less than 8 bytes in length (i.e. using 
non-vectorized path)
+      byte[] hello = Bytes.toBytes("hello");
+      byte[] helloWorld = Bytes.toBytes("helloworld");
+
+      assertEquals(5,
+        Bytes.findCommonPrefix(hello, helloWorld, hello.length, 
helloWorld.length, 0, 0));
+      assertEquals(5, Bytes.findCommonPrefix(hello, hello, hello.length, 
hello.length, 0, 0));
+      assertEquals(3,
+        Bytes.findCommonPrefix(hello, hello, hello.length - 2, hello.length - 
2, 2, 2));
+      assertEquals(0, Bytes.findCommonPrefix(hello, hello, 0, 0, 0, 0));
+
+      // tests for common prefixes greater than 8 bytes in length which may 
use the vectorized path
+      byte[] hellohello = Bytes.toBytes("hellohello");
+      byte[] hellohellohi = Bytes.toBytes("hellohellohi");
+
+      assertEquals(10, Bytes.findCommonPrefix(hellohello, hellohellohi, 
hellohello.length,
+        hellohellohi.length, 0, 0));
+      assertEquals(10, Bytes.findCommonPrefix(hellohellohi, hellohello, 
hellohellohi.length,
+        hellohello.length, 0, 0));
+      assertEquals(10,
+        Bytes.findCommonPrefix(hellohello, hellohello, hellohello.length, 
hellohello.length, 0, 0));
+
+      hellohello[2] = 0;
+      assertEquals(2, Bytes.findCommonPrefix(hellohello, hellohellohi, 
hellohello.length,
+        hellohellohi.length, 0, 0));
+    } finally {
+      setUnsafe(HBasePlatformDependent.unaligned());
+    }
+  }
 }

Reply via email to