This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new c39cf40fd5c [Upsert] add xxhash for PrimaryKey compression (#17253)
c39cf40fd5c is described below

commit c39cf40fd5c41bf84b55efc26920b6d723d02f0c
Author: Qiaochu Liu <[email protected]>
AuthorDate: Sun Nov 23 15:42:38 2025 -0800

    [Upsert] add xxhash for PrimaryKey compression (#17253)
    
    * [Upsert] add xxhash for PrimaryKey compression
    
    * add support for xxh_128
    
    * fix lint
---
 .../pinot/segment/local/utils/HashUtils.java       | 30 ++++++++++++++++++++++
 .../pinot/segment/local/utils/HashUtilsTest.java   |  4 +++
 .../pinot/spi/config/table/HashFunction.java       |  2 +-
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/HashUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/HashUtils.java
index 6e2bcf6e9ba..5070bbf9270 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/HashUtils.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/HashUtils.java
@@ -18,10 +18,13 @@
  */
 package org.apache.pinot.segment.local.utils;
 
+import com.dynatrace.hash4j.hashing.HashValue128;
+import com.dynatrace.hash4j.hashing.Hasher128;
 import com.google.common.hash.Hashing;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.util.UUID;
+import net.jpountz.xxhash.XXHashFactory;
 import org.apache.pinot.spi.config.table.HashFunction;
 import org.apache.pinot.spi.data.readers.PrimaryKey;
 import org.apache.pinot.spi.utils.ByteArray;
@@ -63,6 +66,29 @@ public class HashUtils {
     return result;
   }
 
+  /** Compute 64-bit xxHash (XXH64) with seed=0, returned as big-endian 8-byte array. */
+  public static byte[] hashXXHash(byte[] bytes) {
+    XXHashFactory xxhFactory = XXHashFactory.fastestInstance();
+    long hash64 = xxhFactory.hash64().hash(bytes, 0, bytes.length, 0L);
+    ByteBuffer buf = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN);
+    buf.putLong(hash64);
+    return buf.array();
+  }
+
+  /**
+   * Compute xxh128 using hash4j (XXH3-128). Returns a 16-byte array (big-endian order for each 64-bit half).
+   */
+  public static byte[] hashXXH128(byte[] bytes) {
+    Hasher128 hasher = com.dynatrace.hash4j.hashing.Hashing.xxh3_128();
+    HashValue128 hashValue128 = hasher.hashBytesTo128Bits(bytes);
+
+    // Encode as big-endian 16 bytes
+    ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.BIG_ENDIAN);
+    buf.putLong(hashValue128.getMostSignificantBits());
+    buf.putLong(hashValue128.getLeastSignificantBits());
+    return buf.array();
+  }
+
   public static Object hashPrimaryKey(PrimaryKey primaryKey, HashFunction hashFunction) {
     switch (hashFunction) {
       case NONE:
@@ -73,6 +99,10 @@ public class HashUtils {
         return new ByteArray(HashUtils.hashMurmur3(primaryKey.asBytes()));
       case UUID:
         return new ByteArray(HashUtils.hashUUID(primaryKey));
+      case XXHASH:
+        return new ByteArray(HashUtils.hashXXHash(primaryKey.asBytes()));
+      case XXH128:
+        return new ByteArray(HashUtils.hashXXH128(primaryKey.asBytes()));
       default:
         throw new IllegalArgumentException(String.format("Unrecognized hash function %s", hashFunction));
     }
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/HashUtilsTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/HashUtilsTest.java
index c0f67ec1703..9f4649b162a 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/HashUtilsTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/HashUtilsTest.java
@@ -33,6 +33,10 @@ public class HashUtilsTest {
         "5eb63bbbe01eeed093cb22bb8f5acdc3");
     assertEquals(BytesUtils.toHexString(HashUtils.hashMurmur3("hello world".getBytes())),
         "0e617feb46603f53b163eb607d4697ab");
+    assertEquals(BytesUtils.toHexString(HashUtils.hashXXHash("hello world".getBytes())),
+        "45ab6734b21e6968");
+    assertEquals(BytesUtils.toHexString(HashUtils.hashXXH128("hello world".getBytes())),
+        "df8d09e93f874900a99b8775cc15b6c7");
   }
 
   @Test
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/HashFunction.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/HashFunction.java
index effd15ff9b6..5185b92119d 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/HashFunction.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/HashFunction.java
@@ -19,5 +19,5 @@
 package org.apache.pinot.spi.config.table;
 
 public enum HashFunction {
-  NONE, MD5, MURMUR3, UUID
+  NONE, MD5, MURMUR3, UUID, XXHASH, XXH128
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to