This is an automated email from the ASF dual-hosted git repository.

mimaison pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/kafka.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 96f83be096b KAFKA-19861: Optimization of the Murmur2 hash computation 
(#20359)
96f83be096b is described below

commit 96f83be096bfb3855dee7e37418cd22208d47791
Author: Otmar Ertl <[email protected]>
AuthorDate: Tue Nov 4 16:37:51 2025 +0100

    KAFKA-19861: Optimization of the Murmur2 hash computation (#20359)
    
    
    Reviewers: Mickael Maison <[email protected]>
---
 .../java/org/apache/kafka/common/utils/Utils.java  | 20 +++--
 .../org/apache/kafka/common/utils/UtilsTest.java   | 31 +++++++
 .../apache/kafka/jmh/clients/Murmur2Benchmark.java | 95 ++++++++++++++++++++++
 3 files changed, 139 insertions(+), 7 deletions(-)

diff --git a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java 
b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java
index dc7b0e7625a..e1dc97a17e4 100644
--- a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java
+++ b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java
@@ -33,6 +33,8 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.PrintWriter;
 import java.io.StringWriter;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Modifier;
@@ -111,6 +113,9 @@ public final class Utils {
 
     private static final Logger log = LoggerFactory.getLogger(Utils.class);
 
+    private static final VarHandle INT_HANDLE =
+            MethodHandles.byteArrayViewVarHandle(int[].class, 
ByteOrder.LITTLE_ENDIAN);
+
     /**
      * Get a sorted list representation of a collection.
      * @param collection The collection to sort
@@ -500,11 +505,11 @@ public final class Utils {
 
         // Initialize the hash to a random value
         int h = seed ^ length;
-        int length4 = length / 4;
+        int length4 = length >> 2;
 
         for (int i = 0; i < length4; i++) {
-            final int i4 = i * 4;
-            int k = (data[i4 + 0] & 0xff) + ((data[i4 + 1] & 0xff) << 8) + 
((data[i4 + 2] & 0xff) << 16) + ((data[i4 + 3] & 0xff) << 24);
+            final int i4 = i << 2;
+            int k = (int) INT_HANDLE.get(data, i4);
             k *= m;
             k ^= k >>> r;
             k *= m;
@@ -513,13 +518,14 @@ public final class Utils {
         }
 
         // Handle the last few bytes of the input array
-        switch (length % 4) {
+        int index = length4 << 2;
+        switch (length - index) {
             case 3:
-                h ^= (data[(length & ~3) + 2] & 0xff) << 16;
+                h ^= (data[index + 2] & 0xff) << 16;
             case 2:
-                h ^= (data[(length & ~3) + 1] & 0xff) << 8;
+                h ^= (data[index + 1] & 0xff) << 8;
             case 1:
-                h ^= data[length & ~3] & 0xff;
+                h ^= data[index] & 0xff;
                 h *= m;
         }
 
diff --git a/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java 
b/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java
index 74518fe0f44..2f325d7bcf0 100755
--- a/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java
@@ -62,6 +62,7 @@ import java.util.Map;
 import java.util.Properties;
 import java.util.Random;
 import java.util.Set;
+import java.util.SplittableRandom;
 import java.util.TreeSet;
 import java.util.concurrent.Callable;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -117,6 +118,36 @@ public class UtilsTest {
         }
     }
 
+    private static String toHexString(byte[] buf) {
+        StringBuilder bld = new StringBuilder();
+        for (byte b : buf) {
+            bld.append(String.format("%02x", b));
+        }
+        return bld.toString();
+    }
+
+    @Test
+    public void testMurmur2Checksum() {
+        // calculates the checksum of hashes of many different random byte 
arrays of variable length
+        // this test detects any incompatible changes to the Murmur2 
implementation with near certainty
+        int numTrials = 100;
+        int maxLen = 1000;
+        long seed = 0;
+        SplittableRandom random = new SplittableRandom(seed);
+        long checksum = 0;
+
+        for (int len = 0; len <= maxLen; ++len) {
+            byte[] data = new byte[len];
+            for (int i = 0; i < numTrials; ++i) {
+                random.nextBytes(data);
+                int hash = Utils.murmur2(data);
+                checksum += Integer.toUnsignedLong(hash);
+            }
+        }
+
+        assertEquals(0xc3b8cf7c99fcL, checksum);
+    }
+
     @ParameterizedTest
     @CsvSource(value = {"PLAINTEXT", "SASL_PLAINTEXT", "SSL", "SASL_SSL"})
     public void testGetHostValid(String protocol) {
diff --git 
a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/clients/Murmur2Benchmark.java
 
b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/clients/Murmur2Benchmark.java
new file mode 100644
index 00000000000..f7ede55629d
--- /dev/null
+++ 
b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/clients/Murmur2Benchmark.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.jmh.clients;
+
+import org.apache.kafka.common.utils.Utils;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.util.SplittableRandom;
+import java.util.concurrent.TimeUnit;
+
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
+
+public class Murmur2Benchmark {
+
+    private static final int SAMPLE_SIZE = 1000;
+
+    public enum TestCase {
+        TEST_CASE_1_4(1, 4, 0xcc6444ca02edfbd0L),
+        TEST_CASE_1_16(1, 16, 0x187c616cabc3e0a7L),
+        TEST_CASE_1_64(1, 64, 0xa820ddbf8f76273dL),
+        TEST_CASE_1_256(1, 256, 0x898e4c60ab901376L),
+        TEST_CASE_4_4(4, 4, 0x91c73b19f97f9e07L),
+        TEST_CASE_16_16(16, 16, 0xae0ddf78c2705d4eL),
+        TEST_CASE_64_64(64, 64, 0x9024a9c2f7355dcdL),
+        TEST_CASE_256_256(256, 256, 0x548b0ff34eb8e4aaL);
+
+        TestCase(int minLen, int maxLen, long seed) {
+            data = createRandomByteArrays(SAMPLE_SIZE, minLen, maxLen, seed);
+        }
+
+        byte[][] data;
+    }
+
+    private static byte[][] createRandomByteArrays(int size, int minLen, int 
maxLen, long seed) {
+        byte[][] result = new byte[size][];
+        SplittableRandom random = new SplittableRandom(seed);
+        for (int i = 0; i < size; ++i) {
+            result[i] = createRandomByteArray(minLen, maxLen, random);
+        }
+        return result;
+    }
+
+    private static byte[] createRandomByteArray(int minLen, int maxLen, 
SplittableRandom random) {
+        int len = random.nextInt(minLen, maxLen + 1);
+        byte[] b = new byte[len];
+        random.nextBytes(b);
+        return b;
+    }
+
+    @State(Scope.Benchmark)
+    public static class TestCaseState {
+        @Param
+        public TestCase testCase;
+    }
+
+    @Benchmark
+    @BenchmarkMode(Mode.AverageTime)
+    @OutputTimeUnit(TimeUnit.MICROSECONDS)
+    @Warmup(iterations = 5, time = 1000, timeUnit = MILLISECONDS)
+    @Measurement(iterations = 20, time = 1000, timeUnit = MILLISECONDS)
+    @Fork(value = 1, warmups = 0)
+    public void hashBytes(TestCaseState testCaseState, Blackhole blackhole) {
+        var data = testCaseState.testCase.data;
+        for (byte[] b : data) {
+            blackhole.consume(Utils.murmur2(b));
+        }
+    }
+
+}

Reply via email to