This is an automated email from the ASF dual-hosted git repository.
mimaison pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/kafka.git
The following commit(s) were added to refs/heads/trunk by this push:
new 96f83be096b KAFKA-19861: Optimization of the Murmur2 hash computation
(#20359)
96f83be096b is described below
commit 96f83be096bfb3855dee7e37418cd22208d47791
Author: Otmar Ertl <[email protected]>
AuthorDate: Tue Nov 4 16:37:51 2025 +0100
KAFKA-19861: Optimization of the Murmur2 hash computation (#20359)
Reviewers: Mickael Maison <[email protected]>
---
.../java/org/apache/kafka/common/utils/Utils.java | 20 +++--
.../org/apache/kafka/common/utils/UtilsTest.java | 31 +++++++
.../apache/kafka/jmh/clients/Murmur2Benchmark.java | 95 ++++++++++++++++++++++
3 files changed, 139 insertions(+), 7 deletions(-)
diff --git a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java
b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java
index dc7b0e7625a..e1dc97a17e4 100644
--- a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java
+++ b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java
@@ -33,6 +33,8 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Modifier;
@@ -111,6 +113,9 @@ public final class Utils {
private static final Logger log = LoggerFactory.getLogger(Utils.class);
+ private static final VarHandle INT_HANDLE =
+ MethodHandles.byteArrayViewVarHandle(int[].class,
ByteOrder.LITTLE_ENDIAN);
+
/**
* Get a sorted list representation of a collection.
* @param collection The collection to sort
@@ -500,11 +505,11 @@ public final class Utils {
// Initialize the hash to a random value
int h = seed ^ length;
- int length4 = length / 4;
+ int length4 = length >> 2;
for (int i = 0; i < length4; i++) {
- final int i4 = i * 4;
- int k = (data[i4 + 0] & 0xff) + ((data[i4 + 1] & 0xff) << 8) +
((data[i4 + 2] & 0xff) << 16) + ((data[i4 + 3] & 0xff) << 24);
+ final int i4 = i << 2;
+ int k = (int) INT_HANDLE.get(data, i4);
k *= m;
k ^= k >>> r;
k *= m;
@@ -513,13 +518,14 @@ public final class Utils {
}
// Handle the last few bytes of the input array
- switch (length % 4) {
+ int index = length4 << 2;
+ switch (length - index) {
case 3:
- h ^= (data[(length & ~3) + 2] & 0xff) << 16;
+ h ^= (data[index + 2] & 0xff) << 16;
case 2:
- h ^= (data[(length & ~3) + 1] & 0xff) << 8;
+ h ^= (data[index + 1] & 0xff) << 8;
case 1:
- h ^= data[length & ~3] & 0xff;
+ h ^= data[index] & 0xff;
h *= m;
}
diff --git a/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java
b/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java
index 74518fe0f44..2f325d7bcf0 100755
--- a/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java
@@ -62,6 +62,7 @@ import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
+import java.util.SplittableRandom;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
@@ -117,6 +118,36 @@ public class UtilsTest {
}
}
+ private static String toHexString(byte[] buf) {
+ StringBuilder bld = new StringBuilder();
+ for (byte b : buf) {
+ bld.append(String.format("%02x", b));
+ }
+ return bld.toString();
+ }
+
+ @Test
+ public void testMurmur2Checksum() {
+ // calculates the checksum of hashes of many different random byte
arrays of variable length
+ // this test detects any incompatible changes to the Murmur2
implementation with near certainty
+ int numTrials = 100;
+ int maxLen = 1000;
+ long seed = 0;
+ SplittableRandom random = new SplittableRandom(seed);
+ long checksum = 0;
+
+ for (int len = 0; len <= maxLen; ++len) {
+ byte[] data = new byte[len];
+ for (int i = 0; i < numTrials; ++i) {
+ random.nextBytes(data);
+ int hash = Utils.murmur2(data);
+ checksum += Integer.toUnsignedLong(hash);
+ }
+ }
+
+ assertEquals(0xc3b8cf7c99fcL, checksum);
+ }
+
@ParameterizedTest
@CsvSource(value = {"PLAINTEXT", "SASL_PLAINTEXT", "SSL", "SASL_SSL"})
public void testGetHostValid(String protocol) {
diff --git
a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/clients/Murmur2Benchmark.java
b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/clients/Murmur2Benchmark.java
new file mode 100644
index 00000000000..f7ede55629d
--- /dev/null
+++
b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/clients/Murmur2Benchmark.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.jmh.clients;
+
+import org.apache.kafka.common.utils.Utils;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.util.SplittableRandom;
+import java.util.concurrent.TimeUnit;
+
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
+
+public class Murmur2Benchmark {
+
+ private static final int SAMPLE_SIZE = 1000;
+
+ public enum TestCase {
+ TEST_CASE_1_4(1, 4, 0xcc6444ca02edfbd0L),
+ TEST_CASE_1_16(1, 16, 0x187c616cabc3e0a7L),
+ TEST_CASE_1_64(1, 64, 0xa820ddbf8f76273dL),
+ TEST_CASE_1_256(1, 256, 0x898e4c60ab901376L),
+ TEST_CASE_4_4(4, 4, 0x91c73b19f97f9e07L),
+ TEST_CASE_16_16(16, 16, 0xae0ddf78c2705d4eL),
+ TEST_CASE_64_64(64, 64, 0x9024a9c2f7355dcdL),
+ TEST_CASE_256_256(256, 256, 0x548b0ff34eb8e4aaL);
+
+ TestCase(int minLen, int maxLen, long seed) {
+ data = createRandomByteArrays(SAMPLE_SIZE, minLen, maxLen, seed);
+ }
+
+ byte[][] data;
+ }
+
+ private static byte[][] createRandomByteArrays(int size, int minLen, int
maxLen, long seed) {
+ byte[][] result = new byte[size][];
+ SplittableRandom random = new SplittableRandom(seed);
+ for (int i = 0; i < size; ++i) {
+ result[i] = createRandomByteArray(minLen, maxLen, random);
+ }
+ return result;
+ }
+
+ private static byte[] createRandomByteArray(int minLen, int maxLen,
SplittableRandom random) {
+ int len = random.nextInt(minLen, maxLen + 1);
+ byte[] b = new byte[len];
+ random.nextBytes(b);
+ return b;
+ }
+
+ @State(Scope.Benchmark)
+ public static class TestCaseState {
+ @Param
+ public TestCase testCase;
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ @Warmup(iterations = 5, time = 1000, timeUnit = MILLISECONDS)
+ @Measurement(iterations = 20, time = 1000, timeUnit = MILLISECONDS)
+ @Fork(value = 1, warmups = 0)
+ public void hashBytes(TestCaseState testCaseState, Blackhole blackhole) {
+ var data = testCaseState.testCase.data;
+ for (byte[] b : data) {
+ blackhole.consume(Utils.murmur2(b));
+ }
+ }
+
+}