This is an automated email from the ASF dual-hosted git repository.
placave pushed a commit to branch cpc-sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git
The following commit(s) were added to refs/heads/cpc-sketch by this push:
new 35935b8 Add murmur3 test
35935b8 is described below
commit 35935b8e7a9a8fd62fbe289531f5defad6d820b2
Author: Pierre Lacave <[email protected]>
AuthorDate: Sat Jun 29 17:15:54 2024 +0200
Add murmur3 test
---
internal/murmur3.go | 43 +++++++++++++++++++++++++++++++++++++++++++
internal/murmur3_test.go | 33 +++++++++++++++++++++++++++++++++
2 files changed, 76 insertions(+)
diff --git a/internal/murmur3.go b/internal/murmur3.go
index f5a8188..636e098 100644
--- a/internal/murmur3.go
+++ b/internal/murmur3.go
@@ -54,6 +54,49 @@ func HashInt64SliceMurmur3(key []int64, offsetLongs int, lengthLongs int, seed u
return hashState.finalMix128(k1, 0, uint64(lengthLongs)<<3)
}
+func HashByteArrMurmur3(key []byte, offsetBytes int, lengthBytes int, seed uint64) (uint64, uint64) {
+ hashState := SimpleMurmur3{h1: seed, h2: seed}
+
+ // Number of full 128-bit blocks of 16 bytes.
+ // Possible exclusion of a remainder of up to 15 bytes.
+ nblocks := lengthBytes >> 4 //bytes / 16
+
+ // Process the 128-bit blocks (the body) into the hash
+ for i := 0; i < nblocks; i++ {
+ k1 := getUint64(key, offsetBytes+(i<<4), 8) //0, 16, 32, ...
+ k2 := getUint64(key, offsetBytes+(i<<4)+8, 8) //8, 24, 40, ...
+ hashState.blockMix128(k1, k2)
+ }
+
+ // Get the tail index wrt hashed portion, remainder length
+ tail := nblocks << 4 // 16 bytes / block
+ rem := lengthBytes - tail // remainder bytes: 0,1,...,15
+
+ // Get the tail
+ k1 := uint64(0)
+ k2 := uint64(0)
+ if rem > 8 {
+ k1 = getUint64(key, offsetBytes+tail, 8)
+ k2 = getUint64(key, offsetBytes+tail+8, rem-8)
+ } else {
+ if rem != 0 {
+ k1 = getUint64(key, offsetBytes+tail, rem)
+ }
+ }
+
+ // Mix the tail into the hash and return
+ return hashState.finalMix128(k1, k2, uint64(lengthBytes))
+}
+
+func getUint64(bArr []byte, index int, rem int) uint64 {
+ var out uint64
+ for i := rem - 1; i >= 0; i-- { //i= 7,6,5,4,3,2,1,0
+ b := bArr[index+i]
+ out ^= uint64(b&0xFF) << uint(i*8) //equivalent to |=
+ }
+ return out
+}
+
func mixK1(k1 uint64) uint64 {
k1 *= C1
k1 = (k1 << 31) | (k1 >> (64 - 31))
diff --git a/internal/murmur3_test.go b/internal/murmur3_test.go
new file mode 100644
index 0000000..7aa6a2a
--- /dev/null
+++ b/internal/murmur3_test.go
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package internal
+
+import "testing"
+
+func TestByteArrRemainderGT8(t *testing.T) {
+ key := []byte("The quick brown fox jumps over the lazy dog")
+ resultLo, resultHi := HashByteArrMurmur3(key, 0, len(key), 0)
+ h1 := uint64(0xe34bbc7bbc071b6c)
+ h2 := uint64(0x7a433ca9c49a9347)
+ if resultLo != h1 {
+ t.Errorf("expected %v, got %v", h1, resultLo)
+ }
+ if resultHi != h2 {
+ t.Errorf("expected %v, got %v", h2, resultHi)
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]