This is an automated email from the ASF dual-hosted git repository.

placave pushed a commit to branch cpc-sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git


The following commit(s) were added to refs/heads/cpc-sketch by this push:
     new 35935b8  Add murmur3 test
35935b8 is described below

commit 35935b8e7a9a8fd62fbe289531f5defad6d820b2
Author: Pierre Lacave <[email protected]>
AuthorDate: Sat Jun 29 17:15:54 2024 +0200

    Add murmur3 test
---
 internal/murmur3.go      | 43 +++++++++++++++++++++++++++++++++++++++++++
 internal/murmur3_test.go | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/internal/murmur3.go b/internal/murmur3.go
index f5a8188..636e098 100644
--- a/internal/murmur3.go
+++ b/internal/murmur3.go
@@ -54,6 +54,49 @@ func HashInt64SliceMurmur3(key []int64, offsetLongs int, lengthLongs int, seed u
        return hashState.finalMix128(k1, 0, uint64(lengthLongs)<<3)
 }
 
+func HashByteArrMurmur3(key []byte, offsetBytes int, lengthBytes int, seed uint64) (uint64, uint64) {
+       hashState := SimpleMurmur3{h1: seed, h2: seed}
+
+       // Number of full 128-bit blocks of 16 bytes.
+       // Possible exclusion of a remainder of up to 15 bytes.
+       nblocks := lengthBytes >> 4 //bytes / 16
+
+       // Process the 128-bit blocks (the body) into the hash
+       for i := 0; i < nblocks; i++ {
+               k1 := getUint64(key, offsetBytes+(i<<4), 8)   //0, 16, 32, ...
+               k2 := getUint64(key, offsetBytes+(i<<4)+8, 8) //8, 24, 40, ...
+               hashState.blockMix128(k1, k2)
+       }
+
+       // Get the tail index wrt hashed portion, remainder length
+       tail := nblocks << 4      // 16 bytes / block
+       rem := lengthBytes - tail // remainder bytes: 0,1,...,15
+
+       // Get the tail
+       k1 := uint64(0)
+       k2 := uint64(0)
+       if rem > 8 {
+               k1 = getUint64(key, offsetBytes+tail, 8)
+               k2 = getUint64(key, offsetBytes+tail+8, rem-8)
+       } else {
+               if rem != 0 {
+                       k1 = getUint64(key, offsetBytes+tail, rem)
+               }
+       }
+
+       // Mix the tail into the hash and return
+       return hashState.finalMix128(k1, k2, uint64(lengthBytes))
+}
+
+func getUint64(bArr []byte, index int, rem int) uint64 {
+       var out uint64
+       for i := rem - 1; i >= 0; i-- { //i= 7,6,5,4,3,2,1,0
+               b := bArr[index+i]
+               out ^= uint64(b&0xFF) << uint(i*8) //equivalent to |=
+       }
+       return out
+}
+
 func mixK1(k1 uint64) uint64 {
        k1 *= C1
        k1 = (k1 << 31) | (k1 >> (64 - 31))
diff --git a/internal/murmur3_test.go b/internal/murmur3_test.go
new file mode 100644
index 0000000..7aa6a2a
--- /dev/null
+++ b/internal/murmur3_test.go
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package internal
+
+import "testing"
+
+func TestByteArrRemainderGT8(t *testing.T) {
+       key := []byte("The quick brown fox jumps over the lazy dog")
+       resultLo, resultHi := HashByteArrMurmur3(key, 0, len(key), 0)
+       h1 := uint64(0xe34bbc7bbc071b6c)
+       h2 := uint64(0x7a433ca9c49a9347)
+       if resultLo != h1 {
+               t.Errorf("expected %v, got %v", h1, resultLo)
+       }
+       if resultHi != h2 {
+               t.Errorf("expected %v, got %v", h2, resultHi)
+       }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to