This is an automated email from the ASF dual-hosted git repository.

placave pushed a commit to branch cpc-sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git


The following commit(s) were added to refs/heads/cpc-sketch by this push:
     new 9021fdb  Also port Murmur3 implem
9021fdb is described below

commit 9021fdbd0562e2063df829f2de4fe5045af7edff
Author: Pierre Lacave <[email protected]>
AuthorDate: Sat Jun 29 16:57:44 2024 +0200

    Also port Murmur3 implem
---
 cpc/cpc_sketch.go      |   6 +++
 cpc/cpc_sketch_test.go |   3 ++
 internal/murmur3.go    | 106 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 115 insertions(+)

diff --git a/cpc/cpc_sketch.go b/cpc/cpc_sketch.go
index a9c6ddb..7817da5 100644
--- a/cpc/cpc_sketch.go
+++ b/cpc/cpc_sketch.go
@@ -20,6 +20,7 @@ package cpc
 import (
        "encoding/binary"
        "fmt"
+       "github.com/apache/datasketches-go/internal"
        "github.com/twmb/murmur3"
        "math"
        "math/bits"
@@ -84,6 +85,11 @@ func (c *CpcSketch) UpdateSlice(datum []byte) error {
        return c.hashUpdate(hashLo, hashHi)
 }
 
+func (c *CpcSketch) UpdateInt64Slice(datum []int64) error {
+       hashLo, hashHi := internal.HashInt64SliceMurmur3(datum, 0, len(datum), 
c.seed)
+       return c.hashUpdate(hashLo, hashHi)
+}
+
 func (c *CpcSketch) UpdateString(datum string) error {
        // get a slice to the string data (avoiding a copy to heap)
        return c.UpdateSlice(unsafe.Slice(unsafe.StringData(datum), len(datum)))
diff --git a/cpc/cpc_sketch_test.go b/cpc/cpc_sketch_test.go
index 9062b6d..59c261d 100644
--- a/cpc/cpc_sketch_test.go
+++ b/cpc/cpc_sketch_test.go
@@ -35,6 +35,9 @@ func TestCPCCheckUpdatesEstimate(t *testing.T) {
        bytes := []byte{4, 4}
        err = sk.UpdateSlice(bytes)
        assert.NoError(t, err)
+
+       err = sk.UpdateInt64Slice([]int64{7})
+       assert.NoError(t, err)
 }
 
 /*
diff --git a/internal/murmur3.go b/internal/murmur3.go
new file mode 100644
index 0000000..f5a8188
--- /dev/null
+++ b/internal/murmur3.go
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package internal
+
+const (
+       C1 = 0x87c37b91114253d5
+       C2 = 0x4cf5ad432745937f
+)
+
+type SimpleMurmur3 struct {
+       h1 uint64
+       h2 uint64
+}
+
+func HashInt64SliceMurmur3(key []int64, offsetLongs int, lengthLongs int, seed 
uint64) (uint64, uint64) {
+       hashState := SimpleMurmur3{h1: seed, h2: seed}
+
+       // Number of full 128-bit blocks of 2 longs (the body).
+       // Possible exclusion of a remainder of 1 long.
+       nblocks := lengthLongs >> 1 //longs / 2
+
+       // Process the 128-bit blocks (the body) into the hash
+       for i := 0; i < nblocks; i++ {
+               k1 := uint64(key[offsetLongs+(i<<1)])   //offsetLongs + 0, 2, 
4, ...
+               k2 := uint64(key[offsetLongs+(i<<1)+1]) //offsetLongs + 1, 3, 
5, ...
+               hashState.blockMix128(k1, k2)
+       }
+
+       // Get the tail index wrt hashed portion, remainder length
+       tail := nblocks << 1      // 2 longs / block
+       rem := lengthLongs - tail // remainder longs: 0,1
+
+       // Get the tail
+       k1 := uint64(0)
+       if rem != 0 {
+               k1 = uint64(key[offsetLongs+tail]) //k2 -> 0
+       }
+
+       return hashState.finalMix128(k1, 0, uint64(lengthLongs)<<3)
+}
+
+func mixK1(k1 uint64) uint64 {
+       k1 *= C1
+       k1 = (k1 << 31) | (k1 >> (64 - 31))
+       k1 *= C2
+       return k1
+
+}
+
+func mixK2(k2 uint64) uint64 {
+       k2 *= C2
+       k2 = (k2 << 33) | (k2 >> (64 - 33))
+       k2 *= C1
+       return k2
+}
+
+func finalMix64(h uint64) uint64 {
+       h ^= h >> 33
+       h *= 0xff51afd7ed558ccd
+       h ^= h >> 33
+       h *= 0xc4ceb9fe1a85ec53
+       h ^= h >> 33
+       return h
+
+}
+
+func (m *SimpleMurmur3) blockMix128(k1, k2 uint64) {
+       m.h1 ^= mixK1(k1)
+       m.h1 = (m.h1 << 27) | (m.h1 >> (64 - 27))
+       m.h1 += m.h2
+       m.h1 = m.h1*5 + 0x52dce729
+
+       m.h2 ^= mixK2(k2)
+       m.h2 = (m.h2 << 31) | (m.h2 >> (64 - 31))
+       m.h2 += m.h1
+       m.h2 = m.h2*5 + 0x38495ab5
+}
+
+func (m *SimpleMurmur3) finalMix128(k1, k2, inputLengthBytes uint64) (uint64, 
uint64) {
+       m.h1 ^= mixK1(k1)
+       m.h2 ^= mixK2(k2)
+       m.h1 ^= inputLengthBytes
+       m.h2 ^= inputLengthBytes
+       m.h1 += m.h2
+       m.h2 += m.h1
+       m.h1 = finalMix64(m.h1)
+       m.h2 = finalMix64(m.h2)
+       m.h1 += m.h2
+       m.h2 += m.h1
+       return m.h1, m.h2
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to