This is an automated email from the ASF dual-hosted git repository.
placave pushed a commit to branch cpc-sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git
The following commit(s) were added to refs/heads/cpc-sketch by this push:
new 9021fdb Also port Murmur3 implem
9021fdb is described below
commit 9021fdbd0562e2063df829f2de4fe5045af7edff
Author: Pierre Lacave <[email protected]>
AuthorDate: Sat Jun 29 16:57:44 2024 +0200
Also port Murmur3 implem
---
cpc/cpc_sketch.go | 6 +++
cpc/cpc_sketch_test.go | 3 ++
internal/murmur3.go | 106 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 115 insertions(+)
diff --git a/cpc/cpc_sketch.go b/cpc/cpc_sketch.go
index a9c6ddb..7817da5 100644
--- a/cpc/cpc_sketch.go
+++ b/cpc/cpc_sketch.go
@@ -20,6 +20,7 @@ package cpc
import (
"encoding/binary"
"fmt"
+ "github.com/apache/datasketches-go/internal"
"github.com/twmb/murmur3"
"math"
"math/bits"
@@ -84,6 +85,11 @@ func (c *CpcSketch) UpdateSlice(datum []byte) error {
return c.hashUpdate(hashLo, hashHi)
}
+func (c *CpcSketch) UpdateInt64Slice(datum []int64) error {
+ hashLo, hashHi := internal.HashInt64SliceMurmur3(datum, 0, len(datum),
c.seed)
+ return c.hashUpdate(hashLo, hashHi)
+}
+
func (c *CpcSketch) UpdateString(datum string) error {
// get a slice to the string data (avoiding a copy to heap)
return c.UpdateSlice(unsafe.Slice(unsafe.StringData(datum), len(datum)))
diff --git a/cpc/cpc_sketch_test.go b/cpc/cpc_sketch_test.go
index 9062b6d..59c261d 100644
--- a/cpc/cpc_sketch_test.go
+++ b/cpc/cpc_sketch_test.go
@@ -35,6 +35,9 @@ func TestCPCCheckUpdatesEstimate(t *testing.T) {
bytes := []byte{4, 4}
err = sk.UpdateSlice(bytes)
assert.NoError(t, err)
+
+ err = sk.UpdateInt64Slice([]int64{7})
+ assert.NoError(t, err)
}
/*
diff --git a/internal/murmur3.go b/internal/murmur3.go
new file mode 100644
index 0000000..f5a8188
--- /dev/null
+++ b/internal/murmur3.go
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package internal
+
+const (
+ C1 = 0x87c37b91114253d5
+ C2 = 0x4cf5ad432745937f
+)
+
+type SimpleMurmur3 struct {
+ h1 uint64
+ h2 uint64
+}
+
+func HashInt64SliceMurmur3(key []int64, offsetLongs int, lengthLongs int, seed
uint64) (uint64, uint64) {
+ hashState := SimpleMurmur3{h1: seed, h2: seed}
+
+ // Number of full 128-bit blocks of 2 longs (the body).
+ // Possible exclusion of a remainder of 1 long.
+ nblocks := lengthLongs >> 1 //longs / 2
+
+ // Process the 128-bit blocks (the body) into the hash
+ for i := 0; i < nblocks; i++ {
+ k1 := uint64(key[offsetLongs+(i<<1)]) //offsetLongs + 0, 2,
4, ...
+ k2 := uint64(key[offsetLongs+(i<<1)+1]) //offsetLongs + 1, 3,
5, ...
+ hashState.blockMix128(k1, k2)
+ }
+
+ // Get the tail index wrt hashed portion, remainder length
+ tail := nblocks << 1 // 2 longs / block
+ rem := lengthLongs - tail // remainder longs: 0,1
+
+ // Get the tail
+ k1 := uint64(0)
+ if rem != 0 {
+ k1 = uint64(key[offsetLongs+tail]) //k2 -> 0
+ }
+
+ return hashState.finalMix128(k1, 0, uint64(lengthLongs)<<3)
+}
+
+func mixK1(k1 uint64) uint64 {
+ k1 *= C1
+ k1 = (k1 << 31) | (k1 >> (64 - 31))
+ k1 *= C2
+ return k1
+
+}
+
+func mixK2(k2 uint64) uint64 {
+ k2 *= C2
+ k2 = (k2 << 33) | (k2 >> (64 - 33))
+ k2 *= C1
+ return k2
+}
+
+func finalMix64(h uint64) uint64 {
+ h ^= h >> 33
+ h *= 0xff51afd7ed558ccd
+ h ^= h >> 33
+ h *= 0xc4ceb9fe1a85ec53
+ h ^= h >> 33
+ return h
+
+}
+
+func (m *SimpleMurmur3) blockMix128(k1, k2 uint64) {
+ m.h1 ^= mixK1(k1)
+ m.h1 = (m.h1 << 27) | (m.h1 >> (64 - 27))
+ m.h1 += m.h2
+ m.h1 = m.h1*5 + 0x52dce729
+
+ m.h2 ^= mixK2(k2)
+ m.h2 = (m.h2 << 31) | (m.h2 >> (64 - 31))
+ m.h2 += m.h1
+ m.h2 = m.h2*5 + 0x38495ab5
+}
+
+func (m *SimpleMurmur3) finalMix128(k1, k2, inputLengthBytes uint64) (uint64,
uint64) {
+ m.h1 ^= mixK1(k1)
+ m.h2 ^= mixK2(k2)
+ m.h1 ^= inputLengthBytes
+ m.h2 ^= inputLengthBytes
+ m.h1 += m.h2
+ m.h2 += m.h1
+ m.h1 = finalMix64(m.h1)
+ m.h2 = finalMix64(m.h2)
+ m.h1 += m.h2
+ m.h2 += m.h1
+ return m.h1, m.h2
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]