This is an automated email from the ASF dual-hosted git repository.
placave pushed a commit to branch cpc-sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git
The following commit(s) were added to refs/heads/cpc-sketch by this push:
new 11b99ba Add cpc constructor
11b99ba is described below
commit 11b99ba10efdac7542cb60e052f13f58d1e7aca6
Author: Pierre Lacave <[email protected]>
AuthorDate: Sat Jun 22 14:25:30 2024 +0200
Add cpc constructor
---
cpc/cpc_sketch.go | 59 ++++++++++++++++++++++++++++++++++++++++--
cpc/cpc_sketch_test.go | 58 +++++++++++++++++++++++++++++++++++++++++
cpc/utils.go | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 185 insertions(+), 2 deletions(-)
diff --git a/cpc/cpc_sketch.go b/cpc/cpc_sketch.go
index 8eebe04..2b9b7ca 100644
--- a/cpc/cpc_sketch.go
+++ b/cpc/cpc_sketch.go
@@ -17,6 +17,11 @@
package cpc
+const (
+ minLgK = 4
+ maxLgK = 26
+)
+
type CpcSketch struct {
seed int64
@@ -27,10 +32,60 @@ type CpcSketch struct {
fiCol int // First Interesting Column. This is part of a speed
optimization.
windowOffset int
- slidingWindow []byte //either null or size K bytes
- pairTable pairTable //for sparse and surprising values, either null
or variable size
+ slidingWindow []byte //either null or size K bytes
+ pairTable *pairTable //for sparse and surprising values, either
null or variable size
//The following variables are only valid in HIP varients
kxp float64 //used with HIP
hipEstAccum float64 //used with HIP
}
+
+func NewCpcSketch(lgK int, seed int64) (*CpcSketch, error) {
+ if err := checkLgK(lgK); err != nil {
+ return nil, err
+ }
+
+ return &CpcSketch{
+ lgK: lgK,
+ seed: seed,
+ kxp: float64(int64(1) << lgK),
+ }, nil
+}
+
+func (c *CpcSketch) getFormat() cpcFormat {
+ ordinal := 0
+ f := c.getFlavor()
+ if f == flavor_hybrid || f == flavor_sparse {
+ ordinal = 2
+ if !c.mergeFlag {
+ ordinal |= 1
+ }
+ } else {
+ ordinal = 0
+ if c.slidingWindow != nil {
+ ordinal |= 4
+ }
+ if c.pairTable != nil && c.pairTable.numPairs > 0 {
+ ordinal |= 2
+ }
+ if !c.mergeFlag {
+ ordinal |= 1
+ }
+ }
+ return cpcFormat(ordinal)
+}
+
+func (c *CpcSketch) getFlavor() cpcFlavor {
+ return determineFlavor(c.lgK, c.numCoupons)
+}
+
+func (c *CpcSketch) reset() {
+ c.numCoupons = 0
+ c.mergeFlag = false
+ c.fiCol = 0
+ c.windowOffset = 0
+ c.slidingWindow = nil
+ c.pairTable = nil
+ c.kxp = float64(int64(1) << c.lgK)
+ c.hipEstAccum = 0
+}
diff --git a/cpc/cpc_sketch_test.go b/cpc/cpc_sketch_test.go
new file mode 100644
index 0000000..0049768
--- /dev/null
+++ b/cpc/cpc_sketch_test.go
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cpc
+
+import (
+ "github.com/stretchr/testify/assert"
+ "testing"
+)
+
+func TestCPCCheckUpdatesEstimate(t *testing.T) {
+ sk, err := NewCpcSketch(10, 0)
+ assert.NoError(t, err)
+ assert.Equal(t, sk.getFormat(), format_empty_hip)
+}
+
+/*
+ @Test
+ public void checkUpdatesEstimate() {
+ final CpcSketch sk = new CpcSketch(10, 0);
+ println(sk.toString(true));
+ assertEquals(sk.getFormat(), Format.EMPTY_HIP);
+ sk.update(1L);
+ sk.update(2.0);
+ sk.update("3");
+ byte[] bytes = new byte[] { 4, 4 };
+ sk.update(bytes);
+ sk.update(ByteBuffer.wrap(bytes)); // same as previous
+ sk.update(ByteBuffer.wrap(bytes, 0, 1));
+ sk.update(new char[] { 5 });
+ sk.update(new int[] { 6 });
+ sk.update(new long[] { 7 });
+ final double est = sk.getEstimate();
+ final double lb = sk.getLowerBound(2);
+ final double ub = sk.getUpperBound(2);
+ assertTrue(lb >= 0);
+ assertTrue(lb <= est);
+ assertTrue(est <= ub);
+ assertEquals(sk.getFlavor(), Flavor.SPARSE);
+ assertEquals(sk.getFormat(), Format.SPARSE_HYBRID_HIP);
+ println(sk.toString());
+ println(sk.toString(true));
+ }
+*/
diff --git a/cpc/utils.go b/cpc/utils.go
new file mode 100644
index 0000000..f59c4b2
--- /dev/null
+++ b/cpc/utils.go
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cpc
+
+import "fmt"
+
+type cpcFormat int
+type cpcFlavor int
+
+const (
+ format_empty_merged cpcFormat = 0
+ format_empty_hip cpcFormat = 1
+ format_sparse_hybrid_merged cpcFormat = 2
+ format_sparce_hybrid_hip cpcFormat = 3
+ format_pinned_sliding_merged_nosv cpcFormat = 4
+ format_pinned_sliding_hip_nosv cpcFormat = 5
+ format_pinned_sliding_merged cpcFormat = 6
+ format_pinned_sliding_hip cpcFormat = 7
+)
+
+const (
+ flavor_empty cpcFlavor = 0 // 0 == C < 1
+ flavor_sparse cpcFlavor = 1 // 1 <= C < 3K/32
+ flavor_hybrid cpcFlavor = 2 // 3K/32 <= C < K/2
+ flavor_pinned cpcFlavor = 3 // K/2 <= C < 27K/8 [NB: 27/8 = 3 + 3/8]
+ flavor_sliding cpcFlavor = 4 // 27K/8 <= C
+)
+
+func checkLgK(lgK int) error {
+ if lgK < minLgK || lgK > maxLgK {
+ return fmt.Errorf("LgK must be >= %d and <= %d: %d", minLgK,
maxLgK, lgK)
+ }
+ return nil
+}
+
+func determineFlavor(lgK int, numCoupons int64) cpcFlavor {
+ c := numCoupons
+ k := int64(1) << lgK
+ c2 := c << 1
+ c8 := c << 3
+ c32 := c << 5
+ if c == 0 {
+ return flavor_empty // 0 == C < 1
+ }
+ if c32 < (int64(3) * k) {
+ return flavor_sparse // 1 <= C < 3K/32
+ }
+ if c2 < k {
+ return flavor_hybrid // 3K/32 <= C < K/2
+ }
+ if c8 < (int64(27) * k) {
+ return flavor_pinned // K/2 <= C < 27K/8
+ }
+ return flavor_sliding // 27K/8 <= C
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]