This is an automated email from the ASF dual-hosted git repository.

placave pushed a commit to branch cpc-sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git


The following commit(s) were added to refs/heads/cpc-sketch by this push:
     new 11b99ba  Add cpc constructor
11b99ba is described below

commit 11b99ba10efdac7542cb60e052f13f58d1e7aca6
Author: Pierre Lacave <[email protected]>
AuthorDate: Sat Jun 22 14:25:30 2024 +0200

    Add cpc constructor
---
 cpc/cpc_sketch.go      | 59 ++++++++++++++++++++++++++++++++++++++++--
 cpc/cpc_sketch_test.go | 58 +++++++++++++++++++++++++++++++++++++++++
 cpc/utils.go           | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 185 insertions(+), 2 deletions(-)

diff --git a/cpc/cpc_sketch.go b/cpc/cpc_sketch.go
index 8eebe04..2b9b7ca 100644
--- a/cpc/cpc_sketch.go
+++ b/cpc/cpc_sketch.go
@@ -17,6 +17,11 @@
 
 package cpc
 
+const (
+       minLgK = 4
+       maxLgK = 26
+)
+
 type CpcSketch struct {
        seed int64
 
@@ -27,10 +32,60 @@ type CpcSketch struct {
        fiCol      int   // First Interesting Column. This is part of a speed 
optimization.
 
        windowOffset  int
-       slidingWindow []byte    //either null or size K bytes
-       pairTable     pairTable //for sparse and surprising values, either null 
or variable size
+       slidingWindow []byte     //either null or size K bytes
+       pairTable     *pairTable //for sparse and surprising values, either 
null or variable size
 
        //The following variables are only valid in HIP varients
        kxp         float64 //used with HIP
        hipEstAccum float64 //used with HIP
 }
+
+func NewCpcSketch(lgK int, seed int64) (*CpcSketch, error) {
+       if err := checkLgK(lgK); err != nil {
+               return nil, err
+       }
+
+       return &CpcSketch{
+               lgK:  lgK,
+               seed: seed,
+               kxp:  float64(int64(1) << lgK),
+       }, nil
+}
+
+func (c *CpcSketch) getFormat() cpcFormat {
+       ordinal := 0
+       f := c.getFlavor()
+       if f == flavor_hybrid || f == flavor_sparse {
+               ordinal = 2
+               if !c.mergeFlag {
+                       ordinal |= 1
+               }
+       } else {
+               ordinal = 0
+               if c.slidingWindow != nil {
+                       ordinal |= 4
+               }
+               if c.pairTable != nil && c.pairTable.numPairs > 0 {
+                       ordinal |= 2
+               }
+               if !c.mergeFlag {
+                       ordinal |= 1
+               }
+       }
+       return cpcFormat(ordinal)
+}
+
+func (c *CpcSketch) getFlavor() cpcFlavor {
+       return determineFlavor(c.lgK, c.numCoupons)
+}
+
+func (c *CpcSketch) reset() {
+       c.numCoupons = 0
+       c.mergeFlag = false
+       c.fiCol = 0
+       c.windowOffset = 0
+       c.slidingWindow = nil
+       c.pairTable = nil
+       c.kxp = float64(int64(1) << c.lgK)
+       c.hipEstAccum = 0
+}
diff --git a/cpc/cpc_sketch_test.go b/cpc/cpc_sketch_test.go
new file mode 100644
index 0000000..0049768
--- /dev/null
+++ b/cpc/cpc_sketch_test.go
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cpc
+
+import (
+       "github.com/stretchr/testify/assert"
+       "testing"
+)
+
+func TestCPCCheckUpdatesEstimate(t *testing.T) {
+       sk, err := NewCpcSketch(10, 0)
+       assert.NoError(t, err)
+       assert.Equal(t, sk.getFormat(), format_empty_hip)
+}
+
+/*
+  @Test
+  public void checkUpdatesEstimate() {
+    final CpcSketch sk = new CpcSketch(10, 0);
+    println(sk.toString(true));
+    assertEquals(sk.getFormat(), Format.EMPTY_HIP);
+    sk.update(1L);
+    sk.update(2.0);
+    sk.update("3");
+    byte[] bytes = new byte[] { 4, 4 };
+    sk.update(bytes);
+    sk.update(ByteBuffer.wrap(bytes));  // same as previous
+    sk.update(ByteBuffer.wrap(bytes, 0, 1));
+    sk.update(new char[] { 5 });
+    sk.update(new int[] { 6 });
+    sk.update(new long[] { 7 });
+    final double est = sk.getEstimate();
+    final double lb = sk.getLowerBound(2);
+    final double ub = sk.getUpperBound(2);
+    assertTrue(lb >= 0);
+    assertTrue(lb <= est);
+    assertTrue(est <= ub);
+    assertEquals(sk.getFlavor(), Flavor.SPARSE);
+    assertEquals(sk.getFormat(), Format.SPARSE_HYBRID_HIP);
+    println(sk.toString());
+    println(sk.toString(true));
+  }
+*/
diff --git a/cpc/utils.go b/cpc/utils.go
new file mode 100644
index 0000000..f59c4b2
--- /dev/null
+++ b/cpc/utils.go
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cpc
+
+import "fmt"
+
+type cpcFormat int
+type cpcFlavor int
+
+const (
+       format_empty_merged               cpcFormat = 0
+       format_empty_hip                  cpcFormat = 1
+       format_sparse_hybrid_merged       cpcFormat = 2
+       format_sparce_hybrid_hip          cpcFormat = 3
+       format_pinned_sliding_merged_nosv cpcFormat = 4
+       format_pinned_sliding_hip_nosv    cpcFormat = 5
+       format_pinned_sliding_merged      cpcFormat = 6
+       format_pinned_sliding_hip         cpcFormat = 7
+)
+
+const (
+       flavor_empty   cpcFlavor = 0 //    0  == C <    1
+       flavor_sparse  cpcFlavor = 1 //    1  <= C <   3K/32
+       flavor_hybrid  cpcFlavor = 2 // 3K/32 <= C <   K/2
+       flavor_pinned  cpcFlavor = 3 //   K/2 <= C < 27K/8  [NB: 27/8 = 3 + 3/8]
+       flavor_sliding cpcFlavor = 4 // 27K/8 <= C
+)
+
+func checkLgK(lgK int) error {
+       if lgK < minLgK || lgK > maxLgK {
+               return fmt.Errorf("LgK must be >= %d and <= %d: %d", minLgK, 
maxLgK, lgK)
+       }
+       return nil
+}
+
+func determineFlavor(lgK int, numCoupons int64) cpcFlavor {
+       c := numCoupons
+       k := int64(1) << lgK
+       c2 := c << 1
+       c8 := c << 3
+       c32 := c << 5
+       if c == 0 {
+               return flavor_empty //    0  == C <    1
+       }
+       if c32 < (int64(3) * k) {
+               return flavor_sparse //    1  <= C <   3K/32
+       }
+       if c2 < k {
+               return flavor_hybrid // 3K/32 <= C <   K/2
+       }
+       if c8 < (int64(27) * k) {
+               return flavor_pinned //   K/2 <= C < 27K/8
+       }
+       return flavor_sliding // 27K/8 <= C
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to