This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git

commit badf2c50636f8223a9e457cb53be4ac07f6ba2c5
Author: Pierre Lacave <[email protected]>
AuthorDate: Tue Dec 19 22:35:44 2023 +0100

    Continue implementation of Frequency longSketch + QuickSelect
---
 common/family.go                           |  3 +-
 frequencies/long_sketch.go                 | 16 ++++++++
 frequencies/long_sketch_test.go            | 10 +++++
 frequencies/preable_utils.go               | 11 +++++
 frequencies/reverse_purge_long_hash_map.go |  5 +--
 thetacommon/quick_select.go                | 65 ++++++++++++++++++++++++++++++
 6 files changed, 106 insertions(+), 4 deletions(-)

diff --git a/common/family.go b/common/family.go
index 7daa5dc..957934f 100644
--- a/common/family.go
+++ b/common/family.go
@@ -18,5 +18,6 @@
 package common
 
 const (
-       FamilyHllId = 7
+       FamilyHllId     = 7
+       FamilyFrequencyId = 10
 )
diff --git a/frequencies/long_sketch.go b/frequencies/long_sketch.go
index b7c1ef8..75dcc3a 100644
--- a/frequencies/long_sketch.go
+++ b/frequencies/long_sketch.go
@@ -20,6 +20,7 @@ package frequencies
 import (
        "fmt"
        "github.com/apache/datasketches-go/common"
+       "strings"
 )
 
 type LongSketch struct {
@@ -132,3 +133,18 @@ func (s *LongSketch) Update(item int64, count int64) error 
{
        }
        return nil
 }
+
+func (s *LongSketch) serializeToString() string {
+       var sb strings.Builder
+       //start the string with parameters of the sketch
+       serVer := serVer //0
+       famID := common.FamilyFrequencyId
+       lgMaxMapSz := s.lgMaxMapSize
+       flags := 0
+       if s.hashMap.numActive == 0 {
+               flags = emptyFlagMask
+       }
+       fmt.Fprintf(&sb, "%d,%d,%d,%d,%d,%d,", serVer, famID, lgMaxMapSz, 
flags, s.streamWeight, s.offset)
+       sb.WriteString(s.hashMap.serializeToString()) //numActive, curMaplen, 
key[i], value[i], ...
+       return sb.String()
+}
diff --git a/frequencies/long_sketch_test.go b/frequencies/long_sketch_test.go
index 56e3568..03a0c69 100644
--- a/frequencies/long_sketch_test.go
+++ b/frequencies/long_sketch_test.go
@@ -32,4 +32,14 @@ func TestFrequentItsemsStringSerialTest(t *testing.T) {
        sketch.Update(15, 3443)
        sketch.Update(1000001, 1010230)
        sketch.Update(1000002, 1010230)
+
+       s := sketch.serializeToString()
+       assert.Equal(t, 
"1,10,5,0,2024103,0,4,32,10,200,15,3443,1000001,1010230,1000002,1010230,", s)
+       /*
+          LongsSketch new_sketch0 = LongsSketch.getInstance(string0);
+          String new_string0 = new_sketch0.serializeToString();
+          Assert.assertTrue(string0.equals(new_string0));
+          Assert.assertTrue(new_sketch0.getMaximumMapCapacity() == 
sketch.getMaximumMapCapacity());
+          Assert.assertTrue(new_sketch0.getCurrentMapCapacity() == 
sketch.getCurrentMapCapacity());
+       */
 }
diff --git a/frequencies/preable_utils.go b/frequencies/preable_utils.go
new file mode 100644
index 0000000..f04aefc
--- /dev/null
+++ b/frequencies/preable_utils.go
@@ -0,0 +1,11 @@
+package frequencies
+
+const (
+
+       // emptyFlagMask flag bit masks
+       // due to a mistake different bits were used in C++ and Java to 
indicate empty sketch
+       // therefore both are set and checked for compatibility with historical 
binary format
+       emptyFlagMask = 5
+
+       serVer = 1
+)
diff --git a/frequencies/reverse_purge_long_hash_map.go 
b/frequencies/reverse_purge_long_hash_map.go
index 99b9a29..28df67d 100644
--- a/frequencies/reverse_purge_long_hash_map.go
+++ b/frequencies/reverse_purge_long_hash_map.go
@@ -20,6 +20,7 @@ package frequencies
 import (
        "fmt"
        "github.com/apache/datasketches-go/common"
+       "github.com/apache/datasketches-go/thetacommon"
        "math/bits"
        "strconv"
        "strings"
@@ -135,9 +136,7 @@ func (r *reversePurgeLongHashMap) purge(sampleSize int) 
int64 {
                i++
        }
 
-       // TODO implement quickSelect
-       //val := quickSelect(samples, 0, numSamples-1, limit/2)
-       val := int64(0)
+       val := thetacommon.QuickSelect(samples, 0, numSamples-1, limit/2)
        r.adjustAllValuesBy(-1 * val)
        r.keepOnlyPositiveCounts()
        return val
diff --git a/thetacommon/quick_select.go b/thetacommon/quick_select.go
new file mode 100644
index 0000000..47da86a
--- /dev/null
+++ b/thetacommon/quick_select.go
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package thetacommon
+
+func QuickSelect(arr []int64, lo int, hi int, pivot int) int64 {
+       for hi > 0 {
+               j := partition(arr, lo, hi)
+               if j == pivot {
+                       return arr[pivot]
+               }
+               if j > pivot {
+                       hi = j - 1
+               } else {
+                       lo = j + 1
+               }
+       }
+       return arr[pivot]
+}
+
+func partition(arr []int64, lo int, hi int) int {
+       i := lo      // left scan index
+       j := hi + 1  // right scan index
+       v := arr[lo] //partitioning item value
+       for {
+               // Scan right, scan left, check for scan complete, and exchange
+               for arr[i] < v {
+                       i++
+                       if i == hi {
+                               break
+                       }
+               }
+               for v < arr[j] {
+                       j--
+                       if j == lo {
+                               break
+                       }
+               }
+               if i >= j {
+                       break
+               }
+               x := arr[i]
+               arr[i] = arr[j]
+               arr[j] = x
+       }
+       // put v=arr[j] into position with a[lo .. j-1] <= a[j] <= a[j+1 .. hi]
+       x := arr[lo]
+       arr[lo] = arr[j]
+       arr[j] = x
+       return j
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to