This is an automated email from the ASF dual-hosted git repository.

placave pushed a commit to branch reverse-order
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git

commit 88d45685f909e018d92ad448403d2f0f983efcf4
Author: Pierre Lacave <[email protected]>
AuthorDate: Wed Mar 20 22:54:23 2024 +0100

    Add support for reverse order for ItemSketches
---
 common/array_of_doubles_serde.go |  5 +++
 common/array_of_longs_serde.go   |  5 +++
 common/array_of_strings_serde.go |  4 +++
 kll/items_sketch_test.go         | 68 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+)

diff --git a/common/array_of_doubles_serde.go b/common/array_of_doubles_serde.go
index 84d5872..628266e 100644
--- a/common/array_of_doubles_serde.go
+++ b/common/array_of_doubles_serde.go
@@ -24,6 +24,8 @@ import (
 )
 
 type ArrayOfDoublesSerDe struct {
+       ReverseOrder bool
+
        scratch [8]byte
 }
 
@@ -38,6 +40,9 @@ func (f ArrayOfDoublesSerDe) Hash(item float64) uint64 {
 
 func (f ArrayOfDoublesSerDe) LessFn() LessFn[float64] {
        return func(a float64, b float64) bool {
+               if f.ReverseOrder {
+                       return a > b
+               }
                return a < b
        }
 }
diff --git a/common/array_of_longs_serde.go b/common/array_of_longs_serde.go
index b829cdf..7e14b40 100644
--- a/common/array_of_longs_serde.go
+++ b/common/array_of_longs_serde.go
@@ -23,6 +23,8 @@ import (
 )
 
 type ArrayOfLongsSerDe struct {
+       ReverseOrder bool
+
        scratch [8]byte
 }
 
@@ -37,6 +39,9 @@ func (f ArrayOfLongsSerDe) Hash(item int64) uint64 {
 
 func (f ArrayOfLongsSerDe) LessFn() LessFn[int64] {
        return func(a int64, b int64) bool {
+               if f.ReverseOrder {
+                       return a > b
+               }
                return a < b
        }
 }
diff --git a/common/array_of_strings_serde.go b/common/array_of_strings_serde.go
index a153f28..20410c8 100644
--- a/common/array_of_strings_serde.go
+++ b/common/array_of_strings_serde.go
@@ -26,6 +26,7 @@ import (
 )
 
 type ArrayOfStringsSerDe struct {
+       ReverseOrder bool
 }
 
 func (f ArrayOfStringsSerDe) Identity() string {
@@ -39,6 +40,9 @@ func (f ArrayOfStringsSerDe) Hash(item string) uint64 {
 
 func (f ArrayOfStringsSerDe) LessFn() LessFn[string] {
        return func(a string, b string) bool {
+               if f.ReverseOrder {
+                       return a > b
+               }
                return a < b
        }
 }
diff --git a/kll/items_sketch_test.go b/kll/items_sketch_test.go
index a0f6d23..f20598b 100644
--- a/kll/items_sketch_test.go
+++ b/kll/items_sketch_test.go
@@ -22,6 +22,9 @@ import (
        "github.com/apache/datasketches-go/common"
        "github.com/stretchr/testify/assert"
        "math"
+       "math/rand"
+       "strconv"
+       "strings"
        "testing"
 )
 
@@ -936,3 +939,68 @@ func TestSerializeDeserializeFloat(t *testing.T) {
                }
        }
 }
+
+// There is no guarantee that L0 is sorted after a merge.
+// The issue is, during a merge, L0 must be sorted prior to a compaction to a 
higher level.
+// Otherwise the higher levels would not be sorted properly.
+func TestL0SortDuringMerge(t *testing.T) {
+       sk1, err := NewKllItemsSketch[string](8, _DEFAULT_M, 
common.ArrayOfStringsSerDe{
+               ReverseOrder: true,
+       })
+       assert.NoError(t, err)
+       sk2, err := NewKllItemsSketch[string](8, _DEFAULT_M, 
common.ArrayOfStringsSerDe{
+               ReverseOrder: true,
+       })
+       assert.NoError(t, err)
+       n := 26 //don't change this
+       for i := 1; i <= n; i++ {
+               j := rand.Intn(n) + 1
+               sk1.Update(intToFixedLengthString(j, 3))
+               sk2.Update(intToFixedLengthString(j+100, 3))
+       }
+       sk1.Merge(sk2)
+       //println(sk1.String(true, true)) //L1 and above should be sorted in 
reverse. Ignore L0.
+       lvl1size := sk1.levels[2] - sk1.levels[1]
+       itr := sk1.GetIterator()
+       itr.Next()
+       prev, _ := strconv.Atoi(strings.TrimSpace(itr.GetQuantile()))
+       for i := uint32(1); i < lvl1size; i++ {
+               if itr.Next() {
+                       v, _ := 
strconv.Atoi(strings.TrimSpace(itr.GetQuantile()))
+                       assert.True(t, v <= prev)
+                       prev = v
+               }
+       }
+
+}
+
+/*
+  @Test
+  //There is no guarantee that L0 is sorted after a merge.
+  //The issue is, during a merge, L0 must be sorted prior to a compaction to a 
higher level.
+  //Otherwise the higher levels would not be sorted properly.
+  public void checkL0SortDuringMerge() throws NumberFormatException {
+    final Random rand = new Random();
+    final KllItemsSketch<String> sk1 = KllItemsSketch.newHeapInstance(8, 
Comparator.reverseOrder(), serDe);
+    final KllItemsSketch<String> sk2 = KllItemsSketch.newHeapInstance(8, 
Comparator.reverseOrder(), serDe);
+    final int n = 26; //don't change this
+    for (int i = 1; i <= n; i++ ) {
+      final int j = rand.nextInt(n) + 1;
+      sk1.update(getString(j, 3));
+      sk2.update(getString(j +100, 3));
+    }
+    sk1.merge(sk2);
+    println(sk1.toString(true, true)); //L1 and above should be sorted in 
reverse. Ignore L0.
+    final int lvl1size = sk1.levelsArr[2] - sk1.levelsArr[1];
+    final QuantilesGenericSketchIterator<String> itr = sk1.iterator();
+    itr.next();
+    int prev = Integer.parseInt(itr.getQuantile().trim());
+    for (int i = 1; i < lvl1size; i++) {
+      if (itr.next()) {
+        int v = Integer.parseInt(itr.getQuantile().trim());
+        assertTrue(v <= prev);
+        prev = v;
+      }
+    }
+  }
+*/


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to