This is an automated email from the ASF dual-hosted git repository. placave pushed a commit to branch reverse-order in repository https://gitbox.apache.org/repos/asf/datasketches-go.git
commit 88d45685f909e018d92ad448403d2f0f983efcf4 Author: Pierre Lacave <[email protected]> AuthorDate: Wed Mar 20 22:54:23 2024 +0100 Add support for reverse order for ItemSketches --- common/array_of_doubles_serde.go | 5 +++ common/array_of_longs_serde.go | 5 +++ common/array_of_strings_serde.go | 4 +++ kll/items_sketch_test.go | 68 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+) diff --git a/common/array_of_doubles_serde.go b/common/array_of_doubles_serde.go index 84d5872..628266e 100644 --- a/common/array_of_doubles_serde.go +++ b/common/array_of_doubles_serde.go @@ -24,6 +24,8 @@ import ( ) type ArrayOfDoublesSerDe struct { + ReverseOrder bool + scratch [8]byte } @@ -38,6 +40,9 @@ func (f ArrayOfDoublesSerDe) Hash(item float64) uint64 { func (f ArrayOfDoublesSerDe) LessFn() LessFn[float64] { return func(a float64, b float64) bool { + if f.ReverseOrder { + return a > b + } return a < b } } diff --git a/common/array_of_longs_serde.go b/common/array_of_longs_serde.go index b829cdf..7e14b40 100644 --- a/common/array_of_longs_serde.go +++ b/common/array_of_longs_serde.go @@ -23,6 +23,8 @@ import ( ) type ArrayOfLongsSerDe struct { + ReverseOrder bool + scratch [8]byte } @@ -37,6 +39,9 @@ func (f ArrayOfLongsSerDe) Hash(item int64) uint64 { func (f ArrayOfLongsSerDe) LessFn() LessFn[int64] { return func(a int64, b int64) bool { + if f.ReverseOrder { + return a > b + } return a < b } } diff --git a/common/array_of_strings_serde.go b/common/array_of_strings_serde.go index a153f28..20410c8 100644 --- a/common/array_of_strings_serde.go +++ b/common/array_of_strings_serde.go @@ -26,6 +26,7 @@ import ( ) type ArrayOfStringsSerDe struct { + ReverseOrder bool } func (f ArrayOfStringsSerDe) Identity() string { @@ -39,6 +40,9 @@ func (f ArrayOfStringsSerDe) Hash(item string) uint64 { func (f ArrayOfStringsSerDe) LessFn() LessFn[string] { return func(a string, b string) bool { + if f.ReverseOrder { + return a > b + } return a < b } } diff --git a/kll/items_sketch_test.go b/kll/items_sketch_test.go index a0f6d23..f20598b 100644 --- a/kll/items_sketch_test.go +++ b/kll/items_sketch_test.go @@ -22,6 +22,9 @@ import ( "github.com/apache/datasketches-go/common" "github.com/stretchr/testify/assert" "math" + "math/rand" + "strconv" + "strings" "testing" ) @@ -936,3 +939,68 @@ func TestSerializeDeserializeFloat(t *testing.T) { } } } + +// There is no guarantee that L0 is sorted after a merge. +// The issue is, during a merge, L0 must be sorted prior to a compaction to a higher level. +// Otherwise the higher levels would not be sorted properly. +func TestL0SortDuringMerge(t *testing.T) { + sk1, err := NewKllItemsSketch[string](8, _DEFAULT_M, common.ArrayOfStringsSerDe{ + ReverseOrder: true, + }) + assert.NoError(t, err) + sk2, err := NewKllItemsSketch[string](8, _DEFAULT_M, common.ArrayOfStringsSerDe{ + ReverseOrder: true, + }) + assert.NoError(t, err) + n := 26 //don't change this + for i := 1; i <= n; i++ { + j := rand.Intn(n) + 1 + sk1.Update(intToFixedLengthString(j, 3)) + sk2.Update(intToFixedLengthString(j+100, 3)) + } + sk1.Merge(sk2) + //println(sk1.String(true, true)) //L1 and above should be sorted in reverse. Ignore L0. + lvl1size := sk1.levels[2] - sk1.levels[1] + itr := sk1.GetIterator() + itr.Next() + prev, _ := strconv.Atoi(strings.TrimSpace(itr.GetQuantile())) + for i := uint32(1); i < lvl1size; i++ { + if itr.Next() { + v, _ := strconv.Atoi(strings.TrimSpace(itr.GetQuantile())) + assert.True(t, v <= prev) + prev = v + } + } + +} + +/* + @Test + //There is no guarantee that L0 is sorted after a merge. + //The issue is, during a merge, L0 must be sorted prior to a compaction to a higher level. + //Otherwise the higher levels would not be sorted properly. + public void checkL0SortDuringMerge() throws NumberFormatException { + final Random rand = new Random(); + final KllItemsSketch<String> sk1 = KllItemsSketch.newHeapInstance(8, Comparator.reverseOrder(), serDe); + final KllItemsSketch<String> sk2 = KllItemsSketch.newHeapInstance(8, Comparator.reverseOrder(), serDe); + final int n = 26; //don't change this + for (int i = 1; i <= n; i++ ) { + final int j = rand.nextInt(n) + 1; + sk1.update(getString(j, 3)); + sk2.update(getString(j +100, 3)); + } + sk1.merge(sk2); + println(sk1.toString(true, true)); //L1 and above should be sorted in reverse. Ignore L0. + final int lvl1size = sk1.levelsArr[2] - sk1.levelsArr[1]; + final QuantilesGenericSketchIterator<String> itr = sk1.iterator(); + itr.next(); + int prev = Integer.parseInt(itr.getQuantile().trim()); + for (int i = 1; i < lvl1size; i++) { + if (itr.next()) { + int v = Integer.parseInt(itr.getQuantile().trim()); + assertTrue(v <= prev); + prev = v; + } + } + } +*/ --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
