This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch new_kll_items_sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git


The following commit(s) were added to refs/heads/new_kll_items_sketch by this 
push:
     new 7b51bdbd Interim 10, works, remove all .sk files
7b51bdbd is described below

commit 7b51bdbd8a20390a6262ec9ee8950b43055b899c
Author: Lee Rhodes <[email protected]>
AuthorDate: Mon Jul 31 09:22:39 2023 -0700

    Interim 10, works, remove all .sk files
---
 .../kll/KllDirectCompactDoublesSketch.java         |   2 +-
 .../kll/KllDirectCompactFloatsSketch.java          |   2 +-
 .../apache/datasketches/kll/KllItemsHelper.java    |  16 ++
 .../datasketches/kll/KllItemsSketchSortedView.java |   2 +-
 .../quantilescommon/GenericSortedView.java         |   8 +-
 .../quantilescommon/QuantilesUtil.java             |  16 +-
 .../datasketches/req/ReqSketchSortedView.java      |   2 +-
 .../datasketches/kll/KllDoublesSketchTest.java     |  10 +
 .../datasketches/kll/KllFloatsSketchTest.java      |  10 +
 .../datasketches/kll/KllItemsSketchTest.java       | 257 ++++++++++++++++++++-
 10 files changed, 306 insertions(+), 19 deletions(-)

diff --git 
a/src/main/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketch.java 
b/src/main/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketch.java
index 41dee4d4..ad525206 100644
--- 
a/src/main/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketch.java
+++ 
b/src/main/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketch.java
@@ -22,7 +22,7 @@ package org.apache.datasketches.kll;
 import org.apache.datasketches.memory.Memory;
 import org.apache.datasketches.memory.WritableMemory;
 
-class KllDirectCompactDoublesSketch extends KllDirectDoublesSketch {
+final class KllDirectCompactDoublesSketch extends KllDirectDoublesSketch {
 
   KllDirectCompactDoublesSketch(
       final SketchStructure sketchStructure,
diff --git 
a/src/main/java/org/apache/datasketches/kll/KllDirectCompactFloatsSketch.java 
b/src/main/java/org/apache/datasketches/kll/KllDirectCompactFloatsSketch.java
index b00c4ef4..af5489ad 100644
--- 
a/src/main/java/org/apache/datasketches/kll/KllDirectCompactFloatsSketch.java
+++ 
b/src/main/java/org/apache/datasketches/kll/KllDirectCompactFloatsSketch.java
@@ -22,7 +22,7 @@ package org.apache.datasketches.kll;
 import org.apache.datasketches.memory.Memory;
 import org.apache.datasketches.memory.WritableMemory;
 
-class KllDirectCompactFloatsSketch extends KllDirectFloatsSketch {
+final class KllDirectCompactFloatsSketch extends KllDirectFloatsSketch {
 
   KllDirectCompactFloatsSketch(
       final SketchStructure sketchStructure,
diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java 
b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java
index 13d1ffd6..662829d8 100644
--- a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java
@@ -19,8 +19,11 @@
 
 package org.apache.datasketches.kll;
 
+import static java.lang.Math.ceil;
+import static java.lang.Math.log;
 import static java.lang.Math.max;
 import static java.lang.Math.min;
+import static org.apache.datasketches.common.Util.characterPad;
 import static org.apache.datasketches.common.Util.isEven;
 import static org.apache.datasketches.common.Util.isOdd;
 import static org.apache.datasketches.kll.KllHelper.findLevelToCompact;
@@ -483,10 +486,23 @@ final class KllItemsHelper<T> {
     return c.compare((T)item1, (T)item2) < 0;
   }
 
+  //used in test
   static <T> boolean le(final Object item1, final Object item2, final 
Comparator<? super T> c) {
     return c.compare((T)item1, (T)item2) <= 0;
   }
 
+  static int numDigits(int n) {
+    if (n % 10 == 0) { n++; }
+    return (int) ceil(log(n) / log(10));
+  }
+
+  static String intToFixedLengthString(final int number, final int length) {
+    final String num = Integer.valueOf(number).toString();
+    return characterPad(num, length, ' ', false);
+  }
+
+
+
   /*
    * Validation Method.
    * The following must be enabled for use with the KllItemsValidationTest,
diff --git 
a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java 
b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java
index 2b40c1be..a8bde67c 100644
--- a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java
+++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java
@@ -42,7 +42,7 @@ import org.apache.datasketches.quantilescommon.QuantilesUtil;
  * @author Lee Rhodes
  */
 @SuppressWarnings("unchecked")
-public class KllItemsSketchSortedView<T> implements GenericSortedView<T> {
+public final class KllItemsSketchSortedView<T> implements GenericSortedView<T> 
{
   private final Object[] quantiles;
   private final long[] cumWeights; //comes in as individual weights, converted 
to cumulative natural weights
   private final long totalN;
diff --git 
a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java 
b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java
index d1a8d365..452467bb 100644
--- 
a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java
+++ 
b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java
@@ -153,8 +153,12 @@ public interface GenericSortedView<T> extends SortedView {
    * @param comparator the comparator for generic item data type T
    */
   static <T> void validateItems(final T[] items, final Comparator<? super T> 
comparator) {
-    final int lenM1 = items.length - 1;
-    for (int j = 0; j < lenM1; j++) {
+    final int len = items.length;
+    if (len == 1 && items[0] == null) {
+      throw new SketchesArgumentException(
+          "Items must be unique, monotonically increasing and not null.");
+    }
+    for (int j = 0; j < len - 1; j++) {
       if ((items[j] != null) && (items[j + 1] != null)
           && (comparator.compare(items[j], items[j + 1]) < 0)) {
         continue;
diff --git 
a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java 
b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java
index b7ce737f..34faefb4 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java
@@ -54,8 +54,12 @@ public final class QuantilesUtil {
    */
   public static final void checkDoublesSplitPointsOrder(final double[] values) 
{
     Objects.requireNonNull(values);
-    final int len = values.length - 1;
-    for (int j = 0; j < len; j++) {
+    final int len = values.length;
+    if (len == 1 && Double.isNaN(values[0])) {
+      throw new SketchesArgumentException(
+        "Values must be unique, monotonically increasing and not NaN.");
+    }
+    for (int j = 0; j < len - 1; j++) {
       if (values[j] < values[j + 1]) { continue; }
       throw new SketchesArgumentException(
           "Values must be unique, monotonically increasing and not NaN.");
@@ -69,8 +73,12 @@ public final class QuantilesUtil {
    */
   public static final void checkFloatsSplitPointsOrder(final float[] values) {
     Objects.requireNonNull(values);
-    final int len = values.length - 1;
-    for (int j = 0; j < len; j++) {
+    final int len = values.length;
+    if (len == 1 && Float.isNaN(values[0])) {
+      throw new SketchesArgumentException(
+        "Values must be unique, monotonically increasing and not NaN.");
+    }
+    for (int j = 0; j < len - 1; j++) {
       if (values[j] < values[j + 1]) { continue; }
       throw new SketchesArgumentException(
           "Values must be unique, monotonically increasing and not NaN.");
diff --git a/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java 
b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java
index 0b17f679..1b8586ab 100644
--- a/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java
+++ b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java
@@ -34,7 +34,7 @@ import org.apache.datasketches.quantilescommon.QuantilesUtil;
  * @author Alexander Saydakov
  * @author Lee Rhodes
  */
-public class ReqSketchSortedView implements FloatsSortedView {
+public final class ReqSketchSortedView implements FloatsSortedView {
   private float[] quantiles;
   private long[] cumWeights; //comes in as individual weights, converted to 
cumulative natural weights
   private final long totalN;
diff --git 
a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java 
b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java
index b74fd367..edae1ecd 100644
--- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java
@@ -511,8 +511,10 @@ public class KllDoublesSketchTest {
   public void checkWrapCase1Doubles() {
     KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20);
     for (int i = 1; i <= 21; i++) { sk.update(i); }
+
     Memory mem = Memory.wrap(sk.toByteArray());
     KllDoublesSketch sk2 = KllDoublesSketch.wrap(mem);
+
     assertTrue(mem.isReadOnly());
     assertTrue(sk2.isReadOnly());
     assertFalse(sk2.isDirect());
@@ -522,8 +524,10 @@ public class KllDoublesSketchTest {
   public void checkWritableWrapCase6And2Doubles() {
     KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20);
     for (int i = 1; i <= 21; i++) { sk.update(i); }
+
     WritableMemory wmem = 
WritableMemory.writableWrap(KllHelper.toByteArray(sk, true));
     KllDoublesSketch sk2 = KllDoublesSketch.writableWrap(wmem, memReqSvr);
+
     assertFalse(wmem.isReadOnly());
     assertFalse(sk2.isReadOnly());
     assertFalse(sk2.isDirect());
@@ -533,8 +537,10 @@ public class KllDoublesSketchTest {
   public void checkKllSketchCase5Doubles() {
     KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20);
     for (int i = 1; i <= 21; i++) { sk.update(i); }
+
     WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray());
     KllDoublesSketch sk2 = KllDoublesSketch.writableWrap(wmem, memReqSvr);
+
     assertFalse(wmem.isReadOnly());
     assertTrue(sk2.isReadOnly());
     assertFalse(sk2.isDirect());
@@ -544,9 +550,11 @@ public class KllDoublesSketchTest {
   public void checkKllSketchCase3Doubles() {
     KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20);
     for (int i = 1; i <= 21; i++) { sk.update(i); }
+
     Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true));
     WritableMemory wmem = (WritableMemory) mem;
     KllDoublesSketch sk2 = KllDoublesSketch.writableWrap(wmem, memReqSvr);
+
     assertTrue(wmem.isReadOnly());
     assertTrue(sk2.isReadOnly());
     assertFalse(sk2.isDirect());
@@ -556,9 +564,11 @@ public class KllDoublesSketchTest {
   public void checkKllSketchCase7Doubles() {
     KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20);
     for (int i = 1; i <= 21; i++) { sk.update(i); }
+
     Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true));
     WritableMemory wmem = (WritableMemory) mem;
     KllDoublesSketch sk2 = KllDoublesSketch.writableWrap(wmem, memReqSvr);
+
     assertTrue(wmem.isReadOnly());
     assertTrue(sk2.isReadOnly());
     assertFalse(sk2.isDirect());
diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java 
b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
index e03612e7..23d12bb3 100644
--- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
@@ -511,8 +511,10 @@ public class KllFloatsSketchTest {
   public void checkWrapCase1Floats() {
     KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20);
     for (int i = 1; i <= 21; i++) { sk.update(i); }
+
     Memory mem = Memory.wrap(sk.toByteArray());
     KllFloatsSketch sk2 = KllFloatsSketch.wrap(mem);
+
     assertTrue(mem.isReadOnly());
     assertTrue(sk2.isReadOnly());
     assertFalse(sk2.isDirect());
@@ -522,8 +524,10 @@ public class KllFloatsSketchTest {
   public void checkWritableWrapCase6And2Floats() {
     KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20);
     for (int i = 1; i <= 21; i++) { sk.update(i); }
+
     WritableMemory wmem = 
WritableMemory.writableWrap(KllHelper.toByteArray(sk, true));
     KllFloatsSketch sk2 = KllFloatsSketch.writableWrap(wmem, memReqSvr);
+
     assertFalse(wmem.isReadOnly());
     assertFalse(sk2.isReadOnly());
     assertFalse(sk2.isDirect());
@@ -533,8 +537,10 @@ public class KllFloatsSketchTest {
   public void checkKllSketchCase5Floats() {
     KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20);
     for (int i = 1; i <= 21; i++) { sk.update(i); }
+
     WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray());
     KllFloatsSketch sk2 = KllFloatsSketch.writableWrap(wmem, memReqSvr);
+
     assertFalse(wmem.isReadOnly());
     assertTrue(sk2.isReadOnly());
     assertFalse(sk2.isDirect());
@@ -544,9 +550,11 @@ public class KllFloatsSketchTest {
   public void checkKllSketchCase3Floats() {
     KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20);
     for (int i = 1; i <= 21; i++) { sk.update(i); }
+
     Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true));
     WritableMemory wmem = (WritableMemory) mem;
     KllFloatsSketch sk2 = KllFloatsSketch.writableWrap(wmem, memReqSvr);
+
     assertTrue(wmem.isReadOnly());
     assertTrue(sk2.isReadOnly());
     assertFalse(sk2.isDirect());
@@ -556,9 +564,11 @@ public class KllFloatsSketchTest {
   public void checkKllSketchCase7Floats() {
     KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20);
     for (int i = 1; i <= 21; i++) { sk.update(i); }
+
     Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true));
     WritableMemory wmem = (WritableMemory) mem;
     KllFloatsSketch sk2 = KllFloatsSketch.writableWrap(wmem, memReqSvr);
+
     assertTrue(wmem.isReadOnly());
     assertTrue(sk2.isReadOnly());
     assertFalse(sk2.isDirect());
diff --git a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java 
b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java
index 30d0e680..88b4b3b2 100644
--- a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java
@@ -19,8 +19,10 @@
 
 package org.apache.datasketches.kll;
 
-import static org.apache.datasketches.common.Util.characterPad;
+import static java.lang.Math.ceil;
+import static 
org.apache.datasketches.kll.KllItemsHelper.intToFixedLengthString;
 import static org.apache.datasketches.kll.KllItemsHelper.le;
+import static org.apache.datasketches.kll.KllItemsHelper.numDigits;
 import static 
org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
 import static 
org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
 import static org.testng.Assert.assertEquals;
@@ -148,29 +150,30 @@ public class KllItemsSketchTest {
   public void manyValuesEstimationMode() {
     final KllItemsSketch<String> sketch = 
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
     final int n = 1_000_000;
+    final int digits = numDigits(n);
 
     for (int i = 1; i <= n; i++) {
-      sketch.update(intToFixedLengthString(i, 7));
+      sketch.update(intToFixedLengthString(i, digits));
       assertEquals(sketch.getN(), i);
     }
 
     // test getRank
     for (int i = 1; i <= n; i++) {
       final double trueRank = (double) i / n;
-      String s = intToFixedLengthString(i, 7);
+      String s = intToFixedLengthString(i, digits);
       double r = sketch.getRank(s);
       assertEquals(sketch.getRank(s), trueRank, PMF_EPS_FOR_K_256, "for value 
" + s);
     }
 
     // test getPMF
-    String s = intToFixedLengthString(n/2, 7);
+    String s = intToFixedLengthString(n/2, digits);
     final double[] pmf = sketch.getPMF(new String[] {s}); // split at median
     assertEquals(pmf.length, 2);
     assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256);
     assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256);
 
-    assertEquals(sketch.getMinItem(), intToFixedLengthString(1, 7));
-    assertEquals(sketch.getMaxItem(), intToFixedLengthString(n, 7));
+    assertEquals(sketch.getMinItem(), intToFixedLengthString(1, digits));
+    assertEquals(sketch.getMaxItem(), intToFixedLengthString(n, digits));
 
  // check at every 0.1 percentage point
     final double[] fractions = new double[1001];
@@ -191,23 +194,259 @@ public class KllItemsSketchTest {
     }
   }
 
+  @Test
+  public void getRankGetCdfGetPmfConsistency() {
+    final KllItemsSketch<String> sketch = 
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+    final int n = 1000;
+    final int digits = numDigits(n);
+    final String[] quantiles = new String[n];
+    for (int i = 0; i < n; i++) {
+      final String str = intToFixedLengthString(i, digits);
+      sketch.update(str);
+      quantiles[i] = str;
+    }
+    { //EXCLUSIVE
+      final double[] ranks = sketch.getCDF(quantiles, EXCLUSIVE);
+      final double[] pmf = sketch.getPMF(quantiles, EXCLUSIVE);
+      double sumPmf = 0;
+      for (int i = 0; i < n; i++) {
+        assertEquals(ranks[i], sketch.getRank(quantiles[i], EXCLUSIVE), 
NUMERIC_NOISE_TOLERANCE, "rank vs CDF for value " + i);
+        sumPmf += pmf[i];
+        assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF 
for value " + i);
+      }
+      sumPmf += pmf[n];
+      assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE);
+      assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE);
+    }
+    { // INCLUSIVE (default)
+      final double[] ranks = sketch.getCDF(quantiles, INCLUSIVE);
+      final double[] pmf = sketch.getPMF(quantiles, INCLUSIVE);
+      double sumPmf = 0;
+      for (int i = 0; i < n; i++) {
+        assertEquals(ranks[i], sketch.getRank(quantiles[i], INCLUSIVE), 
NUMERIC_NOISE_TOLERANCE,
+            "rank vs CDF for value " + i);
+        sumPmf += pmf[i];
+        assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF 
for value " + i);
+      }
+      sumPmf += pmf[n];
+      assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE);
+      assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE);
+    }
+  }
 
+  @Test
+  public void merge() {
+    final KllItemsSketch<String> sketch1 = 
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+    final KllItemsSketch<String> sketch2 = 
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+    final int n = 10000;
+    final int digits = numDigits(2 * n);
+    for (int i = 0; i < n; i++) {
+      sketch1.update(intToFixedLengthString(i, digits));
+      sketch2.update(intToFixedLengthString(2 * n - i - 1, digits));
+    }
 
+    assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+    assertEquals(sketch1.getMaxItem(), intToFixedLengthString(n - 1, digits));
 
+    assertEquals(sketch2.getMinItem(), intToFixedLengthString(n, digits));
+    assertEquals(sketch2.getMaxItem(), intToFixedLengthString(2 * n - 1, 
digits));
 
+    sketch1.merge(sketch2);
 
+    assertFalse(sketch1.isEmpty());
+    assertEquals(sketch1.getN(), 2L * n);
+    assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+    assertEquals(sketch1.getMaxItem(), intToFixedLengthString(2 * n - 1, 
digits));
+    String upperBound = intToFixedLengthString(n + (int)ceil(n * 
PMF_EPS_FOR_K_256), digits);
+    String lowerBound = intToFixedLengthString(n - (int)ceil(n * 
PMF_EPS_FOR_K_256), digits);
+    String median = sketch1.getQuantile(0.5);
+    assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+    assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+  }
 
+  @Test
+  public void mergeLowerK() {
+    final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(256, 
Comparator.naturalOrder(), serDe);
+    final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(128, 
Comparator.naturalOrder(), serDe);
+    final int n = 10000;
+    final int digits = numDigits(2 * n);
+    for (int i = 0; i < n; i++) {
+      sketch1.update(intToFixedLengthString(i, digits));
+      sketch2.update(intToFixedLengthString(2 * n - i - 1, digits));
+    }
 
+    assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+    assertEquals(sketch1.getMaxItem(), intToFixedLengthString(n - 1, digits));
+
+    assertEquals(sketch2.getMinItem(), intToFixedLengthString(n, digits));
+    assertEquals(sketch2.getMaxItem(), intToFixedLengthString(2 * n - 1, 
digits));
+
+    assertTrue(sketch1.getNormalizedRankError(false) < 
sketch2.getNormalizedRankError(false));
+    assertTrue(sketch1.getNormalizedRankError(true) < 
sketch2.getNormalizedRankError(true));
+    sketch1.merge(sketch2);
+
+    // sketch1 must get "contaminated" by the lower K in sketch2
+    assertEquals(sketch1.getNormalizedRankError(false), 
sketch2.getNormalizedRankError(false));
+    assertEquals(sketch1.getNormalizedRankError(true), 
sketch2.getNormalizedRankError(true));
+
+    assertFalse(sketch1.isEmpty());
+    assertEquals(sketch1.getN(), 2 * n);
+    assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+    assertEquals(sketch1.getMaxItem(), intToFixedLengthString(2 * n - 1, 
digits));
+    String upperBound = intToFixedLengthString(n + (int)ceil(n * 
PMF_EPS_FOR_K_128), digits);
+    String lowerBound = intToFixedLengthString(n - (int)ceil(n * 
PMF_EPS_FOR_K_128), digits);
+    String median = sketch1.getQuantile(0.5);
+    assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+    assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+  }
 
+  @Test
+  public void mergeEmptyLowerK() {
+    final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(256, 
Comparator.naturalOrder(), serDe);
+    final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(128, 
Comparator.naturalOrder(), serDe);
+    final int n = 10000;
+    final int digits = numDigits(n);
+    for (int i = 0; i < n; i++) {
+      sketch1.update(intToFixedLengthString(i, digits)); //sketch2 is empty
+    }
 
+    // rank error should not be affected by a merge with an empty sketch with 
lower K
+    final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true);
+    sketch1.merge(sketch2);
+    assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge);
+    {
+    assertFalse(sketch1.isEmpty());
+    assertTrue(sketch2.isEmpty());
+    assertEquals(sketch1.getN(), n);
+    assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+    assertEquals(sketch1.getMaxItem(), intToFixedLengthString(n - 1, digits));
+    String upperBound = intToFixedLengthString(n / 2 + (int)ceil(n * 
PMF_EPS_FOR_K_256), digits);
+    String lowerBound = intToFixedLengthString(n / 2 - (int)ceil(n * 
PMF_EPS_FOR_K_256), digits);
+    String median = sketch1.getQuantile(0.5);
+    assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+    assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+    }
+    {
+    //merge the other way
+    sketch2.merge(sketch1);
+    assertFalse(sketch1.isEmpty());
+    assertFalse(sketch2.isEmpty());
+    assertEquals(sketch1.getN(), n);
+    assertEquals(sketch2.getN(), n);
+    assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+    assertEquals(sketch1.getMaxItem(), intToFixedLengthString(n - 1, digits));
+    assertEquals(sketch2.getMinItem(), intToFixedLengthString(0, digits));
+    assertEquals(sketch2.getMaxItem(), intToFixedLengthString(n - 1, digits));
+    String upperBound = intToFixedLengthString(n / 2 + (int)ceil(n * 
PMF_EPS_FOR_K_128), digits);
+    String lowerBound = intToFixedLengthString(n / 2 - (int)ceil(n * 
PMF_EPS_FOR_K_128), digits);
+    String median = sketch2.getQuantile(0.5);
+    assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+    assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+    }
+  }
 
+  @Test
+  public void mergeExactModeLowerK() {
+    final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(256, 
Comparator.naturalOrder(), serDe);
+    final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(128, 
Comparator.naturalOrder(), serDe);
+    final int n = 10000;
+    final int digits = numDigits(n);
+    for (int i = 0; i < n; i++) {
+      sketch1.update(intToFixedLengthString(i, digits));
+    }
+    sketch2.update(intToFixedLengthString(1, digits));
 
+    // rank error should not be affected by a merge with a sketch in exact 
mode with lower K
+    final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true);
+    sketch1.merge(sketch2);
+    assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge);
+  }
 
-  static String intToFixedLengthString(int number, int length) {
-    final String num = Integer.valueOf(number).toString();
-    return characterPad(num, length, '.', false);
+  @Test
+  public void mergeMinMinValueFromOther() {
+    final KllItemsSketch<String> sketch1 = 
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+    final KllItemsSketch<String> sketch2 = 
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+    sketch1.update(intToFixedLengthString(1, 1));
+    sketch2.update(intToFixedLengthString(2, 1));
+    sketch2.merge(sketch1);
+    assertEquals(sketch2.getMinItem(), intToFixedLengthString(1, 1));
   }
 
+  @Test
+  public void mergeMinAndMaxFromOther() {
+    final KllItemsSketch<String> sketch1 = 
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+    final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(10, 
Comparator.naturalOrder(), serDe);
+    final int n = 1_000_000;
+    final int digits = numDigits(n);
+    for (int i = 1; i <= 1_000_000; i++) {
+      sketch1.update(intToFixedLengthString(i, digits)); //sketch2 is empty
+    }
+    sketch2.merge(sketch1);
+    assertEquals(sketch2.getMinItem(), intToFixedLengthString(1, digits));
+    assertEquals(sketch2.getMaxItem(), intToFixedLengthString(n, digits));
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void kTooSmall() {
+    KllItemsSketch.newHeapInstance(KllSketch.DEFAULT_M - 1, 
Comparator.naturalOrder(), serDe);
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void kTooLarge() {
+    KllItemsSketch.newHeapInstance(KllSketch.MAX_K + 1, 
Comparator.naturalOrder(), serDe);
+  }
+
+  @Test
+  public void minK() {
+    final KllItemsSketch<String> sketch =
+        
KllItemsSketch.newHeapInstance(KllSketch.DEFAULT_M,Comparator.naturalOrder(), 
serDe);
+    final int n = 1000;
+    final int digits = numDigits(n);
+    for (int i = 0; i < n; i++) {
+      sketch.update(intToFixedLengthString(i, digits));
+    }
+    assertEquals(sketch.getK(), KllSketch.DEFAULT_M);
+    String upperBound = intToFixedLengthString(n / 2 + (int)ceil(n * 
PMF_EPS_FOR_K_8), digits);
+    String lowerBound = intToFixedLengthString(n / 2 - (int)ceil(n * 
PMF_EPS_FOR_K_8), digits);
+    String median = sketch.getQuantile(0.5);
+    assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+    assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+  }
+
+  @Test
+  public void maxK() {
+    final KllItemsSketch<String> sketch =
+        
KllItemsSketch.newHeapInstance(KllSketch.MAX_K,Comparator.naturalOrder(), 
serDe);
+    final int n = 1000;
+    final int digits = numDigits(n);
+    for (int i = 0; i < n; i++) {
+      sketch.update(intToFixedLengthString(i, digits));
+    }
+    assertEquals(sketch.getK(), KllSketch.MAX_K);
+    String upperBound = intToFixedLengthString(n / 2 + (int)ceil(n * 
PMF_EPS_FOR_K_256), digits);
+    String lowerBound = intToFixedLengthString(n / 2 - (int)ceil(n * 
PMF_EPS_FOR_K_256), digits);
+    String median = sketch.getQuantile(0.5);
+    assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+    assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void outOfOrderSplitPoints() {
+    final KllItemsSketch<String> sketch = 
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+    final String s0 = intToFixedLengthString(0, 1);
+    final String s1 = intToFixedLengthString(1, 1);
+    sketch.update(s0);
+    sketch.getCDF(new String[] {s1, s0});
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void nullSplitPoint() {
+    final KllItemsSketch<String> sketch = 
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+    sketch.update(intToFixedLengthString(0, 1));
+    sketch.getCDF(new String[] {null});
+  }
+
+
   private final static boolean enablePrinting = true;
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to