This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch new_kll_items_sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
The following commit(s) were added to refs/heads/new_kll_items_sketch by this
push:
new 7b51bdbd Interim 10, works, remove all .sk files
7b51bdbd is described below
commit 7b51bdbd8a20390a6262ec9ee8950b43055b899c
Author: Lee Rhodes <[email protected]>
AuthorDate: Mon Jul 31 09:22:39 2023 -0700
Interim 10, works, remove all .sk files
---
.../kll/KllDirectCompactDoublesSketch.java | 2 +-
.../kll/KllDirectCompactFloatsSketch.java | 2 +-
.../apache/datasketches/kll/KllItemsHelper.java | 16 ++
.../datasketches/kll/KllItemsSketchSortedView.java | 2 +-
.../quantilescommon/GenericSortedView.java | 8 +-
.../quantilescommon/QuantilesUtil.java | 16 +-
.../datasketches/req/ReqSketchSortedView.java | 2 +-
.../datasketches/kll/KllDoublesSketchTest.java | 10 +
.../datasketches/kll/KllFloatsSketchTest.java | 10 +
.../datasketches/kll/KllItemsSketchTest.java | 257 ++++++++++++++++++++-
10 files changed, 306 insertions(+), 19 deletions(-)
diff --git
a/src/main/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketch.java
b/src/main/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketch.java
index 41dee4d4..ad525206 100644
---
a/src/main/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketch.java
+++
b/src/main/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketch.java
@@ -22,7 +22,7 @@ package org.apache.datasketches.kll;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
-class KllDirectCompactDoublesSketch extends KllDirectDoublesSketch {
+final class KllDirectCompactDoublesSketch extends KllDirectDoublesSketch {
KllDirectCompactDoublesSketch(
final SketchStructure sketchStructure,
diff --git
a/src/main/java/org/apache/datasketches/kll/KllDirectCompactFloatsSketch.java
b/src/main/java/org/apache/datasketches/kll/KllDirectCompactFloatsSketch.java
index b00c4ef4..af5489ad 100644
---
a/src/main/java/org/apache/datasketches/kll/KllDirectCompactFloatsSketch.java
+++
b/src/main/java/org/apache/datasketches/kll/KllDirectCompactFloatsSketch.java
@@ -22,7 +22,7 @@ package org.apache.datasketches.kll;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
-class KllDirectCompactFloatsSketch extends KllDirectFloatsSketch {
+final class KllDirectCompactFloatsSketch extends KllDirectFloatsSketch {
KllDirectCompactFloatsSketch(
final SketchStructure sketchStructure,
diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java
b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java
index 13d1ffd6..662829d8 100644
--- a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java
@@ -19,8 +19,11 @@
package org.apache.datasketches.kll;
+import static java.lang.Math.ceil;
+import static java.lang.Math.log;
import static java.lang.Math.max;
import static java.lang.Math.min;
+import static org.apache.datasketches.common.Util.characterPad;
import static org.apache.datasketches.common.Util.isEven;
import static org.apache.datasketches.common.Util.isOdd;
import static org.apache.datasketches.kll.KllHelper.findLevelToCompact;
@@ -483,10 +486,23 @@ final class KllItemsHelper<T> {
return c.compare((T)item1, (T)item2) < 0;
}
+ //used in test
static <T> boolean le(final Object item1, final Object item2, final
Comparator<? super T> c) {
return c.compare((T)item1, (T)item2) <= 0;
}
+ static int numDigits(int n) {
+ if (n % 10 == 0) { n++; }
+ return (int) ceil(log(n) / log(10));
+ }
+
+ static String intToFixedLengthString(final int number, final int length) {
+ final String num = Integer.valueOf(number).toString();
+ return characterPad(num, length, ' ', false);
+ }
+
+
+
/*
* Validation Method.
* The following must be enabled for use with the KllItemsValidationTest,
diff --git
a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java
b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java
index 2b40c1be..a8bde67c 100644
--- a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java
+++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java
@@ -42,7 +42,7 @@ import org.apache.datasketches.quantilescommon.QuantilesUtil;
* @author Lee Rhodes
*/
@SuppressWarnings("unchecked")
-public class KllItemsSketchSortedView<T> implements GenericSortedView<T> {
+public final class KllItemsSketchSortedView<T> implements GenericSortedView<T>
{
private final Object[] quantiles;
private final long[] cumWeights; //comes in as individual weights, converted
to cumulative natural weights
private final long totalN;
diff --git
a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java
b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java
index d1a8d365..452467bb 100644
---
a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java
+++
b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java
@@ -153,8 +153,12 @@ public interface GenericSortedView<T> extends SortedView {
* @param comparator the comparator for generic item data type T
*/
static <T> void validateItems(final T[] items, final Comparator<? super T>
comparator) {
- final int lenM1 = items.length - 1;
- for (int j = 0; j < lenM1; j++) {
+ final int len = items.length;
+ if (len == 1 && items[0] == null) {
+ throw new SketchesArgumentException(
+ "Items must be unique, monotonically increasing and not null.");
+ }
+ for (int j = 0; j < len - 1; j++) {
if ((items[j] != null) && (items[j + 1] != null)
&& (comparator.compare(items[j], items[j + 1]) < 0)) {
continue;
diff --git
a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java
b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java
index b7ce737f..34faefb4 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java
@@ -54,8 +54,12 @@ public final class QuantilesUtil {
*/
public static final void checkDoublesSplitPointsOrder(final double[] values)
{
Objects.requireNonNull(values);
- final int len = values.length - 1;
- for (int j = 0; j < len; j++) {
+ final int len = values.length;
+ if (len == 1 && Double.isNaN(values[0])) {
+ throw new SketchesArgumentException(
+ "Values must be unique, monotonically increasing and not NaN.");
+ }
+ for (int j = 0; j < len - 1; j++) {
if (values[j] < values[j + 1]) { continue; }
throw new SketchesArgumentException(
"Values must be unique, monotonically increasing and not NaN.");
@@ -69,8 +73,12 @@ public final class QuantilesUtil {
*/
public static final void checkFloatsSplitPointsOrder(final float[] values) {
Objects.requireNonNull(values);
- final int len = values.length - 1;
- for (int j = 0; j < len; j++) {
+ final int len = values.length;
+ if (len == 1 && Float.isNaN(values[0])) {
+ throw new SketchesArgumentException(
+ "Values must be unique, monotonically increasing and not NaN.");
+ }
+ for (int j = 0; j < len - 1; j++) {
if (values[j] < values[j + 1]) { continue; }
throw new SketchesArgumentException(
"Values must be unique, monotonically increasing and not NaN.");
diff --git a/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java
b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java
index 0b17f679..1b8586ab 100644
--- a/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java
+++ b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java
@@ -34,7 +34,7 @@ import org.apache.datasketches.quantilescommon.QuantilesUtil;
* @author Alexander Saydakov
* @author Lee Rhodes
*/
-public class ReqSketchSortedView implements FloatsSortedView {
+public final class ReqSketchSortedView implements FloatsSortedView {
private float[] quantiles;
private long[] cumWeights; //comes in as individual weights, converted to
cumulative natural weights
private final long totalN;
diff --git
a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java
b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java
index b74fd367..edae1ecd 100644
--- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java
@@ -511,8 +511,10 @@ public class KllDoublesSketchTest {
public void checkWrapCase1Doubles() {
KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20);
for (int i = 1; i <= 21; i++) { sk.update(i); }
+
Memory mem = Memory.wrap(sk.toByteArray());
KllDoublesSketch sk2 = KllDoublesSketch.wrap(mem);
+
assertTrue(mem.isReadOnly());
assertTrue(sk2.isReadOnly());
assertFalse(sk2.isDirect());
@@ -522,8 +524,10 @@ public class KllDoublesSketchTest {
public void checkWritableWrapCase6And2Doubles() {
KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20);
for (int i = 1; i <= 21; i++) { sk.update(i); }
+
WritableMemory wmem =
WritableMemory.writableWrap(KllHelper.toByteArray(sk, true));
KllDoublesSketch sk2 = KllDoublesSketch.writableWrap(wmem, memReqSvr);
+
assertFalse(wmem.isReadOnly());
assertFalse(sk2.isReadOnly());
assertFalse(sk2.isDirect());
@@ -533,8 +537,10 @@ public class KllDoublesSketchTest {
public void checkKllSketchCase5Doubles() {
KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20);
for (int i = 1; i <= 21; i++) { sk.update(i); }
+
WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray());
KllDoublesSketch sk2 = KllDoublesSketch.writableWrap(wmem, memReqSvr);
+
assertFalse(wmem.isReadOnly());
assertTrue(sk2.isReadOnly());
assertFalse(sk2.isDirect());
@@ -544,9 +550,11 @@ public class KllDoublesSketchTest {
public void checkKllSketchCase3Doubles() {
KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20);
for (int i = 1; i <= 21; i++) { sk.update(i); }
+
Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true));
WritableMemory wmem = (WritableMemory) mem;
KllDoublesSketch sk2 = KllDoublesSketch.writableWrap(wmem, memReqSvr);
+
assertTrue(wmem.isReadOnly());
assertTrue(sk2.isReadOnly());
assertFalse(sk2.isDirect());
@@ -556,9 +564,11 @@ public class KllDoublesSketchTest {
public void checkKllSketchCase7Doubles() {
KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20);
for (int i = 1; i <= 21; i++) { sk.update(i); }
+
Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true));
WritableMemory wmem = (WritableMemory) mem;
KllDoublesSketch sk2 = KllDoublesSketch.writableWrap(wmem, memReqSvr);
+
assertTrue(wmem.isReadOnly());
assertTrue(sk2.isReadOnly());
assertFalse(sk2.isDirect());
diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
index e03612e7..23d12bb3 100644
--- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
@@ -511,8 +511,10 @@ public class KllFloatsSketchTest {
public void checkWrapCase1Floats() {
KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20);
for (int i = 1; i <= 21; i++) { sk.update(i); }
+
Memory mem = Memory.wrap(sk.toByteArray());
KllFloatsSketch sk2 = KllFloatsSketch.wrap(mem);
+
assertTrue(mem.isReadOnly());
assertTrue(sk2.isReadOnly());
assertFalse(sk2.isDirect());
@@ -522,8 +524,10 @@ public class KllFloatsSketchTest {
public void checkWritableWrapCase6And2Floats() {
KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20);
for (int i = 1; i <= 21; i++) { sk.update(i); }
+
WritableMemory wmem =
WritableMemory.writableWrap(KllHelper.toByteArray(sk, true));
KllFloatsSketch sk2 = KllFloatsSketch.writableWrap(wmem, memReqSvr);
+
assertFalse(wmem.isReadOnly());
assertFalse(sk2.isReadOnly());
assertFalse(sk2.isDirect());
@@ -533,8 +537,10 @@ public class KllFloatsSketchTest {
public void checkKllSketchCase5Floats() {
KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20);
for (int i = 1; i <= 21; i++) { sk.update(i); }
+
WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray());
KllFloatsSketch sk2 = KllFloatsSketch.writableWrap(wmem, memReqSvr);
+
assertFalse(wmem.isReadOnly());
assertTrue(sk2.isReadOnly());
assertFalse(sk2.isDirect());
@@ -544,9 +550,11 @@ public class KllFloatsSketchTest {
public void checkKllSketchCase3Floats() {
KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20);
for (int i = 1; i <= 21; i++) { sk.update(i); }
+
Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true));
WritableMemory wmem = (WritableMemory) mem;
KllFloatsSketch sk2 = KllFloatsSketch.writableWrap(wmem, memReqSvr);
+
assertTrue(wmem.isReadOnly());
assertTrue(sk2.isReadOnly());
assertFalse(sk2.isDirect());
@@ -556,9 +564,11 @@ public class KllFloatsSketchTest {
public void checkKllSketchCase7Floats() {
KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20);
for (int i = 1; i <= 21; i++) { sk.update(i); }
+
Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true));
WritableMemory wmem = (WritableMemory) mem;
KllFloatsSketch sk2 = KllFloatsSketch.writableWrap(wmem, memReqSvr);
+
assertTrue(wmem.isReadOnly());
assertTrue(sk2.isReadOnly());
assertFalse(sk2.isDirect());
diff --git a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java
b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java
index 30d0e680..88b4b3b2 100644
--- a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java
@@ -19,8 +19,10 @@
package org.apache.datasketches.kll;
-import static org.apache.datasketches.common.Util.characterPad;
+import static java.lang.Math.ceil;
+import static
org.apache.datasketches.kll.KllItemsHelper.intToFixedLengthString;
import static org.apache.datasketches.kll.KllItemsHelper.le;
+import static org.apache.datasketches.kll.KllItemsHelper.numDigits;
import static
org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
import static
org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import static org.testng.Assert.assertEquals;
@@ -148,29 +150,30 @@ public class KllItemsSketchTest {
public void manyValuesEstimationMode() {
final KllItemsSketch<String> sketch =
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
final int n = 1_000_000;
+ final int digits = numDigits(n);
for (int i = 1; i <= n; i++) {
- sketch.update(intToFixedLengthString(i, 7));
+ sketch.update(intToFixedLengthString(i, digits));
assertEquals(sketch.getN(), i);
}
// test getRank
for (int i = 1; i <= n; i++) {
final double trueRank = (double) i / n;
- String s = intToFixedLengthString(i, 7);
+ String s = intToFixedLengthString(i, digits);
double r = sketch.getRank(s);
assertEquals(sketch.getRank(s), trueRank, PMF_EPS_FOR_K_256, "for value
" + s);
}
// test getPMF
- String s = intToFixedLengthString(n/2, 7);
+ String s = intToFixedLengthString(n/2, digits);
final double[] pmf = sketch.getPMF(new String[] {s}); // split at median
assertEquals(pmf.length, 2);
assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256);
assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256);
- assertEquals(sketch.getMinItem(), intToFixedLengthString(1, 7));
- assertEquals(sketch.getMaxItem(), intToFixedLengthString(n, 7));
+ assertEquals(sketch.getMinItem(), intToFixedLengthString(1, digits));
+ assertEquals(sketch.getMaxItem(), intToFixedLengthString(n, digits));
// check at every 0.1 percentage point
final double[] fractions = new double[1001];
@@ -191,23 +194,259 @@ public class KllItemsSketchTest {
}
}
+ @Test
+ public void getRankGetCdfGetPmfConsistency() {
+ final KllItemsSketch<String> sketch =
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+ final int n = 1000;
+ final int digits = numDigits(n);
+ final String[] quantiles = new String[n];
+ for (int i = 0; i < n; i++) {
+ final String str = intToFixedLengthString(i, digits);
+ sketch.update(str);
+ quantiles[i] = str;
+ }
+ { //EXCLUSIVE
+ final double[] ranks = sketch.getCDF(quantiles, EXCLUSIVE);
+ final double[] pmf = sketch.getPMF(quantiles, EXCLUSIVE);
+ double sumPmf = 0;
+ for (int i = 0; i < n; i++) {
+ assertEquals(ranks[i], sketch.getRank(quantiles[i], EXCLUSIVE),
NUMERIC_NOISE_TOLERANCE, "rank vs CDF for value " + i);
+ sumPmf += pmf[i];
+ assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF
for value " + i);
+ }
+ sumPmf += pmf[n];
+ assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE);
+ assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE);
+ }
+ { // INCLUSIVE (default)
+ final double[] ranks = sketch.getCDF(quantiles, INCLUSIVE);
+ final double[] pmf = sketch.getPMF(quantiles, INCLUSIVE);
+ double sumPmf = 0;
+ for (int i = 0; i < n; i++) {
+ assertEquals(ranks[i], sketch.getRank(quantiles[i], INCLUSIVE),
NUMERIC_NOISE_TOLERANCE,
+ "rank vs CDF for value " + i);
+ sumPmf += pmf[i];
+ assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF
for value " + i);
+ }
+ sumPmf += pmf[n];
+ assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE);
+ assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE);
+ }
+ }
+ @Test
+ public void merge() {
+ final KllItemsSketch<String> sketch1 =
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+ final KllItemsSketch<String> sketch2 =
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+ final int n = 10000;
+ final int digits = numDigits(2 * n);
+ for (int i = 0; i < n; i++) {
+ sketch1.update(intToFixedLengthString(i, digits));
+ sketch2.update(intToFixedLengthString(2 * n - i - 1, digits));
+ }
+ assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+ assertEquals(sketch1.getMaxItem(), intToFixedLengthString(n - 1, digits));
+ assertEquals(sketch2.getMinItem(), intToFixedLengthString(n, digits));
+ assertEquals(sketch2.getMaxItem(), intToFixedLengthString(2 * n - 1,
digits));
+ sketch1.merge(sketch2);
+ assertFalse(sketch1.isEmpty());
+ assertEquals(sketch1.getN(), 2L * n);
+ assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+ assertEquals(sketch1.getMaxItem(), intToFixedLengthString(2 * n - 1,
digits));
+ String upperBound = intToFixedLengthString(n + (int)ceil(n *
PMF_EPS_FOR_K_256), digits);
+ String lowerBound = intToFixedLengthString(n - (int)ceil(n *
PMF_EPS_FOR_K_256), digits);
+ String median = sketch1.getQuantile(0.5);
+ assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+ assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+ }
+ @Test
+ public void mergeLowerK() {
+ final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(256,
Comparator.naturalOrder(), serDe);
+ final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(128,
Comparator.naturalOrder(), serDe);
+ final int n = 10000;
+ final int digits = numDigits(2 * n);
+ for (int i = 0; i < n; i++) {
+ sketch1.update(intToFixedLengthString(i, digits));
+ sketch2.update(intToFixedLengthString(2 * n - i - 1, digits));
+ }
+ assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+ assertEquals(sketch1.getMaxItem(), intToFixedLengthString(n - 1, digits));
+
+ assertEquals(sketch2.getMinItem(), intToFixedLengthString(n, digits));
+ assertEquals(sketch2.getMaxItem(), intToFixedLengthString(2 * n - 1,
digits));
+
+ assertTrue(sketch1.getNormalizedRankError(false) <
sketch2.getNormalizedRankError(false));
+ assertTrue(sketch1.getNormalizedRankError(true) <
sketch2.getNormalizedRankError(true));
+ sketch1.merge(sketch2);
+
+ // sketch1 must get "contaminated" by the lower K in sketch2
+ assertEquals(sketch1.getNormalizedRankError(false),
sketch2.getNormalizedRankError(false));
+ assertEquals(sketch1.getNormalizedRankError(true),
sketch2.getNormalizedRankError(true));
+
+ assertFalse(sketch1.isEmpty());
+ assertEquals(sketch1.getN(), 2 * n);
+ assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+ assertEquals(sketch1.getMaxItem(), intToFixedLengthString(2 * n - 1,
digits));
+ String upperBound = intToFixedLengthString(n + (int)ceil(n *
PMF_EPS_FOR_K_128), digits);
+ String lowerBound = intToFixedLengthString(n - (int)ceil(n *
PMF_EPS_FOR_K_128), digits);
+ String median = sketch1.getQuantile(0.5);
+ assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+ assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+ }
+ @Test
+ public void mergeEmptyLowerK() {
+ final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(256,
Comparator.naturalOrder(), serDe);
+ final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(128,
Comparator.naturalOrder(), serDe);
+ final int n = 10000;
+ final int digits = numDigits(n);
+ for (int i = 0; i < n; i++) {
+ sketch1.update(intToFixedLengthString(i, digits)); //sketch2 is empty
+ }
+ // rank error should not be affected by a merge with an empty sketch with
lower K
+ final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true);
+ sketch1.merge(sketch2);
+ assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge);
+ {
+ assertFalse(sketch1.isEmpty());
+ assertTrue(sketch2.isEmpty());
+ assertEquals(sketch1.getN(), n);
+ assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+ assertEquals(sketch1.getMaxItem(), intToFixedLengthString(n - 1, digits));
+ String upperBound = intToFixedLengthString(n / 2 + (int)ceil(n *
PMF_EPS_FOR_K_256), digits);
+ String lowerBound = intToFixedLengthString(n / 2 - (int)ceil(n *
PMF_EPS_FOR_K_256), digits);
+ String median = sketch1.getQuantile(0.5);
+ assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+ assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+ }
+ {
+ //merge the other way
+ sketch2.merge(sketch1);
+ assertFalse(sketch1.isEmpty());
+ assertFalse(sketch2.isEmpty());
+ assertEquals(sketch1.getN(), n);
+ assertEquals(sketch2.getN(), n);
+ assertEquals(sketch1.getMinItem(), intToFixedLengthString(0, digits));
+ assertEquals(sketch1.getMaxItem(), intToFixedLengthString(n - 1, digits));
+ assertEquals(sketch2.getMinItem(), intToFixedLengthString(0, digits));
+ assertEquals(sketch2.getMaxItem(), intToFixedLengthString(n - 1, digits));
+ String upperBound = intToFixedLengthString(n / 2 + (int)ceil(n *
PMF_EPS_FOR_K_128), digits);
+ String lowerBound = intToFixedLengthString(n / 2 - (int)ceil(n *
PMF_EPS_FOR_K_128), digits);
+ String median = sketch2.getQuantile(0.5);
+ assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+ assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+ }
+ }
+ @Test
+ public void mergeExactModeLowerK() {
+ final KllItemsSketch<String> sketch1 = KllItemsSketch.newHeapInstance(256,
Comparator.naturalOrder(), serDe);
+ final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(128,
Comparator.naturalOrder(), serDe);
+ final int n = 10000;
+ final int digits = numDigits(n);
+ for (int i = 0; i < n; i++) {
+ sketch1.update(intToFixedLengthString(i, digits));
+ }
+ sketch2.update(intToFixedLengthString(1, digits));
+ // rank error should not be affected by a merge with a sketch in exact
mode with lower K
+ final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true);
+ sketch1.merge(sketch2);
+ assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge);
+ }
- static String intToFixedLengthString(int number, int length) {
- final String num = Integer.valueOf(number).toString();
- return characterPad(num, length, '.', false);
+ @Test
+ public void mergeMinMinValueFromOther() {
+ final KllItemsSketch<String> sketch1 =
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+ final KllItemsSketch<String> sketch2 =
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+ sketch1.update(intToFixedLengthString(1, 1));
+ sketch2.update(intToFixedLengthString(2, 1));
+ sketch2.merge(sketch1);
+ assertEquals(sketch2.getMinItem(), intToFixedLengthString(1, 1));
}
+ @Test
+ public void mergeMinAndMaxFromOther() {
+ final KllItemsSketch<String> sketch1 =
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+ final KllItemsSketch<String> sketch2 = KllItemsSketch.newHeapInstance(10,
Comparator.naturalOrder(), serDe);
+ final int n = 1_000_000;
+ final int digits = numDigits(n);
+ for (int i = 1; i <= 1_000_000; i++) {
+ sketch1.update(intToFixedLengthString(i, digits)); //sketch2 is empty
+ }
+ sketch2.merge(sketch1);
+ assertEquals(sketch2.getMinItem(), intToFixedLengthString(1, digits));
+ assertEquals(sketch2.getMaxItem(), intToFixedLengthString(n, digits));
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void kTooSmall() {
+ KllItemsSketch.newHeapInstance(KllSketch.DEFAULT_M - 1,
Comparator.naturalOrder(), serDe);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void kTooLarge() {
+ KllItemsSketch.newHeapInstance(KllSketch.MAX_K + 1,
Comparator.naturalOrder(), serDe);
+ }
+
+ @Test
+ public void minK() {
+ final KllItemsSketch<String> sketch =
+
KllItemsSketch.newHeapInstance(KllSketch.DEFAULT_M,Comparator.naturalOrder(),
serDe);
+ final int n = 1000;
+ final int digits = numDigits(n);
+ for (int i = 0; i < n; i++) {
+ sketch.update(intToFixedLengthString(i, digits));
+ }
+ assertEquals(sketch.getK(), KllSketch.DEFAULT_M);
+ String upperBound = intToFixedLengthString(n / 2 + (int)ceil(n *
PMF_EPS_FOR_K_8), digits);
+ String lowerBound = intToFixedLengthString(n / 2 - (int)ceil(n *
PMF_EPS_FOR_K_8), digits);
+ String median = sketch.getQuantile(0.5);
+ assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+ assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+ }
+
+ @Test
+ public void maxK() {
+ final KllItemsSketch<String> sketch =
+
KllItemsSketch.newHeapInstance(KllSketch.MAX_K,Comparator.naturalOrder(),
serDe);
+ final int n = 1000;
+ final int digits = numDigits(n);
+ for (int i = 0; i < n; i++) {
+ sketch.update(intToFixedLengthString(i, digits));
+ }
+ assertEquals(sketch.getK(), KllSketch.MAX_K);
+ String upperBound = intToFixedLengthString(n / 2 + (int)ceil(n *
PMF_EPS_FOR_K_256), digits);
+ String lowerBound = intToFixedLengthString(n / 2 - (int)ceil(n *
PMF_EPS_FOR_K_256), digits);
+ String median = sketch.getQuantile(0.5);
+ assertTrue(le(median, upperBound, Comparator.naturalOrder()));
+ assertTrue(le(lowerBound, median, Comparator.naturalOrder()));
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void outOfOrderSplitPoints() {
+ final KllItemsSketch<String> sketch =
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+ final String s0 = intToFixedLengthString(0, 1);
+ final String s1 = intToFixedLengthString(1, 1);
+ sketch.update(s0);
+ sketch.getCDF(new String[] {s1, s0});
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void nullSplitPoint() {
+ final KllItemsSketch<String> sketch =
KllItemsSketch.newHeapInstance(Comparator.naturalOrder(), serDe);
+ sketch.update(intToFixedLengthString(0, 1));
+ sketch.getCDF(new String[] {null});
+ }
+
+
private final static boolean enablePrinting = true;
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]