This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch thetaRework
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
The following commit(s) were added to refs/heads/thetaRework by this push:
new 5db9cccf2 Theta Rework: completed all classes and tests.
5db9cccf2 is described below
commit 5db9cccf23c47061daa3e5436993c0c776aa685b
Author: Lee Rhodes <[email protected]>
AuthorDate: Mon Jun 16 14:09:28 2025 -0700
Theta Rework: completed all classes and tests.
---
.../java/org/apache/datasketches/common/Util.java | 34 +-
.../theta2/ConcurrentHeapThetaBuffer.java | 16 +
.../theta2/ConcurrentSharedThetaSketch.java | 4 +-
.../theta2/DirectQuickSelectSketch.java | 2 +-
.../theta2/DirectQuickSelectSketchR.java | 3 +-
.../datasketches/theta2/JaccardSimilarity.java | 179 +++++
.../org/apache/datasketches/theta2/Sketch.java | 4 +-
.../BoundsOnRatiosInThetaSketchedSets2.java | 121 ++++
.../ConcurrentDirectQuickSelectSketchTest.java | 4 +-
.../datasketches/theta2/BackwardConversions.java | 2 +-
.../ConcurrentDirectQuickSelectSketchTest.java | 220 +++---
.../ConcurrentHeapQuickSelectSketchTest.java | 745 +++++++++++++++++++++
.../datasketches/theta2/JaccardSimilarityTest.java | 248 +++++++
.../datasketches/theta2/ReadOnlyMemoryTest.java | 211 ++++++
.../datasketches/theta2/SetOperationTest.java | 438 ++++++++++++
.../datasketches/theta2/SetOpsCornerCasesTest.java | 501 ++++++++++++++
.../apache/datasketches/theta2/SketchesTest.java | 202 ++++++
.../theta2/ThetaSketchCrossLanguageTest.java | 121 ++++
.../BoundsOnRatiosInThetaSketchedSets2Test.java | 94 +++
19 files changed, 3020 insertions(+), 129 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/common/Util.java
b/src/main/java/org/apache/datasketches/common/Util.java
index 11615a39a..4701ddf9e 100644
--- a/src/main/java/org/apache/datasketches/common/Util.java
+++ b/src/main/java/org/apache/datasketches/common/Util.java
@@ -909,21 +909,29 @@ public final class Util {
}
/**
- * Request a new heap MemorySegment with the given capacityBytes.
+ * Request a new heap MemorySegment with the given capacityBytes and 8-byte
aligned or one byte aligned.
*
- * <p>The returned MemorySegment will be constructed from a <i>long[]</i>
array.
- * As a result, it will be on-heap and have a memory alignment of 8.
- * If the requested capacity is not divisible by eight, the returned size
- * will be rolled up to the next multiple of eight.</p>
+ * <p>If <i>aligned</i> is true, the returned MemorySegment will be
constructed from a <i>long[]</i> array,
+ * and, as a result, it will have a memory alignment of 8 bytes.
+ * If the requested capacity is not exactly divisible by eight, the returned
size
+ * will be rolled up to the next multiple of eight bytes.</p>
*
- * @param capacityBytes The new capacity being requested. It must not be
negative.
- * @return a new MemorySegment with the requested capacity.
- */
- public static MemorySegment newHeapSegment(final int capacityBytes) {
- final long[] array = ((capacityBytes & 0x7) == 0)
- ? new long[capacityBytes >>> 3]
- : new long[(capacityBytes >>> 3) + 1];
- return MemorySegment.ofArray(array);
+ * <p>If <i>aligned</i> is false, the returned MemorySegment will be
constructed from a <i>byte[]</i> array,
+ * and have a memory alignment of 1 byte.
+ *
+ * @param capacityBytes The new capacity being requested. It must not be
negative and cannot exceed Integer.MAX_VALUE.
+ * @param aligned if true, the new heap segment will have an alignment of 8
bytes, otherwise the alignment will be 1 byte.
+ * @return a new MemorySegment with the requested capacity and alignment.
+ */
+ public static MemorySegment newHeapSegment(final int capacityBytes, final
boolean aligned) {
+ if (aligned) {
+ final int lenLongs = capacityBytes >>> 3;
+ final long[] array = ((capacityBytes & 0x7) == 0)
+ ? new long[lenLongs]
+ : new long[lenLongs + 1];
+ return MemorySegment.ofArray(array);
+ }
+ return MemorySegment.ofArray(new byte[capacityBytes]);
}
/**
diff --git
a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
index c93ed892b..f8f5a0947 100644
---
a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
+++
b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java
@@ -23,6 +23,7 @@ import static
org.apache.datasketches.theta2.UpdateReturnState.ConcurrentBufferI
import static
org.apache.datasketches.theta2.UpdateReturnState.ConcurrentPropagated;
import static
org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta;
+import java.lang.foreign.MemorySegment;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.datasketches.common.ResizeFactor;
@@ -147,6 +148,16 @@ final class ConcurrentHeapThetaBuffer extends
HeapQuickSelectSketch {
return shared.getUpperBound(numStdDev);
}
+ @Override
+ public boolean hasMemorySegment() {
+ return shared.hasMemorySegment();
+ }
+
+ @Override
+ public boolean isDirect() {
+ return shared.isDirect();
+ }
+
@Override
public boolean isEmpty() {
return shared.isEmpty();
@@ -157,6 +168,11 @@ final class ConcurrentHeapThetaBuffer extends
HeapQuickSelectSketch {
return shared.isEstimationMode();
}
+ @Override
+ public boolean isSameResource(final MemorySegment that) {
+ return shared.isSameResource(that);
+ }
+
//End of proxies
@Override
diff --git
a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
index 40746c3e6..5c89b3e68 100644
---
a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
+++
b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java
@@ -22,6 +22,8 @@ package org.apache.datasketches.theta2;
import java.lang.foreign.MemorySegment;
import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.datasketches.common.MemorySegmentStatus;
+
/**
* An internal interface to define the API of a concurrent shared theta sketch.
* It reflects all data processed by a single or multiple update threads, and
can serve queries at
@@ -29,7 +31,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
*
* @author eshcar
*/
-interface ConcurrentSharedThetaSketch {
+interface ConcurrentSharedThetaSketch extends MemorySegmentStatus {
long NOT_SINGLE_HASH = -1L;
double MIN_ERROR = 0.0000001;
diff --git
a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
index 32ae0d14d..193385a1f 100644
--- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java
@@ -323,7 +323,7 @@ class DirectQuickSelectSketch extends
DirectQuickSelectSketchR {
//}
//final MemorySegment newDstSeg = memReqSvr_.request(wseg_,
reqBytes);
- final MemorySegment newDstSeg = newHeapSegment(reqBytes);
+ final MemorySegment newDstSeg = newHeapSegment(reqBytes, false);
moveAndResize(wseg_, preambleLongs, lgArrLongs, newDstSeg,
tgtLgArrLongs, thetaLong);
wseg_ = newDstSeg;
diff --git
a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
index c0db75b16..b7c47de47 100644
--- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
+++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java
@@ -223,8 +223,7 @@ class DirectQuickSelectSketchR extends UpdateSketch {
final long lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
final long[] cacheArr = new long[1 << lgArrLongs];
- final MemorySegment seg = MemorySegment.ofArray(cacheArr);
- MemorySegment.copy(wseg_, preambleLongs << 3, seg, 0, 8 << lgArrLongs);
+ MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, preambleLongs << 3,
cacheArr, 0, 1 << lgArrLongs);
return cacheArr;
}
diff --git
a/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java
b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java
new file mode 100644
index 000000000..624dcc3d7
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import static
org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA;
+import static
org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA;
+import static
org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA;
+
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Jaccard similarity of two Theta Sketches.
+ *
+ * @author Lee Rhodes
+ */
+public final class JaccardSimilarity {
+ private static final double[] ZEROS = {0.0, 0.0, 0.0}; // LB, Estimate, UB
+ private static final double[] ONES = {1.0, 1.0, 1.0};
+
+ /**
+ * Computes the Jaccard similarity index with upper and lower bounds. The
Jaccard similarity index
+ * <i>J(A,B) = (A ^ B)/(A U B)</i> is used to measure how similar the two
sketches are to each
+ * other. If J = 1.0, the sketches are considered equal. If J = 0, the two
sketches are
+ * distinct from each other. A Jaccard of .95 means the overlap between the
two
+ * populations is 95% of the union of the two populations.
+ *
+ * <p>Note: For very large pairs of sketches, where the configured nominal
entries of the sketches
+ * are 2^25 or 2^26, this method may produce unpredictable results.
+ *
+ * @param sketchA given sketch A
+ * @param sketchB given sketch B
+ * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard
index.
+ * The Upper and Lower bounds are for a confidence interval of 95.4% or +/-
2 standard deviations.
+ */
+ public static double[] jaccard(final Sketch sketchA, final Sketch sketchB) {
+ //Corner case checks
+ if (sketchA == null || sketchB == null) { return ZEROS.clone(); }
+ if (sketchA == sketchB) { return ONES.clone(); }
+ if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); }
+ if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); }
+
+ final int countA = sketchA.getRetainedEntries(true);
+ final int countB = sketchB.getRetainedEntries(true);
+
+ //Create the Union
+ final int minK = 1 << ThetaUtil.MIN_LG_NOM_LONGS;
+ final int maxK = 1 << ThetaUtil.MAX_LG_NOM_LONGS;
+ final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK);
+ final Union union =
+ SetOperation.builder().setNominalEntries(newK).buildUnion();
+ union.union(sketchA);
+ union.union(sketchB);
+ final Sketch unionAB = union.getResult(false, null);
+ final long thetaLongUAB = unionAB.getThetaLong();
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ final int countUAB = unionAB.getRetainedEntries(true);
+
+ //Check for identical data
+ if (countUAB == countA && countUAB == countB
+ && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+ return ONES.clone();
+ }
+
+ //Create the Intersection
+ final Intersection inter = SetOperation.builder().buildIntersection();
+ inter.intersect(sketchA);
+ inter.intersect(sketchB);
+ inter.intersect(unionAB); //ensures that intersection is a subset of the
union
+ final Sketch interABU = inter.getResult(false, null);
+
+ final double lb = getLowerBoundForBoverA(unionAB, interABU);
+ final double est = getEstimateOfBoverA(unionAB, interABU);
+ final double ub = getUpperBoundForBoverA(unionAB, interABU);
+ return new double[] {lb, est, ub};
+ }
+
+ /**
+ * Returns true if the two given sketches have exactly the same hash values
and the same
+ * theta values. Thus, they are equivalent.
+ * @param sketchA the given sketch A
+ * @param sketchB the given sketch B
+ * @return true if the two given sketches have exactly the same hash values
and the same
+ * theta values.
+ */
+ public static boolean exactlyEqual(final Sketch sketchA, final Sketch
sketchB) {
+ //Corner case checks
+ if (sketchA == null || sketchB == null) { return false; }
+ if (sketchA == sketchB) { return true; }
+ if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; }
+ if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; }
+
+ final int countA = sketchA.getRetainedEntries(true);
+ final int countB = sketchB.getRetainedEntries(true);
+
+ //Create the Union
+ final Union union =
+ SetOperation.builder().setNominalEntries(ceilingPowerOf2(countA +
countB)).buildUnion();
+ union.union(sketchA);
+ union.union(sketchB);
+ final Sketch unionAB = union.getResult();
+ final long thetaLongUAB = unionAB.getThetaLong();
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ final int countUAB = unionAB.getRetainedEntries(true);
+
+ //Check for identical counts and thetas
+ if (countUAB == countA && countUAB == countB
+ && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Tests similarity of a measured Sketch against an expected Sketch.
+ * Computes the lower bound of the Jaccard index <i>J<sub>LB</sub></i> of
the measured and
+ * expected sketches.
+ * if <i>J<sub>LB</sub> ≥ threshold</i>, then the sketches are considered
to be
+ * similar with a confidence of 97.7%.
+ *
+ * @param measured the sketch to be tested
+ * @param expected the reference sketch that is considered to be correct.
+ * @param threshold a real value between zero and one.
+ * @return if true, the similarity of the two sketches is greater than the
given threshold
+ * with at least 97.7% confidence.
+ */
+ public static boolean similarityTest(final Sketch measured, final Sketch
expected,
+ final double threshold) {
+ //index 0: the lower bound
+ //index 1: the mean estimate
+ //index 2: the upper bound
+ final double jRatioLB = jaccard(measured, expected)[0]; //choosing the
lower bound
+ return jRatioLB >= threshold;
+ }
+
+ /**
+ * Tests dissimilarity of a measured Sketch against an expected Sketch.
+ * Computes the upper bound of the Jaccard index <i>J<sub>UB</sub></i> of
the measured and
+ * expected sketches.
+ * if <i>J<sub>UB</sub> ≤ threshold</i>, then the sketches are considered
to be
+ * dissimilar with a confidence of 97.7%.
+ *
+ * @param measured the sketch to be tested
+ * @param expected the reference sketch that is considered to be correct.
+ * @param threshold a real value between zero and one.
+ * @return if true, the dissimilarity of the two sketches is greater than
the given threshold
+ * with at least 97.7% confidence.
+ */
+ public static boolean dissimilarityTest(final Sketch measured, final Sketch
expected,
+ final double threshold) {
+ //index 0: the lower bound
+ //index 1: the mean estimate
+ //index 2: the upper bound
+ final double jRatioUB = jaccard(measured, expected)[2]; //choosing the
upper bound
+ return jRatioUB <= threshold;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java
b/src/main/java/org/apache/datasketches/theta2/Sketch.java
index 82661aa27..e98396842 100644
--- a/src/main/java/org/apache/datasketches/theta2/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java
@@ -222,7 +222,7 @@ public abstract class Sketch implements MemorySegmentStatus
{
*
* <p>A new <i>CompactSketch</i> object is created:</p>
* <ul><li>if <i>dstMem != null</i></li>
- * <li>if <i>dstMem == null</i> and <i>this.hasMemory() == true</i></li>
+ * <li>if <i>dstMem == null</i> and <i>this.hasMemorySegment() ==
true</i></li>
* <li>if <i>dstMem == null</i> and <i>this</i> has more than 1 item and
<i>this.isOrdered() == false</i>
* and <i>dstOrdered == true</i>.</li>
*</ul>
@@ -564,7 +564,7 @@ public abstract class Sketch implements MemorySegmentStatus
{
/**
* Gets the internal cache array. For on-heap sketches this will return a
reference to the actual
- * cache array. For Memory-based sketches this returns a copy.
+ * cache array. For MemorySegment-based sketches this returns a copy.
* @return the internal cache array.
*/
abstract long[] getCache();
diff --git
a/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java
b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java
new file mode 100644
index 000000000..f8199cc4f
--- /dev/null
+++
b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.thetacommon;
+
+import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE;
+
+import org.apache.datasketches.common.BoundsOnRatiosInSampledSets;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.theta2.Sketch;
+
+/**
+ * This class is used to compute the bounds on the estimate of the ratio <i>B
/ A</i>, where:
+ * <ul>
+ * <li><i>A</i> is a Theta Sketch of population <i>PopA</i>.</li>
+ * <li><i>B</i> is a Theta Sketch of population <i>PopB</i> that is a subset
of <i>A</i>,
+ * obtained by an intersection of <i>A</i> with some other Theta Sketch
<i>C</i>,
+ * which acts like a predicate or selection clause.</li>
+ * <li>The estimate of the ratio <i>PopB/PopA</i> is
+ * BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(<i>A, B</i>).</li>
+ * <li>The Upper Bound estimate on the ratio PopB/PopA is
+ * BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(<i>A, B</i>).</li>
+ * <li>The Lower Bound estimate on the ratio PopB/PopA is
+ * BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(<i>A, B</i>).</li>
+ * </ul>
+ * Note: The theta of <i>A</i> cannot be greater than the theta of <i>B</i>.
+ * If <i>B</i> is formed as an intersection of <i>A</i> and some other set
<i>C</i>,
+ * then the theta of <i>B</i> is guaranteed to be less than or equal to the
theta of <i>B</i>.
+ *
+ * @author Kevin Lang
+ * @author Lee Rhodes
+ */
+public final class BoundsOnRatiosInThetaSketchedSets2 {
+
+ private BoundsOnRatiosInThetaSketchedSets2() {}
+
+ /**
+ * Gets the approximate lower bound for B over A based on a 95% confidence
interval
+ * @param sketchA the sketch A
+ * @param sketchB the sketch B
+ * @return the approximate lower bound for B over A
+ */
+ public static double getLowerBoundForBoverA(final Sketch sketchA, final
Sketch sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries(true);
+ final int countA = (thetaLongB == thetaLongA)
+ ? sketchA.getRetainedEntries(true)
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 0; }
+ final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE;
+ return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB,
f);
+ }
+
+ /**
+ * Gets the approximate upper bound for B over A based on a 95% confidence
interval
+ * @param sketchA the sketch A
+ * @param sketchB the sketch B
+ * @return the approximate upper bound for B over A
+ */
+ public static double getUpperBoundForBoverA(final Sketch sketchA, final
Sketch sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries(true);
+ final int countA = (thetaLongB == thetaLongA)
+ ? sketchA.getRetainedEntries(true)
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 1.0; }
+ final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE;
+ return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB,
f);
+ }
+
+ /**
+ * Gets the estimate for B over A
+ * @param sketchA the sketch A
+ * @param sketchB the sketch B
+ * @return the estimate for B over A
+ */
+ public static double getEstimateOfBoverA(final Sketch sketchA, final Sketch
sketchB) {
+ final long thetaLongA = sketchA.getThetaLong();
+ final long thetaLongB = sketchB.getThetaLong();
+ checkThetas(thetaLongA, thetaLongB);
+
+ final int countB = sketchB.getRetainedEntries(true);
+ final int countA = (thetaLongB == thetaLongA)
+ ? sketchA.getRetainedEntries(true)
+ : sketchA.getCountLessThanThetaLong(thetaLongB);
+
+ if (countA <= 0) { return 0.5; }
+
+ return (double) countB / (double) countA;
+ }
+
+ static void checkThetas(final long thetaLongA, final long thetaLongB) {
+ if (thetaLongB > thetaLongA) {
+ throw new SketchesArgumentException("ThetaLongB cannot be >
ThetaLongA.");
+ }
+ }
+}
diff --git
a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
index 6d6af7047..fe2b138ca 100644
---
a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
+++
b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
@@ -696,7 +696,9 @@ public class ConcurrentDirectQuickSelectSketchTest {
}
private static void checkMemoryDirectProxyMethods(Sketch local, Sketch
shared) {
- assertEquals(local.hasMemory(), shared.hasMemory());
+ assertEquals(
+ local.hasMemory(),
+ shared.hasMemory());
assertEquals(local.isDirect(), shared.isDirect());
}
diff --git
a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java
b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java
index 0e1348684..bec67b219 100644
--- a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java
+++ b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java
@@ -220,7 +220,7 @@ public class BackwardConversions {
final int entries = skV3.getRetainedEntries(true);
final boolean unordered = !(skV3.isOrdered());
final byte flags = (byte) (0xA | (unordered ? 16 : 0)); //Unordered,
NoRebuild, notEmpty, ReadOnly, LE
- wseg = Util.newHeapSegment((preLongs + entries) << 3);
+ wseg = Util.newHeapSegment((preLongs + entries) << 3, false);
wseg.set(JAVA_BYTE, 0, (byte) preLongs); //preLongs
wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer
wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch
diff --git
a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java
similarity index 77%
copy from
src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
copy to
src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java
index 6d6af7047..7a7b89cef 100644
---
a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
+++
b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java
@@ -17,21 +17,23 @@
* under the License.
*/
-package org.apache.datasketches.theta;
+package org.apache.datasketches.theta2;
-import static
org.apache.datasketches.theta.ConcurrentHeapQuickSelectSketchTest.waitForBgPropagationToComplete;
-import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
-import static org.apache.datasketches.theta.PreambleUtil.LG_NOM_LONGS_BYTE;
-import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static
org.apache.datasketches.theta2.ConcurrentHeapQuickSelectSketchTest.waitForBgPropagationToComplete;
+import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
+import java.lang.foreign.MemorySegment;
+
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
-import
org.apache.datasketches.theta.ConcurrentHeapQuickSelectSketchTest.SharedLocal;
+import org.apache.datasketches.common.Util;
+import
org.apache.datasketches.theta2.ConcurrentHeapQuickSelectSketchTest.SharedLocal;
import org.apache.datasketches.thetacommon.HashOperations;
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.testng.annotations.Test;
@@ -45,8 +47,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
@Test
public void checkDirectCompactConversion() {
int lgK = 9;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
assertTrue(sl.shared instanceof ConcurrentDirectQuickSelectSketch);
assertTrue(sl.shared.compact().isCompact());
}
@@ -56,8 +58,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
int lgK = 9;
int k = 1 << lgK;
int u = 2*k;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared; //off-heap
UpdateSketch local = sl.local;
@@ -69,7 +71,7 @@ public class ConcurrentDirectQuickSelectSketchTest {
assertEquals(local.getClass().getSimpleName(),
"ConcurrentHeapThetaBuffer");
//This sharedHeap is not linked to the concurrent local buffer
- UpdateSketch sharedHeap = Sketches.heapifyUpdateSketch(sl.wmem);
+ UpdateSketch sharedHeap = Sketches.heapifyUpdateSketch(sl.wseg);
assertEquals(sharedHeap.getClass().getSimpleName(),
"HeapQuickSelectSketch");
checkMemoryDirectProxyMethods(local, shared);
@@ -90,8 +92,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
public void checkHeapifyByteArrayExact() {
int lgK = 9;
int k = 1 << lgK;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -99,13 +101,13 @@ public class ConcurrentDirectQuickSelectSketchTest {
waitForBgPropagationToComplete(shared);
byte[] serArr = shared.toByteArray();
- Memory srcMem = Memory.wrap(serArr);
- Sketch recoveredShared = Sketch.heapify(srcMem);
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly();
+ Sketch recoveredShared = Sketch.heapify(srcSeg);
//reconstruct to Native/Direct
final int bytes = Sketch.getMaxUpdateSketchBytes(k);
- final WritableMemory wmem = WritableMemory.allocate(bytes);
- shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared,
wmem);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared,
wseg);
UpdateSketch local2 = sl.bldr.buildLocal(shared);
assertEquals(local2.getEstimate(), k, 0.0);
@@ -127,8 +129,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
int k = 1 << lgK;
int u = 2*k;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -141,13 +143,13 @@ public class ConcurrentDirectQuickSelectSketchTest {
assertEquals(local.isEstimationMode(), true);
byte[] serArr = shared.toByteArray();
- Memory srcMem = Memory.wrap(serArr);
- Sketch recoveredShared = Sketch.heapify(srcMem);
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly();
+ Sketch recoveredShared = Sketch.heapify(srcSeg);
//reconstruct to Native/Direct
final int bytes = Sketch.getMaxUpdateSketchBytes(k);
- final WritableMemory wmem = WritableMemory.allocate(bytes);
- shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared,
wmem);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared,
wseg);
UpdateSketch local2 = sl.bldr.buildLocal(shared);
assertEquals(local2.getEstimate(), uskEst);
@@ -165,8 +167,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
int u = 2*k;
//boolean estimating = (u > k);
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -178,7 +180,7 @@ public class ConcurrentDirectQuickSelectSketchTest {
double sk1ub = local.getUpperBound(2);
assertTrue(local.isEstimationMode());
- Sketch local2 = Sketch.wrap(sl.wmem);
+ Sketch local2 = Sketch.wrap(sl.wseg);
assertEquals(local2.getEstimate(), sk1est);
assertEquals(local2.getLowerBound(2), sk1lb);
@@ -193,14 +195,14 @@ public class ConcurrentDirectQuickSelectSketchTest {
int k = 1 << lgK;
int u = 4*k;
//boolean estimating = (u > k);
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
assertEquals(local.getClass().getSimpleName(),
"ConcurrentHeapThetaBuffer");
assertFalse(local.isDirect());
- assertTrue(local.hasMemory());
+ assertTrue(local.hasMemorySegment());
for (int i=0; i<u; i++) { local.update(i); }
waitForBgPropagationToComplete(shared);
@@ -233,10 +235,10 @@ public class ConcurrentDirectQuickSelectSketchTest {
int bytes = shared.getCompactBytes();
assertEquals(bytes, (k*8) + (Family.COMPACT.getMaxPreLongs() << 3));
- byte[] memArr2 = new byte[bytes];
- WritableMemory mem2 = WritableMemory.writableWrap(memArr2);
+ byte[] segArr2 = new byte[bytes];
+ MemorySegment seg2 = MemorySegment.ofArray(segArr2);
- csk = shared.compact(false, mem2);
+ csk = shared.compact(false, seg2);
assertEquals(csk.getEstimate(), localEst);
assertEquals(csk.getLowerBound(2), localLB);
assertEquals(csk.getUpperBound(2), localUB);
@@ -244,8 +246,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
assertTrue(csk.isEstimationMode());
assertEquals(csk.getClass().getSimpleName(), "DirectCompactSketch");
- mem2.clear();
- csk = shared.compact(true, mem2);
+ Util.clear(seg2);
+ csk = shared.compact(true, seg2);
assertEquals(csk.getEstimate(), localEst);
assertEquals(csk.getLowerBound(2), localLB);
assertEquals(csk.getUpperBound(2), localUB);
@@ -258,8 +260,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
@Test
public void checkDQStoCompactEmptyForms() {
int lgK = 9;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -273,17 +275,17 @@ public class ConcurrentDirectQuickSelectSketchTest {
int bytes = local.getCompactBytes(); //compact form
assertEquals(bytes, 8);
- byte[] memArr2 = new byte[bytes];
- WritableMemory mem2 = WritableMemory.writableWrap(memArr2);
+ byte[] segArr2 = new byte[bytes];
+ MemorySegment seg2 = MemorySegment.ofArray(segArr2);
- CompactSketch csk2 = shared.compact(false, mem2);
+ CompactSketch csk2 = shared.compact(false, seg2);
assertEquals(csk2.getEstimate(), localEst);
assertEquals(csk2.getLowerBound(2), localLB);
assertEquals(csk2.getUpperBound(2), localUB);
assertTrue(csk2.isEmpty());
assertFalse(csk2.isEstimationMode());
assertTrue(csk2.isOrdered());
- CompactSketch csk3 = shared.compact(true, mem2);
+ CompactSketch csk3 = shared.compact(true, seg2);
csk3.toString(false, true, 0, false);
csk3.toString();
assertEquals(csk3.getEstimate(), localEst);
@@ -298,8 +300,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
public void checkEstMode() {
int lgK = 12;
int k = 1 << lgK;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -315,8 +317,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
public void checkErrorBounds() {
int lgK = 9;
int k = 1 << lgK;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -350,8 +352,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
int lgK = 9;
int k = 1 << lgK;
int u = 2*k;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -370,8 +372,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
int lgK = 9;
int k = 1 << lgK;
int u = 4*k;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -396,8 +398,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
public void checkResetAndStartingSubMultiple() {
int lgK = 9;
int k = 1 << lgK;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -424,8 +426,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
int lgK = 12;
int k = 1 << lgK;
int u = k;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
assertTrue(local.isEmpty());
@@ -441,8 +443,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
public void checkEstModeMemoryArr() {
int lgK = 12;
int k = 1 << lgK;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
assertTrue(local.isEmpty());
@@ -460,8 +462,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
public void checkEstModeNativeMemory() {
int lgK = 12;
int k = 1 << lgK;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
assertTrue(local.isEmpty());
@@ -478,8 +480,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
public void checkConstructReconstructFromMemory() {
int lgK = 12;
int k = 1 << lgK;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -498,13 +500,13 @@ public class ConcurrentDirectQuickSelectSketchTest {
double est2;
serArr = shared.toByteArray();
- WritableMemory mem = WritableMemory.writableWrap(serArr);
- UpdateSketch recoveredShared = Sketches.wrapUpdateSketch(mem);
+ MemorySegment seg = MemorySegment.ofArray(serArr);
+ UpdateSketch recoveredShared = Sketches.wrapUpdateSketch(seg);
//reconstruct to Native/Direct
final int bytes = Sketch.getMaxUpdateSketchBytes(k);
- final WritableMemory wmem = WritableMemory.allocate(bytes);
- shared = sl.bldr.buildSharedFromSketch(recoveredShared, wmem);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch(recoveredShared, wseg);
UpdateSketch local2 = sl.bldr.buildLocal(shared);
est2 = local2.getEstimate();
@@ -518,7 +520,7 @@ public class ConcurrentDirectQuickSelectSketchTest {
for (int i = 0; i < 1000; i++) { sk.update(i); }
final UpdateSketch shared = bldr.buildSharedFromSketch(sk, null);
assertEquals(shared.getRetainedEntries(true), 1000);
- assertFalse(shared.hasMemory());
+ assertFalse(shared.hasMemorySegment());
}
//checks Alex's bug where lgArrLongs > lgNomLongs +1.
@@ -526,8 +528,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
public void checkResizeInBigMem() {
int lgK = 14;
int u = 1 << 20;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, SEED, useMem, true, 8); //mem
is 8X larger than needed
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, SEED, useSeg, true, 8); //seg
is 8X larger than needed
UpdateSketch local = sl.local;
for (int i = 0; i < u; i++) { local.update(i); }
@@ -536,37 +538,37 @@ public class ConcurrentDirectQuickSelectSketchTest {
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkConstructorKtooSmall() {
int lgK = 3;
- boolean useMem = true;
- new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ new SharedLocal(lgK, lgK, useSeg);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkConstructorMemTooSmall() {
int lgK = 4;
int k = 1 << lgK;
- WritableMemory wmem = WritableMemory.allocate(k/2);
+ MemorySegment wseg = MemorySegment.ofArray(new byte[k/2]);
UpdateSketchBuilder bldr = new UpdateSketchBuilder();
bldr.setLogNominalEntries(lgK);
- bldr.buildShared(wmem);
+ bldr.buildShared(wseg);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkHeapifyIllegalFamilyID_heapify() {
int lgK = 9;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
- sl.wmem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Family ID byte
- //try to heapify the corrupted mem
- Sketch.heapify(sl.wmem); //catch in Sketch.constructHeapSketch
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Family ID byte
+ //try to heapify the corrupted seg
+ Sketch.heapify(sl.wseg); //catch in Sketch.constructHeapSketch
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkBadLgNomLongs() {
int lgK = 4;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
- sl.wmem.putByte(LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte
- DirectQuickSelectSketch.writableWrap(sl.wmem,
ThetaUtil.DEFAULT_UPDATE_SEED);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs
byte
+ DirectQuickSelectSketch.writableWrap(sl.wseg,
ThetaUtil.DEFAULT_UPDATE_SEED);
}
@Test
@@ -574,8 +576,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
int lgK = 4;
int k = 1 << lgK;
int u = 10*k;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
assertTrue(local.isEmpty());
@@ -612,8 +614,8 @@ public class ConcurrentDirectQuickSelectSketchTest {
public void checkBadSerVer() {
int lgK = 9;
int k = 1 << lgK;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
assertTrue(local.isEmpty());
@@ -625,30 +627,30 @@ public class ConcurrentDirectQuickSelectSketchTest {
assertEquals(local.getEstimate(), k, 0.0);
assertEquals(shared.getRetainedEntries(false), k);
- sl.wmem.putByte(SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte
- Sketch.wrap(sl.wmem);
+ sl.wseg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte
+ Sketch.wrap(sl.wseg);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkWrapIllegalFamilyID_wrap() {
int lgK = 9;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
- sl.wmem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
- //try to wrap the corrupted mem
- Sketch.wrap(sl.wmem); //catch in Sketch.constructDirectSketch
+ sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
+ //try to wrap the corrupted seg
+ Sketch.wrap(sl.wseg); //catch in Sketch.constructDirectSketch
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkWrapIllegalFamilyID_direct() {
int lgK = 9;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
- sl.wmem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
- //try to wrap the corrupted mem
- DirectQuickSelectSketch.writableWrap(sl.wmem,
ThetaUtil.DEFAULT_UPDATE_SEED);
+ sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
+ //try to wrap the corrupted seg
+ DirectQuickSelectSketch.writableWrap(sl.wseg,
ThetaUtil.DEFAULT_UPDATE_SEED);
}
@Test(expectedExceptions = SketchesArgumentException.class)
@@ -656,29 +658,29 @@ public class ConcurrentDirectQuickSelectSketchTest {
int lgK = 9;
long seed1 = 1021;
long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, seed1, useMem, true, 1);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, seed1, useSeg, true, 1);
UpdateSketch shared = sl.shared;
- Memory srcMem = Memory.wrap(shared.toByteArray());
- Sketch.heapify(srcMem, seed2);
+ MemorySegment srcSeg =
MemorySegment.ofArray(shared.toByteArray()).asReadOnly();
+ Sketch.heapify(srcSeg, seed2);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkCorruptLgNomLongs() {
int lgK = 4;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
- sl.wmem.putByte(LG_NOM_LONGS_BYTE, (byte)2); //corrupt
- Sketch.heapify(sl.wmem, ThetaUtil.DEFAULT_UPDATE_SEED);
+ sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt
+ Sketch.heapify(sl.wseg, ThetaUtil.DEFAULT_UPDATE_SEED);
}
@Test(expectedExceptions = UnsupportedOperationException.class)
public void checkIllegalHashUpdate() {
int lgK = 4;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
shared.hashUpdate(1);
}
@@ -696,11 +698,13 @@ public class ConcurrentDirectQuickSelectSketchTest {
}
private static void checkMemoryDirectProxyMethods(Sketch local, Sketch
shared) {
- assertEquals(local.hasMemory(), shared.hasMemory());
+ assertEquals(
+ local.hasMemorySegment(),
+ shared.hasMemorySegment());
assertEquals(local.isDirect(), shared.isDirect());
}
- //Does not check hasMemory(), isDirect()
+ //Does not check hasMemorySegment(), isDirect()
private static void checkOtherProxyMethods(Sketch local, Sketch shared) {
assertEquals(local.getCompactBytes(), shared.getCompactBytes());
assertEquals(local.getCurrentBytes(), shared.getCurrentBytes());
diff --git
a/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java
b/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java
new file mode 100644
index 000000000..4685639ec
--- /dev/null
+++
b/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java
@@ -0,0 +1,745 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE;
+import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/**
+ * @author eshcar
+ */
+public class ConcurrentHeapQuickSelectSketchTest {
+
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBadSerVer() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ int u = k;
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ assertTrue(local.isEmpty());
+
+ for (int i = 0; i< u; i++) {
+ local.update(i);
+ }
+ waitForBgPropagationToComplete(shared);
+
+ assertFalse(local.isEmpty());
+ assertEquals(local.getEstimate(), u, 0.0);
+ assertEquals(shared.getRetainedEntries(false), u);
+
+ byte[] serArr = shared.toByteArray();
+ MemorySegment seg = MemorySegment.ofArray(serArr);
+ Sketch sk = Sketch.heapify(seg, sl.seed);
+ assertTrue(sk instanceof HeapQuickSelectSketch); //Intentional promotion
to Parent
+
+ seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte
+ Sketch.heapify(seg, sl.seed);
+ }
+
+ @Test
+ public void checkPropagationNotOrdered() {
+ int lgK = 8;
+ int k = 1 << lgK;
+ int u = 200*k;
+ SharedLocal sl = new SharedLocal(lgK, 4, false, false);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+ assertEquals((sl.bldr.getLocalLgNominalEntries()), 4);
+ assertTrue(local.isEmpty());
+
+ for (int i = 0; i < u; i++) {
+ local.update(i);
+ }
+ waitForBgPropagationToComplete(shared);
+
+ assertFalse(local.isEmpty());
+ assertTrue(shared.getRetainedEntries(true) <= u);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkIllegalSketchID_UpdateSketch() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ int u = k;
+ SharedLocal sl = new SharedLocal(lgK);
+
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+ assertTrue(local.isEmpty());
+ assertTrue(shared instanceof ConcurrentHeapQuickSelectSketch);
+ for (int i = 0; i< u; i++) {
+ local.update(i);
+ }
+ assertTrue(shared.compact().isCompact());
+
+ assertFalse(local.isEmpty());
+ assertEquals(local.getEstimate(), u, 0.0);
+ assertEquals(shared.getRetainedEntries(false), u);
+ byte[] byteArray = shared.toByteArray();
+ MemorySegment seg = MemorySegment.ofArray(byteArray);
+ seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
+
+ //try to heapify the corrupted seg
+ Sketch.heapify(seg, sl.seed);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkHeapifySeedConflict() {
+ int lgK = 9;
+ long seed = 1021;
+ long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED;
+ SharedLocal sl = new SharedLocal(lgK, lgK, seed);
+ byte[] byteArray = sl.shared.toByteArray();
+ MemorySegment srcSeg = MemorySegment.ofArray(byteArray);
+ Sketch.heapify(srcSeg, seed2);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkHeapifyCorruptLgNomLongs() {
+ int lgK = 4;
+ SharedLocal sl = new SharedLocal(lgK);
+ byte[] serArr = sl.shared.toByteArray();
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr);
+ srcSeg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt
+ Sketch.heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ }
+
+ @Test(expectedExceptions = UnsupportedOperationException.class)
+ public void checkIllegalHashUpdate() {
+ int lgK = 4;
+ SharedLocal sl = new SharedLocal(lgK);
+ sl.shared.hashUpdate(1);
+ }
+
+ @Test
+ public void checkHeapifyByteArrayExact() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ int u = k;
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ for (int i=0; i<u; i++) {
+ local.update(i);
+ }
+ waitForBgPropagationToComplete(shared);
+
+ byte[] serArr = shared.toByteArray();
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly();
+ Sketch recoveredShared = Sketches.heapifyUpdateSketch(srcSeg);
+
+ //reconstruct to Native/Direct
+ final int bytes = Sketch.getMaxUpdateSketchBytes(k);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared,
wseg);
+ UpdateSketch local2 = sl.bldr.buildLocal(shared);
+
+ assertEquals(local2.getEstimate(), u, 0.0);
+ assertEquals(local2.getLowerBound(2), u, 0.0);
+ assertEquals(local2.getUpperBound(2), u, 0.0);
+ assertFalse(local2.isEmpty());
+ assertFalse(local2.isEstimationMode());
+ assertEquals(local2.getResizeFactor(), local.getResizeFactor());
+ local2.toString(true, true, 8, true);
+ }
+
+ @Test
+ public void checkHeapifyByteArrayEstimating() {
+ int lgK = 12;
+ int k = 1 << lgK;
+ int u = 2*k;
+
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch local = sl.local;
+ UpdateSketch shared = sl.shared;
+
+ for (int i=0; i<u; i++) {
+ local.update(i);
+ }
+ waitForBgPropagationToComplete(shared);
+
+ double localEst = local.getEstimate();
+ double localLB = local.getLowerBound(2);
+ double localUB = local.getUpperBound(2);
+ assertTrue(local.isEstimationMode());
+ byte[] serArr = shared.toByteArray();
+
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly();
+ UpdateSketch recoveredShared = UpdateSketch.heapify(srcSeg, sl.seed);
+
+ final int bytes = Sketch.getMaxUpdateSketchBytes(k);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch(recoveredShared, wseg);
+ UpdateSketch local2 = sl.bldr.buildLocal(shared);
+ assertEquals(local2.getEstimate(), localEst);
+ assertEquals(local2.getLowerBound(2), localLB);
+ assertEquals(local2.getUpperBound(2), localUB);
+ assertFalse(local2.isEmpty());
+ assertTrue(local2.isEstimationMode());
+ assertEquals(local2.getResizeFactor(), local.getResizeFactor());
+ }
+
+ @Test
+ public void checkHeapifyMemoryEstimating() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ int u = 2*k; //thus estimating
+
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch local = sl.local;
+ UpdateSketch shared = sl.shared;
+
+ for (int i=0; i<u; i++) {
+ local.update(i);
+ }
+ waitForBgPropagationToComplete(shared);
+
+ double localEst = local.getEstimate();
+ double localLB = local.getLowerBound(2);
+ double localUB = local.getUpperBound(2);
+ assertTrue(local.isEstimationMode());
+ assertFalse(local.isDirect());
+ assertFalse(local.hasMemorySegment());
+
+ byte[] serArr = shared.toByteArray();
+
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly();
+ UpdateSketch recoveredShared = UpdateSketch.heapify(srcSeg,
ThetaUtil.DEFAULT_UPDATE_SEED);
+
+ final int bytes = Sketch.getMaxUpdateSketchBytes(k);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch(recoveredShared, wseg);
+ UpdateSketch local2 = sl.bldr.buildLocal(shared);
+
+ assertEquals(local2.getEstimate(), localEst);
+ assertEquals(local2.getLowerBound(2), localLB);
+ assertEquals(local2.getUpperBound(2), localUB);
+ assertEquals(local2.isEmpty(), false);
+ assertTrue(local2.isEstimationMode());
+ }
+
+ @Test
+ public void checkHQStoCompactForms() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ int u = 4*k; //thus estimating
+
+ int maxBytes = (k << 4) + (Family.QUICKSELECT.getMinPreLongs() << 3);
+
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ assertEquals(local.getClass().getSimpleName(),
"ConcurrentHeapThetaBuffer");
+ assertFalse(local.isDirect());
+ assertFalse(local.hasMemorySegment());
+
+ for (int i=0; i<u; i++) {
+ local.update(i);
+ }
+ waitForBgPropagationToComplete(shared);
+
+ shared.rebuild(); //forces size back to k
+
+ //get baseline values
+ double localEst = local.getEstimate();
+ double localLB = local.getLowerBound(2);
+ double localUB = local.getUpperBound(2);
+ int sharedBytes = shared.getCurrentBytes();
+ int sharedCompBytes = shared.getCompactBytes();
+ assertEquals(sharedBytes, maxBytes);
+ assertTrue(local.isEstimationMode());
+
+ CompactSketch comp1, comp2, comp3, comp4;
+
+ comp1 = shared.compact(false, null);
+
+ assertEquals(comp1.getEstimate(), localEst);
+ assertEquals(comp1.getLowerBound(2), localLB);
+ assertEquals(comp1.getUpperBound(2), localUB);
+ assertEquals(comp1.isEmpty(), false);
+ assertTrue(comp1.isEstimationMode());
+ assertEquals(comp1.getCompactBytes(), sharedCompBytes);
+ assertEquals(comp1.getClass().getSimpleName(), "HeapCompactSketch");
+
+ comp2 = shared.compact(true, null);
+
+ assertEquals(comp2.getEstimate(), localEst);
+ assertEquals(comp2.getLowerBound(2), localLB);
+ assertEquals(comp2.getUpperBound(2), localUB);
+ assertEquals(comp2.isEmpty(), false);
+ assertTrue(comp2.isEstimationMode());
+ assertEquals(comp2.getCompactBytes(), sharedCompBytes);
+ assertEquals(comp2.getClass().getSimpleName(), "HeapCompactSketch");
+
+ byte[] segArr2 = new byte[sharedCompBytes];
+ MemorySegment seg2 = MemorySegment.ofArray(segArr2); //allocate seg for
compact form
+
+ comp3 = shared.compact(false, seg2); //load the seg2
+
+ assertEquals(comp3.getEstimate(), localEst);
+ assertEquals(comp3.getLowerBound(2), localLB);
+ assertEquals(comp3.getUpperBound(2), localUB);
+ assertEquals(comp3.isEmpty(), false);
+ assertTrue(comp3.isEstimationMode());
+ assertEquals(comp3.getCompactBytes(), sharedCompBytes);
+ assertEquals(comp3.getClass().getSimpleName(), "DirectCompactSketch");
+
+ Util.clear(seg2);
+ comp4 = shared.compact(true, seg2);
+
+ assertEquals(comp4.getEstimate(), localEst);
+ assertEquals(comp4.getLowerBound(2), localLB);
+ assertEquals(comp4.getUpperBound(2), localUB);
+ assertEquals(comp4.isEmpty(), false);
+ assertTrue(comp4.isEstimationMode());
+ assertEquals(comp4.getCompactBytes(), sharedCompBytes);
+ assertEquals(comp4.getClass().getSimpleName(), "DirectCompactSketch");
+ comp4.toString(false, true, 0, false);
+ }
+
+ @Test
+ public void checkHQStoCompactEmptyForms() {
+ int lgK = 9;
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+ println("lgArr: "+ local.getLgArrLongs());
+
+ //empty
+ local.toString(false, true, 0, false);
+ boolean estimating = false;
+ assertTrue(local instanceof ConcurrentHeapThetaBuffer);
+ double localEst = local.getEstimate();
+ double localLB = local.getLowerBound(2);
+ double localUB = local.getUpperBound(2);
+ //int currentUSBytes = local.getCurrentBytes(false);
+ //assertEquals(currentUSBytes, (32*8) + 24); // clumsy, but a function of
RF and TCF
+ int compBytes = local.getCompactBytes(); //compact form
+ assertEquals(compBytes, 8);
+ assertEquals(local.isEstimationMode(), estimating);
+
+ byte[] arr2 = new byte[compBytes];
+ MemorySegment seg2 = MemorySegment.ofArray(arr2);
+
+ CompactSketch csk2 = shared.compact(false, seg2);
+ assertEquals(csk2.getEstimate(), localEst);
+ assertEquals(csk2.getLowerBound(2), localLB);
+ assertEquals(csk2.getUpperBound(2), localUB);
+ assertEquals(csk2.isEmpty(), true);
+ assertEquals(csk2.isEstimationMode(), estimating);
+ assertTrue(csk2.isOrdered());
+
+ CompactSketch csk3 = shared.compact(true, seg2);
+ csk3.toString(false, true, 0, false);
+ csk3.toString();
+ assertEquals(csk3.getEstimate(), localEst);
+ assertEquals(csk3.getLowerBound(2), localLB);
+ assertEquals(csk3.getUpperBound(2), localUB);
+ assertEquals(csk3.isEmpty(), true);
+ assertEquals(csk3.isEstimationMode(), estimating);
+ assertTrue(csk3.isOrdered());
+ }
+
+ @Test
+ public void checkExactMode() {
+ int lgK = 12;
+ int u = 1 << lgK;
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ assertTrue(local.isEmpty());
+
+ for (int i = 0; i< u; i++) {
+ local.update(i);
+ }
+ waitForBgPropagationToComplete(shared);
+
+ assertEquals(local.getEstimate(), u, 0.0);
+ assertEquals(shared.getRetainedEntries(false), u);
+ }
+
+ @Test
+ public void checkEstMode() {
+ int lgK = 12;
+ int k = 1 << lgK;
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ assertTrue(local.isEmpty());
+
+ int u = 3*k;
+ for (int i = 0; i< u; i++) {
+ local.update(i);
+ }
+ waitForBgPropagationToComplete(shared);
+ final int retained = shared.getRetainedEntries(false);
+ assertTrue(retained > k);
+ // it could be exactly k, but in this case must be greater
+ }
+
+ @Test
+ public void checkErrorBounds() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch local = sl.local;
+ UpdateSketch shared = sl.shared;
+
+ //Exact mode
+ //int limit = (int)ConcurrentSharedThetaSketch.computeExactLimit(lim, 0);
//? ask Eshcar
+ for (int i = 0; i < k; i++ ) {
+ local.update(i);
+ }
+
+ double est = local.getEstimate();
+ double lb = local.getLowerBound(2);
+ double ub = local.getUpperBound(2);
+ assertEquals(est, ub, 0.0);
+ assertEquals(est, lb, 0.0);
+
+ //Est mode
+ int u = 2 * k;
+ for (int i = k; i < u; i++ ) {
+ local.update(i);
+ local.update(i); //test duplicate rejection
+ }
+ waitForBgPropagationToComplete(shared);
+ est = local.getEstimate();
+ lb = local.getLowerBound(2);
+ ub = local.getUpperBound(2);
+ assertTrue(est <= ub);
+ assertTrue(est >= lb);
+ }
+
+ @Test
+ public void checkRebuild() {
+ int lgK = 4;
+ int k = 1 << lgK;
+ SharedLocal sl = new SharedLocal(lgK);
+ //must build shared first
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ assertTrue(local.isEmpty());
+ int t = ((ConcurrentHeapThetaBuffer)local).getHashTableThreshold();
+
+ for (int i = 0; i< t; i++) {
+ local.update(i);
+ }
+ waitForBgPropagationToComplete(shared);
+
+ assertFalse(local.isEmpty());
+ assertTrue(local.getEstimate() > 0.0);
+ assertTrue(shared.getRetainedEntries(false) > k);
+
+ shared.rebuild();
+ assertEquals(shared.getRetainedEntries(false), k);
+ assertEquals(shared.getRetainedEntries(true), k);
+ shared.rebuild();
+ assertEquals(shared.getRetainedEntries(false), k);
+ assertEquals(shared.getRetainedEntries(true), k);
+ }
+
+ @Test
+ public void checkBuilder() {
+ int lgK = 4;
+ SharedLocal sl = new SharedLocal(lgK);
+ assertEquals(sl.bldr.getLocalLgNominalEntries(), lgK);
+ assertEquals(sl.bldr.getLgNominalEntries(), lgK);
+ println(sl.bldr.toString());
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBuilderSmallNominal() {
+ int lgK = 2; //too small
+ new SharedLocal(lgK);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkNegativeHashes() {
+ int lgK = 9;
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch local = sl.local;
+ local.hashUpdate(-1L);
+ }
+
+ @Test
+ public void checkResetAndStartingSubMultiple() {
+ int lgK = 9;
+ int k = 1 << lgK;
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch shared = sl.shared;
+ UpdateSketch local = sl.local;
+
+ assertTrue(local.isEmpty());
+ int u = 3*k;
+
+ for (int i = 0; i< u; i++) { local.update(i); }
+ waitForBgPropagationToComplete(shared);
+
+ assertFalse(local.isEmpty());
+ assertTrue(shared.getRetainedEntries(false) >= k);
+ assertTrue(local.getThetaLong() < Long.MAX_VALUE);
+
+ shared.reset();
+ local.reset();
+ assertTrue(local.isEmpty());
+ assertEquals(shared.getRetainedEntries(false), 0);
+ assertEquals(local.getEstimate(), 0.0, 0.0);
+ assertEquals(local.getThetaLong(), Long.MAX_VALUE);
+ }
+
+ @Test
+ public void checkDQStoCompactEmptyForms() {
+ int lgK = 9;
+ SharedLocal sl = new SharedLocal(lgK);
+ UpdateSketch local = sl.local;
+ UpdateSketch shared = sl.shared;
+
+ //empty
+ local.toString(false, true, 0, false); //exercise toString
+ assertTrue(local instanceof ConcurrentHeapThetaBuffer);
+ double localEst = local.getEstimate();
+ double localLB = local.getLowerBound(2);
+ double uskUB = local.getUpperBound(2);
+ assertFalse(local.isEstimationMode());
+
+ int bytes = local.getCompactBytes();
+ assertEquals(bytes, 8);
+ byte[] segArr2 = new byte[bytes];
+ MemorySegment seg2 = MemorySegment.ofArray(segArr2);
+
+ CompactSketch csk2 = shared.compact(false, seg2);
+ assertEquals(csk2.getEstimate(), localEst);
+ assertEquals(csk2.getLowerBound(2), localLB);
+ assertEquals(csk2.getUpperBound(2), uskUB);
+ assertTrue(csk2.isEmpty());
+ assertFalse(csk2.isEstimationMode());
+ assertTrue(csk2.isOrdered());
+
+ CompactSketch csk3 = shared.compact(true, seg2);
+ csk3.toString(false, true, 0, false);
+ csk3.toString();
+ assertEquals(csk3.getEstimate(), localEst);
+ assertEquals(csk3.getLowerBound(2), localLB);
+ assertEquals(csk3.getUpperBound(2), uskUB);
+ assertTrue(csk3.isEmpty());
+ assertFalse(csk3.isEstimationMode());
+ assertTrue(csk2.isOrdered());
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkMinReqBytes() {
+ int lgK = 4;
+ int k = 1 << lgK;
+ SharedLocal sl = new SharedLocal(lgK);
+ for (int i = 0; i < (4 * k); i++) { sl.local.update(i); }
+ waitForBgPropagationToComplete(sl.shared);
+ byte[] byteArray = sl.shared.toByteArray();
+ byte[] badBytes = Arrays.copyOfRange(byteArray, 0, 24); //corrupt no. bytes
+ MemorySegment seg = MemorySegment.ofArray(badBytes).asReadOnly();
+ Sketch.heapify(seg);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkThetaAndLgArrLongs() {
+ int lgK = 4;
+ int k = 1 << lgK;
+ SharedLocal sl = new SharedLocal(lgK);
+ for (int i = 0; i < k; i++) { sl.local.update(i); }
+ waitForBgPropagationToComplete(sl.shared);
+ byte[] badArray = sl.shared.toByteArray();
+ MemorySegment seg = MemorySegment.ofArray(badArray);
+ PreambleUtil.insertLgArrLongs(seg, 4); //corrupt
+ PreambleUtil.insertThetaLong(seg, Long.MAX_VALUE / 2); //corrupt
+ Sketch.heapify(seg);
+ }
+
+ @Test
+ public void checkFamily() {
+ SharedLocal sl = new SharedLocal();
+ UpdateSketch local = sl.local;
+ assertEquals(local.getFamily(), Family.QUICKSELECT);
+ }
+
+ @Test
+ public void checkBackgroundPropagation() {
+ int lgK = 4;
+ int k = 1 << lgK;
+ int u = 5*k;
+ SharedLocal sl = new SharedLocal(lgK);
+ assertTrue(sl.local.isEmpty());
+
+ int i = 0;
+ for (; i < k; i++) { sl.local.update(i); } //exact
+ waitForBgPropagationToComplete(sl.shared);
+
+ assertFalse(sl.local.isEmpty());
+ assertTrue(sl.local.getEstimate() > 0.0);
+ long theta1 = sl.sharedIf.getVolatileTheta();
+
+ for (; i < u; i++) { sl.local.update(i); } //continue, make it estimating
+ waitForBgPropagationToComplete(sl.shared);
+
+ long theta2 = sl.sharedIf.getVolatileTheta();
+ int entries = sl.shared.getRetainedEntries(false);
+ assertTrue((entries > k) || (theta2 < theta1),
+ "entries= " + entries + " k= " + k + " theta1= " + theta1 + " theta2=
" + theta2);
+
+ sl.shared.rebuild();
+ assertEquals(sl.shared.getRetainedEntries(false), k);
+ assertEquals(sl.shared.getRetainedEntries(true), k);
+ sl.local.rebuild();
+ assertEquals(sl.shared.getRetainedEntries(false), k);
+ assertEquals(sl.shared.getRetainedEntries(true), k);
+ }
+
+ @Test
+ public void checkBuilderExceptions() {
+ UpdateSketchBuilder bldr = new UpdateSketchBuilder();
+ try {
+ bldr.setNominalEntries(8);
+ fail();
+ } catch (SketchesArgumentException e) { }
+ try {
+ bldr.setLocalNominalEntries(8);
+ fail();
+ } catch (SketchesArgumentException e) { }
+ try {
+ bldr.setLocalLogNominalEntries(3);
+ fail();
+ } catch (SketchesArgumentException e) { }
+ bldr.setNumPoolThreads(4);
+ assertEquals(bldr.getNumPoolThreads(), 4);
+ bldr.setMaxConcurrencyError(0.04);
+ assertEquals(bldr.getMaxConcurrencyError(), 0.04);
+ bldr.setMaxNumLocalThreads(4);
+ assertEquals(bldr.getMaxNumLocalThreads(), 4);
+ }
+
+ @Test(expectedExceptions = UnsupportedOperationException.class)
+ public void checkToByteArray() {
+ SharedLocal sl = new SharedLocal();
+ sl.local.toByteArray();
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: "+this.getClass().getName());
+ }
+
+ /**
+ * @param s value to print
+ */
+ static void println(String s) {
+ //System.out.println(s); //disable here
+ }
+
+ static class SharedLocal {
+ static final long DefaultSeed = ThetaUtil.DEFAULT_UPDATE_SEED;
+ final UpdateSketch shared;
+ final ConcurrentSharedThetaSketch sharedIf;
+ final UpdateSketch local;
+ final int sharedLgK;
+ final int localLgK;
+ final long seed;
+ final MemorySegment wseg;
+ final UpdateSketchBuilder bldr = new UpdateSketchBuilder();
+
+ SharedLocal() {
+ this(9, 9, DefaultSeed, false, true, 1);
+ }
+
+ SharedLocal(int lgK) {
+ this(lgK, lgK, DefaultSeed, false, true, 1);
+ }
+
+ SharedLocal(int sharedLgK, int localLgK) {
+ this(sharedLgK, localLgK, DefaultSeed, false, true, 1);
+ }
+
+ SharedLocal(int sharedLgK, int localLgK, long seed) {
+ this(sharedLgK, localLgK, seed, false, true, 1);
+ }
+
+ SharedLocal(int sharedLgK, int localLgK, boolean useSeg) {
+ this(sharedLgK, localLgK, DefaultSeed, useSeg, true, 1);
+ }
+
+ SharedLocal(int sharedLgK, int localLgK, boolean useSeg, boolean ordered) {
+ this(sharedLgK, localLgK, DefaultSeed, useSeg, ordered, 1);
+ }
+
+ SharedLocal(int sharedLgK, int localLgK, long seed, boolean useSeg,
boolean ordered, int segMult) {
+ this.sharedLgK = sharedLgK;
+ this.localLgK = localLgK;
+ this.seed = seed;
+ if (useSeg) {
+ int bytes = (((4 << sharedLgK) * segMult) +
(Family.QUICKSELECT.getMaxPreLongs())) << 3;
+ wseg = MemorySegment.ofArray(new byte[bytes]);
+ } else {
+ wseg = null;
+ }
+ bldr.setLogNominalEntries(sharedLgK);
+ bldr.setLocalLogNominalEntries(localLgK);
+ bldr.setPropagateOrderedCompact(ordered);
+ bldr.setSeed(this.seed);
+ shared = bldr.buildShared(wseg);
+ local = bldr.buildLocal(shared);
+ sharedIf = (ConcurrentSharedThetaSketch) shared;
+ }
+ }
+
+ static void waitForBgPropagationToComplete(UpdateSketch shared) {
+ try {
+ Thread.sleep(10);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ ConcurrentSharedThetaSketch csts = (ConcurrentSharedThetaSketch)shared;
+ csts.awaitBgPropagationTermination();
+
ConcurrentPropagationService.resetExecutorService(Thread.currentThread().getId());
+ csts.initBgPropagationService();
+ }
+
+}
diff --git
a/src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java
b/src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java
new file mode 100644
index 000000000..5d0e42176
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.theta2.JaccardSimilarity.exactlyEqual;
+import static org.apache.datasketches.theta2.JaccardSimilarity.jaccard;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class JaccardSimilarityTest {
+
+ @Test
+ public void checkNullsEmpties() {
+ int minK = 1 << 12;
+ double threshold = 0.95;
+ println("Check nulls & empties, minK: " + minK + "\t Th: " + threshold);
+ //check both null
+ double[] jResults = jaccard(null, null);
+ boolean state = jResults[1] > threshold;
+ println("null \t null:\t" + state + "\t" + jaccardString(jResults));
+ assertFalse(state);
+
+ state = exactlyEqual(null, null);
+ assertFalse(state);
+
+ UpdateSketch measured =
UpdateSketch.builder().setNominalEntries(minK).build();
+ UpdateSketch expected =
UpdateSketch.builder().setNominalEntries(minK).build();
+
+ //check both empty
+ jResults = jaccard(measured, expected);
+ state = jResults[1] > threshold;
+ println("empty\tempty:\t" + state + "\t" + jaccardString(jResults));
+ assertTrue(state);
+
+ state = exactlyEqual(measured, expected);
+ assertTrue(state);
+
+ state = exactlyEqual(measured, measured);
+ assertTrue(state);
+
+ //adjust one
+ expected.update(1);
+ jResults = jaccard(measured, expected);
+ state = jResults[1] > threshold;
+ println("empty\t 1:\t" + state + "\t" + jaccardString(jResults));
+ assertFalse(state);
+
+ state = exactlyEqual(measured, expected);
+ assertFalse(state);
+
+ println("");
+ }
+
+ @Test
+ public void checkExactMode() {
+ int k = 1 << 12;
+ int u = k;
+ double threshold = 0.9999;
+ println("Exact Mode, minK: " + k + "\t Th: " + threshold);
+
+ UpdateSketch measured =
UpdateSketch.builder().setNominalEntries(k).build();
+ UpdateSketch expected =
UpdateSketch.builder().setNominalEntries(k).build();
+
+ for (int i = 0; i < (u-1); i++) { //one short
+ measured.update(i);
+ expected.update(i);
+ }
+
+ double[] jResults = jaccard(measured, expected);
+ boolean state = jResults[1] > threshold;
+ println(state + "\t" + jaccardString(jResults));
+ assertTrue(state);
+
+ state = exactlyEqual(measured, expected);
+ assertTrue(state);
+
+ measured.update(u-1); //now exactly k entries
+ expected.update(u); //now exactly k entries but differs by one
+ jResults = jaccard(measured, expected);
+ state = jResults[1] > threshold;
+ println(state + "\t" + jaccardString(jResults));
+ assertFalse(state);
+
+ state = exactlyEqual(measured, expected);
+ assertFalse(state);
+
+ println("");
+ }
+
+ @Test
+ public void checkEstMode() {
+ int k = 1 << 12;
+ int u = 1 << 20;
+ double threshold = 0.9999;
+ println("Estimation Mode, minK: " + k + "\t Th: " + threshold);
+
+ UpdateSketch measured =
UpdateSketch.builder().setNominalEntries(k).build();
+ UpdateSketch expected =
UpdateSketch.builder().setNominalEntries(k).build();
+
+ for (int i = 0; i < u; i++) {
+ measured.update(i);
+ expected.update(i);
+ }
+
+ double[] jResults = jaccard(measured, expected);
+ boolean state = jResults[1] > threshold;
+ println(state + "\t" + jaccardString(jResults));
+ assertTrue(state);
+
+ state = exactlyEqual(measured, expected);
+ assertTrue(state);
+
+ for (int i = u; i < (u + 50); i++) { //empirically determined
+ measured.update(i);
+ }
+
+ jResults = jaccard(measured, expected);
+ state = jResults[1] >= threshold;
+ println(state + "\t" + jaccardString(jResults));
+ assertFalse(state);
+
+ state = exactlyEqual(measured, expected);
+ assertFalse(state);
+
+ println("");
+ }
+
+ /**
+ * Enable printing on this test and you will see that the distribution is
pretty tight,
+ * about +/- 0.7%, which is pretty good since the accuracy of the underlying
sketch is about
+ * +/- 1.56%.
+ */
+ @Test
+ public void checkSimilarity() {
+ int minK = 1 << 12;
+ int u1 = 1 << 20;
+ int u2 = (int) (u1 * 0.95);
+ double threshold = 0.943;
+ println("Estimation Mode, minK: " + minK + "\t Th: " + threshold);
+
+ UpdateSketch expected =
UpdateSketch.builder().setNominalEntries(minK).build();
+ UpdateSketch measured =
UpdateSketch.builder().setNominalEntries(minK).build();
+
+ for (int i = 0; i < u1; i++) {
+ expected.update(i);
+ }
+
+ for (int i = 0; i < u2; i++) {
+ measured.update(i);
+ }
+
+ double[] jResults = JaccardSimilarity.jaccard(measured, expected);
+ boolean state = JaccardSimilarity.similarityTest(measured, expected,
threshold);
+ println(state + "\t" + jaccardString(jResults));
+ assertTrue(state);
+ //check identity case
+ state = JaccardSimilarity.similarityTest(measured, measured, threshold);
+ assertTrue(state);
+ }
+
+ /**
+ * Enable printing on this test and you will see that the distribution is
much looser,
+ * about +/- 14%. This is due to the fact that intersections loose accuracy
as the ratio of
+ * intersection to the union becomes a small number.
+ */
+ @Test
+ public void checkDissimilarity() {
+ int minK = 1 << 12;
+ int u1 = 1 << 20;
+ int u2 = (int) (u1 * 0.05);
+ double threshold = 0.061;
+ println("Estimation Mode, minK: " + minK + "\t Th: " + threshold);
+
+ UpdateSketch expected =
UpdateSketch.builder().setNominalEntries(minK).build();
+ UpdateSketch measured =
UpdateSketch.builder().setNominalEntries(minK).build();
+
+ for (int i = 0; i < u1; i++) {
+ expected.update(i);
+ }
+
+ for (int i = 0; i < u2; i++) {
+ measured.update(i);
+ }
+
+ double[] jResults = JaccardSimilarity.jaccard(measured, expected);
+ boolean state = JaccardSimilarity.dissimilarityTest(measured, expected,
threshold);
+ println(state + "\t" + jaccardString(jResults));
+ assertTrue(state);
+ }
+
+ private static String jaccardString(double[] jResults) {
+ double lb = jResults[0];
+ double est = jResults[1];
+ double ub = jResults[2];
+ return lb + "\t" + est + "\t" + ub + "\t" + ((lb/est) - 1.0) + "\t" +
((ub/est) - 1.0);
+ }
+
+ @Test
+ public void checkMinK() {
+ UpdateSketch skA = UpdateSketch.builder().build(); //4096
+ UpdateSketch skB = UpdateSketch.builder().build(); //4096
+ skA.update(1);
+ skB.update(1);
+ double[] result = JaccardSimilarity.jaccard(skA, skB);
+ println(result[0] + ", " + result[1] + ", " + result[2]);
+ for (int i = 1; i < 4096; i++) {
+ skA.update(i);
+ skB.update(i);
+ }
+ result = JaccardSimilarity.jaccard(skA, skB);
+ println(result[0] + ", " + result[1] + ", " + result[2]);
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: "+this.getClass().getName());
+ }
+
+ /**
+ * @param s value to print
+ */
+ static void println(String s) {
+ //System.out.println(s); //disable here
+ }
+
+}
diff --git
a/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java
b/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java
new file mode 100644
index 000000000..ab0ed1495
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import org.apache.datasketches.common.SketchesReadOnlyException;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class ReadOnlyMemoryTest {
+
+ @Test
+ public void wrapAndTryUpdatingUpdateSketch() {
+ UpdateSketch updateSketch = UpdateSketch.builder().build();
+ updateSketch.update(1);
+ MemorySegment seg = MemorySegment.ofBuffer(
+
ByteBuffer.wrap(updateSketch.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder()));
+ UpdateSketch sketch = (UpdateSketch) Sketch.wrap(seg);
+ assertEquals(sketch.getEstimate(), 1.0);
+ assertTrue(seg.isReadOnly());
+
+ boolean thrown = false;
+ try {
+ sketch.update(2);
+ } catch (SketchesReadOnlyException e) {
+ thrown = true;
+ }
+ Assert.assertTrue(thrown);
+ }
+
+ @Test
+ public void wrapCompactUnorderedSketch() {
+ UpdateSketch updateSketch = UpdateSketch.builder().build();
+ updateSketch.update(1);
+ MemorySegment seg = MemorySegment.ofBuffer(
+ ByteBuffer.wrap(updateSketch.compact(false,
null).toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder()));
+ Sketch sketch = Sketch.wrap(seg);
+ assertEquals(sketch.getEstimate(), 1.0);
+ assertTrue(seg.isReadOnly());
+ }
+
+ @Test
+ public void wrapCompactOrderedSketch() {
+ UpdateSketch updateSketch = UpdateSketch.builder().build();
+ updateSketch.update(1);
+ MemorySegment seg =
MemorySegment.ofBuffer(ByteBuffer.wrap(updateSketch.compact().toByteArray())
+ .asReadOnlyBuffer().order(ByteOrder.nativeOrder()));
+ Sketch sketch = Sketch.wrap(seg);
+ assertEquals(sketch.getEstimate(), 1.0);
+ assertTrue(seg.isReadOnly());
+ }
+
+ @Test
+ public void heapifyUpdateSketch() {
+ UpdateSketch us1 = UpdateSketch.builder().build();
+ us1.update(1);
+ MemorySegment seg = MemorySegment.ofBuffer(
+
ByteBuffer.wrap(us1.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder()));
+ // downcasting is not recommended, for testing only
+ UpdateSketch us2 = (UpdateSketch) Sketch.heapify(seg);
+ us2.update(2);
+ assertEquals(us2.getEstimate(), 2.0);
+ assertTrue(seg.isReadOnly());
+ }
+
+ @Test
+ public void heapifyCompactUnorderedSketch() {
+ UpdateSketch updateSketch = UpdateSketch.builder().build();
+ updateSketch.update(1);
+ MemorySegment seg = MemorySegment.ofBuffer(
+ ByteBuffer.wrap(updateSketch.compact(false,
null).toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder()));
+ Sketch sketch = Sketch.heapify(seg);
+ assertEquals(sketch.getEstimate(), 1.0);
+ assertTrue(seg.isReadOnly());
+ }
+
+ @Test
+ public void heapifyCompactOrderedSketch() {
+ UpdateSketch updateSketch = UpdateSketch.builder().build();
+ updateSketch.update(1);
+ MemorySegment seg = MemorySegment.ofBuffer(
+
ByteBuffer.wrap(updateSketch.compact().toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder()));
+ Sketch sketch = Sketch.heapify(seg);
+ assertEquals(sketch.getEstimate(), 1.0);
+ assertTrue(seg.isReadOnly());
+ }
+
+ @Test
+ public void heapifyUnion() {
+ Union u1 = SetOperation.builder().buildUnion();
+ u1.update(1);
+ MemorySegment seg = MemorySegment.ofBuffer(
+
ByteBuffer.wrap(u1.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder()));
+ Union u2 = (Union) SetOperation.heapify(seg);
+ u2.update(2);
+ Assert.assertEquals(u2.getResult().getEstimate(), 2.0);
+ assertTrue(seg.isReadOnly());
+ }
+
+ @Test
+ public void wrapAndTryUpdatingUnion() {
+ Union u1 = SetOperation.builder().buildUnion();
+ u1.update(1);
+ MemorySegment seg = MemorySegment.ofBuffer(
+
ByteBuffer.wrap(u1.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder()));
+
+ Union u2 = (Union) Sketches.wrapSetOperation(seg);
+ Union u3 = Sketches.wrapUnion(seg);
+ Assert.assertEquals(u2.getResult().getEstimate(), 1.0);
+ Assert.assertEquals(u3.getResult().getEstimate(), 1.0);
+ assertTrue(seg.isReadOnly());
+
+ try {
+ u2.update(2);
+ fail();
+ } catch (SketchesReadOnlyException e) {
+ //expected
+ }
+
+ try {
+ u3.update(2);
+ fail();
+ } catch (SketchesReadOnlyException e) {
+ //expected
+ }
+ }
+
+ @Test
+ public void heapifyIntersection() {
+ UpdateSketch us1 = UpdateSketch.builder().build();
+ us1.update(1);
+ us1.update(2);
+ UpdateSketch us2 = UpdateSketch.builder().build();
+ us2.update(2);
+ us2.update(3);
+
+ Intersection i1 = SetOperation.builder().buildIntersection();
+ i1.intersect(us1);
+ i1.intersect(us2);
+ MemorySegment seg = MemorySegment.ofBuffer(
+
ByteBuffer.wrap(i1.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder()));
+ Intersection i2 = (Intersection) SetOperation.heapify(seg);
+ i2.intersect(us1);
+ Assert.assertEquals(i2.getResult().getEstimate(), 1.0);
+ assertTrue(seg.isReadOnly());
+ }
+
+ @Test
+ public void wrapIntersection() {
+ UpdateSketch us1 = UpdateSketch.builder().build();
+ us1.update(1);
+ us1.update(2);
+ UpdateSketch us2 = UpdateSketch.builder().build();
+ us2.update(2);
+ us2.update(3);
+
+ Intersection i1 = SetOperation.builder().buildIntersection();
+ i1.intersect(us1);
+ i1.intersect(us2);
+ MemorySegment seg = MemorySegment.ofBuffer(
+
ByteBuffer.wrap(i1.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder()));
+ Intersection i2 = (Intersection) SetOperation.wrap(seg);
+ Assert.assertEquals(i2.getResult().getEstimate(), 1.0);
+
+ boolean thrown = false;
+ try {
+ i2.intersect(us1);
+ } catch (SketchesReadOnlyException e) {
+ thrown = true;
+ }
+ Assert.assertTrue(thrown);
+ assertTrue(seg.isReadOnly());
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: "+this.getClass().getName());
+ }
+
+ /**
+ * @param s value to print
+ */
+ static void println(String s) {
+ //System.out.println(s); //disable here
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java
b/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java
new file mode 100644
index 000000000..02efffd75
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java
@@ -0,0 +1,438 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.common.ResizeFactor.X4;
+import static org.apache.datasketches.theta2.Sketch.getMaxUpdateSketchBytes;
+import static
org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class SetOperationTest {
+
+ @Test
+ public void checkBuilder() {
+ final int k = 2048;
+ final long seed = 1021;
+
+ final UpdateSketch usk1 =
UpdateSketch.builder().setSeed(seed).setNominalEntries(k).build();
+ final UpdateSketch usk2 =
UpdateSketch.builder().setSeed(seed).setNominalEntries(k).build();
+
+ for (int i=0; i<k/2; i++) {
+ usk1.update(i); //256
+ }
+ for (int i=k/2; i<k; i++) {
+ usk2.update(i); //256 no overlap
+ }
+
+ final ResizeFactor rf = X4;
+ //use default size
+ final Union union =
SetOperation.builder().setSeed(seed).setResizeFactor(rf).buildUnion();
+
+ union.union(usk1);
+ union.union(usk2);
+
+ final double exactUnionAnswer = k;
+
+ final CompactSketch comp1 = union.getResult(false, null); //ordered: false
+ final double compEst = comp1.getEstimate();
+ assertEquals(compEst, exactUnionAnswer, 0.0);
+ }
+
+ @Test
+ public void checkBuilder2() {
+ final SetOperationBuilder bldr = SetOperation.builder();
+
+ final long seed = 12345L;
+ bldr.setSeed(seed);
+ assertEquals(bldr.getSeed(), seed);
+
+ final float p = (float)0.5;
+ bldr.setP(p);
+ assertEquals(bldr.getP(), p);
+
+ final ResizeFactor rf = ResizeFactor.X4;
+ bldr.setResizeFactor(rf);
+ assertEquals(bldr.getResizeFactor(), rf);
+
+ final int lgK = 10;
+ final int k = 1 << lgK;
+ bldr.setNominalEntries(k);
+ assertEquals(bldr.getLgNominalEntries(), lgK);
+
+ println(bldr.toString());
+ }
+
+ @Test
+ public void checkBuilderNonPowerOf2() {
+ SetOperation.builder().setNominalEntries(1000).buildUnion();
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBuilderBadFamily() {
+ SetOperation.builder().build(Family.ALPHA);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBuilderIllegalPhi() {
+ final float p = (float)1.5;
+ SetOperation.builder().setP(p).buildUnion();
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBuilderIllegalPlo() {
+ final float p = 0;
+ SetOperation.builder().setP(p).buildUnion();
+ }
+
+ @Test
+ public void checkBuilderValidP() {
+ final float p = (float).5;
+ SetOperation.builder().setP(p).buildUnion();
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBuilderAnotB_noMem() {
+ final MemorySegment mem = MemorySegment.ofArray(new byte[64]);
+ SetOperation.builder().build(Family.A_NOT_B, mem);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBuilderBadSeedHashes() {
+ final int k = 2048;
+ final long seed = 1021;
+
+ final UpdateSketch usk1 =
UpdateSketch.builder().setSeed(seed).setNominalEntries(k).build();
+ final UpdateSketch usk2 =
UpdateSketch.builder().setNominalEntries(k).build();
+
+ for (int i=0; i<k/2; i++) {
+ usk1.update(i); //256
+ }
+ for (int i=k/2; i<k; i++) {
+ usk2.update(i); //256 no overlap
+ }
+
+ final ResizeFactor rf = X4;
+
+ final Union union =
SetOperation.builder().setSeed(seed).setResizeFactor(rf).setNominalEntries(k).buildUnion();
+
+ union.union(usk1);
+ union.union(usk2); //throws seed exception here
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBuilderNomEntries() {
+ final int k = 1 << 27;
+ final SetOperationBuilder bldr = SetOperation.builder();
+ bldr.setNominalEntries(k);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkIllegalSetOpHeapify() {
+ final int k = 64;
+ final UpdateSketch usk1 =
UpdateSketch.builder().setNominalEntries(k).build();
+ for (int i=0; i<k; i++) {
+ usk1.update(i); //64
+ }
+ final byte[] byteArray = usk1.toByteArray();
+ final MemorySegment mem = MemorySegment.ofArray(byteArray).asReadOnly();
+ SetOperation.heapify(mem);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkIllegalSetOpWrap() {
+ final int k = 64;
+ final UpdateSketch usk1 =
UpdateSketch.builder().setNominalEntries(k).build();
+ for (int i=0; i<k; i++) {
+ usk1.update(i); //64
+ }
+ final byte[] byteArray = usk1.toByteArray();
+ final MemorySegment mem = MemorySegment.ofArray(byteArray).asReadOnly();
+ Sketches.wrapIntersection(mem);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkIllegalSetOpWrap2() {
+ final int k = 64;
+ final UpdateSketch usk1 =
UpdateSketch.builder().setNominalEntries(k).build();
+ for (int i=0; i<k; i++) {
+ usk1.update(i); //64
+ }
+ final MemorySegment wmem = MemorySegment.ofArray(usk1.toByteArray());
+ PreambleUtil.insertSerVer(wmem, 2); //corrupt
+ final MemorySegment mem = wmem.asReadOnly();
+ SetOperation.wrap(mem);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkIllegalSetOpWrap3() {
+ final int k = 64;
+ final UpdateSketch usk1 =
UpdateSketch.builder().setNominalEntries(k).build();
+ for (int i=0; i<k; i++) {
+ usk1.update(i); //64
+ }
+ final MemorySegment wmem = MemorySegment.ofArray(usk1.toByteArray());
+ SetOperation.wrap(wmem);
+ }
+
+ @Test
+ public void checkBuildSetOps() {
+ final SetOperationBuilder bldr = Sketches.setOperationBuilder();
+ bldr.buildUnion();
+ bldr.buildIntersection();
+ bldr.buildANotB();
+ }
+
+ @Test
+ public void checkComputeLgArrLongs() {
+ assertEquals(minLgHashTableSize(30, ThetaUtil.REBUILD_THRESHOLD), 5);
+ assertEquals(minLgHashTableSize(31, ThetaUtil.REBUILD_THRESHOLD), 6);
+ }
+
+ /**
+ * The objective is to union 3 16K sketches into a union SetOperation and
get the result.
+ * All operations are to be performed within a single direct ByteBuffer as
the backing store.
+ * First we will make the union size large enough so that its answer will be
exact (with this
+ * specific example).
+ * <p> Next, we recover the Union SetOp and the 3 sketches and the space for
the result. Then
+ * recompute the union using a Union of the same size as the input sketches,
where the end result
+ * will be an estimate.
+ */
+ @Test
+ public void checkDirectUnionExample() {
+ //The first task is to compute how much direct memory we need and set the
heap large enough.
+ //For the first trial, we will set the Union large enough for an exact
result for THIS example.
+ final int sketchNomEntries = 1 << 14; //16K
+ int unionNomEntries = 1 << 15; //32K
+ final int[] heapLayout = getHeapLayout(sketchNomEntries, unionNomEntries);
+
+ //This BB belongs to you and you always retain a link to it until you are
completely
+ // done and then let java garbage collect it.
+ //I use a heap backing array, because for this example it is easier to
peak into it and
+ // see what is going on.
+ final byte[] backingArr = new byte[heapLayout[5]];
+ final ByteBuffer heapBuf =
ByteBuffer.wrap(backingArr).order(ByteOrder.nativeOrder());
+
+ // Attaches a MemorySegment object to the underlying memory of heapBuf.
+ // heapMem will have a Read/Write view of the complete backing memory of
heapBuf (direct or not).
+ // Any R/W action from heapMem will be visible via heapBuf and visa versa.
+ //
+ // However, if you had created this WM object directly in raw, off-heap
"native" memory
+ // you would have the responsibility to close it when you are done.
+ // But, since it was allocated via BB, it closes it for you.
+ final MemorySegment heapMem = MemorySegment.ofBuffer(heapBuf);
+
+ double result = directUnionTrial1(heapMem, heapLayout, sketchNomEntries,
unionNomEntries);
+ println("1st est: "+result);
+ final int expected = sketchNomEntries*2;
+ assertEquals(result, expected, 0.0); //est must be exact.
+
+ //For trial 2, we will use the same union space but use only part of it.
+ unionNomEntries = 1 << 14; //16K
+ result = directUnionTrial2(heapMem, heapLayout, sketchNomEntries,
unionNomEntries);
+
+ //intentionally loose bounds
+ assertEquals(result, expected, expected*0.05);
+ println("2nd est: "+result);
+ println("Error %: "+(result/expected -1.0)*100);
+ }
+
+ @Test
+ public void setOpsExample() {
+ println("Set Operations Example:");
+ final int k = 4096;
+ final UpdateSketch skA =
Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ final UpdateSketch skB =
Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ final UpdateSketch skC =
Sketches.updateSketchBuilder().setNominalEntries(k).build();
+
+ for (int i=1; i<=10; i++) { skA.update(i); }
+ for (int i=1; i<=20; i++) { skB.update(i); }
+ for (int i=6; i<=15; i++) { skC.update(i); } //overlapping set
+
+ final Union union =
Sketches.setOperationBuilder().setNominalEntries(k).buildUnion();
+ union.union(skA);
+ union.union(skB);
+ // ... continue to iterate on the input sketches to union
+
+ final CompactSketch unionSk = union.getResult(); //the result union
sketch
+ println("A U B : "+unionSk.getEstimate()); //the estimate of the
union
+
+ //Intersection is similar
+
+ final Intersection inter =
Sketches.setOperationBuilder().buildIntersection();
+ inter.intersect(unionSk);
+ inter.intersect(skC);
+ // ... continue to iterate on the input sketches to intersect
+
+ final CompactSketch interSk = inter.getResult(); //the result
intersection sketch
+ println("(A U B) ^ C: "+interSk.getEstimate()); //the estimate of the
intersection
+
+ //The AnotB operation is a little different as it is stateless:
+
+ final AnotB aNotB = Sketches.setOperationBuilder().buildANotB();
+ final CompactSketch not = aNotB.aNotB(skA, skC);
+
+ println("A \\ C : "+not.getEstimate()); //the estimate of the AnotB
operation
+ }
+
+ @Test
+ public void checkIsSameResource() {
+ final int k = 16;
+ final MemorySegment wmem = MemorySegment.ofArray(new byte[k*16 + 32]);//288
+ final MemorySegment emptyMem = MemorySegment.ofArray(new byte[8]);
+ final Union union =
Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(wmem);
+ assertTrue(union.isSameResource(wmem));
+ assertFalse(union.isSameResource(emptyMem));
+
+ final Intersection inter =
Sketches.setOperationBuilder().buildIntersection(wmem);
+ assertTrue(inter.isSameResource(wmem));
+ assertFalse(inter.isSameResource(emptyMem));
+
+ final AnotB aNotB = Sketches.setOperationBuilder().buildANotB();
+
+ assertFalse(aNotB.isSameResource(emptyMem));
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: "+this.getClass().getName());
+ }
+
+ /**
+ * @param s value to print
+ */
+ static void println(final String s) {
+ //System.out.println(s); //disable here
+ }
+
+ /**
+ * Compute offsets for MyHeap for Union, sketch1, sketch2, sketch3,
resultSketch, total layout.
+ * @param sketchNomEntries the configured nominal entries of the sketch
+ * @param unionNomEntries configured nominal entries of the union
+ * @return array of offsets for Union, sketch1, sketch2, sketch3,
resultSketch, total layout
+ */
+ private static int[] getHeapLayout(final int sketchNomEntries, final int
unionNomEntries) {
+ final int[] heapLayout = new int[6];
+ final int unionBytes = SetOperation.getMaxUnionBytes(unionNomEntries);
+ final int sketchBytes = getMaxUpdateSketchBytes(sketchNomEntries);
+ final int resultBytes = Sketch.getMaxCompactSketchBytes(unionNomEntries);
+ heapLayout[0] = 0; //offset for Union
+ heapLayout[1] = unionBytes; //offset for sketch1
+ heapLayout[2] = unionBytes + sketchBytes; //offset for sketch2
+ heapLayout[3] = unionBytes + 2*sketchBytes; //offset for sketch3
+ heapLayout[4] = unionBytes + 3*sketchBytes; //offset for result
+ heapLayout[5] = unionBytes + 3*sketchBytes + resultBytes; //total
+ return heapLayout;
+ }
+
+ private static double directUnionTrial1(
+ final MemorySegment heapMem, final int[] heapLayout, final int
sketchNomEntries, final int unionNomEntries) {
+
+ final int offset = heapLayout[0];
+ final int bytes = heapLayout[1] - offset;
+ final MemorySegment unionMem = heapMem.asSlice(offset, bytes);
+
+ Union union =
SetOperation.builder().setNominalEntries(unionNomEntries).buildUnion(unionMem);
+
+ final MemorySegment sketch1mem = heapMem.asSlice(heapLayout[1],
heapLayout[2]-heapLayout[1]);
+ final MemorySegment sketch2mem = heapMem.asSlice(heapLayout[2],
heapLayout[3]-heapLayout[2]);
+ final MemorySegment sketch3mem = heapMem.asSlice(heapLayout[3],
heapLayout[4]-heapLayout[3]);
+ final MemorySegment resultMem = heapMem.asSlice(heapLayout[4],
heapLayout[5]-heapLayout[4]);
+
+ //Initialize the 3 sketches
+ final UpdateSketch sk1 =
UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch1mem);
+ final UpdateSketch sk2 =
UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch2mem);
+ final UpdateSketch sk3 =
UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch3mem);
+
+ //This little trial has sk1 and sk2 distinct and sk2 overlap both.
+ //Build the sketches.
+ for (int i=0; i< sketchNomEntries; i++) {
+ sk1.update(i);
+ sk2.update(i + sketchNomEntries/2);
+ sk3.update(i + sketchNomEntries);
+ }
+
+ //confirm that each of these 3 sketches is exact.
+ assertEquals(sk1.getEstimate(), sketchNomEntries, 0.0);
+ assertEquals(sk2.getEstimate(), sketchNomEntries, 0.0);
+ assertEquals(sk3.getEstimate(), sketchNomEntries, 0.0);
+
+ //Let's union the first 2 sketches
+ union.union(sk1);
+ union.union(sk2);
+
+ //Let's recover the union and the 3rd sketch
+ union = Sketches.wrapUnion(unionMem);
+ union.union(Sketch.wrap(sketch3mem));
+
+ final Sketch resSk = union.getResult(true, resultMem);
+ final double est = resSk.getEstimate();
+
+ return est;
+ }
+
+ private static double directUnionTrial2(
+ final MemorySegment heapMem, final int[] heapLayout, final int
sketchNomEntries, final int unionNomEntries) {
+
+ final MemorySegment unionMem = heapMem.asSlice(heapLayout[0],
heapLayout[1]-heapLayout[0]);
+ final MemorySegment sketch1mem = heapMem.asSlice(heapLayout[1],
heapLayout[2]-heapLayout[1]);
+ final MemorySegment sketch2mem = heapMem.asSlice(heapLayout[2],
heapLayout[3]-heapLayout[2]);
+ final MemorySegment sketch3mem = heapMem.asSlice(heapLayout[3],
heapLayout[4]-heapLayout[3]);
+ final MemorySegment resultMem = heapMem.asSlice(heapLayout[4],
heapLayout[5]-heapLayout[4]);
+
+ //Recover the 3 sketches
+ final UpdateSketch sk1 = (UpdateSketch) Sketch.wrap(sketch1mem);
+ final UpdateSketch sk2 = (UpdateSketch) Sketch.wrap(sketch2mem);
+ final UpdateSketch sk3 = (UpdateSketch) Sketch.wrap(sketch3mem);
+
+ //confirm that each of these 3 sketches is exact.
+ assertEquals(sk1.getEstimate(), sketchNomEntries, 0.0);
+ assertEquals(sk2.getEstimate(), sketchNomEntries, 0.0);
+ assertEquals(sk3.getEstimate(), sketchNomEntries, 0.0);
+
+ //Create a new union in the same space with a smaller size.
+ Util.clear(unionMem);
+ final Union union =
SetOperation.builder().setNominalEntries(unionNomEntries).buildUnion(unionMem);
+ union.union(sk1);
+ union.union(sk2);
+ union.union(sk3);
+
+ final Sketch resSk = union.getResult(true, resultMem);
+ final double est = resSk.getEstimate();
+
+ return est;
+ }
+
+}
diff --git
a/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java
b/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java
new file mode 100644
index 000000000..6848c224e
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java
@@ -0,0 +1,501 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EMPTY;
+import static
org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EST_HEAP;
+import static
org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EST_MEMORY_UNORDERED;
+import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EXACT;
+import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.NULL;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+
+import java.lang.foreign.MemorySegment;
+import java.util.Random;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class SetOpsCornerCasesTest {
+
+ /*******************************************/
+ Random rand = new Random(9001); //deterministic
+
+ @Test
+ public void checkSetOpsRandom() {
+ int hiA = 0, loB = 0, hiB = 0;
+ for (int i = 0; i < 1000; i++) {
+ hiA = rand.nextInt(128); //skA fed values between 0 and 127
+ loB = rand.nextInt(64);
+ hiB = loB + rand.nextInt(64); //skB fed up to 63 values starting at loB
+ compareSetOpsRandom(64, 0, hiA, loB, hiB);
+ }
+ }
+
+ private static void compareSetOpsRandom(int k, int loA, int hiA, int loB,
int hiB) {
+ UpdateSketch tskA =
Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ UpdateSketch tskB =
Sketches.updateSketchBuilder().setNominalEntries(k).build();
+
+ for (int i = loA; i < hiA; i++) { tskA.update(i); }
+ for (int i = loB; i < hiB; i++) { tskB.update(i); }
+
+ CompactSketch rcskStdU = doStdUnion(tskA, tskB, k, null);
+ CompactSketch rcskPwU = doPwUnion(tskA, tskB, k);
+ checkCornerCase(rcskPwU, rcskStdU);
+
+ CompactSketch rcskStdPairU = doStdPairUnion(tskA, tskB, k, null);
+ checkCornerCase(rcskStdPairU, rcskStdU);
+
+ CompactSketch rcskStdI = doStdIntersection(tskA, tskB, null);
+ CompactSketch rcskPwI = doPwIntersection(tskA, tskB);
+ checkCornerCase(rcskPwI, rcskStdI);
+
+ CompactSketch rcskStdPairI = doStdPairIntersection(tskA, tskB, null);
+ checkCornerCase(rcskStdPairI, rcskStdI);
+
+ CompactSketch rcskStdAnotB = doStdAnotB(tskA, tskB, null);
+ CompactSketch rcskPwAnotB = doPwAnotB(tskA, tskB);
+ checkCornerCase(rcskPwAnotB, rcskStdAnotB);
+
+ CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tskA, tskB, null);
+ checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB);
+ }
+
+ /*******************************************/
+
+ @Test
+ //Check all corner cases against standard Union, Intersection, and AnotB.
+ //The unordered case is not tested
+ public void compareCornerCases() {
+ int k = 64;
+ for (State stateA : State.values()) {
+ for (State stateB : State.values()) {
+ if ((stateA == EST_MEMORY_UNORDERED) || (stateB ==
EST_MEMORY_UNORDERED)) { continue; }
+ if ((stateA == NULL) || (stateB == NULL)) { continue; }
+ cornerCaseChecks(stateA, stateB, k);
+ cornerCaseChecksMemory(stateA, stateB, k);
+ }
+ }
+ }
+
+// @Test
+// public void checkExactNullSpecificCase() {
+// cornerCaseChecksMemory(State.EXACT, State.NULL, 64);
+// }
+
+ private static void cornerCaseChecksMemory(State stateA, State stateB, int
k) {
+ println("StateA: " + stateA + ", StateB: " + stateB);
+ CompactSketch tcskA = generate(stateA, k);
+ CompactSketch tcskB = generate(stateB, k);
+
+ MemorySegment wseg = MemorySegment.ofArray(new
byte[SetOperation.getMaxUnionBytes(k)]);
+
+ CompactSketch rcskStdU = doStdUnion(tcskA, tcskB, k, null);
+ CompactSketch rcskPwU = doPwUnion(tcskA, tcskB, k);
+ checkCornerCase(rcskPwU, rcskStdU); //heap, heap
+
+ rcskStdU = doStdUnion(tcskA, tcskB, k, wseg);
+ CompactSketch rcskStdPairU = doStdPairUnion(tcskA, tcskB, k, wseg);
+ checkCornerCase(rcskStdPairU, rcskStdU); //direct, direct
+
+ wseg = MemorySegment.ofArray(new
byte[SetOperation.getMaxIntersectionBytes(k)]);
+
+ CompactSketch rcskStdI = doStdIntersection(tcskA, tcskB, null);
+ CompactSketch rcskPwI = doPwIntersection(tcskA, tcskB);
+ checkCornerCase(rcskPwI, rcskStdI); //empty, empty
+
+ rcskStdI = doStdIntersection(tcskA, tcskB, wseg);
+ CompactSketch rcskStdPairI = doStdPairIntersection(tcskA, tcskB, wseg);
+ checkCornerCase(rcskStdPairI, rcskStdI); //empty, empty //direct, direct???
+
+ wseg = MemorySegment.ofArray(new
byte[SetOperation.getMaxAnotBResultBytes(k)]);
+
+ CompactSketch rcskStdAnotB = doStdAnotB(tcskA, tcskB, null);
+ CompactSketch rcskPwAnotB = doPwAnotB(tcskA, tcskB);
+ checkCornerCase(rcskPwAnotB, rcskStdAnotB); //heap, heap
+
+ rcskStdAnotB = doStdAnotB(tcskA, tcskB, wseg);
+ CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tcskA, tcskB,
wseg);
+ checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB); //direct, heap
+ }
+
+ private static void cornerCaseChecks(State stateA, State stateB, int k) {
+ println("StateA: " + stateA + ", StateB: " + stateB);
+ CompactSketch tcskA = generate(stateA, k);
+ CompactSketch tcskB = generate(stateB, k);
+
+ CompactSketch rcskStdU = doStdUnion(tcskA, tcskB, k, null);
+ CompactSketch rcskPwU = doPwUnion(tcskA, tcskB, k);
+ checkCornerCase(rcskPwU, rcskStdU);
+
+ CompactSketch rcskStdPairU = doStdPairUnion(tcskA, tcskB, k, null);
+ checkCornerCase(rcskStdPairU, rcskStdU);
+
+ CompactSketch rcskStdI = doStdIntersection(tcskA, tcskB, null);
+ CompactSketch rcskPwI = doPwIntersection(tcskA, tcskB);
+ checkCornerCase(rcskPwI, rcskStdI);
+
+ CompactSketch rcskStdPairI = doStdPairIntersection(tcskA, tcskB, null);
+ checkCornerCase(rcskStdPairI, rcskStdI);
+
+ CompactSketch rcskStdAnotB = doStdAnotB(tcskA, tcskB, null);
+ CompactSketch rcskPwAnotB = doPwAnotB(tcskA, tcskB);
+ checkCornerCase(rcskPwAnotB, rcskStdAnotB);
+
+ CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tcskA, tcskB,
null);
+ checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB);
+ }
+
+ private static CompactSketch doStdUnion(Sketch tskA, Sketch tskB, int k,
MemorySegment wseg) {
+ Union union =
Sketches.setOperationBuilder().setNominalEntries(k).buildUnion();
+ union.union(tskA);
+ union.union(tskB);
+ return union.getResult(true, wseg);
+ }
+
+ private static CompactSketch doStdPairUnion(Sketch tskA, Sketch tskB, int k,
MemorySegment wseg) {
+ Union union =
Sketches.setOperationBuilder().setNominalEntries(k).buildUnion();
+ return union.union(tskA, tskB, true, wseg);
+ }
+
+ private static CompactSketch doStdIntersection(Sketch tskA, Sketch tskB,
MemorySegment wseg) {
+ Intersection inter = Sketches.setOperationBuilder().buildIntersection();
+ inter.intersect(tskA);
+ inter.intersect(tskB);
+ return inter.getResult(true, wseg);
+ }
+
+ private static CompactSketch doStdPairIntersection(Sketch tskA, Sketch tskB,
MemorySegment wseg) {
+ Intersection inter = Sketches.setOperationBuilder().buildIntersection();
+ return inter.intersect(tskA, tskB, true, wseg);
+ }
+
+ private static CompactSketch doStdAnotB(Sketch tskA, Sketch tskB,
MemorySegment wseg) {
+ AnotB anotb = Sketches.setOperationBuilder().buildANotB();
+ return anotb.aNotB(tskA, tskB, true, wseg);
+ }
+
+ private static CompactSketch doStdStatefulAnotB(Sketch tskA, Sketch tskB,
MemorySegment wseg) {
+ AnotB anotb = Sketches.setOperationBuilder().buildANotB();
+ anotb.setA(tskA);
+ anotb.notB(tskB);
+ anotb.getResult(false);
+ return anotb.getResult(true, wseg, true);
+ }
+
+ private static CompactSketch doPwUnion(Sketch tskA, Sketch tskB, int k) {
+ CompactSketch tcskA, tcskB;
+ if (tskA == null) { tcskA = null; }
+ else { tcskA = (tskA instanceof CompactSketch) ? (CompactSketch) tskA :
tskA.compact(); }
+ if (tskB == null) { tcskB = null; }
+ else { tcskB = (tskB instanceof CompactSketch) ? (CompactSketch) tskB :
tskB.compact(); }
+ Union union = SetOperation.builder().setNominalEntries(k).buildUnion();
+ return union.union(tcskA, tcskB);
+ }
+
+ private static CompactSketch doPwIntersection(Sketch tskA, Sketch tskB) {
+ Intersection inter = SetOperation.builder().buildIntersection();
+ return inter.intersect(tskA, tskB);
+ }
+
+ private static CompactSketch doPwAnotB(Sketch tskA, Sketch tskB) {
+ AnotB aNotB = SetOperation.builder().buildANotB();
+ return aNotB.aNotB(tskA, tskB);
+ }
+
+
+ private static void checkCornerCase(Sketch rskA, Sketch rskB) {
+ double estA = rskA.getEstimate();
+ double estB = rskB.getEstimate();
+ boolean emptyA = rskA.isEmpty();
+ boolean emptyB = rskB.isEmpty();
+ long thetaLongA = rskA.getThetaLong();
+ long thetaLongB = rskB.getThetaLong();
+ int countA = rskA.getRetainedEntries(true);
+ int countB = rskB.getRetainedEntries(true);
+ Assert.assertEquals(estB, estA, 0.0);
+ Assert.assertEquals(emptyB, emptyA);
+ Assert.assertEquals(thetaLongB, thetaLongA);
+ Assert.assertEquals(countB, countA);
+ Assert.assertEquals(rskA.getClass().getSimpleName(),
rskB.getClass().getSimpleName());
+ }
+
+ /*******************************************/
+
+ @Test
+ public void checkUnionNotOrdered() {
+ int k = 64;
+ CompactSketch skNull = generate(NULL, k);
+ CompactSketch skEmpty = generate(EMPTY, k);
+ CompactSketch skHeap = generate(EST_HEAP, k);
+ CompactSketch skHeapUO = generate(EST_MEMORY_UNORDERED, k);
+ Union union = SetOperation.builder().setNominalEntries(k).buildUnion();
+ union.union(skNull, skHeapUO);
+ union.union(skEmpty, skHeapUO);
+ union.union(skHeapUO, skNull);
+ union.union(skHeapUO, skEmpty);
+ union.union(skHeapUO, skHeap);
+ union.union(skHeap, skHeapUO);
+ }
+
+ @Test
+ public void checkSeedHash() {
+ int k = 64;
+ UpdateSketch tmp1 =
Sketches.updateSketchBuilder().setNominalEntries(k).setSeed(123).build();
+ tmp1.update(1);
+ tmp1.update(3);
+ CompactSketch skSmallSeed2A = tmp1.compact(true, null);
+
+ UpdateSketch tmp2 =
Sketches.updateSketchBuilder().setNominalEntries(k).setSeed(123).build();
+ tmp2.update(1);
+ tmp2.update(2);
+ CompactSketch skSmallSeed2B = tmp2.compact(true, null);
+
+ CompactSketch skExact = generate(EXACT, k);
+ CompactSketch skHeap = generate(EST_HEAP, 2 * k);
+
+ Intersection inter = SetOperation.builder().buildIntersection();
+ AnotB aNotB = SetOperation.builder().buildANotB();
+ Union union = SetOperation.builder().setNominalEntries(k).buildUnion();
+
+ //Intersect
+ try {
+ inter.intersect(skExact, skSmallSeed2A);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ inter.intersect(skExact, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ inter.intersect(skSmallSeed2B, skExact);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ inter.intersect(skHeap, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ //A NOT B
+ try {
+ aNotB.aNotB(skExact, skSmallSeed2A);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ aNotB.aNotB(skExact, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ aNotB.aNotB(skSmallSeed2B, skExact);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ aNotB.aNotB(skHeap, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ //Union
+ try {
+ union.union(skExact, skSmallSeed2A);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ union.union(skExact, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ union.union(skSmallSeed2B, skExact);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ try {
+ union.union(skHeap, skSmallSeed2B);
+ Assert.fail();
+ } catch (Exception e) { } //pass
+ }
+
+ @Test
+ public void checkPwUnionReduceToK() {
+ int k = 16;
+ CompactSketch skNull = generate(NULL, k);
+ CompactSketch skEmpty = generate(EMPTY, k);
+ CompactSketch skHeap1 = generate(EST_HEAP, k);
+ CompactSketch skHeap2 = generate(EST_HEAP, k);
+ Union union = SetOperation.builder().setNominalEntries(k).buildUnion();
+ CompactSketch csk;
+ csk = union.union(skNull, skHeap1);
+ Assert.assertEquals(csk.getRetainedEntries(true), k);
+ csk = union.union(skEmpty, skHeap1);
+ Assert.assertEquals(csk.getRetainedEntries(true), k);
+ csk = union.union(skHeap1, skNull);
+ Assert.assertEquals(csk.getRetainedEntries(true), k);
+ csk = union.union(skHeap1, skEmpty);
+ Assert.assertEquals(csk.getRetainedEntries(true), k);
+ csk = union.union(skHeap1, skHeap2);
+ Assert.assertEquals(csk.getRetainedEntries(true), k);
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: "+this.getClass().getName());
+ }
+
+ /**
+ * @param s value to print
+ */
+ static void println(String s) {
+ //System.out.println(s); //disable here
+ }
+
+ @Test
+ public void checkGenerator() {
+ int k = 16;
+ CompactSketch csk;
+
+ csk = generate(State.NULL, 0);
+ assertNull(csk);
+
+ csk = generate(State.EMPTY, k);
+ assertEquals(csk.isEmpty(), true);
+ assertEquals(csk.isEstimationMode(), false);
+ assertEquals(csk.getRetainedEntries(true), 0);
+ assertEquals(csk.getThetaLong(), Long.MAX_VALUE);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ csk = generate(State.SINGLE, k);
+ assertEquals(csk.isEmpty(), false);
+ assertEquals(csk.isEstimationMode(), false);
+ assertEquals(csk.getRetainedEntries(true), 1);
+ assertEquals(csk.getThetaLong(), Long.MAX_VALUE);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ csk = generate(State.EXACT, k);
+ assertEquals(csk.isEmpty(), false);
+ assertEquals(csk.isEstimationMode(), false);
+ assertEquals(csk.getRetainedEntries(true), k);
+ assertEquals(csk.getThetaLong(), Long.MAX_VALUE);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ csk = generate(State.EST_HEAP, k);
+ assertEquals(csk.isEmpty(), false);
+ assertEquals(csk.isEstimationMode(), true);
+ assertEquals(csk.getRetainedEntries(true) > k, true);
+ assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ csk = generate(State.THLT1_CNT0_FALSE, k);
+ assertEquals(csk.isEmpty(), false);
+ assertEquals(csk.isEstimationMode(), true);
+ assertEquals(csk.getRetainedEntries(true), 0);
+ assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ csk = generate(State.THEQ1_CNT0_TRUE, k);
+ assertEquals(csk.isEmpty(), true);
+ assertEquals(csk.isEstimationMode(), false);
+ assertEquals(csk.getRetainedEntries(true), 0);
+ assertEquals(csk.getThetaLong() < Long.MAX_VALUE, false);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), false);
+ assertEquals(csk.isOrdered(), true);
+
+ csk = generate(State.EST_MEMORY_UNORDERED, k);
+ assertEquals(csk.isEmpty(), false);
+ assertEquals(csk.isEstimationMode(), true);
+ assertEquals(csk.getRetainedEntries(true) > k, true);
+ assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true);
+ assertEquals(csk.isDirect(), false);
+ assertEquals(csk.hasMemorySegment(), true);
+ assertEquals(csk.isOrdered(), false);
+ }
+
+ enum State {NULL, EMPTY, SINGLE, EXACT, EST_HEAP, THLT1_CNT0_FALSE,
THEQ1_CNT0_TRUE, EST_MEMORY_UNORDERED}
+
+ private static CompactSketch generate(State state, int k) {
+ UpdateSketch sk = null;
+ CompactSketch csk = null;
+
+ switch(state) {
+ case NULL : {
+ //already null
+ break;
+ }
+ case EMPTY : { //results in EmptyCompactSketch
+ csk =
Sketches.updateSketchBuilder().setNominalEntries(k).build().compact(true, null);
+ break;
+ }
+ case SINGLE : { //results in SingleItemSketches most of the time
+ sk = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ sk.update(1);
+ csk = sk.compact(true, null);
+ break;
+ }
+ case EXACT : {
+ sk = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ for (int i = 0; i < k; i++) {
+ sk.update(i);
+ }
+ csk = sk.compact(true, null);
+ break;
+ }
+ case EST_HEAP : {
+ sk = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ for (int i = 0; i < (4 * k); i++) {
+ sk.update(i);
+ }
+ csk = sk.compact(true, null);
+ break;
+ }
+ case THLT1_CNT0_FALSE : {
+ sk =
Sketches.updateSketchBuilder().setP((float)0.5).setNominalEntries(k).build();
+ sk.update(7); //above theta
+ assert(sk.getRetainedEntries(true) == 0);
+ csk = sk.compact(true, null); //compact as {Th < 1.0, 0, F}
+ break;
+ }
+ case THEQ1_CNT0_TRUE : {
+ sk =
Sketches.updateSketchBuilder().setP((float)0.5).setNominalEntries(k).build();
+ assert(sk.getRetainedEntries(true) == 0);
+ csk = sk.compact(true, null); //compact as {Th < 1.0, 0, T}
+ break;
+ }
+ case EST_MEMORY_UNORDERED : {
+ sk = Sketches.updateSketchBuilder().setNominalEntries(k).build();
+ for (int i = 0; i < (4 * k); i++) {
+ sk.update(i);
+ }
+ int bytes =
Sketch.getMaxCompactSketchBytes(sk.getRetainedEntries(true));
+ byte[] byteArr = new byte[bytes];
+ MemorySegment wseg = MemorySegment.ofArray(byteArr);
+ csk = sk.compact(false, wseg);
+ break;
+ }
+ }
+ return csk;
+ }
+
+}
diff --git a/src/test/java/org/apache/datasketches/theta2/SketchesTest.java
b/src/test/java/org/apache/datasketches/theta2/SketchesTest.java
new file mode 100644
index 000000000..277aae961
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/theta2/SketchesTest.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static
org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1;
+import static org.apache.datasketches.theta2.Sketches.getCompactSketchMaxBytes;
+import static org.apache.datasketches.theta2.Sketches.getMaxCompactSketchBytes;
+import static org.apache.datasketches.theta2.Sketches.getMaxIntersectionBytes;
+import static org.apache.datasketches.theta2.Sketches.getMaxUnionBytes;
+import static org.apache.datasketches.theta2.Sketches.getMaxUpdateSketchBytes;
+import static org.apache.datasketches.theta2.Sketches.getSerializationVersion;
+import static org.apache.datasketches.theta2.Sketches.heapifySetOperation;
+import static org.apache.datasketches.theta2.Sketches.heapifySketch;
+import static org.apache.datasketches.theta2.Sketches.setOperationBuilder;
+import static org.apache.datasketches.theta2.Sketches.updateSketchBuilder;
+import static org.apache.datasketches.theta2.Sketches.wrapSetOperation;
+import static org.apache.datasketches.theta2.Sketches.wrapSketch;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+public class SketchesTest {
+
+ private static MemorySegment getCompactSketchMemory(final int k, final int
from, final int to) {
+ final UpdateSketch sk1 =
updateSketchBuilder().setNominalEntries(k).build();
+ for (int i=from; i<to; i++) {
+ sk1.update(i);
+ }
+ final CompactSketch csk = sk1.compact(true, null);
+ final byte[] sk1bytes = csk.toByteArray();
+ final MemorySegment seg = MemorySegment.ofArray(sk1bytes).asReadOnly();
+ return seg;
+ }
+
+ private static MemorySegment getMemoryFromCompactSketch(final CompactSketch
csk) {
+ final byte[] sk1bytes = csk.toByteArray();
+ final MemorySegment seg = MemorySegment.ofArray(sk1bytes).asReadOnly();
+ return seg;
+ }
+
+ private static CompactSketch getCompactSketch(final int k, final int from,
final int to) {
+ final UpdateSketch sk1 =
updateSketchBuilder().setNominalEntries(k).build();
+ for (int i=from; i<to; i++) {
+ sk1.update(i);
+ }
+ return sk1.compact(true, null);
+ }
+
+ @Test
+ public void checkSketchMethods() {
+ final int k = 1024;
+ final MemorySegment seg = getCompactSketchMemory(k, 0, k);
+
+ CompactSketch csk2 = (CompactSketch)heapifySketch(seg);
+ assertEquals((int)csk2.getEstimate(), k);
+
+ csk2 = (CompactSketch)heapifySketch(seg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ assertEquals((int)csk2.getEstimate(), k);
+
+ csk2 = (CompactSketch)wrapSketch(seg);
+ assertEquals((int)csk2.getEstimate(), k);
+
+ csk2 = (CompactSketch)wrapSketch(seg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ assertEquals((int)csk2.getEstimate(), k);
+ }
+
+ @Test
+ public void checkSetOpMethods() {
+ final int k = 1024;
+ final MemorySegment seg1 = getCompactSketchMemory(k, 0, k);
+ final MemorySegment seg2 = getCompactSketchMemory(k, k/2, 3*k/2);
+
+ final SetOperationBuilder bldr = setOperationBuilder();
+ final Union union = bldr.setNominalEntries(2 * k).buildUnion();
+
+ union.union(seg1);
+ CompactSketch cSk = union.getResult(true, null);
+ assertEquals((int)cSk.getEstimate(), k);
+ union.union(seg2);
+ cSk = union.getResult(true, null);
+ assertEquals((int)cSk.getEstimate(), 3*k/2);
+
+ final byte[] ubytes = union.toByteArray();
+ final MemorySegment uSeg = MemorySegment.ofArray(ubytes);
+
+ Union union2 = (Union)heapifySetOperation(uSeg);
+ cSk = union2.getResult(true, null);
+ assertEquals((int)cSk.getEstimate(), 3*k/2);
+
+ union2 = (Union)heapifySetOperation(uSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ cSk = union2.getResult(true, null);
+ assertEquals((int)cSk.getEstimate(), 3*k/2);
+
+ union2 = (Union)wrapSetOperation(uSeg);
+ cSk = union2.getResult(true, null);
+ assertEquals((int)cSk.getEstimate(), 3*k/2);
+
+ union2 = (Union)wrapSetOperation(uSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+ cSk = union2.getResult(true, null);
+ assertEquals((int)cSk.getEstimate(), 3*k/2);
+
+ final int serVer = getSerializationVersion(uSeg);
+ assertEquals(serVer, 3);
+ }
+
+ @Test
+ public void checkUtilMethods() {
+ final int lgK = 10;
+ final int k = 1 << lgK;
+
+ final int maxUnionBytes = getMaxUnionBytes(k);
+ assertEquals(2*k*8+32, maxUnionBytes);
+
+ final int maxInterBytes = getMaxIntersectionBytes(k);
+ assertEquals(2*k*8+24, maxInterBytes);
+
+ final int maxCompSkBytes = getMaxCompactSketchBytes(k+1);
+ assertEquals(24+(k+1)*8, maxCompSkBytes);
+
+ final int compSkMaxBytes = getCompactSketchMaxBytes(lgK); {
+ int bytes = (int)((2 << lgK) * ThetaUtil.REBUILD_THRESHOLD +
Family.QUICKSELECT.getMaxPreLongs()) * Long.BYTES;
+ assertEquals(compSkMaxBytes, bytes);
+ }
+
+ final int maxSkBytes = getMaxUpdateSketchBytes(k);
+ assertEquals(24+2*k*8, maxSkBytes);
+ }
+
+ @Test
+ public void checkStaticEstimators() {
+ final int k = 4096;
+ final int u = 4*k;
+ final CompactSketch csk = getCompactSketch(k, 0, u);
+ final MemorySegment srcSeg = getMemoryFromCompactSketch(csk);
+ final double est = Sketches.getEstimate(srcSeg);
+ assertEquals(est, u, 0.05*u);
+ final double rse = 1.0/Math.sqrt(k);
+ final double ub = Sketches.getUpperBound(1, srcSeg);
+ assertEquals(ub, est+rse, 0.05*u);
+ final double lb = Sketches.getLowerBound(1, srcSeg);
+ assertEquals(lb, est-rse, 0.05*u);
+ final MemorySegment segV1 = convertSerVer3toSerVer1(csk);
+ boolean empty = Sketches.getEmpty(segV1);
+ assertFalse(empty);
+
+ final CompactSketch csk2 = getCompactSketch(k, 0, 0);
+ final MemorySegment emptySegV3 = getMemoryFromCompactSketch(csk2);
+ assertEquals(Sketches.getRetainedEntries(emptySegV3), 0);
+ assertEquals(Sketches.getThetaLong(emptySegV3), Long.MAX_VALUE);
+ final MemorySegment emptySegV1 = convertSerVer3toSerVer1(csk2);
+ empty = Sketches.getEmpty(emptySegV1);
+ assertTrue(empty);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkBadSketchFamily() {
+ final Union union = setOperationBuilder().buildUnion();
+ final byte[] byteArr = union.toByteArray();
+ final MemorySegment srcSeg = MemorySegment.ofArray(byteArr);
+ Sketches.getEstimate(srcSeg); //Union is not a Theta Sketch, it is an
operation
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: "+this.getClass().getName());
+ }
+
+ /**
+ * @param s value to print
+ */
+ static void println(final String s) {
+ //System.out.println(s); //disable here
+ }
+
+}
diff --git
a/src/test/java/org/apache/datasketches/theta2/ThetaSketchCrossLanguageTest.java
b/src/test/java/org/apache/datasketches/theta2/ThetaSketchCrossLanguageTest.java
new file mode 100644
index 000000000..d0177d204
--- /dev/null
+++
b/src/test/java/org/apache/datasketches/theta2/ThetaSketchCrossLanguageTest.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES;
+import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES;
+import static org.apache.datasketches.common.TestUtil.cppPath;
+import static org.apache.datasketches.common.TestUtil.javaPath;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import java.io.IOException;
+import java.lang.foreign.MemorySegment;
+import java.nio.file.Files;
+
+import org.testng.annotations.Test;
+
+/**
+ * Serialize binary sketches to be tested by C++ code.
+ * Test deserialization of binary sketches serialized by C++ code.
+ */
+public class ThetaSketchCrossLanguageTest {
+
+ @Test(groups = {GENERATE_JAVA_FILES})
+ public void generateBinariesForCompatibilityTesting() throws IOException {
+ final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
+ for (int n: nArr) {
+ final UpdateSketch sk = UpdateSketch.builder().build();
+ for (int i = 0; i < n; i++) {
+ sk.update(i);
+ }
+ Files.newOutputStream(javaPath.resolve("theta_n" + n +
"_java.sk")).write(sk.compact().toByteArray());
+ }
+ }
+
+ @Test(groups = {GENERATE_JAVA_FILES})
+ public void generateBinariesForCompatibilityTestingCompressed() throws
IOException {
+ final int[] nArr = {10, 100, 1000, 10_000, 100_000, 1_000_000};
+ for (int n: nArr) {
+ final UpdateSketch sk = UpdateSketch.builder().build();
+ for (int i = 0; i < n; i++) {
+ sk.update(i);
+ }
+ Files.newOutputStream(javaPath.resolve("theta_compressed_n" + n +
"_java.sk")).write(sk.compact().toByteArrayCompressed());
+ }
+ }
+
+ @Test(groups = {GENERATE_JAVA_FILES})
+ public void generateBinariesForCompatibilityTestingNonEmptyNoEntries()
throws IOException {
+ final UpdateSketch sk = UpdateSketch.builder().setP(0.01f).build();
+ sk.update(1);
+ assertFalse(sk.isEmpty());
+ assertEquals(sk.getRetainedEntries(), 0);
+
Files.newOutputStream(javaPath.resolve("theta_non_empty_no_entries_java.sk")).write(sk.compact().toByteArray());
+ }
+
+ @Test(groups = {CHECK_CPP_FILES})
+ public void deserializeFromCpp() throws IOException {
+ final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
+ for (int n: nArr) {
+ final byte[] bytes = Files.readAllBytes(cppPath.resolve("theta_n" + n +
"_cpp.sk"));
+ final CompactSketch sketch =
CompactSketch.wrap(MemorySegment.ofArray(bytes));
+ assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty());
+ assertEquals(sketch.getEstimate(), n, n * 0.03);
+ assertTrue(sketch.isOrdered());
+ final HashIterator it = sketch.iterator();
+ long previous = 0;
+ while (it.next()) {
+ assertTrue(it.get() < sketch.getThetaLong());
+ assertTrue(it.get() > previous);
+ previous = it.get();
+ }
+ }
+ }
+
+ @Test(groups = {CHECK_CPP_FILES})
+ public void deserializeFromCppCompressed() throws IOException {
+ final int[] nArr = {10, 100, 1000, 10000, 100000, 1000000};
+ for (int n: nArr) {
+ final byte[] bytes =
Files.readAllBytes(cppPath.resolve("theta_compressed_n" + n + "_cpp.sk"));
+ final CompactSketch sketch =
CompactSketch.wrap(MemorySegment.ofArray(bytes));
+ assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty());
+ assertEquals(sketch.getEstimate(), n, n * 0.03);
+ assertTrue(sketch.isOrdered());
+ final HashIterator it = sketch.iterator();
+ long previous = 0;
+ while (it.next()) {
+ assertTrue(it.get() < sketch.getThetaLong());
+ assertTrue(it.get() > previous);
+ previous = it.get();
+ }
+ }
+ }
+
+ @Test(groups = {CHECK_CPP_FILES})
+ public void deserializeFromCppNonEmptyNoEntries() throws IOException {
+ final byte[] bytes =
Files.readAllBytes(cppPath.resolve("theta_non_empty_no_entries_cpp.sk"));
+ final CompactSketch sketch =
CompactSketch.wrap(MemorySegment.ofArray(bytes));
+ assertFalse(sketch.isEmpty());
+ assertEquals(sketch.getRetainedEntries(), 0);
+ }
+
+}
diff --git
a/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java
b/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java
new file mode 100644
index 000000000..88dd009c0
--- /dev/null
+++
b/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.thetacommon;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.theta2.CompactSketch;
+import org.apache.datasketches.theta2.Intersection;
+import org.apache.datasketches.theta2.Sketches;
+import org.apache.datasketches.theta2.UpdateSketch;
+import org.testng.annotations.Test;
+
+public class BoundsOnRatiosInThetaSketchedSets2Test {
+
+ @Test
+ public void checkNormalReturns() {
+ final UpdateSketch skA = Sketches.updateSketchBuilder().build(); //4K
+ final UpdateSketch skC = Sketches.updateSketchBuilder().build();
+ final int uA = 10000;
+ final int uC = 100000;
+ for (int i = 0; i < uA; i++) { skA.update(i); }
+ for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); }
+ final Intersection inter =
Sketches.setOperationBuilder().buildIntersection();
+ inter.intersect(skA);
+ inter.intersect(skC);
+ final CompactSketch skB = inter.getResult();
+
+ double est = BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA,
skB);
+ double lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA,
skB);
+ double ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA,
skB);
+ assertTrue(ub > est);
+ assertTrue(est > lb);
+ assertEquals(est, 0.5, .03);
+ println("ub : " + ub);
+ println("est: " + est);
+ println("lb : " + lb);
+ skA.reset(); //skA is now empty
+ est = BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skB);
+ lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA, skB);
+ ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA, skB);
+ println("ub : " + ub);
+ println("est: " + est);
+ println("lb : " + lb);
+ skC.reset(); //Now both are empty
+ est = BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skC);
+ lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA, skC);
+ ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA, skC);
+ println("ub : " + ub);
+ println("est: " + est);
+ println("lb : " + lb);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkAbnormalReturns() {
+ final UpdateSketch skA = Sketches.updateSketchBuilder().build(); //4K
+ final UpdateSketch skC = Sketches.updateSketchBuilder().build();
+ final int uA = 100000;
+ final int uC = 10000;
+ for (int i = 0; i < uA; i++) { skA.update(i); }
+ for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); }
+ BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skC);
+ }
+
+ @Test
+ public void printlnTest() {
+ println("PRINTING: " + this.getClass().getName());
+ }
+
+ /**
+ * @param s value to print
+ */
+ static void println(final String s) {
+ //System.out.println(s); //disable here
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]