This is an automated email from the ASF dual-hosted git repository. leerho pushed a commit to branch special_branch_for_theta_compact_update in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 9455569cfda9508a00dd9ed0ec1f901f009a1e63 Author: Lee Rhodes <[email protected]> AuthorDate: Tue Jun 13 14:44:29 2023 -0700 Update theta compact() behavior. Update associated javadocs. --- .../datasketches/theta/CompactOperations.java | 2 +- .../datasketches/theta/HeapCompactSketch.java | 1 + .../java/org/apache/datasketches/theta/Sketch.java | 41 +-- .../datasketches/theta/CompactSketchTest.java | 279 ++++++++++++++++----- 4 files changed, 238 insertions(+), 85 deletions(-) diff --git a/src/main/java/org/apache/datasketches/theta/CompactOperations.java b/src/main/java/org/apache/datasketches/theta/CompactOperations.java index 42fe9fdf..a8066314 100644 --- a/src/main/java/org/apache/datasketches/theta/CompactOperations.java +++ b/src/main/java/org/apache/datasketches/theta/CompactOperations.java @@ -110,7 +110,7 @@ final class CompactOperations { * This assumes hashSeed is OK; serVer = 3. * @param srcMem the given input source Memory image * @param dstOrdered the desired ordering of the resulting CompactSketch - * @param dstMem Used for the target CompactSketch if it is Direct. + * @param dstMem Used for the target CompactSketch if it is Memory-based. * @return a CompactSketch of the correct form. */ @SuppressWarnings("unused") diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java index f1606884..479aa3ee 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java @@ -77,6 +77,7 @@ class HeapCompactSketch extends CompactSketch { @Override public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { + if (dstMem == null && (dstOrdered == false || this.ordered_ == dstOrdered)) { return this; } return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(), true, ordered_, dstOrdered, dstMem, getCache().clone()); } diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java index dc7df5a7..99642e12 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta/Sketch.java @@ -39,7 +39,7 @@ import org.apache.datasketches.thetacommon.BinomialBoundsN; import org.apache.datasketches.thetacommon.ThetaUtil; /** - * The top-level class for all sketches. This class is never constructed directly. + * The top-level class for all theta sketches. This class is never constructed directly. * Use the UpdateSketch.builder() methods to create UpdateSketches. * * @author Lee Rhodes @@ -193,43 +193,50 @@ public abstract class Sketch { //Sketch interface /** - * Converts this sketch to a ordered CompactSketch on the Java heap. + * Converts this sketch to a ordered CompactSketch. * - * <p>If this sketch is already in the proper form, this method returns <i>this</i>, - * otherwise, this method returns a new CompactSketch of the proper form. + * <p>If <i>this.isCompact() == true</i> this method returns <i>this</i>, + * otherwise, this method is equivalent to + * {@link #compact(boolean, WritableMemory) compact(true, null)}. * * <p>A CompactSketch is always immutable.</p> * - * @return this sketch as an ordered CompactSketch on the Java heap. + * @return this sketch as an ordered CompactSketch. */ public CompactSketch compact() { - return compact(true, null); + return (this.isCompact()) ? (CompactSketch)this : compact(true, null); } /** - * Convert this sketch to a new CompactSketch of the chosen order and direct or on the heap. + * Convert this sketch to a <i>CompactSketch</i>. * - * <p>If this sketch is already in the proper form, this operation returns <i>this</i>, - * otherwise, this method returns a new CompactSketch of the proper form. - * - * <p>If this sketch is a type of UpdateSketch, the compacting process converts the hash table - * of the UpdateSketch to a simple list of the valid hash values. + * <p>If this sketch is a type of <i>UpdateSketch</i>, the compacting process converts the hash table + * of the <i>UpdateSketch</i> to a simple list of the valid hash values. * Any hash values of zero or equal-to or greater than theta will be discarded. - * The number of valid values remaining in the CompactSketch depends on a number of factors, + * The number of valid values remaining in the <i>CompactSketch</i> depends on a number of factors, * but may be larger or smaller than <i>Nominal Entries</i> (or <i>k</i>). * It will never exceed 2<i>k</i>. * If it is critical to always limit the size to no more than <i>k</i>, - * then <i>rebuild()</i> should be called on the UpdateSketch prior to calling this method.</p> + * then <i>rebuild()</i> should be called on the <i>UpdateSketch</i> prior to calling this method.</p> * - * <p>A CompactSketch is always immutable.</p> + * <p>A <i>CompactSketch</i> is always immutable.</p> + * + * <p>A new <i>CompactSketch</i> object is created:</p> + * <ul><li>if <i>dstMem != null</i></li> + * <li>if <i>dstMem == null</i> and <i>this.hasMemory() == true</i></li> + * <li>if <i>dstMem == null</i> and <i>this</i> has more than 1 item</li> and <i>this.isOrdered() == false</i> + * and <i>dstOrdered == true</i>.</li> + *</ul> + * + * <p>Otherwise, this operation returns <i>this</i>.</p> * - * @param dstOrdered + * @param dstOrdered assumed true if this sketch is empty or has only one value * <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a> * * @param dstMem * <a href="{@docRoot}/resources/dictionary.html#dstMem">See Destination Memory</a>. * - * @return this sketch as a CompactSketch in the chosen form + * @return this sketch as a <i>CompactSketch</i>. */ public abstract CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem); diff --git a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java index 020e306b..d5aae288 100644 --- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java @@ -21,6 +21,7 @@ package org.apache.datasketches.theta; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotEquals; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; @@ -220,81 +221,226 @@ public class CompactSketchTest { @Test public void checkCompactCachePart() { - //phony values except for curCount = 0. + //phoney values except for curCount = 0. long[] result = Intersection.compactCachePart(null, 4, 0, 0L, false); assertEquals(result.length, 0); } + //See class State + //Class Name + //Count + //Bytes + private static final boolean COMPACT = true; + private static final boolean EMPTY = true; + private static final boolean DIRECT = true; + private static final boolean MEMORY = true; + private static final boolean ORDERED = true; + private static final boolean ESTIMATION = true; + @Test - public void checkDirectCompactSingleItemSketch() { - State state; + /** + * Empty, memory-based Compact sketches are always ordered + */ + public void checkEmptyMemoryCompactSketch() { + UpdateSketch sk = Sketches.updateSketchBuilder().build(); + + WritableMemory wmem1 = WritableMemory.allocate(16); + CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when empty + State state1 = new State("DirectCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION); + state1.check(csk1); + + WritableMemory wmem2 = WritableMemory.allocate(16); + CompactSketch csk2 = sk.compact(false, wmem2); + state1.check(csk2); + + assertNotEquals(csk1, csk2); //different object because memory is valid + assertFalse(csk1 == csk2); + + WritableMemory wmem3 = WritableMemory.allocate(16); + CompactSketch csk3 = csk1.compact(false, wmem3); + state1.check(csk3); + + assertNotEquals(csk1, csk3); //different object because memory is valid + assertFalse(csk1 == csk3); + + CompactSketch csk4 = csk1.compact(false, null); + State state4 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + state4.check(csk4); + + assertNotEquals(csk1, csk4); //different object because on heap + assertFalse(csk1 == csk4); + + CompactSketch cskc = csk1.compact(); + state1.check(cskc); + + assertEquals(csk1, cskc); //the same object + assertTrue(csk1 == cskc); + } + + @Test + /** + * Single-Item, memory-based Compact sketches are always ordered: + */ + public void checkSingleItemMemoryCompactSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); + sk.update(1); + + WritableMemory wmem1 = WritableMemory.allocate(16); + CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when single item + State state1 = new State("DirectCompactSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION); + state1.check(csk1); + + WritableMemory wmem2 = WritableMemory.allocate(16); + CompactSketch csk2 = sk.compact(false, wmem2); //the first parameter is ignored when single item + state1.check(csk2); + + assertNotEquals(csk1, csk2); //different object because memory is valid + assertFalse(csk1 == csk2); + + WritableMemory wmem3 = WritableMemory.allocate(16); + CompactSketch csk3 = csk1.compact(false, wmem3); + state1.check(csk3); + + assertNotEquals(csk1, csk3); //different object because memory is valid + assertFalse(csk1 == csk3); + + CompactSketch cskc = csk1.compact(); + state1.check(cskc); + + assertEquals(csk1, cskc); //the same object + assertTrue(csk1 == cskc); + } - CompactSketch csko; //ordered - CompactSketch csku; //unordered + @Test + public void checkMultipleItemMemoryCompactSketch() { + UpdateSketch sk = Sketches.updateSketchBuilder().build(); + //This sequence is naturally out-of-order by the hash values. + sk.update(1); + sk.update(2); + sk.update(3); + + WritableMemory wmem1 = WritableMemory.allocate(50); + CompactSketch csk1 = sk.compact(true, wmem1); + State state1 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION); + state1.check(csk1); + + WritableMemory wmem2 = WritableMemory.allocate(50); + CompactSketch csk2 = sk.compact(false, wmem2); + State state2 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, !ORDERED, !ESTIMATION); + state2.check(csk2); + + assertNotEquals(csk1, csk2); //different object because memory is valid + assertFalse(csk1 == csk2); + + WritableMemory wmem3 = WritableMemory.allocate(50); + CompactSketch csk3 = csk1.compact(false, wmem3); + state2.check(csk3); + + assertNotEquals(csk1, csk3); //different object because memory is valid + assertFalse(csk1 == csk3); + + CompactSketch cskc = csk1.compact(); + state1.check(cskc); + + assertEquals(csk1, cskc); //the same object + assertTrue(csk1 == cskc); + } + + @Test + /** + * Empty, heap-based Compact sketches are always ordered. + * All empty, heap-based, compact sketches point to the same static, final constant of 8 bytes. + */ + public void checkEmptyHeapCompactSketch() { + UpdateSketch sk = Sketches.updateSketchBuilder().build(); - WritableMemory wmem = WritableMemory.allocate(16); - csko = sk.compact(true, wmem); //empty, direct, ordered - //ClassType, Count, Bytes, Compact, Empty, Direct, Memory, Ordered, Estimation - state = new State("DirectCompactSketch", 0, 8, true, true, false, true, true, false); - state.check(csko); + CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when empty + State state1 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + state1.check(csk1); + + CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when empty + state1.check(csk1); + + assertEquals(csk1, csk2); + assertTrue(csk1 == csk2); + + CompactSketch csk3 = csk1.compact(false, null); + state1.check(csk3); + + assertEquals(csk1, csk3); //The same object + assertTrue(csk1 == csk3); + + CompactSketch cskc = csk1.compact(); + state1.check(cskc); + + assertEquals(csk1, cskc); //The same object + assertTrue(csk1 == cskc); + } - wmem = WritableMemory.allocate(16); - csku = sk.compact(false, wmem); //empty, direct, unordered - state = new State("DirectCompactSketch", 0, 8, true, true, false, true, true, false); - state.check(csku); + @Test + /** + * Single-Item, heap-based Compact sketches are always ordered. + */ + public void checkSingleItemHeapCompactSketch() { + UpdateSketch sk = Sketches.updateSketchBuilder().build(); + sk.update(1); + + CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when single item + State state1 = new State("SingleItemSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + state1.check(csk1); + + CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when single item + state1.check(csk2); + + assertNotEquals(csk1, csk2); //calling the compact(boolean, null) method creates a new object + assertFalse(csk1 == csk2); + + CompactSketch csk3 = csk1.compact(false, null); + state1.check(csk3); + + assertEquals(csk1, csk3); //The same object + assertTrue(csk1 == csk3); + + CompactSketch cskc = csk1.compact(); //this, however just returns the same object. + state1.check(csk1); + + assertEquals(csk1, cskc); //the same object + assertTrue(csk1 == cskc); + } + @Test + public void checkMultipleItemHeapCompactSketch() { + UpdateSketch sk = Sketches.updateSketchBuilder().build(); + //This sequence is naturally out-of-order by the hash values. sk.update(1); - wmem = WritableMemory.allocate(16); - csko = sk.compact(true, wmem); //Single, direct, ordered - state = new State("DirectCompactSketch", 1, 16, true, false, false, true, true, false); - state.check(csko); - - wmem = WritableMemory.allocate(16); - csku = sk.compact(false, wmem); //Single, direct, unordered - state = new State("DirectCompactSketch", 1, 16, true, false, false, true, true, false); - state.check(csku); - - CompactSketch csk2o; //ordered - CompactSketch csk2u; //unordered - - csk2o = csku.compact(); //single, heap, ordered - state = new State("SingleItemSketch", 1, 16, true, false, false, false, true, false); - state.check(csk2o); - - csk2o = csku.compact(true, null); //single, heap, ordered - state.check(csk2o); - - csk2o = csku.compact(false, null); //single, heap, unordered - state.check(csk2o); - - csk2o = csko.compact(true, null); //single, heap, ordered - state.check(csk2o); - - csk2o = csko.compact(false, null); //single, heap, unordered - state.check(csk2o); - - wmem = WritableMemory.allocate(16); - csk2o = csku.compact(true, wmem); - state.classType = "DirectCompactSketch"; - state.memory = true; - state.check(csk2o); - - wmem = WritableMemory.allocate(16); - csk2u = csku.compact(false, wmem); - state.classType = "DirectCompactSketch"; - state.check(csk2u); - - wmem = WritableMemory.allocate(16); - csk2o = csko.compact(true, wmem); - state.classType = "DirectCompactSketch"; - state.memory = true; - state.check(csk2o); - - wmem = WritableMemory.allocate(16); - csk2u = csko.compact(false, wmem); - state.classType = "DirectCompactSketch"; - state.check(csk2u); + sk.update(2); + sk.update(3); + + CompactSketch csk1 = sk.compact(true, null); //creates a new object + State state1 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + state1.check(csk1); + + CompactSketch csk2 = sk.compact(false, null); //creates a new object, unordered + State state2 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, !ORDERED, !ESTIMATION); + state2.check(csk2); + + assertNotEquals(csk1, csk2); //order is different and different objects + assertFalse(csk1 == csk2); + + CompactSketch csk3 = csk1.compact(true, null); + state1.check(csk3); + + assertEquals(csk1, csk3); //the same object because wmem = null and csk1.ordered = dstOrdered + assertTrue(csk1 == csk3); + + assertNotEquals(csk2, csk3); //different object because wmem = null and csk2.ordered = false && dstOrdered = true + assertFalse(csk2 == csk3); + + CompactSketch cskc = csk1.compact(); + state1.check(cskc); + + assertEquals(csk1, cskc); //the same object + assertTrue(csk1 == cskc); } @Test @@ -471,7 +617,7 @@ public class CompactSketchTest { e.printStackTrace(); } } - + private static class State { String classType = null; int count = 0; @@ -482,8 +628,7 @@ public class CompactSketchTest { boolean memory = false; boolean ordered = false; boolean estimation = false; - - + State(String classType, int count, int bytes, boolean compact, boolean empty, boolean direct, boolean memory, boolean ordered, boolean estimation) { this.classType = classType; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
