This is an automated email from the ASF dual-hosted git repository. leerho pushed a commit to branch touchup_after_merge in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 9e5b51c8f426a06f0b03da38438f7398defcd50a Author: Lee Rhodes <lee...@gmail.com> AuthorDate: Mon Feb 24 22:28:13 2025 -0800 touch-up changes mostly to clean up code contributed by others. --- .../apache/datasketches/theta/CompactSketch.java | 51 +++++++++++++++++++++- .../theta/DirectCompactCompressedSketch.java | 4 +- .../datasketches/theta/IntersectionImpl.java | 6 +-- .../java/org/apache/datasketches/theta/Sketch.java | 2 +- .../datasketches/theta/WrappedCompactSketch.java | 2 +- .../datasketches/hll/SizeAndModeTransitions.java | 1 - 6 files changed, 56 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/apache/datasketches/theta/CompactSketch.java b/src/main/java/org/apache/datasketches/theta/CompactSketch.java index b6431416..630b2e13 100644 --- a/src/main/java/org/apache/datasketches/theta/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/CompactSketch.java @@ -228,14 +228,61 @@ public abstract class CompactSketch extends Sketch { "Corrupted: Serialization Version " + serVer + " not recognized."); } + /** + * Wrap takes the sketch image in the given Memory and refers to it directly. + * There is no data copying onto the java heap. + * The wrap operation enables fast read-only merging and access to all the public read-only API. + * + * <p>Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have + * been explicitly stored as direct sketches can be wrapped. + * Wrapping earlier serial version sketches will result in a heapify operation. + * These early versions were never designed to "wrap".</p> + * + * <p>Wrapping any subclass of this class that is empty or contains only a single item will + * result in heapified forms of empty and single item sketch respectively. + * This is actually faster and consumes less overall memory.</p> + * + * <p>This method checks if the DEFAULT_UPDATE_SEED was used to create the source Memory image. + * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, + * so the resulting heapified CompactSketch will be given the hash of DEFAULT_UPDATE_SEED.</p> + * + * @param bytes a byte array image of a Sketch that was created using the DEFAULT_UPDATE_SEED. + * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a> + * + * @return a CompactSketch backed by the given Memory except as above. + */ public static CompactSketch wrap(final byte[] bytes) { return wrap(bytes, ThetaUtil.DEFAULT_UPDATE_SEED, false); } - + + /** + * Wrap takes the sketch image in the given Memory and refers to it directly. + * There is no data copying onto the java heap. + * The wrap operation enables fast read-only merging and access to all the public read-only API. + * + * <p>Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have + * been explicitly stored as direct sketches can be wrapped. + * Wrapping earlier serial version sketches will result in a heapify operation. + * These early versions were never designed to "wrap".</p> + * + * <p>Wrapping any subclass of this class that is empty or contains only a single item will + * result in heapified forms of empty and single item sketch respectively. + * This is actually faster and consumes less overall memory.</p> + * + * <p>This method checks if the given expectedSeed was used to create the source Memory image. + * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, + * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.</p> + * + * @param bytes a byte array image of a Sketch that was created using the given expectedSeed. + * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a> + * @param expectedSeed the seed used to validate the given Memory image. + * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>. + * @return a CompactSketch backed by the given Memory except as above. + */ public static CompactSketch wrap(final byte[] bytes, final long expectedSeed) { return wrap(bytes, expectedSeed, true); } - + private static CompactSketch wrap(final byte[] bytes, final long seed, final boolean enforceSeed) { final int serVer = bytes[PreambleUtil.SER_VER_BYTE]; final int familyId = bytes[PreambleUtil.FAMILY_BYTE]; diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java index 6c83add8..64c0fafd 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java @@ -82,7 +82,7 @@ class DirectCompactCompressedSketch extends DirectCompactSketch { private static final int START_PACKED_DATA_EXACT_MODE = 8; private static final int START_PACKED_DATA_ESTIMATION_MODE = 16; - + @Override public int getRetainedEntries(final boolean valid) { //compact is always valid // number of entries is stored using variable length encoding @@ -132,7 +132,7 @@ class DirectCompactCompressedSketch extends DirectCompactSketch { final int numEntries = getRetainedEntries(); final long[] cache = new long[numEntries]; int i = 0; - HashIterator it = iterator(); + final HashIterator it = iterator(); while (it.next()) { cache[i++] = it.get(); } diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java index b1be73c7..772480fe 100644 --- a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java @@ -504,13 +504,13 @@ class IntersectionImpl extends Intersection { } private void moveDataToTgt(final Sketch sketch) { - int count = sketch.getRetainedEntries(); + final int count = sketch.getRetainedEntries(); int tmpCnt = 0; if (wmem_ != null) { //Off Heap puts directly into mem final int preBytes = CONST_PREAMBLE_LONGS << 3; final int lgArrLongs = lgArrLongs_; final long thetaLong = thetaLong_; - HashIterator it = sketch.iterator(); + final HashIterator it = sketch.iterator(); while (it.next()) { final long hash = it.get(); if (continueCondition(thetaLong, hash)) { continue; } @@ -518,7 +518,7 @@ class IntersectionImpl extends Intersection { tmpCnt++; } } else { //On Heap. Assumes HT exists and is large enough - HashIterator it = sketch.iterator(); + final HashIterator it = sketch.iterator(); while (it.next()) { final long hash = it.get(); if (continueCondition(thetaLong_, hash)) { continue; } diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java index 6583e2db..cb202a18 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta/Sketch.java @@ -472,7 +472,7 @@ public abstract class Sketch implements MemoryStatus { final int w = width > 0 ? width : 8; // default is 8 wide if (curCount > 0) { sb.append("### SKETCH DATA DETAIL"); - HashIterator it = iterator(); + final HashIterator it = iterator(); int j = 0; while (it.next()) { final long h = it.get(); diff --git a/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java b/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java index b1073159..519857d2 100644 --- a/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java @@ -136,7 +136,7 @@ class WrappedCompactSketch extends CompactSketch { long[] getCache() { final long[] cache = new long[getRetainedEntries()]; int i = 0; - HashIterator it = iterator(); + final HashIterator it = iterator(); while (it.next()) { cache[i++] = it.get(); } diff --git a/src/test/java/org/apache/datasketches/hll/SizeAndModeTransitions.java b/src/test/java/org/apache/datasketches/hll/SizeAndModeTransitions.java index ad7a4e50..c9ffa1e2 100644 --- a/src/test/java/org/apache/datasketches/hll/SizeAndModeTransitions.java +++ b/src/test/java/org/apache/datasketches/hll/SizeAndModeTransitions.java @@ -45,7 +45,6 @@ public class SizeAndModeTransitions { } else { sk = new HllSketch(lgK, tgtHllType); } - String type = tgtHllType.toString(); String store = direct ? "Memory" : "Heap"; for (int i = 1; i <= N; i++) { sk.update(i); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datasketches.apache.org For additional commands, e-mail: commits-h...@datasketches.apache.org