This is an automated email from the ASF dual-hosted git repository. leerho pushed a commit to branch prep_for_items_sketch in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit e4271bf3a83d15134907b7581ffed3e48c7c6809 Author: Lee Rhodes <[email protected]> AuthorDate: Tue Jun 6 19:54:53 2023 -0700 Preparatory work for generic items. 1. The UpdatableBitMask as part of the Flags field has been eliminated. This should make the Flags field identical to the one used in C++. 2. The documentation of the serialization formats has been significantly improved. See the docs for the KllPreambleUtil class. 3. I have reduced the dependence on the KllMemoryValidate class to those that are actually required for validation. 4. More cleanup of fields and variables not really being used. --- .../datasketches/kll/KllDirectDoublesSketch.java | 8 +- .../datasketches/kll/KllDirectFloatsSketch.java | 8 +- .../apache/datasketches/kll/KllDoublesHelper.java | 9 +- .../apache/datasketches/kll/KllDoublesSketch.java | 44 ++++---- .../apache/datasketches/kll/KllFloatsHelper.java | 2 +- .../apache/datasketches/kll/KllFloatsSketch.java | 40 ++++---- .../datasketches/kll/KllHeapDoublesSketch.java | 24 +++-- .../datasketches/kll/KllHeapFloatsSketch.java | 24 +++-- .../org/apache/datasketches/kll/KllHelper.java | 35 +++---- .../apache/datasketches/kll/KllMemoryValidate.java | 72 ++++++------- .../apache/datasketches/kll/KllPreambleUtil.java | 113 ++++++++++----------- .../org/apache/datasketches/kll/KllSketch.java | 18 ++-- .../datasketches/kll/KllMemoryValidateTest.java | 5 +- tools/SketchesCheckstyle.xml | 2 +- 14 files changed, 189 insertions(+), 215 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 8ee358bf..f4c9e80d 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -22,7 +22,6 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; -import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK; import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag; import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM; @@ -30,7 +29,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK; import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN; import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM; @@ -59,13 +57,12 @@ import org.apache.datasketches.memory.WritableMemory; class KllDirectDoublesSketch extends KllDoublesSketch { /** - * The constructor with Memory that can be off-heap. + * The constructor with WritableMemory that can be off-heap. * @param wmem the current WritableMemory * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory * @param memVal the MemoryValadate object */ - KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, - final KllMemoryValidate memVal) { + KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, final KllMemoryValidate memVal) { super(wmem, memReqSvr); levelsArr = memVal.levelsArr; } @@ -83,7 +80,6 @@ class KllDirectDoublesSketch extends KllDoublesSketch { setMemoryPreInts(dstMem, PREAMBLE_INTS_FULL); setMemorySerVer(dstMem, SERIAL_VERSION_UPDATABLE); setMemoryFamilyID(dstMem, Family.KLL.getID()); - setMemoryFlags(dstMem, UPDATABLE_BIT_MASK); setMemoryK(dstMem, k); setMemoryM(dstMem, m); setMemoryN(dstMem, 0); diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index 4d3fb54c..a9ce96ce 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -22,7 +22,6 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; -import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK; import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag; import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM; @@ -30,7 +29,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK; import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN; import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM; @@ -59,13 +57,12 @@ import org.apache.datasketches.memory.WritableMemory; class KllDirectFloatsSketch extends KllFloatsSketch { /** - * The constructor with Memory that can be off-heap. + * The constructor with WritableMemory that can be off-heap. * @param wmem the current WritableMemory * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory * @param memVal the MemoryValadate object */ - KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, - final KllMemoryValidate memVal) { + KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, final KllMemoryValidate memVal) { super(wmem, memReqSvr); levelsArr = memVal.levelsArr; } @@ -83,7 +80,6 @@ class KllDirectFloatsSketch extends KllFloatsSketch { setMemoryPreInts(dstMem, PREAMBLE_INTS_FULL); setMemorySerVer(dstMem, SERIAL_VERSION_UPDATABLE); setMemoryFamilyID(dstMem, Family.KLL.getID()); - setMemoryFlags(dstMem, UPDATABLE_BIT_MASK); setMemoryK(dstMem, k); setMemoryM(dstMem, m); setMemoryN(dstMem, 0); diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 20061559..e5601b8b 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -49,14 +49,13 @@ final class KllDoublesHelper { final int myMinK = mySketch.getMinK(); //update this sketch with level0 items from the other sketch - if (otherDblSk.isCompactSingleItem()) { updateDouble(mySketch, otherDblSk.getDoubleSingleItem()); otherDoubleItemsArr = new double[0]; } else { otherDoubleItemsArr = otherDblSk.getDoubleItemsArray(); for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { - KllDoublesHelper.updateDouble(mySketch, otherDoubleItemsArr[i]); + updateDouble(mySketch, otherDoubleItemsArr[i]); } } // after the level 0 update, we capture the state of levels and items arrays @@ -68,7 +67,7 @@ final class KllDoublesHelper { int[] myNewLevelsArr = myCurLevelsArr; double[] myNewDoubleItemsArr = myCurDoubleItemsArr; - if (otherNumLevels > 1 && !otherDblSk.isCompactSingleItem()) { //now merge other levels if they exist + if (otherNumLevels > 1 && !otherDblSk.isCompactSingleItem()) { //now merge higher levels if they exist final int tmpSpaceNeeded = mySketch.getNumRetained() + KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr); final double[] workbuf = new double[tmpSpaceNeeded]; @@ -115,7 +114,7 @@ final class KllDoublesHelper { } //MEMORY SPACE MANAGEMENT - if (mySketch.updatableMemFormat) { + if (mySketch.serialVersionUpdatable) { mySketch.wmem = KllHelper.memorySpaceMgmt(mySketch, myNewLevelsArr.length, myNewDoubleItemsArr.length); } } @@ -358,7 +357,7 @@ final class KllDoublesHelper { worklevels[0] = 0; // Note: the level zero data from "other" was already inserted into "self" - final int selfPopZero = KllHelper.currentLevelSize(0, myCurNumLevels,myCurLevelsArr); + final int selfPopZero = KllHelper.currentLevelSize(0, myCurNumLevels, myCurLevelsArr); System.arraycopy(myCurDoubleItemsArr, myCurLevelsArr[0], workbuf, worklevels[0], selfPopZero); worklevels[1] = worklevels[0] + selfPopZero; diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 47e86aac..9295f7b6 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -21,8 +21,8 @@ package org.apache.datasketches.kll; import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryUpdatableFormatFlag; -import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_BE_UPDATABLE_FORMAT; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_READ_ONLY; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; @@ -61,60 +61,56 @@ public abstract class KllDoublesSketch extends KllSketch implements QuantilesDou */ public static KllDoublesSketch heapify(final Memory srcMem) { Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); - if (getMemoryUpdatableFormatFlag(srcMem)) { Error.kllSketchThrow(MUST_NOT_BE_UPDATABLE_FORMAT); } return KllHeapDoublesSketch.heapifyImpl(srcMem); } /** - * Create a new direct instance of this sketch with a given <em>k</em>. - * @param k parameter that controls size of the sketch and accuracy of estimates. + * Create a new direct instance of this sketch with the default <em>k</em>. + * The default <em>k</em> = 200 results in a normalized rank error of about + * 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower). * @param dstMem the given destination WritableMemory object for use by the sketch * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory * @return a new direct instance of this sketch */ public static KllDoublesSketch newDirectInstance( - final int k, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { - Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null"); - Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null"); - return KllDirectDoublesSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr); + return newDirectInstance(DEFAULT_K, dstMem, memReqSvr); } - + /** - * Create a new direct instance of this sketch with the default <em>k</em>. - * The default <em>k</em> = 200 results in a normalized rank error of about - * 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower). + * Create a new direct instance of this sketch with a given <em>k</em>. + * @param k parameter that controls size of the sketch and accuracy of estimates. * @param dstMem the given destination WritableMemory object for use by the sketch * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory * @return a new direct instance of this sketch */ public static KllDoublesSketch newDirectInstance( + final int k, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null"); Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null"); - return KllDirectDoublesSketch.newDirectInstance(DEFAULT_K, DEFAULT_M, dstMem, memReqSvr); + return KllDirectDoublesSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr); } /** * Create a new heap instance of this sketch with the default <em>k = 200</em>. * The default <em>k</em> = 200 results in a normalized rank error of about * 1.65%. Larger K will have smaller error but the sketch will be larger (and slower). - * This will have a rank error of about 1.65%. - * @return new KllDoublesSketch on the heap. + * @return new KllDoublesSketch on the Java heap. */ - public static KllDoublesSketch newHeapInstance() { - return new KllHeapDoublesSketch(DEFAULT_K, DEFAULT_M); + public static KllDoublesSketch newHeapInstance() { + return newHeapInstance(DEFAULT_K); } /** * Create a new heap instance of this sketch with a given parameter <em>k</em>. - * <em>k</em> can be between DEFAULT_M and 65535, inclusive. + * <em>k</em> can be between 8, inclusive, and 65535, inclusive. * The default <em>k</em> = 200 results in a normalized rank error of about * 1.65%. Larger K will have smaller error but the sketch will be larger (and slower). * @param k parameter that controls size of the sketch and accuracy of estimates. - * @return new KllDoublesSketch on the heap. + * @return new KllDoublesSketch on the Java heap. */ public static KllDoublesSketch newHeapInstance(final int k) { return new KllHeapDoublesSketch(k, DEFAULT_M); @@ -129,7 +125,7 @@ public abstract class KllDoublesSketch extends KllSketch implements QuantilesDou public static KllDoublesSketch wrap(final Memory srcMem) { Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH); - if (memVal.updatableMemFormat) { + if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE) { return new KllDirectDoublesSketch((WritableMemory) srcMem, null, memVal); } else { return new KllDirectCompactDoublesSketch(srcMem, memVal); @@ -148,10 +144,8 @@ public abstract class KllDoublesSketch extends KllSketch implements QuantilesDou final MemoryRequestServer memReqSvr) { Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH); - if (memVal.updatableMemFormat) { - if (!memVal.readOnly) { - Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null"); - } + if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE && !srcMem.isReadOnly()) { + Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null"); return new KllDirectDoublesSketch(srcMem, memReqSvr, memVal); } else { return new KllDirectCompactDoublesSketch(srcMem, memVal); diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 1e399ab8..5ed537f5 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -114,7 +114,7 @@ final class KllFloatsHelper { } //MEMORY SPACE MANAGEMENT - if (mySketch.updatableMemFormat) { + if (mySketch.serialVersionUpdatable) { mySketch.wmem = KllHelper.memorySpaceMgmt(mySketch, myNewLevelsArr.length, myNewFloatItemsArr.length); } } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 8ff0ccf4..5dc83c33 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -21,8 +21,8 @@ package org.apache.datasketches.kll; import static java.lang.Math.max; import static java.lang.Math.min; -import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryUpdatableFormatFlag; -import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_BE_UPDATABLE_FORMAT; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_READ_ONLY; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; @@ -61,60 +61,56 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa */ public static KllFloatsSketch heapify(final Memory srcMem) { Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); - if (getMemoryUpdatableFormatFlag(srcMem)) { Error.kllSketchThrow(MUST_NOT_BE_UPDATABLE_FORMAT); } return KllHeapFloatsSketch.heapifyImpl(srcMem); } /** - * Create a new direct instance of this sketch with a given <em>k</em>. - * @param k parameter that controls size of the sketch and accuracy of estimates. + * Create a new direct instance of this sketch with the default <em>k</em>. + * The default <em>k</em> = 200 results in a normalized rank error of about + * 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower). * @param dstMem the given destination WritableMemory object for use by the sketch * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory * @return a new direct instance of this sketch */ public static KllFloatsSketch newDirectInstance( - final int k, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { - Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null"); - Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null"); - return KllDirectFloatsSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr); + return newDirectInstance(DEFAULT_K, dstMem, memReqSvr); } - + /** - * Create a new direct instance of this sketch with the default <em>k</em>. - * The default <em>k</em> = 200 results in a normalized rank error of about - * 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower). + * Create a new direct instance of this sketch with a given <em>k</em>. + * @param k parameter that controls size of the sketch and accuracy of estimates. * @param dstMem the given destination WritableMemory object for use by the sketch * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory * @return a new direct instance of this sketch */ public static KllFloatsSketch newDirectInstance( + final int k, final WritableMemory dstMem, final MemoryRequestServer memReqSvr) { Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null"); Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null"); - return KllDirectFloatsSketch.newDirectInstance(DEFAULT_K, DEFAULT_M, dstMem, memReqSvr); + return KllDirectFloatsSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr); } /** * Create a new heap instance of this sketch with the default <em>k = 200</em>. * The default <em>k</em> = 200 results in a normalized rank error of about * 1.65%. Larger K will have smaller error but the sketch will be larger (and slower). - * This will have a rank error of about 1.65%. - * @return new KllFloatsSketch on the heap. + * @return new KllFloatsSketch on the Java heap. */ public static KllFloatsSketch newHeapInstance() { - return new KllHeapFloatsSketch(DEFAULT_K, DEFAULT_M); + return newHeapInstance(DEFAULT_K); } /** * Create a new heap instance of this sketch with a given parameter <em>k</em>. - * <em>k</em> can be between DEFAULT_M and 65535, inclusive. + * <em>k</em> can be between 8, inclusive, and 65535, inclusive. * The default <em>k</em> = 200 results in a normalized rank error of about * 1.65%. Larger K will have smaller error but the sketch will be larger (and slower). * @param k parameter that controls size of the sketch and accuracy of estimates. - * @return new KllFloatsSketch on the heap. + * @return new KllFloatsSketch on the Java heap. */ public static KllFloatsSketch newHeapInstance(final int k) { return new KllHeapFloatsSketch(k, DEFAULT_M); @@ -129,7 +125,7 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa public static KllFloatsSketch wrap(final Memory srcMem) { Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH); - if (memVal.updatableMemFormat) { + if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE) { return new KllDirectFloatsSketch((WritableMemory) srcMem, null, memVal); } else { return new KllDirectCompactFloatsSketch(srcMem, memVal); @@ -148,10 +144,8 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa final MemoryRequestServer memReqSvr) { Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH); - if (memVal.updatableMemFormat) { - if (!memVal.readOnly) { + if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE && !srcMem.isReadOnly()) { Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null"); - } return new KllDirectFloatsSketch(srcMem, memReqSvr, memVal); } else { return new KllDirectCompactFloatsSketch(srcMem, memVal); diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java index 2b18fe2e..89f1dac3 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java @@ -21,6 +21,8 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer; import static org.apache.datasketches.kll.KllSketch.Error.NOT_SINGLE_ITEM; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; @@ -73,6 +75,12 @@ final class KllHeapDoublesSketch extends KllDoublesSketch { doubleItems_ = new double[k]; } + static KllHeapDoublesSketch heapifyImpl(final Memory srcMem) { + Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); + final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH); + return new KllHeapDoublesSketch(srcMem, memVal); + } + /** * Heapify constructor. * @param srcMem Memory object that contains data serialized by this sketch. @@ -86,14 +94,14 @@ final class KllHeapDoublesSketch extends KllDoublesSketch { minK_ = memValidate.minK; levelsArr = memValidate.levelsArr; isLevelZeroSorted_ = memValidate.level0Sorted; - final boolean updatableMemFormat = memValidate.updatableMemFormat; + final boolean serialVersionUpdatable = getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE; - if (memValidate.empty && !updatableMemFormat) { + if (memValidate.empty && !serialVersionUpdatable) { minDoubleItem_ = Double.NaN; maxDoubleItem_ = Double.NaN; doubleItems_ = new double[k_]; } - else if (memValidate.singleItem && !updatableMemFormat) { + else if (memValidate.singleItem && !serialVersionUpdatable) { final double item = srcMem.getDouble(DATA_START_ADR_SINGLE_ITEM); minDoubleItem_ = maxDoubleItem_ = item; doubleItems_ = new double[k_]; @@ -101,7 +109,7 @@ final class KllHeapDoublesSketch extends KllDoublesSketch { } else { //Full or updatableMemFormat int offsetBytes = DATA_START_ADR; - offsetBytes += (updatableMemFormat ? levelsArr.length * Integer.BYTES : (levelsArr.length - 1) * Integer.BYTES); + offsetBytes += (serialVersionUpdatable ? levelsArr.length * Integer.BYTES : (levelsArr.length - 1) * Integer.BYTES); minDoubleItem_ = srcMem.getDouble(offsetBytes); offsetBytes += Double.BYTES; maxDoubleItem_ = srcMem.getDouble(offsetBytes); @@ -110,7 +118,7 @@ final class KllHeapDoublesSketch extends KllDoublesSketch { final int retainedItems = capacityItems - levelsArr[0]; doubleItems_ = new double[capacityItems]; final int shift = levelsArr[0]; - if (updatableMemFormat) { + if (serialVersionUpdatable) { offsetBytes += shift * Double.BYTES; srcMem.getDoubleArray(offsetBytes, doubleItems_, shift, retainedItems); } else { @@ -119,12 +127,6 @@ final class KllHeapDoublesSketch extends KllDoublesSketch { } } - static KllHeapDoublesSketch heapifyImpl(final Memory srcMem) { - Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); - final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH); - return new KllHeapDoublesSketch(srcMem, memVal); - } - @Override public int getK() { return k_; } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java index 73aefdb3..6ce9eaa8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java @@ -21,6 +21,8 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer; import static org.apache.datasketches.kll.KllSketch.Error.NOT_SINGLE_ITEM; import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; @@ -73,6 +75,12 @@ final class KllHeapFloatsSketch extends KllFloatsSketch { floatItems_ = new float[k]; } + static KllHeapFloatsSketch heapifyImpl(final Memory srcMem) { + Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); + final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH); + return new KllHeapFloatsSketch(srcMem, memVal); + } + /** * Heapify constructor. * @param srcMem Memory object that contains data serialized by this sketch. @@ -86,14 +94,14 @@ final class KllHeapFloatsSketch extends KllFloatsSketch { minK_ = memValidate.minK; levelsArr = memValidate.levelsArr; isLevelZeroSorted_ = memValidate.level0Sorted; - final boolean updatableMemFormat = memValidate.updatableMemFormat; + final boolean serialVersionUpdatable = getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE; - if (memValidate.empty && !updatableMemFormat) { + if (memValidate.empty && !serialVersionUpdatable) { minFloatItem_ = Float.NaN; maxFloatItem_ = Float.NaN; floatItems_ = new float[k_]; } - else if (memValidate.singleItem && !updatableMemFormat) { + else if (memValidate.singleItem && !serialVersionUpdatable) { final float item = srcMem.getFloat(DATA_START_ADR_SINGLE_ITEM); minFloatItem_ = maxFloatItem_ = item; floatItems_ = new float[k_]; @@ -101,7 +109,7 @@ final class KllHeapFloatsSketch extends KllFloatsSketch { } else { //Full or updatableMemFormat int offsetBytes = DATA_START_ADR; - offsetBytes += (updatableMemFormat ? levelsArr.length * Integer.BYTES : (levelsArr.length - 1) * Integer.BYTES); + offsetBytes += (serialVersionUpdatable ? levelsArr.length * Integer.BYTES : (levelsArr.length - 1) * Integer.BYTES); minFloatItem_ = srcMem.getFloat(offsetBytes); offsetBytes += Float.BYTES; maxFloatItem_ = srcMem.getFloat(offsetBytes); @@ -110,7 +118,7 @@ final class KllHeapFloatsSketch extends KllFloatsSketch { final int retainedItems = capacityItems - levelsArr[0]; floatItems_ = new float[capacityItems]; final int shift = levelsArr[0]; - if (updatableMemFormat) { + if (serialVersionUpdatable) { offsetBytes += shift * Float.BYTES; srcMem.getFloatArray(offsetBytes, floatItems_, shift, retainedItems); } else { @@ -119,12 +127,6 @@ final class KllHeapFloatsSketch extends KllFloatsSketch { } } - static KllHeapFloatsSketch heapifyImpl(final Memory srcMem) { - Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); - final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH); - return new KllHeapFloatsSketch(srcMem, memVal); - } - @Override public int getK() { return k_; } diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 1384e11b..e071728d 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -45,7 +45,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; import static org.apache.datasketches.kll.KllPreambleUtil.SER_VER_BYTE_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryEmptyFlag; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; @@ -57,7 +56,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySingleItemFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryUpdatableFlag; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; @@ -617,7 +615,7 @@ final class KllHelper { return byteArr; } - static byte[] fastEmptyCompactByteArray(final KllSketch sketch) { + private static byte[] fastEmptyCompactByteArray(final KllSketch sketch) { final byte[] byteArr = new byte[8]; byteArr[0] = PREAMBLE_INTS_EMPTY_SINGLE; //2 byteArr[1] = SERIAL_VERSION_EMPTY_FULL; //1 @@ -628,7 +626,7 @@ final class KllHelper { return byteArr; } - static byte[] fastSingleItemCompactByteArray(final KllSketch sketch) { + private static byte[] fastSingleItemCompactByteArray(final KllSketch sketch) { final SketchType sketchType = sketch.sketchType; final byte[] byteArr; switch (sketchType) { @@ -644,8 +642,8 @@ final class KllHelper { ByteArrayUtil.putDoubleLE(byteArr, DATA_START_ADR_SINGLE_ITEM, dblSk.getDoubleSingleItem()); break; } -// case ITEMS_SKETCH: { -// byteArr = null; //TODO +// case ITEMS_SKETCH: { //TODO +// byteArr = null; // break; // } default: return null; //can't happen @@ -668,7 +666,7 @@ final class KllHelper { final String epsPct = String.format("%.3f%%", sketch.getNormalizedRankError(false) * 100); final String epsPMFPct = String.format("%.3f%%", sketch.getNormalizedRankError(true) * 100); final StringBuilder sb = new StringBuilder(); - final String directStr = sketch.updatableMemFormat ? "Direct" : ""; + final String directStr = sketch.serialVersionUpdatable ? "Direct" : ""; final String skType = sketchType == DOUBLES_SKETCH ? directStr + "Doubles" : sketchType == FLOATS_SKETCH ? directStr + "Floats" : directStr + "Items"; sb.append(Util.LS).append("### Kll").append(skType).append("Sketch Summary:").append(Util.LS); @@ -684,7 +682,7 @@ final class KllHelper { sb.append(" Level 0 Sorted : ").append(sketch.isLevelZeroSorted()).append(Util.LS); sb.append(" Capacity Items : ").append(levelsArr[numLevels]).append(Util.LS); sb.append(" Retained Items : ").append(sketch.getNumRetained()).append(Util.LS); - if (sketch.updatableMemFormat) { + if (sketch.serialVersionUpdatable) { sb.append(" Updatable Storage Bytes: ").append(sketch.getCurrentUpdatableSerializedSizeBytes()).append(Util.LS); } else { sb.append(" Compact Storage Bytes : ").append(sketch.getCurrentCompactSerializedSizeBytes()).append(Util.LS); @@ -735,14 +733,18 @@ final class KllHelper { * This method exists for testing purposes only. The resulting byteArray * structure is an internal format and not supported for general transport * or compatibility between systems and may be subject to change in the future. + * + * <p>The given sketch already has memory in updatable format. This updates + * the flag bits as to the actual state of <i>n</i>.</p> + * * @param sketch the current sketch to be serialized. * @return a byte array in an updatable form. */ private static byte[] toUpdatableByteArrayFromUpdatableMemory(final KllSketch sketch) { final int curBytes = sketch.getCurrentUpdatableSerializedSizeBytes(); final long n = sketch.getN(); - final byte flags = (byte) (UPDATABLE_BIT_MASK - | ((n == 0) ? EMPTY_BIT_MASK : 0) + final byte flags = (byte) + ( ((n == 0) ? EMPTY_BIT_MASK : 0) | ((n == 1) ? SINGLE_ITEM_BIT_MASK : 0)); final byte[] byteArr = new byte[curBytes]; sketch.wmem.getByteArray(0, byteArr, 0, curBytes); @@ -758,7 +760,7 @@ final class KllHelper { * @return a byte array in an updatable form. */ static byte[] toUpdatableByteArrayImpl(final KllSketch sketch) { - if (sketch.hasMemory() && sketch.updatableMemFormat) { + if (sketch.hasMemory() && sketch.serialVersionUpdatable) { return toUpdatableByteArrayFromUpdatableMemory(sketch); } final byte[] byteArr = new byte[sketch.getCurrentUpdatableSerializedSizeBytes()]; @@ -895,7 +897,7 @@ final class KllHelper { // } //MEMORY SPACE MANAGEMENT - if (sketch.updatableMemFormat) { + if (sketch.serialVersionUpdatable) { sketch.wmem = memorySpaceMgmt(sketch, myNewLevelsArr.length, myNewTotalItemsCapacity); } //update our sketch with new expanded spaces @@ -965,24 +967,23 @@ final class KllHelper { return result; } - private static void loadFirst8Bytes(final KllSketch sk, final WritableMemory wmem, - final boolean updatableFormat) { + private static void loadFirst8Bytes(final KllSketch sk, final WritableMemory wmem, + final boolean serialVersionUpdatable) { final boolean empty = sk.getN() == 0; final boolean lvlZeroSorted = sk.isLevelZeroSorted(); final boolean singleItem = sk.getN() == 1; - final int preInts = updatableFormat + final int preInts = serialVersionUpdatable ? PREAMBLE_INTS_FULL : (empty || singleItem) ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_FULL; //load the preamble setMemoryPreInts(wmem, preInts); - final int server = updatableFormat ? SERIAL_VERSION_UPDATABLE + final int server = serialVersionUpdatable ? SERIAL_VERSION_UPDATABLE : (singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL); setMemorySerVer(wmem, server); setMemoryFamilyID(wmem, Family.KLL.getID()); setMemoryEmptyFlag(wmem, empty); setMemoryLevelZeroSortedFlag(wmem, lvlZeroSorted); setMemorySingleItemFlag(wmem, singleItem); - setMemoryUpdatableFlag(wmem, updatableFormat); setMemoryK(wmem, sk.getK()); setMemoryM(wmem, sk.getM()); } diff --git a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java index acfebf03..724c5ca0 100644 --- a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java +++ b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java @@ -19,7 +19,6 @@ package org.apache.datasketches.kll; -import static org.apache.datasketches.common.Family.idToFamily; import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.EMPTYBIT_AND_PREINTS; import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.EMPTYBIT_AND_SER_VER; import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.EMPTYBIT_AND_SINGLEBIT; @@ -27,7 +26,6 @@ import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.INV import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.SINGLEBIT_AND_PREINTS; import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.SINGLEBIT_AND_SER_VER; import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.SRC_NOT_KLL; -import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.UPDATABLEBIT_AND_SER_VER; import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.memoryValidateThrow; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; @@ -48,7 +46,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels; import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryPreInts; import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer; import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySingleItemFlag; -import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryUpdatableFormatFlag; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import org.apache.datasketches.common.Family; @@ -65,59 +62,57 @@ import org.apache.datasketches.memory.WritableMemory; * */ final class KllMemoryValidate { - // first 8 bytes - final int preInts; // = extractPreInts(srcMem); + // first 8 bytes of preamble + final int preInts; final int serVer; final int familyID; - final String famName; final int flags; - boolean empty; - boolean singleItem; - final boolean level0Sorted; - final SketchType sketchType; - boolean updatableMemFormat = false; - final boolean readOnly; final int k; final int m; - final int typeBytes; - - // depending on the layout, the next 8-16 bytes of the preamble, may be filled with assumed items. - // For example, if the layout is compact & empty, n = 0, if compact and single, n = 1, etc. - long n; - // next 4 bytes - int minK; - int numLevels; - // derived + //last byte is unused + + //Flag bits: + final boolean empty; + final boolean level0Sorted; + final boolean singleItem; + //From SerVer + private boolean serialVersionUpdatable; + + // depending on the layout, the next 8-16 bytes of the preamble, may be derived by assumption. + // For example, if the layout is compact & empty, n = 0, if compact and single, n = 1. + long n; //8 bytes (if present) + int minK; //2 bytes (if present) + int numLevels; //1 byte (if present) + //unused byte + int[] levelsArr; //starts at byte 20, adjusted to include top index here + + // derived, other int sketchBytes; - int[] levelsArr; //adjusted to include top index - + private int typeBytes; + KllMemoryValidate(final Memory srcMem, final SketchType sketchType) { - - readOnly = srcMem.isReadOnly(); preInts = getMemoryPreInts(srcMem); serVer = getMemorySerVer(srcMem); - familyID = getMemoryFamilyID(srcMem); if (familyID != Family.KLL.getID()) { memoryValidateThrow(SRC_NOT_KLL, familyID); } - famName = idToFamily(familyID).toString(); flags = getMemoryFlags(srcMem); - updatableMemFormat = getMemoryUpdatableFormatFlag(srcMem); - empty = getMemoryEmptyFlag(srcMem); - singleItem = getMemorySingleItemFlag(srcMem); - level0Sorted = getMemoryLevelZeroSortedFlag(srcMem); - this.sketchType = sketchType; k = getMemoryK(srcMem); m = getMemoryM(srcMem); KllHelper.checkM(m); KllHelper.checkK(k, m); - if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatableMemFormat) { memoryValidateThrow(UPDATABLEBIT_AND_SER_VER, 1); } + + empty = getMemoryEmptyFlag(srcMem); + level0Sorted = getMemoryLevelZeroSortedFlag(srcMem); + singleItem = getMemorySingleItemFlag(srcMem); + + serialVersionUpdatable = serVer == SERIAL_VERSION_UPDATABLE; typeBytes = (sketchType == DOUBLES_SKETCH) ? Double.BYTES : Float.BYTES; - if (updatableMemFormat) { updatableMemFormatValidate((WritableMemory) srcMem); } + if (serialVersionUpdatable) { updatableMemFormatValidate((WritableMemory) srcMem); } else { compactMemoryValidate(srcMem); } } - void compactMemoryValidate(final Memory srcMem) { //FOR HEAPIFY + private void compactMemoryValidate(final Memory srcMem) { //FOR HEAPIFY. NOT UPDATABLE if (empty && singleItem) { memoryValidateThrow(EMPTYBIT_AND_SINGLEBIT, flags); } final int sw = (empty ? 1 : 0) | (singleItem ? 4 : 0); @@ -129,7 +124,7 @@ final class KllMemoryValidate { minK = getMemoryMinK(srcMem); numLevels = getMemoryNumLevels(srcMem); - // Create Levels Arr + // Get Levels Arr and add the last element levelsArr = new int[numLevels + 1]; srcMem.getIntArray(DATA_START_ADR, levelsArr, 0, numLevels); //copies all except the last one final int capacityItems = KllHelper.computeTotalItemCapacity(k, m, numLevels); @@ -163,11 +158,9 @@ final class KllMemoryValidate { } } - void updatableMemFormatValidate(final WritableMemory wSrcMem) { + private void updatableMemFormatValidate(final WritableMemory wSrcMem) { if (preInts != PREAMBLE_INTS_FULL) { memoryValidateThrow(INVALID_PREINTS, preInts); } n = getMemoryN(wSrcMem); - empty = n == 0; //empty & singleItem are set for convenience - singleItem = n == 1; // there is no error checking on these bits minK = getMemoryMinK(wSrcMem); numLevels = getMemoryNumLevels(wSrcMem); @@ -187,7 +180,6 @@ final class KllMemoryValidate { SINGLEBIT_AND_SER_VER("Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: "), SINGLEBIT_AND_PREINTS("Single Item Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: "), INVALID_PREINTS("PreInts Must Be: " + PREAMBLE_INTS_FULL + ", NOT: "), - UPDATABLEBIT_AND_SER_VER("((SerVer == 3) ^ (Updatable Bit)) must = 0, NOT: "), EMPTYBIT_AND_SINGLEBIT("Empty flag bit and SingleItem flag bit cannot both be set. Flags: "); private String msg; diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 5a3cf6e7..62d243e4 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -19,6 +19,7 @@ package org.apache.datasketches.kll; +import static org.apache.datasketches.common.Family.idToFamily; import static org.apache.datasketches.common.Util.zeroPad; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; @@ -37,14 +38,28 @@ import org.apache.datasketches.memory.WritableMemory; * This allows the possibility of the introduction of different serialization * schemes with minimal impact on the rest of the library.</p> * - * <p> - * LAYOUT: The low significance bytes of this <i>long</i> based data structure are on the right. + * <h3>Visual Layout</h3> + * The low significance bytes of this <i>long</i> based data structure are on the right. * The multi-byte primitives are stored in native byte order. - * The single byte fields are treated as unsigned.</p> + * The numeric <i>byte</i> and <i>short</i> fields are treated as unsigned. + * The numeric <i>int</i> and <i>long</i> fields are treated as signed. * - * <p>An empty sketch requires only 8 bytes, which is only preamble. - * A serialized, non-empty KllDoublesSketch requires at least 16 bytes of preamble. - * A serialized, non-empty KllFloatsSketch requires at least 12 bytes of preamble.</p> + * <h3>Preamble Sizes</h3> + * The preamble has 2 formats or sizes. + * <ul><li>A serialized empty sketch requires 8 bytes, all preamble. It is not updatable.</li> + * <li>A serialized, single-item sketch requires 8 bytes of preamble, followed by the one item. It is not updatable.</li> + * <li>A serialized, <i>n > 1</i> sketch requires at least 20 bytes of preamble (5 ints). + * This is followed by the Levels int array, followed by the min and max values, + * followed by the item data arrays. It can be in compact, not updatable format or in regular, updatable format.</li> + * </ul> + * + * <h3>Compact Formats</h3> + * <ul><li>The empty and single-item formats are by definition compact and non-updatable.</li> + * <li>The compact "full" format differs from the fully updatable (writable) format in two ways: + * <ul><li>The last entry of the Levels int array is omitted because it can be derived.</li> + * <li>All empty space of the data arrays is removed in the serialization. + * The empty space can be reconstructed.</li></ul> + * </ul> * * <pre>{@code * Serialized float sketch layout, more than one item: @@ -92,33 +107,26 @@ import org.apache.datasketches.memory.WritableMemory; * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts | * || | 8 | * 1 ||------------------------------Single Item-------------------------------------| + * }</pre> + * The placement and structure of the data block depends on Layout: + * <ul><li>For SerVer = SERIAL_VERSION_EMPTY_FULL (1) and <i>n</i> = 0:<br> + * The sketch is empty. The preamble is 8 bytes. There is no data.</li> + * + * <li>For SerVer = SERIAL_VERSION_SINGLE (2), <i>n</i> is assumed to be 1:<br> + * The single data item is at offset DATA_START_ADR_SINGLE_ITEM = 8.</li> * - * The structure of the data block depends on Layout: - * - * For FLOAT_SINGLE_COMPACT or DOUBLE_SINGLE_COMPACT: - * The single data item is at offset DATA_START_ADR_SINGLE_ITEM = 8 - * - * For FLOAT_FULL_COMPACT: - * The int[] levels array starts at offset DATA_START_ADR_FLOAT = 20 with a length of numLevels integers; - * Followed by Float Min_Item, then Float Max_Item - * Followed by an array of Floats of length retainedItems() - * - * For DOUBLE_FULL_COMPACT - * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 20 with a length of numLevels integers; - * Followed by Double Min_Item, then Double Max_Item - * Followed by an array of Doubles of length retainedItems() - * - * For FLOAT_UPDATABLE - * The int[] levels array starts at offset DATA_START_ADR_FLOAT = 20 with a length of (numLevels + 1) integers; - * Followed by Float Min_Item, then Float Max_Item - * Followed by an array of Floats of length KllHelper.computeTotalItemCapacity(...). + * <li>For SerVer = SERIAL_VERSION_EMPTY_FULL (1) and <i>n</i> > 1:<br> + * The int[] levels array starts at offset DATA_START_ADR_FLOAT = 20 with a length of numLevels integers, + * <ul><li>Followed by Min_Item, then Max_Item,</li> + * <li>Followed by an array of items of length retainedItems().<br> + * The total byte length is dependent on item type.</li></ul> * - * For DOUBLE_UPDATABLE - * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 20 with a length of (numLevels + 1) integers; - * Followed by Double Min_Item, then Double Max_Item - * Followed by an array of Doubles of length KllHelper.computeTotalItemCapacity(...). - * - * }</pre> + * <li>For SerVer = SERIAL_VERSION_UPDATABLE (3)<br> + * The int[] levels array starts at offset DATA_START_ADR_FLOAT = 20 with a length of (numLevels + 1) integers; + * <ul><li>Followed by Min_Item, then Max_Item,</li> + * <li>Followed by an array of items of length KllHelper.computeTotalItemCapacity(...).<br> + * The total byte length is dependent on item type.</li></ul> + * </ul> * * @author Lee Rhodes */ @@ -144,13 +152,13 @@ final class KllPreambleUtil { static final int MIN_K_SHORT_ADR = 16; // to 17 static final int NUM_LEVELS_BYTE_ADR = 18; - // 19 is reserved for future use + // 19 is reserved for future use static final int DATA_START_ADR = 20; // Full Sketch, not single item // Other static members - static final byte SERIAL_VERSION_EMPTY_FULL = 1; // Empty or full preamble, NOT single item format + static final byte SERIAL_VERSION_EMPTY_FULL = 1; // Empty or full preamble, NOT single item format, NOT updatable static final byte SERIAL_VERSION_SINGLE = 2; // only single-item format - static final byte SERIAL_VERSION_UPDATABLE = 3; // + static final byte SERIAL_VERSION_UPDATABLE = 3; // PreInts=5, Full preamble + LevelsArr + min, max + empty space static final byte PREAMBLE_INTS_EMPTY_SINGLE = 2; // for empty or single item static final byte PREAMBLE_INTS_FULL = 5; // Full preamble, not empty nor single item static final byte KLL_FAMILY = 15; @@ -159,7 +167,6 @@ final class KllPreambleUtil { static final int EMPTY_BIT_MASK = 1; static final int LEVEL_ZERO_SORTED_BIT_MASK = 2; static final int SINGLE_ITEM_BIT_MASK = 4; - static final int UPDATABLE_BIT_MASK = 16; /** * Returns a human readable string summary of the internal state of the given sketch byte array. @@ -188,26 +195,27 @@ final class KllPreambleUtil { final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", " + zeroPad(Integer.toBinaryString(flags), 8); final int preInts = memVal.preInts; - final boolean doublesSketch = memVal.sketchType == DOUBLES_SKETCH; - final boolean updatableMemFormat = memVal.updatableMemFormat; + final boolean serialVersionUpdatable = getMemorySerVer(mem) == SERIAL_VERSION_UPDATABLE; final boolean empty = memVal.empty; final boolean singleItem = memVal.singleItem; final int sketchBytes = memVal.sketchBytes; - final int typeBytes = memVal.typeBytes; + final int typeBytes = sketchType == DOUBLES_SKETCH ? Double.BYTES : Float.BYTES; + final int familyID = getMemoryFamilyID(mem); + final String famName = idToFamily(familyID).toString(); final StringBuilder sb = new StringBuilder(); sb.append(Util.LS).append("### KLL SKETCH MEMORY SUMMARY:").append(LS); sb.append("Byte 0 : Preamble Ints : ").append(preInts).append(LS); sb.append("Byte 1 : SerVer : ").append(memVal.serVer).append(LS); sb.append("Byte 2 : FamilyID : ").append(memVal.familyID).append(LS); - sb.append(" FamilyName : ").append(memVal.famName).append(LS); + sb.append(" FamilyName : ").append(famName).append(LS); sb.append("Byte 3 : Flags Field : ").append(flagsStr).append(LS); sb.append(" Bit Flag Name").append(LS); sb.append(" 0 EMPTY COMPACT : ").append(empty).append(LS); sb.append(" 1 LEVEL_ZERO_SORTED : ").append(memVal.level0Sorted).append(LS); sb.append(" 2 SINGLE_ITEM COMPACT : ").append(singleItem).append(LS); - sb.append(" 3 DOUBLES_SKETCH : ").append(doublesSketch).append(LS); - sb.append(" 4 UPDATABLE : ").append(updatableMemFormat).append(LS); + sb.append(" 3 DOUBLES_SKETCH : ").append(sketchType == DOUBLES_SKETCH).append(LS); + sb.append(" 4 UPDATABLE : ").append(serialVersionUpdatable).append(LS); sb.append("Bytes 4-5 : K : ").append(memVal.k).append(LS); sb.append("Byte 6 : Min Level Cap, M : ").append(memVal.m).append(LS); sb.append("Byte 7 : (Reserved) : ").append(LS); @@ -215,7 +223,7 @@ final class KllPreambleUtil { final long n = memVal.n; final int minK = memVal.minK; final int numLevels = memVal.numLevels; - if (updatableMemFormat || (!empty && !singleItem)) { + if (serialVersionUpdatable || (!empty && !singleItem)) { sb.append("Bytes 8-15: N : ").append(n).append(LS); sb.append("Bytes 16-17: MinK : ").append(minK).append(LS); sb.append("Byte 18 : NumLevels : ").append(numLevels).append(LS); @@ -235,7 +243,7 @@ final class KllPreambleUtil { sb.append("### START KLL DATA:").append(LS); int offsetBytes = 0; - if (updatableMemFormat) { + if (serialVersionUpdatable) { sb.append("LEVELS ARR:").append(LS); offsetBytes = DATA_START_ADR; for (int i = 0; i < numLevels + 1; i++) { @@ -243,7 +251,7 @@ final class KllPreambleUtil { offsetBytes += Integer.BYTES; } sb.append("MIN/MAX:").append(LS); - if (doublesSketch) { + if (sketchType == DOUBLES_SKETCH) { sb.append(mem.getDouble(offsetBytes)).append(LS); offsetBytes += typeBytes; sb.append(mem.getDouble(offsetBytes)).append(LS); @@ -256,7 +264,7 @@ final class KllPreambleUtil { } sb.append("ITEMS DATA").append(LS); final int itemsSpace = (sketchBytes - offsetBytes) / typeBytes; - if (doublesSketch) { + if (sketchType == DOUBLES_SKETCH) { for (int i = 0; i < itemsSpace; i++) { sb.append(i + ", " + mem.getDouble(offsetBytes)).append(LS); offsetBytes += typeBytes; @@ -277,7 +285,7 @@ final class KllPreambleUtil { } sb.append("(top level of Levels arr is absent)").append(LS); sb.append("MIN/MAX:").append(LS); - if (doublesSketch) { + if (sketchType == DOUBLES_SKETCH) { sb.append(mem.getDouble(offsetBytes)).append(LS); offsetBytes += typeBytes; sb.append(mem.getDouble(offsetBytes)).append(LS); @@ -290,7 +298,7 @@ final class KllPreambleUtil { } sb.append("ITEMS DATA").append(LS); final int itemSpace = (sketchBytes - offsetBytes) / typeBytes; - if (doublesSketch) { + if (sketchType == DOUBLES_SKETCH) { for (int i = 0; i < itemSpace; i++) { sb.append(i + ", " + mem.getDouble(offsetBytes)).append(LS); offsetBytes += typeBytes; @@ -305,7 +313,7 @@ final class KllPreambleUtil { } else { //single item if (singleItem) { sb.append("SINGLE ITEM DATA").append(LS); - sb.append(doublesSketch + sb.append(sketchType == DOUBLES_SKETCH ? mem.getDouble(DATA_START_ADR_SINGLE_ITEM) : mem.getFloat(DATA_START_ADR_SINGLE_ITEM)).append(LS); } @@ -343,10 +351,6 @@ final class KllPreambleUtil { return (getMemoryFlags(mem) & SINGLE_ITEM_BIT_MASK) != 0; } - static boolean getMemoryUpdatableFormatFlag(final Memory mem) { - return (getMemoryFlags(mem) & UPDATABLE_BIT_MASK) != 0; - } - static int getMemoryK(final Memory mem) { return mem.getShort(K_SHORT_ADR) & 0XFFFF; } @@ -398,11 +402,6 @@ final class KllPreambleUtil { setMemoryFlags(wmem, singleItem ? flags | SINGLE_ITEM_BIT_MASK : flags & ~SINGLE_ITEM_BIT_MASK); } - static void setMemoryUpdatableFlag(final WritableMemory wmem, final boolean updatable) { - final int flags = getMemoryFlags(wmem); - setMemoryFlags(wmem, updatable ? flags | UPDATABLE_BIT_MASK : flags & ~UPDATABLE_BIT_MASK); - } - static void setMemoryK(final WritableMemory wmem, final int memK) { wmem.putShort(K_SHORT_ADR, (short) memK); } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 700416c8..aa5bb204 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -22,6 +22,7 @@ package org.apache.datasketches.kll; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE; import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DOUBLE; import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_FLOAT; import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_READ_ONLY; @@ -89,8 +90,7 @@ public abstract class KllSketch implements QuantilesAPI { SRC_MUST_BE_DOUBLE("Given sketch must be of type Double."), SRC_MUST_BE_FLOAT("Given sketch must be of type Float."), MRS_MUST_NOT_BE_NULL("MemoryRequestServer cannot be null."), - NOT_SINGLE_ITEM("Sketch is not single item."), - MUST_NOT_BE_UPDATABLE_FORMAT("Given Memory object must not be in updatableFormat."); + NOT_SINGLE_ITEM("Sketch is not single item."); private String msg; @@ -128,7 +128,7 @@ public abstract class KllSketch implements QuantilesAPI { static final int MIN_M = 2; //The minimum M static final Random random = new Random(); final SketchType sketchType; - final boolean updatableMemFormat; + final boolean serialVersionUpdatable; final MemoryRequestServer memReqSvr; final boolean readOnly; int[] levelsArr; @@ -147,8 +147,8 @@ public abstract class KllSketch implements QuantilesAPI { this.sketchType = sketchType; this.wmem = wmem; if (wmem != null) { - this.updatableMemFormat = KllPreambleUtil.getMemoryUpdatableFormatFlag(wmem); - this.readOnly = wmem.isReadOnly() || !updatableMemFormat; + this.serialVersionUpdatable = KllPreambleUtil.getMemorySerVer(wmem) == SERIAL_VERSION_UPDATABLE; + this.readOnly = wmem.isReadOnly() || !serialVersionUpdatable; if (readOnly) { this.memReqSvr = null; } else { @@ -156,7 +156,7 @@ public abstract class KllSketch implements QuantilesAPI { this.memReqSvr = memReqSvr; } } else { //wmem is null, heap case - this.updatableMemFormat = false; + this.serialVersionUpdatable = false; this.memReqSvr = null; this.readOnly = false; } @@ -270,7 +270,7 @@ public abstract class KllSketch implements QuantilesAPI { * @return the number of bytes this sketch would require if serialized. */ public int getSerializedSizeBytes() { - return (updatableMemFormat) + return (serialVersionUpdatable) ? getCurrentUpdatableSerializedSizeBytes() : getCurrentCompactSerializedSizeBytes(); } @@ -309,7 +309,7 @@ public abstract class KllSketch implements QuantilesAPI { * @return true if the backing WritableMemory is in updatable format. */ public final boolean isMemoryUpdatableFormat() { - return hasMemory() && updatableMemFormat; + return hasMemory() && serialVersionUpdatable; } @Override @@ -391,7 +391,7 @@ public abstract class KllSketch implements QuantilesAPI { abstract void incNumLevels(); final boolean isCompactSingleItem() { - return hasMemory() && !updatableMemFormat && (getN() == 1); + return hasMemory() && !serialVersionUpdatable && (getN() == 1); } boolean isDoublesSketch() { return sketchType == DOUBLES_SKETCH; } diff --git a/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java b/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java index 5c23731f..972e186a 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java @@ -23,8 +23,8 @@ import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL; import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL; +import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE; import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK; -import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags; import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; @@ -72,8 +72,7 @@ public class KllMemoryValidateTest { KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(); byte[] byteArr = sk.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(byteArr); - setMemoryFlags(wmem, UPDATABLE_BIT_MASK); - setMemorySerVer(wmem, SERIAL_VERSION_EMPTY_FULL); + setMemorySerVer(wmem, SERIAL_VERSION_SINGLE); KllMemoryValidate memVal = new KllMemoryValidate(wmem, FLOATS_SKETCH); } diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml index 0c55318a..0f5c90f9 100644 --- a/tools/SketchesCheckstyle.xml +++ b/tools/SketchesCheckstyle.xml @@ -64,7 +64,7 @@ under the License. <!-- Size Violations --> <module name="LineLength"> <property name="severity" value="warning"/> - <property name="max" value="120"/> + <property name="max" value="140"/> <property name="ignorePattern" value="^package.*|^import.*|a href|href|http://|https://|ftp://"/> <!-- <metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/> --> </module> --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
