This is an automated email from the ASF dual-hosted git repository. leerho pushed a commit to branch replicate_KllDoubles_to_KllFloats in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 4c27b53bca17cf7b179749a165ca33af91a29f1b Author: Lee Rhodes <[email protected]> AuthorDate: Thu Apr 4 16:36:22 2024 -0700 This replicates the recent additions from KllDoubles to KllFloats. Specifically weighted updates and vector updates. --- .../datasketches/kll/KllDirectFloatsSketch.java | 89 +++--- .../apache/datasketches/kll/KllDoublesHelper.java | 6 +- .../apache/datasketches/kll/KllFloatsHelper.java | 60 +--- .../apache/datasketches/kll/KllFloatsSketch.java | 326 ++++++++++++++++++--- .../kll/KllFloatsSketchSortedView.java | 253 ---------------- .../datasketches/kll/KllHeapFloatsSketch.java | 51 ++-- .../quantilescommon/DoublesSketchSortedView.java | 2 +- ...SortedView.java => FloatsSketchSortedView.java} | 28 +- .../quantilescommon/ItemsSketchSortedView.java | 2 +- .../org/apache/datasketches/common/TestUtil.java | 4 +- .../kll/KllDirectDoublesSketchTest.java | 16 +- .../kll/KllDirectFloatsSketchTest.java | 40 ++- .../datasketches/kll/KllFloatsSketchSerDeTest.java | 13 +- .../datasketches/kll/KllFloatsSketchTest.java | 97 ++++++ .../quantilescommon/CrossCheckQuantilesTest.java | 7 +- .../quantilescommon/ReflectUtilityTest.java | 2 +- 16 files changed, 557 insertions(+), 439 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index c9be0687..a61d19f7 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -116,7 +116,7 @@ class KllDirectFloatsSketch extends KllFloatsSketch { return new KllDirectFloatsSketch(UPDATABLE, wMem, memReqSvr, memVal); } - //END of Constructors + //End of Constructors @Override String getItemAsString(final int index) { @@ -129,46 +129,72 @@ class KllDirectFloatsSketch extends KllFloatsSketch { return getMemoryK(wmem); } + //MinMax Methods + @Override public float getMaxItem() { - int levelsArrBytes = 0; if (sketchStructure == COMPACT_EMPTY || isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - else if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); } - else if (sketchStructure == COMPACT_FULL) { - levelsArrBytes = getLevelsArrSizeBytes(COMPACT_FULL); - } else { //UPDATABLE - levelsArrBytes = getLevelsArrSizeBytes(UPDATABLE); - } - final int offset = DATA_START_ADR + levelsArrBytes + ITEM_BYTES; + if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES; return wmem.getFloat(offset); } + @Override + float getMaxItemInternal() { + if (sketchStructure == COMPACT_EMPTY || isEmpty()) { return Float.NaN; } + if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES; + return wmem.getFloat(offset); + } + @Override String getMaxItemAsString() { - if (isEmpty()) { return "NaN"; } - return Float.toString(getMaxItem()); + final float maxItem = getMaxItemInternal(); + return Float.toString(maxItem); } @Override public float getMinItem() { - int levelsArrBytes = 0; if (sketchStructure == COMPACT_EMPTY || isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - else if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); } - else if (sketchStructure == COMPACT_FULL) { - levelsArrBytes = getLevelsArrSizeBytes(COMPACT_FULL); - } else { //UPDATABLE - levelsArrBytes = getLevelsArrSizeBytes(UPDATABLE); - } - final int offset = DATA_START_ADR + levelsArrBytes; + if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure); + return wmem.getFloat(offset); + } + + @Override + float getMinItemInternal() { + if (sketchStructure == COMPACT_EMPTY || isEmpty()) { return Float.NaN; } + if (sketchStructure == COMPACT_SINGLE) { return getFloatSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure); return wmem.getFloat(offset); } @Override String getMinItemAsString() { - if (isEmpty()) { return "NaN"; } - return Float.toString(getMinItem()); + final float minItem = getMinItemInternal(); + return Float.toString(minItem); + } + + @Override + void setMaxItem(final float item) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES; + wmem.putFloat(offset, item); } + @Override + void setMinItem(final float item) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure); + wmem.putFloat(offset, item); + } + + //END MinMax Methods + @Override public long getN() { if (sketchStructure == COMPACT_EMPTY) { return 0; } @@ -176,7 +202,7 @@ class KllDirectFloatsSketch extends KllFloatsSketch { else { return getMemoryN(wmem); } } - //restricted + //other restricted @Override //returns updatable, expanded array including free space at bottom float[] getFloatItemsArray() { @@ -317,23 +343,16 @@ class KllDirectFloatsSketch extends KllFloatsSketch { } @Override - void setLevelZeroSorted(final boolean sorted) { + void setFloatItemsArrayAt(final int index, final float[] items, final int srcOffset, final int length) { if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } - setMemoryLevelZeroSortedFlag(wmem, sorted); + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + (index + 2) * ITEM_BYTES; + wmem.putFloatArray(offset, items, srcOffset, length); } - + @Override - void setMaxItem(final float item) { - if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } - final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES; - wmem.putFloat(offset, item); - } - - @Override - void setMinItem(final float item) { + void setLevelZeroSorted(final boolean sorted) { if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } - final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure); - wmem.putFloat(offset, item); + setMemoryLevelZeroSortedFlag(wmem, sorted); } @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 5bfaf5dc..67035b45 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -30,8 +30,6 @@ import java.util.Random; import org.apache.datasketches.memory.WritableMemory; -// -// /** * Static methods to support KllDoublesSketch * @author Kevin Lang @@ -453,14 +451,14 @@ final class KllDoublesHelper { workLevels[lvl + 1] = workLevels[lvl] + selfPop + otherPop; assert selfPop >= 0 && otherPop >= 0; if (selfPop == 0 && otherPop == 0) { continue; } - else if (selfPop > 0 && otherPop == 0) { + if (selfPop > 0 && otherPop == 0) { System.arraycopy(myCurDoubleItemsArr, myCurLevelsArr[lvl], workBuf, workLevels[lvl], selfPop); } else if (selfPop == 0 && otherPop > 0) { System.arraycopy(otherDoubleItemsArr, otherLevelsArr[lvl], workBuf, workLevels[lvl], otherPop); } else if (selfPop > 0 && otherPop > 0) { - mergeSortedDoubleArrays( //only workbuf is modified + mergeSortedDoubleArrays( //only workBuf is modified myCurDoubleItemsArr, myCurLevelsArr[lvl], selfPop, otherDoubleItemsArr, otherLevelsArr[lvl], otherPop, workBuf, workLevels[lvl]); diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 1a71c344..50cadeb3 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -24,15 +24,12 @@ import static java.lang.Math.min; import static org.apache.datasketches.common.Util.isEven; import static org.apache.datasketches.common.Util.isOdd; import static org.apache.datasketches.kll.KllHelper.findLevelToCompact; -import static org.apache.datasketches.kll.KllSketch.DEFAULT_M; import java.util.Arrays; import java.util.Random; import org.apache.datasketches.memory.WritableMemory; -// -// /** * Static methods to support KllFloatsSketch * @author Kevin Lang @@ -59,7 +56,7 @@ final class KllFloatsHelper { * It cannot be used while merging, while reducing k, or anything else. * @param fltSk the current KllFloatsSketch */ - private static void compressWhileUpdatingSketch(final KllFloatsSketch fltSk) { + static void compressWhileUpdatingSketch(final KllFloatsSketch fltSk) { final int level = findLevelToCompact(fltSk.getK(), fltSk.getM(), fltSk.getNumLevels(), fltSk.levelsArr); if (level == fltSk.getNumLevels() - 1) { @@ -128,8 +125,8 @@ final class KllFloatsHelper { //capture my key mutable fields before doing any merging final boolean myEmpty = mySketch.isEmpty(); - final float myMin = myEmpty ? Float.NaN : mySketch.getMinItem(); - final float myMax = myEmpty ? Float.NaN : mySketch.getMaxItem(); + final float myMin = mySketch.getMinItemInternal(); + final float myMax = mySketch.getMaxItemInternal(); final int myMinK = mySketch.getMinK(); final long finalN = Math.addExact(mySketch.getN(), otherFltSk.getN()); @@ -140,12 +137,12 @@ final class KllFloatsHelper { //MERGE: update this sketch with level0 items from the other sketch if (otherFltSk.isCompactSingleItem()) { - updateFloat(mySketch, otherFltSk.getFloatSingleItem()); + KllFloatsSketch.updateFloat(mySketch, otherFltSk.getFloatSingleItem()); otherFloatItemsArr = new float[0]; } else { otherFloatItemsArr = otherFltSk.getFloatItemsArray(); for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { - updateFloat(mySketch, otherFloatItemsArr[i]); + KllFloatsSketch.updateFloat(mySketch, otherFloatItemsArr[i]); } } @@ -313,35 +310,6 @@ final class KllFloatsHelper { } } - //Called from KllFloatsSketch::update and merge - static void updateFloat(final KllFloatsSketch fltSk, final float item) { - fltSk.updateMinMax(item); - int freeSpace = fltSk.levelsArr[0]; - assert freeSpace >= 0; - if (freeSpace == 0) { - compressWhileUpdatingSketch(fltSk); - freeSpace = fltSk.levelsArr[0]; - assert (freeSpace > 0); - } - fltSk.incN(1); - fltSk.setLevelZeroSorted(false); - final int nextPos = freeSpace - 1; - fltSk.setLevelsArrayAt(0, nextPos); - fltSk.setFloatItemsArrayAt(nextPos, item); - } - - //Called from KllFloatsSketch::update with weight - static void updateFloat(final KllFloatsSketch fltSk, final float item, final long weight) { - if (weight < fltSk.levelsArr[0]) { - for (int i = 0; i < (int)weight; i++) { updateFloat(fltSk, item); } - } else { - fltSk.updateMinMax(item); - final KllHeapFloatsSketch tmpSk = new KllHeapFloatsSketch(fltSk.getK(), DEFAULT_M, item, weight); - - fltSk.merge(tmpSk); - } - } - /** * Compression algorithm used to merge higher levels. * <p>Here is what we do for each level:</p> @@ -465,35 +433,35 @@ final class KllFloatsHelper { } private static void populateFloatWorkArrays( //workBuf and workLevels are modified - final float[] workbuf, final int[] worklevels, final int provisionalNumLevels, + final float[] workBuf, final int[] workLevels, final int provisionalNumLevels, final int myCurNumLevels, final int[] myCurLevelsArr, final float[] myCurFloatItemsArr, final int otherNumLevels, final int[] otherLevelsArr, final float[] otherFloatItemsArr) { - worklevels[0] = 0; + workLevels[0] = 0; // Note: the level zero data from "other" was already inserted into "self". // This copies into workbuf. final int selfPopZero = KllHelper.currentLevelSizeItems(0, myCurNumLevels, myCurLevelsArr); - System.arraycopy( myCurFloatItemsArr, myCurLevelsArr[0], workbuf, worklevels[0], selfPopZero); - worklevels[1] = worklevels[0] + selfPopZero; + System.arraycopy( myCurFloatItemsArr, myCurLevelsArr[0], workBuf, workLevels[0], selfPopZero); + workLevels[1] = workLevels[0] + selfPopZero; for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { final int selfPop = KllHelper.currentLevelSizeItems(lvl, myCurNumLevels, myCurLevelsArr); final int otherPop = KllHelper.currentLevelSizeItems(lvl, otherNumLevels, otherLevelsArr); - worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; + workLevels[lvl + 1] = workLevels[lvl] + selfPop + otherPop; assert selfPop >= 0 && otherPop >= 0; if (selfPop == 0 && otherPop == 0) { continue; } if (selfPop > 0 && otherPop == 0) { - System.arraycopy(myCurFloatItemsArr, myCurLevelsArr[lvl], workbuf, worklevels[lvl], selfPop); + System.arraycopy(myCurFloatItemsArr, myCurLevelsArr[lvl], workBuf, workLevels[lvl], selfPop); } else if (selfPop == 0 && otherPop > 0) { - System.arraycopy(otherFloatItemsArr, otherLevelsArr[lvl], workbuf, worklevels[lvl], otherPop); + System.arraycopy(otherFloatItemsArr, otherLevelsArr[lvl], workBuf, workLevels[lvl], otherPop); } else if (selfPop > 0 && otherPop > 0) { - mergeSortedFloatArrays( + mergeSortedFloatArrays( //only workBuf is modified myCurFloatItemsArr, myCurLevelsArr[lvl], selfPop, otherFloatItemsArr, otherLevelsArr[lvl], otherPop, - workbuf, worklevels[lvl]); + workBuf, workLevels[lvl]); } } } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 2b0c15ef..4fcee5ee 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -25,6 +25,7 @@ import static org.apache.datasketches.common.ByteArrayUtil.putFloatLE; import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; +import java.util.Arrays; import java.util.Objects; import org.apache.datasketches.common.ArrayOfItemsSerDe; @@ -35,7 +36,7 @@ import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.quantilescommon.FloatsSortedView; +import org.apache.datasketches.quantilescommon.FloatsSketchSortedView; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; import org.apache.datasketches.quantilescommon.QuantilesFloatsAPI; import org.apache.datasketches.quantilescommon.QuantilesFloatsSketchIterator; @@ -46,7 +47,7 @@ import org.apache.datasketches.quantilescommon.QuantilesFloatsSketchIterator; * @see org.apache.datasketches.kll.KllSketch */ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloatsAPI { - private KllFloatsSketchSortedView kllFloatsSV = null; + private FloatsSketchSortedView floatsSV = null; final static int ITEM_BYTES = Float.BYTES; KllFloatsSketch( @@ -171,21 +172,21 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa public double[] getCDF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } refreshSortedView(); - return kllFloatsSV.getCDF(splitPoints, searchCrit); + return floatsSV.getCDF(splitPoints, searchCrit); } @Override public double[] getPMF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } refreshSortedView(); - return kllFloatsSV.getPMF(splitPoints, searchCrit); + return floatsSV.getPMF(splitPoints, searchCrit); } @Override public float getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } refreshSortedView(); - return kllFloatsSV.getQuantile(rank, searchCrit); + return floatsSV.getQuantile(rank, searchCrit); } @Override @@ -195,7 +196,7 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa final int len = ranks.length; final float[] quantiles = new float[len]; for (int i = 0; i < len; i++) { - quantiles[i] = kllFloatsSV.getQuantile(ranks[i], searchCrit); + quantiles[i] = floatsSV.getQuantile(ranks[i], searchCrit); } return quantiles; } @@ -224,7 +225,7 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa public double getRank(final float quantile, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } refreshSortedView(); - return kllFloatsSV.getRank(quantile, searchCrit); + return floatsSV.getRank(quantile, searchCrit); } /** @@ -254,19 +255,11 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa final int len = quantiles.length; final double[] ranks = new double[len]; for (int i = 0; i < len; i++) { - ranks[i] = kllFloatsSV.getRank(quantiles[i], searchCrit); + ranks[i] = floatsSV.getRank(quantiles[i], searchCrit); } return ranks; } - @Override - @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "OK in this case.") - public FloatsSortedView getSortedView() { - if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - refreshSortedView(); - return kllFloatsSV; - } - @Override public QuantilesFloatsSketchIterator iterator() { return new KllFloatsSketchIterator( @@ -280,7 +273,7 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa final KllFloatsSketch othFltSk = (KllFloatsSketch)other; if (othFltSk.isEmpty()) { return; } KllFloatsHelper.mergeFloatImpl(this, othFltSk); - kllFloatsSV = null; + floatsSV = null; } /** @@ -299,7 +292,7 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa setMinItem(Float.NaN); setMaxItem(Float.NaN); setFloatItemsArray(new float[k]); - kllFloatsSV = null; + floatsSV = null; } @Override @@ -318,14 +311,49 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa return KllHelper.toStringImpl(sketch, withLevels, withLevelsAndItems, getSerDe()); } + //SINGLE UPDATE + @Override public void update(final float item) { if (Float.isNaN(item)) { return; } //ignore if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } - KllFloatsHelper.updateFloat(this, item); - kllFloatsSV = null; + updateFloat(this, item); + floatsSV = null; + } + + //Also Called from KllFloatsHelper::merge + static void updateFloat(final KllFloatsSketch fltSk, final float item) { + fltSk.updateMinMax(item); + int freeSpace = fltSk.levelsArr[0]; + assert (freeSpace >= 0); + if (freeSpace == 0) { + KllFloatsHelper.compressWhileUpdatingSketch(fltSk); + freeSpace = fltSk.levelsArr[0]; + assert (freeSpace > 0); + } + fltSk.incN(1); + fltSk.setLevelZeroSorted(false); + final int nextPos = freeSpace - 1; + fltSk.setLevelsArrayAt(0, nextPos); + fltSk.setFloatItemsArrayAt(nextPos, item); } + /** + * Single update of min and max + * @param item the source item, it must not be a NaN. + */ + final void updateMinMax(final float item) { + if (isEmpty() || Float.isNaN(getMinItemInternal())) { + setMinItem(item); + setMaxItem(item); + } else { + setMinItem(min(getMinItemInternal(), item)); + setMaxItem(max(getMaxItemInternal(), item)); + } + } + + //WEIGHTED UPDATE + /** * Weighted update. Updates this sketch with the given item the number of times specified by the given integer weight. * @param item the item to be repeated. NaNs are ignored. @@ -335,12 +363,97 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa if (Float.isNaN(item)) { return; } //ignore if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } if (weight < 1L) { throw new SketchesArgumentException("Weight is less than one."); } - if (weight == 1L) { KllFloatsHelper.updateFloat(this, item); } - else { KllFloatsHelper.updateFloat(this, item, weight); } - kllFloatsSV = null; + if (weight == 1L) { updateFloat(this, item); } + else { + if (weight < levelsArr[0]) { + for (int i = 0; i < (int)weight; i++) { updateFloat(this, item); } + } else { + final KllHeapFloatsSketch tmpSk = new KllHeapFloatsSketch(getK(), DEFAULT_M, item, weight); + merge(tmpSk); + } + } + floatsSV = null; + } + + // VECTOR UPDATE + + /** + * Vector update. Updates this sketch with the given array (vector) of items, starting at the items + * offset for a length number of items. This is not supported for direct sketches. + * @param items the vector of items + * @param offset the starting index of the items[] array + * @param length the number of items + */ + public void update(final float[] items, final int offset, final int length) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + if (length == 0) { return; } + if (!hasNaN(items, offset, length)) { + updateFloat(items, offset, length); //fast path + floatsSV = null; + return; + } + //has at least one NaN + final int end = offset + length; + for (int i = offset; i < end; i++) { + final float v = items[i]; + if (!Float.isNaN(v)) { + updateFloat(this, v); //normal path + floatsSV = null; + } + } } - //restricted + // No NaNs are allowed at this point + private void updateFloat(final float[] srcItems, final int srcOffset, final int length) { + if (isEmpty() || Float.isNaN(getMinItemInternal())) { + setMinItem(srcItems[srcOffset]); //initialize with a real value + setMaxItem(srcItems[srcOffset]); + } + + int count = 0; + while (count < length) { + if (levelsArr[0] == 0) { + KllFloatsHelper.compressWhileUpdatingSketch(this); + } + final int spaceNeeded = length - count; + final int freeSpace = levelsArr[0]; + assert (freeSpace > 0); + final int numItemsToCopy = min(spaceNeeded, freeSpace); + final int dstOffset = freeSpace - numItemsToCopy; + final int localSrcOffset = srcOffset + count; + setFloatItemsArrayAt(dstOffset, srcItems, localSrcOffset, numItemsToCopy); + updateMinMax(srcItems, localSrcOffset, numItemsToCopy); + count += numItemsToCopy; + incN(numItemsToCopy); + setLevelsArrayAt(0, dstOffset); + } + setLevelZeroSorted(false); + } + + /** + * Vector update of min and max. + * @param srcItems the input source array of values, no NaNs allowed. + * @param srcOffset the starting offset in srcItems + * @param length the number of items to update min and max + */ + private void updateMinMax(final float[] srcItems, final int srcOffset, final int length) { + final int end = srcOffset + length; + for (int i = srcOffset; i < end; i++) { + setMinItem(min(getMinItemInternal(), srcItems[i])); + setMaxItem(max(getMaxItemInternal(), srcItems[i])); + } + } + + // this returns on the first detected NaN. + private static boolean hasNaN(final float[] items, final int offset, final int length) { + final int end = offset + length; + for (int i = offset; i < end; i++) { + if (Float.isNaN(items[i])) { return true; } + } + return false; + } + + // END ALL UPDATE METHODS /** * @return full size of internal items array including empty space at bottom. @@ -354,6 +467,16 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa abstract float getFloatSingleItem(); + // Min & Max Methods + + abstract float getMaxItemInternal(); + + abstract void setMaxItem(float item); + + abstract float getMinItemInternal(); + + abstract void setMinItem(float item); + @Override abstract byte[] getMinMaxByteArr(); @@ -362,6 +485,8 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa return Float.BYTES * 2; } + //END Min & Max Methods + @Override abstract byte[] getRetainedItemsByteArr(); @@ -393,27 +518,152 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa return levelsArr[getNumLevels()] * Float.BYTES; } - private final void refreshSortedView() { - kllFloatsSV = (kllFloatsSV == null) - ? new KllFloatsSketchSortedView(this) : kllFloatsSV; - } - abstract void setFloatItemsArray(float[] floatItems); abstract void setFloatItemsArrayAt(int index, float item); - abstract void setMaxItem(float item); + abstract void setFloatItemsArrayAt(int dstIndex, float[] srcItems, int srcOffset, int length); - abstract void setMinItem(float item); + // SORTED VIEW - void updateMinMax(final float item) { - if (isEmpty()) { - setMinItem(item); - setMaxItem(item); - } else { - setMinItem(min(getMinItem(), item)); - setMaxItem(max(getMaxItem(), item)); + @Override + @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "OK in this case.") + public FloatsSketchSortedView getSortedView() { + refreshSortedView(); + return floatsSV; + } + + private final FloatsSketchSortedView refreshSortedView() { + if (floatsSV == null) { + final CreateSortedView csv = new CreateSortedView(); + floatsSV = csv.getSV(); + } + return floatsSV; + } + + private final class CreateSortedView { + float[] quantiles; + long[] cumWeights; + + FloatsSketchSortedView getSV() { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + final float[] srcQuantiles = getFloatItemsArray(); + final int[] srcLevels = levelsArr; + final int srcNumLevels = getNumLevels(); + + if (!isLevelZeroSorted()) { + Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]); + if (!hasMemory()) { setLevelZeroSorted(true); } + //we don't sort level0 in Memory, only our copy. + } + final int numQuantiles = getNumRetained(); + quantiles = new float[numQuantiles]; + cumWeights = new long[numQuantiles]; + populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles); + return new FloatsSketchSortedView( + quantiles, cumWeights, getN(), getMaxItemInternal(), getMinItemInternal()); + } + + private void populateFromSketch(final float[] srcQuantiles, final int[] srcLevels, + final int srcNumLevels, final int numItems) { + final int[] myLevels = new int[srcNumLevels + 1]; + final int offset = srcLevels[0]; + System.arraycopy(srcQuantiles, offset, quantiles, 0, numItems); + int srcLevel = 0; + int dstLevel = 0; + long weight = 1; + while (srcLevel < srcNumLevels) { + final int fromIndex = srcLevels[srcLevel] - offset; + final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive + if (fromIndex < toIndex) { // if equal, skip empty level + Arrays.fill(cumWeights, fromIndex, toIndex, weight); + myLevels[dstLevel] = fromIndex; + myLevels[dstLevel + 1] = toIndex; + dstLevel++; + } + srcLevel++; + weight *= 2; + } + final int numLevels = dstLevel; + blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels); //create unit weights + KllHelper.convertToCumulative(cumWeights); + } + } //End of class CreateSortedView + + private static void blockyTandemMergeSort(final float[] quantiles, final long[] weights, + final int[] levels, final int numLevels) { + if (numLevels == 1) { return; } + + // duplicate the input in preparation for the "ping-pong" copy reduction strategy. + final float[] quantilesTmp = Arrays.copyOf(quantiles, quantiles.length); + final long[] weightsTmp = Arrays.copyOf(weights, quantiles.length); // don't need the extra one + + blockyTandemMergeSortRecursion(quantilesTmp, weightsTmp, quantiles, weights, levels, 0, numLevels); + } + + private static void blockyTandemMergeSortRecursion( + final float[] quantilesSrc, final long[] weightsSrc, + final float[] quantilesDst, final long[] weightsDst, + final int[] levels, final int startingLevel, final int numLevels) { + if (numLevels == 1) { return; } + final int numLevels1 = numLevels / 2; + final int numLevels2 = numLevels - numLevels1; + assert numLevels1 >= 1; + assert numLevels2 >= numLevels1; + final int startingLevel1 = startingLevel; + final int startingLevel2 = startingLevel + numLevels1; + // swap roles of src and dst + blockyTandemMergeSortRecursion( + quantilesDst, weightsDst, + quantilesSrc, weightsSrc, + levels, startingLevel1, numLevels1); + blockyTandemMergeSortRecursion( + quantilesDst, weightsDst, + quantilesSrc, weightsSrc, + levels, startingLevel2, numLevels2); + tandemMerge( + quantilesSrc, weightsSrc, + quantilesDst, weightsDst, + levels, + startingLevel1, numLevels1, + startingLevel2, numLevels2); + } + + private static void tandemMerge( + final float[] quantilesSrc, final long[] weightsSrc, + final float[] quantilesDst, final long[] weightsDst, + final int[] levelStarts, + final int startingLevel1, final int numLevels1, + final int startingLevel2, final int numLevels2) { + final int fromIndex1 = levelStarts[startingLevel1]; + final int toIndex1 = levelStarts[startingLevel1 + numLevels1]; // exclusive + final int fromIndex2 = levelStarts[startingLevel2]; + final int toIndex2 = levelStarts[startingLevel2 + numLevels2]; // exclusive + int iSrc1 = fromIndex1; + int iSrc2 = fromIndex2; + int iDst = fromIndex1; + + while (iSrc1 < toIndex1 && iSrc2 < toIndex2) { + if (quantilesSrc[iSrc1] < quantilesSrc[iSrc2]) { + quantilesDst[iDst] = quantilesSrc[iSrc1]; + weightsDst[iDst] = weightsSrc[iSrc1]; + iSrc1++; + } else { + quantilesDst[iDst] = quantilesSrc[iSrc2]; + weightsDst[iDst] = weightsSrc[iSrc2]; + iSrc2++; + } + iDst++; + } + if (iSrc1 < toIndex1) { + System.arraycopy(quantilesSrc, iSrc1, quantilesDst, iDst, toIndex1 - iSrc1); + System.arraycopy(weightsSrc, iSrc1, weightsDst, iDst, toIndex1 - iSrc1); + } else if (iSrc2 < toIndex2) { + System.arraycopy(quantilesSrc, iSrc2, quantilesDst, iDst, toIndex2 - iSrc2); + System.arraycopy(weightsSrc, iSrc2, weightsDst, iDst, toIndex2 - iSrc2); } } + // END SORTED VIEW + } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java deleted file mode 100644 index 52320dd0..00000000 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.kll; - -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; -import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; - -import java.util.Arrays; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.quantilescommon.FloatsSortedView; -import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; -import org.apache.datasketches.quantilescommon.InequalitySearch; -import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; -import org.apache.datasketches.quantilescommon.QuantilesUtil; - -/** - * The SortedView of the KllFloatsSketch. - * @author Alexander Saydakov - * @author Lee Rhodes - */ -public final class KllFloatsSketchSortedView implements FloatsSortedView { - private final float[] quantiles; - private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights - private final long totalN; - private final float maxItem; - private final float minItem; - - /** - * Construct from elements for testing. - * @param quantiles sorted array of quantiles - * @param cumWeights sorted, monotonically increasing cumulative weights. - * @param totalN the total number of items presented to the sketch. - */ - KllFloatsSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN, - final float maxItem, final float minItem) { - this.quantiles = quantiles; - this.cumWeights = cumWeights; - this.totalN = totalN; - this.maxItem = maxItem; - this.minItem = minItem; - } - - /** - * Constructs this Sorted View given the sketch - * @param sketch the given KllFloatsSketch. - */ - public KllFloatsSketchSortedView(final KllFloatsSketch sketch) { - if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - this.totalN = sketch.getN(); - this.maxItem = sketch.getMaxItem(); - this.minItem = sketch.getMinItem(); - final float[] srcQuantiles = sketch.getFloatItemsArray(); - final int[] srcLevels = sketch.levelsArr; - final int srcNumLevels = sketch.getNumLevels(); - - if (!sketch.isLevelZeroSorted()) { - Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]); - if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); } - } - - final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove free space - quantiles = new float[numQuantiles]; - cumWeights = new long[numQuantiles]; - populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles); - } - - @Override - public long[] getCumulativeWeights() { - return cumWeights.clone(); - } - - @Override - public float getMaxItem() { - return maxItem; - } - - @Override - public float getMinItem() { - return minItem; - } - - @Override - public long getN() { - return totalN; - } - - @Override - public int getNumRetained() { - return quantiles.length; - } - - @Override - public float getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - QuantilesUtil.checkNormalizedRankBounds(rank); - final int len = cumWeights.length; - final double naturalRank = getNaturalRank(rank, totalN, searchCrit); - final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT; - final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); - if (index == -1) { - return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; - } - return quantiles[index]; - } - - @Override - public float[] getQuantiles() { - return quantiles.clone(); - } - - @Override - public double getRank(final float quantile, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - final int len = quantiles.length; - final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.LE : InequalitySearch.LT; - final int index = InequalitySearch.find(quantiles, 0, len - 1, quantile, crit); - if (index == -1) { - return 0; //EXCLUSIVE (LT) case: quantile <= minQuantile; INCLUSIVE (LE) case: quantile < minQuantile - } - return (double)cumWeights[index] / totalN; - } - - @Override - public boolean isEmpty() { - return totalN == 0; - } - - @Override - public FloatsSortedViewIterator iterator() { - return new FloatsSortedViewIterator(quantiles, cumWeights); - } - - //restricted methods - - private void populateFromSketch(final float[] srcQuantiles, final int[] srcLevels, - final int srcNumLevels, final int numItems) { - final int[] myLevels = new int[srcNumLevels + 1]; - final int offset = srcLevels[0]; - System.arraycopy(srcQuantiles, offset, quantiles, 0, numItems); - int srcLevel = 0; - int dstLevel = 0; - long weight = 1; - while (srcLevel < srcNumLevels) { - final int fromIndex = srcLevels[srcLevel] - offset; - final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive - if (fromIndex < toIndex) { // if equal, skip empty level - Arrays.fill(cumWeights, fromIndex, toIndex, weight); - myLevels[dstLevel] = fromIndex; - myLevels[dstLevel + 1] = toIndex; - dstLevel++; - } - srcLevel++; - weight *= 2; - } - final int numLevels = dstLevel; - blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels); //create unit weights - KllHelper.convertToCumulative(cumWeights); - } - - private static void blockyTandemMergeSort(final float[] quantiles, final long[] weights, - final int[] levels, final int numLevels) { - if (numLevels == 1) { return; } - - // duplicate the input in preparation for the "ping-pong" copy reduction strategy. - final float[] quantilesTmp = Arrays.copyOf(quantiles, quantiles.length); - final long[] weightsTmp = Arrays.copyOf(weights, quantiles.length); // don't need the extra one here - - blockyTandemMergeSortRecursion(quantilesTmp, weightsTmp, quantiles, weights, levels, 0, numLevels); - } - - private static void blockyTandemMergeSortRecursion( - final float[] quantilesSrc, final long[] weightsSrc, - final float[] quantilesDst, final long[] weightsDst, - final int[] levels, final int startingLevel, final int numLevels) { - if (numLevels == 1) { return; } - final int numLevels1 = numLevels / 2; - final int numLevels2 = numLevels - numLevels1; - assert numLevels1 >= 1; - assert numLevels2 >= numLevels1; - final int startingLevel1 = startingLevel; - final int startingLevel2 = startingLevel + numLevels1; - // swap roles of src and dst - blockyTandemMergeSortRecursion( - quantilesDst, weightsDst, - quantilesSrc, weightsSrc, - levels, startingLevel1, numLevels1); - blockyTandemMergeSortRecursion( - quantilesDst, weightsDst, - quantilesSrc, weightsSrc, - levels, startingLevel2, numLevels2); - tandemMerge( - quantilesSrc, weightsSrc, - quantilesDst, weightsDst, - levels, - startingLevel1, numLevels1, - startingLevel2, numLevels2); - } - - private static void tandemMerge( - final float[] quantilesSrc, final long[] weightsSrc, - final float[] quantilesDst, final long[] weightsDst, - final int[] levelStarts, - final int startingLevel1, final int numLevels1, - final int startingLevel2, final int numLevels2) { - final int fromIndex1 = levelStarts[startingLevel1]; - final int toIndex1 = levelStarts[startingLevel1 + numLevels1]; // exclusive - final int fromIndex2 = levelStarts[startingLevel2]; - final int toIndex2 = levelStarts[startingLevel2 + numLevels2]; // exclusive - int iSrc1 = fromIndex1; - int iSrc2 = fromIndex2; - int iDst = fromIndex1; - - while (iSrc1 < toIndex1 && iSrc2 < toIndex2) { - if (quantilesSrc[iSrc1] < quantilesSrc[iSrc2]) { - quantilesDst[iDst] = quantilesSrc[iSrc1]; - weightsDst[iDst] = weightsSrc[iSrc1]; - iSrc1++; - } else { - quantilesDst[iDst] = quantilesSrc[iSrc2]; - weightsDst[iDst] = weightsSrc[iSrc2]; - iSrc2++; - } - iDst++; - } - if (iSrc1 < toIndex1) { - System.arraycopy(quantilesSrc, iSrc1, quantilesDst, iDst, toIndex1 - iSrc1); - System.arraycopy(weightsSrc, iSrc1, weightsDst, iDst, toIndex1 - iSrc1); - } else if (iSrc2 < toIndex2) { - System.arraycopy(quantilesSrc, iSrc2, quantilesDst, iDst, toIndex2 - iSrc2); - System.arraycopy(weightsSrc, iSrc2, weightsDst, iDst, toIndex2 - iSrc2); - } - } - -} diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java index cafefde7..cc192b7f 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java @@ -163,12 +163,17 @@ final class KllHeapFloatsSketch extends KllFloatsSketch { @Override String getItemAsString(final int index) { if (isEmpty()) { return "NaN"; } - return Double.toString(floatItems[index]); + return Float.toString(floatItems[index]); } @Override public int getK() { return k; } + //MinMax Methods + + @Override + float getMaxItemInternal() { return maxFloatItem; } + @Override public float getMaxItem() { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } @@ -177,26 +182,43 @@ final class KllHeapFloatsSketch extends KllFloatsSketch { @Override String getMaxItemAsString() { - if (isEmpty()) { return "NaN"; } return Float.toString(maxFloatItem); } + @Override + float getMinItemInternal() { return minFloatItem; } + @Override public float getMinItem() { - if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + if (isEmpty() || Float.isNaN(minFloatItem)) { throw new SketchesArgumentException(EMPTY_MSG); } return minFloatItem; } @Override String getMinItemAsString() { - if (isEmpty()) { return "NaN"; } return Float.toString(minFloatItem); } + @Override + byte[] getMinMaxByteArr() { + final byte[] bytesOut = new byte[2 * Float.BYTES]; + putFloatLE(bytesOut, 0, minFloatItem); + putFloatLE(bytesOut, Float.BYTES, maxFloatItem); + return bytesOut; + } + + @Override + void setMaxItem(final float item) { this.maxFloatItem = item; } + + @Override + void setMinItem(final float item) { this.minFloatItem = item; } + + //END MinMax Methods + @Override public long getN() { return n; } - //restricted + //other restricted @Override float[] getFloatItemsArray() { return floatItems; } @@ -216,14 +238,6 @@ final class KllHeapFloatsSketch extends KllFloatsSketch { @Override int getMinK() { return minK; } - @Override - byte[] getMinMaxByteArr() { - final byte[] bytesOut = new byte[2 * Float.BYTES]; - putFloatLE(bytesOut, 0, minFloatItem); - putFloatLE(bytesOut, Float.BYTES, maxFloatItem); - return bytesOut; - } - @Override byte[] getRetainedItemsByteArr() { if (isEmpty()) { return new byte[0]; } @@ -272,13 +286,12 @@ final class KllHeapFloatsSketch extends KllFloatsSketch { void setFloatItemsArrayAt(final int index, final float item) { this.floatItems[index] = item; } @Override - void setLevelZeroSorted(final boolean sorted) { this.isLevelZeroSorted = sorted; } - - @Override - void setMaxItem(final float item) { this.maxFloatItem = item; } - + void setFloatItemsArrayAt(final int dstIndex, final float[] srcItems, final int srcOffset, final int length) { //TODO + System.arraycopy(srcItems, srcOffset, floatItems, dstIndex, length); + } + @Override - void setMinItem(final float item) { this.minFloatItem = item; } + void setLevelZeroSorted(final boolean sorted) { this.isLevelZeroSorted = sorted; } @Override void setMinK(final int minK) { this.minK = minK; } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSketchSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSketchSortedView.java index 89300851..564f8aa1 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSketchSortedView.java @@ -32,7 +32,7 @@ import org.apache.datasketches.common.SketchesArgumentException; */ public final class DoublesSketchSortedView implements DoublesSortedView { private final double[] quantiles; - private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights + private final long[] cumWeights; //cumulative natural weights private final long totalN; private final double maxItem; private final double minItem; diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSketchSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSketchSortedView.java similarity index 80% copy from src/main/java/org/apache/datasketches/quantilescommon/DoublesSketchSortedView.java copy to src/main/java/org/apache/datasketches/quantilescommon/FloatsSketchSortedView.java index 89300851..fbcfc887 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSketchSortedView.java @@ -30,12 +30,12 @@ import org.apache.datasketches.common.SketchesArgumentException; * @author Alexander Saydakov * @author Lee Rhodes */ -public final class DoublesSketchSortedView implements DoublesSortedView { - private final double[] quantiles; - private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights +public class FloatsSketchSortedView implements FloatsSortedView { + private final float[] quantiles; + private final long[] cumWeights; //cumulative natural weights private final long totalN; - private final double maxItem; - private final double minItem; + private final float maxItem; + private final float minItem; /** * Construct from elements, also used in testing. @@ -45,8 +45,8 @@ public final class DoublesSketchSortedView implements DoublesSortedView { * @param maxItem of type double * @param minItem of type double */ - public DoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN, - final double maxItem, final double minItem) { + public FloatsSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN, + final float maxItem, final float minItem) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; @@ -60,12 +60,12 @@ public final class DoublesSketchSortedView implements DoublesSortedView { } @Override - public double getMaxItem() { + public float getMaxItem() { return maxItem; } @Override - public double getMinItem() { + public float getMinItem() { return minItem; } @@ -80,7 +80,7 @@ public final class DoublesSketchSortedView implements DoublesSortedView { } @Override - public double getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { + public float getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); final int len = cumWeights.length; @@ -94,12 +94,12 @@ public final class DoublesSketchSortedView implements DoublesSortedView { } @Override - public double[] getQuantiles() { + public float[] getQuantiles() { return quantiles.clone(); } @Override - public double getRank(final double quantile, final QuantileSearchCriteria searchCrit) { + public double getRank(final float quantile, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } final int len = quantiles.length; final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.LE : InequalitySearch.LT; @@ -116,8 +116,8 @@ public final class DoublesSketchSortedView implements DoublesSortedView { } @Override - public DoublesSortedViewIterator iterator() { - return new DoublesSortedViewIterator(quantiles, cumWeights); + public FloatsSortedViewIterator iterator() { + return new FloatsSortedViewIterator(quantiles, cumWeights); } } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java index db0a612b..5fbcf434 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java @@ -40,7 +40,7 @@ import org.apache.datasketches.quantilescommon.GenericInequalitySearch.Inequalit public class ItemsSketchSortedView<T> implements GenericSortedView<T> { private static final double PARTITIONING_ERROR_FACTOR = 2.0; private final T[] quantiles; - private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights + private final long[] cumWeights; //cumulative natural weights private final long totalN; private final Comparator<? super T> comparator; private final T maxItem; diff --git a/src/test/java/org/apache/datasketches/common/TestUtil.java b/src/test/java/org/apache/datasketches/common/TestUtil.java index 6f837ac1..7aab8179 100644 --- a/src/test/java/org/apache/datasketches/common/TestUtil.java +++ b/src/test/java/org/apache/datasketches/common/TestUtil.java @@ -77,7 +77,7 @@ public final class TestUtil { public static File getResourceFile(final String shortFileName) { Objects.requireNonNull(shortFileName, "input parameter 'String shortFileName' cannot be null."); final String slashName = (shortFileName.charAt(0) == '/') ? shortFileName : '/' + shortFileName; - final URL url = Util.class.getResource(slashName); + final URL url = TestUtil.class.getResource(slashName); Objects.requireNonNull(url, "resource " + slashName + " returns null URL."); File file; file = createTempFile(slashName); @@ -110,7 +110,7 @@ public final class TestUtil { public static byte[] getResourceBytes(final String shortFileName) { Objects.requireNonNull(shortFileName, "input parameter 'String shortFileName' cannot be null."); final String slashName = (shortFileName.charAt(0) == '/') ? shortFileName : '/' + shortFileName; - final URL url = Util.class.getResource(slashName); + final URL url = TestUtil.class.getResource(slashName); Objects.requireNonNull(url, "resource " + slashName + " returns null URL."); final byte[] out; if (url.getProtocol().equals("jar")) { //definitely a jar diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index dcc80d66..3d5b3160 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -637,14 +637,6 @@ public class KllDirectDoublesSketchTest { sk2.merge(sk1); } - @Test - public void checkMergeExceptionsWrongType() { - KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(20); - KllDoublesSketch sk2 = KllDoublesSketch.newHeapInstance(20); - try { sk1.merge(sk2); fail(); } catch (ClassCastException e) { } - try { sk2.merge(sk1); fail(); } catch (ClassCastException e) { } - } - @Test public void checkVectorUpdate() { WritableMemory dstMem = WritableMemory.allocate(6000); @@ -673,6 +665,14 @@ public class KllDirectDoublesSketchTest { return ddsk; } + @Test + public void checkMergeExceptionsWrongType() { + KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(20); + KllDoublesSketch sk2 = KllDoublesSketch.newHeapInstance(20); + try { sk1.merge(sk2); fail(); } catch (ClassCastException e) { } + try { sk2.merge(sk1); fail(); } catch (ClassCastException e) { } + } + private final static boolean enablePrinting = false; /** diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java index 8ab61edd..f4f716e8 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java @@ -268,7 +268,8 @@ public class KllDirectFloatsSketchTest { public void mergeMinAndMaxFromOther() { final KllFloatsSketch sketch1 = getUpdatableDirectFloatSketch(200, 0); final KllFloatsSketch sketch2 = getUpdatableDirectFloatSketch(200, 0); - for (int i = 1; i <= 1_000_000; i++) { + int n = 1_000_000; + for (int i = 1; i <= n; i++) { sketch1.update(i); } sketch2.merge(sketch1); @@ -325,7 +326,7 @@ public class KllDirectFloatsSketchTest { @Test public void serializeDeserializeEmptyViaUpdatableWritableWrap() { final KllFloatsSketch sketch1 = getUpdatableDirectFloatSketch(200, 0); - final byte[] bytes = KllHelper.toByteArray(sketch1, true); //updatable + final byte[] bytes = KllHelper.toByteArray(sketch1, true); final KllFloatsSketch sketch2 = KllFloatsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true)); @@ -343,7 +344,7 @@ public class KllDirectFloatsSketchTest { public void serializeDeserializeOneValueViaCompactHeapify() { final KllFloatsSketch sketch1 = getUpdatableDirectFloatSketch(200, 0); sketch1.update(1); - final byte[] bytes = sketch1.toByteArray(); //compact + final byte[] bytes = sketch1.toByteArray(); final KllFloatsSketch sketch2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(false)); assertFalse(sketch2.isEmpty()); @@ -359,7 +360,7 @@ public class KllDirectFloatsSketchTest { public void serializeDeserializeOneValueViaUpdatableWritableWrap() { final KllFloatsSketch sketch1 = getUpdatableDirectFloatSketch(200, 0); sketch1.update(1); - final byte[] bytes = KllHelper.toByteArray(sketch1, true); //updatable + final byte[] bytes = KllHelper.toByteArray(sketch1, true); final KllFloatsSketch sketch2 = KllFloatsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true)); @@ -637,14 +638,25 @@ public class KllDirectFloatsSketchTest { } @Test - public void checkMergeExceptionsWrongType() { - KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(20); - KllDoublesSketch sk2 = KllDoublesSketch.newHeapInstance(20); - try { sk1.merge(sk2); fail(); } catch (ClassCastException e) { } - try { sk2.merge(sk1); fail(); } catch (ClassCastException e) { } + public void checkVectorUpdate() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllFloatsSketch sk = KllFloatsSketch.newDirectInstance(20, dstMem, memReqSvr); + float[] v = new float[21]; + for (int i = 0; i < 21; i++) { v[i] = i + 1; } + sk.update(v, 0, 21); } - private static KllFloatsSketch getUpdatableDirectFloatSketch(final int k, final int n) { + @Test + public void checkWeightedUpdate() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllFloatsSketch sk = KllFloatsSketch.newDirectInstance(8, dstMem, memReqSvr); + for (int i = 0; i < 16; i++) { + sk.update(i + 1, 16); + } + println(sk.toString(true, true)); + } + + private static KllFloatsSketch getUpdatableDirectFloatSketch(int k, int n) { KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(k); for (int i = 1; i <= n; i++) { sk.update(i); } byte[] byteArr = KllHelper.toByteArray(sk, true); @@ -653,6 +665,14 @@ public class KllDirectFloatsSketchTest { return dfsk; } + @Test + public void checkMergeExceptionsWrongType() { + KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(20); + KllDoublesSketch sk2 = KllDoublesSketch.newHeapInstance(20); + try { sk1.merge(sk2); fail(); } catch (ClassCastException e) { } + try { sk2.merge(sk1); fail(); } catch (ClassCastException e) { } + } + private final static boolean enablePrinting = false; /** diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchSerDeTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchSerDeTest.java index 3bbb44b1..fe745bcd 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchSerDeTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchSerDeTest.java @@ -32,8 +32,10 @@ public class KllFloatsSketchSerDeTest { @Test public void serializeDeserializeEmpty() { - final KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(); - //from heap -> byte[] -> heap + final int N = 20; + + final KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(N); + //Empty: from heap -> byte[] -> heap final byte[] bytes = sk1.toByteArray(); final KllFloatsSketch sk2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); assertEquals(bytes.length, sk1.getSerializedSizeBytes()); @@ -44,7 +46,8 @@ public class KllFloatsSketchSerDeTest { try { sk2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} assertEquals(sk2.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); - //from heap -> byte[] -> off heap + + //Empty: from heap -> byte[] -> off heap final KllFloatsSketch sk3 = KllFloatsSketch.wrap(Memory.wrap(bytes)); assertTrue(sk3.isEmpty()); assertEquals(sk3.getNumRetained(), sk1.getNumRetained()); @@ -62,6 +65,7 @@ public class KllFloatsSketchSerDeTest { public void serializeDeserializeOneValue() { final KllFloatsSketch sk1 = KllFloatsSketch.newHeapInstance(); sk1.update(1); + //from heap -> byte[] -> heap final byte[] bytes = sk1.toByteArray(); final KllFloatsSketch sk2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); @@ -73,6 +77,7 @@ public class KllFloatsSketchSerDeTest { assertEquals(sk2.getMinItem(), 1.0F); assertEquals(sk2.getMaxItem(), 1.0F); assertEquals(sk2.getSerializedSizeBytes(), Long.BYTES + Float.BYTES); + //from heap -> byte[] -> off heap final KllFloatsSketch sk3 = KllFloatsSketch.wrap(Memory.wrap(bytes)); assertFalse(sk3.isEmpty()); @@ -96,6 +101,7 @@ public class KllFloatsSketchSerDeTest { } assertEquals(sk1.getMinItem(), 0.0f); assertEquals(sk1.getMaxItem(), 999.0f); + //from heap -> byte[] -> heap final byte[] bytes = sk1.toByteArray(); final KllFloatsSketch sk2 = KllFloatsSketch.heapify(Memory.wrap(bytes)); @@ -107,6 +113,7 @@ public class KllFloatsSketchSerDeTest { assertEquals(sk2.getMinItem(), sk1.getMinItem()); assertEquals(sk2.getMaxItem(), sk1.getMaxItem()); assertEquals(sk2.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + //from heap -> byte[] -> off heap final KllFloatsSketch sk3 = KllFloatsSketch.wrap(Memory.wrap(bytes)); assertFalse(sk3.isEmpty()); diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index 243dd832..652bd51a 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -19,6 +19,7 @@ package org.apache.datasketches.kll; +import static java.lang.Math.min; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; @@ -37,6 +38,7 @@ import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; import org.testng.annotations.Test; public class KllFloatsSketchTest { + private static final String LS = System.getProperty("line.separator"); private static final double PMF_EPS_FOR_K_8 = 0.35; // PMF rank error (epsilon) for k=8 private static final double PMF_EPS_FOR_K_128 = 0.025; // PMF rank error (epsilon) for k=128 private static final double PMF_EPS_FOR_K_256 = 0.013; // PMF rank error (epsilon) for k=256 @@ -608,6 +610,101 @@ public class KllFloatsSketchTest { try { sk.getSortedView(); fail(); } catch (SketchesArgumentException e) { } } + @Test + public void checkVectorUpdate() { + boolean withLevels = false; + boolean withLevelsAndItems = true; + int k = 20; + int n = 108;//108; + int maxVsz = 40; //max vector size + KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(k); + int j = 1; + int rem; + while ((rem = n - j + 1) > 0) { + int vecSz = min(rem, maxVsz); + float[] v = new float[vecSz]; + for (int i = 0; i < vecSz; i++) { v[i] = j++; } + sk.update(v, 0, vecSz); + } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + println(""); + } + + @Test + public void vectorizedUpdates() { + final int trials = 1; + final int M = 1; //number of vectors + final int N = 1000; //vector size + final int K = 256; + final float[] values = new float[N]; + float vIn = 1.0F; + long totN = 0; + final long startTime = System.nanoTime(); + for (int t = 0; t < trials; t++) { + final KllFloatsSketch sketch = KllFloatsSketch.newHeapInstance(K); + for (int m = 0; m < M; m++) { + for (int n = 0; n < N; n++) { + values[n] = vIn++; //fill vector + } + sketch.update(values, 0, N); //vector input + } + totN = sketch.getN(); + assertEquals(totN, M * N); + assertEquals(sketch.getMinItem(), 1.0F); + assertEquals(sketch.getMaxItem(), totN); + assertEquals(sketch.getQuantile(0.5), (float)(totN / 2.0), totN * PMF_EPS_FOR_K_256 * 2.0); //wider tolerance + } + final long runTime = System.nanoTime() - startTime; + println("Vectorized Updates"); + printf(" Vector size : %,12d\n", N); + printf(" Num Vectors : %,12d\n", M); + printf(" Total Input : %,12d\n", totN); + printf(" Run Time mS : %,12.3f\n", runTime / 1e6); + final double trialTime = runTime / (1e6 * trials); + printf(" mS / Trial : %,12.3f\n", trialTime); + final double updateTime = runTime / (1.0 * totN * trials); + printf(" nS / Update : %,12.3f\n", updateTime); + } + + @Test + public void nonVectorizedUpdates() { + final int trials = 1; + final int M = 1; //number of vectors + final int N = 1000; //vector size + final int K = 256; + final float[] values = new float[N]; + float vIn = 1.0F; + long totN = 0; + final long startTime = System.nanoTime(); + for (int t = 0; t < trials; t++) { + final KllFloatsSketch sketch = KllFloatsSketch.newHeapInstance(K); + for (int m = 0; m < M; m++) { + for (int n = 0; n < N; n++) { + values[n] = vIn++; //fill vector + } + for (int i = 0; i < N; i++) { + sketch.update(values[i]); //single item input + } + } + totN = sketch.getN(); + assertEquals(totN, M * N); + assertEquals(sketch.getMinItem(), 1.0); + assertEquals(sketch.getMaxItem(), totN); + assertEquals(sketch.getQuantile(0.5), (float)(totN / 2.0), totN * PMF_EPS_FOR_K_256 * 2.0); //wider tolerance + } + final long runTime = System.nanoTime() - startTime; + println("Vectorized Updates"); + printf(" Vector size : %,12d\n", N); + printf(" Num Vectors : %,12d\n", M); + printf(" Total Input : %,12d\n", totN); + printf(" Run Time mS : %,12.3f\n", runTime / 1e6); + final double trialTime = runTime / (1e6 * trials); + printf(" mS / Trial : %,12.3f\n", trialTime); + final double updateTime = runTime / (1.0 * totN * trials); + printf(" nS / Update : %,12.3f\n", updateTime); + } + private final static boolean enablePrinting = false; /** diff --git a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java index 6049f2f3..7e3b071a 100644 --- a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java +++ b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java @@ -40,7 +40,6 @@ import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.kll.KllDoublesSketch; import org.apache.datasketches.kll.KllFloatsSketch; -import org.apache.datasketches.kll.KllFloatsSketchSortedView; import org.apache.datasketches.kll.KllItemsSketch; import org.apache.datasketches.kll.KllSketch; import org.apache.datasketches.quantiles.DoublesSketch; @@ -142,7 +141,7 @@ public class CrossCheckQuantilesTest { ItemsSketch<String> itemsSk = null; ReqSketchSortedView reqFloatsSV = null; - KllFloatsSketchSortedView kllFloatsSV = null; + FloatsSketchSortedView kllFloatsSV = null; DoublesSketchSortedView kllDoublesSV = null; DoublesSketchSortedView classicDoublesSV = null; ItemsSketchSortedView<String> kllItemsSV = null; @@ -356,10 +355,10 @@ public class CrossCheckQuantilesTest { return (ReqSketchSortedView) REQ_SV_CTOR.newInstance(values, cumWeights, totalN, maxItem, minItem); } - private final static KllFloatsSketchSortedView getRawKllFloatsSV( + private final static FloatsSketchSortedView getRawKllFloatsSV( final float[] values, final long[] cumWeights, final long totalN, final float maxItem, final float minItem) throws Exception { - return (KllFloatsSketchSortedView) KLL_FLOATS_SV_CTOR.newInstance(values, cumWeights, totalN, maxItem, minItem); + return (FloatsSketchSortedView) KLL_FLOATS_SV_CTOR.newInstance(values, cumWeights, totalN, maxItem, minItem); } private final static DoublesSketchSortedView getRawDoublesSV( diff --git a/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java b/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java index 6dbcac2d..e1050985 100644 --- a/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java +++ b/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java @@ -44,7 +44,7 @@ public final class ReflectUtilityTest { static { REQ_SV = getClass("org.apache.datasketches.req.ReqSketchSortedView"); - KLL_FLOATS_SV = getClass("org.apache.datasketches.kll.KllFloatsSketchSortedView"); + KLL_FLOATS_SV = getClass("org.apache.datasketches.quantilescommon.FloatsSketchSortedView"); DOUBLES_SV = getClass("org.apache.datasketches.quantilescommon.DoublesSketchSortedView"); REQ_SV_CTOR = --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
