leerho commented on code in PR #546:
URL: https://github.com/apache/datasketches-java/pull/546#discussion_r1563727616
##########
src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java:
##########
@@ -118,29 +114,35 @@ public int getNumRetained() {
}
@Override
- @SuppressWarnings("unchecked")
- public GenericPartitionBoundaries<T> getPartitionBoundaries(final int
numEquallySized,
+ public GenericPartitionBoundaries<T> getPartitionBoundariesFromPartSize(
+ final long nominalPartitionSize,
final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new
SketchesArgumentException(QuantilesAPI.EMPTY_MSG); }
- final long totalN = this.totalN;
- final int maxParts = (int) (totalN / Math.ceil(normRankErr *
PARTITIONING_ERROR_FACTOR) );
- final int svLen = cumWeights.length;
-
- if (numEquallySized > maxParts) {
+ final long partSizeItems = getMinPartitionSizeItems();
+ if (nominalPartitionSize < partSizeItems) {
throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG
- + "The requested number of partitions is too large for the 'k' of
this sketch "
- + "if it exceeds the maximum number of partitions allowed by the
error threshold for the 'k' of this sketch."
- + "Requested Partitions: " + numEquallySized + " > " + maxParts);
+ + " The requested nominal partition size is too small for this
sketch.");
}
- if (numEquallySized > svLen / 2.0) {
+ final long totalN = this.totalN;
+ final int numEquallySizedParts = (int) min(totalN / partSizeItems,
getMaxPartitions());
+ return getPartitionBoundariesFromNumParts(numEquallySizedParts);
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public GenericPartitionBoundaries<T> getPartitionBoundariesFromNumParts(
+ final int numEquallySizedParts,
+ final QuantileSearchCriteria searchCrit) {
+ if (isEmpty()) { throw new
SketchesArgumentException(QuantilesAPI.EMPTY_MSG); }
+ final int maxParts = getMaxPartitions();
+ if (numEquallySizedParts > maxParts) {
throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG
- + "The requested number of partitions is too large for the number of
retained items "
- + "if it exceeds maximum number of retained items divided by 2."
- + "Requested Partitions: " + numEquallySized + " > "
- + "Retained Items / 2: " + (svLen / 2));
+ + " The requested number of partitions is too large for this
sketch.");
}
+ final long totalN = this.totalN;
Review Comment:
I'll look at it.
##########
src/test/java/org/apache/datasketches/quantilescommon/PartitionBoundariesTest.java:
##########
@@ -196,6 +196,40 @@ public void checkSimpleEndsAdjustment() {
assertEquals(minItm, "1");
}
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkSketchPartitionLimits() {
+ final long totalN = 1_000_000;
+ final Comparator<String> comparator = Comparator.naturalOrder();
+ final ArrayOfStringsSerDe serDe = new ArrayOfStringsSerDe();
+ final KllItemsSketch<String> sk =
KllItemsSketch.newHeapInstance(comparator, serDe);
+ final int d = digits(totalN);
+ for (int i = 1; i <= totalN; i++) {
+ sk.update(getString(i, d));
+ }
+ final int numLimit = sk.getMaxPartitions();
+ final int ret = sk.getNumRetained();
+ println("ret: " + ret + ", numLimit " + numLimit);
+ @SuppressWarnings("unused")
+ GenericPartitionBoundaries<String> gpb =
sk.getPartitionBoundariesFromNumParts(numLimit + 1);
+ }
+
+ @Test(expectedExceptions = SketchesArgumentException.class)
+ public void checkSketchPartitionLimits2() {
+ final long totalN = 1_000_000;
+ final Comparator<String> comparator = Comparator.naturalOrder();
+ final ArrayOfStringsSerDe serDe = new ArrayOfStringsSerDe();
+ final KllItemsSketch<String> sk =
KllItemsSketch.newHeapInstance(comparator, serDe);
+ final int d = digits(totalN);
+ for (int i = 1; i <= totalN; i++) {
+ sk.update(getString(i, d));
+ }
+ final long sizeLimit= sk.getMinPartitionSizeItems();
+
+ println("Min Size Limit: " + sizeLimit);
+ @SuppressWarnings("unused")
+ GenericPartitionBoundaries<String> gpb =
sk.getPartitionBoundariesFromPartSize(sizeLimit - 1);
Review Comment:
I tested it manually, and it passes at sizeLimit and fails at sizeLimit - 1.
I didn't think it was necessary, but I could add a test that passes exactly
at sizeLimit.
The same goes for the limit on the number of partitions.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]