This is an automated email from the ASF dual-hosted git repository. leerho pushed a commit to branch Fixes_for_getPartitionBoundaries in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 405cb7620d820319d12e18601997523861a64de7 Author: Lee Rhodes <[email protected]> AuthorDate: Wed Oct 25 17:07:09 2023 -0700 Initial fixes for getPartitionBoundaries. --- .../quantiles/ItemsSketchSortedView.java | 3 +- .../quantilescommon/InequalitySearch.java | 255 ++++++++++++++++++++- .../quantilescommon/QuantilesFloatsAPI.java | 2 +- 3 files changed, 245 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java index d2ccf9fd..0dcd35fb 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java @@ -130,8 +130,7 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T> { if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); final int len = cumWeights.length; - final long naturalRank = (searchCrit == INCLUSIVE) - ? (long)Math.ceil(rank * totalN) : (long)Math.floor(rank * totalN); + final long naturalRank = Math.round(rank * totalN); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); if (index == -1) { diff --git a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java index 5a61e525..51b01357 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java @@ -73,6 +73,11 @@ public enum InequalitySearch { return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0; } + @Override + int compare(final long[] arr, final int a, final int b, final double v) { + return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0; + } + @Override int getIndex(final double[] arr, final int a, final int b, final double v) { return a; @@ -88,6 +93,11 @@ public enum InequalitySearch { return a; } + @Override + int getIndex(final long[] arr, final int a, final int b, final double v) { + return a; + } + @Override int resolve(final double[] arr, final int lo, final int hi, final double v) { return (lo == hi) @@ -109,6 +119,13 @@ public enum InequalitySearch { : v > arr[hi] ? hi : (v > arr[lo] ? lo : -1); } + @Override + int resolve(final long[] arr, final int lo, final int hi, final double v) { + return (lo == hi) + ? (v > arr[lo] ? lo : -1) + : v > arr[hi] ? hi : (v > arr[lo] ? lo : -1); + } + @Override public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { @@ -150,6 +167,20 @@ public enum InequalitySearch { + ": arr[" + idx + "]=" + arr[idx] + " < " + v + " <= arr[" + (idx + 1) + "]=" + arr[idx + 1] + "; return arr[" + idx + "]=" + arr[idx]; } + + @Override + public String desc(final long[] arr, final int low, final int high, final double v, final int idx) { + if (idx == -1) { + return "LT: " + v + " <= arr[" + low + "]=" + arr[low] + "; return -1"; + } + if (idx == high) { + return "LT: " + v + " > arr[" + high + "]=" + arr[high] + + "; return arr[" + high + "]=" + arr[high]; + } //idx < high + return "LT: " + v + + ": arr[" + idx + "]=" + arr[idx] + " < " + v + " <= arr[" + (idx + 1) + "]=" + arr[idx + 1] + + "; return arr[" + idx + "]=" + arr[idx]; + } }, /** @@ -179,6 +210,11 @@ public enum InequalitySearch { return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0; } + @Override + int compare(final long[] arr, final int a, final int b, final double v) { + return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0; + } + @Override int getIndex(final double[] arr, final int a, final int b, final double v) { return a; @@ -194,6 +230,11 @@ public enum InequalitySearch { return a; } + @Override + int getIndex(final long[] arr, final int a, final int b, final double v) { + return a; + } + @Override int resolve(final double[] arr, final int lo, final int hi, final double v) { return (lo == hi) @@ -215,6 +256,13 @@ public enum InequalitySearch { : v >= arr[hi] ? hi : (v >= arr[lo] ? lo : -1); } + @Override + int resolve(final long[] arr, final int lo, final int hi, final double v) { + return (lo == hi) + ? (v >= arr[lo] ? lo : -1) + : v >= arr[hi] ? hi : (v >= arr[lo] ? lo : -1); + } + @Override public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { @@ -256,6 +304,20 @@ public enum InequalitySearch { + ": arr[" + idx + "]=" + arr[idx] + " <= " + v + " < arr[" + (idx + 1) + "]=" + arr[idx + 1] + "; return arr[" + idx + "]=" + arr[idx]; } + + @Override + public String desc(final long[] arr, final int low, final int high, final double v, final int idx) { + if (idx == -1) { + return "LE: " + v + " < arr[" + low + "]=" + arr[low] + "; return -1"; + } + if (idx == high) { + return "LE: " + v + " >= arr[" + high + "]=" + arr[high] + + "; return arr[" + high + "]=" + arr[high]; + } + return "LE: " + v + + ": arr[" + idx + "]=" + arr[idx] + " <= " + v + " < arr[" + (idx + 1) + "]=" + arr[idx + 1] + + "; return arr[" + idx + "]=" + arr[idx]; + } }, /** @@ -281,6 +343,11 @@ public enum InequalitySearch { return v < arr[a] ? -1 : arr[b] < v ? 1 : 0; } + @Override + int compare(final long[] arr, final int a, final int b, final double v) { + return v < arr[a] ? -1 : arr[b] < v ? 1 : 0; + } + @Override int getIndex(final double[] arr, final int a, final int b, final double v) { return v == arr[a] ? a : v == arr[b] ? b : -1; @@ -296,6 +363,11 @@ public enum InequalitySearch { return v == arr[a] ? a : v == arr[b] ? b : -1; } + @Override + int getIndex(final long[] arr, final int a, final int b, final double v) { + return v == arr[a] ? a : v == arr[b] ? b : -1; + } + @Override int resolve(final double[] arr, final int lo, final int hi, final double v) { return (lo == hi) @@ -317,6 +389,13 @@ public enum InequalitySearch { : v == arr[lo] ? lo : (v == arr[hi] ? hi : -1); } + @Override + int resolve(final long[] arr, final int lo, final int hi, final double v) { + return (lo == hi) + ? (v == arr[lo] ? lo : -1) + : v == arr[lo] ? lo : (v == arr[hi] ? hi : -1); + } + @Override public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { @@ -358,6 +437,20 @@ public enum InequalitySearch { } return "EQ: " + v + " == arr[" + idx + "]; return arr[" + idx + "]=" + arr[idx]; } + + @Override + public String desc(final long[] arr, final int low, final int high, final double v, final int idx) { + if (idx == -1) { + if (v > arr[high]) { + return "EQ: " + v + " > arr[" + high + "]; return -1"; + } + if (v < arr[low]) { + return "EQ: " + v + " < arr[" + low + "]; return -1"; + } + return "EQ: " + v + " Cannot be found within arr[" + low + "], arr[" + high + "]; return -1"; + } + return "EQ: " + v + " == arr[" + idx + "]; return arr[" + idx + "]=" + arr[idx]; + } }, /** @@ -387,6 +480,11 @@ public enum InequalitySearch { return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0; } + @Override + int compare(final long[] arr, final int a, final int b, final double v) { + return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0; + } + @Override int getIndex(final double[] arr, final int a, final int b, final double v) { return b; @@ -402,6 +500,11 @@ public enum InequalitySearch { return b; } + @Override + int getIndex(final long[] arr, final int a, final int b, final double v) { + return b; + } + @Override int resolve(final double[] arr, final int lo, final int hi, final double v) { return (lo == hi) @@ -423,6 +526,13 @@ public enum InequalitySearch { : v <= arr[lo] ? lo : (v <= arr[hi] ? hi : -1); } + @Override + int resolve(final long[] arr, final int lo, final int hi, final double v) { + return (lo == hi) + ? (v <= arr[lo] ? lo : -1) + : v <= arr[lo] ? lo : (v <= arr[hi] ? hi : -1); + } + @Override public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { @@ -464,6 +574,20 @@ public enum InequalitySearch { + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " < " + v + " <= arr[" + idx + "]=" + arr[idx] + "; return arr[" + idx + "]=" + arr[idx]; } + + @Override + public String desc(final long[] arr, final int low, final int high, final double v, final int idx) { + if (idx == -1) { + return "GE: " + v + " > arr[" + high + "]=" + arr[high] + "; return -1"; + } + if (idx == low) { + return "GE: " + v + " <= arr[" + low + "]=" + arr[low] + + "; return arr[" + low + "]=" + arr[low]; + } //idx > low + return "GE: " + v + + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " < " + v + " <= arr[" + idx + "]=" + arr[idx] + + "; return arr[" + idx + "]=" + arr[idx]; + } }, /** @@ -493,6 +617,11 @@ public enum InequalitySearch { return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0; } + @Override + int compare(final long[] arr, final int a, final int b, final double v) { + return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0; + } + @Override int getIndex(final double[] arr, final int a, final int b, final double v) { return b; @@ -508,6 +637,11 @@ public enum InequalitySearch { return b; } + @Override + int getIndex(final long[] arr, final int a, final int b, final double v) { + return b; + } + @Override int resolve(final double[] arr, final int lo, final int hi, final double v) { return (lo == hi) @@ -529,6 +663,13 @@ public enum InequalitySearch { : v < arr[lo] ? lo : (v < arr[hi] ? hi : -1); } + @Override + int resolve(final long[] arr, final int lo, final int hi, final double v) { + return (lo == hi) + ? (v < arr[lo] ? lo : -1) + : v < arr[lo] ? lo : (v < arr[hi] ? hi : -1); + } + @Override public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { @@ -570,14 +711,28 @@ public enum InequalitySearch { + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " <= " + v + " < arr[" + idx + "]=" + arr[idx] + "; return arr[" + idx + "]=" + arr[idx]; } + + @Override + public String desc(final long[] arr, final int low, final int high, final double v, final int idx) { + if (idx == -1) { + return "GT: " + v + " >= arr[" + high + "]=" + arr[high] + "; return -1"; + } + if (idx == low) { + return "GT: " + v + " < arr[" + low + "]=" + arr[low] + + "; return arr[" + low + "]=" + arr[low]; + } //idx > low + return "GT: " + v + + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " <= " + v + " < arr[" + idx + "]=" + arr[idx] + + "; return arr[" + idx + "]=" + arr[idx]; + } }; /** * The call to compare index a and index b with the value v. - * @param arr The underlying sorted array of double values + * @param arr The underlying sorted array of values * @param a the lower index of the current pair * @param b the higher index of the current pair - * @param v the double value to search for + * @param v the value to search for * @return +1, which means we must search higher in the array, or -1, which means we must * search lower in the array, or 0, which means we have found the correct bounding pair. */ @@ -585,10 +740,10 @@ public enum InequalitySearch { /** * The call to compare index a and index b with the value v. - * @param arr The underlying sorted array of float values + * @param arr The underlying sorted array of values * @param a the lower index of the current pair * @param b the higher index of the current pair - * @param v the float value to search for + * @param v the value to search for * @return +1, which means we must search higher in the array, or -1, which means we must * search lower in the array, or 0, which means we have found the correct bounding pair. */ @@ -596,15 +751,26 @@ public enum InequalitySearch { /** * The call to compare index a and index b with the value v. - * @param arr The underlying sorted array of long values + * @param arr The underlying sorted array of values * @param a the lower index of the current pair * @param b the higher index of the current pair - * @param v the long value to search for + * @param v the value to search for * @return +1, which means we must search higher in the array, or -1, which means we must * search lower in the array, or 0, which means we have found the correct bounding pair. */ abstract int compare(long[] arr, int a, int b, long v); + /** + * The call to compare index a and index b with the value v. + * @param arr The underlying sorted array of values + * @param a the lower index of the current pair + * @param b the higher index of the current pair + * @param v the value to search for + * @return +1, which means we must search higher in the array, or -1, which means we must + * search lower in the array, or 0, which means we have found the correct bounding pair. + */ + abstract int compare(long[] arr, int a, int b, double v); + /** * If the compare operation returns 0, which means "found", this returns the index of the * found value that satisfies the selected criteria. @@ -638,6 +804,17 @@ public enum InequalitySearch { */ abstract int getIndex(long[] arr, int a, int b, long v); + /** + * If the compare operation returns 0, which means "found", this returns the index of the + * found value that satisfies the selected criteria. + * @param arr the array being searched + * @param a the lower index of the current pair + * @param b the higher index of the current pair + * @param v the value being searched for. + * @return the index of the found value that satisfies the selected criteria. + */ + abstract int getIndex(long[] arr, int a, int b, double v); + /** * Called to resolve the search when the hi and lo pointers are equal or adjacent. * @param arr the array being searched @@ -668,13 +845,23 @@ public enum InequalitySearch { */ abstract int resolve(long[] arr, int lo, int hi, long v); + /** + * Called to resolve the search when the hi and lo pointers are equal or adjacent. + * @param arr the array being searched + * @param lo the current lo value + * @param hi the current hi value + * @param v the value being searched for + * @return the index of the resolution or -1, if it cannot be resolved. + */ + abstract int resolve(long[] arr, int lo, int hi, double v); + /** * Optional call that describes the details of the results of the search. * Used primarily for debugging. - * @param arr The underlying sorted array of double values + * @param arr The underlying sorted array of values * @param low the low index of the range * @param high the high index of the range - * @param v the double value to search for + * @param v the value to search for * @param idx the resolved index from the search * @return the descriptive string. */ @@ -683,10 +870,10 @@ public enum InequalitySearch { /** * Optional call that describes the details of the results of the search. * Used primarily for debugging. - * @param arr The underlying sorted array of double values + * @param arr The underlying sorted array of values * @param low the low index of the range * @param high the high index of the range - * @param v the double value to search for + * @param v the value to search for * @param idx the resolved index from the search * @return the descriptive string. */ @@ -695,15 +882,27 @@ public enum InequalitySearch { /** * Optional call that describes the details of the results of the search. * Used primarily for debugging. - * @param arr The underlying sorted array of double values + * @param arr The underlying sorted array of values * @param low the low index of the range * @param high the high index of the range - * @param v the double value to search for + * @param v the value to search for * @param idx the resolved index from the search * @return the descriptive string. */ public abstract String desc(long[] arr, int low, int high, long v, int idx); + /** + * Optional call that describes the details of the results of the search. + * Used primarily for debugging. + * @param arr The underlying sorted array of values + * @param low the low index of the range + * @param high the high index of the range + * @param v the value to search for + * @param idx the resolved index from the search + * @return the descriptive string. + */ + public abstract String desc(long[] arr, int low, int high, double v, int idx); + /** * Binary Search for the index of the double value in the given search range that satisfies * the given InequalitySearch criterion. @@ -804,4 +1003,36 @@ public enum InequalitySearch { return -1; //should never return here } + /** + * Binary Search for the index of the double value in the given search range that satisfies + * the given InequalitySearch criterion. + * If -1 is returned there are no values in the search range that satisfy the criterion. + * + * @param arr the given array that must be sorted. + * @param low the lowest index of the lowest value in the search range, inclusive. + * @param high the highest index of the highest value in the search range, inclusive. + * @param v the value to search for. + * @param crit one of LT, LE, EQ, GT, GE + * @return the index of the value in the given search range that satisfies the criterion + */ + public static int find(final long[] arr, final int low, final int high, + final double v, final InequalitySearch crit) { + Objects.requireNonNull(arr, "Input arr must not be null"); + Objects.requireNonNull(crit, "Input crit must not be null"); + if (arr.length == 0) { throw new SketchesArgumentException("Input array must not be empty."); } + int lo = low; + int hi = high; + while (lo <= hi) { + if (hi - lo <= 1) { + return crit.resolve(arr, lo, hi, v); + } + final int mid = lo + (hi - lo) / 2; + final int ret = crit.compare(arr, mid, mid + 1, v); + if (ret == -1 ) { hi = mid; } + else if (ret == 1) { lo = mid + 1; } + else { return crit.getIndex(arr, mid, mid + 1, v); } + } + return -1; //should never return here + } + } //End of enum diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java index ddece47e..aebdc46b 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java @@ -221,7 +221,7 @@ public interface QuantilesFloatsAPI extends QuantilesAPI { * Gets the lower bound of the quantile confidence interval in which the quantile of the * given rank exists. * - * <p>Although it is possible to estimate the probablity that the true quantile + * <p>Although it is possible to estimate the probability that the true quantile * exists within the quantile confidence interval specified by the upper and lower quantile bounds, * it is not possible to guarantee the width of the quantile confidence interval * as an additive or multiplicative percent of the true quantile.</p> --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
