(datasketches-hive) branch java_version_update updated: Possibly improve documentation, finish moving KLL to new ds-java API

jmalkin Tue, 14 May 2024 16:46:10 -0700

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch java_version_update
in repository https://gitbox.apache.org/repos/asf/datasketches-hive.git



The following commit(s) were added to refs/heads/java_version_update by this 
push:
     new b6c4d01  Possibly improve documentation, finish moving KLL to new 
ds-java API
b6c4d01 is described below

commit b6c4d01ff9539d2aff520569277c14d479695bb0
Author: Jon <[email protected]>
AuthorDate: Tue May 14 16:45:55 2024 -0700

    Possibly improve documentation, finish moving KLL to new ds-java API
---
 .../apache/datasketches/hive/kll/GetCdfUDF.java    | 25 +++++++++++++++++++---
 .../apache/datasketches/hive/kll/GetPmfUDF.java    | 23 +++++++++++++++++---
 .../datasketches/hive/kll/GetQuantileUDF.java      | 24 ++++++++++++++++++---
 .../datasketches/hive/kll/GetQuantilesUDF.java     | 25 +++++++++++++++++++---
 .../apache/datasketches/hive/kll/GetRankUDF.java   | 20 +++++++++++++++--
 .../hive/quantiles/GetCdfFromDoublesSketchUDF.java |  7 +++---
 .../hive/quantiles/GetCdfFromStringsSketchUDF.java |  9 ++++----
 .../hive/quantiles/GetPmfFromDoublesSketchUDF.java |  7 +++---
 .../hive/quantiles/GetPmfFromStringsSketchUDF.java |  7 +++---
 .../quantiles/GetQuantileFromDoublesSketchUDF.java |  9 ++++----
 .../quantiles/GetQuantileFromStringsSketchUDF.java | 10 ++++-----
 .../GetQuantilesFromDoublesSketchUDF.java          | 11 +++++-----
 .../GetQuantilesFromStringsSketchUDF.java          | 11 +++++-----
 .../datasketches/hive/kll/GetCdfUDFTest.java       | 11 ++++++++++
 .../datasketches/hive/kll/GetPmfUDFTest.java       | 11 ++++++++++
 .../datasketches/hive/kll/GetQuantileUDFTest.java  | 10 ++++++++-
 .../datasketches/hive/kll/GetQuantilesUDFTest.java | 16 ++++++++++++--
 .../datasketches/hive/kll/GetRankUDFTest.java      | 10 ++++++++-
 18 files changed, 189 insertions(+), 57 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/hive/kll/GetCdfUDF.java 
b/src/main/java/org/apache/datasketches/hive/kll/GetCdfUDF.java
index 6eb91b9..1683a0a 100644
--- a/src/main/java/org/apache/datasketches/hive/kll/GetCdfUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/kll/GetCdfUDF.java
@@ -23,15 +23,19 @@ import java.util.List;
 
 import org.apache.datasketches.hive.common.BytesWritableHelper;
 import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
 
 @Description(
   name = "GetCDF",
-  value = "_FUNC_(sketch, split points...)",
+  value = "_FUNC_(sketch, [inclusive,] split points...)",
   extended = "Returns an approximation to the Cumulative Distribution Function 
(CDF)"
   + " from a sketch given a set of split points (values)."
+  + " The optional boolean parameter 'inclusive' (default: true) determines if 
the result includes"
+  + " values less than or equal to each target fraction or, if false, only 
values strictly less than"
+  + " each target fraction."
   + " Split points are an array of M unique, monotonically increasing values"
   + " that divide the real number line into M+1 consecutive disjoint 
intervals."
   + " The function returns an array of M+1 double valuess, the first M of 
which are approximations"
@@ -42,16 +46,31 @@ import org.apache.hadoop.io.BytesWritable;
 public class GetCdfUDF extends UDF {
 
   /**
-   * Returns a list of ranks (CDF) from a given sketch
+   * Returns a list of ranks (CDF) from a given sketch. Equivalent to calling
+   * GetCDF(sketch, true, splitPoints...)
    * @param serializedSketch serialized sketch
    * @param splitPoints list of unique and monotonically increasing values
    * @return list of fractions from 0 to 1
    */
   public List<Double> evaluate(final BytesWritable serializedSketch, final 
Float... splitPoints) {
+    return evaluate(serializedSketch, true, splitPoints);
+  }
+
+  /**
+   * Returns a list of ranks (CDF) from a given sketch, using the given
+   * rank inclusion criterion.
+   * @param serializedSketch serialized sketch
+   * @param inclusive if true, the rank of an item includes its own weight
+   * @param splitPoints list of unique and monotonically increasing values
+   * @return list of fractions from 0 to 1
+   */
+  public List<Double> evaluate(final BytesWritable serializedSketch, final 
Boolean inclusive, final Float... splitPoints) {
     if (serializedSketch == null) { return null; }
     final KllFloatsSketch sketch =
         
KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(serializedSketch));
-    final double[] cdf = sketch.getCDF(Util.objectsToPrimitives(splitPoints));
+    if (sketch.isEmpty()) { return null; }
+    final double[] cdf = sketch.getCDF(Util.objectsToPrimitives(splitPoints),
+        inclusive ? QuantileSearchCriteria.INCLUSIVE : 
QuantileSearchCriteria.EXCLUSIVE);
     if (cdf == null) { return null; }
     return Util.primitivesToList(cdf);
   }
diff --git a/src/main/java/org/apache/datasketches/hive/kll/GetPmfUDF.java 
b/src/main/java/org/apache/datasketches/hive/kll/GetPmfUDF.java
index a0a7fc0..e6ec8b7 100644
--- a/src/main/java/org/apache/datasketches/hive/kll/GetPmfUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/kll/GetPmfUDF.java
@@ -23,15 +23,19 @@ import java.util.List;
 
 import org.apache.datasketches.hive.common.BytesWritableHelper;
 import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
 
 @Description(
   name = "GetPMF",
-  value = "_FUNC_(sketch, split points...)",
+  value = "_FUNC_(sketch, [inclusive,] split points...)",
   extended = "Returns an approximation to the Probability Mass Function (PMF)"
   + " from a sketch given a set of split points (values)."
+  + " The optional boolean parameter 'inclusive' (default: true) determines if 
the result includes"
+  + " values less than or equal to each target fraction or, if false, only 
values strictly less than"
+  + " each target fraction."
   + " Split points are an array of M unique, monotonically increasing values"
   + " that divide the real number line into M+1 consecutive disjoint 
intervals."
   + " The function returns an array of M+1 doubles, each of which is an 
approximation"
@@ -42,16 +46,29 @@ import org.apache.hadoop.io.BytesWritable;
 public class GetPmfUDF extends UDF {
 
   /**
-   * Returns a list of fractions (PMF) from a given sketch
+   * Returns a list of fractions (PMF) from a given sketch. Equivalent to 
calling
+   * GetPMF(sketch, true, splitPoints...)
    * @param serializedSketch serialized sketch
    * @param splitPoints list of unique and monotonically increasing values
    * @return list of fractions from 0 to 1
    */
   public List<Double> evaluate(final BytesWritable serializedSketch, final 
Float... splitPoints) {
+    return evaluate(serializedSketch, true, splitPoints);
+  }
+  /**
+   * Returns a list of fractions (PMF) from a given sketch
+   * @param serializedSketch serialized sketch
+   * @param inclusive if true, the rank of an item includes its own weight
+   * @param splitPoints list of unique and monotonically increasing values
+   * @return list of fractions from 0 to 1
+   */
+  public List<Double> evaluate(final BytesWritable serializedSketch, final 
Boolean inclusive, final Float... splitPoints) {
     if (serializedSketch == null) { return null; }
     final KllFloatsSketch sketch =
         
KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(serializedSketch));
-    final double[] pmf = sketch.getPMF(Util.objectsToPrimitives(splitPoints));
+    if (sketch.isEmpty()) { return null; }
+    final double[] pmf = sketch.getPMF(Util.objectsToPrimitives(splitPoints),
+        inclusive ? QuantileSearchCriteria.INCLUSIVE: 
QuantileSearchCriteria.EXCLUSIVE);
     if (pmf == null) { return null; }
     return Util.primitivesToList(pmf);
   }
diff --git a/src/main/java/org/apache/datasketches/hive/kll/GetQuantileUDF.java 
b/src/main/java/org/apache/datasketches/hive/kll/GetQuantileUDF.java
index 51c4961..7429f24 100644
--- a/src/main/java/org/apache/datasketches/hive/kll/GetQuantileUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/kll/GetQuantileUDF.java
@@ -21,13 +21,17 @@ package org.apache.datasketches.hive.kll;
 
 import org.apache.datasketches.hive.common.BytesWritableHelper;
 import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
 
-@Description(name = "GetQuantile", value = "_FUNC_(sketch, fraction)",
+@Description(name = "GetQuantile", value = "_FUNC_(sketch, [inclusive,] 
fraction)",
 extended = " Returns a quantile value from a given KllFloatsSketch."
 + " A single value for a given fraction is returned."
++ " The optional boolean parameter 'inclusive' (default: true) determines if 
the result includes"
++ " values less than or equal to the fraction or, if false, only values 
strictly less than"
++ " the fraction."
 + " The fraction represents a normalized rank, and must be from 0 to 1 
inclusive."
 + " For example, a fraction of 0.5 corresponds to 50th percentile, which is"
 + " the median value of the distribution (the number separating the higher 
half"
@@ -36,16 +40,30 @@ extended = " Returns a quantile value from a given 
KllFloatsSketch."
 public class GetQuantileUDF extends UDF {
 
   /**
-   * Returns a quantile value from a given sketch
+   * Returns a quantile value from a given sketch. Equivalent to calling
+   * GetQuantile(sketch, true, fraction)
    * @param serializedSketch serialized sketch
    * @param fraction value from 0 to 1 inclusive
    * @return quantile value
    */
   public Float evaluate(final BytesWritable serializedSketch, final double 
fraction) {
+    return evaluate(serializedSketch, true, fraction);
+  }
+
+  /**
+   * Returns a quantile value from a given sketch
+   * @param serializedSketch serialized sketch
+   * @param inclusive if true, the given rank is considered inclusive 
(includes weight of an item)
+   * @param fraction value from 0 to 1 inclusive
+   * @return quantile value
+   */
+  public Float evaluate(final BytesWritable serializedSketch, final Boolean 
inclusive, final double fraction) {
     if (serializedSketch == null) { return null; }
     final KllFloatsSketch sketch =
         
KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(serializedSketch));
-    return sketch.getQuantile(fraction);
+    if (sketch.isEmpty()) { return null; }
+    return sketch.getQuantile(fraction,
+      inclusive ? QuantileSearchCriteria.INCLUSIVE : 
QuantileSearchCriteria.EXCLUSIVE);
   }
 
 }
diff --git 
a/src/main/java/org/apache/datasketches/hive/kll/GetQuantilesUDF.java 
b/src/main/java/org/apache/datasketches/hive/kll/GetQuantilesUDF.java
index d1ca522..c619faf 100644
--- a/src/main/java/org/apache/datasketches/hive/kll/GetQuantilesUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/kll/GetQuantilesUDF.java
@@ -23,14 +23,19 @@ import java.util.List;
 
 import org.apache.datasketches.hive.common.BytesWritableHelper;
 import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
 
 @Description(
   name = "GetQuantiles",
-  value = "_FUNC_(sketch, fractions...)",
+  value = "_FUNC_(sketch, [inclusive,] fractions...)",
   extended = "Returns quantile values from a given KllFloatsSketch based on a 
given list of fractions."
+  + " The optional boolean parameter 'inclusive' determines if the interval is 
inclusive,"
+  + " which is inclusive of the left fraction and exclusive of the right 
fraction, or"
+  + " the alternative of exclusive of the left fraction and inclusive of the 
right fraction."
+  + " Defaults to inclusive (of left fraction) when not specified."
   + " The fractions represent normalized ranks, and must be from 0 to 1 
inclusive."
   + " For example, a fraction of 0.5 corresponds to 50th percentile,"
   + " which is the median value of the distribution (the number separating the 
higher"
@@ -39,16 +44,30 @@ import org.apache.hadoop.io.BytesWritable;
 public class GetQuantilesUDF extends UDF {
 
   /**
-   * Returns a list of quantile values from a given sketch
+   * Returns a list of quantile values from a given sketch. Equivalent to 
calling
+   * GetQuantiles(sketch, true, fractions...)
    * @param serializedSketch serialized sketch
    * @param fractions list of values from 0 to 1 inclusive
    * @return list of quantile values
    */
   public List<Float> evaluate(final BytesWritable serializedSketch, final 
Double... fractions) {
+    return evaluate(serializedSketch, true, fractions);
+  }
+
+  /**
+   * Returns a list of quantile values from a given sketch
+   * @param serializedSketch serialized sketch
+   * @param inclusive if true, the given ranks are considered inclusive 
(include weight of an item)
+   * @param fractions list of values from 0 to 1 inclusive
+   * @return list of quantile values
+   */
+  public List<Float> evaluate(final BytesWritable serializedSketch, final 
Boolean inclusive, final Double... fractions) {
     if (serializedSketch == null) { return null; }
     final KllFloatsSketch sketch =
         
KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(serializedSketch));
-    return 
Util.primitivesToList(sketch.getQuantiles(Util.objectsToPrimitives(fractions)));
+    if (sketch.isEmpty()) { return null; }
+    return 
Util.primitivesToList(sketch.getQuantiles(Util.objectsToPrimitives(fractions),
+      inclusive ? QuantileSearchCriteria.INCLUSIVE : 
QuantileSearchCriteria.EXCLUSIVE));
   }
 
 }
diff --git a/src/main/java/org/apache/datasketches/hive/kll/GetRankUDF.java 
b/src/main/java/org/apache/datasketches/hive/kll/GetRankUDF.java
index 578bcd1..f7444f0 100644
--- a/src/main/java/org/apache/datasketches/hive/kll/GetRankUDF.java
+++ b/src/main/java/org/apache/datasketches/hive/kll/GetRankUDF.java
@@ -21,12 +21,15 @@ package org.apache.datasketches.hive.kll;
 
 import org.apache.datasketches.hive.common.BytesWritableHelper;
 import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.BytesWritable;
 
-@Description(name = "GetRank", value = "_FUNC_(sketch, value)",
+@Description(name = "GetRank", value = "_FUNC_(sketch, [inclusive,] value)",
 extended = " Returns a normalized rank of a given value from a given 
KllFloatsSketch."
++ " The optional boolean parameter inclusive (default: true) determines if the 
weight of the"
++ " given value is included in the rank or not."
 + " The returned rank is an approximation to the fraction of values of the 
distribution"
 + " that are less than the given value (mass of the distribution below the 
given value).")
 @SuppressWarnings("deprecation")
@@ -39,10 +42,23 @@ public class GetRankUDF extends UDF {
    * @return rank
    */
   public Double evaluate(final BytesWritable serializedSketch, final float 
value) {
+    return evaluate(serializedSketch, true, value);
+  }
+
+  /**
+   * Returns a normalized rank of a given value from a given sketch
+   * @param serializedSketch serialized sketch
+   * @param inclusive if true the weight of the given item is included into 
the rank.
+   * Otherwise the rank equals the sum of the weights of all items that are 
less than the given item
+   * @param value the given value
+   * @return rank
+   */
+  public Double evaluate(final BytesWritable serializedSketch, final Boolean 
inclusive, final float value) {
     if (serializedSketch == null) { return null; }
     final KllFloatsSketch sketch =
         
KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(serializedSketch));
-    return sketch.getRank(value);
+    if (sketch.isEmpty()) { return null; }
+    return sketch.getRank(value, inclusive ? QuantileSearchCriteria.INCLUSIVE 
: QuantileSearchCriteria.EXCLUSIVE);
   }
 
 }
diff --git 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromDoublesSketchUDF.java
 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromDoublesSketchUDF.java
index 623087c..a59f59c 100644
--- 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromDoublesSketchUDF.java
+++ 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromDoublesSketchUDF.java
@@ -33,10 +33,9 @@ import org.apache.hadoop.io.BytesWritable;
   value = "_FUNC_(sketch, [inclusive,] split points...)",
   extended = "Returns an approximation to the Cumulative Distribution Function 
(CDF)"
   + " from a sketch given a set of split points (values)."
-  + " The optional boolean parameter 'inclusive' determines if the interval is 
inclusive,"
-  + " which is inclusive of the left split point and exclusive of the right 
split point, or"
-  + " the alternative of exclusive of the split point and inclusive of the 
right split point."
-  + " Defaults to inclusive (of left split point) when not specified."
+  + " The optional boolean parameter 'inclusive' (default: true) determines 
whether the rank of an"
+  + " item includes its own weight. If true, such items are included in the 
interval to the left of"
+  + " the split point; otherwise they are included in the interval to the 
right of the split point."
   + " Split points are an array of M unique, monotonically increasing values"
   + " that divide the real number line into M+1 consecutive disjoint 
intervals."
   + " The function returns an array of M+1 double valuess, the first M of 
which are approximations"
diff --git 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDF.java
 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDF.java
index 47749c7..fdb75bc 100644
--- 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDF.java
+++ 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDF.java
@@ -35,11 +35,10 @@ import org.apache.hadoop.io.BytesWritable;
     value = "_FUNC_(sketch, [inclusive,] split points...)",
     extended = "Returns an approximation to the Cumulative Distribution 
Function (CDF)"
     + " from a sketch given a set of split points (values)."
-    + " The optional boolean parameter 'inclusive' determines if the interval 
is inclusive,"
-    + " which is inclusive of the left split point and exclusive of the right 
split point, or"
-    + " the alternative of exclusive of the split point and inclusive of the 
right split point."
-    + " Defaults to inclusive (of left split point) when not specified."
-    + " Split points are an array of M unique, monotonically increasing values"
+    + " The optional boolean parameter 'inclusive' (default: true) determines 
whether the rank of an"
+    + " item includes its own weight. If true, such items are included in the 
interval to the left of"
+    + " the split point; otherwise they are included in the interval to the 
right of the split point."
+      + " Split points are an array of M unique, monotonically increasing 
values"
     + " that divide the domain into M+1 consecutive disjoint intervals."
     + " The function returns an array of M+1 double valuess, the first M of 
which are approximations"
     + " to the ranks of the corresponding split points (fraction of input 
stream values that are less"
diff --git 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromDoublesSketchUDF.java
 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromDoublesSketchUDF.java
index e7380a4..4e7b486 100644
--- 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromDoublesSketchUDF.java
+++ 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromDoublesSketchUDF.java
@@ -33,10 +33,9 @@ import org.apache.hadoop.io.BytesWritable;
   value = "_FUNC_(sketch, [inclusive,] split points...)",
   extended = "Returns an approximation to the Probability Mass Function (PMF)"
   + " from a sketch given a set of split points (values)."
-  + " The optional boolean parameter 'inclusive' determines if the interval is 
inclusive,"
-  + " which is inclusive of the left split point and exclusive of the right 
split point, or"
-  + " the alternative of exclusive of the split point and inclusive of the 
right split point."
-  + " Defaults to inclusive (of left split point) when not specified."
+  + " The optional boolean parameter 'inclusive' (default: true) determines 
whether the rank of an"
+  + " item includes its own weight. If true, such items are included in the 
interval to the left of"
+  + " the split point; otherwise they are included in the interval to the 
right of the split point."
   + " Split points are an array of M unique, monotonically increasing values"
   + " that divide the real number line into M+1 consecutive disjoint 
intervals."
   + " The function returns an array of M+1 doubles, each of which is an 
approximation"
diff --git 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDF.java
 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDF.java
index 0ee1267..514809b 100644
--- 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDF.java
+++ 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDF.java
@@ -35,10 +35,9 @@ import org.apache.hadoop.io.BytesWritable;
     value = "_FUNC_(sketch, [inclusive,] split points...)",
     extended = "Returns an approximation to the Probability Mass Function 
(PMF)"
     + " from a sketch given a set of split points (values)."
-    + " The optional boolean parameter 'inclusive' determines if the interval 
is inclusive,"
-    + " which is inclusive of the left split point and exclusive of the right 
split point, or"
-    + " the alternative of exclusive of the split point and inclusive of the 
right split point."
-    + " Defaults to inclusive (of left split point) when not specified."
+    + " The optional boolean parameter 'inclusive' (default: true) determines 
whether the rank of an"
+    + " item includes its own weight. If true, such items are included in the 
interval to the left of"
+    + " the split point; otherwise they are included in the interval to the 
right of the split point."
     + " Split points are an array of M unique, monotonically increasing values"
     + " that divide the domain into M+1 consecutive disjoint intervals."
     + " The function returns an array of M+1 doubles, each of which is an 
approximation"
diff --git 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromDoublesSketchUDF.java
 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromDoublesSketchUDF.java
index d92b6b8..7fa0cf9 100644
--- 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromDoublesSketchUDF.java
+++ 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromDoublesSketchUDF.java
@@ -29,10 +29,9 @@ import org.apache.hadoop.io.BytesWritable;
 @Description(name = "GetQuantile", value = "_FUNC_(sketch, [inclusive,] 
fraction)",
     extended = " Returns a quantile value from a given DoublesSketch sketch."
     + " A single value for a given fraction is returned."
-    + " The optional boolean parameter 'inclusive' determines if the interval 
is inclusive,"
-    + " which is inclusive of the left split point and exclusive of the right 
split point, or"
-    + " the alternative of exclusive of the split point and inclusive of the 
right split point."
-    + " Defaults to inclusive (of left split point) when not specified."
+    + " The optional boolean parameter 'inclusive' (default: true) determines 
if the result includes"
+    + " values less than or equal to the fraction or, if false, only values 
strictly less than"
+    + " the fraction."
     + " The fraction represents a normalized rank, and must be from 0 to 1 
inclusive."
     + " For example, a fraction of 0.5 corresponds to 50th percentile, which 
is"
     + " the median value of the distribution (the number separating the higher 
half"
@@ -54,7 +53,7 @@ public class GetQuantileFromDoublesSketchUDF extends UDF {
   /**
    * Returns a quantile value from a given sketch
    * @param serializedSketch serialized sketch
-   * @param inclusive if true, the interval is inclusive of the left split 
point and exclusive of the right split point
+   * @param inclusive if true, the given rank is considered inclusive 
(includes weight of an item)
    * @param fraction value from 0 to 1 inclusive
    * @return quantile value
    */
diff --git 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDF.java
 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDF.java
index 4aca275..c961d6c 100644
--- 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDF.java
+++ 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDF.java
@@ -32,10 +32,9 @@ import org.apache.hadoop.io.BytesWritable;
 @Description(name = "GetQuantile", value = "_FUNC_(sketch, [inclusive,] 
fraction)",
     extended = " Returns a quantile value from a given ItemsSketch<String> 
sketch."
     + " A single value for a given fraction is returned."
-    + " The optional boolean parameter 'inclusive' determines if the interval 
is inclusive,"
-    + " which is inclusive of the left split point and exclusive of the right 
split point, or"
-    + " the alternative of exclusive of the split point and inclusive of the 
right split point."
-    + " Defaults to inclusive (of left split point) when not specified."
+    + " The optional boolean parameter 'inclusive' (default: true) determines 
if the result includes"
+    + " values less than or equal to the fraction or, if false, only values 
strictly less than"
+    + " the fraction."
     + " The fraction represents a normalized rank, and must be from 0 to 1 
inclusive."
     + " For example, a fraction of 0.5 corresponds to 50th percentile, which 
is"
     + " the median value of the distribution (the number separating the higher 
half"
@@ -57,7 +56,8 @@ public class GetQuantileFromStringsSketchUDF extends UDF {
   /**
    * Returns a quantile value from a given sketch
    * @param serializedSketch serialized sketch
-   * @param inclusive if true, the interval is inclusive of the left split 
point and exclusive of the right split point   * @param fraction value from 0 
to 1 inclusive
+   * @param inclusive if true, the given rank is considered inclusive 
(includes weight of an item)
+   * @param fraction value from 0 to 1 inclusive
    * @return quantile value
    */
   public String evaluate(final BytesWritable serializedSketch, final Boolean 
inclusive, final double fraction) {
diff --git 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromDoublesSketchUDF.java
 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromDoublesSketchUDF.java
index ed49e42..0ec1415 100644
--- 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromDoublesSketchUDF.java
+++ 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromDoublesSketchUDF.java
@@ -33,10 +33,9 @@ import org.apache.hadoop.io.BytesWritable;
   value = "_FUNC_(sketch, [inclusive,] fractions...) or _FUNC_(sketch, 
[inclusive,] number)",
   extended = "Returns quantile values from a given DoublesSketch based on a 
given"
   + " list of fractions or a number of evenly spaced fractions."
-  + " The optional boolean parameter 'inclusive' determines if the interval is 
inclusive,"
-  + " which is inclusive of the left split point and exclusive of the right 
split point, or"
-  + " the alternative of exclusive of the split point and inclusive of the 
right split point."
-  + " Defaults to inclusive (of left split point) when not specified."
+  + " The optional boolean parameter 'inclusive' (default: true) determines if 
the result includes"
+  + " values less than or equal to each target fraction or, if false, only 
values strictly less than"
+  + " each target fraction."
   + " The fractions represent normalized ranks, and must be from 0 to 1 
inclusive."
   + " For example, a fraction of 0.5 corresponds to 50th percentile,"
   + " which is the median value of the distribution (the number separating the 
higher"
@@ -62,7 +61,7 @@ public class GetQuantilesFromDoublesSketchUDF extends UDF {
   /**
    * Returns a list of quantile values from a given sketch
    * @param serializedSketch serialized sketch
-   * @param inclusive if true, the interval is inclusive of the left split 
point and exclusive of the right split point
+   * @param inclusive if true, the given ranks are considered inclusive 
(include weight of an item)
    * @param fractions list of values from 0 to 1 inclusive
    * @return list of quantile values
    */
@@ -88,7 +87,7 @@ public class GetQuantilesFromDoublesSketchUDF extends UDF {
   /**
    * Returns a list of quantile values from a given sketch
    * @param serializedSketch serialized sketch
-   * @param inclusive if true, the interval is inclusive of the left split 
point and exclusive of the right split point
+   * @param inclusive if true, the given ranks are considered inclusive 
(include weight of an item)
    * @param number of evenly spaced fractions
    * @return list of quantile values
    */
diff --git 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDF.java
 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDF.java
index a685c15..6689f66 100644
--- 
a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDF.java
+++ 
b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDF.java
@@ -36,10 +36,9 @@ import org.apache.hadoop.io.BytesWritable;
     value = "_FUNC_(sketch, [inclusive,] fractions...) or _FUNC_(sketch, 
[inclusive,] number)",
     extended = "Returns quantile values from a given ItemsSketch<String> based 
on a given"
     + " list of fractions or a number of evenly spaced fractions."
-    + " The optional boolean parameter 'inclusive' determines if the interval 
is inclusive,"
-    + " which is inclusive of the left split point and exclusive of the right 
split point, or"
-    + " the alternative of exclusive of the split point and inclusive of the 
right split point."
-    + " Defaults to inclusive (of left split point) when not specified."
+    + " The optional boolean parameter 'inclusive' (default: true) determines 
if the result includes"
+    + " values less than or equal to each target fraction or, if false, only 
values strictly less than"
+    + " each target fraction."
     + " The fractions represent normalized ranks, and must be from 0 to 1 
inclusive."
     + " For example, a fraction of 0.5 corresponds to 50th percentile,"
     + " which is the median value of the distribution (the number separating 
the higher"
@@ -65,7 +64,7 @@ public class GetQuantilesFromStringsSketchUDF extends UDF {
   /**
    * Returns a list of quantile values from a given sketch
    * @param serializedSketch serialized sketch
-   * @param inclusive if true, the interval is inclusive of the left split point and exclusive of the right split point
+   * @param inclusive if true, the given ranks are considered inclusive (include weight of an item)
    * @param fractions list of values from 0 to 1 inclusive
    * @return list of quantile values
    */
@@ -95,7 +94,7 @@ public class GetQuantilesFromStringsSketchUDF extends UDF {
   /**
    * Returns a list of quantile values from a given sketch
    * @param serializedSketch serialized sketch
-   * @param inclusive if true, the interval is inclusive of the left split point and exclusive of the right split point
+   * @param inclusive if true, the given ranks are considered inclusive (include weight of an item)
    * @param number of evenly spaced fractions
    * @return list of quantile values
    */
diff --git a/src/test/java/org/apache/datasketches/hive/kll/GetCdfUDFTest.java b/src/test/java/org/apache/datasketches/hive/kll/GetCdfUDFTest.java
index f14c3fb..19a4bed 100644
--- a/src/test/java/org/apache/datasketches/hive/kll/GetCdfUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/kll/GetCdfUDFTest.java
@@ -61,9 +61,20 @@ public class GetCdfUDFTest {
     sketch.update(2);
     sketch.update(3);
     sketch.update(4);
+
+    // inclusive
     List<Double> result = new GetCdfUDF().evaluate(new BytesWritable(sketch.toByteArray()), 1f, 3f, 4f);
     Assert.assertNotNull(result);
     Assert.assertEquals(result.size(), 4);
+    Assert.assertEquals((double)result.get(0), 0.25);
+    Assert.assertEquals((double)result.get(1), 0.75);
+    Assert.assertEquals((double)result.get(2), 1.0);
+    Assert.assertEquals((double)result.get(3), 1.0);
+
+    // exclusive
+    result = new GetCdfUDF().evaluate(new BytesWritable(sketch.toByteArray()), false, 1f, 3f, 4f);
+    Assert.assertNotNull(result);
+    Assert.assertEquals(result.size(), 4);
     Assert.assertEquals((double)result.get(0), 0.0);
     Assert.assertEquals((double)result.get(1), 0.5);
     Assert.assertEquals((double)result.get(2), 0.75);
diff --git a/src/test/java/org/apache/datasketches/hive/kll/GetPmfUDFTest.java b/src/test/java/org/apache/datasketches/hive/kll/GetPmfUDFTest.java
index 9f3d2c6..5086a53 100644
--- a/src/test/java/org/apache/datasketches/hive/kll/GetPmfUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/kll/GetPmfUDFTest.java
@@ -61,9 +61,20 @@ public class GetPmfUDFTest {
     sketch.update(2);
     sketch.update(3);
     sketch.update(4);
+
+    // inclusive
     List<Double> result = new GetPmfUDF().evaluate(new BytesWritable(sketch.toByteArray()), 1f, 3f, 5f);
     Assert.assertNotNull(result);
     Assert.assertEquals(result.size(), 4);
+    Assert.assertEquals((double)result.get(0), 0.25);
+    Assert.assertEquals((double)result.get(1), 0.5);
+    Assert.assertEquals((double)result.get(2), 0.25);
+    Assert.assertEquals((double)result.get(3), 0.0);
+
+    // exclusive
+    result = new GetPmfUDF().evaluate(new BytesWritable(sketch.toByteArray()), false, 1f, 3f, 5f);
+    Assert.assertNotNull(result);
+    Assert.assertEquals(result.size(), 4);
     Assert.assertEquals((double)result.get(0), 0.0);
     Assert.assertEquals((double)result.get(1), 0.5);
     Assert.assertEquals((double)result.get(2), 0.5);
diff --git a/src/test/java/org/apache/datasketches/hive/kll/GetQuantileUDFTest.java b/src/test/java/org/apache/datasketches/hive/kll/GetQuantileUDFTest.java
index 7e0f269..b78a9eb 100644
--- a/src/test/java/org/apache/datasketches/hive/kll/GetQuantileUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/kll/GetQuantileUDFTest.java
@@ -39,9 +39,17 @@ public class GetQuantileUDFTest {
     sketch.update(1);
     sketch.update(2);
     sketch.update(3);
-    final Float result = new GetQuantileUDF().evaluate(new BytesWritable(sketch.toByteArray()), 0.5);
+    sketch.update(4);
+
+    // inclusive
+    Float result = new GetQuantileUDF().evaluate(new BytesWritable(sketch.toByteArray()), 0.5);
     Assert.assertNotNull(result);
     Assert.assertEquals((double)result, 2f);
+
+    // exclusive
+    result = new GetQuantileUDF().evaluate(new BytesWritable(sketch.toByteArray()), false, 0.5);
+    Assert.assertNotNull(result);
+    Assert.assertEquals((double)result, 3f);
   }
 
 }
diff --git a/src/test/java/org/apache/datasketches/hive/kll/GetQuantilesUDFTest.java b/src/test/java/org/apache/datasketches/hive/kll/GetQuantilesUDFTest.java
index 6a165df..347c22c 100644
--- a/src/test/java/org/apache/datasketches/hive/kll/GetQuantilesUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/kll/GetQuantilesUDFTest.java
@@ -52,12 +52,24 @@ public class GetQuantilesUDFTest {
     sketch.update(1);
     sketch.update(2);
     sketch.update(3);
-    final List<Float> result = new GetQuantilesUDF().evaluate(new BytesWritable(sketch.toByteArray()), 0.0, 0.5, 1.0);
+    sketch.update(4);
+
+    // inclusive
+    List<Float> result = new GetQuantilesUDF().evaluate(new BytesWritable(sketch.toByteArray()), 0.0, 0.5, 1.0);
     Assert.assertNotNull(result);
     Assert.assertEquals(result.size(), 3);
     Assert.assertEquals((double)result.get(0), 1f);
     Assert.assertEquals((double)result.get(1), 2f);
-    Assert.assertEquals((double)result.get(2), 3f);
+    Assert.assertEquals((double)result.get(2), 4f);
+
+    // exclusive
+    result = new GetQuantilesUDF().evaluate(new BytesWritable(sketch.toByteArray()), false, 0.0, 0.5, 1.0);
+    Assert.assertNotNull(result);
+    Assert.assertEquals(result.size(), 3);
+    Assert.assertEquals((double)result.get(0), 1f);
+    Assert.assertEquals((double)result.get(1), 3f);
+    Assert.assertEquals((double)result.get(2), 4f);
+
   }
 
 }
diff --git a/src/test/java/org/apache/datasketches/hive/kll/GetRankUDFTest.java b/src/test/java/org/apache/datasketches/hive/kll/GetRankUDFTest.java
index 8c87909..565c004 100644
--- a/src/test/java/org/apache/datasketches/hive/kll/GetRankUDFTest.java
+++ b/src/test/java/org/apache/datasketches/hive/kll/GetRankUDFTest.java
@@ -40,9 +40,17 @@ public class GetRankUDFTest {
     sketch.update(2);
     sketch.update(3);
     sketch.update(4);
-    final Double result = new GetRankUDF().evaluate(new BytesWritable(sketch.toByteArray()), 3f);
+
+    // inclusive
+    Double result = new GetRankUDF().evaluate(new BytesWritable(sketch.toByteArray()), 3f);
+    Assert.assertNotNull(result);
+    Assert.assertEquals((double)result, 0.75);
+
+    // exclusive
+    result = new GetRankUDF().evaluate(new BytesWritable(sketch.toByteArray()), false, 3f);
     Assert.assertNotNull(result);
     Assert.assertEquals((double)result, 0.5);
+
   }
 
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datasketches-hive) branch java_version_update updated: Possibly improve documentation, finish moving KLL to new ds-java API

Reply via email to