This is an automated email from the ASF dual-hosted git repository. jmalkin pushed a commit to branch java_version_update in repository https://gitbox.apache.org/repos/asf/datasketches-hive.git
commit 6255f0f35fca64cd8fd70ea3fc3858eba98e55a6 Author: Jon <[email protected]> AuthorDate: Fri May 10 22:53:04 2024 -0700 Make main code use ds-java 6.0.0 API --- .../hive/frequencies/DataToItemsSketchUDAF.java | 2 +- .../hive/frequencies/DataToStringsSketchUDAF.java | 2 +- .../GetFrequentItemsFromStringsSketchUDTF.java | 2 +- .../hive/frequencies/ItemsEvaluator.java | 2 +- .../datasketches/hive/frequencies/ItemsState.java | 2 +- .../hive/frequencies/UnionItemsSketchUDAF.java | 2 +- .../hive/frequencies/UnionStringsSketchUDAF.java | 2 +- .../apache/datasketches/hive/kll/SketchState.java | 4 ++-- .../hive/quantiles/DataToItemsSketchUDAF.java | 6 +++--- .../hive/quantiles/DataToStringsSketchUDAF.java | 4 ++-- .../hive/quantiles/DoublesUnionState.java | 2 +- .../hive/quantiles/GetCdfFromStringsSketchUDF.java | 3 ++- .../hive/quantiles/GetKFromStringsSketchUDF.java | 3 ++- .../hive/quantiles/GetNFromStringsSketchUDF.java | 3 ++- .../hive/quantiles/GetPmfFromStringsSketchUDF.java | 3 ++- .../quantiles/GetQuantileFromStringsSketchUDF.java | 3 ++- .../GetQuantilesFromDoublesSketchUDF.java | 20 +++++++++++++++++- .../GetQuantilesFromStringsSketchUDF.java | 24 ++++++++++++++++++++-- .../hive/quantiles/ItemsEvaluator.java | 8 +++++--- .../hive/quantiles/ItemsUnionState.java | 16 ++++++++------- .../hive/quantiles/StringsSketchToStringUDF.java | 3 ++- .../hive/quantiles/UnionItemsSketchUDAF.java | 6 +++--- .../hive/quantiles/UnionStringsSketchUDAF.java | 4 ++-- .../tuple/DoubleSummarySketchToEstimatesUDF.java | 4 ++-- .../tuple/DoubleSummarySketchToPercentileUDF.java | 4 ++-- .../frequencies/DataToStringsSketchUDAFTest.java | 4 ++-- .../GetFrequentItemsFromStringsSketchUDTFTest.java | 4 ++-- .../frequencies/UnionStringsSketchUDAFTest.java | 4 ++-- .../quantiles/DataToStringsSketchUDAFTest.java | 4 ++-- .../quantiles/GetCdfFromStringsSketchUDFTest.java | 4 ++-- .../quantiles/GetKFromStringsSketchUDFTest.java | 4 ++-- .../quantiles/GetNFromStringsSketchUDFTest.java | 4 ++-- .../quantiles/GetPmfFromStringsSketchUDFTest.java | 4 ++-- .../GetQuantileFromStringsSketchUDFTest.java | 6 +++--- .../GetQuantilesFromStringsSketchUDFTest.java | 6 +++--- .../quantiles/StringsSketchToStringUDFTest.java | 4 ++-- .../hive/quantiles/UnionStringsSketchUDAFTest.java | 4 ++-- 37 files changed, 117 insertions(+), 69 deletions(-) diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/DataToItemsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/frequencies/DataToItemsSketchUDAF.java index 6b17ef1..9aa411c 100644 --- a/src/main/java/org/apache/datasketches/hive/frequencies/DataToItemsSketchUDAF.java +++ b/src/main/java/org/apache/datasketches/hive/frequencies/DataToItemsSketchUDAF.java @@ -19,7 +19,7 @@ package org.apache.datasketches.hive.frequencies; -import org.apache.datasketches.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAF.java index ab0156f..6f7e204 100644 --- a/src/main/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAF.java +++ b/src/main/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAF.java @@ -19,7 +19,7 @@ package org.apache.datasketches.hive.frequencies; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.Description; diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTF.java b/src/main/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTF.java index 2673677..87c3879 100644 --- a/src/main/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTF.java +++ b/src/main/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTF.java @@ -21,7 +21,7 @@ package org.apache.datasketches.hive.frequencies; import java.util.Arrays; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.frequencies.ErrorType; import org.apache.datasketches.frequencies.ItemsSketch; import org.apache.datasketches.hive.common.BytesWritableHelper; diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/ItemsEvaluator.java b/src/main/java/org/apache/datasketches/hive/frequencies/ItemsEvaluator.java index c3154a4..321f153 100644 --- a/src/main/java/org/apache/datasketches/hive/frequencies/ItemsEvaluator.java +++ b/src/main/java/org/apache/datasketches/hive/frequencies/ItemsEvaluator.java @@ -19,7 +19,7 @@ package org.apache.datasketches.hive.frequencies; -import org.apache.datasketches.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.datasketches.frequencies.ItemsSketch; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.memory.Memory; diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/ItemsState.java b/src/main/java/org/apache/datasketches/hive/frequencies/ItemsState.java index 08d0243..d267d2f 100644 --- a/src/main/java/org/apache/datasketches/hive/frequencies/ItemsState.java +++ b/src/main/java/org/apache/datasketches/hive/frequencies/ItemsState.java @@ -19,7 +19,7 @@ package org.apache.datasketches.hive.frequencies; -import org.apache.datasketches.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.datasketches.frequencies.ItemsSketch; import org.apache.datasketches.memory.Memory; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AbstractAggregationBuffer; diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/UnionItemsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/frequencies/UnionItemsSketchUDAF.java index b91270f..eed29bf 100644 --- a/src/main/java/org/apache/datasketches/hive/frequencies/UnionItemsSketchUDAF.java +++ b/src/main/java/org/apache/datasketches/hive/frequencies/UnionItemsSketchUDAF.java @@ -19,7 +19,7 @@ package org.apache.datasketches.hive.frequencies; -import org.apache.datasketches.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; diff --git a/src/main/java/org/apache/datasketches/hive/frequencies/UnionStringsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/frequencies/UnionStringsSketchUDAF.java index 229f38a..29ccd48 100644 --- a/src/main/java/org/apache/datasketches/hive/frequencies/UnionStringsSketchUDAF.java +++ b/src/main/java/org/apache/datasketches/hive/frequencies/UnionStringsSketchUDAF.java @@ -19,7 +19,7 @@ package org.apache.datasketches.hive.frequencies; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; diff --git a/src/main/java/org/apache/datasketches/hive/kll/SketchState.java b/src/main/java/org/apache/datasketches/hive/kll/SketchState.java index 667183e..5da420b 100644 --- a/src/main/java/org/apache/datasketches/hive/kll/SketchState.java +++ b/src/main/java/org/apache/datasketches/hive/kll/SketchState.java @@ -28,11 +28,11 @@ class SketchState extends AbstractAggregationBuffer { // initialization is needed in the first phase (iterate) only void init() { - this.state_ = new KllFloatsSketch(); + this.state_ = KllFloatsSketch.newHeapInstance(); } void init(final int k) { - this.state_ = new KllFloatsSketch(k); + this.state_ = KllFloatsSketch.newHeapInstance(k); } boolean isInitialized() { diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/DataToItemsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/quantiles/DataToItemsSketchUDAF.java index d95b195..3c050be 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/DataToItemsSketchUDAF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/DataToItemsSketchUDAF.java @@ -21,7 +21,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; -import org.apache.datasketches.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -56,8 +56,8 @@ public abstract class DataToItemsSketchUDAF<T> extends AbstractGenericUDAFResolv public static abstract class DataToSketchEvaluator<T> extends ItemsEvaluator<T> { - DataToSketchEvaluator(final Comparator<? super T> comparator, final ArrayOfItemsSerDe<T> serDe) { - super(comparator, serDe); + DataToSketchEvaluator(final Class<T> clazz, final Comparator<? super T> comparator, final ArrayOfItemsSerDe<T> serDe) { + super(clazz, comparator, serDe); } @Override diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAF.java index be5874e..f45091f 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAF.java @@ -21,7 +21,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.Description; @@ -45,7 +45,7 @@ public class DataToStringsSketchUDAF extends DataToItemsSketchUDAF<String> { static class DataToStringsSketchEvaluator extends DataToSketchEvaluator<String> { DataToStringsSketchEvaluator() { - super(Comparator.naturalOrder(), new ArrayOfStringsSerDe()); + super(String.class, Comparator.naturalOrder(), new ArrayOfStringsSerDe()); } @Override diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/DoublesUnionState.java b/src/main/java/org/apache/datasketches/hive/quantiles/DoublesUnionState.java index cb6f03c..79aa40e 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/DoublesUnionState.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/DoublesUnionState.java @@ -52,7 +52,7 @@ class DoublesUnionState extends AbstractAggregationBuffer { if (this.union == null) { this.union = DoublesUnion.heapify(incomingSketch); } else { - this.union.update(incomingSketch); + this.union.union(incomingSketch); } } diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDF.java b/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDF.java index c6944ae..37c8a19 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDF.java @@ -22,7 +22,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; import java.util.List; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.hive.ql.exec.Description; @@ -52,6 +52,7 @@ public class GetCdfFromStringsSketchUDF extends UDF { public List<Double> evaluate(final BytesWritable serializedSketch, final String... splitPoints) { if (serializedSketch == null) { return null; } final ItemsSketch<String> sketch = ItemsSketch.getInstance( + String.class, BytesWritableHelper.wrapAsMemory(serializedSketch), Comparator.naturalOrder(), new ArrayOfStringsSerDe() diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/GetKFromStringsSketchUDF.java b/src/main/java/org/apache/datasketches/hive/quantiles/GetKFromStringsSketchUDF.java index 8ad8591..90ea91b 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/GetKFromStringsSketchUDF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/GetKFromStringsSketchUDF.java @@ -21,7 +21,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.hive.ql.exec.Description; @@ -41,6 +41,7 @@ public class GetKFromStringsSketchUDF extends UDF { public Integer evaluate(final BytesWritable serializedSketch) { if (serializedSketch == null) { return null; } final ItemsSketch<String> sketch = ItemsSketch.getInstance( + String.class, BytesWritableHelper.wrapAsMemory(serializedSketch), Comparator.naturalOrder(), new ArrayOfStringsSerDe() diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/GetNFromStringsSketchUDF.java b/src/main/java/org/apache/datasketches/hive/quantiles/GetNFromStringsSketchUDF.java index 533282b..236de7e 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/GetNFromStringsSketchUDF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/GetNFromStringsSketchUDF.java @@ -21,7 +21,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.hive.ql.exec.Description; @@ -42,6 +42,7 @@ public class GetNFromStringsSketchUDF extends UDF { public Long evaluate(final BytesWritable serializedSketch) { if (serializedSketch == null) { return null; } final ItemsSketch<String> sketch = ItemsSketch.getInstance( + String.class, BytesWritableHelper.wrapAsMemory(serializedSketch), Comparator.naturalOrder(), new ArrayOfStringsSerDe() diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDF.java b/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDF.java index 5b17704..5f5ab5a 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDF.java @@ -22,7 +22,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; import java.util.List; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.hive.ql.exec.Description; @@ -52,6 +52,7 @@ public class GetPmfFromStringsSketchUDF extends UDF { public List<Double> evaluate(final BytesWritable serializedSketch, final String... splitPoints) { if (serializedSketch == null) { return null; } final ItemsSketch<String> sketch = ItemsSketch.getInstance( + String.class, BytesWritableHelper.wrapAsMemory(serializedSketch), Comparator.naturalOrder(), new ArrayOfStringsSerDe() diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDF.java b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDF.java index 85f6f78..5acad31 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDF.java @@ -21,7 +21,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.hive.ql.exec.Description; @@ -47,6 +47,7 @@ public class GetQuantileFromStringsSketchUDF extends UDF { public String evaluate(final BytesWritable serializedSketch, final double fraction) { if (serializedSketch == null) { return null; } final ItemsSketch<String> sketch = ItemsSketch.getInstance( + String.class, BytesWritableHelper.wrapAsMemory(serializedSketch), Comparator.naturalOrder(), new ArrayOfStringsSerDe() diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromDoublesSketchUDF.java b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromDoublesSketchUDF.java index c04f768..98169b5 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromDoublesSketchUDF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromDoublesSketchUDF.java @@ -64,7 +64,25 @@ public class GetQuantilesFromDoublesSketchUDF extends UDF { public List<Double> evaluate(final BytesWritable serializedSketch, final int number) { if (serializedSketch == null) { return null; } final DoublesSketch sketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serializedSketch)); - final double[] quantiles = sketch.getQuantiles(number); + + double[] quantiles = null; + if (number == 1) { + quantiles = new double[1]; + quantiles[0] = sketch.getMinItem(); + } else if (number == 2) { + quantiles = new double[2]; + quantiles[0] = sketch.getMinItem(); + quantiles[1] = sketch.getMaxItem(); + } else if (number > 2) { + final double[] ranks = new double[number]; + final double delta = 1.0 / (number - 1); + for (int i = 0; i < number; i++) { + ranks[i] = i * delta; + } + quantiles = sketch.getQuantiles(ranks); + quantiles[number - 1] = sketch.getMaxItem(); // to ensure the max value is exact + } + if (quantiles == null) { return null; } return Util.primitivesToList(quantiles); } diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDF.java b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDF.java index dd5f07f..49318de 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDF.java @@ -23,7 +23,7 @@ import java.util.Arrays; import java.util.Comparator; import java.util.List; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.hive.ql.exec.Description; @@ -55,6 +55,7 @@ public class GetQuantilesFromStringsSketchUDF extends UDF { public List<String> evaluate(final BytesWritable serializedSketch, final Double... fractions) { if (serializedSketch == null) { return null; } final ItemsSketch<String> sketch = ItemsSketch.getInstance( + String.class, BytesWritableHelper.wrapAsMemory(serializedSketch), Comparator.naturalOrder(), new ArrayOfStringsSerDe() @@ -71,11 +72,30 @@ public class GetQuantilesFromStringsSketchUDF extends UDF { public List<String> evaluate(final BytesWritable serializedSketch, final int number) { if (serializedSketch == null) { return null; } final ItemsSketch<String> sketch = ItemsSketch.getInstance( + String.class, BytesWritableHelper.wrapAsMemory(serializedSketch), Comparator.naturalOrder(), new ArrayOfStringsSerDe() ); - final String[] quantiles = sketch.getQuantiles(number); + + String[] quantiles = null; + if (number == 1) { + quantiles = new String[1]; + quantiles[0] = sketch.getMinItem(); + } else if (number == 2) { + quantiles = new String[2]; + quantiles[0] = sketch.getMinItem(); + quantiles[1] = sketch.getMaxItem(); + } else if (number > 2) { + final double[] ranks = new double[number]; + final double delta = 1.0 / (number - 1); + for (int i = 0; i < number; i++) { + ranks[i] = i * delta; + } + quantiles = sketch.getQuantiles(ranks); + quantiles[number - 1] = sketch.getMaxItem(); // to ensure the max value is exact + } + if (quantiles == null) { return null; } return Arrays.asList(quantiles); } diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/ItemsEvaluator.java b/src/main/java/org/apache/datasketches/hive/quantiles/ItemsEvaluator.java index 4713974..a9879e0 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/ItemsEvaluator.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/ItemsEvaluator.java @@ -21,7 +21,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; -import org.apache.datasketches.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.quantiles.ItemsSketch; @@ -35,12 +35,14 @@ import org.apache.hadoop.io.BytesWritable; abstract class ItemsEvaluator<T> extends GenericUDAFEvaluator { + private final Class<T> clazz_; private final Comparator<? super T> comparator_; private final ArrayOfItemsSerDe<T> serDe_; protected PrimitiveObjectInspector inputObjectInspector; protected PrimitiveObjectInspector kObjectInspector; - ItemsEvaluator(final Comparator<? super T> comparator, final ArrayOfItemsSerDe<T> serDe) { + ItemsEvaluator(final Class<T> clazz, final Comparator<? super T> comparator, final ArrayOfItemsSerDe<T> serDe) { + this.clazz_ = clazz; this.comparator_ = comparator; this.serDe_ = serDe; } @@ -100,7 +102,7 @@ abstract class ItemsEvaluator<T> extends GenericUDAFEvaluator { @SuppressWarnings("deprecation") @Override public AggregationBuffer getNewAggregationBuffer() throws HiveException { - return new ItemsUnionState<>(this.comparator_, this.serDe_); + return new ItemsUnionState<>(this.clazz_, this.comparator_, this.serDe_); } } diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/ItemsUnionState.java b/src/main/java/org/apache/datasketches/hive/quantiles/ItemsUnionState.java index b7004d3..6d40ebf 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/ItemsUnionState.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/ItemsUnionState.java @@ -21,7 +21,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; -import org.apache.datasketches.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.datasketches.quantiles.ItemsUnion; @@ -29,11 +29,13 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AbstractAggreg class ItemsUnionState<T> extends AbstractAggregationBuffer { + private final Class<T> clazz_; private final Comparator<? super T> comparator_; private final ArrayOfItemsSerDe<T> serDe_; private ItemsUnion<T> union; - ItemsUnionState(final Comparator<? super T> comparator, final ArrayOfItemsSerDe<T> serDe) { + ItemsUnionState(final Class<T> clazz, final Comparator<? super T> comparator, final ArrayOfItemsSerDe<T> serDe) { + this.clazz_ = clazz; this.comparator_ = comparator; this.serDe_ = serDe; } @@ -41,9 +43,9 @@ class ItemsUnionState<T> extends AbstractAggregationBuffer { // initializing is needed only in the first phase (iterate) void init(final int k) { if (k > 0) { - this.union = ItemsUnion.getInstance(k, this.comparator_); + this.union = ItemsUnion.getInstance(this.clazz_, k, this.comparator_); } else { - this.union = ItemsUnion.getInstance(this.comparator_); + this.union = ItemsUnion.getInstance(this.clazz_, this.comparator_); } } @@ -53,18 +55,18 @@ class ItemsUnionState<T> extends AbstractAggregationBuffer { void update(final T value) { if (this.union == null) { - this.union = ItemsUnion.getInstance(this.comparator_); + this.union = ItemsUnion.getInstance(this.clazz_, this.comparator_); } this.union.update(value); } void update(final Memory serializedSketch) { final ItemsSketch<T> incomingSketch = - ItemsSketch.getInstance(serializedSketch, this.comparator_, this.serDe_); + ItemsSketch.getInstance(this.clazz_, serializedSketch, this.comparator_, this.serDe_); if (this.union == null) { this.union = ItemsUnion.getInstance(incomingSketch); } else { - this.union.update(incomingSketch); + this.union.union(incomingSketch); } } diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/StringsSketchToStringUDF.java b/src/main/java/org/apache/datasketches/hive/quantiles/StringsSketchToStringUDF.java index 904446f..1e81579 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/StringsSketchToStringUDF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/StringsSketchToStringUDF.java @@ -21,7 +21,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.hive.ql.exec.Description; @@ -41,6 +41,7 @@ public class StringsSketchToStringUDF extends UDF { public String evaluate(final BytesWritable serializedSketch) { if (serializedSketch == null) { return null; } final ItemsSketch<String> sketch = ItemsSketch.getInstance( + String.class, BytesWritableHelper.wrapAsMemory(serializedSketch), Comparator.naturalOrder(), new ArrayOfStringsSerDe() diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/UnionItemsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/quantiles/UnionItemsSketchUDAF.java index cfd35fa..1ca92ae 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/UnionItemsSketchUDAF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/UnionItemsSketchUDAF.java @@ -21,7 +21,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; -import org.apache.datasketches.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -56,8 +56,8 @@ public abstract class UnionItemsSketchUDAF<T> extends AbstractGenericUDAFResolve public static class UnionEvaluator<T> extends ItemsEvaluator<T> { - UnionEvaluator(final Comparator<? super T> comparator, final ArrayOfItemsSerDe<T> serDe) { - super(comparator, serDe); + UnionEvaluator(final Class<T> clazz, final Comparator<? super T> comparator, final ArrayOfItemsSerDe<T> serDe) { + super(clazz, comparator, serDe); } @Override diff --git a/src/main/java/org/apache/datasketches/hive/quantiles/UnionStringsSketchUDAF.java b/src/main/java/org/apache/datasketches/hive/quantiles/UnionStringsSketchUDAF.java index 641482e..2d7b0c1 100644 --- a/src/main/java/org/apache/datasketches/hive/quantiles/UnionStringsSketchUDAF.java +++ b/src/main/java/org/apache/datasketches/hive/quantiles/UnionStringsSketchUDAF.java @@ -21,7 +21,7 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; @@ -38,7 +38,7 @@ public class UnionStringsSketchUDAF extends UnionItemsSketchUDAF<String> { static class UnionStringsSketchEvaluator extends UnionEvaluator<String> { UnionStringsSketchEvaluator() { - super(Comparator.naturalOrder(), new ArrayOfStringsSerDe()); + super(String.class, Comparator.naturalOrder(), new ArrayOfStringsSerDe()); } } diff --git a/src/main/java/org/apache/datasketches/hive/tuple/DoubleSummarySketchToEstimatesUDF.java b/src/main/java/org/apache/datasketches/hive/tuple/DoubleSummarySketchToEstimatesUDF.java index f137984..e3a238d 100644 --- a/src/main/java/org/apache/datasketches/hive/tuple/DoubleSummarySketchToEstimatesUDF.java +++ b/src/main/java/org/apache/datasketches/hive/tuple/DoubleSummarySketchToEstimatesUDF.java @@ -24,7 +24,7 @@ import java.util.List; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.tuple.Sketch; -import org.apache.datasketches.tuple.SketchIterator; +import org.apache.datasketches.tuple.TupleSketchIterator; import org.apache.datasketches.tuple.Sketches; import org.apache.datasketches.tuple.SummaryDeserializer; import org.apache.datasketches.tuple.adouble.DoubleSummary; @@ -58,7 +58,7 @@ public class DoubleSummarySketchToEstimatesUDF extends UDF { final Sketch<DoubleSummary> sketch = Sketches.heapifySketch(BytesWritableHelper.wrapAsMemory(serializedSketch), SUMMARY_DESERIALIZER); double sum = 0; - final SketchIterator<DoubleSummary> it = sketch.iterator(); + final TupleSketchIterator<DoubleSummary> it = sketch.iterator(); while (it.next()) { sum += it.getSummary().getValue(); } diff --git a/src/main/java/org/apache/datasketches/hive/tuple/DoubleSummarySketchToPercentileUDF.java b/src/main/java/org/apache/datasketches/hive/tuple/DoubleSummarySketchToPercentileUDF.java index 1eda623..d0dd87c 100644 --- a/src/main/java/org/apache/datasketches/hive/tuple/DoubleSummarySketchToPercentileUDF.java +++ b/src/main/java/org/apache/datasketches/hive/tuple/DoubleSummarySketchToPercentileUDF.java @@ -23,7 +23,7 @@ import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.quantiles.DoublesSketch; import org.apache.datasketches.quantiles.UpdateDoublesSketch; import org.apache.datasketches.tuple.Sketch; -import org.apache.datasketches.tuple.SketchIterator; +import org.apache.datasketches.tuple.TupleSketchIterator; import org.apache.datasketches.tuple.Sketches; import org.apache.datasketches.tuple.SummaryDeserializer; import org.apache.datasketches.tuple.adouble.DoubleSummary; @@ -62,7 +62,7 @@ public class DoubleSummarySketchToPercentileUDF extends UDF { final Sketch<DoubleSummary> sketch = Sketches.heapifySketch(BytesWritableHelper.wrapAsMemory(serializedSketch), SUMMARY_DESERIALIZER); final UpdateDoublesSketch qs = DoublesSketch.builder().setK(QUANTILES_SKETCH_K).build(); - final SketchIterator<DoubleSummary> it = sketch.iterator(); + final TupleSketchIterator<DoubleSummary> it = sketch.iterator(); while (it.next()) { qs.update(it.getSummary().getValue()); } diff --git a/src/test/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAFTest.java b/src/test/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAFTest.java index 99fbdd2..1ebd3a2 100644 --- a/src/test/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAFTest.java +++ b/src/test/java/org/apache/datasketches/hive/frequencies/DataToStringsSketchUDAFTest.java @@ -21,8 +21,8 @@ package org.apache.datasketches.hive.frequencies; import java.util.Arrays; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.frequencies.ItemsSketch; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; diff --git a/src/test/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTFTest.java b/src/test/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTFTest.java index 12d4a07..fa3699c 100644 --- a/src/test/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTFTest.java +++ b/src/test/java/org/apache/datasketches/hive/frequencies/GetFrequentItemsFromStringsSketchUDTFTest.java @@ -38,8 +38,8 @@ import org.apache.hadoop.io.BytesWritable; import org.testng.Assert; import org.testng.annotations.Test; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.frequencies.ItemsSketch; @SuppressWarnings("javadoc") diff --git a/src/test/java/org/apache/datasketches/hive/frequencies/UnionStringsSketchUDAFTest.java b/src/test/java/org/apache/datasketches/hive/frequencies/UnionStringsSketchUDAFTest.java index 47e39d7..696eda4 100644 --- a/src/test/java/org/apache/datasketches/hive/frequencies/UnionStringsSketchUDAFTest.java +++ b/src/test/java/org/apache/datasketches/hive/frequencies/UnionStringsSketchUDAFTest.java @@ -21,8 +21,8 @@ package org.apache.datasketches.hive.frequencies; import java.util.Arrays; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.frequencies.ItemsSketch; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; diff --git a/src/test/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAFTest.java b/src/test/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAFTest.java index 4276832..db75b23 100644 --- a/src/test/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAFTest.java +++ b/src/test/java/org/apache/datasketches/hive/quantiles/DataToStringsSketchUDAFTest.java @@ -22,8 +22,8 @@ package org.apache.datasketches.hive.quantiles; import java.util.Arrays; import java.util.Comparator; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; diff --git a/src/test/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDFTest.java b/src/test/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDFTest.java index 68d5eb5..1ae86b1 100644 --- a/src/test/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDFTest.java +++ b/src/test/java/org/apache/datasketches/hive/quantiles/GetCdfFromStringsSketchUDFTest.java @@ -22,8 +22,8 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; import java.util.List; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.io.BytesWritable; import org.testng.Assert; diff --git a/src/test/java/org/apache/datasketches/hive/quantiles/GetKFromStringsSketchUDFTest.java b/src/test/java/org/apache/datasketches/hive/quantiles/GetKFromStringsSketchUDFTest.java index 0ffa53c..86c63e2 100644 --- a/src/test/java/org/apache/datasketches/hive/quantiles/GetKFromStringsSketchUDFTest.java +++ b/src/test/java/org/apache/datasketches/hive/quantiles/GetKFromStringsSketchUDFTest.java @@ -23,8 +23,8 @@ import java.util.Comparator; import org.apache.hadoop.io.BytesWritable; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.quantiles.ItemsSketch; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/hive/quantiles/GetNFromStringsSketchUDFTest.java b/src/test/java/org/apache/datasketches/hive/quantiles/GetNFromStringsSketchUDFTest.java index 6b44ac6..78f11c8 100644 --- a/src/test/java/org/apache/datasketches/hive/quantiles/GetNFromStringsSketchUDFTest.java +++ b/src/test/java/org/apache/datasketches/hive/quantiles/GetNFromStringsSketchUDFTest.java @@ -23,8 +23,8 @@ import java.util.Comparator; import org.apache.hadoop.io.BytesWritable; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.quantiles.ItemsSketch; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDFTest.java b/src/test/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDFTest.java index 7d2dd7b..a428eb4 100644 --- a/src/test/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDFTest.java +++ b/src/test/java/org/apache/datasketches/hive/quantiles/GetPmfFromStringsSketchUDFTest.java @@ -22,8 +22,8 @@ package org.apache.datasketches.hive.quantiles; import java.util.Comparator; import java.util.List; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.io.BytesWritable; import org.testng.Assert; diff --git a/src/test/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDFTest.java b/src/test/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDFTest.java index 380ff6b..edf90b2 100644 --- a/src/test/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDFTest.java +++ b/src/test/java/org/apache/datasketches/hive/quantiles/GetQuantileFromStringsSketchUDFTest.java @@ -25,9 +25,9 @@ import org.apache.hadoop.io.BytesWritable; import org.testng.Assert; import org.testng.annotations.Test; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfLongsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfLongsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.quantiles.ItemsSketch; diff --git a/src/test/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDFTest.java b/src/test/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDFTest.java index f0836b0..47c0f72 100644 --- a/src/test/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDFTest.java +++ b/src/test/java/org/apache/datasketches/hive/quantiles/GetQuantilesFromStringsSketchUDFTest.java @@ -26,9 +26,9 @@ import org.apache.hadoop.io.BytesWritable; import org.testng.Assert; import org.testng.annotations.Test; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfLongsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfLongsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.SketchesArgumentException; import org.apache.datasketches.quantiles.ItemsSketch; diff --git a/src/test/java/org/apache/datasketches/hive/quantiles/StringsSketchToStringUDFTest.java b/src/test/java/org/apache/datasketches/hive/quantiles/StringsSketchToStringUDFTest.java index cf4731b..4a15f30 100644 --- a/src/test/java/org/apache/datasketches/hive/quantiles/StringsSketchToStringUDFTest.java +++ b/src/test/java/org/apache/datasketches/hive/quantiles/StringsSketchToStringUDFTest.java @@ -23,8 +23,8 @@ import java.util.Comparator; import org.apache.hadoop.io.BytesWritable; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.quantiles.ItemsSketch; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/hive/quantiles/UnionStringsSketchUDAFTest.java b/src/test/java/org/apache/datasketches/hive/quantiles/UnionStringsSketchUDAFTest.java index 6df21f1..e69cdcc 100644 --- a/src/test/java/org/apache/datasketches/hive/quantiles/UnionStringsSketchUDAFTest.java +++ b/src/test/java/org/apache/datasketches/hive/quantiles/UnionStringsSketchUDAFTest.java @@ -22,8 +22,8 @@ package org.apache.datasketches.hive.quantiles; import java.util.Arrays; import java.util.Comparator; -import org.apache.datasketches.ArrayOfItemsSerDe; -import org.apache.datasketches.ArrayOfStringsSerDe; +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.hive.common.BytesWritableHelper; import org.apache.datasketches.quantiles.ItemsSketch; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
