This is an automated email from the ASF dual-hosted git repository.
kishoreg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 84b8143 Add support for an aggregation function returning serialized
hyperloglogs. (#4293)
84b8143 is described below
commit 84b814371f3d2c43d729370ce83a7b051cc07a25
Author: ianvkoeppe <[email protected]>
AuthorDate: Wed Jun 19 12:41:22 2019 -0500
Add support for an aggregation function returning serialized hyperloglogs.
(#4293)
Client-side batching is a common requirement for reducing the query and
response sizes to/from Pinot. In doing so, clients may then need to perform
their own aggregation of results from the multiple responses returned. This
is easy for sum, min, max, etc., but numerical values representing unique
counts cannot simply be added.
Supporting the DISTINCTCOUNTRAWHLL and DISTINCTCOUNTRAWHLLMV aggregation
functions will allow clients to receive the serialized HLLs and then
aggregate them client-side.
---
docs/pql_examples.rst | 2 +
.../data/aggregator/ValueAggregatorFactory.java | 2 +
.../function/AggregationFunctionFactory.java | 4 +
.../function/AggregationFunctionType.java | 2 +
.../DistinctCountRawHLLAggregationFunction.java | 115 +++++++++++++++++++++
.../DistinctCountRawHLLMVAggregationFunction.java | 60 +++++++++++
.../function/customobject/SerializedHLL.java | 52 ++++++++++
.../function/AggregationFunctionFactoryTest.java | 10 ++
.../function/AggregationFunctionTypeTest.java | 4 +
.../v2/AggregationFunctionColumnPairTest.java | 9 ++
...terSegmentAggregationMultiValueQueriesTest.java | 29 +++++-
...erSegmentAggregationSingleValueQueriesTest.java | 33 +++++-
.../org/apache/pinot/queries/QueriesTestUtils.java | 18 +++-
13 files changed, 334 insertions(+), 6 deletions(-)
diff --git a/docs/pql_examples.rst b/docs/pql_examples.rst
index 72fc243..9f11d76 100644
--- a/docs/pql_examples.rst
+++ b/docs/pql_examples.rst
@@ -157,6 +157,7 @@ Supported aggregations on single-value columns
* ``MINMAXRANGE``
* ``DISTINCTCOUNT``
* ``DISTINCTCOUNTHLL``
+* ``DISTINCTCOUNTRAWHLL``: Returns HLL response serialized as string. The
serialized HLL can be converted back into an HLL (see
`pinot-core/\*\*/HllUtil.java` as an example) and then aggregated with other
HLLs. A common use case may be to merge HLL responses from different Pinot
tables, or to allow aggregation after client-side batching.
* ``FASTHLL`` (**WARN**: will be deprecated soon. ``FASTHLL`` stores
serialized HyperLogLog in String format, which performs
worse than ``DISTINCTCOUNTHLL``, which supports serialized HyperLogLog in
BYTES (byte array) format)
* ``PERCENTILE[0-100]``: e.g. ``PERCENTILE5``, ``PERCENTILE50``,
``PERCENTILE99``, etc.
@@ -173,6 +174,7 @@ Supported aggregations on multi-value columns
* ``MINMAXRANGEMV``
* ``DISTINCTCOUNTMV``
* ``DISTINCTCOUNTHLLMV``
+* ``DISTINCTCOUNTRAWHLLMV``: Returns HLL response serialized as string. The
serialized HLL can be converted back into an HLL (see
`pinot-core/**/HllUtil.java` as an example) and then aggregated with other
HLLs. A common use case may be to merge HLL responses from different Pinot
tables, or to allow aggregation after client-side batching.
* ``FASTHLLMV`` (**WARN**: will be deprecated soon. It does not make lots of
sense to configure serialized HyperLogLog
column as a dimension)
* ``PERCENTILE[0-100]MV``: e.g. ``PERCENTILE5MV``, ``PERCENTILE50MV``,
``PERCENTILE99MV``, etc.
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/data/aggregator/ValueAggregatorFactory.java
b/pinot-core/src/main/java/org/apache/pinot/core/data/aggregator/ValueAggregatorFactory.java
index fb98b72..eb3c688 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/data/aggregator/ValueAggregatorFactory.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/data/aggregator/ValueAggregatorFactory.java
@@ -50,6 +50,7 @@ public class ValueAggregatorFactory {
case MINMAXRANGE:
return new MinMaxRangeValueAggregator();
case DISTINCTCOUNTHLL:
+ case DISTINCTCOUNTRAWHLL:
return new DistinctCountHLLValueAggregator();
case PERCENTILEEST:
return new PercentileEstValueAggregator();
@@ -81,6 +82,7 @@ public class ValueAggregatorFactory {
case MINMAXRANGE:
return MinMaxRangeValueAggregator.AGGREGATED_VALUE_TYPE;
case DISTINCTCOUNTHLL:
+ case DISTINCTCOUNTRAWHLL:
return DistinctCountHLLValueAggregator.AGGREGATED_VALUE_TYPE;
case PERCENTILEEST:
return PercentileEstValueAggregator.AGGREGATED_VALUE_TYPE;
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java
index a815b94..e1f1d69 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactory.java
@@ -81,6 +81,8 @@ public class AggregationFunctionFactory {
return new DistinctCountAggregationFunction();
case DISTINCTCOUNTHLL:
return new DistinctCountHLLAggregationFunction();
+ case DISTINCTCOUNTRAWHLL:
+ return new DistinctCountRawHLLAggregationFunction();
case FASTHLL:
return new FastHLLAggregationFunction();
case COUNTMV:
@@ -99,6 +101,8 @@ public class AggregationFunctionFactory {
return new DistinctCountMVAggregationFunction();
case DISTINCTCOUNTHLLMV:
return new DistinctCountHLLMVAggregationFunction();
+ case DISTINCTCOUNTRAWHLLMV:
+ return new DistinctCountRawHLLMVAggregationFunction();
default:
throw new IllegalArgumentException();
}
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionType.java
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionType.java
index 39428d7..d4b7ef8 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionType.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionType.java
@@ -32,6 +32,7 @@ public enum AggregationFunctionType {
MINMAXRANGE("minMaxRange"),
DISTINCTCOUNT("distinctCount"),
DISTINCTCOUNTHLL("distinctCountHLL"),
+ DISTINCTCOUNTRAWHLL("distinctCountRawHLL"),
FASTHLL("fastHLL"),
PERCENTILE("percentile"),
PERCENTILEEST("percentileEst"),
@@ -45,6 +46,7 @@ public enum AggregationFunctionType {
MINMAXRANGEMV("minMaxRangeMV"),
DISTINCTCOUNTMV("distinctCountMV"),
DISTINCTCOUNTHLLMV("distinctCountHLLMV"),
+ DISTINCTCOUNTRAWHLLMV("distinctCountRawHLLMV"),
PERCENTILEMV("percentileMV"),
PERCENTILEESTMV("percentileEstMV"),
PERCENTILETDIGESTMV("percentileTDigestMV");
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountRawHLLAggregationFunction.java
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountRawHLLAggregationFunction.java
new file mode 100644
index 0000000..ce7cae2
--- /dev/null
+++
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountRawHLLAggregationFunction.java
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+import javax.annotation.Nonnull;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import
org.apache.pinot.core.query.aggregation.function.customobject.SerializedHLL;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+
+
+public class DistinctCountRawHLLAggregationFunction implements
AggregationFunction<HyperLogLog, SerializedHLL> {
+
+ private final DistinctCountHLLAggregationFunction
_distinctCountHLLAggregationFunction = new
DistinctCountHLLAggregationFunction();
+
+ @Nonnull
+ @Override
+ public AggregationFunctionType getType() {
+ return AggregationFunctionType.DISTINCTCOUNTRAWHLL;
+ }
+
+ @Nonnull
+ @Override
+ public String getColumnName(@Nonnull String column) {
+ return AggregationFunctionType.DISTINCTCOUNTRAWHLL.getName() + "_" +
column;
+ }
+
+ @Override
+ public void accept(@Nonnull AggregationFunctionVisitorBase visitor) {
+ _distinctCountHLLAggregationFunction.accept(visitor);
+ }
+
+ @Nonnull
+ @Override
+ public AggregationResultHolder createAggregationResultHolder() {
+ return
_distinctCountHLLAggregationFunction.createAggregationResultHolder();
+ }
+
+ @Nonnull
+ @Override
+ public GroupByResultHolder createGroupByResultHolder(int initialCapacity,
int maxCapacity) {
+ return
_distinctCountHLLAggregationFunction.createGroupByResultHolder(initialCapacity,
maxCapacity);
+ }
+
+ @Override
+ public void aggregate(int length, @Nonnull AggregationResultHolder
aggregationResultHolder,
+ @Nonnull BlockValSet... blockValSets) {
+ _distinctCountHLLAggregationFunction.aggregate(length,
aggregationResultHolder, blockValSets);
+ }
+
+ @Override
+ public void aggregateGroupBySV(int length, @Nonnull int[] groupKeyArray,
+ @Nonnull GroupByResultHolder groupByResultHolder, @Nonnull
BlockValSet... blockValSets) {
+ _distinctCountHLLAggregationFunction.aggregateGroupBySV(length,
groupKeyArray, groupByResultHolder, blockValSets);
+ }
+
+ @Override
+ public void aggregateGroupByMV(int length, @Nonnull int[][] groupKeysArray,
+ @Nonnull GroupByResultHolder groupByResultHolder, @Nonnull
BlockValSet... blockValSets) {
+ _distinctCountHLLAggregationFunction.aggregateGroupByMV(length,
groupKeysArray, groupByResultHolder, blockValSets);
+ }
+
+ @Nonnull
+ @Override
+ public HyperLogLog extractAggregationResult(@Nonnull AggregationResultHolder
aggregationResultHolder) {
+ return
_distinctCountHLLAggregationFunction.extractAggregationResult(aggregationResultHolder);
+ }
+
+ @Nonnull
+ @Override
+ public HyperLogLog extractGroupByResult(@Nonnull GroupByResultHolder
groupByResultHolder, int groupKey) {
+ return
_distinctCountHLLAggregationFunction.extractGroupByResult(groupByResultHolder,
groupKey);
+ }
+
+ @Nonnull
+ @Override
+ public HyperLogLog merge(@Nonnull HyperLogLog intermediateResult1, @Nonnull
HyperLogLog intermediateResult2) {
+ return _distinctCountHLLAggregationFunction.merge(intermediateResult1,
intermediateResult2);
+ }
+
+ @Override
+ public boolean isIntermediateResultComparable() {
+ return
_distinctCountHLLAggregationFunction.isIntermediateResultComparable();
+ }
+
+ @Nonnull
+ @Override
+ public DataSchema.ColumnDataType getIntermediateResultColumnType() {
+ return
_distinctCountHLLAggregationFunction.getIntermediateResultColumnType();
+ }
+
+ @Nonnull
+ @Override
+ public SerializedHLL extractFinalResult(@Nonnull HyperLogLog
intermediateResult) {
+ return SerializedHLL.of(intermediateResult);
+ }
+}
\ No newline at end of file
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountRawHLLMVAggregationFunction.java
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountRawHLLMVAggregationFunction.java
new file mode 100644
index 0000000..1da478d
--- /dev/null
+++
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountRawHLLMVAggregationFunction.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import javax.annotation.Nonnull;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+
+
+public class DistinctCountRawHLLMVAggregationFunction extends
DistinctCountRawHLLAggregationFunction {
+
+ private final DistinctCountHLLMVAggregationFunction
_distinctCountHLLMVAggregationFunction = new
DistinctCountHLLMVAggregationFunction();
+
+ @Nonnull
+ @Override
+ public AggregationFunctionType getType() {
+ return AggregationFunctionType.DISTINCTCOUNTRAWHLLMV;
+ }
+
+ @Nonnull
+ @Override
+ public String getColumnName(@Nonnull String column) {
+ return AggregationFunctionType.DISTINCTCOUNTRAWHLLMV.getName() + "_" +
column;
+ }
+
+ @Override
+ public void aggregate(int length, @Nonnull AggregationResultHolder
aggregationResultHolder,
+ @Nonnull BlockValSet... blockValSets) {
+ _distinctCountHLLMVAggregationFunction.aggregate(length,
aggregationResultHolder, blockValSets);
+ }
+
+ @Override
+ public void aggregateGroupBySV(int length, @Nonnull int[] groupKeyArray,
+ @Nonnull GroupByResultHolder groupByResultHolder, @Nonnull
BlockValSet... blockValSets) {
+ _distinctCountHLLMVAggregationFunction.aggregateGroupBySV(length,
groupKeyArray, groupByResultHolder, blockValSets);
+ }
+
+ @Override
+ public void aggregateGroupByMV(int length, @Nonnull int[][] groupKeysArray,
+ @Nonnull GroupByResultHolder groupByResultHolder, @Nonnull
BlockValSet... blockValSets) {
+ _distinctCountHLLMVAggregationFunction.aggregateGroupByMV(length,
groupKeysArray, groupByResultHolder, blockValSets);
+ }
+}
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/customobject/SerializedHLL.java
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/customobject/SerializedHLL.java
new file mode 100644
index 0000000..41ce7e6
--- /dev/null
+++
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/customobject/SerializedHLL.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function.customobject;
+
+import com.clearspring.analytics.stream.cardinality.HyperLogLog;
+import javax.annotation.Nonnull;
+import org.apache.pinot.common.utils.BytesUtils;
+import org.apache.pinot.core.startree.hll.HllUtil;
+
+
+public class SerializedHLL implements Comparable<SerializedHLL> {
+
+ private final HyperLogLog hll;
+
+ public static SerializedHLL of(@Nonnull HyperLogLog serializedHLL) {
+ return new SerializedHLL(serializedHLL);
+ }
+
+ @Override
+ public int compareTo(@Nonnull SerializedHLL other) {
+ return this.getCardinality().compareTo(other.getCardinality());
+ }
+
+ @Override
+ public String toString() {
+ return BytesUtils.toHexString(HllUtil.toBytes(hll));
+ }
+
+ private Long getCardinality() {
+ return hll.cardinality();
+ }
+
+ private SerializedHLL(HyperLogLog hll) {
+ this.hll = hll;
+ }
+}
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactoryTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactoryTest.java
index 4ece476..7310601 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactoryTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionFactoryTest.java
@@ -69,6 +69,11 @@ public class AggregationFunctionFactoryTest {
Assert.assertEquals(aggregationFunction.getType(),
AggregationFunctionType.DISTINCTCOUNTHLL);
Assert.assertEquals(aggregationFunction.getColumnName(COLUMN),
"distinctCountHLL_column");
+ aggregationFunction =
AggregationFunctionFactory.getAggregationFunction("DiStInCtCoUnTrAwHlL");
+ Assert.assertTrue(aggregationFunction instanceof
DistinctCountRawHLLAggregationFunction);
+ Assert.assertEquals(aggregationFunction.getType(),
AggregationFunctionType.DISTINCTCOUNTRAWHLL);
+ Assert.assertEquals(aggregationFunction.getColumnName(COLUMN),
"distinctCountRawHLL_column");;
+
aggregationFunction =
AggregationFunctionFactory.getAggregationFunction("FaStHlL");
Assert.assertTrue(aggregationFunction instanceof
FastHLLAggregationFunction);
Assert.assertEquals(aggregationFunction.getType(),
AggregationFunctionType.FASTHLL);
@@ -129,6 +134,11 @@ public class AggregationFunctionFactoryTest {
Assert.assertEquals(aggregationFunction.getType(),
AggregationFunctionType.DISTINCTCOUNTHLLMV);
Assert.assertEquals(aggregationFunction.getColumnName(COLUMN),
"distinctCountHLLMV_column");
+ aggregationFunction =
AggregationFunctionFactory.getAggregationFunction("DiStInCtCoUnTrAwHlLmV");
+ Assert.assertTrue(aggregationFunction instanceof
DistinctCountRawHLLMVAggregationFunction);
+ Assert.assertEquals(aggregationFunction.getType(),
AggregationFunctionType.DISTINCTCOUNTRAWHLLMV);
+ Assert.assertEquals(aggregationFunction.getColumnName(COLUMN),
"distinctCountRawHLLMV_column");
+
aggregationFunction =
AggregationFunctionFactory.getAggregationFunction("PeRcEnTiLe10Mv");
Assert.assertTrue(aggregationFunction instanceof
PercentileMVAggregationFunction);
Assert.assertEquals(aggregationFunction.getType(),
AggregationFunctionType.PERCENTILEMV);
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionTypeTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionTypeTest.java
index b35247f..0f34566 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionTypeTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/AggregationFunctionTypeTest.java
@@ -37,6 +37,8 @@ public class AggregationFunctionTypeTest {
AggregationFunctionType.DISTINCTCOUNT);
Assert.assertEquals(AggregationFunctionType.getAggregationFunctionType("DiStInCtCoUnThLl"),
AggregationFunctionType.DISTINCTCOUNTHLL);
+
Assert.assertEquals(AggregationFunctionType.getAggregationFunctionType("DiStInCtCoUnTrAwHlL"),
+ AggregationFunctionType.DISTINCTCOUNTRAWHLL);
Assert.assertEquals(AggregationFunctionType.getAggregationFunctionType("FaStHlL"),
AggregationFunctionType.FASTHLL);
Assert.assertEquals(AggregationFunctionType.getAggregationFunctionType("PeRcEnTiLe5"),
AggregationFunctionType.PERCENTILE);
@@ -55,6 +57,8 @@ public class AggregationFunctionTypeTest {
AggregationFunctionType.DISTINCTCOUNTMV);
Assert.assertEquals(AggregationFunctionType.getAggregationFunctionType("DiStInCtCoUnThLlMv"),
AggregationFunctionType.DISTINCTCOUNTHLLMV);
+
Assert.assertEquals(AggregationFunctionType.getAggregationFunctionType("DiStInCtCoUnTrAwHlLmV"),
+ AggregationFunctionType.DISTINCTCOUNTRAWHLLMV);
Assert.assertEquals(AggregationFunctionType.getAggregationFunctionType("PeRcEnTiLe10Mv"),
AggregationFunctionType.PERCENTILEMV);
Assert.assertEquals(AggregationFunctionType.getAggregationFunctionType("PeRcEnTiLeEsT90mV"),
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/AggregationFunctionColumnPairTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/AggregationFunctionColumnPairTest.java
index 6de08f3..0264b3b 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/AggregationFunctionColumnPairTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/AggregationFunctionColumnPairTest.java
@@ -73,6 +73,15 @@ public class AggregationFunctionColumnPairTest {
Assert.assertEquals(fromColumnName, pair);
Assert.assertEquals(fromColumnName.hashCode(), pair.hashCode());
+ pair = new
AggregationFunctionColumnPair(AggregationFunctionType.DISTINCTCOUNTRAWHLL,
COLUMN);
+ Assert.assertEquals(pair.getFunctionType(),
AggregationFunctionType.DISTINCTCOUNTRAWHLL);
+ Assert.assertEquals(pair.getColumn(), COLUMN);
+ columnName = pair.toColumnName();
+ Assert.assertEquals(columnName, "distinctCountRawHLL__column");
+ fromColumnName = AggregationFunctionColumnPair.fromColumnName(columnName);
+ Assert.assertEquals(fromColumnName, pair);
+ Assert.assertEquals(fromColumnName.hashCode(), pair.hashCode());
+
pair = new
AggregationFunctionColumnPair(AggregationFunctionType.PERCENTILEEST, COLUMN);
Assert.assertEquals(pair.getFunctionType(),
AggregationFunctionType.PERCENTILEEST);
Assert.assertEquals(pair.getColumn(), COLUMN);
diff --git
a/pinot-core/src/test/java/org/apache/pinot/queries/InterSegmentAggregationMultiValueQueriesTest.java
b/pinot-core/src/test/java/org/apache/pinot/queries/InterSegmentAggregationMultiValueQueriesTest.java
index 44169c5..df77bbe 100644
---
a/pinot-core/src/test/java/org/apache/pinot/queries/InterSegmentAggregationMultiValueQueriesTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/queries/InterSegmentAggregationMultiValueQueriesTest.java
@@ -18,12 +18,15 @@
*/
package org.apache.pinot.queries;
+import java.io.Serializable;
+import java.util.function.Function;
import org.apache.pinot.common.response.broker.BrokerResponseNative;
+import org.apache.pinot.common.utils.BytesUtils;
import org.apache.pinot.core.plan.maker.InstancePlanMakerImplV2;
+import org.apache.pinot.core.startree.hll.HllUtil;
import org.testng.annotations.Test;
-import static org.testng.Assert.assertFalse;
-import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.*;
public class InterSegmentAggregationMultiValueQueriesTest extends
BaseMultiValueQueriesTest {
@@ -199,6 +202,28 @@ public class InterSegmentAggregationMultiValueQueriesTest
extends BaseMultiValue
}
@Test
+ public void testDistinctCountRawHLLMV() {
+ String query = "SELECT DISTINCTCOUNTRAWHLLMV(column6) FROM testTable";
+ Function<Serializable, String> cardinalityExtractor = value ->
String.valueOf(HllUtil.buildHllFromBytes(BytesUtils.toBytes(value)).cardinality());
+
+ BrokerResponseNative brokerResponse = getBrokerResponseForQuery(query);
+ QueriesTestUtils
+ .testInterSegmentAggregationResult(brokerResponse, 400000L, 0L,
400000L, 400000L, cardinalityExtractor, new String[]{"20039"});
+
+ brokerResponse = getBrokerResponseForQueryWithFilter(query);
+ QueriesTestUtils
+ .testInterSegmentAggregationResult(brokerResponse, 62480L, 1053656L,
62480L, 400000L, cardinalityExtractor, new String[]{"1296"});
+
+ brokerResponse = getBrokerResponseForQuery(query + SV_GROUP_BY);
+ QueriesTestUtils
+ .testInterSegmentAggregationResult(brokerResponse, 400000L, 0L,
800000L, 400000L, cardinalityExtractor, new String[]{"4715"});
+
+ brokerResponse = getBrokerResponseForQuery(query + MV_GROUP_BY);
+ QueriesTestUtils
+ .testInterSegmentAggregationResult(brokerResponse, 400000L, 0L,
800000L, 400000L, cardinalityExtractor, new String[]{"3490"});
+ }
+
+ @Test
public void testPercentile50MV() {
String query = "SELECT PERCENTILE50MV(column6) FROM testTable";
diff --git
a/pinot-core/src/test/java/org/apache/pinot/queries/InterSegmentAggregationSingleValueQueriesTest.java
b/pinot-core/src/test/java/org/apache/pinot/queries/InterSegmentAggregationSingleValueQueriesTest.java
index 04adf61..f253182 100644
---
a/pinot-core/src/test/java/org/apache/pinot/queries/InterSegmentAggregationSingleValueQueriesTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/queries/InterSegmentAggregationSingleValueQueriesTest.java
@@ -18,12 +18,15 @@
*/
package org.apache.pinot.queries;
+import java.io.Serializable;
+import java.util.function.Function;
import org.apache.pinot.common.response.broker.BrokerResponseNative;
+import org.apache.pinot.common.utils.BytesUtils;
import org.apache.pinot.core.plan.maker.InstancePlanMakerImplV2;
+import org.apache.pinot.core.startree.hll.HllUtil;
import org.testng.annotations.Test;
-import static org.testng.Assert.assertFalse;
-import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.*;
public class InterSegmentAggregationSingleValueQueriesTest extends
BaseSingleValueQueriesTest {
@@ -204,6 +207,32 @@ public class InterSegmentAggregationSingleValueQueriesTest
extends BaseSingleVal
}
@Test
+ public void testDistinctCountRawHLL() {
+ String query = "SELECT DISTINCTCOUNTRAWHLL(column1),
DISTINCTCOUNTRAWHLL(column3) FROM testTable";
+ Function<Serializable, String> cardinalityExtractor = value ->
String.valueOf(HllUtil.buildHllFromBytes(BytesUtils.toBytes(value)).cardinality());
+
+ BrokerResponseNative brokerResponse = getBrokerResponseForQuery(query);
+ QueriesTestUtils.testInterSegmentAggregationResult(brokerResponse,
120000L, 0L, 240000L, 120000L,
+ cardinalityExtractor,
+ new String[]{"5977", "23825"});
+
+ brokerResponse = getBrokerResponseForQueryWithFilter(query);
+ QueriesTestUtils.testInterSegmentAggregationResult(brokerResponse, 24516L,
336536L, 49032L, 120000L,
+ cardinalityExtractor,
+ new String[]{"1886", "4492"});
+
+ brokerResponse = getBrokerResponseForQuery(query + GROUP_BY);
+ QueriesTestUtils.testInterSegmentAggregationResult(brokerResponse,
120000L, 0L, 360000L, 120000L,
+ cardinalityExtractor,
+ new String[]{"3592", "11889"});
+
+ brokerResponse = getBrokerResponseForQueryWithFilter(query + GROUP_BY);
+ QueriesTestUtils.testInterSegmentAggregationResult(brokerResponse, 24516L,
336536L, 73548L, 120000L,
+ cardinalityExtractor,
+ new String[]{"1324", "3197"});
+ }
+
+ @Test
public void testPercentile50() {
String query = "SELECT PERCENTILE50(column1), PERCENTILE50(column3) FROM
testTable";
diff --git
a/pinot-core/src/test/java/org/apache/pinot/queries/QueriesTestUtils.java
b/pinot-core/src/test/java/org/apache/pinot/queries/QueriesTestUtils.java
index 4b83cc5..24cb0db 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/QueriesTestUtils.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/QueriesTestUtils.java
@@ -21,6 +21,7 @@ package org.apache.pinot.queries;
import java.io.Serializable;
import java.util.Iterator;
import java.util.List;
+import java.util.function.Function;
import org.apache.pinot.common.response.broker.AggregationResult;
import org.apache.pinot.common.response.broker.BrokerResponseNative;
import org.apache.pinot.core.operator.ExecutionStatistics;
@@ -82,6 +83,19 @@ public class QueriesTestUtils {
public static void testInterSegmentAggregationResult(BrokerResponseNative
brokerResponse, long expectedNumDocsScanned,
long expectedNumEntriesScannedInFilter, long
expectedNumEntriesScannedPostFilter, long expectedNumTotalDocs,
String[] expectedAggregationResults) {
+ testInterSegmentAggregationResult(
+ brokerResponse,
+ expectedNumDocsScanned,
+ expectedNumEntriesScannedInFilter,
+ expectedNumEntriesScannedPostFilter,
+ expectedNumTotalDocs,
+ Serializable::toString,
+ expectedAggregationResults);
+ }
+
+ public static void testInterSegmentAggregationResult(BrokerResponseNative
brokerResponse, long expectedNumDocsScanned,
+ long expectedNumEntriesScannedInFilter, long
expectedNumEntriesScannedPostFilter, long expectedNumTotalDocs,
+ Function<Serializable, String> responseMapper, String[]
expectedAggregationResults) {
Assert.assertEquals(brokerResponse.getNumDocsScanned(),
expectedNumDocsScanned);
Assert.assertEquals(brokerResponse.getNumEntriesScannedInFilter(),
expectedNumEntriesScannedInFilter);
Assert.assertEquals(brokerResponse.getNumEntriesScannedPostFilter(),
expectedNumEntriesScannedPostFilter);
@@ -95,10 +109,10 @@ public class QueriesTestUtils {
Serializable value = aggregationResult.getValue();
if (value != null) {
// Aggregation.
- Assert.assertEquals(value, expectedAggregationResult);
+ Assert.assertEquals(responseMapper.apply(value),
expectedAggregationResult);
} else {
// Group-by.
-
Assert.assertEquals(aggregationResult.getGroupByResult().get(0).getValue(),
expectedAggregationResult);
+
Assert.assertEquals(responseMapper.apply(aggregationResult.getGroupByResult().get(0).getValue()),
expectedAggregationResult);
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]