This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 5687f27214 Simplify the parameters for 
DistinctCountSmartHLLAggregationFunction (#8566)
5687f27214 is described below

commit 5687f272146135215e62179b968abff77ad23164
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Wed Apr 20 09:28:32 2022 -0700

    Simplify the parameters for DistinctCountSmartHLLAggregationFunction (#8566)
---
 .../query/NonScanBasedAggregationOperator.java     |  4 +-
 .../DistinctCountSmartHLLAggregationFunction.java  | 79 ++++++++++++----------
 .../pinot/queries/DistinctCountQueriesTest.java    | 25 +++++--
 3 files changed, 63 insertions(+), 45 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/NonScanBasedAggregationOperator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/NonScanBasedAggregationOperator.java
index c9bdf4dacf..5a0c7630ae 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/NonScanBasedAggregationOperator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/NonScanBasedAggregationOperator.java
@@ -231,9 +231,9 @@ public class NonScanBasedAggregationOperator extends 
BaseOperator<IntermediateRe
 
   private static Object getDistinctCountSmartHLLResult(Dictionary dictionary,
       DistinctCountSmartHLLAggregationFunction function) {
-    if (dictionary.length() > function.getHllConversionThreshold()) {
+    if (dictionary.length() > function.getThreshold()) {
       // Store values into a HLL when the dictionary size exceeds the 
conversion threshold
-      return getDistinctValueHLL(dictionary, function.getHllLog2m());
+      return getDistinctValueHLL(dictionary, function.getLog2m());
     } else {
       return getDistinctValueSet(dictionary);
     }
diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountSmartHLLAggregationFunction.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountSmartHLLAggregationFunction.java
index ba1774adf9..2ee2e85545 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountSmartHLLAggregationFunction.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/DistinctCountSmartHLLAggregationFunction.java
@@ -54,35 +54,35 @@ import org.roaringbitmap.RoaringBitmap;
  * exceeds a threshold, the Set will be converted into a HyperLogLog, and 
approximate result will be returned.
  *
  * The function takes an optional second argument for parameters:
- * - hllLog2m: Log2m for the converted HyperLogLog, 12 by default.
- * - hllConversionThreshold: Threshold of the number of distinct values to 
trigger the conversion, 100_000 by default.
- *                           Non-positive value means never convert.
- * Example of second argument: 'hllLog2m=8;hllConversionThreshold=10'
+ * - threshold: Threshold of the number of distinct values to trigger the 
conversion, 100_000 by default. Non-positive
+ *              value means never convert.
+ * - log2m: Log2m for the converted HyperLogLog, 12 by default.
+ * Example of second argument: 'threshold=10;log2m=8'
  */
 @SuppressWarnings({"rawtypes", "unchecked"})
 public class DistinctCountSmartHLLAggregationFunction extends 
BaseSingleInputAggregationFunction<Object, Integer> {
-  private final int _hllLog2m;
-  private final int _hllConversionThreshold;
+  private final int _threshold;
+  private final int _log2m;
 
   public DistinctCountSmartHLLAggregationFunction(List<ExpressionContext> 
arguments) {
     super(arguments.get(0));
 
     if (arguments.size() > 1) {
       Parameters parameters = new Parameters(arguments.get(1).getLiteral());
-      _hllLog2m = parameters._hllLog2m;
-      _hllConversionThreshold = parameters._hllConversionThreshold;
+      _threshold = parameters._threshold;
+      _log2m = parameters._log2m;
     } else {
-      _hllLog2m = Parameters.DEFAULT_HLL_LOG2M;
-      _hllConversionThreshold = Parameters.DEFAULT_HLL_CONVERSION_THRESHOLD;
+      _threshold = Parameters.DEFAULT_THRESHOLD;
+      _log2m = Parameters.DEFAULT_LOG2M;
     }
   }
 
-  public int getHllLog2m() {
-    return _hllLog2m;
+  public int getThreshold() {
+    return _threshold;
   }
 
-  public int getHllConversionThreshold() {
-    return _hllConversionThreshold;
+  public int getLog2m() {
+    return _log2m;
   }
 
   @Override
@@ -329,7 +329,7 @@ public class DistinctCountSmartHLLAggregationFunction 
extends BaseSingleInputAgg
     }
 
     // Convert to HLL if the set size exceeds the threshold
-    if (valueSet.size() > _hllConversionThreshold) {
+    if (valueSet.size() > _threshold) {
       aggregationResultHolder.setValue(convertSetToHLL(valueSet, storedType));
     }
   }
@@ -343,7 +343,7 @@ public class DistinctCountSmartHLLAggregationFunction 
extends BaseSingleInputAgg
   }
 
   protected HyperLogLog convertByteArraySetToHLL(ObjectSet<ByteArray> 
valueSet) {
-    HyperLogLog hll = new HyperLogLog(_hllLog2m);
+    HyperLogLog hll = new HyperLogLog(_log2m);
     for (ByteArray value : valueSet) {
       hll.offer(value.getBytes());
     }
@@ -351,7 +351,7 @@ public class DistinctCountSmartHLLAggregationFunction 
extends BaseSingleInputAgg
   }
 
   protected HyperLogLog convertNonByteArraySetToHLL(Set valueSet) {
-    HyperLogLog hll = new HyperLogLog(_hllLog2m);
+    HyperLogLog hll = new HyperLogLog(_log2m);
     for (Object value : valueSet) {
       hll.offer(value);
     }
@@ -632,7 +632,7 @@ public class DistinctCountSmartHLLAggregationFunction 
extends BaseSingleInputAgg
     if (result instanceof DictIdsWrapper) {
       // For dictionary-encoded expression, convert dictionary ids to values
       DictIdsWrapper dictIdsWrapper = (DictIdsWrapper) result;
-      if 
(dictIdsWrapper._dictIdBitmap.cardinalityExceeds(_hllConversionThreshold)) {
+      if (dictIdsWrapper._dictIdBitmap.cardinalityExceeds(_threshold)) {
         return convertToHLL(dictIdsWrapper);
       } else {
         return convertToValueSet(dictIdsWrapper);
@@ -680,7 +680,7 @@ public class DistinctCountSmartHLLAggregationFunction 
extends BaseSingleInputAgg
     valueSet1.addAll(valueSet2);
 
     // Convert to HLL if the set size exceeds the threshold
-    if (valueSet1.size() > _hllConversionThreshold) {
+    if (valueSet1.size() > _threshold) {
       if (valueSet1 instanceof ObjectSet && valueSet1.iterator().next() 
instanceof ByteArray) {
         return convertByteArraySetToHLL((ObjectSet<ByteArray>) valueSet1);
       } else {
@@ -691,7 +691,7 @@ public class DistinctCountSmartHLLAggregationFunction 
extends BaseSingleInputAgg
     }
   }
 
-  private HyperLogLog mergeIntoHLL(HyperLogLog hll, Object intermediateResult) 
{
+  private static HyperLogLog mergeIntoHLL(HyperLogLog hll, Object 
intermediateResult) {
     if (intermediateResult instanceof HyperLogLog) {
       try {
         hll.addAll((HyperLogLog) intermediateResult);
@@ -924,7 +924,7 @@ public class DistinctCountSmartHLLAggregationFunction 
extends BaseSingleInputAgg
    * Helper method to read dictionary and convert dictionary ids to a 
HyperLogLog for dictionary-encoded expression.
    */
   private HyperLogLog convertToHLL(DictIdsWrapper dictIdsWrapper) {
-    HyperLogLog hyperLogLog = new HyperLogLog(_hllLog2m);
+    HyperLogLog hyperLogLog = new HyperLogLog(_log2m);
     Dictionary dictionary = dictIdsWrapper._dictionary;
     RoaringBitmap dictIdBitmap = dictIdsWrapper._dictIdBitmap;
     PeekableIntIterator iterator = dictIdBitmap.getIntIterator();
@@ -934,7 +934,7 @@ public class DistinctCountSmartHLLAggregationFunction 
extends BaseSingleInputAgg
     return hyperLogLog;
   }
 
-  private IllegalStateException getIllegalDataTypeException(DataType dataType, 
boolean singleValue) {
+  private static IllegalStateException getIllegalDataTypeException(DataType 
dataType, boolean singleValue) {
     return new IllegalStateException(
         "Illegal data type for DISTINCT_COUNT_SMART_HLL aggregation function: 
" + dataType + (singleValue ? ""
             : "_MV"));
@@ -956,16 +956,21 @@ public class DistinctCountSmartHLLAggregationFunction 
extends BaseSingleInputAgg
   private static class Parameters {
     static final char PARAMETER_DELIMITER = ';';
     static final char PARAMETER_KEY_VALUE_SEPARATOR = '=';
-    static final String HLL_LOG2M_KEY = "HLLLOG2M";
-    static final String HLL_CONVERSION_THRESHOLD_KEY = 
"HLLCONVERSIONTHRESHOLD";
 
-    // Use 12 by default to get good accuracy for DistinctCount
-    static final int DEFAULT_HLL_LOG2M = 12;
+    static final String THRESHOLD_KEY = "THRESHOLD";
     // 100K values to trigger HLL conversion by default
-    static final int DEFAULT_HLL_CONVERSION_THRESHOLD = 100_000;
+    static final int DEFAULT_THRESHOLD = 100_000;
+    @Deprecated
+    static final String DEPRECATED_THRESHOLD_KEY = "HLLCONVERSIONTHRESHOLD";
+
+    static final String LOG2M_KEY = "LOG2M";
+    // Use 12 by default to get good accuracy for DistinctCount
+    static final int DEFAULT_LOG2M = 12;
+    @Deprecated
+    static final String DEPRECATED_LOG2M_KEY = "HLLLOG2M";
 
-    int _hllLog2m = DEFAULT_HLL_LOG2M;
-    int _hllConversionThreshold = DEFAULT_HLL_CONVERSION_THRESHOLD;
+    int _threshold = DEFAULT_THRESHOLD;
+    int _log2m = DEFAULT_LOG2M;
 
     Parameters(String parametersString) {
       StringUtils.deleteWhitespace(parametersString);
@@ -976,16 +981,18 @@ public class DistinctCountSmartHLLAggregationFunction 
extends BaseSingleInputAgg
         String key = keyAndValue[0];
         String value = keyAndValue[1];
         switch (key.toUpperCase()) {
-          case HLL_LOG2M_KEY:
-            _hllLog2m = Integer.parseInt(value);
-            break;
-          case HLL_CONVERSION_THRESHOLD_KEY:
-            _hllConversionThreshold = Integer.parseInt(value);
+          case THRESHOLD_KEY:
+          case DEPRECATED_THRESHOLD_KEY:
+            _threshold = Integer.parseInt(value);
             // Treat non-positive threshold as unlimited
-            if (_hllConversionThreshold <= 0) {
-              _hllConversionThreshold = Integer.MAX_VALUE;
+            if (_threshold <= 0) {
+              _threshold = Integer.MAX_VALUE;
             }
             break;
+          case LOG2M_KEY:
+          case DEPRECATED_LOG2M_KEY:
+            _log2m = Integer.parseInt(value);
+            break;
           default:
             throw new IllegalArgumentException("Invalid parameter key: " + 
key);
         }
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/queries/DistinctCountQueriesTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/queries/DistinctCountQueriesTest.java
index f86c6a690d..2c6bed8e16 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/queries/DistinctCountQueriesTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/queries/DistinctCountQueriesTest.java
@@ -39,8 +39,11 @@ import 
org.apache.pinot.core.operator.blocks.IntermediateResultsBlock;
 import org.apache.pinot.core.operator.query.AggregationGroupByOperator;
 import org.apache.pinot.core.operator.query.AggregationOperator;
 import org.apache.pinot.core.operator.query.NonScanBasedAggregationOperator;
+import 
org.apache.pinot.core.query.aggregation.function.DistinctCountSmartHLLAggregationFunction;
 import 
org.apache.pinot.core.query.aggregation.groupby.AggregationGroupByResult;
 import org.apache.pinot.core.query.aggregation.groupby.GroupKeyGenerator;
+import org.apache.pinot.core.query.request.context.QueryContext;
+import 
org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils;
 import 
org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
 import 
org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
 import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
@@ -391,12 +394,11 @@ public class DistinctCountQueriesTest extends 
BaseQueriesTest {
   @Test
   public void testSmartHLL() {
     // Dictionary based
-    String query = "SELECT DISTINCTCOUNTSMARTHLL(intColumn, 
'hllConversionThreshold=10'), "
-        + "DISTINCTCOUNTSMARTHLL(longColumn, 'hllConversionThreshold=10'), "
-        + "DISTINCTCOUNTSMARTHLL(floatColumn, 'hllConversionThreshold=10'), "
-        + "DISTINCTCOUNTSMARTHLL(doubleColumn, 'hllConversionThreshold=10'), "
-        + "DISTINCTCOUNTSMARTHLL(stringColumn, 'hllConversionThreshold=10'), "
-        + "DISTINCTCOUNTSMARTHLL(bytesColumn, 'hllConversionThreshold=10') 
FROM testTable";
+    String query = "SELECT DISTINCTCOUNTSMARTHLL(intColumn, 'threshold=10'), "
+        + "DISTINCTCOUNTSMARTHLL(longColumn, 'threshold=10'), " + 
"DISTINCTCOUNTSMARTHLL(floatColumn, 'threshold=10'), "
+        + "DISTINCTCOUNTSMARTHLL(doubleColumn, 'threshold=10'), "
+        + "DISTINCTCOUNTSMARTHLL(stringColumn, 'threshold=10'), "
+        + "DISTINCTCOUNTSMARTHLL(bytesColumn, 'threshold=10') FROM testTable";
 
     // Inner segment
     String[] interSegmentsExpectedResults = new String[6];
@@ -469,7 +471,7 @@ public class DistinctCountQueriesTest extends 
BaseQueriesTest {
     }
 
     // Change log2m
-    query = "SELECT DISTINCTCOUNTSMARTHLL(intColumn, 
'hllLog2m=8;hllConversionThreshold=10') FROM testTable";
+    query = "SELECT DISTINCTCOUNTSMARTHLL(intColumn, 'threshold=10;log2m=8') 
FROM testTable";
     operator = getOperatorForSqlQuery(query);
     assertTrue(operator instanceof NonScanBasedAggregationOperator);
     aggregationResult = ((NonScanBasedAggregationOperator) 
operator).nextBlock().getAggregationResult();
@@ -479,6 +481,15 @@ public class DistinctCountQueriesTest extends 
BaseQueriesTest {
     HyperLogLog hll = (HyperLogLog) aggregationResult.get(0);
     // Check log2m is 8
     assertEquals(hll.sizeof(), 172);
+
+    // Test legacy parameters
+    query = "SELECT DISTINCTCOUNTSMARTHLL(intColumn, 
'hllConversionThreshold=10;hllLog2m=8') FROM testTable";
+    QueryContext queryContext = 
QueryContextConverterUtils.getQueryContextFromSQL(query);
+    assertNotNull(queryContext.getAggregationFunctions());
+    DistinctCountSmartHLLAggregationFunction function =
+        (DistinctCountSmartHLLAggregationFunction) 
queryContext.getAggregationFunctions()[0];
+    assertEquals(function.getThreshold(), 10);
+    assertEquals(function.getLog2m(), 8);
   }
 
   @AfterClass


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to