This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 9d7676ed8c [multistage] Support multi-value aggregation functions (#11216)
9d7676ed8c is described below
commit 9d7676ed8cc21914bb64126281bf9e626236e82f
Author: Xiang Fu <[email protected]>
AuthorDate: Tue Aug 1 02:52:05 2023 -0700
[multistage] Support multi-value aggregation functions (#11216)
* temp
* Support multi-value aggregation functions
---
.../tests/MultiStageEngineIntegrationTest.java | 51 +++++++++++++++-
.../pinot/segment/spi/AggregationFunctionType.java | 69 ++++++++++++++++------
2 files changed, 99 insertions(+), 21 deletions(-)
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java
index f74e1bd24f..bd12f0901b 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MultiStageEngineIntegrationTest.java
@@ -128,12 +128,60 @@ public class MultiStageEngineIntegrationTest extends BaseClusterIntegrationTestS
testQueryWithMatchingRowCount(pinotQuery, h2Query);
}
+ @Test(dataProvider = "useBothQueryEngines")
+ public void testMultiValueColumnAggregationQuery(boolean
useMultiStageQueryEngine)
+ throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+
+ String[] multiValueFunctions = new String[]{
+ "sumMV", "countMV", "minMV", "maxMV", "avgMV", "minMaxRangeMV",
"distinctCountMV", "distinctCountBitmapMV",
+ "distinctCountHLLMV", "distinctSumMV", "distinctAvgMV"
+ };
+ double[] expectedResults = new double[]{
+ -5.421344202E9, 577725, -9999.0, 16271.0, -9383.95292223809, 26270.0,
312, 312, 328, 3954484.0,
+ 12674.628205128205
+ };
+
+ Assert.assertEquals(multiValueFunctions.length, expectedResults.length);
+
+ for (int i = 0; i < multiValueFunctions.length; i++) {
+ String pinotQuery = String.format("SELECT %s(DivAirportIDs) FROM
mytable", multiValueFunctions[i]);
+ JsonNode jsonNode = postQuery(pinotQuery);
+
Assert.assertEquals(jsonNode.get("resultTable").get("rows").get(0).get(0).asDouble(),
expectedResults[i]);
+ }
+
+ String pinotQuery = "SELECT percentileMV(DivAirportIDs, 99) FROM mytable";
+ JsonNode jsonNode = postQuery(pinotQuery);
+
Assert.assertEquals(jsonNode.get("resultTable").get("rows").get(0).get(0).asDouble(),
13433.0);
+
+ pinotQuery = "SELECT percentileEstMV(DivAirportIDs, 99) FROM mytable";
+ jsonNode = postQuery(pinotQuery);
+
Assert.assertTrue(jsonNode.get("resultTable").get("rows").get(0).get(0).asDouble()
> 13000);
+
Assert.assertTrue(jsonNode.get("resultTable").get("rows").get(0).get(0).asDouble()
< 14000);
+
+ pinotQuery = "SELECT percentileTDigestMV(DivAirportIDs, 99) FROM mytable";
+ jsonNode = postQuery(pinotQuery);
+
Assert.assertTrue(jsonNode.get("resultTable").get("rows").get(0).get(0).asDouble()
> 13000);
+
Assert.assertTrue(jsonNode.get("resultTable").get("rows").get(0).get(0).asDouble()
< 14000);
+
+ pinotQuery = "SELECT percentileKLLMV(DivAirportIDs, 99) FROM mytable";
+ jsonNode = postQuery(pinotQuery);
+
Assert.assertTrue(jsonNode.get("resultTable").get("rows").get(0).get(0).asDouble()
> 12000);
+
Assert.assertTrue(jsonNode.get("resultTable").get("rows").get(0).get(0).asDouble()
< 15000);
+
+ pinotQuery = "SELECT percentileKLLMV(DivAirportIDs, 99, 100) FROM mytable";
+ jsonNode = postQuery(pinotQuery);
+
Assert.assertTrue(jsonNode.get("resultTable").get("rows").get(0).get(0).asDouble()
> 12000);
+
Assert.assertTrue(jsonNode.get("resultTable").get("rows").get(0).get(0).asDouble()
< 15000);
+
+ setUseMultiStageQueryEngine(true);
+ }
+
@Test
public void testTimeFunc()
throws Exception {
String sqlQuery = "SELECT toDateTime(now(), 'yyyy-MM-dd z'),
toDateTime(ago('PT1H'), 'yyyy-MM-dd z') FROM mytable";
JsonNode response = postQuery(sqlQuery);
- System.out.println("response = " + response);
String todayStr =
response.get("resultTable").get("rows").get(0).get(0).asText();
String expectedTodayStr =
Instant.now().atZone(ZoneId.of("UTC")).format(DateTimeFormatter.ofPattern("yyyy-MM-dd
z"));
@@ -441,7 +489,6 @@ public class MultiStageEngineIntegrationTest extends BaseClusterIntegrationTestS
+
"decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') as
decodedUrl, toBase64"
+ "(toUtf8('hello!')) as toBase64,
fromUtf8(fromBase64('aGVsbG8h')) as fromBase64";
JsonNode response = postQuery(sqlQuery);
- System.out.println("response = " + response.toPrettyString());
long queryEndTimeMs = System.currentTimeMillis();
JsonNode resultTable = response.get("resultTable");
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
index b636ed4f24..460bf61faf 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
@@ -150,25 +150,56 @@ public enum AggregationFunctionType {
STUNION("STUnion"),
// Aggregation functions for multi-valued columns
- COUNTMV("countMV"),
- MINMV("minMV"),
- MAXMV("maxMV"),
- SUMMV("sumMV"),
- AVGMV("avgMV"),
- MINMAXRANGEMV("minMaxRangeMV"),
- DISTINCTCOUNTMV("distinctCountMV"),
- DISTINCTCOUNTBITMAPMV("distinctCountBitmapMV"),
- DISTINCTCOUNTHLLMV("distinctCountHLLMV"),
- DISTINCTCOUNTRAWHLLMV("distinctCountRawHLLMV"),
- DISTINCTSUMMV("distinctSumMV"),
- DISTINCTAVGMV("distinctAvgMV"),
- PERCENTILEMV("percentileMV"),
- PERCENTILEESTMV("percentileEstMV"),
- PERCENTILERAWESTMV("percentileRawEstMV"),
- PERCENTILETDIGESTMV("percentileTDigestMV"),
- PERCENTILERAWTDIGESTMV("percentileRawTDigestMV"),
- PERCENTILEKLLMV("percentileKLLMV"),
- PERCENTILERAWKLLMV("percentileRawKLLMV"),
+ COUNTMV("countMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(SqlTypeFamily.ARRAY),
ReturnTypes.explicit(SqlTypeName.BIGINT),
+ ReturnTypes.explicit(SqlTypeName.BIGINT)),
+ MINMV("minMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(SqlTypeFamily.ARRAY), ReturnTypes.DOUBLE,
ReturnTypes.explicit(SqlTypeName.DOUBLE)),
+ MAXMV("maxMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(SqlTypeFamily.ARRAY), ReturnTypes.DOUBLE,
ReturnTypes.explicit(SqlTypeName.DOUBLE)),
+ SUMMV("sumMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(SqlTypeFamily.ARRAY), ReturnTypes.DOUBLE,
ReturnTypes.explicit(SqlTypeName.DOUBLE)),
+ AVGMV("avgMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(SqlTypeFamily.ARRAY), ReturnTypes.DOUBLE,
ReturnTypes.explicit(SqlTypeName.OTHER)),
+ MINMAXRANGEMV("minMaxRangeMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(SqlTypeFamily.ARRAY),
ReturnTypes.explicit(SqlTypeName.DOUBLE),
+ ReturnTypes.explicit(SqlTypeName.OTHER)),
+ DISTINCTCOUNTMV("distinctCountMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(SqlTypeFamily.ARRAY), ReturnTypes.BIGINT,
ReturnTypes.explicit(SqlTypeName.OTHER)),
+ DISTINCTCOUNTBITMAPMV("distinctCountBitmapMV", null, SqlKind.OTHER_FUNCTION,
+ SqlFunctionCategory.USER_DEFINED_FUNCTION,
OperandTypes.family(SqlTypeFamily.ARRAY), ReturnTypes.BIGINT,
+ ReturnTypes.explicit(SqlTypeName.OTHER)),
+ DISTINCTCOUNTHLLMV("distinctCountHLLMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(SqlTypeFamily.ARRAY), ReturnTypes.BIGINT,
ReturnTypes.explicit(SqlTypeName.OTHER)),
+ DISTINCTCOUNTRAWHLLMV("distinctCountRawHLLMV", null, SqlKind.OTHER_FUNCTION,
+ SqlFunctionCategory.USER_DEFINED_FUNCTION,
OperandTypes.family(SqlTypeFamily.ARRAY), ReturnTypes.VARCHAR_2000,
+ ReturnTypes.explicit(SqlTypeName.OTHER)),
+ DISTINCTSUMMV("distinctSumMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(SqlTypeFamily.ARRAY), ReturnTypes.DOUBLE,
ReturnTypes.explicit(SqlTypeName.OTHER)),
+ DISTINCTAVGMV("distinctAvgMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(SqlTypeFamily.ARRAY), ReturnTypes.DOUBLE,
ReturnTypes.explicit(SqlTypeName.OTHER)),
+ PERCENTILEMV("percentileMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(ImmutableList.of(SqlTypeFamily.ARRAY,
SqlTypeFamily.NUMERIC)), ReturnTypes.DOUBLE,
+ ReturnTypes.explicit(SqlTypeName.OTHER)),
+ PERCENTILEESTMV("percentileEstMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(ImmutableList.of(SqlTypeFamily.ARRAY,
SqlTypeFamily.NUMERIC)), ReturnTypes.DOUBLE,
+ ReturnTypes.explicit(SqlTypeName.OTHER)),
+ PERCENTILERAWESTMV("percentileRawEstMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(ImmutableList.of(SqlTypeFamily.ARRAY,
SqlTypeFamily.NUMERIC)), ReturnTypes.VARCHAR_2000,
+ ReturnTypes.explicit(SqlTypeName.OTHER)),
+ PERCENTILETDIGESTMV("percentileTDigestMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(ImmutableList.of(SqlTypeFamily.ARRAY,
SqlTypeFamily.NUMERIC)), ReturnTypes.DOUBLE,
+ ReturnTypes.explicit(SqlTypeName.OTHER)),
+ PERCENTILERAWTDIGESTMV("percentileRawTDigestMV", null,
SqlKind.OTHER_FUNCTION,
+ SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(ImmutableList.of(SqlTypeFamily.ARRAY,
SqlTypeFamily.NUMERIC)), ReturnTypes.VARCHAR_2000,
+ ReturnTypes.explicit(SqlTypeName.OTHER)),
+ PERCENTILEKLLMV("percentileKLLMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(ImmutableList.of(SqlTypeFamily.ARRAY,
SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC),
+ ordinal -> ordinal > 1 && ordinal < 4), ReturnTypes.DOUBLE,
ReturnTypes.explicit(SqlTypeName.OTHER)),
+ PERCENTILERAWKLLMV("percentileRawKLLMV", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.family(ImmutableList.of(SqlTypeFamily.ARRAY,
SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC),
+ ordinal -> ordinal > 1 && ordinal < 4), ReturnTypes.VARCHAR_2000,
ReturnTypes.explicit(SqlTypeName.OTHER)),
// boolean aggregate functions
BOOLAND("boolAnd", null, SqlKind.OTHER_FUNCTION,
SqlFunctionCategory.USER_DEFINED_FUNCTION,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]