This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new d359b72ff4 Fix IndexOutOfBoundException in filtered aggregation
group-by (#11231)
d359b72ff4 is described below
commit d359b72ff4aec3447db664715edf22dac95c2cca
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Mon Jul 31 20:28:58 2023 -0700
Fix IndexOutOfBoundException in filtered aggregation group-by (#11231)
---
.../operator/query/FilteredGroupByOperator.java | 3 +-
.../pinot/queries/FilteredAggregationsTest.java | 55 +++++++++++++---------
2 files changed, 35 insertions(+), 23 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/FilteredGroupByOperator.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/FilteredGroupByOperator.java
index 9be251ae23..0b1440e071 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/FilteredGroupByOperator.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/FilteredGroupByOperator.java
@@ -82,11 +82,12 @@ public class FilteredGroupByOperator extends
BaseOperator<GroupByResultsBlock> {
DataSchema.ColumnDataType[] columnDataTypes = new
DataSchema.ColumnDataType[numColumns];
// Extract column names and data types for group-by columns
+ BaseProjectOperator<?> projectOperator =
projectOperators.get(0).getRight();
for (int i = 0; i < numGroupByExpressions; i++) {
ExpressionContext groupByExpression = _groupByExpressions[i];
columnNames[i] = groupByExpression.toString();
columnDataTypes[i] = DataSchema.ColumnDataType.fromDataTypeSV(
-
projectOperators.get(i).getRight().getResultColumnContext(groupByExpression).getDataType());
+
projectOperator.getResultColumnContext(groupByExpression).getDataType());
}
// Extract column names and data types for aggregation functions
diff --git
a/pinot-core/src/test/java/org/apache/pinot/queries/FilteredAggregationsTest.java
b/pinot-core/src/test/java/org/apache/pinot/queries/FilteredAggregationsTest.java
index d3636e4fc9..a22943e415 100644
---
a/pinot-core/src/test/java/org/apache/pinot/queries/FilteredAggregationsTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/queries/FilteredAggregationsTest.java
@@ -391,49 +391,63 @@ public class FilteredAggregationsTest extends
BaseQueriesTest {
@Test
public void testGroupBy() {
- String filterQuery = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000)
testSum FROM MyTable GROUP BY BOOLEAN_COL";
- String nonFilterQuery = "SELECT SUM(INT_COL) testSum FROM MyTable WHERE
INT_COL > 25000 GROUP BY BOOLEAN_COL";
+ String filterQuery = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000)
testSum FROM MyTable GROUP BY BOOLEAN_COL "
+ + "ORDER BY BOOLEAN_COL";
+ String nonFilterQuery =
+ "SELECT SUM(INT_COL) testSum FROM MyTable WHERE INT_COL > 25000 GROUP
BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
testQuery(filterQuery, nonFilterQuery);
}
@Test
- public void testGroupByCaseAlternative() {
+ public void testGroupByMultipleColumns() {
String filterQuery =
- "SELECT SUM(INT_COL), SUM(INT_COL) FILTER(WHERE INT_COL > 25000) AS
total_sum FROM MyTable GROUP BY "
- + "BOOLEAN_COL";
+ "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000) testSum FROM
MyTable GROUP BY BOOLEAN_COL, STRING_COL "
+ + "ORDER BY BOOLEAN_COL, STRING_COL";
String nonFilterQuery =
- "SELECT SUM(INT_COL), SUM(CASE WHEN INT_COL > 25000 THEN INT_COL ELSE
0 END) AS total_sum FROM MyTable GROUP "
- + "BY BOOLEAN_COL";
+ "SELECT SUM(INT_COL) testSum FROM MyTable WHERE INT_COL > 25000 GROUP
BY BOOLEAN_COL, STRING_COL "
+ + "ORDER BY BOOLEAN_COL, STRING_COL";
+ testQuery(filterQuery, nonFilterQuery);
+ }
+
+ @Test
+ public void testGroupByCaseAlternative() {
+ String filterQuery = "SELECT SUM(INT_COL), SUM(INT_COL) FILTER(WHERE
INT_COL > 25000) AS total_sum FROM MyTable "
+ + "GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
+ String nonFilterQuery =
+ "SELECT SUM(INT_COL), SUM(CASE WHEN INT_COL > 25000 THEN INT_COL ELSE
0 END) AS total_sum FROM MyTable "
+ + "GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
testQuery(filterQuery, nonFilterQuery);
}
@Test
public void testGroupBySameFilter() {
String filterQuery =
- "SELECT AVG(INT_COL) FILTER(WHERE INT_COL > 25000) testAvg,
SUM(INT_COL) FILTER(WHERE INT_COL > 25000) "
- + "testSum FROM MyTable GROUP BY BOOLEAN_COL";
- String nonFilterQuery =
- "SELECT AVG(INT_COL) testAvg, SUM(INT_COL) testSum FROM MyTable WHERE
INT_COL > 25000 GROUP BY BOOLEAN_COL";
+ "SELECT AVG(INT_COL) FILTER(WHERE INT_COL > 25000) testAvg,
SUM(INT_COL) FILTER(WHERE INT_COL > 25000) testSum "
+ + "FROM MyTable GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
+ String nonFilterQuery = "SELECT AVG(INT_COL) testAvg, SUM(INT_COL) testSum
FROM MyTable WHERE INT_COL > 25000 "
+ + "GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
testQuery(filterQuery, nonFilterQuery);
}
@Test
public void testMultipleAggregationsOnSameFilterGroupBy() {
String filterQuery = "SELECT MIN(INT_COL) FILTER(WHERE NO_INDEX_COL >
29990) testMin, "
- + "MAX(INT_COL) FILTER(WHERE INT_COL > 29990) testMax FROM MyTable
GROUP BY BOOLEAN_COL";
+ + "MAX(INT_COL) FILTER(WHERE INT_COL > 29990) testMax FROM MyTable
GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
String nonFilterQuery =
- "SELECT MIN(INT_COL) testMin, MAX(INT_COL) testMax FROM MyTable WHERE
INT_COL > 29990 GROUP BY BOOLEAN_COL";
+ "SELECT MIN(INT_COL) testMin, MAX(INT_COL) testMax FROM MyTable WHERE
INT_COL > 29990 GROUP BY BOOLEAN_COL "
+ + "ORDER BY BOOLEAN_COL";
testQuery(filterQuery, nonFilterQuery);
filterQuery = "SELECT MIN(INT_COL) FILTER(WHERE NO_INDEX_COL > 29990) AS
total_min, "
+ "MAX(INT_COL) FILTER(WHERE INT_COL > 29990) AS total_max, "
+ "SUM(INT_COL) FILTER(WHERE NO_INDEX_COL < 5000) AS total_sum, "
- + "MAX(NO_INDEX_COL) FILTER(WHERE NO_INDEX_COL < 5000) AS total_max2
FROM MyTable GROUP BY BOOLEAN_COL";
+ + "MAX(NO_INDEX_COL) FILTER(WHERE NO_INDEX_COL < 5000) AS total_max2 "
+ + "FROM MyTable GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
nonFilterQuery = "SELECT MIN(CASE WHEN (NO_INDEX_COL > 29990) THEN INT_COL
ELSE 99999 END) AS total_min, "
+ "MAX(CASE WHEN (INT_COL > 29990) THEN INT_COL ELSE 0 END) AS
total_max, "
+ "SUM(CASE WHEN (NO_INDEX_COL < 5000) THEN INT_COL ELSE 0 END) AS
total_sum, "
- + "MAX(CASE WHEN (NO_INDEX_COL < 5000) THEN NO_INDEX_COL ELSE 0 END)
AS total_max2 FROM MyTable GROUP BY "
- + "BOOLEAN_COL";
+ + "MAX(CASE WHEN (NO_INDEX_COL < 5000) THEN NO_INDEX_COL ELSE 0 END)
AS total_max2 "
+ + "FROM MyTable GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
testQuery(filterQuery, nonFilterQuery);
}
@@ -452,10 +466,8 @@ public class FilteredAggregationsTest extends
BaseQueriesTest {
public void testSameNumScannedFilteredAggMatchAll() {
// For a single filtered aggregation, the same number of docs should be
scanned regardless of which portions of
// the filter are in the filter expression Vs. the main predicate i.e. the
applied filters are commutative.
- String filterQuery =
- "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000) testSum FROM
MyTable";
- String nonFilterQuery =
- "SELECT SUM(INT_COL) testSum FROM MyTable WHERE INT_COL > 25000";
+ String filterQuery = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000)
testSum FROM MyTable";
+ String nonFilterQuery = "SELECT SUM(INT_COL) testSum FROM MyTable WHERE
INT_COL > 25000";
long filterQueryDocsScanned =
getBrokerResponse(filterQuery).getNumDocsScanned();
long nonFilterQueryDocsScanned =
getBrokerResponse(nonFilterQuery).getNumDocsScanned();
assertEquals(filterQueryDocsScanned, nonFilterQueryDocsScanned);
@@ -467,8 +479,7 @@ public class FilteredAggregationsTest extends
BaseQueriesTest {
// the filter are in the filter expression Vs. the main predicate i.e. the
applied filters are commutative.
String filterQuery =
"SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000) testSum FROM
MyTable WHERE INT_COL < 1000000";
- String nonFilterQuery =
- "SELECT SUM(INT_COL) testSum FROM MyTable WHERE INT_COL > 25000 AND
INT_COL < 1000000";
+ String nonFilterQuery = "SELECT SUM(INT_COL) testSum FROM MyTable WHERE
INT_COL > 25000 AND INT_COL < 1000000";
long filterQueryDocsScanned =
getBrokerResponse(filterQuery).getNumDocsScanned();
long nonFilterQueryDocsScanned =
getBrokerResponse(nonFilterQuery).getNumDocsScanned();
assertEquals(filterQueryDocsScanned, nonFilterQueryDocsScanned);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]