This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new d359b72ff4 Fix IndexOutOfBoundException in filtered aggregation 
group-by (#11231)
d359b72ff4 is described below

commit d359b72ff4aec3447db664715edf22dac95c2cca
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Mon Jul 31 20:28:58 2023 -0700

    Fix IndexOutOfBoundException in filtered aggregation group-by (#11231)
---
 .../operator/query/FilteredGroupByOperator.java    |  3 +-
 .../pinot/queries/FilteredAggregationsTest.java    | 55 +++++++++++++---------
 2 files changed, 35 insertions(+), 23 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/FilteredGroupByOperator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/FilteredGroupByOperator.java
index 9be251ae23..0b1440e071 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/query/FilteredGroupByOperator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/query/FilteredGroupByOperator.java
@@ -82,11 +82,12 @@ public class FilteredGroupByOperator extends 
BaseOperator<GroupByResultsBlock> {
     DataSchema.ColumnDataType[] columnDataTypes = new 
DataSchema.ColumnDataType[numColumns];
 
     // Extract column names and data types for group-by columns
+    BaseProjectOperator<?> projectOperator = 
projectOperators.get(0).getRight();
     for (int i = 0; i < numGroupByExpressions; i++) {
       ExpressionContext groupByExpression = _groupByExpressions[i];
       columnNames[i] = groupByExpression.toString();
       columnDataTypes[i] = DataSchema.ColumnDataType.fromDataTypeSV(
-          
projectOperators.get(i).getRight().getResultColumnContext(groupByExpression).getDataType());
+          
projectOperator.getResultColumnContext(groupByExpression).getDataType());
     }
 
     // Extract column names and data types for aggregation functions
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/queries/FilteredAggregationsTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/queries/FilteredAggregationsTest.java
index d3636e4fc9..a22943e415 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/queries/FilteredAggregationsTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/queries/FilteredAggregationsTest.java
@@ -391,49 +391,63 @@ public class FilteredAggregationsTest extends 
BaseQueriesTest {
 
   @Test
   public void testGroupBy() {
-    String filterQuery = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000) 
testSum FROM MyTable GROUP BY BOOLEAN_COL";
-    String nonFilterQuery = "SELECT SUM(INT_COL) testSum FROM MyTable WHERE 
INT_COL > 25000 GROUP BY BOOLEAN_COL";
+    String filterQuery = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000) 
testSum FROM MyTable GROUP BY BOOLEAN_COL "
+        + "ORDER BY BOOLEAN_COL";
+    String nonFilterQuery =
+        "SELECT SUM(INT_COL) testSum FROM MyTable WHERE INT_COL > 25000 GROUP 
BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
     testQuery(filterQuery, nonFilterQuery);
   }
 
   @Test
-  public void testGroupByCaseAlternative() {
+  public void testGroupByMultipleColumns() {
     String filterQuery =
-        "SELECT SUM(INT_COL), SUM(INT_COL) FILTER(WHERE INT_COL > 25000) AS 
total_sum FROM MyTable GROUP BY "
-            + "BOOLEAN_COL";
+        "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000) testSum FROM 
MyTable GROUP BY BOOLEAN_COL, STRING_COL "
+            + "ORDER BY BOOLEAN_COL, STRING_COL";
     String nonFilterQuery =
-        "SELECT SUM(INT_COL), SUM(CASE WHEN INT_COL > 25000 THEN INT_COL ELSE 
0 END) AS total_sum FROM MyTable GROUP "
-            + "BY BOOLEAN_COL";
+        "SELECT SUM(INT_COL) testSum FROM MyTable WHERE INT_COL > 25000 GROUP 
BY BOOLEAN_COL, STRING_COL "
+            + "ORDER BY BOOLEAN_COL, STRING_COL";
+    testQuery(filterQuery, nonFilterQuery);
+  }
+
+  @Test
+  public void testGroupByCaseAlternative() {
+    String filterQuery = "SELECT SUM(INT_COL), SUM(INT_COL) FILTER(WHERE 
INT_COL > 25000) AS total_sum FROM MyTable "
+        + "GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
+    String nonFilterQuery =
+        "SELECT SUM(INT_COL), SUM(CASE WHEN INT_COL > 25000 THEN INT_COL ELSE 
0 END) AS total_sum FROM MyTable "
+            + "GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
     testQuery(filterQuery, nonFilterQuery);
   }
 
   @Test
   public void testGroupBySameFilter() {
     String filterQuery =
-        "SELECT AVG(INT_COL) FILTER(WHERE INT_COL > 25000) testAvg, 
SUM(INT_COL) FILTER(WHERE INT_COL > 25000) "
-            + "testSum FROM MyTable GROUP BY BOOLEAN_COL";
-    String nonFilterQuery =
-        "SELECT AVG(INT_COL) testAvg, SUM(INT_COL) testSum FROM MyTable WHERE 
INT_COL > 25000 GROUP BY BOOLEAN_COL";
+        "SELECT AVG(INT_COL) FILTER(WHERE INT_COL > 25000) testAvg, 
SUM(INT_COL) FILTER(WHERE INT_COL > 25000) testSum "
+            + "FROM MyTable GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
+    String nonFilterQuery = "SELECT AVG(INT_COL) testAvg, SUM(INT_COL) testSum 
FROM MyTable WHERE INT_COL > 25000 "
+        + "GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
     testQuery(filterQuery, nonFilterQuery);
   }
 
   @Test
   public void testMultipleAggregationsOnSameFilterGroupBy() {
     String filterQuery = "SELECT MIN(INT_COL) FILTER(WHERE NO_INDEX_COL > 
29990) testMin, "
-        + "MAX(INT_COL) FILTER(WHERE INT_COL > 29990) testMax FROM MyTable 
GROUP BY BOOLEAN_COL";
+        + "MAX(INT_COL) FILTER(WHERE INT_COL > 29990) testMax FROM MyTable 
GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
     String nonFilterQuery =
-        "SELECT MIN(INT_COL) testMin, MAX(INT_COL) testMax FROM MyTable WHERE 
INT_COL > 29990 GROUP BY BOOLEAN_COL";
+        "SELECT MIN(INT_COL) testMin, MAX(INT_COL) testMax FROM MyTable WHERE 
INT_COL > 29990 GROUP BY BOOLEAN_COL "
+            + "ORDER BY BOOLEAN_COL";
     testQuery(filterQuery, nonFilterQuery);
 
     filterQuery = "SELECT MIN(INT_COL) FILTER(WHERE NO_INDEX_COL > 29990) AS 
total_min, "
         + "MAX(INT_COL) FILTER(WHERE INT_COL > 29990) AS total_max, "
         + "SUM(INT_COL) FILTER(WHERE NO_INDEX_COL < 5000) AS total_sum, "
-        + "MAX(NO_INDEX_COL) FILTER(WHERE NO_INDEX_COL < 5000) AS total_max2 
FROM MyTable GROUP BY BOOLEAN_COL";
+        + "MAX(NO_INDEX_COL) FILTER(WHERE NO_INDEX_COL < 5000) AS total_max2 "
+        + "FROM MyTable GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
     nonFilterQuery = "SELECT MIN(CASE WHEN (NO_INDEX_COL > 29990) THEN INT_COL 
ELSE 99999 END) AS total_min, "
         + "MAX(CASE WHEN (INT_COL > 29990) THEN INT_COL ELSE 0 END) AS 
total_max, "
         + "SUM(CASE WHEN (NO_INDEX_COL < 5000) THEN INT_COL ELSE 0 END) AS 
total_sum, "
-        + "MAX(CASE WHEN (NO_INDEX_COL < 5000) THEN NO_INDEX_COL ELSE 0 END) 
AS total_max2 FROM MyTable GROUP BY "
-        + "BOOLEAN_COL";
+        + "MAX(CASE WHEN (NO_INDEX_COL < 5000) THEN NO_INDEX_COL ELSE 0 END) 
AS total_max2 "
+        + "FROM MyTable GROUP BY BOOLEAN_COL ORDER BY BOOLEAN_COL";
     testQuery(filterQuery, nonFilterQuery);
   }
 
@@ -452,10 +466,8 @@ public class FilteredAggregationsTest extends 
BaseQueriesTest {
   public void testSameNumScannedFilteredAggMatchAll() {
     // For a single filtered aggregation, the same number of docs should be 
scanned regardless of which portions of
     // the filter are in the filter expression Vs. the main predicate i.e. the 
applied filters are commutative.
-    String filterQuery =
-        "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000) testSum FROM 
MyTable";
-    String nonFilterQuery =
-        "SELECT SUM(INT_COL) testSum FROM MyTable WHERE INT_COL > 25000";
+    String filterQuery = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000) 
testSum FROM MyTable";
+    String nonFilterQuery = "SELECT SUM(INT_COL) testSum FROM MyTable WHERE 
INT_COL > 25000";
     long filterQueryDocsScanned = 
getBrokerResponse(filterQuery).getNumDocsScanned();
     long nonFilterQueryDocsScanned = 
getBrokerResponse(nonFilterQuery).getNumDocsScanned();
     assertEquals(filterQueryDocsScanned, nonFilterQueryDocsScanned);
@@ -467,8 +479,7 @@ public class FilteredAggregationsTest extends 
BaseQueriesTest {
     // the filter are in the filter expression Vs. the main predicate i.e. the 
applied filters are commutative.
     String filterQuery =
         "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 25000) testSum FROM 
MyTable WHERE INT_COL < 1000000";
-    String nonFilterQuery =
-        "SELECT SUM(INT_COL) testSum FROM MyTable WHERE INT_COL > 25000 AND 
INT_COL < 1000000";
+    String nonFilterQuery = "SELECT SUM(INT_COL) testSum FROM MyTable WHERE 
INT_COL > 25000 AND INT_COL < 1000000";
     long filterQueryDocsScanned = 
getBrokerResponse(filterQuery).getNumDocsScanned();
     long nonFilterQueryDocsScanned = 
getBrokerResponse(nonFilterQuery).getNumDocsScanned();
     assertEquals(filterQueryDocsScanned, nonFilterQueryDocsScanned);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to