maropu commented on a change in pull request #26656: [SPARK-27986][SQL] Support ANSI SQL filter clause for aggregate expression. URL: https://github.com/apache/spark/pull/26656#discussion_r350160173
########## File path: sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql ########## @@ -0,0 +1,29 @@ +-- Test filter clause for aggregate expression. + +-- Test data. +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) +AS testData(a, b); + +-- Aggregate with filter and empty GroupBy expressions. +SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData; +SELECT COUNT(a) FILTER (WHERE a = 1), COUNT(b) FILTER (WHERE a > 1) FROM testData; + +-- Aggregate with filter and non-empty GroupBy expressions. +SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData GROUP BY a; +SELECT a, COUNT(b) FILTER (WHERE a != 2) FROM testData GROUP BY b; +SELECT COUNT(a) FILTER (WHERE a >= 0), COUNT(b) FILTER (WHERE a >= 3) FROM testData GROUP BY a; + +-- Aggregate with filter and grouped by literals. +SELECT 'foo', COUNT(a) FILTER (WHERE b <= 2) FROM testData GROUP BY 1; + +-- Aggregate with filter and grouped by literals (hash aggregate). +SELECT 'foo', APPROX_COUNT_DISTINCT(a) FILTER (WHERE b >= 0) FROM testData WHERE a = 0 GROUP BY 1; + +-- Aggregate with filter and grouped by literals (sort aggregate). +SELECT 'foo', MAX(STRUCT(a)) FILTER (WHERE b >= 1) FROM testData WHERE a = 0 GROUP BY 1; + +-- Aggregate with filter and complex GroupBy expressions. +SELECT a + b, COUNT(b) FILTER (WHERE b >= 2) FROM testData GROUP BY a + b; +SELECT a + 2, COUNT(b) FILTER (WHERE b IN (1, 2)) FROM testData GROUP BY a + 1; +SELECT a + 1 + 1, COUNT(b) FILTER (WHERE b > 0) FROM testData GROUP BY a + 1; Review comment: Can you add some tests for regular subqueries and correlated ones? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org