alamb commented on a change in pull request #9364:
URL: https://github.com/apache/arrow/pull/9364#discussion_r568781006



##########
File path: rust/datafusion/src/sql/planner.rs
##########
@@ -1233,6 +1321,257 @@ mod tests {
         quick_test(sql, expected);
     }
 
+    #[test]
+    fn select_with_having() {
+        let sql = "SELECT id, age
+                   FROM person
+                   HAVING age > 100 AND age < 200";
+        let expected = "Projection: #id, #age\
+                        \n  Filter: #age Gt Int64(100) And #age Lt Int64(200)\
+                        \n    TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn select_with_having_referencing_column_not_in_select() {
+        let sql = "SELECT id, age
+                   FROM person
+                   HAVING first_name = 'M'";
+        let err = logical_plan(sql).expect_err("query should have failed");
+        assert_eq!(
+            "Plan(\"Having references column(s) not provided by the select\")",
+            format!("{:?}", err)
+        );
+    }
+
+    #[test]
+    fn select_with_having_referencing_column_nested_in_select_expression() {
+        let sql = "SELECT id, age + 1
+                   FROM person
+                   HAVING age > 100";
+        let err = logical_plan(sql).expect_err("query should have failed");
+        assert_eq!(
+            "Plan(\"Having references column(s) not provided by the select\")",
+            format!("{:?}", err)
+        );
+    }
+
+    #[test]
+    fn select_with_having_with_aggregate_not_in_select() {
+        let sql = "SELECT first_name
+                   FROM person
+                   HAVING MAX(age) > 100";
+        let err = logical_plan(sql).expect_err("query should have failed");
+        assert_eq!(
+            "Plan(\"Projection references non-aggregate values\")",
+            format!("{:?}", err)
+        );
+    }
+
+    #[test]
+    fn select_aggregate_with_having_that_reuses_aggregate() {
+        let sql = "SELECT MAX(age)
+                   FROM person
+                   HAVING MAX(age) < 30";
+        let expected = "Filter: #MAX(age) Lt Int64(30)\
+                        \n  Aggregate: groupBy=[[]], aggr=[[MAX(#age)]]\
+                        \n    TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn select_aggregate_with_having_with_aggregate_not_in_select() {
+        let sql = "SELECT MAX(age)
+                   FROM person
+                   HAVING MAX(first_name) > 'M'";
+        let expected = "Projection: #MAX(age)\
+                        \n  Filter: #MAX(first_name) Gt Utf8(\"M\")\
+                        \n    Aggregate: groupBy=[[]], aggr=[[MAX(#age), 
MAX(#first_name)]]\
+                        \n      TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn select_aggregate_with_having_referencing_column_not_in_select() {
+        let sql = "SELECT COUNT(*)
+                   FROM person
+                   HAVING first_name = 'M'";
+        let err = logical_plan(sql).expect_err("query should have failed");
+        assert_eq!(
+            "Plan(\"Having references non-aggregate values\")",
+            format!("{:?}", err)
+        );
+    }
+
+    #[test]
+    fn 
select_aggregate_aliased_with_having_referencing_aggregate_by_its_alias() {
+        let sql = "SELECT MAX(age) as max_age
+                   FROM person
+                   HAVING max_age < 30";
+        let expected = "Projection: #MAX(age) AS max_age\
+                        \n  Filter: #MAX(age) Lt Int64(30)\
+                        \n    Aggregate: groupBy=[[]], aggr=[[MAX(#age)]]\
+                        \n      TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn 
select_aggregate_aliased_with_having_that_reuses_aggregate_but_not_by_its_alias()
 {
+        let sql = "SELECT MAX(age) as max_age
+                   FROM person
+                   HAVING MAX(age) < 30";
+        let expected = "Projection: #MAX(age) AS max_age\
+                        \n  Filter: #MAX(age) Lt Int64(30)\
+                        \n    Aggregate: groupBy=[[]], aggr=[[MAX(#age)]]\
+                        \n      TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn select_aggregate_with_group_by_with_having() {
+        let sql = "SELECT first_name, MAX(age)
+                   FROM person
+                   GROUP BY first_name
+                   HAVING first_name = 'M'";
+        let expected = "Filter: #first_name Eq Utf8(\"M\")\
+                        \n  Aggregate: groupBy=[[#first_name]], 
aggr=[[MAX(#age)]]\
+                        \n    TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn select_aggregate_with_group_by_with_having_using_column_by_alias() {
+        let sql = "SELECT first_name AS fn, MAX(age)
+                   FROM person
+                   GROUP BY first_name
+                   HAVING MAX(age) > 2 AND fn = 'M'";
+        let expected = "Projection: #first_name AS fn, #MAX(age)\
+                        \n  Filter: #MAX(age) Gt Int64(2) And #first_name Eq 
Utf8(\"M\")\
+                        \n    Aggregate: groupBy=[[#first_name]], 
aggr=[[MAX(#age)]]\
+                        \n      TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn 
select_aggregate_with_group_by_with_having_using_columns_with_and_without_their_aliases(
+    ) {
+        let sql = "SELECT first_name AS fn, MAX(age) AS max_age
+                   FROM person
+                   GROUP BY first_name
+                   HAVING MAX(age) > 2 AND max_age < 5 AND first_name = 'M' 
AND fn = 'N'";
+        let expected = "Projection: #first_name AS fn, #MAX(age) AS max_age\
+                        \n  Filter: #MAX(age) Gt Int64(2) And #MAX(age) Lt 
Int64(5) And #first_name Eq Utf8(\"M\") And #first_name Eq Utf8(\"N\")\
+                        \n    Aggregate: groupBy=[[#first_name]], 
aggr=[[MAX(#age)]]\
+                        \n      TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn select_aggregate_with_group_by_with_having_that_reuses_aggregate() {
+        let sql = "SELECT first_name, MAX(age)
+                   FROM person
+                   GROUP BY first_name
+                   HAVING MAX(age) > 100";
+        let expected = "Filter: #MAX(age) Gt Int64(100)\
+                        \n  Aggregate: groupBy=[[#first_name]], 
aggr=[[MAX(#age)]]\
+                        \n    TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn 
select_aggregate_with_group_by_with_having_referencing_column_not_in_group_by() 
{
+        let sql = "SELECT first_name, MAX(age)
+                   FROM person
+                   GROUP BY first_name
+                   HAVING MAX(age) > 10 AND last_name = 'M'";
+        let err = logical_plan(sql).expect_err("query should have failed");
+        assert_eq!(
+            "Plan(\"Having references non-aggregate values\")",
+            format!("{:?}", err)
+        );
+    }
+
+    #[test]
+    fn 
select_aggregate_with_group_by_with_having_that_reuses_aggregate_multiple_times()
 {
+        let sql = "SELECT first_name, MAX(age)
+                   FROM person
+                   GROUP BY first_name
+                   HAVING MAX(age) > 100 AND MAX(age) < 200";
+        let expected = "Filter: #MAX(age) Gt Int64(100) And #MAX(age) Lt 
Int64(200)\
+                        \n  Aggregate: groupBy=[[#first_name]], 
aggr=[[MAX(#age)]]\
+                        \n    TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn 
select_aggregate_with_group_by_with_having_using_aggreagate_not_in_select() {
+        let sql = "SELECT first_name, MAX(age)
+                   FROM person
+                   GROUP BY first_name
+                   HAVING MAX(age) > 100 AND MIN(id) < 50";
+        let expected = "Projection: #first_name, #MAX(age)\
+                        \n  Filter: #MAX(age) Gt Int64(100) And #MIN(id) Lt 
Int64(50)\
+                        \n    Aggregate: groupBy=[[#first_name]], 
aggr=[[MAX(#age), MIN(#id)]]\
+                        \n      TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn 
select_aggregate_aliased_with_group_by_with_having_referencing_aggregate_by_its_alias(
+    ) {
+        let sql = "SELECT first_name, MAX(age) AS max_age
+                   FROM person
+                   GROUP BY first_name
+                   HAVING max_age > 100";
+        let expected = "Projection: #first_name, #MAX(age) AS max_age\
+                        \n  Filter: #MAX(age) Gt Int64(100)\
+                        \n    Aggregate: groupBy=[[#first_name]], 
aggr=[[MAX(#age)]]\
+                        \n      TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn 
select_aggregate_compound_aliased_with_group_by_with_having_referencing_compound_aggregate_by_its_alias(
+    ) {
+        let sql = "SELECT first_name, MAX(age) + 1 AS max_age_plus_one
+                   FROM person
+                   GROUP BY first_name
+                   HAVING max_age_plus_one > 100";
+        let expected =
+            "Projection: #first_name, #MAX(age) Plus Int64(1) AS 
max_age_plus_one\
+                        \n  Filter: #MAX(age) Plus Int64(1) Gt Int64(100)\
+                        \n    Aggregate: groupBy=[[#first_name]], 
aggr=[[MAX(#age)]]\
+                        \n      TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn 
select_aggregate_with_group_by_with_having_using_derived_column_aggreagate_not_in_select(
+    ) {
+        let sql = "SELECT first_name, MAX(age)
+                   FROM person
+                   GROUP BY first_name
+                   HAVING MAX(age) > 100 AND MIN(id - 2) < 50";
+        let expected = "Projection: #first_name, #MAX(age)\
+                        \n  Filter: #MAX(age) Gt Int64(100) And #MIN(id Minus 
Int64(2)) Lt Int64(50)\
+                        \n    Aggregate: groupBy=[[#first_name]], 
aggr=[[MAX(#age), MIN(#id Minus Int64(2))]]\
+                        \n      TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn 
select_aggregate_with_group_by_with_having_using_count_star_not_in_select() {
+        let sql = "SELECT first_name, MAX(age)
+                   FROM person
+                   GROUP BY first_name
+                   HAVING MAX(age) > 100 AND COUNT(*) < 50";
+        let expected = "Projection: #first_name, #MAX(age)\
+                        \n  Filter: #MAX(age) Gt Int64(100) And 
#COUNT(UInt8(1)) Lt Int64(50)\
+                        \n    Aggregate: groupBy=[[#first_name]], 
aggr=[[MAX(#age), COUNT(UInt8(1))]]\
+                        \n      TableScan: person projection=None";
+        quick_test(sql, expected);
+    }
+

Review comment:
       These test cases are pretty amazing @drusso  -- thank you. The only one 
I think we should also add is a query that has both a `HAVING` and `WHERE` 
clause
   
   Something like 
   
   ```
    "SELECT first_name, MAX(age)
                      FROM person
                      WHERE id > 5
                      GROUP BY first_name
                      HAVING MAX(age) <100";
   ```
   
   And 
   ```
    "SELECT first_name, MAX(age)
                      FROM person
                      WHERE id > 5 AND age > 18
                      GROUP BY first_name
                      HAVING MAX(age) <100";
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to