alamb commented on code in PR #4579:
URL: https://github.com/apache/arrow-datafusion/pull/4579#discussion_r1045200524


##########
datafusion/core/tests/sql/group_by.rs:
##########
@@ -194,7 +194,7 @@ async fn csv_query_group_by_and_having_and_where() -> 
Result<()> {
 async fn csv_query_having_without_group_by() -> Result<()> {
     let ctx = SessionContext::new();
     register_aggregate_csv(&ctx).await?;
-    let sql = "SELECT c1, c2, c3 FROM aggregate_test_100 HAVING c2 >= 4 AND c3 
> 90";
+    let sql = "SELECT c1, c2, c3 FROM aggregate_test_100 where c2 >= 4 AND c3 
> 90";

Review Comment:
   I think we can remove the whole test as we definitely have coverage for 
selecting with predicates in the where clause and the name of the test doesn't 
make sense `csv_query_having_without_group_by` 



##########
datafusion/sql/src/planner.rs:
##########
@@ -3641,10 +3632,11 @@ mod tests {
         let sql = "SELECT id, age
                    FROM person
                    HAVING age > 100 AND age < 200";
-        let expected = "Projection: person.id, person.age\
-                        \n  Filter: person.age > Int64(100) AND person.age < 
Int64(200)\
-                        \n    TableScan: person";
-        quick_test(sql, expected);
+        let err = logical_plan(sql).expect_err("query should have failed");

Review Comment:
   I recommend one test (that should also fail) where there is a GROUP clause 
but the having clause refers to an invalid column, such as
   
   ```sql
   SELECT id, MAX(age)
   FROM PERSON
   GROUP BY id
   -- first_name is a valid column but does not appear in the grouping output 
(group column nor aggregate)
   HAVING first_name = 'M'
   ```



##########
datafusion/sql/src/planner.rs:
##########
@@ -1193,33 +1193,24 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             .collect::<Result<Vec<Expr>>>()?;
 
         // process group by, aggregation or having
-        let (plan, mut select_exprs_post_aggr, having_expr_post_aggr) =
-            if !group_by_exprs.is_empty() || !aggr_exprs.is_empty() {
-                self.aggregate(
-                    plan,
-                    &select_exprs,
-                    having_expr_opt.as_ref(),
-                    group_by_exprs,
-                    aggr_exprs,
-                )?
-            } else {
-                if let Some(having_expr) = &having_expr_opt {
-                    let available_columns = select_exprs
-                        .iter()
-                        .map(|expr| expr_as_column_expr(expr, &plan))
-                        .collect::<Result<Vec<Expr>>>()?;
-
-                    // Ensure the HAVING expression is using only columns
-                    // provided by the SELECT.
-                    check_columns_satisfy_exprs(
-                        &available_columns,
-                        &[having_expr.clone()],
-                        "HAVING clause references column(s) not provided by 
the select",
-                    )?;
+        let (plan, mut select_exprs_post_aggr, having_expr_post_aggr) = if 
!group_by_exprs
+            .is_empty()
+            || !aggr_exprs.is_empty()
+        {
+            self.aggregate(

Review Comment:
   I think we have to check here that the having expr only uses columns in the 
logical aggregation phase (group by columns or agg exprs)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to