alamb commented on code in PR #4579:
URL: https://github.com/apache/arrow-datafusion/pull/4579#discussion_r1045200524
##########
datafusion/core/tests/sql/group_by.rs:
##########
@@ -194,7 +194,7 @@ async fn csv_query_group_by_and_having_and_where() ->
Result<()> {
async fn csv_query_having_without_group_by() -> Result<()> {
let ctx = SessionContext::new();
register_aggregate_csv(&ctx).await?;
- let sql = "SELECT c1, c2, c3 FROM aggregate_test_100 HAVING c2 >= 4 AND c3
> 90";
+ let sql = "SELECT c1, c2, c3 FROM aggregate_test_100 where c2 >= 4 AND c3
> 90";
Review Comment:
I think we can remove the whole test as we definitely have coverage for
selecting with predicates in the where clause and the name of the test doesn't
make sense `csv_query_having_without_group_by`
##########
datafusion/sql/src/planner.rs:
##########
@@ -3641,10 +3632,11 @@ mod tests {
let sql = "SELECT id, age
FROM person
HAVING age > 100 AND age < 200";
- let expected = "Projection: person.id, person.age\
- \n Filter: person.age > Int64(100) AND person.age <
Int64(200)\
- \n TableScan: person";
- quick_test(sql, expected);
+ let err = logical_plan(sql).expect_err("query should have failed");
Review Comment:
I recommend one test (that should also fail) where there is a GROUP clause
but the having clause refers to an invalid column, such as
```sql
SELECT id, MAX(age)
FROM PERSON
GROUP BY id
-- first_name is a valid column but does not appear in the grouping output
(group column nor aggregate)
HAVING first_name = 'M'
```
##########
datafusion/sql/src/planner.rs:
##########
@@ -1193,33 +1193,24 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
.collect::<Result<Vec<Expr>>>()?;
// process group by, aggregation or having
- let (plan, mut select_exprs_post_aggr, having_expr_post_aggr) =
- if !group_by_exprs.is_empty() || !aggr_exprs.is_empty() {
- self.aggregate(
- plan,
- &select_exprs,
- having_expr_opt.as_ref(),
- group_by_exprs,
- aggr_exprs,
- )?
- } else {
- if let Some(having_expr) = &having_expr_opt {
- let available_columns = select_exprs
- .iter()
- .map(|expr| expr_as_column_expr(expr, &plan))
- .collect::<Result<Vec<Expr>>>()?;
-
- // Ensure the HAVING expression is using only columns
- // provided by the SELECT.
- check_columns_satisfy_exprs(
- &available_columns,
- &[having_expr.clone()],
- "HAVING clause references column(s) not provided by
the select",
- )?;
+ let (plan, mut select_exprs_post_aggr, having_expr_post_aggr) = if
!group_by_exprs
+ .is_empty()
+ || !aggr_exprs.is_empty()
+ {
+ self.aggregate(
Review Comment:
I think we have to check here that the having expr only uses columns in the
logical aggregation phase (group by columns or agg exprs)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]