alamb commented on code in PR #21021:
URL: https://github.com/apache/datafusion/pull/21021#discussion_r3306270765
##########
datafusion/core/src/dataframe/mod.rs:
##########
@@ -410,21 +426,128 @@ impl DataFrame {
expr_list: impl IntoIterator<Item = impl Into<SelectExpr>>,
) -> Result<DataFrame> {
let expr_list: Vec<SelectExpr> =
- expr_list.into_iter().map(|e| e.into()).collect::<Vec<_>>();
+ expr_list.into_iter().map(|e| e.into()).collect();
+ // Extract expressions
let expressions = expr_list.iter().filter_map(|e| match e {
SelectExpr::Expression(expr) => Some(expr),
_ => None,
});
- let window_func_exprs = find_window_exprs(expressions);
- let plan = if window_func_exprs.is_empty() {
+ // Apply window functions first
+ let window_func_exprs = find_window_exprs(expressions.clone());
+
+ let mut plan = if window_func_exprs.is_empty() {
self.plan
} else {
LogicalPlanBuilder::window_plan(self.plan, window_func_exprs)?
};
- let project_plan =
LogicalPlanBuilder::from(plan).project(expr_list)?.build()?;
+ // Collect aggregate expressions
+ let aggr_exprs = find_aggregate_exprs(expressions.clone());
+
+ // Check for non-aggregate expressions
+ let has_non_aggregate_expr = expr_list.iter().any(|e| match e {
+ SelectExpr::Expression(expr) => {
+ find_aggregate_exprs(std::iter::once(expr)).is_empty()
+ }
+ SelectExpr::Wildcard(_) | SelectExpr::QualifiedWildcard(_, _) =>
true,
+ });
+
+ if has_non_aggregate_expr && !aggr_exprs.is_empty() {
+ return plan_err!(
+ "Column in SELECT must be in GROUP BY or an aggregate function"
+ );
+ }
+
+ // Fallback to projection
+ if matches!(plan, LogicalPlan::Aggregate(_))
+ || has_non_aggregate_expr
+ || aggr_exprs.is_empty()
+ {
+ let project_plan =
+ LogicalPlanBuilder::from(plan).project(expr_list)?.build()?;
+
+ return Ok(DataFrame {
+ session_state: self.session_state,
+ plan: project_plan,
+ projection_requires_validation: false,
+ });
+ }
+
+ // Unique name generator
Review Comment:
I agree this seems like much more logic is added here than justified by the
feature being added. Something doesn't add up 🤔
##########
datafusion-examples/examples/dataframe/dataframe.rs:
##########
@@ -53,8 +55,10 @@ use tokio::fs::create_dir_all;
///
/// * [write_out]: write out a DataFrame to a table, parquet file, csv file,
or json file
///
-/// # Executing subqueries
+/// # Querying data
///
+/// * [aggregate_global_and_grouped]: global vs grouped aggregation (`select`
vs `aggregate`)
Review Comment:
maybe we can add them as a separate PR -- this PR is already pretty big
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]