Jefffrey commented on code in PR #21021:
URL: https://github.com/apache/datafusion/pull/21021#discussion_r3130104363


##########
datafusion/core/src/dataframe/mod.rs:
##########
@@ -410,21 +426,128 @@ impl DataFrame {
         expr_list: impl IntoIterator<Item = impl Into<SelectExpr>>,
     ) -> Result<DataFrame> {
         let expr_list: Vec<SelectExpr> =
-            expr_list.into_iter().map(|e| e.into()).collect::<Vec<_>>();
+            expr_list.into_iter().map(|e| e.into()).collect();
 
+        // Extract expressions
         let expressions = expr_list.iter().filter_map(|e| match e {
             SelectExpr::Expression(expr) => Some(expr),
             _ => None,
         });
 
-        let window_func_exprs = find_window_exprs(expressions);
-        let plan = if window_func_exprs.is_empty() {
+        // Apply window functions first
+        let window_func_exprs = find_window_exprs(expressions.clone());
+
+        let mut plan = if window_func_exprs.is_empty() {
             self.plan
         } else {
             LogicalPlanBuilder::window_plan(self.plan, window_func_exprs)?
         };
 
-        let project_plan = 
LogicalPlanBuilder::from(plan).project(expr_list)?.build()?;
+        // Collect aggregate expressions
+        let aggr_exprs = find_aggregate_exprs(expressions.clone());
+
+        // Check for non-aggregate expressions
+        let has_non_aggregate_expr = expr_list.iter().any(|e| match e {
+            SelectExpr::Expression(expr) => {
+                find_aggregate_exprs(std::iter::once(expr)).is_empty()
+            }
+            SelectExpr::Wildcard(_) | SelectExpr::QualifiedWildcard(_, _) => 
true,
+        });
+
+        if has_non_aggregate_expr && !aggr_exprs.is_empty() {
+            return plan_err!(
+                "Column in SELECT must be in GROUP BY or an aggregate function"
+            );
+        }
+
+        // Fallback to projection
+        if matches!(plan, LogicalPlan::Aggregate(_))
+            || has_non_aggregate_expr
+            || aggr_exprs.is_empty()
+        {
+            let project_plan =
+                LogicalPlanBuilder::from(plan).project(expr_list)?.build()?;
+
+            return Ok(DataFrame {
+                session_state: self.session_state,
+                plan: project_plan,
+                projection_requires_validation: false,
+            });
+        }
+
+        // Unique name generator

Review Comment:
   Hm that makes sense, though with the amount of logic introduced here it 
makes me wonder if this feature is worth it. I'll try find some time to 
carefully look over this.



##########
datafusion-examples/examples/dataframe/dataframe.rs:
##########
@@ -53,8 +55,10 @@ use tokio::fs::create_dir_all;
 ///
 /// * [write_out]: write out a DataFrame to a table, parquet file, csv file, 
or json file
 ///
-/// # Executing subqueries
+/// # Querying data
 ///
+/// * [aggregate_global_and_grouped]: global vs grouped aggregation (`select` 
vs `aggregate`)

Review Comment:
   Whats the reasoning behind adding these new examples? Did we not have 
examples of aggregation before with DataFrame API?



##########
datafusion/core/tests/dataframe/mod.rs:
##########
@@ -1046,26 +1049,28 @@ async fn test_aggregate_with_union() -> Result<()> {
 
     let df1 = df
         .clone()
-        // GROUP BY `c1`
-        .aggregate(vec![col("c1")], vec![min(col("c2"))])?
-        // SELECT `c1` , min(c2) as `result`
-        .select(vec![col("c1"), min(col("c2")).alias("result")])?;
+        // GROUP BY c1, compute min(c2) as result
+        .aggregate(vec![col("c1")], vec![min(col("c2")).alias("result")])?

Review Comment:
   I don't quite understand how these aren't considered regressions if we 
needed to fix existing test cases because of this new functionality 🤔 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to