UBarney commented on code in PR #15876: URL: https://github.com/apache/datafusion/pull/15876#discussion_r2077729484
########## datafusion/expr/src/logical_plan/builder.rs: ########## @@ -797,26 +807,146 @@ impl LogicalPlanBuilder { } // remove pushed down sort columns - let new_expr = schema.columns().into_iter().map(Expr::Column).collect(); + let sort_output = schema.columns().into_iter().map(Expr::Column).collect(); + + let plan = Arc::unwrap_or_clone(self.plan); + + let (plan, agg_func_to_col, sorts) = if missing_agg_funcs.is_empty() { + (plan, HashMap::new(), sorts) + } else { + { + let (plan, agg_func_to_col) = + Self::add_missing_agg_funcs_to_logical_agg(plan, &missing_agg_funcs)?; + + let sorts = sorts + .iter() + .map(|x| { + Self::replace_subexpr_to_col(&x.expr, &agg_func_to_col) + .map(|expr| x.with_expr(expr)) + }) + .collect::<Result<Vec<_>, _>>()?; + + (plan, agg_func_to_col, sorts) + } + }; + + // we need downstream filter/project return missing col(agg_funcs) + missing_cols.extend(agg_func_to_col.into_values()); let is_distinct = false; - let plan = Self::add_missing_columns( - Arc::unwrap_or_clone(self.plan), - &missing_cols, - is_distinct, - )?; + let plan = Self::add_missing_columns(plan, &missing_cols, is_distinct)?; let sort_plan = LogicalPlan::Sort(Sort { expr: normalize_sorts(sorts, &plan)?, input: Arc::new(plan), fetch, }); - Projection::try_new(new_expr, Arc::new(sort_plan)) + Projection::try_new(sort_output, Arc::new(sort_plan)) .map(LogicalPlan::Projection) .map(Self::new) } + fn replace_subexpr_to_col( + expr: &Expr, + func_to_col: &HashMap<Expr, Column>, + ) -> Result<Expr> { + Ok(expr + .clone() + .transform_down(|nested_expr| { + if let Some(col) = func_to_col.get(&nested_expr) { + Ok(Transformed::yes(Expr::Column(col.clone()))) + } else { + Ok(Transformed::no(nested_expr)) + } + })? + .data) + } + + fn add_missing_agg_funcs_to_logical_agg( + plan: LogicalPlan, + missing_agg_funcs: &IndexSet<Expr>, + ) -> Result<(LogicalPlan, HashMap<Expr, Column>)> { + let mut agg_func_to_output_col: HashMap<Expr, Column> = + HashMap::with_capacity(missing_agg_funcs.len()); + + if missing_agg_funcs.is_empty() { + return Ok((plan, agg_func_to_output_col)); + } + + let plan = plan + .transform_down(|plan| { Review Comment: This query would pass in branches that don't contain the code changes introduced in this PR. ``` √ devhomeinsp ~/c/datafusion > ./target/release/datafusion-cli DataFusion CLI v46.0.1 > create table t(a int); 0 row(s) fetched. Elapsed 0.010 seconds. > select * from t, (select max(a) from t) order by max(a); +---+----------+ | a | max(t.a) | +---+----------+ +---+----------+ 0 row(s) fetched. Elapsed 0.013 seconds. > CREATE TABLE order_by_agg_func_not_selected ( g INT, o INT ) AS VALUES (1, -1), (2, -2), (3, -3); 0 row(s) fetched. Elapsed 0.001 seconds. > SELECT g, count(o) FROM order_by_agg_func_not_selected group by g order by min(o); type_coercion caused by Schema error: No field named order_by_agg_func_not_selected.o. Valid fields are order_by_agg_func_not_selected.g, "count(order_by_agg_func_not_selected.o)". ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org