kumarUjjawal commented on code in PR #22169:
URL: https://github.com/apache/datafusion/pull/22169#discussion_r3289256385
##########
datafusion/sql/src/select.rs:
##########
@@ -377,39 +438,94 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
plan
};
- // Try processing unnest expression or do the final projection
- let plan = self.try_process_unnest(plan, select_exprs_post_aggr)?;
-
- // Process distinct clause
+ // Process distinct clause. For `DISTINCT ON` combined with
+ // aggregation, GROUP BY, or window functions we apply DistinctOn
+ // *before* the final projection so grouping columns and ORDER BY
+ // tie-breakers that aren't in the user SELECT stay in scope.
+ // DistinctOn provides the projection in that case (its select_expr
+ // list is wrapped in FIRST_VALUE during lowering).
let plan = match select.distinct {
- None => Ok(plan),
- Some(Distinct::All) => Ok(plan),
+ None | Some(Distinct::All) => {
+ self.try_process_unnest(plan, select_exprs_post_aggr)?
+ }
Some(Distinct::Distinct) => {
- LogicalPlanBuilder::from(plan).distinct()?.build()
+ let plan = self.try_process_unnest(plan,
select_exprs_post_aggr)?;
+ LogicalPlanBuilder::from(plan).distinct()?.build()?
}
- Some(Distinct::On(on_expr)) => {
- if !aggr_exprs.is_empty()
- || !group_by_exprs.is_empty()
- || !window_func_exprs.is_empty()
+ Some(Distinct::On(_)) => {
+ if aggr_exprs.is_empty()
+ && group_by_exprs.is_empty()
+ && window_func_exprs.is_empty()
{
- return not_impl_err!(
- "DISTINCT ON expressions with GROUP BY, aggregation or
window functions are not supported "
- );
- }
-
- let on_expr = on_expr
- .into_iter()
- .map(|e| {
- self.sql_expr_to_logical_expr(e, plan.schema(),
planner_context)
- })
- .collect::<Result<Vec<_>>>()?;
+ // Fast path: no aggregation context. Fuse projection
+ // and deduplication into a single DistinctOn over
+ // `base_plan`. The sort attached to DistinctOn via
+ // `with_sort_expr` later normalizes against base_plan,
+ // so a bare ORDER BY alias (e.g. `ORDER BY x` where
+ // SELECT has `a AS x`) must be swapped back to the
+ // underlying input expression first.
+ if !alias_map.is_empty() {
+ order_by_rex = order_by_rex
+ .into_iter()
+ .map(|s| {
+ let expr = substitute_top_level_alias(
+ s.expr.clone(),
+ &alias_map,
+ );
+ s.with_expr(expr)
+ })
+ .collect();
+ }
+ LogicalPlanBuilder::from(base_plan)
+ .distinct_on(on_exprs_post_aggr, select_exprs, None)?
+ .build()?
+ } else {
+ // General path: DistinctOn layered over the post-
+ // aggregate / post-window plan (no extra Projection
+ // node — DistinctOn's lowering wraps each select_expr
+ // in FIRST_VALUE, which acts as the projection).
+ //
+ // The DistinctOn input has the post-aggregate raw
+ // column names (e.g. `max(t.c4)`), not the user-facing
+ // SELECT aliases (`agg2`). ORDER BY may reference
+ // those aliases — substitute them back to the
+ // underlying post-aggregate expression so they
+ // resolve against the DistinctOn input.
+ let select_alias_map: datafusion_common::HashMap<String,
Expr> =
+ select_exprs_post_aggr
+ .iter()
+ .filter_map(|e| match e {
+ Expr::Alias(a) => {
+ Some((a.name.clone(), (*a.expr).clone()))
+ }
+ _ => None,
+ })
+ .collect();
+
+ if !select_alias_map.is_empty() {
+ // Only swap an alias for its underlying expression
+ // when the ORDER BY item is a bare alias name —
+ // matches PostgreSQL's behavior. `b` resolves to
+ // the alias; `b + 0` keeps `b` as the input
+ // column.
+ order_by_rex = order_by_rex
+ .into_iter()
+ .map(|s| {
+ let expr = substitute_top_level_alias(
+ s.expr.clone(),
+ &select_alias_map,
+ );
+ s.with_expr(expr)
+ })
+ .collect();
+ }
- // Build the final plan
- LogicalPlanBuilder::from(base_plan)
- .distinct_on(on_expr, select_exprs, None)?
- .build()
+ LogicalPlanBuilder::from(plan)
Review Comment:
I went with supporting it rather than rejecting `untest()`. We now do the
equivalent unnest rewrite before buildingDistinctOn in the post-aggregate path,
instead of rejecting the combination.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]