alamb commented on code in PR #11564: URL: https://github.com/apache/datafusion/pull/11564#discussion_r1687016111
########## datafusion/expr/src/function.rs: ########## @@ -57,6 +57,9 @@ pub struct AccumulatorArgs<'a> { /// The schema of the input arguments pub schema: &'a Schema, + /// The schema of the input arguments Review Comment: I view this addition as basically making the UDAF framework as full featured as the built in one is ########## datafusion/physical-expr-common/src/aggregate/mod.rs: ########## @@ -51,6 +51,10 @@ use datafusion_expr::utils::AggregateOrderSensitivity; /// /// `input_exprs` and `sort_exprs` are used for customizing Accumulator as the arguments in `AccumulatorArgs`, /// if you don't need them it is fine to pass empty slice `&[]`. +/// +/// `is_reversed` is used to indicate whether the aggregation is running in reverse order, +/// it could be used to hint Accumulator to accumulate in the reversed order, +/// you can just set to false if you are not reversing expression #[allow(clippy::too_many_arguments)] pub fn create_aggregate_expr( Review Comment: Given the number of arguments it is getting, maybe it would be good to define a builder like the following: ```rust let builder = AggregateExprBuilder::new(fun), .with_input_phyiscal_exprs(input_phy_exprs) ... .with_name(name) .build()? ``` 🤔 ########## datafusion/physical-expr-common/src/utils.rs: ########## @@ -134,6 +135,33 @@ pub fn limited_convert_logical_expr_to_physical_expr( } } +pub fn limited_convert_logical_expr_to_physical_expr_with_dfschema( + expr: &Expr, + dfschema: &DFSchema, +) -> Result<Arc<dyn PhysicalExpr>> { + match expr { + Expr::Alias(Alias { expr, .. }) => Ok( + limited_convert_logical_expr_to_physical_expr_with_dfschema(expr, dfschema)?, + ), + Expr::Column(col) => { + let idx = dfschema.index_of_column(col)?; Review Comment: So does that mean `limited_convert_logical_expr_to_physical_expr_with_dfschema` will be temporary? ########## datafusion/physical-expr-common/src/aggregate/mod.rs: ########## @@ -495,18 +573,23 @@ impl AggregateExpr for AggregateFunctionExpr { }) .collect::<Vec<_>>(); let mut name = self.name().to_string(); - replace_order_by_clause(&mut name); + // TODO: Generalize order-by clause rewrite Review Comment: Should we track this in a follow on ticket? ########## datafusion/physical-expr-common/src/aggregate/mod.rs: ########## @@ -81,6 +86,61 @@ pub fn create_aggregate_expr( .map(|e| e.expr.data_type(schema)) .collect::<Result<Vec<_>>>()?; + let ordering_fields = ordering_fields(ordering_req, &ordering_types); + let name = name.into(); + + Ok(Arc::new(AggregateFunctionExpr { + fun: fun.clone(), + args: input_phy_exprs.to_vec(), + logical_args: input_exprs.to_vec(), + data_type: fun.return_type(&input_exprs_types)?, + name, + schema: schema.clone(), + dfschema: DFSchema::empty(), + sort_exprs: sort_exprs.to_vec(), + ordering_req: ordering_req.to_vec(), + ignore_nulls, + ordering_fields, + is_distinct, + input_type: input_exprs_types[0].clone(), + is_reversed, + })) +} + +#[allow(clippy::too_many_arguments)] +// This is not for external usage, consider creating with `create_aggregate_expr` instead. +pub fn create_aggregate_expr_with_dfschema( Review Comment: Make we can also make a builder to make using this function easier (e.g. it might be easy to swap one of the last three bools by accident) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org