peter-toth commented on code in PR #10396: URL: https://github.com/apache/datafusion/pull/10396#discussion_r1592914848
########## datafusion/optimizer/src/common_subexpr_eliminate.rs: ########## @@ -34,90 +33,63 @@ use datafusion_common::{ use datafusion_expr::expr::Alias; use datafusion_expr::logical_plan::{Aggregate, LogicalPlan, Projection, Window}; use datafusion_expr::{col, Expr, ExprSchemable}; +use indexmap::IndexMap; -/// Set of expressions generated by the [`ExprIdentifierVisitor`] -/// and consumed by the [`CommonSubexprRewriter`]. -#[derive(Default)] -struct ExprSet { - /// A map from expression's identifier (stringified expr) to tuple including: - /// - the expression itself (cloned) - /// - counter - /// - DataType of this expression. - /// - symbol used as the identifier in the alias. - map: HashMap<Identifier, (Expr, usize, DataType, Identifier)>, -} - -impl ExprSet { - fn expr_identifier(expr: &Expr) -> Identifier { - format!("{expr}") - } - - fn get(&self, key: &Identifier) -> Option<&(Expr, usize, DataType, Identifier)> { - self.map.get(key) - } - - fn entry( - &mut self, - key: Identifier, - ) -> Entry<'_, Identifier, (Expr, usize, DataType, Identifier)> { - self.map.entry(key) - } - - fn populate_expr_set( - &mut self, - expr: &[Expr], - input_schema: DFSchemaRef, - expr_mask: ExprMask, - ) -> Result<()> { - expr.iter().try_for_each(|e| { - self.expr_to_identifier(e, Arc::clone(&input_schema), expr_mask)?; - - Ok(()) - }) - } - - /// Go through an expression tree and generate identifier for every node in this tree. - fn expr_to_identifier( - &mut self, - expr: &Expr, - input_schema: DFSchemaRef, - expr_mask: ExprMask, - ) -> Result<()> { - expr.visit(&mut ExprIdentifierVisitor { - expr_set: self, - input_schema, - visit_stack: vec![], - node_count: 0, - expr_mask, - })?; - - Ok(()) - } -} - -impl From<Vec<(Identifier, (Expr, usize, DataType, Identifier))>> for ExprSet { - fn from(entries: Vec<(Identifier, (Expr, usize, DataType, Identifier))>) -> Self { - let mut expr_set = Self::default(); - entries.into_iter().for_each(|(k, v)| { - expr_set.map.insert(k, v); - }); - expr_set - } -} - -/// Identifier for each subexpression. +/// Identifier that represents a subexpression tree. /// -/// Note that the current implementation uses the `Display` of an expression -/// (a `String`) as `Identifier`. +/// Note that the current implementation contains: +/// - the `Display` of an expression (a `String`) and +/// - the identifiers of the childrens of the expression Review Comment: I added a comment here: https://github.com/apache/datafusion/pull/10396#issuecomment-2099072200 that identifiers as keys of the `ExprStats` map is a must have but identifiers as aliases of exracted common expressions is not. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org