peter-toth commented on code in PR #10396:
URL: https://github.com/apache/datafusion/pull/10396#discussion_r1592914848


##########
datafusion/optimizer/src/common_subexpr_eliminate.rs:
##########
@@ -34,90 +33,63 @@ use datafusion_common::{
 use datafusion_expr::expr::Alias;
 use datafusion_expr::logical_plan::{Aggregate, LogicalPlan, Projection, 
Window};
 use datafusion_expr::{col, Expr, ExprSchemable};
+use indexmap::IndexMap;
 
-/// Set of expressions generated by the [`ExprIdentifierVisitor`]
-/// and consumed by the [`CommonSubexprRewriter`].
-#[derive(Default)]
-struct ExprSet {
-    /// A map from expression's identifier (stringified expr) to tuple 
including:
-    /// - the expression itself (cloned)
-    /// - counter
-    /// - DataType of this expression.
-    /// - symbol used as the identifier in the alias.
-    map: HashMap<Identifier, (Expr, usize, DataType, Identifier)>,
-}
-
-impl ExprSet {
-    fn expr_identifier(expr: &Expr) -> Identifier {
-        format!("{expr}")
-    }
-
-    fn get(&self, key: &Identifier) -> Option<&(Expr, usize, DataType, 
Identifier)> {
-        self.map.get(key)
-    }
-
-    fn entry(
-        &mut self,
-        key: Identifier,
-    ) -> Entry<'_, Identifier, (Expr, usize, DataType, Identifier)> {
-        self.map.entry(key)
-    }
-
-    fn populate_expr_set(
-        &mut self,
-        expr: &[Expr],
-        input_schema: DFSchemaRef,
-        expr_mask: ExprMask,
-    ) -> Result<()> {
-        expr.iter().try_for_each(|e| {
-            self.expr_to_identifier(e, Arc::clone(&input_schema), expr_mask)?;
-
-            Ok(())
-        })
-    }
-
-    /// Go through an expression tree and generate identifier for every node 
in this tree.
-    fn expr_to_identifier(
-        &mut self,
-        expr: &Expr,
-        input_schema: DFSchemaRef,
-        expr_mask: ExprMask,
-    ) -> Result<()> {
-        expr.visit(&mut ExprIdentifierVisitor {
-            expr_set: self,
-            input_schema,
-            visit_stack: vec![],
-            node_count: 0,
-            expr_mask,
-        })?;
-
-        Ok(())
-    }
-}
-
-impl From<Vec<(Identifier, (Expr, usize, DataType, Identifier))>> for ExprSet {
-    fn from(entries: Vec<(Identifier, (Expr, usize, DataType, Identifier))>) 
-> Self {
-        let mut expr_set = Self::default();
-        entries.into_iter().for_each(|(k, v)| {
-            expr_set.map.insert(k, v);
-        });
-        expr_set
-    }
-}
-
-/// Identifier for each subexpression.
+/// Identifier that represents a subexpression tree.
 ///
-/// Note that the current implementation uses the `Display` of an expression
-/// (a `String`) as `Identifier`.
+/// Note that the current implementation contains:
+/// - the `Display` of an expression (a `String`) and
+/// - the identifiers of the childrens of the expression

Review Comment:
   I added a comment here: 
https://github.com/apache/datafusion/pull/10396#issuecomment-2099072200 that 
identifiers as keys of the `ExprStats` map is a must have but identifiers as 
aliases of exracted common expressions is not.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to