peter-toth commented on code in PR #10396:
URL: https://github.com/apache/datafusion/pull/10396#discussion_r1591490350


##########
datafusion/optimizer/src/common_subexpr_eliminate.rs:
##########
@@ -658,55 +682,89 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> {
     type Node = Expr;
 
     fn f_down(&mut self, expr: &Expr) -> Result<TreeNodeRecursion> {
-        // related to https://github.com/apache/datafusion/issues/8814
+        // related to https://github.com/apache/arrow-datafusion/issues/8814
         // If the expr contain volatile expression or is a short-circuit 
expression, skip it.
+        // TODO: propagate is_volatile state bottom-up + consider non-volatile 
sub-expressions for CSE
+        // TODO: consider surely executed children of "short circuited"s for 
CSE
         if expr.short_circuits() || expr.is_volatile()? {
-            self.visit_stack
-                .push(VisitRecord::JumpMark(self.node_count));
-            return Ok(TreeNodeRecursion::Jump); // go to f_up
+            self.visit_stack.push(VisitRecord::JumpMark);
+
+            return Ok(TreeNodeRecursion::Jump);
         }
 
+        self.id_array.push((0, "".to_string()));
         self.visit_stack
-            .push(VisitRecord::EnterMark(self.node_count));
-        self.node_count += 1;
+            .push(VisitRecord::EnterMark(self.down_index));
+        self.down_index += 1;
 
         Ok(TreeNodeRecursion::Continue)
     }
 
     fn f_up(&mut self, expr: &Expr) -> Result<TreeNodeRecursion> {
-        let (_idx, sub_expr_identifier) = self.pop_enter_mark();
-
-        // skip exprs should not be recognize.
-        if self.expr_mask.ignores(expr) {
-            let curr_expr_identifier = ExprSet::expr_identifier(expr);
-            self.visit_stack
-                .push(VisitRecord::ExprItem(curr_expr_identifier));
+        let Some((down_index, sub_expr_id)) = self.pop_enter_mark() else {
             return Ok(TreeNodeRecursion::Continue);
-        }
-        let curr_expr_identifier = ExprSet::expr_identifier(expr);
-        let alias_symbol = 
format!("{curr_expr_identifier}{sub_expr_identifier}");
+        };
 
-        self.visit_stack
-            .push(VisitRecord::ExprItem(alias_symbol.clone()));
+        let expr_id = expr_identifier(expr, sub_expr_id);
+
+        self.id_array[down_index].0 = self.up_index;
+        if !self.expr_mask.ignores(expr) {
+            self.id_array[down_index].1.clone_from(&expr_id);
 
-        let data_type = expr.get_type(&self.input_schema)?;
+            // TODO: can we capture the data type in the second traversal only 
for
+            //  replaced expressions?
+            let data_type = expr.get_type(&self.input_schema)?;
+            let (count, _) = self
+                .expr_stats
+                .entry(expr_id.clone())
+                .or_insert((0, data_type));
+            *count += 1;
+        }
+        self.visit_stack.push(VisitRecord::ExprItem(expr_id));
+        self.up_index += 1;
 
-        self.expr_set
-            .entry(curr_expr_identifier)
-            .or_insert_with(|| (expr.clone(), 0, data_type, alias_symbol))

Review Comment:
   this clone is also eliminated



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to