alamb commented on code in PR #10333:
URL: https://github.com/apache/datafusion/pull/10333#discussion_r1591315724


##########
datafusion/optimizer/src/common_subexpr_eliminate.rs:
##########
@@ -592,109 +591,51 @@ impl ExprMask {
 
 /// Go through an expression tree and generate identifiers for each 
subexpression.
 ///
-/// An identifier contains information of the expression itself and its 
sub-expression.
-/// This visitor implementation use a stack `visit_stack` to track traversal, 
which
-/// lets us know when a sub-tree's visiting is finished. When `pre_visit` is 
called
-/// (traversing to a new node), an `EnterMark` and an `ExprItem` will be 
pushed into stack.
-/// And try to pop out a `EnterMark` on leaving a node (`f_up()`). All 
`ExprItem`
-/// before the first `EnterMark` is considered to be sub-tree of the leaving 
node.
-///
-/// This visitor also records identifier in `id_array`. Makes the following 
traverse
-/// pass can get the identifier of a node without recalculate it. We assign 
each node
-/// in the expr tree a series number, start from 1, maintained by 
`series_number`.
-/// Series number represents the order we left (`f_up()`) a node. Has the 
property
-/// that child node's series number always smaller than parent's. While 
`id_array` is
-/// organized in the order we enter (`f_down()`) a node. `node_count` helps us 
to
-/// get the index of `id_array` for each node.
+/// This visitor also assigns an alias symbol to each expression, to be used 
if the
+/// expression is getting eliminated, and tracks the frequency and datatype of 
each expression.
 ///
 /// `Expr` without sub-expr (column, literal etc.) will not have identifier
 /// because they should not be recognized as common sub-expr.
 struct ExprIdentifierVisitor<'a> {
-    // param
+    // expr set to be populated by the visitor
     expr_set: &'a mut ExprSet,
     /// input schema for the node that we're optimizing, so we can determine 
the correct datatype
     /// for each subexpression
     input_schema: DFSchemaRef,
-    // inner states
-    visit_stack: Vec<VisitRecord>,
-    /// increased in fn_down, start from 0.
-    node_count: usize,
     /// which expression should be skipped?
     expr_mask: ExprMask,
 }
 
-/// Record item that used when traversing a expression tree.
-enum VisitRecord {
-    /// `usize` is the monotone increasing series number assigned in 
pre_visit().
-    /// Starts from 0. Is used to index the identifier array `id_array` in 
post_visit().
-    EnterMark(usize),
-    /// the node's children were skipped => jump to f_up on same node
-    JumpMark(usize),
-    /// Accumulated identifier of sub expression.
-    ExprItem(Identifier),
-}
-
-impl ExprIdentifierVisitor<'_> {
-    /// Find the first `EnterMark` in the stack, and accumulates every 
`ExprItem`
-    /// before it.
-    fn pop_enter_mark(&mut self) -> (usize, Identifier) {
-        let mut desc = String::new();
-
-        while let Some(item) = self.visit_stack.pop() {
-            match item {
-                VisitRecord::EnterMark(idx) | VisitRecord::JumpMark(idx) => {
-                    return (idx, desc);
-                }
-                VisitRecord::ExprItem(id) => {
-                    desc.push_str(&id);
-                }
-            }
-        }
-        unreachable!("Enter mark should paired with node number");
-    }
-}
-
 impl TreeNodeVisitor for ExprIdentifierVisitor<'_> {
     type Node = Expr;
 
     fn f_down(&mut self, expr: &Expr) -> Result<TreeNodeRecursion> {
         // related to https://github.com/apache/datafusion/issues/8814
         // If the expr contain volatile expression or is a short-circuit 
expression, skip it.
         if expr.short_circuits() || expr.is_volatile()? {
-            self.visit_stack
-                .push(VisitRecord::JumpMark(self.node_count));
             return Ok(TreeNodeRecursion::Jump); // go to f_up
         }
 
-        self.visit_stack
-            .push(VisitRecord::EnterMark(self.node_count));
-        self.node_count += 1;
-
         Ok(TreeNodeRecursion::Continue)
     }
 
     fn f_up(&mut self, expr: &Expr) -> Result<TreeNodeRecursion> {
-        let (_idx, sub_expr_identifier) = self.pop_enter_mark();
-
         // skip exprs should not be recognize.
         if self.expr_mask.ignores(expr) {
-            let curr_expr_identifier = ExprSet::expr_identifier(expr);
-            self.visit_stack
-                .push(VisitRecord::ExprItem(curr_expr_identifier));
             return Ok(TreeNodeRecursion::Continue);
         }
-        let curr_expr_identifier = ExprSet::expr_identifier(expr);
-        let alias_symbol = 
format!("{curr_expr_identifier}{sub_expr_identifier}");
 
-        self.visit_stack
-            .push(VisitRecord::ExprItem(alias_symbol.clone()));
+        let curr_expr_identifier = ExprSet::expr_identifier(expr);
 
         let data_type = expr.get_type(&self.input_schema)?;
 
+        let alias_symbol = format!("#{{{curr_expr_identifier}}}");

Review Comment:
   A small nit might be to pull this into a function that can be documented (so 
the code that creates identifiers is easier to find)
   
   Perhaps something like 
   ```rust
   let alias_symbol = ExprSet::alias_symbol(expr)
   ```
   
   🤔 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to