peter-toth commented on code in PR #10396:
URL: https://github.com/apache/datafusion/pull/10396#discussion_r1591490350
##########
datafusion/optimizer/src/common_subexpr_eliminate.rs:
##########
@@ -658,55 +682,89 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> {
type Node = Expr;
fn f_down(&mut self, expr: &Expr) -> Result<TreeNodeRecursion> {
- // related to https://github.com/apache/datafusion/issues/8814
+ // related to https://github.com/apache/arrow-datafusion/issues/8814
// If the expr contain volatile expression or is a short-circuit
expression, skip it.
+ // TODO: propagate is_volatile state bottom-up + consider non-volatile
sub-expressions for CSE
+ // TODO: consider surely executed children of "short circuited"s for
CSE
if expr.short_circuits() || expr.is_volatile()? {
- self.visit_stack
- .push(VisitRecord::JumpMark(self.node_count));
- return Ok(TreeNodeRecursion::Jump); // go to f_up
+ self.visit_stack.push(VisitRecord::JumpMark);
+
+ return Ok(TreeNodeRecursion::Jump);
}
+ self.id_array.push((0, "".to_string()));
self.visit_stack
- .push(VisitRecord::EnterMark(self.node_count));
- self.node_count += 1;
+ .push(VisitRecord::EnterMark(self.down_index));
+ self.down_index += 1;
Ok(TreeNodeRecursion::Continue)
}
fn f_up(&mut self, expr: &Expr) -> Result<TreeNodeRecursion> {
- let (_idx, sub_expr_identifier) = self.pop_enter_mark();
-
- // skip exprs should not be recognize.
- if self.expr_mask.ignores(expr) {
- let curr_expr_identifier = ExprSet::expr_identifier(expr);
- self.visit_stack
- .push(VisitRecord::ExprItem(curr_expr_identifier));
+ let Some((down_index, sub_expr_id)) = self.pop_enter_mark() else {
return Ok(TreeNodeRecursion::Continue);
- }
- let curr_expr_identifier = ExprSet::expr_identifier(expr);
- let alias_symbol =
format!("{curr_expr_identifier}{sub_expr_identifier}");
+ };
- self.visit_stack
- .push(VisitRecord::ExprItem(alias_symbol.clone()));
+ let expr_id = expr_identifier(expr, sub_expr_id);
+
+ self.id_array[down_index].0 = self.up_index;
+ if !self.expr_mask.ignores(expr) {
+ self.id_array[down_index].1.clone_from(&expr_id);
- let data_type = expr.get_type(&self.input_schema)?;
+ // TODO: can we capture the data type in the second traversal only
for
+ // replaced expressions?
+ let data_type = expr.get_type(&self.input_schema)?;
+ let (count, _) = self
+ .expr_stats
+ .entry(expr_id.clone())
+ .or_insert((0, data_type));
+ *count += 1;
+ }
+ self.visit_stack.push(VisitRecord::ExprItem(expr_id));
+ self.up_index += 1;
- self.expr_set
- .entry(curr_expr_identifier)
- .or_insert_with(|| (expr.clone(), 0, data_type, alias_symbol))
Review Comment:
this clone is also eliminated
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]