mingmwang commented on code in PR #3969:
URL: https://github.com/apache/arrow-datafusion/pull/3969#discussion_r1006993438
##########
datafusion/physical-expr/src/physical_expr.rs:
##########
@@ -128,6 +137,232 @@ impl PhysicalExprStats for BasicExpressionStats {
}
}
+/// a Trait for marking tree node types that are rewritable
+pub trait TreeNodeRewritable: Clone {
+ /// Transform the tree node using the given [TreeNodeRewriter]
+ /// It performs a depth first walk of an node and its children.
+ ///
+ /// For an node tree such as
+ /// ```text
+ /// ParentNode
+ /// left: ChildNode1
+ /// right: ChildNode2
+ /// ```
+ ///
+ /// The nodes are visited using the following order
+ /// ```text
+ /// pre_visit(ParentNode)
+ /// pre_visit(ChildNode1)
+ /// mutatate(ChildNode1)
+ /// pre_visit(ChildNode2)
+ /// mutate(ChildNode2)
+ /// mutate(ParentNode)
+ /// ```
+ ///
+ /// If an Err result is returned, recursion is stopped immediately
+ ///
+ /// If [`false`] is returned on a call to pre_visit, no
+ /// children of that node are visited, nor is mutate
+ /// called on that node
+ ///
+ fn transform_using<R: TreeNodeRewriter<Self>>(
+ self,
+ rewriter: &mut R,
+ ) -> Result<Self> {
+ let need_mutate = match rewriter.pre_visit(&self)? {
+ RewriteRecursion::Mutate => return rewriter.mutate(self),
+ RewriteRecursion::Stop => return Ok(self),
+ RewriteRecursion::Continue => true,
+ RewriteRecursion::Skip => false,
+ };
+
+ let after_op_children =
+ self.map_children(|node| node.transform_using(rewriter))?;
+
+ // now rewrite this node itself
+ if need_mutate {
+ rewriter.mutate(after_op_children)
+ } else {
+ Ok(after_op_children)
+ }
+ }
+
+ /// Convenience utils for writing optimizers rule: recursively apply the
given `op` to the node tree.
+ /// When `op` does not apply to a given node, it is left uncshanged.
+ /// The default tree traversal direction is transform_up(Postorder
Traversal).
+ fn transform<F>(self, op: &F) -> Result<Self>
+ where
+ F: Fn(Self) -> Option<Self>,
+ {
+ self.transform_up(op)
+ }
+
+ /// Convenience utils for writing optimizers rule: recursively apply the
given 'op' to the node and all of its
+ /// children(Preorder Traversal).
+ /// When the `op` does not apply to a given node, it is left unchanged.
+ fn transform_down<F>(self, op: &F) -> Result<Self>
+ where
+ F: Fn(Self) -> Option<Self>,
+ {
+ let node_cloned = self.clone();
+ let after_op = match op(node_cloned) {
+ Some(value) => value,
+ None => self,
+ };
+ after_op.map_children(|node| node.transform_down(op))
+ }
+
+ /// Convenience utils for writing optimizers rule: recursively apply the
given 'op' first to all of its
+ /// children and then itself(Postorder Traversal).
+ /// When the `op` does not apply to a given node, it is left unchanged.
+ fn transform_up<F>(self, op: &F) -> Result<Self>
+ where
+ F: Fn(Self) -> Option<Self>,
+ {
+ let after_op_children = self.map_children(|node|
node.transform_up(op))?;
+
+ let after_op_children_clone = after_op_children.clone();
+ let new_node = match op(after_op_children) {
+ Some(value) => value,
+ None => after_op_children_clone,
+ };
+ Ok(new_node)
+ }
+
+ /// Apply transform `F` to the node's children, the transform `F` might
have a direction(Preorder or Postorder)
+ fn map_children<F>(self, transform: F) -> Result<Self>
+ where
+ F: FnMut(Self) -> Result<Self>;
+}
+
+/// Trait for potentially recursively transform an [`TreeNodeRewritable`] node
+/// tree. When passed to `TreeNodeRewritable::transform_using`,
`TreeNodeRewriter::mutate` is
+/// invoked recursively on all nodes of a tree.
+pub trait TreeNodeRewriter<N: TreeNodeRewritable>: Sized {
+ /// Invoked before (Preorder) any children of `node` are rewritten /
+ /// visited. Default implementation returns
`Ok(RewriteRecursion::Continue)`
+ fn pre_visit(&mut self, _node: &N) -> Result<RewriteRecursion> {
+ Ok(RewriteRecursion::Continue)
+ }
+
+ /// Invoked after (Postorder) all children of `node` have been mutated and
+ /// returns a potentially modified node.
+ fn mutate(&mut self, node: N) -> Result<N>;
+}
+
+/// Controls how the [TreeNodeRewriter] recursion should proceed.
+#[allow(dead_code)]
+pub enum RewriteRecursion {
+ /// Continue rewrite / visit this node tree.
+ Continue,
+ /// Call 'op' immediately and return.
+ Mutate,
+ /// Do not rewrite / visit the children of this node.
+ Stop,
+ /// Keep recursive but skip apply op on this node
+ Skip,
+}
+
+impl TreeNodeRewritable for Arc<dyn PhysicalExpr> {
+ fn map_children<F>(self, transform: F) -> Result<Self>
+ where
+ F: FnMut(Self) -> Result<Self>,
+ {
+ if !self.children().is_empty() {
+ let new_children: Result<Vec<_>> =
+ self.children().into_iter().map(transform).collect();
+ with_new_children_if_necessary(self, new_children?)
+ } else {
+ Ok(self)
+ }
+ }
+}
+
+/// Returns a copy of this expr if we change any child according to the
pointer comparison.
+/// The size of `children` must be equal to the size of
`PhysicalExpr::children()`.
+/// Allow the vtable address comparisons for PhysicalExpr Trait Objects,it is
harmless even
+/// in the case of 'false-native'.
+#[allow(clippy::vtable_address_comparisons)]
+pub fn with_new_children_if_necessary(
+ expr: Arc<dyn PhysicalExpr>,
+ children: Vec<Arc<dyn PhysicalExpr>>,
+) -> Result<Arc<dyn PhysicalExpr>> {
+ if children.len() != expr.children().len() {
+ Err(DataFusionError::Internal(
+ "PhysicalExpr: Wrong number of children".to_string(),
+ ))
+ } else if children.is_empty()
+ || children
+ .iter()
+ .zip(expr.children().iter())
+ .any(|(c1, c2)| !Arc::ptr_eq(c1, c2))
+ {
+ expr.with_new_children(children)
+ } else {
+ Ok(expr)
+ }
+}
+
+/// Compare the two expr lists are equal no matter the order.
+/// For example two InListExpr can be considered to be equals no matter the
order:
+///
+/// In('a','b','c') == In('c','b','a')
+pub fn expr_list_eq_any_order(
Review Comment:
Sure, currently there is no utils, I will add one. In the following PRs more
util methods will be added.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]