This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 16369d8612 Improve documentation on `TreeNode` (#10035)
16369d8612 is described below

commit 16369d861270e2093567bb8818e7f636529f9947
Author: Andrew Lamb <and...@nerdnetworks.org>
AuthorDate: Mon Apr 22 16:30:33 2024 -0400

    Improve documentation on `TreeNode` (#10035)
    
    * Improve documentation on `TreeNode`
    
    * Document inspecting, rewriting  APIs. Add chart
    
    * tweak
    
    * Add references to PlanContext and ExprContext
    
    * refine TreeNodeRewriter docs
    
    * Add note about exists
    
    * Update datafusion/common/src/tree_node.rs
    
    Co-authored-by: Jeffrey Vo <jeffrey.vo.austra...@gmail.com>
    
    ---------
    
    Co-authored-by: Jeffrey Vo <jeffrey.vo.austra...@gmail.com>
---
 datafusion/common/src/tree_node.rs       | 282 ++++++++++++++++++++++---------
 datafusion/expr/src/logical_plan/plan.rs |   2 +-
 2 files changed, 203 insertions(+), 81 deletions(-)

diff --git a/datafusion/common/src/tree_node.rs 
b/datafusion/common/src/tree_node.rs
index f41d264d35..7003f5ac7f 100644
--- a/datafusion/common/src/tree_node.rs
+++ b/datafusion/common/src/tree_node.rs
@@ -15,8 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! This module provides common traits for visiting or rewriting tree
-//! data structures easily.
+//! [`TreeNode`] for visiting and rewriting expression and plan trees
 
 use std::sync::Arc;
 
@@ -31,22 +30,83 @@ macro_rules! handle_transform_recursion {
     }};
 }
 
-/// Defines a visitable and rewriteable tree node. This trait is implemented
-/// for plans ([`ExecutionPlan`] and [`LogicalPlan`]) as well as expression
-/// trees ([`PhysicalExpr`], [`Expr`]) in DataFusion.
+/// API for inspecting and rewriting tree data structures.
+///
+/// The `TreeNode` API is used to express algorithms separately from traversing
+/// the structure of `TreeNode`s, avoiding substantial code duplication.
+///
+/// This trait is implemented for plans ([`ExecutionPlan`], [`LogicalPlan`]) 
and
+/// expression trees ([`PhysicalExpr`], [`Expr`]) as well as Plan+Payload
+/// combinations [`PlanContext`] and [`ExprContext`].
+///
+/// # Overview
+/// There are three categories of TreeNode APIs:
+///
+/// 1. "Inspecting" APIs to traverse a tree of `&TreeNodes`:
+/// [`apply`], [`visit`], [`exists`].
+///
+/// 2. "Transforming" APIs that traverse and consume a tree of `TreeNode`s
+/// producing possibly changed `TreeNode`s: [`transform`], [`transform_up`],
+/// [`transform_down`], [`transform_down_up`], and [`rewrite`].
+///
+/// 3. Internal APIs used to implement the `TreeNode` API: [`apply_children`],
+/// and [`map_children`].
+///
+/// | Traversal Order | Inspecting | Transforming |
+/// | --- | --- | --- |
+/// | top-down | [`apply`], [`exists`] | [`transform_down`]|
+/// | bottom-up | | [`transform`] , [`transform_up`]|
+/// | combined with separate `f_down` and `f_up` closures | | 
[`transform_down_up`] |
+/// | combined with `f_down()` and `f_up()` in an object | [`visit`]  | 
[`rewrite`] |
+///
+/// **Note**:while there is currently no in-place mutation API that uses `&mut
+/// TreeNode`, the transforming APIs are efficient and optimized to avoid
+/// cloning.
+///
+/// [`apply`]: Self::apply
+/// [`visit`]: Self::visit
+/// [`exists`]: Self::exists
+/// [`transform`]: Self::transform
+/// [`transform_up`]: Self::transform_up
+/// [`transform_down`]: Self::transform_down
+/// [`transform_down_up`]: Self::transform_down_up
+/// [`rewrite`]: Self::rewrite
+/// [`apply_children`]: Self::apply_children
+/// [`map_children`]: Self::map_children
+///
+/// # Terminology
+/// The following terms are used in this trait
+///
+/// * `f_down`: Invoked before any children of the current node are visited.
+/// * `f_up`: Invoked after all children of the current node are visited.
+/// * `f`: closure that is applied to the current node.
+/// * `map_*`: applies a transformation to rewrite owned nodes
+/// * `apply_*`:  invokes a function on borrowed nodes
+/// * `transform_`: applies a transformation to rewrite owned nodes
 ///
 /// <!-- Since these are in the datafusion-common crate, can't use intra doc 
links) -->
 /// [`ExecutionPlan`]: 
https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html
 /// [`PhysicalExpr`]: 
https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.PhysicalExpr.html
 /// [`LogicalPlan`]: 
https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/enum.LogicalPlan.html
 /// [`Expr`]: 
https://docs.rs/datafusion-expr/latest/datafusion_expr/expr/enum.Expr.html
+/// [`PlanContext`]: 
https://docs.rs/datafusion/latest/datafusion/physical_plan/tree_node/struct.PlanContext.html
+/// [`ExprContext`]: 
https://docs.rs/datafusion/latest/datafusion/physical_expr/tree_node/struct.ExprContext.html
 pub trait TreeNode: Sized {
-    /// Visit the tree node using the given [`TreeNodeVisitor`], performing a
+    /// Visit the tree node with a [`TreeNodeVisitor`], performing a
     /// depth-first walk of the node and its children.
     ///
-    /// See also:
-    /// *  [`Self::rewrite`] to rewrite owned `TreeNode`s
+    /// [`TreeNodeVisitor::f_down()`] is called in top-down order (before
+    /// children are visited), [`TreeNodeVisitor::f_up()`] is called in
+    /// bottom-up order (after children are visited).
     ///
+    /// # Return Value
+    /// Specifies how the tree walk ended. See [`TreeNodeRecursion`] for 
details.
+    ///
+    /// # See Also:
+    /// * [`Self::apply`] for inspecting nodes with a closure
+    /// * [`Self::rewrite`] to rewrite owned `TreeNode`s
+    ///
+    /// # Example
     /// Consider the following tree structure:
     /// ```text
     /// ParentNode
@@ -63,14 +123,6 @@ pub trait TreeNode: Sized {
     /// TreeNodeVisitor::f_up(ChildNode2)
     /// TreeNodeVisitor::f_up(ParentNode)
     /// ```
-    ///
-    /// See [`TreeNodeRecursion`] for more details on controlling the 
traversal.
-    ///
-    /// If [`TreeNodeVisitor::f_down()`] or [`TreeNodeVisitor::f_up()`] 
returns [`Err`],
-    /// the recursion stops immediately.
-    ///
-    /// If using the default [`TreeNodeVisitor::f_up`] that does nothing, 
consider using
-    /// [`Self::apply`].
     fn visit<V: TreeNodeVisitor<Node = Self>>(
         &self,
         visitor: &mut V,
@@ -81,12 +133,29 @@ pub trait TreeNode: Sized {
             .visit_parent(|| visitor.f_up(self))
     }
 
-    /// Implements the [visitor 
pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for
-    /// recursively transforming [`TreeNode`]s.
+    /// Rewrite the tree node with a [`TreeNodeRewriter`], performing a
+    /// depth-first walk of the node and its children.
+    ///
+    /// [`TreeNodeRewriter::f_down()`] is called in top-down order (before
+    /// children are visited), [`TreeNodeRewriter::f_up()`] is called in
+    /// bottom-up order (after children are visited).
+    ///
+    /// Note: If using the default [`TreeNodeRewriter::f_up`] or
+    /// [`TreeNodeRewriter::f_down`] that do nothing, consider using
+    /// [`Self::transform_down`] instead.
+    ///
+    /// # Return Value
+    /// The returns value specifies how the tree walk should proceed. See
+    /// [`TreeNodeRecursion`] for details. If an [`Err`] is returned, the
+    /// recursion stops immediately.
     ///
-    /// See also:
-    /// *  [`Self::visit`] for inspecting (without modification) `TreeNode`s
+    /// # See Also
+    /// * [`Self::visit`] for inspecting (without modification) `TreeNode`s
+    /// * [Self::transform_down_up] for a top-down (pre-order) traversal.
+    /// * [Self::transform_down] for a top-down (pre-order) traversal.
+    /// * [`Self::transform_up`] for a bottom-up (post-order) traversal.
     ///
+    /// # Example
     /// Consider the following tree structure:
     /// ```text
     /// ParentNode
@@ -103,11 +172,6 @@ pub trait TreeNode: Sized {
     /// TreeNodeRewriter::f_up(ChildNode2)
     /// TreeNodeRewriter::f_up(ParentNode)
     /// ```
-    ///
-    /// See [`TreeNodeRecursion`] for more details on controlling the 
traversal.
-    ///
-    /// If [`TreeNodeVisitor::f_down()`] or [`TreeNodeVisitor::f_up()`] 
returns [`Err`],
-    /// the recursion stops immediately.
     fn rewrite<R: TreeNodeRewriter<Node = Self>>(
         self,
         rewriter: &mut R,
@@ -117,12 +181,15 @@ pub trait TreeNode: Sized {
         })
     }
 
-    /// Applies `f` to the node and its children. `f` is applied in a pre-order
-    /// way, and it is controlled by [`TreeNodeRecursion`], which means result
-    /// of the `f` on a node can cause an early return.
+    /// Applies `f` to the node then each of its children, recursively (a
+    /// top-down, pre-order traversal).
+    ///
+    /// The return [`TreeNodeRecursion`] controls the recursion and can cause
+    /// an early return.
     ///
-    /// The `f` closure can be used to collect some information from tree nodes
-    /// or run a check on the tree.
+    /// # See Also
+    /// * [`Self::transform_down`] for the equivalent transformation API.
+    /// * [`Self::visit`] for both top-down and bottom up traversal.
     fn apply<F: FnMut(&Self) -> Result<TreeNodeRecursion>>(
         &self,
         mut f: F,
@@ -137,9 +204,10 @@ pub trait TreeNode: Sized {
         apply_impl(self, &mut f)
     }
 
-    /// Convenience utility for writing optimizer rules: Recursively apply the
-    /// given function `f` to the tree in a bottom-up (post-order) fashion. 
When
-    /// `f` does not apply to a given node, it is left unchanged.
+    /// Recursively rewrite the node's children and then the node using `f`
+    /// (a bottom-up post-order traversal).
+    ///
+    /// A synonym of [`Self::transform_up`].
     fn transform<F: FnMut(Self) -> Result<Transformed<Self>>>(
         self,
         f: F,
@@ -147,9 +215,15 @@ pub trait TreeNode: Sized {
         self.transform_up(f)
     }
 
-    /// Convenience utility for writing optimizer rules: Recursively apply the
-    /// given function `f` to a node and then to its children (pre-order 
traversal).
-    /// When `f` does not apply to a given node, it is left unchanged.
+    /// Recursively rewrite the tree using `f` in a top-down (pre-order)
+    /// fashion.
+    ///
+    /// `f` is applied to the node first, and then its children.
+    ///
+    /// # See Also
+    /// * [`Self::transform_up`] for a bottom-up (post-order) traversal.
+    /// * [Self::transform_down_up] for a combined traversal with closures
+    /// * [`Self::rewrite`] for a combined traversal with a visitor
     fn transform_down<F: FnMut(Self) -> Result<Transformed<Self>>>(
         self,
         mut f: F,
@@ -164,9 +238,7 @@ pub trait TreeNode: Sized {
         transform_down_impl(self, &mut f)
     }
 
-    /// Convenience utility for writing optimizer rules: Recursively apply the
-    /// given mutable function `f` to a node and then to its children 
(pre-order
-    /// traversal). When `f` does not apply to a given node, it is left 
unchanged.
+    /// Same as [`Self::transform_down`] but with a mutable closure.
     #[deprecated(since = "38.0.0", note = "Use `transform_down` instead")]
     fn transform_down_mut<F: FnMut(Self) -> Result<Transformed<Self>>>(
         self,
@@ -175,10 +247,15 @@ pub trait TreeNode: Sized {
         self.transform_down(f)
     }
 
-    /// Convenience utility for writing optimizer rules: Recursively apply the
-    /// given function `f` to all children of a node, and then to the node 
itself
-    /// (post-order traversal). When `f` does not apply to a given node, it is
-    /// left unchanged.
+    /// Recursively rewrite the node using `f` in a bottom-up (post-order)
+    /// fashion.
+    ///
+    /// `f` is applied to the node's  children first, and then to the node 
itself.
+    ///
+    /// # See Also
+    /// * [`Self::transform_down`] top-down (pre-order) traversal.
+    /// * [Self::transform_down_up] for a combined traversal with closures
+    /// * [`Self::rewrite`] for a combined traversal with a visitor
     fn transform_up<F: FnMut(Self) -> Result<Transformed<Self>>>(
         self,
         mut f: F,
@@ -194,10 +271,7 @@ pub trait TreeNode: Sized {
         transform_up_impl(self, &mut f)
     }
 
-    /// Convenience utility for writing optimizer rules: Recursively apply the
-    /// given mutable function `f` to all children of a node, and then to the
-    /// node itself (post-order traversal). When `f` does not apply to a given
-    /// node, it is left unchanged.
+    /// Same as [`Self::transform_up`] but with a mutable closure.
     #[deprecated(since = "38.0.0", note = "Use `transform_up` instead")]
     fn transform_up_mut<F: FnMut(Self) -> Result<Transformed<Self>>>(
         self,
@@ -206,15 +280,21 @@ pub trait TreeNode: Sized {
         self.transform_up(f)
     }
 
-    /// Transforms the tree using `f_down` while traversing the tree top-down
+    /// Transforms the node using `f_down` while traversing the tree top-down
     /// (pre-order), and using `f_up` while traversing the tree bottom-up
     /// (post-order).
     ///
-    /// Use this method if you want to start the `f_up` process right where 
`f_down` jumps.
-    /// This can make the whole process faster by reducing the number of 
`f_up` steps.
-    /// If you don't need this, it's just like using `transform_down` followed 
by
-    /// `transform_up` on the same tree.
+    /// The method behaves the same as calling [`Self::transform_down`] 
followed
+    /// by [`Self::transform_up`] on the same node. Use this method if you want
+    /// to start the `f_up` process right where `f_down` jumps. This can make
+    /// the whole process faster by reducing the number of `f_up` steps.
     ///
+    /// # See Also
+    /// * [`Self::transform_up`] for a bottom-up (post-order) traversal.
+    /// * [Self::transform_down] for a top-down (pre-order) traversal.
+    /// * [`Self::rewrite`] for a combined traversal with a visitor
+    ///
+    /// # Example
     /// Consider the following tree structure:
     /// ```text
     /// ParentNode
@@ -322,7 +402,7 @@ pub trait TreeNode: Sized {
         transform_down_up_impl(self, &mut f_down, &mut f_up)
     }
 
-    /// Returns true if `f` returns true for node in the tree.
+    /// Returns true if `f` returns true for any node in the tree.
     ///
     /// Stops recursion as soon as a matching node is found
     fn exists<F: FnMut(&Self) -> bool>(&self, mut f: F) -> bool {
@@ -339,50 +419,91 @@ pub trait TreeNode: Sized {
         found
     }
 
-    /// Apply the closure `F` to the node's children.
+    /// Low-level API used to implement other APIs.
+    ///
+    /// If you want to implement the [`TreeNode`] trait for your own type, you
+    /// should implement this method and [`Self::map_children`].
     ///
-    /// See `mutate_children` for rewriting in place
+    /// Users should use one of the higher level APIs described on [`Self`].
+    ///
+    /// Description: Apply `f` to inspect node's children (but not the node
+    /// itself).
     fn apply_children<F: FnMut(&Self) -> Result<TreeNodeRecursion>>(
         &self,
         f: F,
     ) -> Result<TreeNodeRecursion>;
 
-    /// Apply transform `F` to potentially rewrite the node's children. Note
-    /// that the transform `F` might have a direction (pre-order or 
post-order).
+    /// Low-level API used to implement other APIs.
+    ///
+    /// If you want to implement the [`TreeNode`] trait for your own type, you
+    /// should implement this method and [`Self::apply_children`].
+    ///
+    /// Users should use one of the higher level APIs described on [`Self`].
+    ///
+    /// Description: Apply `f` to rewrite the node's children (but not the 
node itself).
     fn map_children<F: FnMut(Self) -> Result<Transformed<Self>>>(
         self,
         f: F,
     ) -> Result<Transformed<Self>>;
 }
 
-/// Implements the [visitor 
pattern](https://en.wikipedia.org/wiki/Visitor_pattern)
-/// for recursively walking [`TreeNode`]s.
+/// A [Visitor](https://en.wikipedia.org/wiki/Visitor_pattern) for recursively
+/// inspecting [`TreeNode`]s via [`TreeNode::visit`].
 ///
-/// A [`TreeNodeVisitor`] allows one to express algorithms separately from the
-/// code traversing the structure of the `TreeNode` tree, making it easier to
-/// add new types of tree nodes and algorithms.
+/// See [`TreeNode`] for more details on available APIs
 ///
 /// When passed to [`TreeNode::visit`], [`TreeNodeVisitor::f_down`] and
 /// [`TreeNodeVisitor::f_up`] are invoked recursively on the tree.
 /// See [`TreeNodeRecursion`] for more details on controlling the traversal.
+///
+/// # Return Value
+/// The returns value of `f_up` and `f_down` specifies how the tree walk should
+/// proceed. See [`TreeNodeRecursion`] for details. If an [`Err`] is returned,
+/// the recursion stops immediately.
+///
+/// Note: If using the default implementations of [`TreeNodeVisitor::f_up`] or
+/// [`TreeNodeVisitor::f_down`] that do nothing, consider using
+/// [`TreeNode::apply`] instead.
+///
+/// # See Also:
+/// * [`TreeNode::rewrite`] to rewrite owned `TreeNode`s
 pub trait TreeNodeVisitor: Sized {
     /// The node type which is visitable.
     type Node: TreeNode;
 
-    /// Invoked before any children of `node` are visited.
-    /// Default implementation simply continues the recursion.
+    /// Invoked while traversing down the tree, before any children are 
visited.
+    /// Default implementation continues the recursion.
     fn f_down(&mut self, _node: &Self::Node) -> Result<TreeNodeRecursion> {
         Ok(TreeNodeRecursion::Continue)
     }
 
-    /// Invoked after all children of `node` are visited.
-    /// Default implementation simply continues the recursion.
+    /// Invoked while traversing up the tree after children are visited. 
Default
+    /// implementation continues the recursion.
     fn f_up(&mut self, _node: &Self::Node) -> Result<TreeNodeRecursion> {
         Ok(TreeNodeRecursion::Continue)
     }
 }
 
-/// Trait for potentially recursively transforming a tree of [`TreeNode`]s.
+/// A [Visitor](https://en.wikipedia.org/wiki/Visitor_pattern) for recursively
+/// rewriting [`TreeNode`]s via [`TreeNode::rewrite`].
+///
+/// See [`TreeNode`] for more details on available APIs
+///
+/// When passed to [`TreeNode::rewrite`], [`TreeNodeRewriter::f_down`] and
+/// [`TreeNodeRewriter::f_up`] are invoked recursively on the tree.
+/// See [`TreeNodeRecursion`] for more details on controlling the traversal.
+///
+/// # Return Value
+/// The returns value of `f_up` and `f_down` specifies how the tree walk should
+/// proceed. See [`TreeNodeRecursion`] for details. If an [`Err`] is returned,
+/// the recursion stops immediately.
+///
+/// Note: If using the default implementations of [`TreeNodeRewriter::f_up`] or
+/// [`TreeNodeRewriter::f_down`] that do nothing, consider using
+/// [`TreeNode::transform_up`] or [`TreeNode::transform_down`] instead.
+///
+/// # See Also:
+/// * [`TreeNode::visit`] to inspect borrowed `TreeNode`s
 pub trait TreeNodeRewriter: Sized {
     /// The node type which is rewritable.
     type Node: TreeNode;
@@ -460,24 +581,24 @@ impl TreeNodeRecursion {
     }
 }
 
-/// This struct is used by tree transformation APIs such as
-/// - [`TreeNode::rewrite`],
-/// - [`TreeNode::transform_down`],
-/// - [`TreeNode::transform_up`],
-/// - [`TreeNode::transform_down_up`]
+/// Result of tree walk / transformation APIs
 ///
-/// to control the transformation and return the transformed result.
-///
-/// Specifically, API users can provide transformation closures or 
[`TreeNodeRewriter`]
-/// implementations to control the transformation by returning:
+/// API users control the transformation by returning:
 /// - The resulting (possibly transformed) node,
-/// - A flag indicating whether any change was made to the node, and
-/// - A flag specifying how to proceed with the recursion.
+/// - `transformed`: flag indicating whether any change was made to the node
+/// - `tnr`: [`TreeNodeRecursion`] specifying how to proceed with the 
recursion.
 ///
 /// At the end of the transformation, the return value will contain:
 /// - The final (possibly transformed) tree,
-/// - A flag indicating whether any change was made to the tree, and
-/// - A flag specifying how the recursion ended.
+/// - `transformed`: flag indicating whether any change was made to the node
+/// - `tnr`: [`TreeNodeRecursion`] specifying how the recursion ended.
+///
+/// Example APIs:
+/// - [`TreeNode`],
+/// - [`TreeNode::rewrite`],
+/// - [`TreeNode::transform_down`],
+/// - [`TreeNode::transform_up`],
+/// - [`TreeNode::transform_down_up`]
 #[derive(PartialEq, Debug)]
 pub struct Transformed<T> {
     pub data: T,
@@ -659,6 +780,7 @@ impl<I: Iterator> TreeNodeIterator for I {
 
 /// Transformation helper to process a heterogeneous sequence of tree node 
containing
 /// expressions.
+///
 /// This macro is very similar to 
[TreeNodeIterator::map_until_stop_and_collect] to
 /// process nodes that are siblings, but it accepts an initial transformation 
(`F0`) and
 /// a sequence of pairs. Each pair is made of an expression (`EXPR`) and its
diff --git a/datafusion/expr/src/logical_plan/plan.rs 
b/datafusion/expr/src/logical_plan/plan.rs
index 97f5e22287..6df5516b1b 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -65,7 +65,7 @@ pub use datafusion_common::{JoinConstraint, JoinType};
 /// from leaves up to the root to produce the query result.
 ///
 /// # See also:
-/// * [`tree_node`]: visiting and rewriting API
+/// * [`tree_node`]: To inspect and rewrite `LogicalPlan` trees
 ///
 /// [`tree_node`]: crate::logical_plan::tree_node
 #[derive(Clone, PartialEq, Eq, Hash)]


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to