This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 16369d8612 Improve documentation on `TreeNode` (#10035)
16369d8612 is described below
commit 16369d861270e2093567bb8818e7f636529f9947
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Apr 22 16:30:33 2024 -0400
Improve documentation on `TreeNode` (#10035)
* Improve documentation on `TreeNode`
* Document inspecting, rewriting APIs. Add chart
* tweak
* Add references to PlanContext and ExprContext
* refine TreeNodeRewriter docs
* Add note about exists
* Update datafusion/common/src/tree_node.rs
Co-authored-by: Jeffrey Vo <[email protected]>
---------
Co-authored-by: Jeffrey Vo <[email protected]>
---
datafusion/common/src/tree_node.rs | 282 ++++++++++++++++++++++---------
datafusion/expr/src/logical_plan/plan.rs | 2 +-
2 files changed, 203 insertions(+), 81 deletions(-)
diff --git a/datafusion/common/src/tree_node.rs
b/datafusion/common/src/tree_node.rs
index f41d264d35..7003f5ac7f 100644
--- a/datafusion/common/src/tree_node.rs
+++ b/datafusion/common/src/tree_node.rs
@@ -15,8 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! This module provides common traits for visiting or rewriting tree
-//! data structures easily.
+//! [`TreeNode`] for visiting and rewriting expression and plan trees
use std::sync::Arc;
@@ -31,22 +30,83 @@ macro_rules! handle_transform_recursion {
}};
}
-/// Defines a visitable and rewriteable tree node. This trait is implemented
-/// for plans ([`ExecutionPlan`] and [`LogicalPlan`]) as well as expression
-/// trees ([`PhysicalExpr`], [`Expr`]) in DataFusion.
+/// API for inspecting and rewriting tree data structures.
+///
+/// The `TreeNode` API is used to express algorithms separately from traversing
+/// the structure of `TreeNode`s, avoiding substantial code duplication.
+///
+/// This trait is implemented for plans ([`ExecutionPlan`], [`LogicalPlan`])
and
+/// expression trees ([`PhysicalExpr`], [`Expr`]) as well as Plan+Payload
+/// combinations [`PlanContext`] and [`ExprContext`].
+///
+/// # Overview
+/// There are three categories of TreeNode APIs:
+///
+/// 1. "Inspecting" APIs to traverse a tree of `&TreeNodes`:
+/// [`apply`], [`visit`], [`exists`].
+///
+/// 2. "Transforming" APIs that traverse and consume a tree of `TreeNode`s
+/// producing possibly changed `TreeNode`s: [`transform`], [`transform_up`],
+/// [`transform_down`], [`transform_down_up`], and [`rewrite`].
+///
+/// 3. Internal APIs used to implement the `TreeNode` API: [`apply_children`],
+/// and [`map_children`].
+///
+/// | Traversal Order | Inspecting | Transforming |
+/// | --- | --- | --- |
+/// | top-down | [`apply`], [`exists`] | [`transform_down`]|
+/// | bottom-up | | [`transform`] , [`transform_up`]|
+/// | combined with separate `f_down` and `f_up` closures | |
[`transform_down_up`] |
+/// | combined with `f_down()` and `f_up()` in an object | [`visit`] |
[`rewrite`] |
+///
+/// **Note**:while there is currently no in-place mutation API that uses `&mut
+/// TreeNode`, the transforming APIs are efficient and optimized to avoid
+/// cloning.
+///
+/// [`apply`]: Self::apply
+/// [`visit`]: Self::visit
+/// [`exists`]: Self::exists
+/// [`transform`]: Self::transform
+/// [`transform_up`]: Self::transform_up
+/// [`transform_down`]: Self::transform_down
+/// [`transform_down_up`]: Self::transform_down_up
+/// [`rewrite`]: Self::rewrite
+/// [`apply_children`]: Self::apply_children
+/// [`map_children`]: Self::map_children
+///
+/// # Terminology
+/// The following terms are used in this trait
+///
+/// * `f_down`: Invoked before any children of the current node are visited.
+/// * `f_up`: Invoked after all children of the current node are visited.
+/// * `f`: closure that is applied to the current node.
+/// * `map_*`: applies a transformation to rewrite owned nodes
+/// * `apply_*`: invokes a function on borrowed nodes
+/// * `transform_`: applies a transformation to rewrite owned nodes
///
/// <!-- Since these are in the datafusion-common crate, can't use intra doc
links) -->
/// [`ExecutionPlan`]:
https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html
/// [`PhysicalExpr`]:
https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.PhysicalExpr.html
/// [`LogicalPlan`]:
https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/enum.LogicalPlan.html
/// [`Expr`]:
https://docs.rs/datafusion-expr/latest/datafusion_expr/expr/enum.Expr.html
+/// [`PlanContext`]:
https://docs.rs/datafusion/latest/datafusion/physical_plan/tree_node/struct.PlanContext.html
+/// [`ExprContext`]:
https://docs.rs/datafusion/latest/datafusion/physical_expr/tree_node/struct.ExprContext.html
pub trait TreeNode: Sized {
- /// Visit the tree node using the given [`TreeNodeVisitor`], performing a
+ /// Visit the tree node with a [`TreeNodeVisitor`], performing a
/// depth-first walk of the node and its children.
///
- /// See also:
- /// * [`Self::rewrite`] to rewrite owned `TreeNode`s
+ /// [`TreeNodeVisitor::f_down()`] is called in top-down order (before
+ /// children are visited), [`TreeNodeVisitor::f_up()`] is called in
+ /// bottom-up order (after children are visited).
///
+ /// # Return Value
+ /// Specifies how the tree walk ended. See [`TreeNodeRecursion`] for
details.
+ ///
+ /// # See Also:
+ /// * [`Self::apply`] for inspecting nodes with a closure
+ /// * [`Self::rewrite`] to rewrite owned `TreeNode`s
+ ///
+ /// # Example
/// Consider the following tree structure:
/// ```text
/// ParentNode
@@ -63,14 +123,6 @@ pub trait TreeNode: Sized {
/// TreeNodeVisitor::f_up(ChildNode2)
/// TreeNodeVisitor::f_up(ParentNode)
/// ```
- ///
- /// See [`TreeNodeRecursion`] for more details on controlling the
traversal.
- ///
- /// If [`TreeNodeVisitor::f_down()`] or [`TreeNodeVisitor::f_up()`]
returns [`Err`],
- /// the recursion stops immediately.
- ///
- /// If using the default [`TreeNodeVisitor::f_up`] that does nothing,
consider using
- /// [`Self::apply`].
fn visit<V: TreeNodeVisitor<Node = Self>>(
&self,
visitor: &mut V,
@@ -81,12 +133,29 @@ pub trait TreeNode: Sized {
.visit_parent(|| visitor.f_up(self))
}
- /// Implements the [visitor
pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for
- /// recursively transforming [`TreeNode`]s.
+ /// Rewrite the tree node with a [`TreeNodeRewriter`], performing a
+ /// depth-first walk of the node and its children.
+ ///
+ /// [`TreeNodeRewriter::f_down()`] is called in top-down order (before
+ /// children are visited), [`TreeNodeRewriter::f_up()`] is called in
+ /// bottom-up order (after children are visited).
+ ///
+ /// Note: If using the default [`TreeNodeRewriter::f_up`] or
+ /// [`TreeNodeRewriter::f_down`] that do nothing, consider using
+ /// [`Self::transform_down`] instead.
+ ///
+ /// # Return Value
+ /// The returns value specifies how the tree walk should proceed. See
+ /// [`TreeNodeRecursion`] for details. If an [`Err`] is returned, the
+ /// recursion stops immediately.
///
- /// See also:
- /// * [`Self::visit`] for inspecting (without modification) `TreeNode`s
+ /// # See Also
+ /// * [`Self::visit`] for inspecting (without modification) `TreeNode`s
+ /// * [Self::transform_down_up] for a top-down (pre-order) traversal.
+ /// * [Self::transform_down] for a top-down (pre-order) traversal.
+ /// * [`Self::transform_up`] for a bottom-up (post-order) traversal.
///
+ /// # Example
/// Consider the following tree structure:
/// ```text
/// ParentNode
@@ -103,11 +172,6 @@ pub trait TreeNode: Sized {
/// TreeNodeRewriter::f_up(ChildNode2)
/// TreeNodeRewriter::f_up(ParentNode)
/// ```
- ///
- /// See [`TreeNodeRecursion`] for more details on controlling the
traversal.
- ///
- /// If [`TreeNodeVisitor::f_down()`] or [`TreeNodeVisitor::f_up()`]
returns [`Err`],
- /// the recursion stops immediately.
fn rewrite<R: TreeNodeRewriter<Node = Self>>(
self,
rewriter: &mut R,
@@ -117,12 +181,15 @@ pub trait TreeNode: Sized {
})
}
- /// Applies `f` to the node and its children. `f` is applied in a pre-order
- /// way, and it is controlled by [`TreeNodeRecursion`], which means result
- /// of the `f` on a node can cause an early return.
+ /// Applies `f` to the node then each of its children, recursively (a
+ /// top-down, pre-order traversal).
+ ///
+ /// The return [`TreeNodeRecursion`] controls the recursion and can cause
+ /// an early return.
///
- /// The `f` closure can be used to collect some information from tree nodes
- /// or run a check on the tree.
+ /// # See Also
+ /// * [`Self::transform_down`] for the equivalent transformation API.
+ /// * [`Self::visit`] for both top-down and bottom up traversal.
fn apply<F: FnMut(&Self) -> Result<TreeNodeRecursion>>(
&self,
mut f: F,
@@ -137,9 +204,10 @@ pub trait TreeNode: Sized {
apply_impl(self, &mut f)
}
- /// Convenience utility for writing optimizer rules: Recursively apply the
- /// given function `f` to the tree in a bottom-up (post-order) fashion.
When
- /// `f` does not apply to a given node, it is left unchanged.
+ /// Recursively rewrite the node's children and then the node using `f`
+ /// (a bottom-up post-order traversal).
+ ///
+ /// A synonym of [`Self::transform_up`].
fn transform<F: FnMut(Self) -> Result<Transformed<Self>>>(
self,
f: F,
@@ -147,9 +215,15 @@ pub trait TreeNode: Sized {
self.transform_up(f)
}
- /// Convenience utility for writing optimizer rules: Recursively apply the
- /// given function `f` to a node and then to its children (pre-order
traversal).
- /// When `f` does not apply to a given node, it is left unchanged.
+ /// Recursively rewrite the tree using `f` in a top-down (pre-order)
+ /// fashion.
+ ///
+ /// `f` is applied to the node first, and then its children.
+ ///
+ /// # See Also
+ /// * [`Self::transform_up`] for a bottom-up (post-order) traversal.
+ /// * [Self::transform_down_up] for a combined traversal with closures
+ /// * [`Self::rewrite`] for a combined traversal with a visitor
fn transform_down<F: FnMut(Self) -> Result<Transformed<Self>>>(
self,
mut f: F,
@@ -164,9 +238,7 @@ pub trait TreeNode: Sized {
transform_down_impl(self, &mut f)
}
- /// Convenience utility for writing optimizer rules: Recursively apply the
- /// given mutable function `f` to a node and then to its children
(pre-order
- /// traversal). When `f` does not apply to a given node, it is left
unchanged.
+ /// Same as [`Self::transform_down`] but with a mutable closure.
#[deprecated(since = "38.0.0", note = "Use `transform_down` instead")]
fn transform_down_mut<F: FnMut(Self) -> Result<Transformed<Self>>>(
self,
@@ -175,10 +247,15 @@ pub trait TreeNode: Sized {
self.transform_down(f)
}
- /// Convenience utility for writing optimizer rules: Recursively apply the
- /// given function `f` to all children of a node, and then to the node
itself
- /// (post-order traversal). When `f` does not apply to a given node, it is
- /// left unchanged.
+ /// Recursively rewrite the node using `f` in a bottom-up (post-order)
+ /// fashion.
+ ///
+ /// `f` is applied to the node's children first, and then to the node
itself.
+ ///
+ /// # See Also
+ /// * [`Self::transform_down`] top-down (pre-order) traversal.
+ /// * [Self::transform_down_up] for a combined traversal with closures
+ /// * [`Self::rewrite`] for a combined traversal with a visitor
fn transform_up<F: FnMut(Self) -> Result<Transformed<Self>>>(
self,
mut f: F,
@@ -194,10 +271,7 @@ pub trait TreeNode: Sized {
transform_up_impl(self, &mut f)
}
- /// Convenience utility for writing optimizer rules: Recursively apply the
- /// given mutable function `f` to all children of a node, and then to the
- /// node itself (post-order traversal). When `f` does not apply to a given
- /// node, it is left unchanged.
+ /// Same as [`Self::transform_up`] but with a mutable closure.
#[deprecated(since = "38.0.0", note = "Use `transform_up` instead")]
fn transform_up_mut<F: FnMut(Self) -> Result<Transformed<Self>>>(
self,
@@ -206,15 +280,21 @@ pub trait TreeNode: Sized {
self.transform_up(f)
}
- /// Transforms the tree using `f_down` while traversing the tree top-down
+ /// Transforms the node using `f_down` while traversing the tree top-down
/// (pre-order), and using `f_up` while traversing the tree bottom-up
/// (post-order).
///
- /// Use this method if you want to start the `f_up` process right where
`f_down` jumps.
- /// This can make the whole process faster by reducing the number of
`f_up` steps.
- /// If you don't need this, it's just like using `transform_down` followed
by
- /// `transform_up` on the same tree.
+ /// The method behaves the same as calling [`Self::transform_down`]
followed
+ /// by [`Self::transform_up`] on the same node. Use this method if you want
+ /// to start the `f_up` process right where `f_down` jumps. This can make
+ /// the whole process faster by reducing the number of `f_up` steps.
///
+ /// # See Also
+ /// * [`Self::transform_up`] for a bottom-up (post-order) traversal.
+ /// * [Self::transform_down] for a top-down (pre-order) traversal.
+ /// * [`Self::rewrite`] for a combined traversal with a visitor
+ ///
+ /// # Example
/// Consider the following tree structure:
/// ```text
/// ParentNode
@@ -322,7 +402,7 @@ pub trait TreeNode: Sized {
transform_down_up_impl(self, &mut f_down, &mut f_up)
}
- /// Returns true if `f` returns true for node in the tree.
+ /// Returns true if `f` returns true for any node in the tree.
///
/// Stops recursion as soon as a matching node is found
fn exists<F: FnMut(&Self) -> bool>(&self, mut f: F) -> bool {
@@ -339,50 +419,91 @@ pub trait TreeNode: Sized {
found
}
- /// Apply the closure `F` to the node's children.
+ /// Low-level API used to implement other APIs.
+ ///
+ /// If you want to implement the [`TreeNode`] trait for your own type, you
+ /// should implement this method and [`Self::map_children`].
///
- /// See `mutate_children` for rewriting in place
+ /// Users should use one of the higher level APIs described on [`Self`].
+ ///
+ /// Description: Apply `f` to inspect node's children (but not the node
+ /// itself).
fn apply_children<F: FnMut(&Self) -> Result<TreeNodeRecursion>>(
&self,
f: F,
) -> Result<TreeNodeRecursion>;
- /// Apply transform `F` to potentially rewrite the node's children. Note
- /// that the transform `F` might have a direction (pre-order or
post-order).
+ /// Low-level API used to implement other APIs.
+ ///
+ /// If you want to implement the [`TreeNode`] trait for your own type, you
+ /// should implement this method and [`Self::apply_children`].
+ ///
+ /// Users should use one of the higher level APIs described on [`Self`].
+ ///
+ /// Description: Apply `f` to rewrite the node's children (but not the
node itself).
fn map_children<F: FnMut(Self) -> Result<Transformed<Self>>>(
self,
f: F,
) -> Result<Transformed<Self>>;
}
-/// Implements the [visitor
pattern](https://en.wikipedia.org/wiki/Visitor_pattern)
-/// for recursively walking [`TreeNode`]s.
+/// A [Visitor](https://en.wikipedia.org/wiki/Visitor_pattern) for recursively
+/// inspecting [`TreeNode`]s via [`TreeNode::visit`].
///
-/// A [`TreeNodeVisitor`] allows one to express algorithms separately from the
-/// code traversing the structure of the `TreeNode` tree, making it easier to
-/// add new types of tree nodes and algorithms.
+/// See [`TreeNode`] for more details on available APIs
///
/// When passed to [`TreeNode::visit`], [`TreeNodeVisitor::f_down`] and
/// [`TreeNodeVisitor::f_up`] are invoked recursively on the tree.
/// See [`TreeNodeRecursion`] for more details on controlling the traversal.
+///
+/// # Return Value
+/// The returns value of `f_up` and `f_down` specifies how the tree walk should
+/// proceed. See [`TreeNodeRecursion`] for details. If an [`Err`] is returned,
+/// the recursion stops immediately.
+///
+/// Note: If using the default implementations of [`TreeNodeVisitor::f_up`] or
+/// [`TreeNodeVisitor::f_down`] that do nothing, consider using
+/// [`TreeNode::apply`] instead.
+///
+/// # See Also:
+/// * [`TreeNode::rewrite`] to rewrite owned `TreeNode`s
pub trait TreeNodeVisitor: Sized {
/// The node type which is visitable.
type Node: TreeNode;
- /// Invoked before any children of `node` are visited.
- /// Default implementation simply continues the recursion.
+ /// Invoked while traversing down the tree, before any children are
visited.
+ /// Default implementation continues the recursion.
fn f_down(&mut self, _node: &Self::Node) -> Result<TreeNodeRecursion> {
Ok(TreeNodeRecursion::Continue)
}
- /// Invoked after all children of `node` are visited.
- /// Default implementation simply continues the recursion.
+ /// Invoked while traversing up the tree after children are visited.
Default
+ /// implementation continues the recursion.
fn f_up(&mut self, _node: &Self::Node) -> Result<TreeNodeRecursion> {
Ok(TreeNodeRecursion::Continue)
}
}
-/// Trait for potentially recursively transforming a tree of [`TreeNode`]s.
+/// A [Visitor](https://en.wikipedia.org/wiki/Visitor_pattern) for recursively
+/// rewriting [`TreeNode`]s via [`TreeNode::rewrite`].
+///
+/// See [`TreeNode`] for more details on available APIs
+///
+/// When passed to [`TreeNode::rewrite`], [`TreeNodeRewriter::f_down`] and
+/// [`TreeNodeRewriter::f_up`] are invoked recursively on the tree.
+/// See [`TreeNodeRecursion`] for more details on controlling the traversal.
+///
+/// # Return Value
+/// The returns value of `f_up` and `f_down` specifies how the tree walk should
+/// proceed. See [`TreeNodeRecursion`] for details. If an [`Err`] is returned,
+/// the recursion stops immediately.
+///
+/// Note: If using the default implementations of [`TreeNodeRewriter::f_up`] or
+/// [`TreeNodeRewriter::f_down`] that do nothing, consider using
+/// [`TreeNode::transform_up`] or [`TreeNode::transform_down`] instead.
+///
+/// # See Also:
+/// * [`TreeNode::visit`] to inspect borrowed `TreeNode`s
pub trait TreeNodeRewriter: Sized {
/// The node type which is rewritable.
type Node: TreeNode;
@@ -460,24 +581,24 @@ impl TreeNodeRecursion {
}
}
-/// This struct is used by tree transformation APIs such as
-/// - [`TreeNode::rewrite`],
-/// - [`TreeNode::transform_down`],
-/// - [`TreeNode::transform_up`],
-/// - [`TreeNode::transform_down_up`]
+/// Result of tree walk / transformation APIs
///
-/// to control the transformation and return the transformed result.
-///
-/// Specifically, API users can provide transformation closures or
[`TreeNodeRewriter`]
-/// implementations to control the transformation by returning:
+/// API users control the transformation by returning:
/// - The resulting (possibly transformed) node,
-/// - A flag indicating whether any change was made to the node, and
-/// - A flag specifying how to proceed with the recursion.
+/// - `transformed`: flag indicating whether any change was made to the node
+/// - `tnr`: [`TreeNodeRecursion`] specifying how to proceed with the
recursion.
///
/// At the end of the transformation, the return value will contain:
/// - The final (possibly transformed) tree,
-/// - A flag indicating whether any change was made to the tree, and
-/// - A flag specifying how the recursion ended.
+/// - `transformed`: flag indicating whether any change was made to the node
+/// - `tnr`: [`TreeNodeRecursion`] specifying how the recursion ended.
+///
+/// Example APIs:
+/// - [`TreeNode`],
+/// - [`TreeNode::rewrite`],
+/// - [`TreeNode::transform_down`],
+/// - [`TreeNode::transform_up`],
+/// - [`TreeNode::transform_down_up`]
#[derive(PartialEq, Debug)]
pub struct Transformed<T> {
pub data: T,
@@ -659,6 +780,7 @@ impl<I: Iterator> TreeNodeIterator for I {
/// Transformation helper to process a heterogeneous sequence of tree node
containing
/// expressions.
+///
/// This macro is very similar to
[TreeNodeIterator::map_until_stop_and_collect] to
/// process nodes that are siblings, but it accepts an initial transformation
(`F0`) and
/// a sequence of pairs. Each pair is made of an expression (`EXPR`) and its
diff --git a/datafusion/expr/src/logical_plan/plan.rs
b/datafusion/expr/src/logical_plan/plan.rs
index 97f5e22287..6df5516b1b 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -65,7 +65,7 @@ pub use datafusion_common::{JoinConstraint, JoinType};
/// from leaves up to the root to produce the query result.
///
/// # See also:
-/// * [`tree_node`]: visiting and rewriting API
+/// * [`tree_node`]: To inspect and rewrite `LogicalPlan` trees
///
/// [`tree_node`]: crate::logical_plan::tree_node
#[derive(Clone, PartialEq, Eq, Hash)]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]