jonahgao commented on code in PR #10687: URL: https://github.com/apache/datafusion/pull/10687#discussion_r1618480297
########## datafusion/expr/src/logical_plan/plan.rs: ########## @@ -56,19 +56,139 @@ use crate::logical_plan::tree_node::unwrap_arc; pub use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan}; pub use datafusion_common::{JoinConstraint, JoinType}; -/// A LogicalPlan represents the different types of relational -/// operators (such as Projection, Filter, etc) and can be created by -/// the SQL query planner and the DataFrame API. +/// A `LogicalPlan` is a node in a tree of relational operators (such as +/// Projection or Filter). /// -/// A LogicalPlan represents transforming an input relation (table) to -/// an output relation (table) with a (potentially) different -/// schema. A plan represents a dataflow tree where data flows -/// from leaves up to the root to produce the query result. +/// Represents transforming an input relation (table) to an output relation +/// (table) with a potentially different schema. Plans form a dataflow tree +/// where data flows from leaves up to the root to produce the query result. +/// +/// `LogicalPlan`s can be created by the SQL query planner, the DataFrame API, +/// or programmatically (for example custom query languages). /// /// # See also: -/// * [`tree_node`]: To inspect and rewrite `LogicalPlan` trees +/// * [`Expr`]: For the expressions that are evaluated by the plan +/// * [`LogicalPlanBuilder`]: For building `LogicalPlan`s +/// * [`tree_node`]: To inspect and rewrite `LogicalPlan`s /// /// [`tree_node`]: crate::logical_plan::tree_node +/// +/// # Examples +/// +/// ## Creating a LogicalPlan from SQL: +/// +/// See [`SessionContext::sql`](https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.sql) +/// +/// ## Creating a LogicalPlan from the DataFrame API: +/// +/// See [`DataFrame::logical_plan`](https://docs.rs/datafusion/latest/datafusion/dataframe/struct.DataFrame.html#method.logical_plan) +/// +/// ## Creating a LogicalPlan programmatically: +/// +/// See [`LogicalPlanBuilder`] +/// +/// # Visiting and Rewriting `LogicalPlan`s +/// +/// Using the [`tree_node`] API, you can recursively walk all nodes in a +/// `LogicalPlan`. For example, to find all column references in a plan: +/// +/// ``` +/// # use std::collections::HashSet; +/// # use arrow::datatypes::{DataType, Field, Schema}; +/// # use datafusion_expr::{Expr, col, lit, LogicalPlan, LogicalPlanBuilder, table_scan}; +/// # use datafusion_common::tree_node::{TreeNodeRecursion, TreeNode}; +/// # use datafusion_common::{Column, Result}; +/// # fn employee_schema() -> Schema { +/// # Schema::new(vec![ +/// # Field::new("name", DataType::Utf8, false), +/// # Field::new("salary", DataType::Int32, false), +/// # ]) +/// # } +/// // Projection(name, salary) +/// // Filter(salary > 1000) +/// // TableScan(employee) +/// # fn main() -> Result<()> { +/// let plan = table_scan(Some("employee"), &employee_schema(), None)? +/// .filter(col("salary").gt(lit(1000)))? +/// .project(vec![col("name")])? +/// .build()?; +/// +/// // use apply to walk the plan and collect all column references +/// let mut expressions = HashSet::new(); +/// plan.apply(|node| { +/// // collect all expressions in the plan +/// node.apply_expressions(|expr| { +/// expressions.insert(expr.clone()); +/// Ok(TreeNodeRecursion::Continue) // control walk of expressions +/// })?; +/// Ok(TreeNodeRecursion::Continue) // control walk of plan nodes +/// }).unwrap(); +/// +/// // we found the expression in projection and filter +/// assert_eq!(expressions.len(), 2); +/// println!("Found expressions: {:?}", expressions); +/// // found predicate in the Filter: employee.salary > 1000 +/// let salary = Expr::Column(Column::new(Some("employee"), "salary")); +/// assert!(expressions.contains(&salary.gt(lit(1000)))); +/// // found projection in the Projection: employee.name +/// let name = Expr::Column(Column::new(Some("employee"), "name")); +/// assert!(expressions.contains(&name)); +/// # Ok(()) +/// # } +/// ``` +/// +/// You can also rewrite plans using the [`tree_node`] API. For example, to +/// replace the filter predicate in a plan: +/// +/// ``` +/// # use std::collections::HashSet; +/// # use arrow::datatypes::{DataType, Field, Schema}; +/// # use datafusion_expr::{Expr, col, lit, LogicalPlan, LogicalPlanBuilder, table_scan}; +/// # use datafusion_common::tree_node::{TreeNodeRecursion, TreeNode}; +/// # use datafusion_common::{Column, Result}; +/// # fn employee_schema() -> Schema { +/// # Schema::new(vec![ +/// # Field::new("name", DataType::Utf8, false), +/// # Field::new("salary", DataType::Int32, false), +/// # ]) +/// # } +/// // Projection(name, salary) +/// // Filter(salary > 1000) +/// // TableScan(employee) +/// # fn main() -> Result<()> { +/// use datafusion_common::tree_node::Transformed; +/// let plan = table_scan(Some("employee"), &employee_schema(), None)? +/// .filter(col("salary").gt(lit(1000)))? +/// .project(vec![col("name")])? +/// .build()?; +/// +/// // use transform to rewrite the plan +/// let transformed_result = plan.transform(|node| { +/// // when we see the filter node +/// if let LogicalPlan::Filter(mut filter) = node { +/// // replace predicate with salary > 2000 Review Comment: ```suggestion /// // replace predicate with salary < 2000 ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org