alamb commented on a change in pull request #8619:
URL: https://github.com/apache/arrow/pull/8619#discussion_r520176174



##########
File path: rust/datafusion/src/logical_plan/mod.rs
##########
@@ -956,117 +956,570 @@ impl LogicalPlan {
     }
 }
 
+/// Trait that implements the [Visitor
+/// pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for a
+/// depth first walk of `LogicalPlan` nodes. `pre_visit` is called
+/// before any children are visited, and then `post_visit` is called
+/// after all children have been visited.
+////
+/// To use, define a struct that implements this trait and then invoke
+/// "LogicalPlan::accept".
+///
+/// For example, for a logical plan like:
+///
+/// Projection: #id
+///    Filter: #state Eq Utf8(\"CO\")\
+///       CsvScan: employee.csv projection=Some([0, 3])";
+///
+/// The sequence of visit operations would be:
+/// ```text
+/// visitor.pre_visit(Projection)
+/// visitor.pre_visit(Filter)
+/// visitor.pre_visit(CsvScan)
+/// visitor.post_visit(CsvScan)
+/// visitor.post_visit(Filter)
+/// visitor.post_visit(Projection)
+/// ```
+///
+/// Example use: TODO
+pub trait PlanVisitor {

Review comment:
       I actually think this introduction is the most important thing about 
this PR -- I would like to move most of the recursive walks of `LogicalPlans` 
to be in terms of such a structure. 

##########
File path: rust/datafusion/src/logical_plan/mod.rs
##########
@@ -956,117 +956,570 @@ impl LogicalPlan {
     }
 }
 
+/// Trait that implements the [Visitor
+/// pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for a
+/// depth first walk of `LogicalPlan` nodes. `pre_visit` is called
+/// before any children are visited, and then `post_visit` is called
+/// after all children have been visited.
+////
+/// To use, define a struct that implements this trait and then invoke
+/// "LogicalPlan::accept".
+///
+/// For example, for a logical plan like:
+///
+/// Projection: #id
+///    Filter: #state Eq Utf8(\"CO\")\
+///       CsvScan: employee.csv projection=Some([0, 3])";
+///
+/// The sequence of visit operations would be:
+/// ```text
+/// visitor.pre_visit(Projection)
+/// visitor.pre_visit(Filter)
+/// visitor.pre_visit(CsvScan)
+/// visitor.post_visit(CsvScan)
+/// visitor.post_visit(Filter)
+/// visitor.post_visit(Projection)
+/// ```
+///
+/// Example use: TODO
+pub trait PlanVisitor {
+    /// The type of error returned by this visitor
+    type Error;
+
+    /// Invoked on a logical plan before any of its child inputs have been
+    /// visited. If Ok(true) is returned, the recursion continues. If
+    /// Err(..) or Ok(false) are returned, the recursion stops
+    /// immedately and the error, if any, is returned to `accept`
+    fn pre_visit(&mut self, plan: &LogicalPlan)
+        -> std::result::Result<bool, Self::Error>;
+
+    /// Invoked on a logical plan after all of its child inputs have
+    /// been visited. The return value is handled the same as the
+    /// return value of `pre_visit`. The provided default implementation
+    /// returns `Ok(true)`.
+    fn post_visit(
+        &mut self,
+        _plan: &LogicalPlan,
+    ) -> std::result::Result<bool, Self::Error> {
+        Ok(true)
+    }
+}
+
 impl LogicalPlan {
-    fn fmt_with_indent(&self, f: &mut fmt::Formatter, indent: usize) -> 
fmt::Result {
-        if indent > 0 {
-            writeln!(f)?;
-            for _ in 0..indent {
-                write!(f, "  ")?;
-            }
+    /// returns all inputs in the logical plan. Returns Ok(true) if
+    /// all nodes were visited, and Ok(false) if any call to
+    /// `pre_visit` or `post_visit` returned Ok(false) and may have
+    /// cut short the recursion
+    pub fn accept<V>(&self, visitor: &mut V) -> std::result::Result<bool, 
V::Error>
+    where
+        V: PlanVisitor,
+    {
+        if !visitor.pre_visit(self)? {
+            return Ok(false);
         }
-        match *self {
-            LogicalPlan::EmptyRelation { .. } => write!(f, "EmptyRelation"),
-            LogicalPlan::TableScan {
-                ref source,
-                ref projection,
-                ..
-            } => match source {
-                TableSource::FromContext(table_name) => {
-                    write!(f, "TableScan: {} projection={:?}", table_name, 
projection)
-                }
-                TableSource::FromProvider(_) => {
-                    write!(f, "TableScan: projection={:?}", projection)
-                }
-            },
-            LogicalPlan::InMemoryScan { ref projection, .. } => {
-                write!(f, "InMemoryScan: projection={:?}", projection)
-            }
-            LogicalPlan::CsvScan {
-                ref path,
-                ref projection,
-                ..
-            } => write!(f, "CsvScan: {} projection={:?}", path, projection),
-            LogicalPlan::ParquetScan {
-                ref path,
-                ref projection,
-                ..
-            } => write!(f, "ParquetScan: {} projection={:?}", path, 
projection),
-            LogicalPlan::Projection {
-                ref expr,
-                ref input,
-                ..
-            } => {
-                write!(f, "Projection: ")?;
-                for i in 0..expr.len() {
-                    if i > 0 {
-                        write!(f, ", ")?;
+
+        // recurse
+        let recurse = match self {
+            LogicalPlan::Projection { input, .. } => input.accept(visitor)?,
+            LogicalPlan::Filter { input, .. } => input.accept(visitor)?,
+            LogicalPlan::Aggregate { input, .. } => input.accept(visitor)?,
+            LogicalPlan::Sort { input, .. } => input.accept(visitor)?,
+            LogicalPlan::Limit { input, .. } => input.accept(visitor)?,
+            LogicalPlan::Extension { node } => {
+                for input in node.inputs() {
+                    if !input.accept(visitor)? {
+                        return Ok(false);
                     }
-                    write!(f, "{:?}", expr[i])?;
                 }
-                input.fmt_with_indent(f, indent + 1)
+                true
             }
-            LogicalPlan::Filter {
-                predicate: ref expr,
-                ref input,
-                ..
-            } => {
-                write!(f, "Filter: {:?}", expr)?;
-                input.fmt_with_indent(f, indent + 1)
-            }
-            LogicalPlan::Aggregate {
-                ref input,
-                ref group_expr,
-                ref aggr_expr,
-                ..
-            } => {
+            // plans without inputs
+            LogicalPlan::TableScan { .. }
+            | LogicalPlan::InMemoryScan { .. }
+            | LogicalPlan::ParquetScan { .. }
+            | LogicalPlan::CsvScan { .. }
+            | LogicalPlan::EmptyRelation { .. }
+            | LogicalPlan::CreateExternalTable { .. }
+            | LogicalPlan::Explain { .. } => true,
+        };
+        if !recurse {
+            return Ok(false);
+        }
+
+        if !visitor.post_visit(self)? {
+            return Ok(false);
+        }
+
+        Ok(true)
+    }
+}
+
+/// Formats plans with a single line per node. For example:
+///
+/// Projection: #id
+///    Filter: #state Eq Utf8(\"CO\")\
+///       CsvScan: employee.csv projection=Some([0, 3])";
+struct IndentVisitor<'a, 'b> {
+    f: &'a mut fmt::Formatter<'b>,
+    /// If true, includes summarized schema information
+    with_schema: bool,
+    indent: u32,
+}
+
+impl<'a, 'b> IndentVisitor<'a, 'b> {
+    fn write_indent(&mut self) -> fmt::Result {
+        for _ in 0..self.indent {
+            write!(self.f, "  ")?;
+        }
+        Ok(())
+    }
+}
+
+impl<'a, 'b> PlanVisitor for IndentVisitor<'a, 'b> {
+    type Error = fmt::Error;
+
+    fn pre_visit(&mut self, plan: &LogicalPlan) -> std::result::Result<bool, 
fmt::Error> {
+        if self.indent > 0 {
+            writeln!(self.f)?;
+        }
+        self.write_indent()?;
+
+        write!(self.f, "{}", plan.display())?;
+        if self.with_schema {
+            write!(self.f, " {}", display_schema(plan.schema()))?;
+        }
+
+        self.indent += 1;
+        Ok(true)
+    }
+
+    fn post_visit(
+        &mut self,
+        _plan: &LogicalPlan,
+    ) -> std::result::Result<bool, fmt::Error> {
+        self.indent -= 1;
+        Ok(true)
+    }
+}
+
+/// Print the schema in a compact representation to `buf`
+///
+/// For example: `foo:Utf8` if `foo` can not be null, and
+/// `foo:Utf8;N` if `foo` is nullable.
+///
+/// ```
+/// use arrow::datatypes::{Field, Schema, DataType};
+/// # use datafusion::logical_plan::display_schema;
+/// let schema = Schema::new(vec![
+///     Field::new("id", DataType::Int32, false),
+///     Field::new("first_name", DataType::Utf8, true),
+///  ]);
+///
+///  assert_eq!(
+///      "[id:Int32, first_name:Utf8;N]",
+///      format!("{}", display_schema(&schema))
+///  );
+/// ```
+pub fn display_schema<'a>(schema: &'a Schema) -> impl fmt::Display + 'a {

Review comment:
       I wonder if this belongs in arrow rather than DataFusion? I could go 
either way




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to