andygrove commented on a change in pull request #8619:
URL: https://github.com/apache/arrow/pull/8619#discussion_r520655856



##########
File path: rust/datafusion/src/logical_plan/mod.rs
##########
@@ -956,117 +956,567 @@ impl LogicalPlan {
     }
 }
 
+/// Trait that implements the [Visitor
+/// pattern](https://en.wikipedia.org/wiki/Visitor_pattern) for a
+/// depth first walk of `LogicalPlan` nodes. `pre_visit` is called
+/// before any children are visited, and then `post_visit` is called
+/// after all children have been visited.
+////
+/// To use, define a struct that implements this trait and then invoke
+/// "LogicalPlan::accept".
+///
+/// For example, for a logical plan like:
+///
+/// Projection: #id
+///    Filter: #state Eq Utf8(\"CO\")\
+///       CsvScan: employee.csv projection=Some([0, 3])";
+///
+/// The sequence of visit operations would be:
+/// ```text
+/// visitor.pre_visit(Projection)
+/// visitor.pre_visit(Filter)
+/// visitor.pre_visit(CsvScan)
+/// visitor.post_visit(CsvScan)
+/// visitor.post_visit(Filter)
+/// visitor.post_visit(Projection)
+/// ```
+pub trait PlanVisitor {
+    /// The type of error returned by this visitor
+    type Error;
+
+    /// Invoked on a logical plan before any of its child inputs have been
+    /// visited. If Ok(true) is returned, the recursion continues. If
+    /// Err(..) or Ok(false) are returned, the recursion stops
+    /// immedately and the error, if any, is returned to `accept`
+    fn pre_visit(&mut self, plan: &LogicalPlan)
+        -> std::result::Result<bool, Self::Error>;
+
+    /// Invoked on a logical plan after all of its child inputs have
+    /// been visited. The return value is handled the same as the
+    /// return value of `pre_visit`. The provided default implementation
+    /// returns `Ok(true)`.
+    fn post_visit(
+        &mut self,
+        _plan: &LogicalPlan,
+    ) -> std::result::Result<bool, Self::Error> {
+        Ok(true)
+    }
+}
+
 impl LogicalPlan {
-    fn fmt_with_indent(&self, f: &mut fmt::Formatter, indent: usize) -> 
fmt::Result {
-        if indent > 0 {
-            writeln!(f)?;
-            for _ in 0..indent {
-                write!(f, "  ")?;
-            }
+    /// returns all inputs in the logical plan. Returns Ok(true) if
+    /// all nodes were visited, and Ok(false) if any call to
+    /// `pre_visit` or `post_visit` returned Ok(false) and may have
+    /// cut short the recursion
+    pub fn accept<V>(&self, visitor: &mut V) -> std::result::Result<bool, 
V::Error>
+    where
+        V: PlanVisitor,
+    {
+        if !visitor.pre_visit(self)? {
+            return Ok(false);
         }
-        match *self {
-            LogicalPlan::EmptyRelation { .. } => write!(f, "EmptyRelation"),
-            LogicalPlan::TableScan {
-                ref source,
-                ref projection,
-                ..
-            } => match source {
-                TableSource::FromContext(table_name) => {
-                    write!(f, "TableScan: {} projection={:?}", table_name, 
projection)
-                }
-                TableSource::FromProvider(_) => {
-                    write!(f, "TableScan: projection={:?}", projection)
-                }
-            },
-            LogicalPlan::InMemoryScan { ref projection, .. } => {
-                write!(f, "InMemoryScan: projection={:?}", projection)
-            }
-            LogicalPlan::CsvScan {
-                ref path,
-                ref projection,
-                ..
-            } => write!(f, "CsvScan: {} projection={:?}", path, projection),
-            LogicalPlan::ParquetScan {
-                ref path,
-                ref projection,
-                ..
-            } => write!(f, "ParquetScan: {} projection={:?}", path, 
projection),
-            LogicalPlan::Projection {
-                ref expr,
-                ref input,
-                ..
-            } => {
-                write!(f, "Projection: ")?;
-                for i in 0..expr.len() {
-                    if i > 0 {
-                        write!(f, ", ")?;
+
+        let recurse = match self {
+            LogicalPlan::Projection { input, .. } => input.accept(visitor)?,
+            LogicalPlan::Filter { input, .. } => input.accept(visitor)?,
+            LogicalPlan::Aggregate { input, .. } => input.accept(visitor)?,
+            LogicalPlan::Sort { input, .. } => input.accept(visitor)?,
+            LogicalPlan::Limit { input, .. } => input.accept(visitor)?,
+            LogicalPlan::Extension { node } => {
+                for input in node.inputs() {
+                    if !input.accept(visitor)? {
+                        return Ok(false);
                     }
-                    write!(f, "{:?}", expr[i])?;
                 }
-                input.fmt_with_indent(f, indent + 1)
+                true
             }
-            LogicalPlan::Filter {
-                predicate: ref expr,
-                ref input,
-                ..
-            } => {
-                write!(f, "Filter: {:?}", expr)?;
-                input.fmt_with_indent(f, indent + 1)
-            }
-            LogicalPlan::Aggregate {
-                ref input,
-                ref group_expr,
-                ref aggr_expr,
-                ..
-            } => {
+            // plans without inputs
+            LogicalPlan::TableScan { .. }
+            | LogicalPlan::InMemoryScan { .. }
+            | LogicalPlan::ParquetScan { .. }
+            | LogicalPlan::CsvScan { .. }
+            | LogicalPlan::EmptyRelation { .. }
+            | LogicalPlan::CreateExternalTable { .. }
+            | LogicalPlan::Explain { .. } => true,
+        };
+        if !recurse {
+            return Ok(false);
+        }
+
+        if !visitor.post_visit(self)? {
+            return Ok(false);
+        }
+
+        Ok(true)
+    }
+}
+
+/// Formats plans with a single line per node. For example:
+///
+/// Projection: #id
+///    Filter: #state Eq Utf8(\"CO\")\
+///       CsvScan: employee.csv projection=Some([0, 3])";
+struct IndentVisitor<'a, 'b> {
+    f: &'a mut fmt::Formatter<'b>,
+    /// If true, includes summarized schema information
+    with_schema: bool,
+    indent: u32,
+}
+
+impl<'a, 'b> IndentVisitor<'a, 'b> {
+    fn write_indent(&mut self) -> fmt::Result {
+        for _ in 0..self.indent {
+            write!(self.f, "  ")?;
+        }
+        Ok(())
+    }
+}
+
+impl<'a, 'b> PlanVisitor for IndentVisitor<'a, 'b> {
+    type Error = fmt::Error;
+
+    fn pre_visit(&mut self, plan: &LogicalPlan) -> std::result::Result<bool, 
fmt::Error> {
+        if self.indent > 0 {
+            writeln!(self.f)?;
+        }
+        self.write_indent()?;
+
+        write!(self.f, "{}", plan.display())?;
+        if self.with_schema {
+            write!(self.f, " {}", display_schema(plan.schema()))?;
+        }
+
+        self.indent += 1;
+        Ok(true)
+    }
+
+    fn post_visit(
+        &mut self,
+        _plan: &LogicalPlan,
+    ) -> std::result::Result<bool, fmt::Error> {
+        self.indent -= 1;
+        Ok(true)
+    }
+}
+
+/// Print the schema in a compact representation to `buf`
+///
+/// For example: `foo:Utf8` if `foo` can not be null, and
+/// `foo:Utf8;N` if `foo` is nullable.
+///
+/// ```
+/// use arrow::datatypes::{Field, Schema, DataType};
+/// # use datafusion::logical_plan::display_schema;
+/// let schema = Schema::new(vec![
+///     Field::new("id", DataType::Int32, false),
+///     Field::new("first_name", DataType::Utf8, true),
+///  ]);
+///
+///  assert_eq!(
+///      "[id:Int32, first_name:Utf8;N]",
+///      format!("{}", display_schema(&schema))
+///  );
+/// ```
+pub fn display_schema<'a>(schema: &'a Schema) -> impl fmt::Display + 'a {
+    struct Wrapper<'a>(&'a Schema);
+
+    impl<'a> fmt::Display for Wrapper<'a> {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            write!(f, "[")?;
+            for (idx, field) in self.0.fields().iter().enumerate() {
+                if idx > 0 {
+                    write!(f, ", ")?;
+                }
+                let nullable_str = if field.is_nullable() { ";N" } else { "" };
                 write!(
                     f,
-                    "Aggregate: groupBy=[{:?}], aggr=[{:?}]",
-                    group_expr, aggr_expr
+                    "{}:{:?}{}",
+                    field.name(),
+                    field.data_type(),
+                    nullable_str
                 )?;
-                input.fmt_with_indent(f, indent + 1)
-            }
-            LogicalPlan::Sort {
-                ref input,
-                ref expr,
-                ..
-            } => {
-                write!(f, "Sort: ")?;
-                for i in 0..expr.len() {
-                    if i > 0 {
-                        write!(f, ", ")?;
-                    }
-                    write!(f, "{:?}", expr[i])?;
-                }
-                input.fmt_with_indent(f, indent + 1)
             }
-            LogicalPlan::Limit {
-                ref input, ref n, ..
-            } => {
-                write!(f, "Limit: {}", n)?;
-                input.fmt_with_indent(f, indent + 1)
+            write!(f, "]")
+        }
+    }
+    Wrapper(schema)
+}
+
+/// Logic related to creating DOT language graphs.
+#[derive(Default)]
+struct GraphvizBuilder {
+    id_gen: usize,
+}
+
+impl GraphvizBuilder {
+    fn next_id(&mut self) -> usize {
+        self.id_gen += 1;
+        self.id_gen
+    }
+
+    // write out the start of the subgraph cluster
+    fn start_cluster(&mut self, f: &mut fmt::Formatter, title: &str) -> 
fmt::Result {
+        writeln!(f, "  subgraph cluster_{}", self.next_id())?;
+        writeln!(f, "  {{")?;
+        writeln!(f, "    graph[label={}]", Self::quoted(title))
+    }
+
+    // write out the end of the subgraph cluster
+    fn end_cluster(&mut self, f: &mut fmt::Formatter) -> fmt::Result {
+        writeln!(f, "  }}")
+    }
+
+    /// makes a quoted string suitable for inclusion in a graphviz chart
+    fn quoted(label: &str) -> String {
+        let label = label.replace('"', "_");
+        format!("\"{}\"", label)
+    }
+}
+
+/// Formats plans for graphical display using the `DOT` language. This
+/// format can be visualized using software from
+/// [`graphviz`](https://graphviz.org/)
+struct GraphvizVisitor<'a, 'b> {
+    f: &'a mut fmt::Formatter<'b>,
+    graphviz_builder: GraphvizBuilder,
+    /// If true, includes summarized schema information
+    with_schema: bool,
+
+    /// Holds the ids (as generated from `graphviz_builder` of all
+    /// parent nodes
+    parent_ids: Vec<usize>,
+}
+
+impl<'a, 'b> GraphvizVisitor<'a, 'b> {
+    fn new(f: &'a mut fmt::Formatter<'b>) -> Self {
+        Self {
+            f,
+            graphviz_builder: GraphvizBuilder::default(),
+            with_schema: false,
+            parent_ids: Vec::new(),
+        }
+    }
+
+    /// Sets a flag which controls if the output schema is displayed
+    fn set_with_schema(&mut self, with_schema: bool) {
+        self.with_schema = with_schema;
+    }
+
+    fn pre_visit_plan(&mut self, label: &str) -> fmt::Result {
+        self.graphviz_builder.start_cluster(self.f, label)
+    }
+
+    fn post_visit_plan(&mut self) -> fmt::Result {
+        self.graphviz_builder.end_cluster(self.f)
+    }
+}
+
+impl<'a, 'b> PlanVisitor for GraphvizVisitor<'a, 'b> {
+    type Error = fmt::Error;
+
+    fn pre_visit(&mut self, plan: &LogicalPlan) -> std::result::Result<bool, 
fmt::Error> {
+        let id = self.graphviz_builder.next_id();
+
+        // Create a new graph node for `plan` such as
+        // id [label="foo"]
+        let label = if self.with_schema {
+            format!(
+                "{}\\nSchema: {}",
+                plan.display(),
+                display_schema(plan.schema())
+            )
+        } else {
+            format!("{}", plan.display())
+        };
+
+        writeln!(
+            self.f,
+            "    {}[label={}]",

Review comment:
       I think the plans would look better and use less space if we added 
`shape=box` here.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to