This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 41e7aed3a9 Support `EXPLAIN ... FORMAT <indent | tree | json |
graphviz > ...` (#15166)
41e7aed3a9 is described below
commit 41e7aed3a943134c40d1b18cb9d424b358b5e5b1
Author: Andrew Lamb <[email protected]>
AuthorDate: Sat Mar 15 08:19:16 2025 -0400
Support `EXPLAIN ... FORMAT <indent | tree | json | graphviz > ...` (#15166)
* Support EXPLAIN FORMAT <format>
* Update datafusion/sql/src/parser.rs
Co-authored-by: Ruihang Xia <[email protected]>
* Improve documentation
* Remove to_stringified and simplify code
* mark deprecated rather than remove
* Add a note about explain format configuration
---------
Co-authored-by: Ruihang Xia <[email protected]>
---
datafusion-examples/examples/planner_api.rs | 28 +--
datafusion/core/src/execution/session_state.rs | 2 +
datafusion/core/src/physical_planner.rs | 138 +++++++++------
datafusion/expr/src/logical_plan/builder.rs | 3 +-
datafusion/expr/src/logical_plan/mod.rs | 6 +-
datafusion/expr/src/logical_plan/plan.rs | 157 +++++++++++++++++
datafusion/expr/src/logical_plan/tree_node.rs | 2 +
datafusion/physical-plan/src/display.rs | 25 +--
datafusion/sql/src/parser.rs | 44 ++++-
datafusion/sql/src/statement.rs | 32 +++-
datafusion/sqllogictest/test_files/explain.slt | 132 ++++++++++++++
docs/source/user-guide/sql/explain.md | 232 ++++++++++++++++++++++---
12 files changed, 670 insertions(+), 131 deletions(-)
diff --git a/datafusion-examples/examples/planner_api.rs
b/datafusion-examples/examples/planner_api.rs
index 41110a3e0a..4943e593bd 100644
--- a/datafusion-examples/examples/planner_api.rs
+++ b/datafusion-examples/examples/planner_api.rs
@@ -16,8 +16,8 @@
// under the License.
use datafusion::error::Result;
-use datafusion::logical_expr::{LogicalPlan, PlanType};
-use datafusion::physical_plan::{displayable, DisplayFormatType};
+use datafusion::logical_expr::LogicalPlan;
+use datafusion::physical_plan::displayable;
use datafusion::physical_planner::DefaultPhysicalPlanner;
use datafusion::prelude::*;
@@ -77,13 +77,7 @@ async fn to_physical_plan_in_one_api_demo(
println!(
"Physical plan direct from logical plan:\n\n{}\n\n",
- displayable(physical_plan.as_ref())
- .to_stringified(
- false,
- PlanType::InitialPhysicalPlan,
- DisplayFormatType::Default
- )
- .plan
+ displayable(physical_plan.as_ref()).indent(false)
);
Ok(())
@@ -123,13 +117,7 @@ async fn to_physical_plan_step_by_step_demo(
.await?;
println!(
"Final physical plan:\n\n{}\n\n",
- displayable(physical_plan.as_ref())
- .to_stringified(
- false,
- PlanType::InitialPhysicalPlan,
- DisplayFormatType::Default
- )
- .plan
+ displayable(physical_plan.as_ref()).indent(false)
);
// Call the physical optimizer with an existing physical plan (in this
@@ -142,13 +130,7 @@ async fn to_physical_plan_step_by_step_demo(
planner.optimize_physical_plan(physical_plan, &ctx.state(), |_, _|
{})?;
println!(
"Optimized physical plan:\n\n{}\n\n",
- displayable(physical_plan.as_ref())
- .to_stringified(
- false,
- PlanType::InitialPhysicalPlan,
- DisplayFormatType::Default
- )
- .plan
+ displayable(physical_plan.as_ref()).indent(false)
);
Ok(())
diff --git a/datafusion/core/src/execution/session_state.rs
b/datafusion/core/src/execution/session_state.rs
index 002220f93e..32c04409ad 100644
--- a/datafusion/core/src/execution/session_state.rs
+++ b/datafusion/core/src/execution/session_state.rs
@@ -577,6 +577,7 @@ impl SessionState {
return Ok(LogicalPlan::Explain(Explain {
verbose: e.verbose,
plan: Arc::clone(&e.plan),
+ explain_format: e.explain_format.clone(),
stringified_plans,
schema: Arc::clone(&e.schema),
logical_optimization_succeeded: false,
@@ -612,6 +613,7 @@ impl SessionState {
Ok(LogicalPlan::Explain(Explain {
verbose: e.verbose,
+ explain_format: e.explain_format.clone(),
plan,
stringified_plans,
schema: Arc::clone(&e.schema),
diff --git a/datafusion/core/src/physical_planner.rs
b/datafusion/core/src/physical_planner.rs
index 170f85af7a..135b32a0a8 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -19,7 +19,6 @@
use std::borrow::Cow;
use std::collections::HashMap;
-use std::str::FromStr;
use std::sync::Arc;
use crate::datasource::file_format::file_type_to_format;
@@ -78,8 +77,8 @@ use datafusion_expr::expr::{
use datafusion_expr::expr_rewriter::unnormalize_cols;
use
datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary;
use datafusion_expr::{
- Analyze, DescribeTable, DmlStatement, Explain, Extension, FetchType,
Filter,
- JoinType, RecursiveQuery, SkipType, SortExpr, StringifiedPlan, WindowFrame,
+ Analyze, DescribeTable, DmlStatement, Explain, ExplainFormat, Extension,
FetchType,
+ Filter, JoinType, RecursiveQuery, SkipType, SortExpr, StringifiedPlan,
WindowFrame,
WindowFrameBound, WriteOp,
};
use datafusion_physical_expr::aggregate::{AggregateExprBuilder,
AggregateFunctionExpr};
@@ -89,7 +88,6 @@ use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_plan::execution_plan::InvariantLevel;
use datafusion_physical_plan::placeholder_row::PlaceholderRowExec;
use datafusion_physical_plan::unnest::ListUnnest;
-use datafusion_physical_plan::DisplayFormatType;
use crate::schema_equivalence::schema_satisfied_by;
use async_trait::async_trait;
@@ -1742,12 +1740,54 @@ impl DefaultPhysicalPlanner {
let mut stringified_plans = vec![];
let config = &session_state.config_options().explain;
- let explain_format = DisplayFormatType::from_str(&config.format)?;
+ let explain_format = &e.explain_format;
+
+ match explain_format {
+ ExplainFormat::Indent => { /* fall through */ }
+ ExplainFormat::Tree => {
+ // Tree render does not try to explain errors,
+ let physical_plan = self
+ .create_initial_plan(e.plan.as_ref(), session_state)
+ .await?;
+
+ let optimized_plan = self.optimize_physical_plan(
+ physical_plan,
+ session_state,
+ |_plan, _optimizer| {},
+ )?;
+
+ stringified_plans.push(StringifiedPlan::new(
+ FinalPhysicalPlan,
+ displayable(optimized_plan.as_ref())
+ .tree_render()
+ .to_string(),
+ ));
+ }
+ ExplainFormat::PostgresJSON => {
+ stringified_plans.push(StringifiedPlan::new(
+ FinalLogicalPlan,
+ e.plan.display_pg_json().to_string(),
+ ));
+ }
+ ExplainFormat::Graphviz => {
+ stringified_plans.push(StringifiedPlan::new(
+ FinalLogicalPlan,
+ e.plan.display_graphviz().to_string(),
+ ));
+ }
+ };
- let skip_logical_plan =
- config.physical_plan_only || explain_format ==
DisplayFormatType::TreeRender;
+ if !stringified_plans.is_empty() {
+ return Ok(Arc::new(ExplainExec::new(
+ Arc::clone(e.schema.inner()),
+ stringified_plans,
+ e.verbose,
+ )));
+ }
- if !skip_logical_plan {
+ // The indent mode is quite sophisticated, and handles quite a few
+ // different cases / options for displaying the plan.
+ if !config.physical_plan_only {
stringified_plans.clone_from(&e.stringified_plans);
if e.logical_optimization_succeeded {
stringified_plans.push(e.plan.to_stringified(FinalLogicalPlan));
@@ -1761,41 +1801,35 @@ impl DefaultPhysicalPlanner {
{
Ok(input) => {
// Include statistics / schema if enabled
- stringified_plans.push(
+ stringified_plans.push(StringifiedPlan::new(
+ InitialPhysicalPlan,
displayable(input.as_ref())
.set_show_statistics(config.show_statistics)
.set_show_schema(config.show_schema)
- .to_stringified(
- e.verbose,
- InitialPhysicalPlan,
- explain_format,
- ),
- );
+ .indent(e.verbose)
+ .to_string(),
+ ));
// Show statistics + schema in verbose output even if not
// explicitly requested
if e.verbose {
if !config.show_statistics {
- stringified_plans.push(
+ stringified_plans.push(StringifiedPlan::new(
+ InitialPhysicalPlanWithStats,
displayable(input.as_ref())
.set_show_statistics(true)
- .to_stringified(
- e.verbose,
- InitialPhysicalPlanWithStats,
- explain_format,
- ),
- );
+ .indent(e.verbose)
+ .to_string(),
+ ));
}
if !config.show_schema {
- stringified_plans.push(
+ stringified_plans.push(StringifiedPlan::new(
+ InitialPhysicalPlanWithSchema,
displayable(input.as_ref())
.set_show_schema(true)
- .to_stringified(
- e.verbose,
- InitialPhysicalPlanWithSchema,
- explain_format,
- ),
- );
+ .indent(e.verbose)
+ .to_string(),
+ ));
}
}
@@ -1805,52 +1839,50 @@ impl DefaultPhysicalPlanner {
|plan, optimizer| {
let optimizer_name = optimizer.name().to_string();
let plan_type = OptimizedPhysicalPlan {
optimizer_name };
- stringified_plans.push(
+ stringified_plans.push(StringifiedPlan::new(
+ plan_type,
displayable(plan)
.set_show_statistics(config.show_statistics)
.set_show_schema(config.show_schema)
- .to_stringified(e.verbose, plan_type,
explain_format),
- );
+ .indent(e.verbose)
+ .to_string(),
+ ));
},
);
match optimized_plan {
Ok(input) => {
// This plan will includes statistics if
show_statistics is on
- stringified_plans.push(
+ stringified_plans.push(StringifiedPlan::new(
+ FinalPhysicalPlan,
displayable(input.as_ref())
.set_show_statistics(config.show_statistics)
.set_show_schema(config.show_schema)
- .to_stringified(
- e.verbose,
- FinalPhysicalPlan,
- explain_format,
- ),
- );
+ .indent(e.verbose)
+ .to_string(),
+ ));
// Show statistics + schema in verbose output even
if not
// explicitly requested
if e.verbose {
if !config.show_statistics {
- stringified_plans.push(
+
stringified_plans.push(StringifiedPlan::new(
+ FinalPhysicalPlanWithStats,
displayable(input.as_ref())
.set_show_statistics(true)
- .to_stringified(
- e.verbose,
- FinalPhysicalPlanWithStats,
- explain_format,
- ),
- );
+ .indent(e.verbose)
+ .to_string(),
+ ));
}
if !config.show_schema {
- stringified_plans.push(
+
stringified_plans.push(StringifiedPlan::new(
+ FinalPhysicalPlanWithSchema,
+ // This will include schema if
show_schema is on
+ // and will be set to true if verbose
is on
displayable(input.as_ref())
.set_show_schema(true)
- .to_stringified(
- e.verbose,
- FinalPhysicalPlanWithSchema,
- explain_format,
- ),
- );
+ .indent(e.verbose)
+ .to_string(),
+ ));
}
}
}
diff --git a/datafusion/expr/src/logical_plan/builder.rs
b/datafusion/expr/src/logical_plan/builder.rs
index f60bb2f007..f506c0671b 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -46,7 +46,7 @@ use crate::{
};
use super::dml::InsertOp;
-use super::plan::ColumnUnnestList;
+use super::plan::{ColumnUnnestList, ExplainFormat};
use arrow::compute::can_cast_types;
use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef};
use datafusion_common::display::ToStringifiedPlan;
@@ -1211,6 +1211,7 @@ impl LogicalPlanBuilder {
Ok(Self::new(LogicalPlan::Explain(Explain {
verbose,
plan: self.plan,
+ explain_format: ExplainFormat::Indent,
stringified_plans,
schema,
logical_optimization_succeeded: false,
diff --git a/datafusion/expr/src/logical_plan/mod.rs
b/datafusion/expr/src/logical_plan/mod.rs
index 916b2131be..a55f4d97b2 100644
--- a/datafusion/expr/src/logical_plan/mod.rs
+++ b/datafusion/expr/src/logical_plan/mod.rs
@@ -38,9 +38,9 @@ pub use ddl::{
pub use dml::{DmlStatement, WriteOp};
pub use plan::{
projection_schema, Aggregate, Analyze, ColumnUnnestList, DescribeTable,
Distinct,
- DistinctOn, EmptyRelation, Explain, Extension, FetchType, Filter, Join,
- JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, PlanType,
Projection,
- RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan, Subquery,
+ DistinctOn, EmptyRelation, Explain, ExplainFormat, Extension, FetchType,
Filter,
+ Join, JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, PlanType,
+ Projection, RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan,
Subquery,
SubqueryAlias, TableScan, ToStringifiedPlan, Union, Unnest, Values, Window,
};
pub use statement::{
diff --git a/datafusion/expr/src/logical_plan/plan.rs
b/datafusion/expr/src/logical_plan/plan.rs
index 0dbce941a8..4196ebf28f 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -21,6 +21,7 @@ use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::{self, Debug, Display, Formatter};
use std::hash::{Hash, Hasher};
+use std::str::FromStr;
use std::sync::{Arc, LazyLock};
use super::dml::CopyTo;
@@ -1086,6 +1087,7 @@ impl LogicalPlan {
Ok(LogicalPlan::Explain(Explain {
verbose: e.verbose,
plan: Arc::new(input),
+ explain_format: e.explain_format.clone(),
stringified_plans: e.stringified_plans.clone(),
schema: Arc::clone(&e.schema),
logical_optimization_succeeded:
e.logical_optimization_succeeded,
@@ -2926,12 +2928,167 @@ impl PartialOrd for DescribeTable {
}
}
+/// Output formats for controlling for Explain plans
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum ExplainFormat {
+ /// Indent mode
+ ///
+ /// Example:
+ /// ```text
+ /// > explain format indent select x from values (1) t(x);
+ /// +---------------+-----------------------------------------------------+
+ /// | plan_type | plan |
+ /// +---------------+-----------------------------------------------------+
+ /// | logical_plan | SubqueryAlias: t |
+ /// | | Projection: column1 AS x |
+ /// | | Values: (Int64(1)) |
+ /// | physical_plan | ProjectionExec: expr=[column1@0 as x] |
+ /// | | DataSourceExec: partitions=1, partition_sizes=[1] |
+ /// | | |
+ /// +---------------+-----------------------------------------------------+
+ /// ```
+ Indent,
+ /// Tree mode
+ ///
+ /// Example:
+ /// ```text
+ /// > explain format tree select x from values (1) t(x);
+ /// +---------------+-------------------------------+
+ /// | plan_type | plan |
+ /// +---------------+-------------------------------+
+ /// | physical_plan | ┌───────────────────────────┐ |
+ /// | | │ ProjectionExec │ |
+ /// | | │ -------------------- │ |
+ /// | | │ x: column1@0 │ |
+ /// | | └─────────────┬─────────────┘ |
+ /// | | ┌─────────────┴─────────────┐ |
+ /// | | │ DataSourceExec │ |
+ /// | | │ -------------------- │ |
+ /// | | │ bytes: 128 │ |
+ /// | | │ format: memory │ |
+ /// | | │ rows: 1 │ |
+ /// | | └───────────────────────────┘ |
+ /// | | |
+ /// +---------------+-------------------------------+
+ /// ```
+ Tree,
+ /// Postgres Json mode
+ ///
+ /// A displayable structure that produces plan in postgresql JSON format.
+ ///
+ /// Users can use this format to visualize the plan in existing plan
+ /// visualization tools, for example [dalibo](https://explain.dalibo.com/)
+ ///
+ /// Example:
+ /// ```text
+ /// > explain format pgjson select x from values (1) t(x);
+ /// +--------------+--------------------------------------+
+ /// | plan_type | plan |
+ /// +--------------+--------------------------------------+
+ /// | logical_plan | [ |
+ /// | | { |
+ /// | | "Plan": { |
+ /// | | "Alias": "t", |
+ /// | | "Node Type": "Subquery", |
+ /// | | "Output": [ |
+ /// | | "x" |
+ /// | | ], |
+ /// | | "Plans": [ |
+ /// | | { |
+ /// | | "Expressions": [ |
+ /// | | "column1 AS x" |
+ /// | | ], |
+ /// | | "Node Type": "Projection", |
+ /// | | "Output": [ |
+ /// | | "x" |
+ /// | | ], |
+ /// | | "Plans": [ |
+ /// | | { |
+ /// | | "Node Type": "Values", |
+ /// | | "Output": [ |
+ /// | | "column1" |
+ /// | | ], |
+ /// | | "Plans": [], |
+ /// | | "Values": "(Int64(1))" |
+ /// | | } |
+ /// | | ] |
+ /// | | } |
+ /// | | ] |
+ /// | | } |
+ /// | | } |
+ /// | | ] |
+ /// +--------------+--------------------------------------+
+ /// ```
+ PostgresJSON,
+ /// Graphviz mode
+ ///
+ /// Example:
+ /// ```text
+ /// > explain format graphviz select x from values (1) t(x);
+ ///
+--------------+------------------------------------------------------------------------+
+ /// | plan_type | plan
|
+ ///
+--------------+------------------------------------------------------------------------+
+ /// | logical_plan |
|
+ /// | | // Begin DataFusion GraphViz Plan,
|
+ /// | | // display it online here:
https://dreampuf.github.io/GraphvizOnline |
+ /// | |
|
+ /// | | digraph {
|
+ /// | | subgraph cluster_1
|
+ /// | | {
|
+ /// | | graph[label="LogicalPlan"]
|
+ /// | | 2[shape=box label="SubqueryAlias: t"]
|
+ /// | | 3[shape=box label="Projection: column1 AS x"]
|
+ /// | | 2 -> 3 [arrowhead=none, arrowtail=normal,
dir=back] |
+ /// | | 4[shape=box label="Values: (Int64(1))"]
|
+ /// | | 3 -> 4 [arrowhead=none, arrowtail=normal,
dir=back] |
+ /// | | }
|
+ /// | | subgraph cluster_5
|
+ /// | | {
|
+ /// | | graph[label="Detailed LogicalPlan"]
|
+ /// | | 6[shape=box label="SubqueryAlias: t\nSchema:
[x:Int64;N]"] |
+ /// | | 7[shape=box label="Projection: column1 AS
x\nSchema: [x:Int64;N]"] |
+ /// | | 6 -> 7 [arrowhead=none, arrowtail=normal,
dir=back] |
+ /// | | 8[shape=box label="Values: (Int64(1))\nSchema:
[column1:Int64;N]"] |
+ /// | | 7 -> 8 [arrowhead=none, arrowtail=normal,
dir=back] |
+ /// | | }
|
+ /// | | }
|
+ /// | | // End DataFusion GraphViz Plan
|
+ /// | |
|
+ ///
+--------------+------------------------------------------------------------------------+
+ /// ```
+ Graphviz,
+}
+
+/// Implement parsing strings to `ExplainFormat`
+impl FromStr for ExplainFormat {
+ type Err = DataFusionError;
+
+ fn from_str(format: &str) -> std::result::Result<Self, Self::Err> {
+ match format.to_lowercase().as_str() {
+ "indent" => Ok(ExplainFormat::Indent),
+ "tree" => Ok(ExplainFormat::Tree),
+ "pgjson" => Ok(ExplainFormat::PostgresJSON),
+ "graphviz" => Ok(ExplainFormat::Graphviz),
+ _ => {
+ plan_err!("Invalid explain format. Expected 'indent', 'tree',
'pgjson' or 'graphviz'. Got '{format}'")
+ }
+ }
+ }
+}
+
/// Produces a relation with string representations of
/// various parts of the plan
+///
+/// See [the documentation] for more information
+///
+/// [the documentation]:
https://datafusion.apache.org/user-guide/sql/explain.html
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Explain {
/// Should extra (detailed, intermediate plans) be included?
pub verbose: bool,
+ /// Output format for explain, if specified.
+ /// If none, defaults to `text`
+ pub explain_format: ExplainFormat,
/// The logical plan that is being EXPLAIN'd
pub plan: Arc<LogicalPlan>,
/// Represent the various stages plans have gone through
diff --git a/datafusion/expr/src/logical_plan/tree_node.rs
b/datafusion/expr/src/logical_plan/tree_node.rs
index 87b49a3a2e..7f6e1e0253 100644
--- a/datafusion/expr/src/logical_plan/tree_node.rs
+++ b/datafusion/expr/src/logical_plan/tree_node.rs
@@ -204,6 +204,7 @@ impl TreeNode for LogicalPlan {
.update_data(LogicalPlan::Distinct),
LogicalPlan::Explain(Explain {
verbose,
+ explain_format: format,
plan,
stringified_plans,
schema,
@@ -211,6 +212,7 @@ impl TreeNode for LogicalPlan {
}) => plan.map_elements(f)?.update_data(|plan| {
LogicalPlan::Explain(Explain {
verbose,
+ explain_format: format,
plan,
stringified_plans,
schema,
diff --git a/datafusion/physical-plan/src/display.rs
b/datafusion/physical-plan/src/display.rs
index 564f7ac459..f437295a35 100644
--- a/datafusion/physical-plan/src/display.rs
+++ b/datafusion/physical-plan/src/display.rs
@@ -19,13 +19,12 @@
//! [`crate::displayable`] for examples of how to format
use std::collections::{BTreeMap, HashMap};
+use std::fmt;
use std::fmt::Formatter;
-use std::{fmt, str::FromStr};
use arrow::datatypes::SchemaRef;
use datafusion_common::display::{GraphvizBuilder, PlanType, StringifiedPlan};
-use datafusion_common::DataFusionError;
use datafusion_expr::display_schema;
use datafusion_physical_expr::LexOrdering;
@@ -39,7 +38,7 @@ pub enum DisplayFormatType {
/// Default, compact format. Example: `FilterExec: c12 < 10.0`
///
/// This format is designed to provide a detailed textual description
- /// of all rele
+ /// of all parts of the plan.
Default,
/// Verbose, showing all available details.
///
@@ -79,21 +78,6 @@ pub enum DisplayFormatType {
TreeRender,
}
-impl FromStr for DisplayFormatType {
- type Err = DataFusionError;
-
- fn from_str(s: &str) -> Result<Self, Self::Err> {
- match s.to_lowercase().as_str() {
- "indent" => Ok(Self::Default),
- "tree" => Ok(Self::TreeRender),
- _ => Err(DataFusionError::Configuration(format!(
- "Invalid explain format: {}",
- s
- ))),
- }
- }
-}
-
/// Wraps an `ExecutionPlan` with various methods for formatting
///
///
@@ -280,6 +264,9 @@ impl<'a> DisplayableExecutionPlan<'a> {
}
}
+ /// Formats the plan using a ASCII art like tree
+ ///
+ /// See [`DisplayFormatType::TreeRender`] for more details.
pub fn tree_render(&self) -> impl fmt::Display + 'a {
struct Wrapper<'a> {
plan: &'a dyn ExecutionPlan,
@@ -326,7 +313,7 @@ impl<'a> DisplayableExecutionPlan<'a> {
}
}
- /// format as a `StringifiedPlan`
+ #[deprecated(since = "47.0.0", note = "indent() or tree_render() instead")]
pub fn to_stringified(
&self,
verbose: bool,
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index 92bfd8f9f0..d320b4acd0 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -16,6 +16,9 @@
// under the License.
//! [`DFParser`]: DataFusion SQL Parser based on [`sqlparser`]
+//!
+//! This parser implements DataFusion specific statements such as
+//! `CREATE EXTERNAL TABLE`
use std::collections::VecDeque;
use std::fmt;
@@ -43,12 +46,23 @@ fn parse_file_type(s: &str) -> Result<String, ParserError> {
Ok(s.to_uppercase())
}
-/// DataFusion specific EXPLAIN (needed so we can EXPLAIN datafusion
-/// specific COPY and other statements)
+/// DataFusion specific `EXPLAIN`
+///
+/// Syntax:
+/// ```sql
+/// EXPLAIN <ANALYZE> <VERBOSE> [FORMAT format] statement
+///```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExplainStatement {
+ /// `EXPLAIN ANALYZE ..`
pub analyze: bool,
+ /// `EXPLAIN .. VERBOSE ..`
pub verbose: bool,
+ /// `EXPLAIN .. FORMAT `
+ pub format: Option<String>,
+ /// The statement to analyze. Note this is a DataFusion [`Statement`] (not
a
+ /// [`sqlparser::ast::Statement`] so that we can use `EXPLAIN`, `COPY`,
and other
+ /// DataFusion specific statements
pub statement: Box<Statement>,
}
@@ -57,6 +71,7 @@ impl fmt::Display for ExplainStatement {
let Self {
analyze,
verbose,
+ format,
statement,
} = self;
@@ -67,6 +82,9 @@ impl fmt::Display for ExplainStatement {
if *verbose {
write!(f, "VERBOSE ")?;
}
+ if let Some(format) = format.as_ref() {
+ write!(f, "FORMAT {format} ")?;
+ }
write!(f, "{statement}")
}
@@ -446,7 +464,6 @@ impl<'a> DFParser<'a> {
self.parse_copy()
}
Keyword::EXPLAIN => {
- // (TODO parse all supported statements)
self.parser.next_token(); // EXPLAIN
self.parse_explain()
}
@@ -620,15 +637,35 @@ impl<'a> DFParser<'a> {
pub fn parse_explain(&mut self) -> Result<Statement, ParserError> {
let analyze = self.parser.parse_keyword(Keyword::ANALYZE);
let verbose = self.parser.parse_keyword(Keyword::VERBOSE);
+ let format = self.parse_explain_format()?;
+
let statement = self.parse_statement()?;
Ok(Statement::Explain(ExplainStatement {
statement: Box::new(statement),
analyze,
verbose,
+ format,
}))
}
+ pub fn parse_explain_format(&mut self) -> Result<Option<String>,
ParserError> {
+ if !self.parser.parse_keyword(Keyword::FORMAT) {
+ return Ok(None);
+ }
+
+ let next_token = self.parser.next_token();
+ let format = match next_token.token {
+ Token::Word(w) => Ok(w.value),
+ Token::SingleQuotedString(w) => Ok(w),
+ Token::DoubleQuotedString(w) => Ok(w),
+ _ => self
+ .parser
+ .expected("an explain format such as TREE", next_token),
+ }?;
+ Ok(Some(format))
+ }
+
/// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE`
pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
if self.parser.parse_keyword(Keyword::EXTERNAL) {
@@ -1543,6 +1580,7 @@ mod tests {
let expected = Statement::Explain(ExplainStatement {
analyze,
verbose,
+ format: None,
statement: Box::new(expected_copy),
});
assert_eq!(verified_stmt(sql), expected);
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index fbe6d6501c..b4e378a724 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -48,9 +48,9 @@ use datafusion_expr::{
CreateExternalTable as PlanCreateExternalTable, CreateFunction,
CreateFunctionBody,
CreateIndex as PlanCreateIndex, CreateMemoryTable, CreateView, Deallocate,
DescribeTable, DmlStatement, DropCatalogSchema, DropFunction, DropTable,
DropView,
- EmptyRelation, Execute, Explain, Expr, ExprSchemable, Filter, LogicalPlan,
- LogicalPlanBuilder, OperateFunctionArg, PlanType, Prepare, SetVariable,
SortExpr,
- Statement as PlanStatement, ToStringifiedPlan, TransactionAccessMode,
+ EmptyRelation, Execute, Explain, ExplainFormat, Expr, ExprSchemable,
Filter,
+ LogicalPlan, LogicalPlanBuilder, OperateFunctionArg, PlanType, Prepare,
SetVariable,
+ SortExpr, Statement as PlanStatement, ToStringifiedPlan,
TransactionAccessMode,
TransactionConclusion, TransactionEnd, TransactionIsolationLevel,
TransactionStart,
Volatility, WriteOp,
};
@@ -177,8 +177,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
DFStatement::Explain(ExplainStatement {
verbose,
analyze,
+ format,
statement,
- }) => self.explain_to_plan(verbose, analyze, *statement),
+ }) => self.explain_to_plan(verbose, analyze, format, *statement),
}
}
@@ -214,11 +215,13 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
verbose,
statement,
analyze,
- format: _,
+ format,
describe_alias: _,
..
} => {
- self.explain_to_plan(verbose, analyze,
DFStatement::Statement(statement))
+ let format = format.map(|format| format.to_string());
+ let statement = DFStatement::Statement(statement);
+ self.explain_to_plan(verbose, analyze, format, statement)
}
Statement::Query(query) => self.query_to_plan(*query,
planner_context),
Statement::ShowVariable { variable } =>
self.show_variable_to_plan(&variable),
@@ -1564,17 +1567,26 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
&self,
verbose: bool,
analyze: bool,
+ format: Option<String>,
statement: DFStatement,
) -> Result<LogicalPlan> {
let plan = self.statement_to_plan(statement)?;
if matches!(plan, LogicalPlan::Explain(_)) {
return plan_err!("Nested EXPLAINs are not supported");
}
+
let plan = Arc::new(plan);
let schema = LogicalPlan::explain_schema();
let schema = schema.to_dfschema_ref()?;
+ if verbose && format.is_some() {
+ return plan_err!("EXPLAIN VERBOSE with FORMAT is not supported");
+ }
+
if analyze {
+ if format.is_some() {
+ return plan_err!("EXPLAIN ANALYZE with FORMAT is not
supported");
+ }
Ok(LogicalPlan::Analyze(Analyze {
verbose,
input: plan,
@@ -1583,8 +1595,16 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
} else {
let stringified_plans =
vec![plan.to_stringified(PlanType::InitialLogicalPlan)];
+
+ // default to configuration value
+ let options = self.context_provider.options();
+ let format = format.as_ref().unwrap_or(&options.explain.format);
+
+ let format: ExplainFormat = format.parse()?;
+
Ok(LogicalPlan::Explain(Explain {
verbose,
+ explain_format: format,
plan,
stringified_plans,
schema,
diff --git a/datafusion/sqllogictest/test_files/explain.slt
b/datafusion/sqllogictest/test_files/explain.slt
index 1d63d02bb9..2d4f504e2d 100644
--- a/datafusion/sqllogictest/test_files/explain.slt
+++ b/datafusion/sqllogictest/test_files/explain.slt
@@ -434,3 +434,135 @@ drop table t1;
statement ok
drop table t2;
+
+## Tests for explain format
+
+statement ok
+create table foo (x int, y int) as values (1,2), (1,3), (2,4);
+
+# defaults to indent mode
+query TT
+explain select * from values (1);
+----
+logical_plan Values: (Int64(1))
+physical_plan DataSourceExec: partitions=1, partition_sizes=[1]
+
+# can explicitly request indent mode
+query TT
+explain format indent select * from values (1);
+----
+logical_plan Values: (Int64(1))
+physical_plan DataSourceExec: partitions=1, partition_sizes=[1]
+
+# tree mode
+query TT
+explain format tree select * from values (1);
+----
+physical_plan
+01)┌───────────────────────────┐
+02)│ DataSourceExec │
+03)│ -------------------- │
+04)│ bytes: 128 │
+05)│ format: memory │
+06)│ rows: 1 │
+07)└───────────────────────────┘
+
+# is not case sensitive
+query TT
+explain format TrEE select * from values (1);
+----
+physical_plan
+01)┌───────────────────────────┐
+02)│ DataSourceExec │
+03)│ -------------------- │
+04)│ bytes: 128 │
+05)│ format: memory │
+06)│ rows: 1 │
+07)└───────────────────────────┘
+
+# wrapped in single quotes
+query TT
+explain format 'tree' select * from values (1);
+----
+physical_plan
+01)┌───────────────────────────┐
+02)│ DataSourceExec │
+03)│ -------------------- │
+04)│ bytes: 128 │
+05)│ format: memory │
+06)│ rows: 1 │
+07)└───────────────────────────┘
+
+# wrapped in double quotes
+query TT
+explain format "tree" select * from values (1);
+----
+physical_plan
+01)┌───────────────────────────┐
+02)│ DataSourceExec │
+03)│ -------------------- │
+04)│ bytes: 128 │
+05)│ format: memory │
+06)│ rows: 1 │
+07)└───────────────────────────┘
+
+# number is not a valid format
+query error DataFusion error: SQL error: ParserError\("Expected: an explain
format such as TREE, found: 123 at Line: 1, Column: 16"\)
+explain format 123 select * from values (1);
+
+# verbose tree mode not supported
+query error DataFusion error: Error during planning: EXPLAIN VERBOSE with
FORMAT is not supported
+explain verbose format tree select * from values (1);
+
+# no such thing as json mode
+query error DataFusion error: Error during planning: Invalid explain format\.
Expected 'indent', 'tree', 'pgjson' or 'graphviz'\. Got 'json'
+explain format json select * from values (1);
+
+query error DataFusion error: Error during planning: Invalid explain format\.
Expected 'indent', 'tree', 'pgjson' or 'graphviz'\. Got 'foo'
+explain format foo select * from values (1);
+
+# pgjson mode
+query TT
+explain format pgjson select * from values (1);
+----
+logical_plan
+01)[
+02)--{
+03)----"Plan": {
+04)------"Node Type": "Values",
+05)------"Output": [
+06)--------"column1"
+07)------],
+08)------"Plans": [],
+09)------"Values": "(Int64(1))"
+10)----}
+11)--}
+12)]
+
+# graphviz mode
+query TT
+explain format graphviz select * from values (1);
+----
+logical_plan
+01)
+02)// Begin DataFusion GraphViz Plan,
+03)// display it online here: https://dreampuf.github.io/GraphvizOnline
+04)
+05)digraph {
+06)--subgraph cluster_1
+07)--{
+08)----graph[label="LogicalPlan"]
+09)----2[shape=box label="Values: (Int64(1))"]
+10)--}
+11)--subgraph cluster_3
+12)--{
+13)----graph[label="Detailed LogicalPlan"]
+14)----4[shape=box label="Values: (Int64(1))\nSchema: [column1:Int64;N]"]
+15)--}
+16)}
+17)// End DataFusion GraphViz Plan
+
+# unknown mode
+
+statement ok
+drop table foo;
diff --git a/docs/source/user-guide/sql/explain.md
b/docs/source/user-guide/sql/explain.md
index 3f2c7de43e..f89e854ebf 100644
--- a/docs/source/user-guide/sql/explain.md
+++ b/docs/source/user-guide/sql/explain.md
@@ -21,41 +21,227 @@
The `EXPLAIN` command shows the logical and physical execution plan for the
specified SQL statement.
-See the [Reading Explain Plans](../explain-usage.md) page for more information
on how to interpret these plans.
+## Syntax
<pre>
-EXPLAIN [ANALYZE] [VERBOSE] statement
+EXPLAIN [ANALYZE] [VERBOSE] [FORMAT format] statement
</pre>
-## EXPLAIN
+## `EXPLAIN`
Shows the execution plan of a statement.
If you need more detailed output, use `EXPLAIN VERBOSE`.
+Note that `EXPLAIN VERBOSE` only supports the `indent` format.
+
+The optional `[FORMAT format]` clause controls how the plan is displayed as
+explained below. If this clause is not specified, the plan is displayed using
+the format from the [configuration value] `datafusion.explain.format`.
+
+[configuration value]: ../configs.md
+
+### `indent` format (default)
+
+The `indent` format shows both the logical and physical plan, with one line for
+each operator in the plan. Child plans are indented to show the hierarchy.
+
+See [Reading Explain Plans](../explain-usage.md) for more information on how
to interpret these plans.
+
+```sql
+> CREATE TABLE t(x int, b int) AS VALUES (1, 2), (2, 3);
+0 row(s) fetched.
+Elapsed 0.004 seconds.
+
+> EXPLAIN SELECT SUM(x) FROM t GROUP BY b;
++---------------+-------------------------------------------------------------------------------+
+| plan_type | plan
|
++---------------+-------------------------------------------------------------------------------+
+| logical_plan | Projection: sum(t.x)
|
+| | Aggregate: groupBy=[[t.b]], aggr=[[sum(CAST(t.x AS
Int64))]] |
+| | TableScan: t projection=[x, b]
|
+| physical_plan | ProjectionExec: expr=[sum(t.x)@1 as sum(t.x)]
|
+| | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b],
aggr=[sum(t.x)] |
+| | CoalesceBatchesExec: target_batch_size=8192
|
+| | RepartitionExec: partitioning=Hash([b@0], 16),
input_partitions=16 |
+| | RepartitionExec: partitioning=RoundRobinBatch(16),
input_partitions=1 |
+| | AggregateExec: mode=Partial, gby=[b@1 as b],
aggr=[sum(t.x)] |
+| | DataSourceExec: partitions=1,
partition_sizes=[1] |
+| |
|
++---------------+-------------------------------------------------------------------------------+
+2 row(s) fetched.
+Elapsed 0.004 seconds.
+```
+
+### `tree` format
+
+The `tree` format is modeled after [DuckDB plans] and is designed to be easier
+to see the high level structure of the plan
+
+[duckdb plans]: https://duckdb.org/docs/stable/guides/meta/explain.html
+
+```sql
+> EXPLAIN FORMAT TREE SELECT SUM(x) FROM t GROUP BY b;
++---------------+-------------------------------+
+| plan_type | plan |
++---------------+-------------------------------+
+| physical_plan | ┌───────────────────────────┐ |
+| | │ ProjectionExec │ |
+| | │ -------------------- │ |
+| | │ sum(t.x): sum(t.x)@1 │ |
+| | └─────────────┬─────────────┘ |
+| | ┌─────────────┴─────────────┐ |
+| | │ AggregateExec │ |
+| | │ -------------------- │ |
+| | │ aggr: sum(t.x) │ |
+| | │ group_by: b@0 as b │ |
+| | │ │ |
+| | │ mode: │ |
+| | │ FinalPartitioned │ |
+| | └─────────────┬─────────────┘ |
+| | ┌─────────────┴─────────────┐ |
+| | │ CoalesceBatchesExec │ |
+| | └─────────────┬─────────────┘ |
+| | ┌─────────────┴─────────────┐ |
+| | │ RepartitionExec │ |
+| | │ -------------------- │ |
+| | │ output_partition_count: │ |
+| | │ 16 │ |
+| | │ │ |
+| | │ partitioning_scheme: │ |
+| | │ Hash([b@0], 16) │ |
+| | └─────────────┬─────────────┘ |
+| | ┌─────────────┴─────────────┐ |
+| | │ RepartitionExec │ |
+| | │ -------------------- │ |
+| | │ output_partition_count: │ |
+| | │ 1 │ |
+| | │ │ |
+| | │ partitioning_scheme: │ |
+| | │ RoundRobinBatch(16) │ |
+| | └─────────────┬─────────────┘ |
+| | ┌─────────────┴─────────────┐ |
+| | │ AggregateExec │ |
+| | │ -------------------- │ |
+| | │ aggr: sum(t.x) │ |
+| | │ group_by: b@1 as b │ |
+| | │ mode: Partial │ |
+| | └─────────────┬─────────────┘ |
+| | ┌─────────────┴─────────────┐ |
+| | │ DataSourceExec │ |
+| | │ -------------------- │ |
+| | │ bytes: 224 │ |
+| | │ format: memory │ |
+| | │ rows: 1 │ |
+| | └───────────────────────────┘ |
+| | |
++---------------+-------------------------------+
+1 row(s) fetched.
+Elapsed 0.016 seconds.
+```
+
+### `pgjson` format
+
+The `pgjson` format is modeled after [Postgres JSON] format.
+
+You can use this format to visualize the plan in existing plan visualization
+tools, such as [dalibo]
+
+[postgres json]: https://www.postgresql.org/docs/current/sql-explain.html
+[dalibo]: https://explain.dalibo.com/
+
+```sql
+> EXPLAIN FORMAT PGJSON SELECT SUM(x) FROM t GROUP BY b;
++--------------+----------------------------------------------------+
+| plan_type | plan |
++--------------+----------------------------------------------------+
+| logical_plan | [ |
+| | { |
+| | "Plan": { |
+| | "Expressions": [ |
+| | "sum(t.x)" |
+| | ], |
+| | "Node Type": "Projection", |
+| | "Output": [ |
+| | "sum(t.x)" |
+| | ], |
+| | "Plans": [ |
+| | { |
+| | "Aggregates": "sum(CAST(t.x AS Int64))", |
+| | "Group By": "t.b", |
+| | "Node Type": "Aggregate", |
+| | "Output": [ |
+| | "b", |
+| | "sum(t.x)" |
+| | ], |
+| | "Plans": [ |
+| | { |
+| | "Node Type": "TableScan", |
+| | "Output": [ |
+| | "x", |
+| | "b" |
+| | ], |
+| | "Plans": [], |
+| | "Relation Name": "t" |
+| | } |
+| | ] |
+| | } |
+| | ] |
+| | } |
+| | } |
+| | ] |
++--------------+----------------------------------------------------+
+1 row(s) fetched.
+Elapsed 0.008 seconds.
+```
+
+### `graphviz` format
+
+The `graphviz` format uses the [DOT language] that can be used with [Graphviz]
to
+generate a visual representation of the plan.
+
+[dot language]: https://graphviz.org/doc/info/lang.html
+[graphviz]: https://graphviz.org/
```sql
-EXPLAIN SELECT SUM(x) FROM table GROUP BY b;
-
-+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| plan_type | plan
|
-+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| logical_plan | Projection: #SUM(table.x)
|
-| | Aggregate: groupBy=[[#table.b]], aggr=[[SUM(#table.x)]]
|
-| | TableScan: table projection=[x, b]
|
-| physical_plan | ProjectionExec: expr=[SUM(table.x)@1 as SUM(table.x)]
|
-| | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b],
aggr=[SUM(table.x)]
|
-| | CoalesceBatchesExec: target_batch_size=4096
|
-| | RepartitionExec: partitioning=Hash([Column { name:
"b", index: 0 }], 16)
|
-| | AggregateExec: mode=Partial, gby=[b@1 as b],
aggr=[SUM(table.x)]
|
-| | RepartitionExec: partitioning=RoundRobinBatch(16)
|
-| | DataSourceExec: file_groups={1 group:
[[/tmp/table.csv]]}, projection=[x, b], has_header=false
|
-| |
|
-+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
+> EXPLAIN FORMAT GRAPHVIZ SELECT SUM(x) FROM t GROUP BY b;
++--------------+------------------------------------------------------------------------------------------------------------------------------+
+| plan_type | plan
|
++--------------+------------------------------------------------------------------------------------------------------------------------------+
+| logical_plan |
|
+| | // Begin DataFusion GraphViz Plan,
|
+| | // display it online here:
https://dreampuf.github.io/GraphvizOnline
|
+| |
|
+| | digraph {
|
+| | subgraph cluster_1
|
+| | {
|
+| | graph[label="LogicalPlan"]
|
+| | 2[shape=box label="Projection: sum(t.x)"]
|
+| | 3[shape=box label="Aggregate: groupBy=[[t.b]],
aggr=[[sum(CAST(t.x AS Int64))]]"] |
+| | 2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]
|
+| | 4[shape=box label="TableScan: t projection=[x, b]"]
|
+| | 3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]
|
+| | }
|
+| | subgraph cluster_5
|
+| | {
|
+| | graph[label="Detailed LogicalPlan"]
|
+| | 6[shape=box label="Projection: sum(t.x)\nSchema:
[sum(t.x):Int64;N]"] |
+| | 7[shape=box label="Aggregate: groupBy=[[t.b]],
aggr=[[sum(CAST(t.x AS Int64))]]\nSchema: [b:Int32;N, sum(t.x):Int64;N]"] |
+| | 6 -> 7 [arrowhead=none, arrowtail=normal, dir=back]
|
+| | 8[shape=box label="TableScan: t projection=[x,
b]\nSchema: [x:Int32;N, b:Int32;N]"] |
+| | 7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]
|
+| | }
|
+| | }
|
+| | // End DataFusion GraphViz Plan
|
+| |
|
++--------------+------------------------------------------------------------------------------------------------------------------------------+
+1 row(s) fetched.
+Elapsed 0.010 seconds.
```
-## EXPLAIN ANALYZE
+## `EXPLAIN ANALYZE`
-Shows the execution plan and metrics of a statement.
-If you need more information output, use `EXPLAIN ANALYZE VERBOSE`.
+Shows the execution plan and metrics of a statement. If you need more
+information output, use `EXPLAIN ANALYZE VERBOSE`. Note that `EXPLAIN ANALYZE`
+only supports the `indent` format.
```sql
EXPLAIN ANALYZE SELECT SUM(x) FROM table GROUP BY b;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]