This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new de79843fc8 Support explain tree format debug for benchmark debug (#16604) de79843fc8 is described below commit de79843fc86be81b67f2b51e2e0cd85191f46fb1 Author: Qi Zhu <821684...@qq.com> AuthorDate: Wed Jul 2 05:28:10 2025 +0800 Support explain tree format debug for benchmark debug (#16604) * Support explain tree format debug for benchmark debug * fmt * format * Address comments * doc fix --- benchmarks/src/clickbench.rs | 9 ++++++- benchmarks/src/h2o.rs | 9 ++++++- datafusion/core/src/dataframe/mod.rs | 33 +++++++++++++++++++++-- datafusion/expr/src/logical_plan/builder.rs | 26 +++++++++++++----- datafusion/expr/src/logical_plan/mod.rs | 8 +++--- datafusion/expr/src/logical_plan/plan.rs | 41 +++++++++++++++++++++++++++++ 6 files changed, 111 insertions(+), 15 deletions(-) diff --git a/benchmarks/src/clickbench.rs b/benchmarks/src/clickbench.rs index 8d1847b1b8..1fa2304f2c 100644 --- a/benchmarks/src/clickbench.rs +++ b/benchmarks/src/clickbench.rs @@ -20,6 +20,7 @@ use std::io::ErrorKind; use std::path::{Path, PathBuf}; use crate::util::{BenchmarkRun, CommonOpt, QueryResult}; +use datafusion::logical_expr::{ExplainFormat, ExplainOption}; use datafusion::{ error::{DataFusionError, Result}, prelude::SessionContext, @@ -181,7 +182,13 @@ impl RunOpt { query_results.push(QueryResult { elapsed, row_count }) } if self.common.debug { - ctx.sql(sql).await?.explain(false, false)?.show().await?; + ctx.sql(sql) + .await? + .explain_with_options( + ExplainOption::default().with_format(ExplainFormat::Tree), + )? + .show() + .await?; } let avg = millis.iter().sum::<f64>() / millis.len() as f64; println!("Query {query_id} avg time: {avg:.2} ms"); diff --git a/benchmarks/src/h2o.rs b/benchmarks/src/h2o.rs index 23dba07f42..009f1708ef 100644 --- a/benchmarks/src/h2o.rs +++ b/benchmarks/src/h2o.rs @@ -21,6 +21,7 @@ //! - [Extended window function benchmark](https://duckdb.org/2024/06/26/benchmarks-over-time.html#window-functions-benchmark) use crate::util::{BenchmarkRun, CommonOpt}; +use datafusion::logical_expr::{ExplainFormat, ExplainOption}; use datafusion::{error::Result, prelude::SessionContext}; use datafusion_common::{ exec_datafusion_err, instant::Instant, internal_err, DataFusionError, @@ -132,7 +133,13 @@ impl RunOpt { println!("Query {query_id} avg time: {avg:.2} ms"); if self.common.debug { - ctx.sql(sql).await?.explain(false, false)?.show().await?; + ctx.sql(sql) + .await? + .explain_with_options( + ExplainOption::default().with_format(ExplainFormat::Tree), + )? + .show() + .await?; } benchmark_run.maybe_write_json(self.output_path.as_ref())?; } diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 7101a30c5d..c9a05fd650 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -61,7 +61,7 @@ use datafusion_expr::{ expr::{Alias, ScalarFunction}, is_null, lit, utils::COUNT_STAR_EXPANSION, - SortExpr, TableProviderFilterPushDown, UNNAMED_TABLE, + ExplainOption, SortExpr, TableProviderFilterPushDown, UNNAMED_TABLE, }; use datafusion_functions::core::coalesce; use datafusion_functions_aggregate::expr_fn::{ @@ -1602,6 +1602,8 @@ impl DataFrame { /// Return a DataFrame with the explanation of its plan so far. /// /// if `analyze` is specified, runs the plan and reports metrics + /// if `verbose` is true, prints out additional details. + /// The default format is Indent format. /// /// ``` /// # use datafusion::prelude::*; @@ -1615,11 +1617,38 @@ impl DataFrame { /// # } /// ``` pub fn explain(self, verbose: bool, analyze: bool) -> Result<DataFrame> { + // Set the default format to Indent to keep the previous behavior + let opts = ExplainOption::default() + .with_verbose(verbose) + .with_analyze(analyze); + self.explain_with_options(opts) + } + + /// Return a DataFrame with the explanation of its plan so far. + /// + /// `opt` is used to specify the options for the explain operation. + /// Details of the options can be found in [`ExplainOption`]. + /// ``` + /// # use datafusion::prelude::*; + /// # use datafusion::error::Result; + /// # #[tokio::main] + /// # async fn main() -> Result<()> { + /// use datafusion_expr::{Explain, ExplainOption}; + /// let ctx = SessionContext::new(); + /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let batches = df.limit(0, Some(100))?.explain_with_options(ExplainOption::default().with_verbose(false).with_analyze(false))?.collect().await?; + /// # Ok(()) + /// # } + /// ``` + pub fn explain_with_options( + self, + explain_option: ExplainOption, + ) -> Result<DataFrame> { if matches!(self.plan, LogicalPlan::Explain(_)) { return plan_err!("Nested EXPLAINs are not supported"); } let plan = LogicalPlanBuilder::from(self.plan) - .explain(verbose, analyze)? + .explain_option_format(explain_option)? .build()?; Ok(DataFrame { session_state: self.session_state, diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 836911bd9f..abede09794 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -43,12 +43,12 @@ use crate::utils::{ group_window_expr_by_sort_keys, }; use crate::{ - and, binary_expr, lit, DmlStatement, Expr, ExprSchemable, Operator, RecursiveQuery, - Statement, TableProviderFilterPushDown, TableSource, WriteOp, + and, binary_expr, lit, DmlStatement, ExplainOption, Expr, ExprSchemable, Operator, + RecursiveQuery, Statement, TableProviderFilterPushDown, TableSource, WriteOp, }; use super::dml::InsertOp; -use super::plan::{ColumnUnnestList, ExplainFormat}; +use super::plan::ColumnUnnestList; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; use datafusion_common::display::ToStringifiedPlan; @@ -1259,12 +1259,24 @@ impl LogicalPlanBuilder { /// /// if `verbose` is true, prints out additional details. pub fn explain(self, verbose: bool, analyze: bool) -> Result<Self> { + // Keep the format default to Indent + self.explain_option_format( + ExplainOption::default() + .with_verbose(verbose) + .with_analyze(analyze), + ) + } + + /// Create an expression to represent the explanation of the plan + /// The`explain_option` is used to specify the format and verbosity of the explanation. + /// Details see [`ExplainOption`]. + pub fn explain_option_format(self, explain_option: ExplainOption) -> Result<Self> { let schema = LogicalPlan::explain_schema(); let schema = schema.to_dfschema_ref()?; - if analyze { + if explain_option.analyze { Ok(Self::new(LogicalPlan::Analyze(Analyze { - verbose, + verbose: explain_option.verbose, input: self.plan, schema, }))) @@ -1273,9 +1285,9 @@ impl LogicalPlanBuilder { vec![self.plan.to_stringified(PlanType::InitialLogicalPlan)]; Ok(Self::new(LogicalPlan::Explain(Explain { - verbose, + verbose: explain_option.verbose, plan: self.plan, - explain_format: ExplainFormat::Indent, + explain_format: explain_option.format, stringified_plans, schema, logical_optimization_succeeded: false, diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs index a55f4d97b2..be5b44098a 100644 --- a/datafusion/expr/src/logical_plan/mod.rs +++ b/datafusion/expr/src/logical_plan/mod.rs @@ -38,10 +38,10 @@ pub use ddl::{ pub use dml::{DmlStatement, WriteOp}; pub use plan::{ projection_schema, Aggregate, Analyze, ColumnUnnestList, DescribeTable, Distinct, - DistinctOn, EmptyRelation, Explain, ExplainFormat, Extension, FetchType, Filter, - Join, JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, PlanType, - Projection, RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan, Subquery, - SubqueryAlias, TableScan, ToStringifiedPlan, Union, Unnest, Values, Window, + DistinctOn, EmptyRelation, Explain, ExplainFormat, ExplainOption, Extension, + FetchType, Filter, Join, JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, + PlanType, Projection, RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan, + Subquery, SubqueryAlias, TableScan, ToStringifiedPlan, Union, Unnest, Values, Window, }; pub use statement::{ Deallocate, Execute, Prepare, SetVariable, Statement, TransactionAccessMode, diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 876c14f100..ca431200ef 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -3095,6 +3095,47 @@ impl FromStr for ExplainFormat { } } +/// Options for EXPLAIN +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ExplainOption { + /// Include detailed debug info + pub verbose: bool, + /// Actually execute the plan and report metrics + pub analyze: bool, + /// Output syntax/format + pub format: ExplainFormat, +} + +impl Default for ExplainOption { + fn default() -> Self { + ExplainOption { + verbose: false, + analyze: false, + format: ExplainFormat::Indent, + } + } +} + +impl ExplainOption { + /// Builder‐style setter for `verbose` + pub fn with_verbose(mut self, verbose: bool) -> Self { + self.verbose = verbose; + self + } + + /// Builder‐style setter for `analyze` + pub fn with_analyze(mut self, analyze: bool) -> Self { + self.analyze = analyze; + self + } + + /// Builder‐style setter for `format` + pub fn with_format(mut self, format: ExplainFormat) -> Self { + self.format = format; + self + } +} + /// Produces a relation with string representations of /// various parts of the plan /// --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org