This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new de79843fc8 Support explain tree format debug for benchmark debug (#16604)
de79843fc8 is described below

commit de79843fc86be81b67f2b51e2e0cd85191f46fb1
Author: Qi Zhu <821684...@qq.com>
AuthorDate: Wed Jul 2 05:28:10 2025 +0800

    Support explain tree format debug for benchmark debug (#16604)
    
    * Support explain tree format debug for benchmark debug
    
    * fmt
    
    * format
    
    * Address comments
    
    * doc fix
---
 benchmarks/src/clickbench.rs                |  9 ++++++-
 benchmarks/src/h2o.rs                       |  9 ++++++-
 datafusion/core/src/dataframe/mod.rs        | 33 +++++++++++++++++++++--
 datafusion/expr/src/logical_plan/builder.rs | 26 +++++++++++++-----
 datafusion/expr/src/logical_plan/mod.rs     |  8 +++---
 datafusion/expr/src/logical_plan/plan.rs    | 41 +++++++++++++++++++++++++++++
 6 files changed, 111 insertions(+), 15 deletions(-)

diff --git a/benchmarks/src/clickbench.rs b/benchmarks/src/clickbench.rs
index 8d1847b1b8..1fa2304f2c 100644
--- a/benchmarks/src/clickbench.rs
+++ b/benchmarks/src/clickbench.rs
@@ -20,6 +20,7 @@ use std::io::ErrorKind;
 use std::path::{Path, PathBuf};
 
 use crate::util::{BenchmarkRun, CommonOpt, QueryResult};
+use datafusion::logical_expr::{ExplainFormat, ExplainOption};
 use datafusion::{
     error::{DataFusionError, Result},
     prelude::SessionContext,
@@ -181,7 +182,13 @@ impl RunOpt {
             query_results.push(QueryResult { elapsed, row_count })
         }
         if self.common.debug {
-            ctx.sql(sql).await?.explain(false, false)?.show().await?;
+            ctx.sql(sql)
+                .await?
+                .explain_with_options(
+                    ExplainOption::default().with_format(ExplainFormat::Tree),
+                )?
+                .show()
+                .await?;
         }
         let avg = millis.iter().sum::<f64>() / millis.len() as f64;
         println!("Query {query_id} avg time: {avg:.2} ms");
diff --git a/benchmarks/src/h2o.rs b/benchmarks/src/h2o.rs
index 23dba07f42..009f1708ef 100644
--- a/benchmarks/src/h2o.rs
+++ b/benchmarks/src/h2o.rs
@@ -21,6 +21,7 @@
 //! - [Extended window function benchmark](https://duckdb.org/2024/06/26/benchmarks-over-time.html#window-functions-benchmark)
 
 use crate::util::{BenchmarkRun, CommonOpt};
+use datafusion::logical_expr::{ExplainFormat, ExplainOption};
 use datafusion::{error::Result, prelude::SessionContext};
 use datafusion_common::{
     exec_datafusion_err, instant::Instant, internal_err, DataFusionError,
@@ -132,7 +133,13 @@ impl RunOpt {
             println!("Query {query_id} avg time: {avg:.2} ms");
 
             if self.common.debug {
-                ctx.sql(sql).await?.explain(false, false)?.show().await?;
+                ctx.sql(sql)
+                    .await?
+                    .explain_with_options(
+                        ExplainOption::default().with_format(ExplainFormat::Tree),
+                    )?
+                    .show()
+                    .await?;
             }
             benchmark_run.maybe_write_json(self.output_path.as_ref())?;
         }
diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs
index 7101a30c5d..c9a05fd650 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -61,7 +61,7 @@ use datafusion_expr::{
     expr::{Alias, ScalarFunction},
     is_null, lit,
     utils::COUNT_STAR_EXPANSION,
-    SortExpr, TableProviderFilterPushDown, UNNAMED_TABLE,
+    ExplainOption, SortExpr, TableProviderFilterPushDown, UNNAMED_TABLE,
 };
 use datafusion_functions::core::coalesce;
 use datafusion_functions_aggregate::expr_fn::{
@@ -1602,6 +1602,8 @@ impl DataFrame {
     /// Return a DataFrame with the explanation of its plan so far.
     ///
     /// if `analyze` is specified, runs the plan and reports metrics
+    /// if `verbose` is true, prints out additional details.
+    /// The default format is Indent format.
     ///
     /// ```
     /// # use datafusion::prelude::*;
@@ -1615,11 +1617,38 @@ impl DataFrame {
     /// # }
     /// ```
     pub fn explain(self, verbose: bool, analyze: bool) -> Result<DataFrame> {
+        // Set the default format to Indent to keep the previous behavior
+        let opts = ExplainOption::default()
+            .with_verbose(verbose)
+            .with_analyze(analyze);
+        self.explain_with_options(opts)
+    }
+
+    /// Return a DataFrame with the explanation of its plan so far.
+    ///
+    /// `opt` is used to specify the options for the explain operation.
+    /// Details of the options can be found in [`ExplainOption`].
+    /// ```
+    /// # use datafusion::prelude::*;
+    /// # use datafusion::error::Result;
+    /// # #[tokio::main]
+    /// # async fn main() -> Result<()> {
+    /// use datafusion_expr::{Explain, ExplainOption};
+    /// let ctx = SessionContext::new();
+    /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
+    /// let batches = df.limit(0, Some(100))?.explain_with_options(ExplainOption::default().with_verbose(false).with_analyze(false))?.collect().await?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn explain_with_options(
+        self,
+        explain_option: ExplainOption,
+    ) -> Result<DataFrame> {
         if matches!(self.plan, LogicalPlan::Explain(_)) {
             return plan_err!("Nested EXPLAINs are not supported");
         }
         let plan = LogicalPlanBuilder::from(self.plan)
-            .explain(verbose, analyze)?
+            .explain_option_format(explain_option)?
             .build()?;
         Ok(DataFrame {
             session_state: self.session_state,
diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs
index 836911bd9f..abede09794 100644
--- a/datafusion/expr/src/logical_plan/builder.rs
+++ b/datafusion/expr/src/logical_plan/builder.rs
@@ -43,12 +43,12 @@ use crate::utils::{
     group_window_expr_by_sort_keys,
 };
 use crate::{
-    and, binary_expr, lit, DmlStatement, Expr, ExprSchemable, Operator, RecursiveQuery,
-    Statement, TableProviderFilterPushDown, TableSource, WriteOp,
+    and, binary_expr, lit, DmlStatement, ExplainOption, Expr, ExprSchemable, Operator,
+    RecursiveQuery, Statement, TableProviderFilterPushDown, TableSource, WriteOp,
 };
 
 use super::dml::InsertOp;
-use super::plan::{ColumnUnnestList, ExplainFormat};
+use super::plan::ColumnUnnestList;
 use arrow::compute::can_cast_types;
 use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef};
 use datafusion_common::display::ToStringifiedPlan;
@@ -1259,12 +1259,24 @@ impl LogicalPlanBuilder {
     ///
     /// if `verbose` is true, prints out additional details.
     pub fn explain(self, verbose: bool, analyze: bool) -> Result<Self> {
+        // Keep the format default to Indent
+        self.explain_option_format(
+            ExplainOption::default()
+                .with_verbose(verbose)
+                .with_analyze(analyze),
+        )
+    }
+
+    /// Create an expression to represent the explanation of the plan
+    /// The`explain_option` is used to specify the format and verbosity of the explanation.
+    /// Details see [`ExplainOption`].
+    pub fn explain_option_format(self, explain_option: ExplainOption) -> Result<Self> {
         let schema = LogicalPlan::explain_schema();
         let schema = schema.to_dfschema_ref()?;
 
-        if analyze {
+        if explain_option.analyze {
             Ok(Self::new(LogicalPlan::Analyze(Analyze {
-                verbose,
+                verbose: explain_option.verbose,
                 input: self.plan,
                 schema,
             })))
@@ -1273,9 +1285,9 @@ impl LogicalPlanBuilder {
                 vec![self.plan.to_stringified(PlanType::InitialLogicalPlan)];
 
             Ok(Self::new(LogicalPlan::Explain(Explain {
-                verbose,
+                verbose: explain_option.verbose,
                 plan: self.plan,
-                explain_format: ExplainFormat::Indent,
+                explain_format: explain_option.format,
                 stringified_plans,
                 schema,
                 logical_optimization_succeeded: false,
diff --git a/datafusion/expr/src/logical_plan/mod.rs b/datafusion/expr/src/logical_plan/mod.rs
index a55f4d97b2..be5b44098a 100644
--- a/datafusion/expr/src/logical_plan/mod.rs
+++ b/datafusion/expr/src/logical_plan/mod.rs
@@ -38,10 +38,10 @@ pub use ddl::{
 pub use dml::{DmlStatement, WriteOp};
 pub use plan::{
     projection_schema, Aggregate, Analyze, ColumnUnnestList, DescribeTable, Distinct,
-    DistinctOn, EmptyRelation, Explain, ExplainFormat, Extension, FetchType, Filter,
-    Join, JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning, PlanType,
-    Projection, RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan, Subquery,
-    SubqueryAlias, TableScan, ToStringifiedPlan, Union, Unnest, Values, Window,
+    DistinctOn, EmptyRelation, Explain, ExplainFormat, ExplainOption, Extension,
+    FetchType, Filter, Join, JoinConstraint, JoinType, Limit, LogicalPlan, Partitioning,
+    PlanType, Projection, RecursiveQuery, Repartition, SkipType, Sort, StringifiedPlan,
+    Subquery, SubqueryAlias, TableScan, ToStringifiedPlan, Union, Unnest, Values, Window,
 };
 pub use statement::{
     Deallocate, Execute, Prepare, SetVariable, Statement, TransactionAccessMode,
diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs
index 876c14f100..ca431200ef 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -3095,6 +3095,47 @@ impl FromStr for ExplainFormat {
     }
 }
 
+/// Options for EXPLAIN
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct ExplainOption {
+    /// Include detailed debug info
+    pub verbose: bool,
+    /// Actually execute the plan and report metrics
+    pub analyze: bool,
+    /// Output syntax/format
+    pub format: ExplainFormat,
+}
+
+impl Default for ExplainOption {
+    fn default() -> Self {
+        ExplainOption {
+            verbose: false,
+            analyze: false,
+            format: ExplainFormat::Indent,
+        }
+    }
+}
+
+impl ExplainOption {
+    /// Builder‐style setter for `verbose`
+    pub fn with_verbose(mut self, verbose: bool) -> Self {
+        self.verbose = verbose;
+        self
+    }
+
+    /// Builder‐style setter for `analyze`
+    pub fn with_analyze(mut self, analyze: bool) -> Self {
+        self.analyze = analyze;
+        self
+    }
+
+    /// Builder‐style setter for `format`
+    pub fn with_format(mut self, format: ExplainFormat) -> Self {
+        self.format = format;
+        self
+    }
+}
+
 /// Produces a relation with string representations of
 /// various parts of the plan
 ///


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to