This is an automated email from the ASF dual-hosted git repository.
kontinuation pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new bced6b3 feat: Support explaining DataFrame when using Python API (#92)
bced6b3 is described below
commit bced6b306d10acdef35066481238bfca0337b8cc
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Wed Sep 17 13:37:42 2025 +0800
feat: Support explaining DataFrame when using Python API (#92)
This patch fixes the following error when `show`ing a DataFrame backed by
an `EXPLAIN ...` query:
```python
>>> import sedonadb
>>> con = sedonadb.connect()
>>> con.sql("EXPLAIN SELECT 1 as one").show()
Traceback (most recent call last):
  File "<python-input-4>", line 1, in <module>
    con.sql("EXPLAIN SELECT 1 as one").show()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^
  File "/Users/bopeng/workspace/wherobots/sedona-db/python/sedonadb/python/sedonadb/dataframe.py", line 362, in show
    print(self._impl.show(self._ctx, limit, width, ascii), end="")
          ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
sedonadb._lib.SedonaError: Unsupported logical plan: Explain must be root of the plan.
This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues
```
The above code will run successfully after applying this patch:
```
>>> import sedonadb
>>> con = sedonadb.connect()
>>> con.sql("EXPLAIN SELECT 1 as one").show()
┌───────────────┬─────────────────────────────────┐
│ plan_type ┆ plan │
│ utf8 ┆ utf8 │
╞═══════════════╪═════════════════════════════════╡
│ logical_plan ┆ Projection: Int64(1) AS one │
│ ┆ EmptyRelation │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ physical_plan ┆ ProjectionExec: expr=[1 as one] │
│ ┆ PlaceholderRowExec │
│ ┆ │
└───────────────┴─────────────────────────────────┘
```
This patch also adds an `explain` method to `DataFrame`.
---
python/sedonadb/python/sedonadb/dataframe.py | 43 +++++++++++++++++++
python/sedonadb/src/dataframe.rs | 23 ++++++++++-
python/sedonadb/tests/test_dataframe.py | 62 ++++++++++++++++++++++++++++
rust/sedona/src/context.rs | 50 ++++++++++++++++++++--
4 files changed, 174 insertions(+), 4 deletions(-)
diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index b24fed5..78bf47b 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -353,6 +353,49 @@ class DataFrame:
width = _out_width(width)
print(self._impl.show(self._ctx, limit, width, ascii), end="")
+ def explain(
+ self,
+ type: str = "standard",
+ format: str = "indent",
+ ) -> "DataFrame":
+ """Return the execution plan for this DataFrame as a DataFrame
+
+ Retrieves the logical and physical execution plans that will be used to
+ compute this DataFrame. This is useful for understanding query
+ performance and optimization.
+
+ Args:
+ type: The type of explain plan to generate. Supported values are:
+ "standard" (default) - shows logical and physical plans,
+ "extended" - includes additional query optimization details,
+ "analyze" - executes the plan and reports actual metrics.
+ format: The format to use for displaying the plan. Supported formats are
+ "indent" (default), "tree", "pgjson" and "graphviz".
+
+ Returns:
+ A DataFrame containing the execution plan information with columns
+ 'plan_type' and 'plan'.
+
+ Examples:
+
+ >>> import sedonadb
+ >>> con = sedonadb.connect()
+ >>> df = con.sql("SELECT 1 as one")
+ >>> df.explain().show()
+ ┌───────────────┬─────────────────────────────────┐
+ │ plan_type ┆ plan │
+ │ utf8 ┆ utf8 │
+ ╞═══════════════╪═════════════════════════════════╡
+ │ logical_plan ┆ Projection: Int64(1) AS one │
+ │ ┆ EmptyRelation │
+ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ │ physical_plan ┆ ProjectionExec: expr=[1 as one] │
+ │ ┆ PlaceholderRowExec │
+ │ ┆ │
+ └───────────────┴─────────────────────────────────┘
+ """
+ return DataFrame(self._ctx, self._impl.explain(type, format))
+
def __repr__(self) -> str:
if global_options().interactive:
width = _out_width()
diff --git a/python/sedonadb/src/dataframe.rs b/python/sedonadb/src/dataframe.rs
index f6ddf06..6d92205 100644
--- a/python/sedonadb/src/dataframe.rs
+++ b/python/sedonadb/src/dataframe.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
use std::ffi::CString;
+use std::str::FromStr;
use std::sync::Arc;
use arrow_array::ffi::FFI_ArrowSchema;
@@ -25,7 +26,7 @@ use datafusion::catalog::MemTable;
use datafusion::logical_expr::SortExpr;
use datafusion::prelude::DataFrame;
use datafusion_common::Column;
-use datafusion_expr::Expr;
+use datafusion_expr::{ExplainFormat, ExplainOption, Expr};
use datafusion_ffi::table_provider::FFI_TableProvider;
use pyo3::prelude::*;
use pyo3::types::PyCapsule;
@@ -186,6 +187,26 @@ impl InternalDataFrame {
Ok(content)
}
+ fn explain(&self, explain_type: &str, format: &str) -> Result<Self, PySedonaError> {
+ let format = ExplainFormat::from_str(format)?;
+ let (analyze, verbose) = match explain_type {
+ "standard" => (false, false),
+ "extended" => (false, true),
+ "analyze" => (true, false),
+ _ => {
+ return Err(PySedonaError::SedonaPython(
+ "explain type must be one of 'standard', 'extended', or 'analyze'".to_string(),
+ ))
+ }
+ };
+ let explain_option = ExplainOption::default()
+ .with_analyze(analyze)
+ .with_verbose(verbose)
+ .with_format(format);
+ let explain_df = self.inner.clone().explain_with_options(explain_option)?;
+ Ok(Self::new(explain_df, self.runtime.clone()))
+ }
+
fn __datafusion_table_provider__<'py>(
&self,
py: Python<'py>,
diff --git a/python/sedonadb/tests/test_dataframe.py b/python/sedonadb/tests/test_dataframe.py
index 289e9c4..b74bfc6 100644
--- a/python/sedonadb/tests/test_dataframe.py
+++ b/python/sedonadb/tests/test_dataframe.py
@@ -368,6 +368,68 @@ def test_show(con, capsys):
assert capsys.readouterr().out.strip() == expected
+def test_show_explained(con, capsys):
+ con.sql("EXPLAIN SELECT 1 as one").show()
+ expected = """
+┌───────────────┬─────────────────────────────────┐
+│ plan_type ┆ plan │
+│ utf8 ┆ utf8 │
+╞═══════════════╪═════════════════════════════════╡
+│ logical_plan ┆ Projection: Int64(1) AS one │
+│ ┆ EmptyRelation │
+├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ physical_plan ┆ ProjectionExec: expr=[1 as one] │
+│ ┆ PlaceholderRowExec │
+│ ┆ │
+└───────────────┴─────────────────────────────────┘
+ """.strip()
+ assert capsys.readouterr().out.strip() == expected
+
+
+def test_explain(con, capsys):
+ con.sql("SELECT 1 as one").explain().show()
+ expected = """
+┌───────────────┬─────────────────────────────────┐
+│ plan_type ┆ plan │
+│ utf8 ┆ utf8 │
+╞═══════════════╪═════════════════════════════════╡
+│ logical_plan ┆ Projection: Int64(1) AS one │
+│ ┆ EmptyRelation │
+├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ physical_plan ┆ ProjectionExec: expr=[1 as one] │
+│ ┆ PlaceholderRowExec │
+│ ┆ │
+└───────────────┴─────────────────────────────────┘
+ """.strip()
+ assert capsys.readouterr().out.strip() == expected
+
+ con.sql("SELECT 1 as one").explain(format="tree").show()
+ expected = """
+┌───────────────┬───────────────────────────────┐
+│ plan_type ┆ plan │
+│ utf8 ┆ utf8 │
+╞═══════════════╪═══════════════════════════════╡
+│ physical_plan ┆ ┌───────────────────────────┐ │
+│ ┆ │ ProjectionExec │ │
+│ ┆ │ -------------------- │ │
+│ ┆ │ one: 1 │ │
+│ ┆ └─────────────┬─────────────┘ │
+│ ┆ ┌─────────────┴─────────────┐ │
+│ ┆ │ PlaceholderRowExec │ │
+│ ┆ └───────────────────────────┘ │
+│ ┆ │
+└───────────────┴───────────────────────────────┘
+ """.strip()
+ assert capsys.readouterr().out.strip() == expected
+
+ query_plan = con.sql("SELECT 1 as one").explain(type="analyze").to_pandas()
+ assert query_plan.iloc[0, 0] == "Plan with Metrics"
+
+ query_plan = con.sql("SELECT 1 as one").explain(type="extended").to_pandas()
+ assert query_plan.iloc[0, 0] == "initial_logical_plan"
+ assert len(query_plan) > 10
+
+
def test_repr(con):
assert repr(con.sql("SELECT 1 as one")).startswith(
"<sedonadb.dataframe.DataFrame object"
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index 07fabf8..e75d5d8 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -36,7 +36,7 @@ use datafusion::{
use datafusion_common::not_impl_err;
use datafusion_expr::dml::InsertOp;
use datafusion_expr::sqlparser::dialect::{dialect_from_str, Dialect};
-use datafusion_expr::{LogicalPlanBuilder, SortExpr};
+use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, SortExpr};
use parking_lot::Mutex;
use sedona_common::option::add_sedona_option_extension;
use sedona_expr::aggregate_udf::SedonaAccumulatorRef;
@@ -292,9 +292,23 @@ impl SedonaDataFrame for DataFrame {
self,
ctx: &SedonaContext,
limit: Option<usize>,
- options: DisplayTableOptions<'a>,
+ mut options: DisplayTableOptions<'a>,
) -> Result<String> {
- let df = self.limit(0, limit)?;
+ let df = if matches!(
+ self.logical_plan(),
+ LogicalPlan::Explain(_) | LogicalPlan::DescribeTable(_) | LogicalPlan::Analyze(_)
+ ) {
+ // Show multi-line output without truncation for plans like `EXPLAIN`
+ options.max_row_height = usize::MAX;
+
+ // We don't want to apply an additional .limit() to plans like `Explain`
+ // as that will trigger an internal error: Unsupported logical plan: Explain must be root of the plan
+ self
+ } else {
+ // Apply limit if specified
+ self.limit(0, limit)?
+ };
+
let schema_without_qualifiers = df.schema().clone().strip_qualifiers();
let schema = schema_without_qualifiers.as_arrow();
let batches = df.collect().await?;
@@ -505,6 +519,36 @@ mod tests {
);
}
+ #[tokio::test]
+ async fn show_explain() {
+ let ctx = SedonaContext::new();
+ for limit in [None, Some(10)] {
+ let tbl = ctx
+ .sql("EXPLAIN SELECT 1 as one")
+ .await
+ .unwrap()
+ .show_sedona(&ctx, limit, DisplayTableOptions::default())
+ .await
+ .unwrap();
+
+ #[rustfmt::skip]
+ assert_eq!(
+ tbl.lines().collect::<Vec<_>>(),
+ vec![
+ "+---------------+---------------------------------+",
+ "| plan_type | plan |",
+ "+---------------+---------------------------------+",
+ "| logical_plan | Projection: Int64(1) AS one |",
+ "| | EmptyRelation |",
+ "| physical_plan | ProjectionExec: expr=[1 as one] |",
+ "| | PlaceholderRowExec |",
+ "| | |",
+ "+---------------+---------------------------------+",
+ ]
+ );
+ }
+ }
+
#[tokio::test]
async fn write_geoparquet() {
let tmpdir = tempdir().unwrap();