This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 2fbdb977a7 feat: implement tree explain for `ProjectionExec` (#15082)
2fbdb977a7 is described below
commit 2fbdb977a79abfcda6ed949b5e6a3c4d45b90104
Author: Alan Tang <[email protected]>
AuthorDate: Sun Mar 9 20:52:14 2025 +0800
feat: implement tree explain for `ProjectionExec` (#15082)
* feat: implement tree explain for ProjectionExec
Signed-off-by: Alan Tang <[email protected]>
* feat(test): support more tests
Signed-off-by: Alan Tang <[email protected]>
* chore(explain): Reduce redundant output
Signed-off-by: Alan Tang <[email protected]>
* Propose a different projection formatting
* feat: add project exec tree rendering for hash join
---------
Signed-off-by: Alan Tang <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/physical-plan/src/projection.rs | 11 +-
.../sqllogictest/test_files/explain_tree.slt | 232 ++++++++++++++++++---
2 files changed, 215 insertions(+), 28 deletions(-)
diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs
index 8ff3824eff..3f901311a0 100644
--- a/datafusion/physical-plan/src/projection.rs
+++ b/datafusion/physical-plan/src/projection.rs
@@ -168,8 +168,15 @@ impl DisplayAs for ProjectionExec {
write!(f, "ProjectionExec: expr=[{}]", expr.join(", "))
}
DisplayFormatType::TreeRender => {
- // TODO: collect info
- write!(f, "")
+ for (i, (e, alias)) in self.expr().iter().enumerate() {
+ let e = e.to_string();
+ if &e == alias {
+ writeln!(f, "expr{i}={e}")?;
+ } else {
+ writeln!(f, "{alias}={e}")?;
+ }
+ }
+ Ok(())
}
}
}
diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt
index 4031af9d60..3d62b965a7 100644
--- a/datafusion/sqllogictest/test_files/explain_tree.slt
+++ b/datafusion/sqllogictest/test_files/explain_tree.slt
@@ -539,6 +539,158 @@ physical_plan
17)│ format: arrow │
18)└───────────────────────────┘
+# Query with projection on csv
+query TT
+explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table1;
+----
+logical_plan
+01)Projection: table1.int_col, table1.bigint_col, CAST(table1.int_col AS Int64) + table1.bigint_col AS sum_col
+02)--TableScan: table1 projection=[int_col, bigint_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│ ProjectionExec │
+03)│ -------------------- │
+04)│ bigint_col: │
+05)│ bigint_col@1 │
+06)│ │
+07)│ int_col: int_col@0 │
+08)│ │
+09)│ sum_col: │
+10)│ CAST(int_col@0 AS Int64) +│
+11)│ bigint_col@1 │
+12)└─────────────┬─────────────┘
+13)┌─────────────┴─────────────┐
+14)│ RepartitionExec │
+15)└─────────────┬─────────────┘
+16)┌─────────────┴─────────────┐
+17)│ DataSourceExec │
+18)│ -------------------- │
+19)│ files: 1 │
+20)│ format: csv │
+21)└───────────────────────────┘
+
+
+# Query with projection on parquet
+query TT
+explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table2;
+----
+logical_plan
+01)Projection: table2.int_col, table2.bigint_col, CAST(table2.int_col AS Int64) + table2.bigint_col AS sum_col
+02)--TableScan: table2 projection=[int_col, bigint_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│ ProjectionExec │
+03)│ -------------------- │
+04)│ bigint_col: │
+05)│ bigint_col@1 │
+06)│ │
+07)│ int_col: int_col@0 │
+08)│ │
+09)│ sum_col: │
+10)│ CAST(int_col@0 AS Int64) +│
+11)│ bigint_col@1 │
+12)└─────────────┬─────────────┘
+13)┌─────────────┴─────────────┐
+14)│ RepartitionExec │
+15)└─────────────┬─────────────┘
+16)┌─────────────┴─────────────┐
+17)│ DataSourceExec │
+18)│ -------------------- │
+19)│ files: 1 │
+20)│ format: parquet │
+21)└───────────────────────────┘
+
+
+# Query with projection on memory
+query TT
+explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table3;
+----
+logical_plan
+01)Projection: table3.int_col, table3.bigint_col, CAST(table3.int_col AS Int64) + table3.bigint_col AS sum_col
+02)--TableScan: table3 projection=[int_col, bigint_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│ ProjectionExec │
+03)│ -------------------- │
+04)│ bigint_col: │
+05)│ bigint_col@1 │
+06)│ │
+07)│ int_col: int_col@0 │
+08)│ │
+09)│ sum_col: │
+10)│ CAST(int_col@0 AS Int64) +│
+11)│ bigint_col@1 │
+12)└─────────────┬─────────────┘
+13)┌─────────────┴─────────────┐
+14)│ DataSourceExec │
+15)│ -------------------- │
+16)│ bytes: 1560 │
+17)│ format: memory │
+18)│ rows: 1 │
+19)└───────────────────────────┘
+
+
+
+# Query with projection on json
+query TT
+explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table4;
+----
+logical_plan
+01)Projection: table4.int_col, table4.bigint_col, table4.int_col + table4.bigint_col AS sum_col
+02)--TableScan: table4 projection=[bigint_col, int_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│ ProjectionExec │
+03)│ -------------------- │
+04)│ bigint_col: │
+05)│ bigint_col@0 │
+06)│ │
+07)│ int_col: int_col@1 │
+08)│ │
+09)│ sum_col: │
+10)│ int_col@1 + bigint_col@0 │
+11)└─────────────┬─────────────┘
+12)┌─────────────┴─────────────┐
+13)│ RepartitionExec │
+14)└─────────────┬─────────────┘
+15)┌─────────────┴─────────────┐
+16)│ DataSourceExec │
+17)│ -------------------- │
+18)│ files: 1 │
+19)│ format: json │
+20)└───────────────────────────┘
+
+
+# Query with projection on arrow
+query TT
+explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table5;
+----
+logical_plan
+01)Projection: table5.int_col, table5.bigint_col, CAST(table5.int_col AS Int64) + table5.bigint_col AS sum_col
+02)--TableScan: table5 projection=[int_col, bigint_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│ ProjectionExec │
+03)│ -------------------- │
+04)│ bigint_col: │
+05)│ bigint_col@1 │
+06)│ │
+07)│ int_col: int_col@0 │
+08)│ │
+09)│ sum_col: │
+10)│ CAST(int_col@0 AS Int64) +│
+11)│ bigint_col@1 │
+12)└─────────────┬─────────────┘
+13)┌─────────────┴─────────────┐
+14)│ RepartitionExec │
+15)└─────────────┬─────────────┘
+16)┌─────────────┴─────────────┐
+17)│ DataSourceExec │
+18)│ -------------------- │
+19)│ files: 1 │
+20)│ format: arrow │
+21)└───────────────────────────┘
+
# Query with PartialSortExec.
query TT
EXPLAIN SELECT *
@@ -614,19 +766,33 @@ physical_plan
18)└─────────────┬─────────────┘└─────────────┬─────────────┘
19)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
20)│ ProjectionExec ││ RepartitionExec │
-21)└─────────────┬─────────────┘└─────────────┬─────────────┘
-22)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-23)│ RepartitionExec ││ DataSourceExec │
-24)│ ││ -------------------- │
-25)│ ││ files: 1 │
-26)│ ││ format: parquet │
-27)└─────────────┬─────────────┘└───────────────────────────┘
-28)┌─────────────┴─────────────┐
-29)│ DataSourceExec │
-30)│ -------------------- │
-31)│ files: 1 │
-32)│ format: csv │
-33)└───────────────────────────┘
+21)│ -------------------- ││ │
+22)│ CAST(table1.string_col AS ││ │
+23)│ Utf8View): ││ │
+24)│ CAST(string_col@1 AS ││ │
+25)│ Utf8View) ││ │
+26)│ ││ │
+27)│ bigint_col: ││ │
+28)│ bigint_col@2 ││ │
+29)│ ││ │
+30)│ date_col: date_col@3 ││ │
+31)│ int_col: int_col@0 ││ │
+32)│ ││ │
+33)│ string_col: ││ │
+34)│ string_col@1 ││ │
+35)└─────────────┬─────────────┘└─────────────┬─────────────┘
+36)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+37)│ RepartitionExec ││ DataSourceExec │
+38)│ ││ -------------------- │
+39)│ ││ files: 1 │
+40)│ ││ format: parquet │
+41)└─────────────┬─────────────┘└───────────────────────────┘
+42)┌─────────────┴─────────────┐
+43)│ DataSourceExec │
+44)│ -------------------- │
+45)│ files: 1 │
+46)│ format: csv │
+47)└───────────────────────────┘
# Query with outer hash join.
query TT
@@ -659,19 +825,33 @@ physical_plan
20)└─────────────┬─────────────┘└─────────────┬─────────────┘
21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
22)│ ProjectionExec ││ RepartitionExec │
-23)└─────────────┬─────────────┘└─────────────┬─────────────┘
-24)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-25)│ RepartitionExec ││ DataSourceExec │
-26)│ ││ -------------------- │
-27)│ ││ files: 1 │
-28)│ ││ format: parquet │
-29)└─────────────┬─────────────┘└───────────────────────────┘
-30)┌─────────────┴─────────────┐
-31)│ DataSourceExec │
-32)│ -------------------- │
-33)│ files: 1 │
-34)│ format: csv │
-35)└───────────────────────────┘
+23)│ -------------------- ││ │
+24)│ CAST(table1.string_col AS ││ │
+25)│ Utf8View): ││ │
+26)│ CAST(string_col@1 AS ││ │
+27)│ Utf8View) ││ │
+28)│ ││ │
+29)│ bigint_col: ││ │
+30)│ bigint_col@2 ││ │
+31)│ ││ │
+32)│ date_col: date_col@3 ││ │
+33)│ int_col: int_col@0 ││ │
+34)│ ││ │
+35)│ string_col: ││ │
+36)│ string_col@1 ││ │
+37)└─────────────┬─────────────┘└─────────────┬─────────────┘
+38)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+39)│ RepartitionExec ││ DataSourceExec │
+40)│ ││ -------------------- │
+41)│ ││ files: 1 │
+42)│ ││ format: parquet │
+43)└─────────────┬─────────────┘└───────────────────────────┘
+44)┌─────────────┴─────────────┐
+45)│ DataSourceExec │
+46)│ -------------------- │
+47)│ files: 1 │
+48)│ format: csv │
+49)└───────────────────────────┘
# cleanup
statement ok
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]