This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 2fbdb977a7 feat: implement tree explain for `ProjectionExec` (#15082)
2fbdb977a7 is described below

commit 2fbdb977a79abfcda6ed949b5e6a3c4d45b90104
Author: Alan Tang <[email protected]>
AuthorDate: Sun Mar 9 20:52:14 2025 +0800

    feat: implement tree explain for `ProjectionExec` (#15082)
    
    * feat: implement tree explain for ProjectionExec
    
    Signed-off-by: Alan Tang <[email protected]>
    
    * feat(test): support more tests
    
    Signed-off-by: Alan Tang <[email protected]>
    
    * chore(explain): Reduce redundant output
    
    Signed-off-by: Alan Tang <[email protected]>
    
    * Propose a different projection formatting
    
    * feat: add project exec tree rendering for hash join
    
    ---------
    
    Signed-off-by: Alan Tang <[email protected]>
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/physical-plan/src/projection.rs         |  11 +-
 .../sqllogictest/test_files/explain_tree.slt       | 232 ++++++++++++++++++---
 2 files changed, 215 insertions(+), 28 deletions(-)

diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs
index 8ff3824eff..3f901311a0 100644
--- a/datafusion/physical-plan/src/projection.rs
+++ b/datafusion/physical-plan/src/projection.rs
@@ -168,8 +168,15 @@ impl DisplayAs for ProjectionExec {
                 write!(f, "ProjectionExec: expr=[{}]", expr.join(", "))
             }
             DisplayFormatType::TreeRender => {
-                // TODO: collect info
-                write!(f, "")
+                for (i, (e, alias)) in self.expr().iter().enumerate() {
+                    let e = e.to_string();
+                    if &e == alias {
+                        writeln!(f, "expr{i}={e}")?;
+                    } else {
+                        writeln!(f, "{alias}={e}")?;
+                    }
+                }
+                Ok(())
             }
         }
     }
diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt
index 4031af9d60..3d62b965a7 100644
--- a/datafusion/sqllogictest/test_files/explain_tree.slt
+++ b/datafusion/sqllogictest/test_files/explain_tree.slt
@@ -539,6 +539,158 @@ physical_plan
 17)│       format: arrow       │
 18)└───────────────────────────┘
 
+# Query with projection on csv
+query TT
+explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table1;
+----
+logical_plan
+01)Projection: table1.int_col, table1.bigint_col, CAST(table1.int_col AS Int64) + table1.bigint_col AS sum_col
+02)--TableScan: table1 projection=[int_col, bigint_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│       ProjectionExec      │
+03)│    --------------------   │
+04)│        bigint_col:        │
+05)│        bigint_col@1       │
+06)│                           │
+07)│     int_col: int_col@0    │
+08)│                           │
+09)│          sum_col:         │
+10)│ CAST(int_col@0 AS Int64) +│
+11)│        bigint_col@1       │
+12)└─────────────┬─────────────┘
+13)┌─────────────┴─────────────┐
+14)│      RepartitionExec      │
+15)└─────────────┬─────────────┘
+16)┌─────────────┴─────────────┐
+17)│       DataSourceExec      │
+18)│    --------------------   │
+19)│          files: 1         │
+20)│        format: csv        │
+21)└───────────────────────────┘
+
+
+# Query with projection on parquet
+query TT
+explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table2;
+----
+logical_plan
+01)Projection: table2.int_col, table2.bigint_col, CAST(table2.int_col AS Int64) + table2.bigint_col AS sum_col
+02)--TableScan: table2 projection=[int_col, bigint_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│       ProjectionExec      │
+03)│    --------------------   │
+04)│        bigint_col:        │
+05)│        bigint_col@1       │
+06)│                           │
+07)│     int_col: int_col@0    │
+08)│                           │
+09)│          sum_col:         │
+10)│ CAST(int_col@0 AS Int64) +│
+11)│        bigint_col@1       │
+12)└─────────────┬─────────────┘
+13)┌─────────────┴─────────────┐
+14)│      RepartitionExec      │
+15)└─────────────┬─────────────┘
+16)┌─────────────┴─────────────┐
+17)│       DataSourceExec      │
+18)│    --------------------   │
+19)│          files: 1         │
+20)│      format: parquet      │
+21)└───────────────────────────┘
+
+
+# Query with projection on memory
+query TT
+explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table3;
+----
+logical_plan
+01)Projection: table3.int_col, table3.bigint_col, CAST(table3.int_col AS Int64) + table3.bigint_col AS sum_col
+02)--TableScan: table3 projection=[int_col, bigint_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│       ProjectionExec      │
+03)│    --------------------   │
+04)│        bigint_col:        │
+05)│        bigint_col@1       │
+06)│                           │
+07)│     int_col: int_col@0    │
+08)│                           │
+09)│          sum_col:         │
+10)│ CAST(int_col@0 AS Int64) +│
+11)│        bigint_col@1       │
+12)└─────────────┬─────────────┘
+13)┌─────────────┴─────────────┐
+14)│       DataSourceExec      │
+15)│    --------------------   │
+16)│        bytes: 1560        │
+17)│       format: memory      │
+18)│          rows: 1          │
+19)└───────────────────────────┘
+
+
+
+# Query with projection on json
+query TT
+explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table4;
+----
+logical_plan
+01)Projection: table4.int_col, table4.bigint_col, table4.int_col + table4.bigint_col AS sum_col
+02)--TableScan: table4 projection=[bigint_col, int_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│       ProjectionExec      │
+03)│    --------------------   │
+04)│        bigint_col:        │
+05)│        bigint_col@0       │
+06)│                           │
+07)│     int_col: int_col@1    │
+08)│                           │
+09)│          sum_col:         │
+10)│  int_col@1 + bigint_col@0 │
+11)└─────────────┬─────────────┘
+12)┌─────────────┴─────────────┐
+13)│      RepartitionExec      │
+14)└─────────────┬─────────────┘
+15)┌─────────────┴─────────────┐
+16)│       DataSourceExec      │
+17)│    --------------------   │
+18)│          files: 1         │
+19)│        format: json       │
+20)└───────────────────────────┘
+
+
+# Query with projection on arrow
+query TT
+explain SELECT int_col, bigint_col, int_col+bigint_col AS sum_col FROM table5;
+----
+logical_plan
+01)Projection: table5.int_col, table5.bigint_col, CAST(table5.int_col AS Int64) + table5.bigint_col AS sum_col
+02)--TableScan: table5 projection=[int_col, bigint_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│       ProjectionExec      │
+03)│    --------------------   │
+04)│        bigint_col:        │
+05)│        bigint_col@1       │
+06)│                           │
+07)│     int_col: int_col@0    │
+08)│                           │
+09)│          sum_col:         │
+10)│ CAST(int_col@0 AS Int64) +│
+11)│        bigint_col@1       │
+12)└─────────────┬─────────────┘
+13)┌─────────────┴─────────────┐
+14)│      RepartitionExec      │
+15)└─────────────┬─────────────┘
+16)┌─────────────┴─────────────┐
+17)│       DataSourceExec      │
+18)│    --------------------   │
+19)│          files: 1         │
+20)│       format: arrow       │
+21)└───────────────────────────┘
+
 # Query with PartialSortExec.
 query TT
 EXPLAIN SELECT *
@@ -614,19 +766,33 @@ physical_plan
 18)└─────────────┬─────────────┘└─────────────┬─────────────┘
 19)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
 20)│       ProjectionExec      ││      RepartitionExec      │
-21)└─────────────┬─────────────┘└─────────────┬─────────────┘
-22)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-23)│      RepartitionExec      ││       DataSourceExec      │
-24)│                           ││    --------------------   │
-25)│                           ││          files: 1         │
-26)│                           ││      format: parquet      │
-27)└─────────────┬─────────────┘└───────────────────────────┘
-28)┌─────────────┴─────────────┐
-29)│       DataSourceExec      │
-30)│    --------------------   │
-31)│          files: 1         │
-32)│        format: csv        │
-33)└───────────────────────────┘
+21)│    --------------------   ││                           │
+22)│ CAST(table1.string_col AS ││                           │
+23)│         Utf8View):        ││                           │
+24)│    CAST(string_col@1 AS   ││                           │
+25)│          Utf8View)        ││                           │
+26)│                           ││                           │
+27)│        bigint_col:        ││                           │
+28)│        bigint_col@2       ││                           │
+29)│                           ││                           │
+30)│    date_col: date_col@3   ││                           │
+31)│     int_col: int_col@0    ││                           │
+32)│                           ││                           │
+33)│        string_col:        ││                           │
+34)│        string_col@1       ││                           │
+35)└─────────────┬─────────────┘└─────────────┬─────────────┘
+36)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+37)│      RepartitionExec      ││       DataSourceExec      │
+38)│                           ││    --------------------   │
+39)│                           ││          files: 1         │
+40)│                           ││      format: parquet      │
+41)└─────────────┬─────────────┘└───────────────────────────┘
+42)┌─────────────┴─────────────┐
+43)│       DataSourceExec      │
+44)│    --------------------   │
+45)│          files: 1         │
+46)│        format: csv        │
+47)└───────────────────────────┘
 
 # Query with outer hash join.
 query TT
@@ -659,19 +825,33 @@ physical_plan
 20)└─────────────┬─────────────┘└─────────────┬─────────────┘
 21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
 22)│       ProjectionExec      ││      RepartitionExec      │
-23)└─────────────┬─────────────┘└─────────────┬─────────────┘
-24)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-25)│      RepartitionExec      ││       DataSourceExec      │
-26)│                           ││    --------------------   │
-27)│                           ││          files: 1         │
-28)│                           ││      format: parquet      │
-29)└─────────────┬─────────────┘└───────────────────────────┘
-30)┌─────────────┴─────────────┐
-31)│       DataSourceExec      │
-32)│    --------------------   │
-33)│          files: 1         │
-34)│        format: csv        │
-35)└───────────────────────────┘
+23)│    --------------------   ││                           │
+24)│ CAST(table1.string_col AS ││                           │
+25)│         Utf8View):        ││                           │
+26)│    CAST(string_col@1 AS   ││                           │
+27)│          Utf8View)        ││                           │
+28)│                           ││                           │
+29)│        bigint_col:        ││                           │
+30)│        bigint_col@2       ││                           │
+31)│                           ││                           │
+32)│    date_col: date_col@3   ││                           │
+33)│     int_col: int_col@0    ││                           │
+34)│                           ││                           │
+35)│        string_col:        ││                           │
+36)│        string_col@1       ││                           │
+37)└─────────────┬─────────────┘└─────────────┬─────────────┘
+38)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+39)│      RepartitionExec      ││       DataSourceExec      │
+40)│                           ││    --------------------   │
+41)│                           ││          files: 1         │
+42)│                           ││      format: parquet      │
+43)└─────────────┬─────────────┘└───────────────────────────┘
+44)┌─────────────┴─────────────┐
+45)│       DataSourceExec      │
+46)│    --------------------   │
+47)│          files: 1         │
+48)│        format: csv        │
+49)└───────────────────────────┘
 
 # cleanup
 statement ok


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to