This is an automated email from the ASF dual-hosted git repository.

ytyou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 55ec83b6dd Implement tree explain for `NestedLoopJoinExec`, `CrossJoinExec`, `So… (#15081)
55ec83b6dd is described below

commit 55ec83b6dd301ac7cc22330e7fa75102e542cc42
Author: irenjj <[email protected]>
AuthorDate: Mon Mar 10 18:26:01 2025 +0800

    Implement tree explain for `NestedLoopJoinExec`, `CrossJoinExec`, `So… (#15081)
    
    * Implement tree explain for `NestedLoopJoinExec`, `CrossJoinExec`, `SortMergeJoinExec` and `SymmetricHashJoinExec`
    
    * fix issues
    
    * fix issues
    
    * fix
    
    * fix proj
---
 datafusion/physical-plan/src/joins/cross_join.rs   |   8 +-
 .../physical-plan/src/joins/nested_loop_join.rs    |   7 +-
 .../physical-plan/src/joins/sort_merge_join.rs     |  13 +-
 .../physical-plan/src/joins/symmetric_hash_join.rs |  14 +-
 .../sqllogictest/test_files/explain_tree.slt       | 141 +++++++++++++++++++++
 5 files changed, 172 insertions(+), 11 deletions(-)

diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs
index 48dc7c9df3..e0998862bd 100644
--- a/datafusion/physical-plan/src/joins/cross_join.rs
+++ b/datafusion/physical-plan/src/joins/cross_join.rs
@@ -237,13 +237,11 @@ impl DisplayAs for CrossJoinExec {
         f: &mut std::fmt::Formatter,
     ) -> std::fmt::Result {
         match t {
-            DisplayFormatType::Default | DisplayFormatType::Verbose => {
+            DisplayFormatType::Default
+            | DisplayFormatType::Verbose
+            | DisplayFormatType::TreeRender => {
                 write!(f, "CrossJoinExec")
             }
-            DisplayFormatType::TreeRender => {
-                // TODO: collect info
-                write!(f, "")
-            }
         }
     }
 }
diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs
index f680de6738..f6fa8878e0 100644
--- a/datafusion/physical-plan/src/joins/nested_loop_join.rs
+++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs
@@ -425,8 +425,11 @@ impl DisplayAs for NestedLoopJoinExec {
                 )
             }
             DisplayFormatType::TreeRender => {
-                // TODO: collect info
-                write!(f, "")
+                if *self.join_type() != JoinType::Inner {
+                    writeln!(f, "join_type={:?}", self.join_type)
+                } else {
+                    Ok(())
+                }
             }
         }
     }
diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs
index 20c0d76a9c..d8446fb332 100644
--- a/datafusion/physical-plan/src/joins/sort_merge_join.rs
+++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs
@@ -370,8 +370,17 @@ impl DisplayAs for SortMergeJoinExec {
                 )
             }
             DisplayFormatType::TreeRender => {
-                // TODO: collect info
-                write!(f, "")
+                let on = self
+                    .on
+                    .iter()
+                    .map(|(c1, c2)| format!("({} = {})", c1, c2))
+                    .collect::<Vec<String>>()
+                    .join(", ");
+
+                if self.join_type() != JoinType::Inner {
+                    writeln!(f, "join_type={:?}", self.join_type)?;
+                }
+                writeln!(f, "on={}", on)
             }
         }
     }
diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
index 03a6883143..63e95c7a30 100644
--- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
+++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
@@ -381,8 +381,18 @@ impl DisplayAs for SymmetricHashJoinExec {
                 )
             }
             DisplayFormatType::TreeRender => {
-                // TODO: collect info
-                write!(f, "")
+                let on = self
+                    .on
+                    .iter()
+                    .map(|(c1, c2)| format!("({} = {})", c1, c2))
+                    .collect::<Vec<String>>()
+                    .join(", ");
+
+                writeln!(f, "mode={:?}", self.mode)?;
+                if *self.join_type() != JoinType::Inner {
+                    writeln!(f, "join_type={:?}", self.join_type)?;
+                }
+                writeln!(f, "on={}", on)
             }
         }
     }
diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt
index 465b09e653..e0459b6da2 100644
--- a/datafusion/sqllogictest/test_files/explain_tree.slt
+++ b/datafusion/sqllogictest/test_files/explain_tree.slt
@@ -95,6 +95,44 @@ WITH ORDER (a ASC, b ASC, c ASC)
 LOCATION '../core/tests/data/window_2.csv'
 OPTIONS ('format.has_header' 'true');
 
+statement ok
+CREATE TABLE hashjoin_datatype_table_t1_source(c1 INT, c2 BIGINT, c3 DECIMAL(5,2), c4 VARCHAR)
+AS VALUES
+(1,    86400000,  1.23,    'abc'),
+(2,    172800000, 456.00,  'def'),
+(null, 259200000, 789.000, 'ghi'),
+(3,    null,      -123.12, 'jkl')
+;
+
+statement ok
+CREATE TABLE hashjoin_datatype_table_t1
+AS SELECT
+  arrow_cast(c1, 'Date32') as c1,
+  arrow_cast(c2, 'Date64') as c2,
+  c3,
+  arrow_cast(c4, 'Dictionary(Int32, Utf8)') as c4
+FROM
+  hashjoin_datatype_table_t1_source
+
+statement ok
+CREATE TABLE hashjoin_datatype_table_t2_source(c1 INT, c2 BIGINT, c3 DECIMAL(10,2), c4 VARCHAR)
+AS VALUES
+(1,    86400000,  -123.12,   'abc'),
+(null, null,      100000.00, 'abcdefg'),
+(null, 259200000, 0.00,      'qwerty'),
+(3,   null,       789.000,   'qwe')
+;
+
+statement ok
+CREATE TABLE hashjoin_datatype_table_t2
+AS SELECT
+  arrow_cast(c1, 'Date32') as c1,
+  arrow_cast(c2, 'Date64') as c2,
+  c3,
+  arrow_cast(c4, 'Dictionary(Int32, Utf8)') as c4
+FROM
+  hashjoin_datatype_table_t2_source
+
 ######## Begin Queries ########
 
 # Filter
@@ -897,6 +935,109 @@ physical_plan
 48)│        format: csv        │
 49)└───────────────────────────┘
 
+# Query with nested loop join.
+query TT
+explain select int_col from table1 where exists (select count(*) from table2);
+----
+logical_plan
+01)LeftSemi Join: 
+02)--TableScan: table1 projection=[int_col], partial_filters=[Boolean(true)]
+03)--SubqueryAlias: __correlated_sq_1
+04)----Projection: 
+05)------Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
+06)--------TableScan: table2 projection=[]
+physical_plan
+01)┌───────────────────────────┐
+02)│     NestedLoopJoinExec    │
+03)│    --------------------   ├──────────────┐
+04)│    join_type: LeftSemi    │              │
+05)└─────────────┬─────────────┘              │
+06)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+07)│       DataSourceExec      ││       ProjectionExec      │
+08)│    --------------------   ││                           │
+09)│          files: 1         ││                           │
+10)│        format: csv        ││                           │
+11)└───────────────────────────┘└─────────────┬─────────────┘
+12)-----------------------------┌─────────────┴─────────────┐
+13)-----------------------------│       AggregateExec       │
+14)-----------------------------└─────────────┬─────────────┘
+15)-----------------------------┌─────────────┴─────────────┐
+16)-----------------------------│   CoalescePartitionsExec  │
+17)-----------------------------└─────────────┬─────────────┘
+18)-----------------------------┌─────────────┴─────────────┐
+19)-----------------------------│       AggregateExec       │
+20)-----------------------------└─────────────┬─────────────┘
+21)-----------------------------┌─────────────┴─────────────┐
+22)-----------------------------│      RepartitionExec      │
+23)-----------------------------└─────────────┬─────────────┘
+24)-----------------------------┌─────────────┴─────────────┐
+25)-----------------------------│       DataSourceExec      │
+26)-----------------------------│    --------------------   │
+27)-----------------------------│          files: 1         │
+28)-----------------------------│      format: parquet      │
+29)-----------------------------└───────────────────────────┘
+
+# Query with cross join.
+query TT
+explain select * from table1 cross join table2 ;
+----
+logical_plan
+01)Cross Join: 
+02)--TableScan: table1 projection=[int_col, string_col, bigint_col, date_col]
+03)--TableScan: table2 projection=[int_col, string_col, bigint_col, date_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│       CrossJoinExec       ├──────────────┐
+03)└─────────────┬─────────────┘              │
+04)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+05)│       DataSourceExec      ││      RepartitionExec      │
+06)│    --------------------   ││                           │
+07)│          files: 1         ││                           │
+08)│        format: csv        ││                           │
+09)└───────────────────────────┘└─────────────┬─────────────┘
+10)-----------------------------┌─────────────┴─────────────┐
+11)-----------------------------│       DataSourceExec      │
+12)-----------------------------│    --------------------   │
+13)-----------------------------│          files: 1         │
+14)-----------------------------│      format: parquet      │
+15)-----------------------------└───────────────────────────┘
+
+
+# Query with sort merge join.
+statement ok
+set datafusion.optimizer.prefer_hash_join = false;
+
+query TT
+explain select * from hashjoin_datatype_table_t1 t1 join hashjoin_datatype_table_t2 t2 on t1.c1 = t2.c1
+----
+logical_plan
+01)Inner Join: t1.c1 = t2.c1
+02)--SubqueryAlias: t1
+03)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
+04)--SubqueryAlias: t2
+05)----TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4]
+physical_plan
+01)┌───────────────────────────┐
+02)│     SortMergeJoinExec     │
+03)│    --------------------   ├──────────────┐
+04)│     on: (c1@0 = c1@0)     │              │
+05)└─────────────┬─────────────┘              │
+06)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+07)│          SortExec         ││          SortExec         │
+08)│    --------------------   ││    --------------------   │
+09)│   sort keys: [c1@0 ASC]   ││   sort keys: [c1@0 ASC]   │
+10)└─────────────┬─────────────┘└─────────────┬─────────────┘
+11)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+12)│       DataSourceExec      ││       DataSourceExec      │
+13)│    --------------------   ││    --------------------   │
+14)│        bytes: 6040        ││        bytes: 6040        │
+15)│       format: memory      ││       format: memory      │
+16)│          rows: 1          ││          rows: 1          │
+17)└───────────────────────────┘└───────────────────────────┘
+
+statement ok
+set datafusion.optimizer.prefer_hash_join = true;
+
 # cleanup
 statement ok
 drop table table1;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to