This is an automated email from the ASF dual-hosted git repository.
ytyou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 55ec83b6dd Implement tree explain for `NestedLoopJoinExec`,
`CrossJoinExec`, `So… (#15081)
55ec83b6dd is described below
commit 55ec83b6dd301ac7cc22330e7fa75102e542cc42
Author: irenjj <[email protected]>
AuthorDate: Mon Mar 10 18:26:01 2025 +0800
Implement tree explain for `NestedLoopJoinExec`, `CrossJoinExec`, `So…
(#15081)
* Implement tree explain for `NestedLoopJoinExec`, `CrossJoinExec`,
`SortMergeJoinExec` and `SymmetricHashJoinExec`
* fix issues
* fix issues
* fix
* fix proj
---
datafusion/physical-plan/src/joins/cross_join.rs | 8 +-
.../physical-plan/src/joins/nested_loop_join.rs | 7 +-
.../physical-plan/src/joins/sort_merge_join.rs | 13 +-
.../physical-plan/src/joins/symmetric_hash_join.rs | 14 +-
.../sqllogictest/test_files/explain_tree.slt | 141 +++++++++++++++++++++
5 files changed, 172 insertions(+), 11 deletions(-)
diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs
index 48dc7c9df3..e0998862bd 100644
--- a/datafusion/physical-plan/src/joins/cross_join.rs
+++ b/datafusion/physical-plan/src/joins/cross_join.rs
@@ -237,13 +237,11 @@ impl DisplayAs for CrossJoinExec {
f: &mut std::fmt::Formatter,
) -> std::fmt::Result {
match t {
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
+ DisplayFormatType::Default
+ | DisplayFormatType::Verbose
+ | DisplayFormatType::TreeRender => {
write!(f, "CrossJoinExec")
}
- DisplayFormatType::TreeRender => {
- // TODO: collect info
- write!(f, "")
- }
}
}
}
diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs
index f680de6738..f6fa8878e0 100644
--- a/datafusion/physical-plan/src/joins/nested_loop_join.rs
+++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs
@@ -425,8 +425,11 @@ impl DisplayAs for NestedLoopJoinExec {
)
}
DisplayFormatType::TreeRender => {
- // TODO: collect info
- write!(f, "")
+ if *self.join_type() != JoinType::Inner {
+ writeln!(f, "join_type={:?}", self.join_type)
+ } else {
+ Ok(())
+ }
}
}
}
diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs
index 20c0d76a9c..d8446fb332 100644
--- a/datafusion/physical-plan/src/joins/sort_merge_join.rs
+++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs
@@ -370,8 +370,17 @@ impl DisplayAs for SortMergeJoinExec {
)
}
DisplayFormatType::TreeRender => {
- // TODO: collect info
- write!(f, "")
+ let on = self
+ .on
+ .iter()
+ .map(|(c1, c2)| format!("({} = {})", c1, c2))
+ .collect::<Vec<String>>()
+ .join(", ");
+
+ if self.join_type() != JoinType::Inner {
+ writeln!(f, "join_type={:?}", self.join_type)?;
+ }
+ writeln!(f, "on={}", on)
}
}
}
diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
index 03a6883143..63e95c7a30 100644
--- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
+++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
@@ -381,8 +381,18 @@ impl DisplayAs for SymmetricHashJoinExec {
)
}
DisplayFormatType::TreeRender => {
- // TODO: collect info
- write!(f, "")
+ let on = self
+ .on
+ .iter()
+ .map(|(c1, c2)| format!("({} = {})", c1, c2))
+ .collect::<Vec<String>>()
+ .join(", ");
+
+ writeln!(f, "mode={:?}", self.mode)?;
+ if *self.join_type() != JoinType::Inner {
+ writeln!(f, "join_type={:?}", self.join_type)?;
+ }
+ writeln!(f, "on={}", on)
}
}
}
diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt
index 465b09e653..e0459b6da2 100644
--- a/datafusion/sqllogictest/test_files/explain_tree.slt
+++ b/datafusion/sqllogictest/test_files/explain_tree.slt
@@ -95,6 +95,44 @@ WITH ORDER (a ASC, b ASC, c ASC)
LOCATION '../core/tests/data/window_2.csv'
OPTIONS ('format.has_header' 'true');
+statement ok
+CREATE TABLE hashjoin_datatype_table_t1_source(c1 INT, c2 BIGINT, c3 DECIMAL(5,2), c4 VARCHAR)
+AS VALUES
+(1, 86400000, 1.23, 'abc'),
+(2, 172800000, 456.00, 'def'),
+(null, 259200000, 789.000, 'ghi'),
+(3, null, -123.12, 'jkl')
+;
+
+statement ok
+CREATE TABLE hashjoin_datatype_table_t1
+AS SELECT
+ arrow_cast(c1, 'Date32') as c1,
+ arrow_cast(c2, 'Date64') as c2,
+ c3,
+ arrow_cast(c4, 'Dictionary(Int32, Utf8)') as c4
+FROM
+ hashjoin_datatype_table_t1_source
+
+statement ok
+CREATE TABLE hashjoin_datatype_table_t2_source(c1 INT, c2 BIGINT, c3 DECIMAL(10,2), c4 VARCHAR)
+AS VALUES
+(1, 86400000, -123.12, 'abc'),
+(null, null, 100000.00, 'abcdefg'),
+(null, 259200000, 0.00, 'qwerty'),
+(3, null, 789.000, 'qwe')
+;
+
+statement ok
+CREATE TABLE hashjoin_datatype_table_t2
+AS SELECT
+ arrow_cast(c1, 'Date32') as c1,
+ arrow_cast(c2, 'Date64') as c2,
+ c3,
+ arrow_cast(c4, 'Dictionary(Int32, Utf8)') as c4
+FROM
+ hashjoin_datatype_table_t2_source
+
######## Begin Queries ########
# Filter
@@ -897,6 +935,109 @@ physical_plan
48)│ format: csv │
49)└───────────────────────────┘
+# Query with nested loop join.
+query TT
+explain select int_col from table1 where exists (select count(*) from table2);
+----
+logical_plan
+01)LeftSemi Join:
+02)--TableScan: table1 projection=[int_col], partial_filters=[Boolean(true)]
+03)--SubqueryAlias: __correlated_sq_1
+04)----Projection:
+05)------Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
+06)--------TableScan: table2 projection=[]
+physical_plan
+01)┌───────────────────────────┐
+02)│ NestedLoopJoinExec │
+03)│ -------------------- ├──────────────┐
+04)│ join_type: LeftSemi │ │
+05)└─────────────┬─────────────┘ │
+06)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+07)│ DataSourceExec ││ ProjectionExec │
+08)│ -------------------- ││ │
+09)│ files: 1 ││ │
+10)│ format: csv ││ │
+11)└───────────────────────────┘└─────────────┬─────────────┘
+12)-----------------------------┌─────────────┴─────────────┐
+13)-----------------------------│ AggregateExec │
+14)-----------------------------└─────────────┬─────────────┘
+15)-----------------------------┌─────────────┴─────────────┐
+16)-----------------------------│ CoalescePartitionsExec │
+17)-----------------------------└─────────────┬─────────────┘
+18)-----------------------------┌─────────────┴─────────────┐
+19)-----------------------------│ AggregateExec │
+20)-----------------------------└─────────────┬─────────────┘
+21)-----------------------------┌─────────────┴─────────────┐
+22)-----------------------------│ RepartitionExec │
+23)-----------------------------└─────────────┬─────────────┘
+24)-----------------------------┌─────────────┴─────────────┐
+25)-----------------------------│ DataSourceExec │
+26)-----------------------------│ -------------------- │
+27)-----------------------------│ files: 1 │
+28)-----------------------------│ format: parquet │
+29)-----------------------------└───────────────────────────┘
+
+# Query with cross join.
+query TT
+explain select * from table1 cross join table2 ;
+----
+logical_plan
+01)Cross Join:
+02)--TableScan: table1 projection=[int_col, string_col, bigint_col, date_col]
+03)--TableScan: table2 projection=[int_col, string_col, bigint_col, date_col]
+physical_plan
+01)┌───────────────────────────┐
+02)│ CrossJoinExec ├──────────────┐
+03)└─────────────┬─────────────┘ │
+04)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+05)│ DataSourceExec ││ RepartitionExec │
+06)│ -------------------- ││ │
+07)│ files: 1 ││ │
+08)│ format: csv ││ │
+09)└───────────────────────────┘└─────────────┬─────────────┘
+10)-----------------------------┌─────────────┴─────────────┐
+11)-----------------------------│ DataSourceExec │
+12)-----------------------------│ -------------------- │
+13)-----------------------------│ files: 1 │
+14)-----------------------------│ format: parquet │
+15)-----------------------------└───────────────────────────┘
+
+
+# Query with sort merge join.
+statement ok
+set datafusion.optimizer.prefer_hash_join = false;
+
+query TT
+explain select * from hashjoin_datatype_table_t1 t1 join hashjoin_datatype_table_t2 t2 on t1.c1 = t2.c1
+----
+logical_plan
+01)Inner Join: t1.c1 = t2.c1
+02)--SubqueryAlias: t1
+03)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
+04)--SubqueryAlias: t2
+05)----TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4]
+physical_plan
+01)┌───────────────────────────┐
+02)│ SortMergeJoinExec │
+03)│ -------------------- ├──────────────┐
+04)│ on: (c1@0 = c1@0) │ │
+05)└─────────────┬─────────────┘ │
+06)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+07)│ SortExec ││ SortExec │
+08)│ -------------------- ││ -------------------- │
+09)│ sort keys: [c1@0 ASC] ││ sort keys: [c1@0 ASC] │
+10)└─────────────┬─────────────┘└─────────────┬─────────────┘
+11)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+12)│ DataSourceExec ││ DataSourceExec │
+13)│ -------------------- ││ -------------------- │
+14)│ bytes: 6040 ││ bytes: 6040 │
+15)│ format: memory ││ format: memory │
+16)│ rows: 1 ││ rows: 1 │
+17)└───────────────────────────┘└───────────────────────────┘
+
+statement ok
+set datafusion.optimizer.prefer_hash_join = true;
+
# cleanup
statement ok
drop table table1;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]