This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 965133c8d Skip filter push down on semi/anti joins (#3723)
965133c8d is described below
commit 965133c8d81c7b9efc49233fd7b9ec507dd64255
Author: Andy Grove <[email protected]>
AuthorDate: Wed Oct 5 09:47:11 2022 -0600
Skip filter push down on semi/anti joins (#3723)
---
datafusion/optimizer/src/filter_push_down.rs | 9 ++++----
datafusion/optimizer/tests/integration-test.rs | 32 ++++++++++++++++++++++++++
2 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/datafusion/optimizer/src/filter_push_down.rs b/datafusion/optimizer/src/filter_push_down.rs
index 6383cdcf8..129766012 100644
--- a/datafusion/optimizer/src/filter_push_down.rs
+++ b/datafusion/optimizer/src/filter_push_down.rs
@@ -193,11 +193,10 @@ fn on_lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, bool)> {
JoinType::Left => Ok((false, true)),
JoinType::Right => Ok((true, false)),
JoinType::Full => Ok((false, false)),
- // Semi/Anti joins can not have join filter.
- JoinType::Semi | JoinType::Anti => Err(DataFusionError::Internal(
- "on_lr_is_preserved cannot be appplied to SEMI/ANTI-JOIN nodes"
- .to_string(),
- )),
+ JoinType::Semi | JoinType::Anti => {
+ // filter_push_down does not yet support SEMI/ANTI joins with join conditions
+ Ok((false, false))
+ }
},
LogicalPlan::CrossJoin(_) => Err(DataFusionError::Internal(
"on_lr_is_preserved cannot be applied to CROSSJOIN nodes".to_string(),
diff --git a/datafusion/optimizer/tests/integration-test.rs b/datafusion/optimizer/tests/integration-test.rs
index 6dea1a243..2d9546f13 100644
--- a/datafusion/optimizer/tests/integration-test.rs
+++ b/datafusion/optimizer/tests/integration-test.rs
@@ -69,6 +69,38 @@ fn distribute_by() -> Result<()> {
Ok(())
}
+#[test]
+fn semi_join_with_join_filter() -> Result<()> {
+ // regression test for https://github.com/apache/arrow-datafusion/issues/2888
+ let sql = "SELECT * FROM test WHERE EXISTS (\
+ SELECT * FROM test t2 WHERE test.col_int32 = t2.col_int32 \
+ AND test.col_uint32 != t2.col_uint32)";
+ let plan = test_sql(sql)?;
+ let expected = r#"Projection: test.col_int32, test.col_uint32, test.col_utf8, test.col_date32, test.col_date64
+ Semi Join: test.col_int32 = t2.col_int32 Filter: test.col_uint32 != t2.col_uint32
+ TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]
+ SubqueryAlias: t2
+ TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]"#;
+ assert_eq!(expected, format!("{:?}", plan));
+ Ok(())
+}
+
+#[test]
+fn anti_join_with_join_filter() -> Result<()> {
+ // regression test for https://github.com/apache/arrow-datafusion/issues/2888
+ let sql = "SELECT * FROM test WHERE NOT EXISTS (\
+ SELECT * FROM test t2 WHERE test.col_int32 = t2.col_int32 \
+ AND test.col_uint32 != t2.col_uint32)";
+ let plan = test_sql(sql)?;
+ let expected = r#"Projection: test.col_int32, test.col_uint32, test.col_utf8, test.col_date32, test.col_date64
+ Anti Join: test.col_int32 = t2.col_int32 Filter: test.col_uint32 != t2.col_uint32
+ TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]
+ SubqueryAlias: t2
+ TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]"#;
+ assert_eq!(expected, format!("{:?}", plan));
+ Ok(())
+}
+
#[test]
fn intersect() -> Result<()> {
let sql = "SELECT col_int32, col_utf8 FROM test \