This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 965133c8d Skip filter push down on semi/anti joins (#3723)
965133c8d is described below

commit 965133c8d81c7b9efc49233fd7b9ec507dd64255
Author: Andy Grove <[email protected]>
AuthorDate: Wed Oct 5 09:47:11 2022 -0600

    Skip filter push down on semi/anti joins (#3723)
---
 datafusion/optimizer/src/filter_push_down.rs   |  9 ++++----
 datafusion/optimizer/tests/integration-test.rs | 32 ++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/datafusion/optimizer/src/filter_push_down.rs b/datafusion/optimizer/src/filter_push_down.rs
index 6383cdcf8..129766012 100644
--- a/datafusion/optimizer/src/filter_push_down.rs
+++ b/datafusion/optimizer/src/filter_push_down.rs
@@ -193,11 +193,10 @@ fn on_lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, bool)> {
             JoinType::Left => Ok((false, true)),
             JoinType::Right => Ok((true, false)),
             JoinType::Full => Ok((false, false)),
-            // Semi/Anti joins can not have join filter.
-            JoinType::Semi | JoinType::Anti => Err(DataFusionError::Internal(
-                "on_lr_is_preserved cannot be appplied to SEMI/ANTI-JOIN nodes"
-                    .to_string(),
-            )),
+            JoinType::Semi | JoinType::Anti => {
+                // filter_push_down does not yet support SEMI/ANTI joins with join conditions
+                Ok((false, false))
+            }
         },
         LogicalPlan::CrossJoin(_) => Err(DataFusionError::Internal(
             "on_lr_is_preserved cannot be applied to CROSSJOIN nodes".to_string(),
diff --git a/datafusion/optimizer/tests/integration-test.rs b/datafusion/optimizer/tests/integration-test.rs
index 6dea1a243..2d9546f13 100644
--- a/datafusion/optimizer/tests/integration-test.rs
+++ b/datafusion/optimizer/tests/integration-test.rs
@@ -69,6 +69,38 @@ fn distribute_by() -> Result<()> {
     Ok(())
 }
 
+#[test]
+fn semi_join_with_join_filter() -> Result<()> {
+    // regression test for https://github.com/apache/arrow-datafusion/issues/2888
+    let sql = "SELECT * FROM test WHERE EXISTS (\
+    SELECT * FROM test t2 WHERE test.col_int32 = t2.col_int32 \
+    AND test.col_uint32 != t2.col_uint32)";
+    let plan = test_sql(sql)?;
+    let expected = r#"Projection: test.col_int32, test.col_uint32, test.col_utf8, test.col_date32, test.col_date64
+  Semi Join: test.col_int32 = t2.col_int32 Filter: test.col_uint32 != t2.col_uint32
+    TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]
+    SubqueryAlias: t2
+      TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]"#;
+    assert_eq!(expected, format!("{:?}", plan));
+    Ok(())
+}
+
+#[test]
+fn anti_join_with_join_filter() -> Result<()> {
+    // regression test for https://github.com/apache/arrow-datafusion/issues/2888
+    let sql = "SELECT * FROM test WHERE NOT EXISTS (\
+    SELECT * FROM test t2 WHERE test.col_int32 = t2.col_int32 \
+    AND test.col_uint32 != t2.col_uint32)";
+    let plan = test_sql(sql)?;
+    let expected = r#"Projection: test.col_int32, test.col_uint32, test.col_utf8, test.col_date32, test.col_date64
+  Anti Join: test.col_int32 = t2.col_int32 Filter: test.col_uint32 != t2.col_uint32
+    TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]
+    SubqueryAlias: t2
+      TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]"#;
+    assert_eq!(expected, format!("{:?}", plan));
+    Ok(())
+}
+
 #[test]
 fn intersect() -> Result<()> {
     let sql = "SELECT col_int32, col_utf8 FROM test \

Reply via email to