alamb commented on code in PR #10963:
URL: https://github.com/apache/datafusion/pull/10963#discussion_r1645016269


##########
datafusion/optimizer/src/propagate_empty_relation.rs:
##########
@@ -400,6 +433,182 @@ mod tests {
         assert_together_optimized_plan_eq(plan, expected)
     }
 
+    #[test]
+    fn test_left_join_empty_left_table() -> Result<()> {
+        let left_table_scan = test_table_scan()?;
+        let left = LogicalPlanBuilder::from(left_table_scan)
+            .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+            .build()?;
+
+        let right_table_scan = test_table_scan_with_name("test2")?;
+        let right = LogicalPlanBuilder::from(right_table_scan).build()?;
+
+        let plan = LogicalPlanBuilder::from(left)
+            .join_using(
+                right,
+                JoinType::Left,
+                vec![Column::from_name("a".to_string())],
+            )?
+            .build()?;
+
+        let expected = "EmptyRelation";
+        assert_together_optimized_plan_eq(plan, expected)
+    }
+
+    #[test]
+    fn test_right_join_empty_right_table() -> Result<()> {
+        let left_table_scan = test_table_scan()?;
+        let left = LogicalPlanBuilder::from(left_table_scan).build()?;
+
+        let right_table_scan = test_table_scan_with_name("test2")?;
+        let right = LogicalPlanBuilder::from(right_table_scan)
+            .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+            .build()?;
+
+        let plan = LogicalPlanBuilder::from(left)
+            .join_using(
+                right,
+                JoinType::Right,
+                vec![Column::from_name("a".to_string())],
+            )?
+            .build()?;
+
+        let expected = "EmptyRelation";
+        assert_together_optimized_plan_eq(plan, expected)
+    }
+
+    #[test]
+    fn test_left_semi_join_empty_left_table() -> Result<()> {
+        let left_table_scan = test_table_scan()?;
+        let left = LogicalPlanBuilder::from(left_table_scan)
+            .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+            .build()?;
+
+        let right_table_scan = test_table_scan_with_name("test2")?;
+        let right = LogicalPlanBuilder::from(right_table_scan).build()?;
+
+        let plan = LogicalPlanBuilder::from(left)
+            .join_using(
+                right,
+                JoinType::LeftSemi,
+                vec![Column::from_name("a".to_string())],
+            )?
+            .build()?;
+
+        let expected = "EmptyRelation";
+        assert_together_optimized_plan_eq(plan, expected)
+    }
+
+    #[test]
+    fn test_left_semi_join_empty_right_table() -> Result<()> {
+        let left_table_scan = test_table_scan()?;
+        let left = LogicalPlanBuilder::from(left_table_scan).build()?;
+
+        let right_table_scan = test_table_scan_with_name("test2")?;
+        let right = LogicalPlanBuilder::from(right_table_scan)
+            .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+            .build()?;
+
+        let plan = LogicalPlanBuilder::from(left)
+            .join_using(
+                right,
+                JoinType::LeftSemi,
+                vec![Column::from_name("a".to_string())],
+            )?
+            .build()?;
+
+        let expected = "EmptyRelation";
+        assert_together_optimized_plan_eq(plan, expected)
+    }
+
+    #[test]
+    fn test_right_semi_join_empty_right_table() -> Result<()> {
+        let left_table_scan = test_table_scan()?;
+        let left = LogicalPlanBuilder::from(left_table_scan).build()?;
+
+        let right_table_scan = test_table_scan_with_name("test2")?;
+        let right = LogicalPlanBuilder::from(right_table_scan)
+            .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+            .build()?;
+
+        let plan = LogicalPlanBuilder::from(left)
+            .join_using(
+                right,
+                JoinType::RightSemi,
+                vec![Column::from_name("a".to_string())],
+            )?
+            .build()?;
+
+        let expected = "EmptyRelation";
+        assert_together_optimized_plan_eq(plan, expected)
+    }
+
+    #[test]
+    fn test_right_semi_join_empty_left_table() -> Result<()> {
+        let left_table_scan = test_table_scan()?;
+        let left = LogicalPlanBuilder::from(left_table_scan)
+            .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+            .build()?;
+
+        let right_table_scan = test_table_scan_with_name("test2")?;
+        let right = LogicalPlanBuilder::from(right_table_scan).build()?;
+
+        let plan = LogicalPlanBuilder::from(left)
+            .join_using(
+                right,
+                JoinType::RightSemi,
+                vec![Column::from_name("a".to_string())],
+            )?
+            .build()?;
+
+        let expected = "EmptyRelation";
+        assert_together_optimized_plan_eq(plan, expected)
+    }
+
+    #[test]
+    fn test_right_anti_join_empty_right_table() -> Result<()> {

Review Comment:
   Can you please also add the negative cases ? Like test right 
antijoin_empty_left_table and assert it wasn't rewritten?



##########
datafusion/optimizer/src/propagate_empty_relation.rs:
##########
@@ -400,6 +433,182 @@ mod tests {
         assert_together_optimized_plan_eq(plan, expected)
     }
 
+    #[test]

Review Comment:
   I think having some end to end slt would be good too, but for these rules I 
think having good unit tests are key as well



##########
datafusion/optimizer/src/propagate_empty_relation.rs:
##########
@@ -94,29 +94,62 @@ impl OptimizerRule for PropagateEmptyRelation {
                 Ok(Transformed::no(LogicalPlan::CrossJoin(join.clone())))
             }
 
-            LogicalPlan::Join(ref join) if join.join_type == Inner => {
+            LogicalPlan::Join(ref join) => {
                 // TODO: For Join, more join type need to be careful:
-                // For LeftOuter/LeftSemi/LeftAnti Join, only the left side is 
empty, the Join result is empty.
-                // For LeftSemi Join, if the right side is empty, the Join 
result is empty.
                 // For LeftAnti Join, if the right side is empty, the Join 
result is left side(should exclude null ??).
-                // For RightOuter/RightSemi/RightAnti Join, only the right 
side is empty, the Join result is empty.
-                // For RightSemi Join, if the left side is empty, the Join 
result is empty.
                 // For RightAnti Join, if the left side is empty, the Join 
result is right side(should exclude null ??).
                 // For Full Join, only both sides are empty, the Join result 
is empty.
                 // For LeftOut/Full Join, if the right side is empty, the Join 
can be eliminated with a Projection with left side
                 // columns + right side columns replaced with null values.
                 // For RightOut/Full Join, if the left side is empty, the Join 
can be eliminated with a Projection with right side
                 // columns + left side columns replaced with null values.
                 let (left_empty, right_empty) = 
binary_plan_children_is_empty(&plan)?;
-                if left_empty || right_empty {
-                    return Ok(Transformed::yes(LogicalPlan::EmptyRelation(
-                        EmptyRelation {
+

Review Comment:
   I double checked that this code faithfully implements the semantics in the 
comments, and I did some mental review and verification to convince myself that 
these semantics are correct
   
   i wonder if @jackwener  has a moment to double check too?



##########
datafusion/sqllogictest/test_files/subquery.slt:
##########
@@ -613,10 +613,7 @@ SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM 
t2 WHERE t2_id = t1_id
 query TT
 explain SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE 
t2_id = t1_id limit 0)
 ----
-logical_plan
-01)LeftSemi Join: t1.t1_id = __correlated_sq_1.t2_id
-02)--TableScan: t1 projection=[t1_id, t1_name]
-03)--EmptyRelation
+logical_plan EmptyRelation

Review Comment:
   > causing the entire join to be optimized out with the additional join types 
now propagating emptyrelation.
   
   I agree with your assesment.  I think the `limit 0` means this plan is 
better 
   
   In terms of preserving the test of the subquery rewrite I think it does 
(otherwise it wouldn't be rewritten to a SEMI JOIN and thus the join couldn't 
be removed)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to