neilconway commented on code in PR #21265:
URL: https://github.com/apache/datafusion/pull/21265#discussion_r3037543024


##########
datafusion/optimizer/src/optimize_projections/mod.rs:
##########
@@ -2311,6 +2320,56 @@ mod tests {
         )
     }
 
+    #[test]
+    fn optimize_projections_left_mark_join_with_outer_join() -> Result<()> {
+        use datafusion_expr::utils::disjunction;
+        use datafusion_expr::{exists, out_ref_col};
+
+        let table_a = test_table_scan_with_name("a")?;
+        let table_b = test_table_scan_with_name("b")?;
+
+        let sq_a = Arc::new(
+            LogicalPlanBuilder::from(test_table_scan_with_name("sq_a")?)
+                .filter(col("sq_a.a").eq(out_ref_col(DataType::UInt32, 
"a.a")))?
+                .project(vec![lit(1)])?
+                .build()?,
+        );
+
+        let sq_b = Arc::new(
+            LogicalPlanBuilder::from(test_table_scan_with_name("sq_b")?)
+                .filter(col("sq_b.b").eq(out_ref_col(DataType::UInt32, 
"a.b")))?
+                .project(vec![lit(1)])?
+                .build()?,
+        );
+
+        let exists_a = exists(sq_a);
+        let exists_b = exists(sq_b);
+
+        let plan = LogicalPlanBuilder::from(table_a)
+            .filter(disjunction(vec![exists_a, exists_b]).unwrap())?
+            .join(table_b, JoinType::Left, (vec!["a"], vec!["a"]), None)?
+            .build()?;
+
+        let optimizer = Optimizer::new();
+        let config = OptimizerContext::new();
+        let result = optimizer.optimize(plan, &config, observe);
+        assert!(
+            result.is_ok(),
+            "Full optimizer should not fail with schema mismatch: {:?}",
+            result.err()
+        );
+        let optimized = result.unwrap();
+        let plan_str = format!("{optimized}");
+        // Verify no double projection — the projection we add to strip mark
+        // columns should be merged by optimize_projections, not left stacked.
+        assert!(
+            !plan_str.contains("Projection: a.a, a.b, a.c\n            
Projection:"),

Review Comment:
   Testing for an expected plan via string containment seems a bit unfortunate. 
Can't we check for the plan shape we expect, e.g., via 
`assert_optimized_plan_equal!`?



##########
datafusion/optimizer/src/optimize_projections/mod.rs:
##########
@@ -757,21 +758,29 @@ fn outer_columns_helper_multi<'a, 'b>(
 /// adjusted based on the join type.
 fn split_join_requirements(
     left_len: usize,
+    right_len: usize,
     indices: RequiredIndices,
     join_type: &JoinType,
 ) -> (RequiredIndices, RequiredIndices) {
     match join_type {
         // In these cases requirements are split between left/right children:
-        JoinType::Inner
-        | JoinType::Left
-        | JoinType::Right
-        | JoinType::Full
-        | JoinType::LeftMark
-        | JoinType::RightMark => {
+        JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => 
{
             // Decrease right side indices by `left_len` so that they point to 
valid
             // positions within the right child:
             indices.split_off(left_len)
         }
+        JoinType::LeftMark => {
+            // LeftMark output: [left_cols(0..left_len), mark(left_len)]

Review Comment:
   Nit: I think writing
   
   ```
   // LeftMark output: [left_cols(0..left_len), mark]
   ```
   
   Would be a bit clearer.



##########
datafusion/optimizer/src/optimize_projections/mod.rs:
##########
@@ -2311,6 +2320,56 @@ mod tests {
         )
     }
 
+    #[test]
+    fn optimize_projections_left_mark_join_with_outer_join() -> Result<()> {
+        use datafusion_expr::utils::disjunction;
+        use datafusion_expr::{exists, out_ref_col};
+
+        let table_a = test_table_scan_with_name("a")?;
+        let table_b = test_table_scan_with_name("b")?;
+
+        let sq_a = Arc::new(
+            LogicalPlanBuilder::from(test_table_scan_with_name("sq_a")?)
+                .filter(col("sq_a.a").eq(out_ref_col(DataType::UInt32, 
"a.a")))?
+                .project(vec![lit(1)])?
+                .build()?,
+        );
+
+        let sq_b = Arc::new(
+            LogicalPlanBuilder::from(test_table_scan_with_name("sq_b")?)
+                .filter(col("sq_b.b").eq(out_ref_col(DataType::UInt32, 
"a.b")))?
+                .project(vec![lit(1)])?
+                .build()?,
+        );
+
+        let exists_a = exists(sq_a);
+        let exists_b = exists(sq_b);
+
+        let plan = LogicalPlanBuilder::from(table_a)
+            .filter(disjunction(vec![exists_a, exists_b]).unwrap())?
+            .join(table_b, JoinType::Left, (vec!["a"], vec!["a"]), None)?
+            .build()?;
+
+        let optimizer = Optimizer::new();
+        let config = OptimizerContext::new();
+        let result = optimizer.optimize(plan, &config, observe);
+        assert!(
+            result.is_ok(),
+            "Full optimizer should not fail with schema mismatch: {:?}",

Review Comment:
   Seems a bit fragile to assume the assert fails with a schema mismatch, if it 
fails.



##########
datafusion/optimizer/src/optimize_projections/mod.rs:
##########


Review Comment:
   Update this for new `right_len` parameter.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to