kosiew commented on code in PR #22161:
URL: https://github.com/apache/datafusion/pull/22161#discussion_r3327953206


##########
datafusion/optimizer/src/optimize_projections/mod.rs:
##########
@@ -484,6 +504,135 @@ fn optimize_projections(
     }
 }
 
+fn remove_row_preserving_unused_unnest_from_duplicate_insensitive_input(
+    input: &LogicalPlan,
+    required_exprs: &[Expr],
+) -> Result<Option<LogicalPlan>> {
+    if required_exprs.iter().any(Expr::is_volatile) {
+        return Ok(None);
+    }
+
+    match input {
+        LogicalPlan::Unnest(unnest)
+            if can_remove_unused_unnest_for_exprs(unnest, required_exprs)? =>
+        {
+            Ok(Some(Arc::unwrap_or_clone(Arc::clone(&unnest.input))))
+        }
+        LogicalPlan::Projection(projection) => {
+            let LogicalPlan::Unnest(unnest) = projection.input.as_ref() else {
+                return Ok(None);
+            };
+            let required_projection_exprs = RequiredIndices::new()
+                .with_exprs(&projection.schema, required_exprs.iter())
+                .get_at_indices(&projection.expr);
+
+            if can_remove_unused_unnest_for_exprs(unnest, 
&required_projection_exprs)? {
+                Projection::try_new(required_projection_exprs, 
Arc::clone(&unnest.input))
+                    .map(LogicalPlan::Projection)
+                    .map(Some)
+            } else {
+                Ok(None)
+            }
+        }
+        _ => Ok(None),
+    }
+}
+
+fn can_remove_unused_unnest_for_exprs(unnest: &Unnest, exprs: &[Expr]) -> 
Result<bool> {
+    // This rewrite is only safe when UNNEST cannot eliminate input rows, no
+    // required expression depends on an unnested output column, and row
+    // multiplication cannot affect expression evaluation.
+    if !unnest_preserves_at_least_one_row_per_input(unnest)
+        || exprs.iter().any(Expr::is_volatile)
+    {
+        return Ok(false);
+    }
+
+    let mut columns = HashSet::new();
+    for expr in exprs {
+        expr_to_columns(expr, &mut columns)?;
+    }
+
+    for column in columns {
+        let output_index = unnest.schema.index_of_column(&column)?;
+        if is_unnested_input_index(unnest, 
unnest.dependency_indices[output_index]) {
+            return Ok(false);
+        }
+    }
+
+    Ok(true)
+}
+
+fn is_unnested_input_index(unnest: &Unnest, input_index: usize) -> bool {
+    unnest
+        .list_type_columns
+        .iter()
+        .any(|(idx, _)| *idx == input_index)
+        || unnest.struct_type_columns.contains(&input_index)
+}
+
+fn unnest_preserves_at_least_one_row_per_input(unnest: &Unnest) -> bool {

Review Comment:
   - Moved is_unnested_input_index helper onto Unnest:
        - added Unnest::is_unnested_input_index(...)
    - Removed local optimizer helper:
        - fn is_unnested_input_index(unnest: &Unnest, ...)
   
    Not moved:
    - unnest_preserves_at_least_one_row_per_input
    - unnest_input_expr
    - literal_non_empty_list
   
    Those stayed optimizer-local because they encode this rule’s conservative 
proof, not general
    Unnest behavior.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to