AssHero commented on code in PR #2750:
URL: https://github.com/apache/arrow-datafusion/pull/2750#discussion_r902103709


##########
datafusion/sql/src/planner.rs:
##########
@@ -2584,6 +2587,230 @@ fn extract_join_keys(
     }
 }
 
+/// Recursively traversese expr, if expr returns false when
+/// any inputs are null, treats columns of both sides as nonnullable columns.
+///
+/// For and/or expr, extracts from all sub exprs and merges the columns.
+/// For or expr, if one of sub exprs returns true, discards all columns from 
or expr.
+/// For IS NOT NULL/NOT expr, always returns false for NULL input.
+///     extracts columns from these exprs.
+/// For all other exprs, fall through
+fn extract_nonnullable_columns(
+    expr: &Expr,
+    columns: &mut Vec<Column>,
+    top_level: bool,
+) -> Result<()> {
+    match expr {
+        Expr::Column(col) => {
+            columns.push(col.clone());
+            Ok(())
+        }
+        Expr::BinaryExpr { left, op, right } => match op {
+            // If one of the inputs are null for these operators, the results 
should be false.
+            Operator::Eq
+            | Operator::NotEq
+            | Operator::Lt
+            | Operator::LtEq
+            | Operator::Gt
+            | Operator::GtEq => {
+                extract_nonnullable_columns(left, columns, false)?;
+                extract_nonnullable_columns(right, columns, false)
+            }
+            Operator::And => {
+                if !top_level {
+                    return Ok(());
+                }
+                extract_nonnullable_columns(left, columns, top_level)?;
+                extract_nonnullable_columns(right, columns, top_level)
+            }
+            Operator::Or => {
+                let mut left_nonnullable_cols: Vec<Column> = vec![];
+                let mut right_nonnullable_cols: Vec<Column> = vec![];
+
+                extract_nonnullable_columns(left, &mut left_nonnullable_cols, 
top_level)?;
+                extract_nonnullable_columns(
+                    right,
+                    &mut right_nonnullable_cols,
+                    top_level,
+                )?;
+
+                // for query: select *** from a left join b where b.c1 ... or 
b.c2 ...
+                // this can be reduced to inner join.
+                // for query: select *** from a left join b where a.c1 ... or 
b.c2 ...
+                // this can not be reduced.
+                // If columns of relation exist in both sub exprs, any columns 
of this relation
+                // can be added to non nullable columns.
+                if !left_nonnullable_cols.is_empty() && 
!right_nonnullable_cols.is_empty()
+                {
+                    for left_col in &left_nonnullable_cols {
+                        for right_col in &right_nonnullable_cols {
+                            if let (Some(l_rel), Some(r_rel)) =
+                                (&left_col.relation, &right_col.relation)
+                            {
+                                if l_rel == r_rel {
+                                    columns.push(left_col.clone());
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                }
+                Ok(())
+            }
+            _ => Ok(()),
+        },
+        Expr::Not(arg) => extract_nonnullable_columns(arg, columns, false),
+        Expr::IsNotNull(arg) => {
+            if !top_level {
+                return Ok(());
+            }
+            extract_nonnullable_columns(arg, columns, false)
+        }
+        _ => Ok(()),
+    }
+}
+
+/// try to reduce one outer join to inner join
+fn reduce_outer_join(
+    plan: &LogicalPlan,
+    nonnullable_cols: &Vec<Column>,
+) -> Result<Option<LogicalPlan>> {

Review Comment:
   > How about change this function to modify plan in place like
   > 
   > ```rust
   > fn reduce_outer_join(plan: mut LogicalPlan, ...)  -> Result<LogicalPlan>;
   > ```
   > 
   > This might simplify [the 
match](https://github.com/apache/arrow-datafusion/pull/2750/files#diff-c01a34949db6258aa1593f011ecf90f62cbde406acd5cdbf8b9b60b970ace1cfR2717-R2770)
   
   inplace update is better, and I'll check it later.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to