alamb commented on code in PR #10430: URL: https://github.com/apache/datafusion/pull/10430#discussion_r1594710170
########## datafusion/optimizer/src/eliminate_cross_join.rs: ########## @@ -250,90 +265,67 @@ fn find_inner_join( })) } -fn intersect( - accum: &mut Vec<(Expr, Expr)>, - vec1: &[(Expr, Expr)], - vec2: &[(Expr, Expr)], -) { - if !(vec1.is_empty() || vec2.is_empty()) { - for x1 in vec1.iter() { - for x2 in vec2.iter() { - if x1.0 == x2.0 && x1.1 == x2.1 || x1.1 == x2.0 && x1.0 == x2.1 { - accum.push((x1.0.clone(), x1.1.clone())); - } - } - } - } -} - /// Extract join keys from a WHERE clause -fn extract_possible_join_keys(expr: &Expr, accum: &mut Vec<(Expr, Expr)>) -> Result<()> { +fn extract_possible_join_keys(expr: &Expr, join_keys: &mut JoinKeySet) { if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr { match op { Operator::Eq => { - // Ensure that we don't add the same Join keys multiple times - if !(accum.contains(&(*left.clone(), *right.clone())) - || accum.contains(&(*right.clone(), *left.clone()))) - { - accum.push((*left.clone(), *right.clone())); - } + // insert handles ensuring we don't add the same Join keys multiple times + join_keys.insert(left, right); } Operator::And => { - extract_possible_join_keys(left, accum)?; - extract_possible_join_keys(right, accum)? + extract_possible_join_keys(left, join_keys); + extract_possible_join_keys(right, join_keys) } // Fix for issue#78 join predicates from inside of OR expr also pulled up properly. Operator::Or => { - let mut left_join_keys = vec![]; - let mut right_join_keys = vec![]; + let mut left_join_keys = JoinKeySet::new(); + let mut right_join_keys = JoinKeySet::new(); - extract_possible_join_keys(left, &mut left_join_keys)?; - extract_possible_join_keys(right, &mut right_join_keys)?; + extract_possible_join_keys(left, &mut left_join_keys); + extract_possible_join_keys(right, &mut right_join_keys); - intersect(accum, &left_join_keys, &right_join_keys) + join_keys.insert_intersection(left_join_keys, right_join_keys) } _ => (), }; } - Ok(()) } /// Remove join expressions from a filter expression -/// Returns Some() when there are few remaining predicates in filter_expr -/// Returns None otherwise -fn remove_join_expressions( - expr: &Expr, - join_keys: &HashSet<(Expr, Expr)>, -) -> Result<Option<Expr>> { +/// +/// # Returns +/// * `Some()` when there are few remaining predicates in filter_expr +/// * `None` otherwise +fn remove_join_expressions(expr: Expr, join_keys: &JoinKeySet) -> Option<Expr> { match expr { - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - match op { - Operator::Eq => { - if join_keys.contains(&(*left.clone(), *right.clone())) - || join_keys.contains(&(*right.clone(), *left.clone())) Review Comment: there were several `clone`s (deep copies) here too to simply check if the the join keys contained the expressions which have been removed. ########## datafusion/optimizer/src/eliminate_cross_join.rs: ########## @@ -150,7 +150,7 @@ impl OptimizerRule for EliminateCrossJoin { /// Returns a boolean indicating whether the flattening was successful. fn try_flatten_join_inputs( plan: &LogicalPlan, - possible_join_keys: &mut Vec<(Expr, Expr)>, Review Comment: This is the core change in this PR -- instead of passing around &HashSet or `&Vec()`, they are encapsulated into a struct now which is much more careful to clone only when needed ########## datafusion/optimizer/src/eliminate_cross_join.rs: ########## @@ -131,7 +131,7 @@ impl OptimizerRule for EliminateCrossJoin { .map(|f| Some(LogicalPlan::Filter(f))) } else { // Remove join expressions from filter: - match remove_join_expressions(predicate, &all_join_keys)? { + match remove_join_expressions(predicate.clone(), &all_join_keys) { Review Comment: This clone used to happen inside `remove_join_expressions` so I moved it up so it is clearer (and sets the stage to avoid the clone entirely in the next PR) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org