viirya commented on code in PR #8626:
URL: https://github.com/apache/arrow-datafusion/pull/8626#discussion_r1436134110
##########
datafusion/optimizer/src/eliminate_cross_join.rs:
##########
@@ -47,81 +48,93 @@ impl EliminateCrossJoin {
/// For above queries, the join predicate is available in filters and they are
moved to
/// join nodes appropriately
/// This fix helps to improve the performance of TPCH Q19. issue#78
-///
impl OptimizerRule for EliminateCrossJoin {
fn try_optimize(
&self,
plan: &LogicalPlan,
config: &dyn OptimizerConfig,
) -> Result<Option<LogicalPlan>> {
- match plan {
+ let mut possible_join_keys: Vec<(Expr, Expr)> = vec![];
+ let mut all_inputs: Vec<LogicalPlan> = vec![];
+ let parent_predicate = match plan {
LogicalPlan::Filter(filter) => {
- let input = filter.input.as_ref().clone();
-
- let mut possible_join_keys: Vec<(Expr, Expr)> = vec![];
- let mut all_inputs: Vec<LogicalPlan> = vec![];
- let did_flat_successfully = match &input {
+ let input = filter.input.as_ref();
+ match input {
LogicalPlan::Join(Join {
join_type: JoinType::Inner,
..
})
- | LogicalPlan::CrossJoin(_) => try_flatten_join_inputs(
- &input,
- &mut possible_join_keys,
- &mut all_inputs,
- )?,
+ | LogicalPlan::CrossJoin(_) => {
+ if !try_flatten_join_inputs(
+ input,
+ &mut possible_join_keys,
+ &mut all_inputs,
+ )? {
+ return Ok(None);
+ }
+ extract_possible_join_keys(
+ &filter.predicate,
+ &mut possible_join_keys,
+ )?;
+ Some(&filter.predicate)
+ }
_ => {
return utils::optimize_children(self, plan, config);
}
- };
-
- if !did_flat_successfully {
+ }
+ }
+ LogicalPlan::Join(Join {
+ join_type: JoinType::Inner,
+ ..
+ }) => {
+ if !try_flatten_join_inputs(
+ plan,
+ &mut possible_join_keys,
+ &mut all_inputs,
+ )? {
return Ok(None);
}
+ None
+ }
Review Comment:
Oh, I meant previously this rule doesn't cover the case of reordering joins
to eliminate CrossJoin without a Filter on top of Join (because it did only
match `LogicalPlan::Filter(filter)`. So if adding this new matching case
`LogicalPlan::Join` is to address it, it means we don't have test case covering
it previously. Then we may need to add one.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]