korowa commented on code in PR #12458:
URL: https://github.com/apache/datafusion/pull/12458#discussion_r1760038248
##########
datafusion/optimizer/src/extract_equijoin_predicate.rs:
##########
@@ -67,67 +67,145 @@ impl OptimizerRule for ExtractEquijoinPredicate {
_config: &dyn OptimizerConfig,
) -> Result<Transformed<LogicalPlan>> {
match plan {
- LogicalPlan::Join(Join {
- left,
- right,
- mut on,
- filter: Some(expr),
- join_type,
- join_constraint,
- schema,
- null_equals_null,
- }) => {
- let left_schema = left.schema();
- let right_schema = right.schema();
- let (equijoin_predicates, non_equijoin_expr) =
- split_eq_and_noneq_join_predicate(expr, left_schema,
right_schema)?;
-
- if !equijoin_predicates.is_empty() {
- on.extend(equijoin_predicates);
- Ok(Transformed::yes(LogicalPlan::Join(Join {
- left,
- right,
- on,
- filter: non_equijoin_expr,
- join_type,
- join_constraint,
- schema,
- null_equals_null,
- })))
- } else {
- Ok(Transformed::no(LogicalPlan::Join(Join {
- left,
- right,
- on,
- filter: non_equijoin_expr,
- join_type,
- join_constraint,
- schema,
- null_equals_null,
- })))
- }
- }
+ LogicalPlan::Join(join) => extract_equijoin_predicate(join),
_ => Ok(Transformed::no(plan)),
}
}
}
+fn extract_equijoin_predicate(join: Join) -> Result<Transformed<LogicalPlan>> {
+ fn update_join_predicate(
+ join: Join,
+ extra_on: Vec<EquijoinPredicate>,
+ filter: Option<Expr>,
+ null_equals_null: bool,
+ ) -> Transformed<LogicalPlan> {
+ if extra_on.is_empty() {
+ Transformed::no(LogicalPlan::Join(join))
+ } else {
+ let mut on = join.on;
+ on.extend(extra_on);
+ Transformed::yes(LogicalPlan::Join(Join {
+ left: join.left,
+ right: join.right,
+ on,
+ filter,
+ join_type: join.join_type,
+ join_constraint: join.join_constraint,
+ schema: join.schema,
+ null_equals_null,
+ }))
+ }
+ }
+ if join.filter.is_none() {
+ return Ok(Transformed::no(LogicalPlan::Join(join)));
+ }
+ let expr = join.filter.as_ref().unwrap();
Review Comment:
Will let-else work here to avoid unwrap?
##########
datafusion/optimizer/src/eliminate_cross_join.rs:
##########
@@ -96,6 +96,7 @@ impl OptimizerRule for EliminateCrossJoin {
filter.input.as_ref(),
LogicalPlan::Join(Join {
join_type: JoinType::Inner,
+ null_equals_null: false,
Review Comment:
And also, not sure that this rule shouldn't have any interactions with
`null_equals_null = true` joins -- what is the current behavior of such queries
and won't this additional filter change it drastically?
##########
datafusion/sqllogictest/test_files/join.slt:
##########
@@ -766,6 +766,50 @@ set datafusion.execution.target_partitions = 4;
statement ok
set datafusion.optimizer.repartition_joins = false;
+statement ok
+DROP TABLE t1;
+
+statement ok
+DROP TABLE t2;
+
+# tables for join nulls equals null
+statement ok
+CREATE TABLE IF NOT EXISTS t1(t1_id INT NULL, t1_int INT) AS VALUES
+(11, 1),
+(22, 2),
+(NULL, 3);
+
+statement ok
+CREATE TABLE IF NOT EXISTS t2(t2_id INT NULL, t2_int INT) AS VALUES
+(11, 3),
+(33, 1),
+(NULL, 5),
+(NULL, 6);
+
+
+# IS NOT DISTRINCT can be transformed into equijoin
+query TT
+EXPLAIN SELECT t1_id, t1_int, t2_int FROM t1 JOIN t2 ON t1_id IS NOT DISTINCT
from t2_id
Review Comment:
That would be useful to also have the following test cases
- demonstrating that equality predicates have priority over "is not distinct
from" when picking ON conditions from the filter
- how these joins will work with `eliminate_cross_join` rule (which if I'm
not mistaken also works for filter over inner joins event with ON conditions)
- how these join will be treated by `push_down_filter` -- in some cases it
seems to be able to push filters into ON conditions -- if so, resulting query
won't be correct in case of null_equals_null joins
##########
datafusion/optimizer/src/extract_equijoin_predicate.rs:
##########
@@ -67,67 +67,145 @@ impl OptimizerRule for ExtractEquijoinPredicate {
_config: &dyn OptimizerConfig,
) -> Result<Transformed<LogicalPlan>> {
match plan {
- LogicalPlan::Join(Join {
- left,
- right,
- mut on,
- filter: Some(expr),
- join_type,
- join_constraint,
- schema,
- null_equals_null,
- }) => {
- let left_schema = left.schema();
- let right_schema = right.schema();
- let (equijoin_predicates, non_equijoin_expr) =
- split_eq_and_noneq_join_predicate(expr, left_schema,
right_schema)?;
-
- if !equijoin_predicates.is_empty() {
- on.extend(equijoin_predicates);
- Ok(Transformed::yes(LogicalPlan::Join(Join {
- left,
- right,
- on,
- filter: non_equijoin_expr,
- join_type,
- join_constraint,
- schema,
- null_equals_null,
- })))
- } else {
- Ok(Transformed::no(LogicalPlan::Join(Join {
- left,
- right,
- on,
- filter: non_equijoin_expr,
- join_type,
- join_constraint,
- schema,
- null_equals_null,
- })))
- }
- }
+ LogicalPlan::Join(join) => extract_equijoin_predicate(join),
_ => Ok(Transformed::no(plan)),
}
}
}
+fn extract_equijoin_predicate(join: Join) -> Result<Transformed<LogicalPlan>> {
+ fn update_join_predicate(
+ join: Join,
+ extra_on: Vec<EquijoinPredicate>,
+ filter: Option<Expr>,
+ null_equals_null: bool,
+ ) -> Transformed<LogicalPlan> {
+ if extra_on.is_empty() {
+ Transformed::no(LogicalPlan::Join(join))
+ } else {
+ let mut on = join.on;
+ on.extend(extra_on);
+ Transformed::yes(LogicalPlan::Join(Join {
+ left: join.left,
+ right: join.right,
+ on,
+ filter,
+ join_type: join.join_type,
+ join_constraint: join.join_constraint,
+ schema: join.schema,
+ null_equals_null,
+ }))
+ }
+ }
+ if join.filter.is_none() {
+ return Ok(Transformed::no(LogicalPlan::Join(join)));
+ }
+ let expr = join.filter.as_ref().unwrap();
+
+ let left_schema = join.left.schema();
+ let right_schema = join.right.schema();
+
+ if join.on.is_empty() {
+ let (eq_predicates, null_equals_null, non_eq_expr) =
+ split_eq_and_noneq_join_predicate(expr, left_schema,
right_schema)?;
+ Ok(update_join_predicate(
+ join,
+ eq_predicates,
+ non_eq_expr,
+ null_equals_null,
+ ))
+ } else if join.null_equals_null {
+ let (eq_predicates, non_eq_expr) =
+ split_eq_and_noneq_join_predicate_nulls_eq(expr, left_schema,
right_schema)?;
+
+ Ok(update_join_predicate(
+ join,
+ eq_predicates,
+ non_eq_expr,
+ true,
+ ))
+ } else {
+ let (eq_predicates, non_eq_expr) =
+ split_eq_and_noneq_join_predicate_nulls_not_eq(
+ expr,
+ left_schema,
+ right_schema,
+ )?;
+
+ Ok(update_join_predicate(
+ join,
+ eq_predicates,
+ non_eq_expr,
+ false,
+ ))
+ }
+}
+
fn split_eq_and_noneq_join_predicate(
- filter: Expr,
+ filter: &Expr,
+ left_schema: &DFSchema,
+ right_schema: &DFSchema,
+) -> Result<(Vec<EquijoinPredicate>, bool, Option<Expr>)> {
+ let (eq, noneq) = split_eq_and_noneq_join_predicate_nulls_not_eq(
+ filter,
+ left_schema,
+ right_schema,
+ )?;
+ if !eq.is_empty() {
+ Ok((eq, false, noneq))
+ } else {
+ let (eq, noneq) = split_eq_and_noneq_join_predicate_nulls_eq(
+ filter,
+ left_schema,
+ right_schema,
+ )?;
+ Ok((eq, true, noneq))
+ }
+}
+
+fn split_eq_and_noneq_join_predicate_nulls_eq(
+ filter: &Expr,
+ left_schema: &DFSchema,
+ right_schema: &DFSchema,
+) -> Result<(Vec<EquijoinPredicate>, Option<Expr>)> {
+ split_eq_and_noneq_join_predicate_impl(
+ filter,
+ left_schema,
+ right_schema,
+ Operator::IsNotDistinctFrom,
+ )
+}
+
+fn split_eq_and_noneq_join_predicate_nulls_not_eq(
+ filter: &Expr,
+ left_schema: &DFSchema,
+ right_schema: &DFSchema,
+) -> Result<(Vec<EquijoinPredicate>, Option<Expr>)> {
+ split_eq_and_noneq_join_predicate_impl(
+ filter,
+ left_schema,
+ right_schema,
+ Operator::Eq,
+ )
+}
+
+fn split_eq_and_noneq_join_predicate_impl(
+ filter: &Expr,
left_schema: &DFSchema,
right_schema: &DFSchema,
+ eq_op: Operator,
) -> Result<(Vec<EquijoinPredicate>, Option<Expr>)> {
- let exprs = split_conjunction_owned(filter);
+ let exprs = split_conjunction(filter);
Review Comment:
Just a note: this part (along with cloning exprs) in `else`s of this
function was changed in https://github.com/apache/datafusion/pull/10165 as a
part of reducing expr cloning in optimizer initiative, so, probably, in this
place it's preferred to avoid cloning, unless it's required.
##########
datafusion/optimizer/src/eliminate_cross_join.rs:
##########
@@ -96,6 +96,7 @@ impl OptimizerRule for EliminateCrossJoin {
filter.input.as_ref(),
LogicalPlan::Join(Join {
join_type: JoinType::Inner,
+ null_equals_null: false,
Review Comment:
This rule also has it's own logic for identifying join keys in
`extract_possible_join_keys` function. Shouldn't `is not distinct from`
operator be supported there (perhaps by reusing logic from
`extract_equijoin_predicated_rule`)?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]