maropu commented on a change in pull request #31318:
URL: https://github.com/apache/spark/pull/31318#discussion_r578043404
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
##########
@@ -352,10 +352,7 @@ object BooleanSimplification extends Rule[LogicalPlan]
with PredicateHelper {
val lhs = splitDisjunctivePredicates(left)
Review comment:
Could you update the comment about how to handle this case?
https://github.com/apache/spark/pull/31318/files#diff-d43484d56a4d9991066b5c00d12ec2465c75131e055fc02ee7fb6dfd45b5006fR347-R351
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
##########
@@ -366,6 +363,16 @@ object BooleanSimplification extends Rule[LogicalPlan]
with PredicateHelper {
// ((c || ...) && (d || ...)) || a || b
(common :+ And(ldiff.reduce(Or), rdiff.reduce(Or))).reduce(Or)
}
+ } else {
+ // No common factors from disjunctive predicates, reduce common
factor from conjunction
Review comment:
nit: could you add an example case here like the comment above (` // (a
|| b || c || ...) && (a || b) => (a || b)`)?
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
##########
@@ -126,6 +126,56 @@ class BooleanSimplificationSuite extends PlanTest with
ExpressionEvalHelper with
'a === 'b || 'b > 3 && 'a > 3 && 'a < 5)
}
+ test("SPARK-34222: (((a && b) && a && (a && c))) => a && b && c") {
+
+ checkCondition((('a > 1 && 'b > 2) && 'a > 1 &&('a > 1 && 'c > 3)),
+ ('a > 1 && ('b > 2 && 'c > 3)))
+
+ checkCondition((('a > 1 && 'b >2) && ('a > 4 && 'b > 5) && ('a > 1 && 'c >
3)),
+ ('a > 1 && 'b > 2 && 'c > 3 && 'a > 4 && 'b > 5))
+
+ checkCondition(
+ ('a > 1 && 'b > 3 && ('a > 1 && 'b > 3 && ('a > 1 && 'b > 3 && 'c >1))),
+ ('a > 1 && 'b > 3 && 'c >1))
+
+ checkCondition(
+ (('a > 1 || 'b > 3) && (('a > 1 || 'b > 3) && 'd > 0 && (('a > 1 || 'b >
3) && 'c >1))),
+ (('a > 1 || 'b > 3) && 'd > 0 && 'c >1))
+
+ checkCondition(
+ ('a > 1 && 'b > 2 && 'a > 1 && 'c > 3),
+ ('a > 1 && 'b > 2 && 'c > 3))
+
+ checkCondition(
+ ('a > 1 && 'b > 3 && 'a > 1 ) || ('a > 1 && 'b > 3 && 'a > 1 && 'c > 1
)
Review comment:
nit: remove the unnecessary space between `'c > 1` and `)`.
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
##########
@@ -126,6 +126,56 @@ class BooleanSimplificationSuite extends PlanTest with
ExpressionEvalHelper with
'a === 'b || 'b > 3 && 'a > 3 && 'a < 5)
}
+ test("SPARK-34222: (((a && b) && a && (a && c))) => a && b && c") {
+
+ checkCondition((('a > 1 && 'b > 2) && 'a > 1 &&('a > 1 && 'c > 3)),
+ ('a > 1 && ('b > 2 && 'c > 3)))
+
+ checkCondition((('a > 1 && 'b >2) && ('a > 4 && 'b > 5) && ('a > 1 && 'c >
3)),
+ ('a > 1 && 'b > 2 && 'c > 3 && 'a > 4 && 'b > 5))
+
+ checkCondition(
+ ('a > 1 && 'b > 3 && ('a > 1 && 'b > 3 && ('a > 1 && 'b > 3 && 'c >1))),
+ ('a > 1 && 'b > 3 && 'c >1))
+
+ checkCondition(
+ (('a > 1 || 'b > 3) && (('a > 1 || 'b > 3) && 'd > 0 && (('a > 1 || 'b >
3) && 'c >1))),
+ (('a > 1 || 'b > 3) && 'd > 0 && 'c >1))
+
+ checkCondition(
+ ('a > 1 && 'b > 2 && 'a > 1 && 'c > 3),
+ ('a > 1 && 'b > 2 && 'c > 3))
+
+ checkCondition(
+ ('a > 1 && 'b > 3 && 'a > 1 ) || ('a > 1 && 'b > 3 && 'a > 1 && 'c > 1
)
+ , ('a > 1 && 'b > 3))
+ }
+
+ test("SPARK-34222: (((a || b) || a || (a || c))) => a || b || c") {
+
+ checkCondition((('a > 1 || 'b > 2) || 'a > 1 ||('a > 1 || 'c > 3)),
+ ('a > 1 || 'b > 2 || 'c > 3))
+
+ checkCondition((('a > 1 || 'b >2) || 'a > 4 ||('a > 1 || 'c > 3)),
+ ('a > 1 || 'b > 2 || 'a > 4 || 'c > 3))
+
+ checkCondition(
+ ('a > 1 || 'b > 3 || ( 'a > 1 || 'b > 3 || ('a > 1 || 'b > 3 || 'c > 1)))
+ , ('a > 1 || 'b > 3 || 'c > 1))
+
+ checkCondition(
+ (('a > 1 && 'b > 3) || ( ('a > 1 && 'b > 3) || (('a > 1 && 'b > 3) || 'c
> 1)))
+ , (('a > 1 && 'b > 3) || 'c > 1))
+
+ checkCondition(
+ ('a > 1 || 'b > 2 || 'a > 1 || 'c > 3),
+ ('a > 1 || 'b > 2 || 'c > 3))
+
+ checkCondition(
+ ('a > 1 || 'b > 3 || 'a > 1 ) && ('a > 1 || 'b > 3 || 'a > 1 || 'c > 1
)
+ , ('a > 1 || 'b > 3))
Review comment:
ditto
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
##########
@@ -126,6 +126,56 @@ class BooleanSimplificationSuite extends PlanTest with
ExpressionEvalHelper with
'a === 'b || 'b > 3 && 'a > 3 && 'a < 5)
}
+ test("SPARK-34222: (((a && b) && a && (a && c))) => a && b && c") {
+
Review comment:
nit: remove the unnecessary blank line.
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
##########
@@ -126,6 +126,56 @@ class BooleanSimplificationSuite extends PlanTest with
ExpressionEvalHelper with
'a === 'b || 'b > 3 && 'a > 3 && 'a < 5)
}
+ test("SPARK-34222: (((a && b) && a && (a && c))) => a && b && c") {
+
+ checkCondition((('a > 1 && 'b > 2) && 'a > 1 &&('a > 1 && 'c > 3)),
Review comment:
nit: add a space between `&&` and `(`
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
##########
@@ -126,6 +126,56 @@ class BooleanSimplificationSuite extends PlanTest with
ExpressionEvalHelper with
'a === 'b || 'b > 3 && 'a > 3 && 'a < 5)
}
+ test("SPARK-34222: (((a && b) && a && (a && c))) => a && b && c") {
+
+ checkCondition((('a > 1 && 'b > 2) && 'a > 1 &&('a > 1 && 'c > 3)),
+ ('a > 1 && ('b > 2 && 'c > 3)))
+
+ checkCondition((('a > 1 && 'b >2) && ('a > 4 && 'b > 5) && ('a > 1 && 'c >
3)),
+ ('a > 1 && 'b > 2 && 'c > 3 && 'a > 4 && 'b > 5))
+
+ checkCondition(
+ ('a > 1 && 'b > 3 && ('a > 1 && 'b > 3 && ('a > 1 && 'b > 3 && 'c >1))),
+ ('a > 1 && 'b > 3 && 'c >1))
+
+ checkCondition(
+ (('a > 1 || 'b > 3) && (('a > 1 || 'b > 3) && 'd > 0 && (('a > 1 || 'b >
3) && 'c >1))),
+ (('a > 1 || 'b > 3) && 'd > 0 && 'c >1))
+
+ checkCondition(
+ ('a > 1 && 'b > 2 && 'a > 1 && 'c > 3),
+ ('a > 1 && 'b > 2 && 'c > 3))
+
+ checkCondition(
+ ('a > 1 && 'b > 3 && 'a > 1 ) || ('a > 1 && 'b > 3 && 'a > 1 && 'c > 1
)
+ , ('a > 1 && 'b > 3))
+ }
+
+ test("SPARK-34222: (((a || b) || a || (a || c))) => a || b || c") {
+
+ checkCondition((('a > 1 || 'b > 2) || 'a > 1 ||('a > 1 || 'c > 3)),
+ ('a > 1 || 'b > 2 || 'c > 3))
+
+ checkCondition((('a > 1 || 'b >2) || 'a > 4 ||('a > 1 || 'c > 3)),
+ ('a > 1 || 'b > 2 || 'a > 4 || 'c > 3))
+
+ checkCondition(
+ ('a > 1 || 'b > 3 || ( 'a > 1 || 'b > 3 || ('a > 1 || 'b > 3 || 'c > 1)))
+ , ('a > 1 || 'b > 3 || 'c > 1))
+
+ checkCondition(
+ (('a > 1 && 'b > 3) || ( ('a > 1 && 'b > 3) || (('a > 1 && 'b > 3) || 'c
> 1)))
+ , (('a > 1 && 'b > 3) || 'c > 1))
Review comment:
ditto
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
##########
@@ -126,6 +126,56 @@ class BooleanSimplificationSuite extends PlanTest with
ExpressionEvalHelper with
'a === 'b || 'b > 3 && 'a > 3 && 'a < 5)
}
+ test("SPARK-34222: (((a && b) && a && (a && c))) => a && b && c") {
+
+ checkCondition((('a > 1 && 'b > 2) && 'a > 1 &&('a > 1 && 'c > 3)),
+ ('a > 1 && ('b > 2 && 'c > 3)))
+
+ checkCondition((('a > 1 && 'b >2) && ('a > 4 && 'b > 5) && ('a > 1 && 'c >
3)),
+ ('a > 1 && 'b > 2 && 'c > 3 && 'a > 4 && 'b > 5))
+
+ checkCondition(
+ ('a > 1 && 'b > 3 && ('a > 1 && 'b > 3 && ('a > 1 && 'b > 3 && 'c >1))),
+ ('a > 1 && 'b > 3 && 'c >1))
+
+ checkCondition(
+ (('a > 1 || 'b > 3) && (('a > 1 || 'b > 3) && 'd > 0 && (('a > 1 || 'b >
3) && 'c >1))),
+ (('a > 1 || 'b > 3) && 'd > 0 && 'c >1))
+
+ checkCondition(
+ ('a > 1 && 'b > 2 && 'a > 1 && 'c > 3),
+ ('a > 1 && 'b > 2 && 'c > 3))
+
+ checkCondition(
+ ('a > 1 && 'b > 3 && 'a > 1 ) || ('a > 1 && 'b > 3 && 'a > 1 && 'c > 1
)
+ , ('a > 1 && 'b > 3))
+ }
+
+ test("SPARK-34222: (((a || b) || a || (a || c))) => a || b || c") {
+
+ checkCondition((('a > 1 || 'b > 2) || 'a > 1 ||('a > 1 || 'c > 3)),
+ ('a > 1 || 'b > 2 || 'c > 3))
+
+ checkCondition((('a > 1 || 'b >2) || 'a > 4 ||('a > 1 || 'c > 3)),
+ ('a > 1 || 'b > 2 || 'a > 4 || 'c > 3))
+
+ checkCondition(
+ ('a > 1 || 'b > 3 || ( 'a > 1 || 'b > 3 || ('a > 1 || 'b > 3 || 'c > 1)))
+ , ('a > 1 || 'b > 3 || 'c > 1))
Review comment:
could you follow the the other format?
```
checkCondition(
('a > 1 || 'b > 3 || ( 'a > 1 || 'b > 3 || ('a > 1 || 'b > 3 || 'c >
1))),
('a > 1 || 'b > 3 || 'c > 1))
```
##########
File path:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala
##########
@@ -126,6 +126,56 @@ class BooleanSimplificationSuite extends PlanTest with
ExpressionEvalHelper with
'a === 'b || 'b > 3 && 'a > 3 && 'a < 5)
}
+ test("SPARK-34222: (((a && b) && a && (a && c))) => a && b && c") {
+
+ checkCondition((('a > 1 && 'b > 2) && 'a > 1 &&('a > 1 && 'c > 3)),
+ ('a > 1 && ('b > 2 && 'c > 3)))
+
+ checkCondition((('a > 1 && 'b >2) && ('a > 4 && 'b > 5) && ('a > 1 && 'c >
3)),
+ ('a > 1 && 'b > 2 && 'c > 3 && 'a > 4 && 'b > 5))
+
+ checkCondition(
+ ('a > 1 && 'b > 3 && ('a > 1 && 'b > 3 && ('a > 1 && 'b > 3 && 'c >1))),
+ ('a > 1 && 'b > 3 && 'c >1))
+
+ checkCondition(
+ (('a > 1 || 'b > 3) && (('a > 1 || 'b > 3) && 'd > 0 && (('a > 1 || 'b >
3) && 'c >1))),
+ (('a > 1 || 'b > 3) && 'd > 0 && 'c >1))
+
+ checkCondition(
+ ('a > 1 && 'b > 2 && 'a > 1 && 'c > 3),
+ ('a > 1 && 'b > 2 && 'c > 3))
+
+ checkCondition(
+ ('a > 1 && 'b > 3 && 'a > 1 ) || ('a > 1 && 'b > 3 && 'a > 1 && 'c > 1
)
+ , ('a > 1 && 'b > 3))
+ }
+
+ test("SPARK-34222: (((a || b) || a || (a || c))) => a || b || c") {
+
Review comment:
ditto
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
##########
@@ -366,6 +363,16 @@ object BooleanSimplification extends Rule[LogicalPlan]
with PredicateHelper {
// ((c || ...) && (d || ...)) || a || b
(common :+ And(ldiff.reduce(Or), rdiff.reduce(Or))).reduce(Or)
}
+ } else {
+ // No common factors from disjunctive predicates, reduce common
factor from conjunction
+ val all = splitConjunctivePredicates(left) ++
splitConjunctivePredicates(right)
+ val distinct = all.distinct
Review comment:
Could you use `ExpressionSet` for canonicalization?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]