peter-toth commented on a change in pull request #24495: [SPARK-27604][SQL] Add
filter reduction
URL: https://github.com/apache/spark/pull/24495#discussion_r282880044
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
##########
@@ -151,6 +151,238 @@ object ConstantPropagation extends Rule[LogicalPlan]
with PredicateHelper {
}
}
+/**
+ * Substitutes expressions which can be statically reduced by constraints.
+ * eg.
+ * {{{
+ * SELECT * FROM table WHERE i <= 5 AND i = 5 => ... WHERE i = 5
+ * SELECT * FROM table WHERE i < j AND ... AND i > j => ... WHERE false
+ * }}}
+ */
+object FilterReduction extends Rule[LogicalPlan] with ConstraintHelper {
+ def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+ case f: Filter =>
+ val newCondition = normalizeAndReduceWithConstraints(f.condition)
+ if (newCondition fastEquals f.condition) {
+ f
+ } else {
+ f.copy(condition = newCondition)
+ }
+ }
+
+ private def normalizeAndReduceWithConstraints(expression: Expression):
Expression =
+ reduceWithConstraints(normalize(expression))._1
+
+ private def normalize(expression: Expression) = expression transform {
+ case GreaterThan(x, y) => LessThan(y, x)
+ case GreaterThanOrEqual(x, y) => LessThanOrEqual(y, x)
+ }
+
+ /**
+ * Traverse a condition as a tree and simplify expressions with constraints.
+ * - This functions assumes that the plan has been normalized using
[[normalize()]]
+ * - On matching [[And]], recursively traverse both children, simplify child
expressions with
+ * propagated constraints from sibling and propagate up union of
constraints.
+ * - If a child of [[And]] is [[LessThan]], [[LessThanOrEqual]],
[[EqualTo]], [[EqualNullSafe]],
+ * propagate the constraint.
+ * - On matching [[Or]] or [[Not]], recursively traverse each children,
propagate no constraints.
+ * - Otherwise, stop traversal and propagate no constraints.
+ * @param expression expression to be traversed
+ * @return A tuple including:
+ * 1. Expression: optionally changed expression after traversal
+ * 2. Seq[Expression]: propagated constraints
+ */
+ private def reduceWithConstraints(expression: Expression): (Expression,
Seq[Expression]) =
+ expression match {
+ case e @ (_: LessThan | _: LessThanOrEqual | _: EqualTo | _:
EqualNullSafe)
+ if e.deterministic => (e, Seq(e))
+ case a @ And(left, right) =>
+ val (newLeft, leftConstraints) = reduceWithConstraints(left)
+ val reducedRight = reduceWithConstraints(right, leftConstraints)
+ val (reducedNewRight, rightConstraints) =
reduceWithConstraints(reducedRight)
+ val reducedNewLeft = reduceWithConstraints(newLeft, rightConstraints)
+ val newAnd = if ((reducedNewLeft fastEquals left) &&
+ (reducedNewRight fastEquals right)) {
+ a
+ } else {
+ And(reducedNewLeft, reducedNewRight)
+ }
+ (newAnd, leftConstraints ++ rightConstraints)
+ case o @ Or(left, right) =>
+ // Ignore the EqualityPredicates from children since they are only
propagated through And.
+ val (newLeft, _) = reduceWithConstraints(left)
+ val (newRight, _) = reduceWithConstraints(right)
+ val newOr = if ((newLeft fastEquals left) && (newRight fastEquals
right)) {
+ o
+ } else {
+ Or(newLeft, newRight)
+ }
+
+ (newOr, Seq.empty)
+ case n @ Not(child) =>
+ // Ignore the EqualityPredicates from children since they are only
propagated through And.
+ val (newChild, _) = reduceWithConstraints(child)
+ val newNot = if (newChild fastEquals child) {
+ n
+ } else {
+ Not(newChild)
+ }
+ (newNot, Seq.empty)
+ case _ => (expression, Seq.empty)
+ }
+
+ private def reduceWithConstraints(expression: Expression, constraints:
Seq[Expression]) =
+ constraints.foldLeft(expression)((e, constraint) =>
reduceWithConstraint(e, constraint))
+
+ private def planEqual(x: Expression, y: Expression) =
+ !x.foldable && !y.foldable && x.canonicalized == y.canonicalized
+
+ private def valueEqual(x: Expression, y: Expression) =
+ x.foldable && y.foldable && EqualTo(x,
y).eval(EmptyRow).asInstanceOf[Boolean]
+
+ private def valueLessThan(x: Expression, y: Expression) =
+ x.foldable && y.foldable && LessThan(x,
y).eval(EmptyRow).asInstanceOf[Boolean]
+
+ private def valueLessThanOrEqual(x: Expression, y: Expression) =
+ x.foldable && y.foldable && LessThanOrEqual(x,
y).eval(EmptyRow).asInstanceOf[Boolean]
+
+ private def reduceWithConstraint(expression: Expression, constraint:
Expression): Expression =
+ constraint match {
+ case a LessThan b => expression transformUp {
+ case c LessThan d if planEqual(b, d) && (planEqual(a, c) ||
valueLessThanOrEqual(c, a)) =>
+ Literal.TrueLiteral
+ case c LessThan d if planEqual(b, c) && (planEqual(a, d) ||
valueLessThanOrEqual(d, a)) =>
+ Literal.FalseLiteral
+ case c LessThan d if planEqual(a, c) && (planEqual(b, d) ||
valueLessThanOrEqual(b, d)) =>
+ Literal.TrueLiteral
+ case c LessThan d if planEqual(a, d) && (planEqual(b, c) ||
valueLessThanOrEqual(b, c)) =>
+ Literal.FalseLiteral
+
+ case c LessThanOrEqual d
+ if planEqual(b, d) && (planEqual(a, c) || valueLessThanOrEqual(c,
a)) =>
+ Literal.TrueLiteral
+ case c LessThanOrEqual d
+ if planEqual(b, c) && (planEqual(a, d) || valueLessThanOrEqual(d,
a)) =>
+ Literal.FalseLiteral
+ case c LessThanOrEqual d
+ if planEqual(a, c) && (planEqual(b, d) || valueLessThanOrEqual(b,
d)) =>
+ Literal.TrueLiteral
+ case c LessThanOrEqual d
+ if planEqual(a, d) && (planEqual(b, c) || valueLessThanOrEqual(b,
c)) =>
+ Literal.FalseLiteral
+
+ case c EqualTo d if planEqual(b, d) && (planEqual(a, c) ||
valueLessThanOrEqual(c, a)) =>
+ Literal.FalseLiteral
+ case c EqualTo d if planEqual(b, c) && (planEqual(a, d) ||
valueLessThanOrEqual(d, a)) =>
+ Literal.FalseLiteral
+ case c EqualTo d if planEqual(a, c) && (planEqual(b, d) ||
valueLessThanOrEqual(b, d)) =>
+ Literal.FalseLiteral
+ case c EqualTo d if planEqual(a, d) && (planEqual(b, c) ||
valueLessThanOrEqual(b, c)) =>
+ Literal.FalseLiteral
+
+ case c EqualNullSafe d
+ if planEqual(b, d) && (planEqual(a, c) || valueLessThanOrEqual(c,
a)) =>
+ Literal.FalseLiteral
+ case c EqualNullSafe d
+ if planEqual(b, c) && (planEqual(a, d) || valueLessThanOrEqual(d,
a)) =>
+ Literal.FalseLiteral
+ case c EqualNullSafe d
+ if planEqual(a, c) && (planEqual(b, d) || valueLessThanOrEqual(b,
d)) =>
+ Literal.FalseLiteral
+ case c EqualNullSafe d
+ if planEqual(a, d) && (planEqual(b, c) || valueLessThanOrEqual(b,
c)) =>
+ Literal.FalseLiteral
+
+ case c EqualNullSafe d if planEqual(b, d) => EqualTo(c, d)
+ case c EqualNullSafe d if planEqual(b, c) => EqualTo(c, d)
+ case c EqualNullSafe d if planEqual(a, c) => EqualTo(c, d)
+ case c EqualNullSafe d if planEqual(a, d) => EqualTo(c, d)
+ }
+ case a LessThanOrEqual b => expression transformUp {
+ case c LessThan d if planEqual(b, d) && valueLessThan(c, a) =>
+ Literal.TrueLiteral
+ case c LessThan d if planEqual(b, c) && (planEqual(a, d) ||
valueLessThanOrEqual(d, a)) =>
+ Literal.FalseLiteral
+ case c LessThan d if planEqual(a, c) && valueLessThan(b, d) =>
+ Literal.TrueLiteral
+ case c LessThan d if planEqual(a, d) && (planEqual(b, c) ||
valueLessThanOrEqual(b, c)) =>
+ Literal.FalseLiteral
+
+ case c LessThanOrEqual d
+ if planEqual(b, d) && (planEqual(a, c) || valueLessThanOrEqual(c,
a)) =>
+ Literal.TrueLiteral
+ case c LessThanOrEqual d if planEqual(b, c) && valueLessThan(d, a) =>
+ Literal.FalseLiteral
+ case c LessThanOrEqual d if planEqual(b, c) && (planEqual(a, d) ||
valueEqual(a, d)) =>
+ EqualTo(c, d)
+ case c LessThanOrEqual d
+ if planEqual(a, c) && (planEqual(b, d) || valueLessThanOrEqual(b,
d)) =>
+ Literal.TrueLiteral
+ case c LessThanOrEqual d if planEqual(a, d) && valueLessThan(b, c) =>
+ Literal.FalseLiteral
+ case c LessThanOrEqual d if planEqual(a, d) && (planEqual(b, c) ||
valueEqual(b, c)) =>
+ EqualTo(c, d)
+
+ case c EqualTo d if planEqual(b, d) && valueLessThan(c, a) =>
Literal.FalseLiteral
+ case c EqualTo d if planEqual(b, c) && valueLessThan(d, a) =>
Literal.FalseLiteral
+ case c EqualTo d if planEqual(a, c) && valueLessThan(b, d) =>
Literal.FalseLiteral
+ case c EqualTo d if planEqual(a, d) && valueLessThan(b, c) =>
Literal.FalseLiteral
+
+ case c EqualNullSafe d if planEqual(b, d) && valueLessThan(c, a) =>
Literal.FalseLiteral
+ case c EqualNullSafe d if planEqual(b, c) && valueLessThan(d, a) =>
Literal.FalseLiteral
+ case c EqualNullSafe d if planEqual(a, c) && valueLessThan(b, d) =>
Literal.FalseLiteral
+ case c EqualNullSafe d if planEqual(a, d) && valueLessThan(b, c) =>
Literal.FalseLiteral
+
+ case c EqualNullSafe d if planEqual(b, d) => EqualTo(c, d)
+ case c EqualNullSafe d if planEqual(b, c) => EqualTo(c, d)
+ case c EqualNullSafe d if planEqual(a, c) => EqualTo(c, d)
+ case c EqualNullSafe d if planEqual(a, d) => EqualTo(c, d)
+ }
+ case a EqualTo b => expression transformUp {
+ case c LessThan d if planEqual(b, d) && planEqual(a, c) =>
Literal.FalseLiteral
Review comment:
Fixed the issue and added some new UTs.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]