cloud-fan commented on code in PR #38799:
URL: https://github.com/apache/spark/pull/38799#discussion_r1036828144
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala:
##########
@@ -627,6 +627,87 @@ abstract class SparkStrategies extends
QueryPlanner[SparkPlan] {
}
}
+ /**
+ * Optimize the filter based on rank-like window function by reduce not
required rows.
+ * This rule optimizes the following cases:
+ * {{{
+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1
WHERE rn = 5
+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1
WHERE 5 = rn
+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1
WHERE rn < 5
+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1
WHERE 5 > rn
+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1
WHERE rn <= 5
+ * SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1
WHERE 5 >= rn
+ * }}}
+ */
+ object WindowGroupLimit extends Strategy with PredicateHelper {
+
+ /**
+ * Extract all the limit values from predicates.
+ */
+ def extractLimits(condition: Expression, attr: Attribute): Option[Int] = {
+ val limits = splitConjunctivePredicates(condition).collect {
+ case EqualTo(IntegerLiteral(limit), e) if e.semanticEquals(attr) =>
limit
+ case EqualTo(e, IntegerLiteral(limit)) if e.semanticEquals(attr) =>
limit
+ case LessThan(e, IntegerLiteral(limit)) if e.semanticEquals(attr) =>
limit - 1
+ case GreaterThan(IntegerLiteral(limit), e) if e.semanticEquals(attr)
=> limit - 1
+ case LessThanOrEqual(e, IntegerLiteral(limit)) if
e.semanticEquals(attr) => limit
+ case GreaterThanOrEqual(IntegerLiteral(limit), e) if
e.semanticEquals(attr) => limit
+ }
+
+ if (limits.nonEmpty) Some(limits.min) else None
+ }
+
+ private def supports(
+ windowExpressions: Seq[NamedExpression]): Boolean =
windowExpressions.exists {
+ case Alias(WindowExpression(_: Rank | _: DenseRank | _: RowNumber,
WindowSpecDefinition(_, _,
+ SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))), _) =>
true
+ case _ => false
+ }
+
+ def apply(plan: LogicalPlan): Seq[SparkPlan] = {
+ if (conf.windowGroupLimitThreshold == -1) return Nil
+
+ plan match {
+ case filter @ Filter(condition,
+ window @ logical.Window(windowExpressions, partitionSpec, orderSpec,
child))
+ if !child.isInstanceOf[logical.Window] &&
+ supports(windowExpressions) && orderSpec.nonEmpty =>
Review Comment:
Do we really require the window to contain only rank-like functions?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]