beliefer commented on code in PR #38799:
URL: https://github.com/apache/spark/pull/38799#discussion_r1036903257


##########
sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala:
##########
@@ -627,6 +627,87 @@ abstract class SparkStrategies extends 
QueryPlanner[SparkPlan] {
     }
   }
 
+  /**
+   * Optimizes a filter on a rank-like window function by dropping rows that are not 
required.
+   * This rule optimizes the following cases:
+   * {{{
+   *   SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 
WHERE rn = 5
+   *   SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 
WHERE 5 = rn
+   *   SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 
WHERE rn < 5
+   *   SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 
WHERE 5 > rn
+   *   SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 
WHERE rn <= 5
+   *   SELECT *, ROW_NUMBER() OVER(PARTITION BY k ORDER BY a) AS rn FROM Tab1 
WHERE 5 >= rn
+   * }}}
+   */
+  object WindowGroupLimit extends Strategy with PredicateHelper {
+
+    /**
+     * Extract all the limit values from predicates.
+     *
+     * Every predicate returned by `splitConjunctivePredicates(condition)` that compares
+     * `attr` with an integer literal contributes one inclusive upper bound:
+     * `attr = n`, `n = attr`, `attr <= n` and `n >= attr` contribute `n`, while
+     * `attr < n` and `n > attr` contribute `n - 1`. Predicates of any other shape
+     * are ignored.
+     *
+     * @param condition the filter condition to inspect
+     * @param attr the attribute the comparisons must refer to (checked with `semanticEquals`)
+     * @return the smallest collected bound, or `None` if no predicate matched
+     */
+    def extractLimits(condition: Expression, attr: Attribute): Option[Int] = {
+      val limits = splitConjunctivePredicates(condition).collect {
+        case EqualTo(IntegerLiteral(limit), e) if e.semanticEquals(attr) => 
limit
+        case EqualTo(e, IntegerLiteral(limit)) if e.semanticEquals(attr) => 
limit
+        case LessThan(e, IntegerLiteral(limit)) if e.semanticEquals(attr) => 
limit - 1
+        case GreaterThan(IntegerLiteral(limit), e) if e.semanticEquals(attr) 
=> limit - 1
+        case LessThanOrEqual(e, IntegerLiteral(limit)) if 
e.semanticEquals(attr) => limit
+        case GreaterThanOrEqual(IntegerLiteral(limit), e) if 
e.semanticEquals(attr) => limit
+      }
+
+      if (limits.nonEmpty) Some(limits.min) else None
+    }
+
+    private def supports(
+        windowExpressions: Seq[NamedExpression]): Boolean = 
windowExpressions.exists { // true as soon as one expression matches the supported shape
+      case Alias(WindowExpression(_: Rank | _: DenseRank | _: RowNumber, 
WindowSpecDefinition(_, _,
+      SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))), _) => 
true // aliased Rank/DenseRank/RowNumber with a RowFrame from UnboundedPreceding to CurrentRow
+      case _ => false // any other expression does not qualify on its own
+    }
+
+    def apply(plan: LogicalPlan): Seq[SparkPlan] = {
+      if (conf.windowGroupLimitThreshold == -1) return Nil
+
+      plan match {
+        case filter @ Filter(condition,
+          window @ logical.Window(windowExpressions, partitionSpec, orderSpec, 
child))
+          if !child.isInstanceOf[logical.Window] &&
+            supports(windowExpressions) && orderSpec.nonEmpty =>

Review Comment:
   If the window contains at least one rank-like function, we can apply the 
optimization.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to