ulysses-you commented on a change in pull request #1129:
URL: https://github.com/apache/incubator-kyuubi/pull/1129#discussion_r725413342
##########
File path:
dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/watchdog/ForcedMaxOutputRowsRule.scala
##########
@@ -19,11 +19,18 @@ package org.apache.kyuubi.sql.watchdog
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.dsl.expressions._
-import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Limit,
LogicalPlan, Project}
+import org.apache.spark.sql.catalyst.expressions.Alias
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Distinct,
Filter, Limit, LogicalPlan, Project, Sort, Union}
import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.catalyst.trees.TreeNodeTag
import org.apache.kyuubi.sql.KyuubiSQLConf
+object ForcedMaxOutputRowsConstraint {
+ val CHILD_AGGREGATE: TreeNodeTag[String] = TreeNodeTag[String]("childAgg")
+ val CHILD_AGGREGATE_FLAG: String = "childAgg"
Review comment:
It should be unique; how about `__kyuubi_child_agg__`?
##########
File path:
dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/watchdog/ForcedMaxOutputRowsRule.scala
##########
@@ -45,21 +52,36 @@ import org.apache.kyuubi.sql.KyuubiSQLConf
* */
case class ForcedMaxOutputRowsRule(session: SparkSession) extends
Rule[LogicalPlan] {
+ private def isChildAggregate(a: Aggregate): Boolean = a
+ .aggregateExpressions.exists(p =>
p.getTagValue(ForcedMaxOutputRowsConstraint.CHILD_AGGREGATE)
+ .contains(ForcedMaxOutputRowsConstraint.CHILD_AGGREGATE_FLAG))
+
+ private def canInsertLimitInner(p: LogicalPlan): Boolean = p match {
+
+ case agg: Aggregate => agg match {
+ case Aggregate(_, Alias(_, "havingCondition")::Nil, _) => false
+ case agg: Aggregate => !isChildAggregate(agg)
+ }
+ case _: Distinct => true
+ case _: Filter => true
+ case _: Project => true
+ case Limit(_, _) => true
+ case _: Sort => true
+ case _: Union => true
+ case _ => false
+
+ }
+
private def canInsertLimit(p: LogicalPlan, maxOutputRowsOpt: Option[Int]):
Boolean = {
maxOutputRowsOpt match {
- case Some(forcedMaxOutputRows) => val supported = p match {
- case _: Project => true
- case _: Aggregate => true
- case Limit(_, _) => true
- case _ => false
- }
+ case Some(forcedMaxOutputRows) => val supported = canInsertLimitInner(p)
supported && !p.maxRows.exists(_ <= forcedMaxOutputRows)
case None => false
}
-
}
+
Review comment:
Unnecessary change.
##########
File path:
dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/watchdog/ForcedMaxOutputRowsRule.scala
##########
@@ -70,3 +92,42 @@ case class ForcedMaxOutputRowsRule(session: SparkSession)
extends Rule[LogicalPl
}
}
+
+case class MarkAggregateOrderRule(session: SparkSession) extends
Rule[LogicalPlan] {
+
+ private def markChildAggregate(a: Aggregate): Unit = {
+ // mark child aggregate
+ a.aggregateExpressions.filter(_.resolved).foreach(_.setTagValue(
+ ForcedMaxOutputRowsConstraint.CHILD_AGGREGATE,
+ ForcedMaxOutputRowsConstraint.CHILD_AGGREGATE_FLAG)
+ )
+ }
+
+ private def findAndMarkChildAggregate(plan: LogicalPlan): LogicalPlan = plan
match {
+ /*
+ * The case mainly process order not aggregate column but grouping column
as below
+ * SELECT c1, COUNT(*) as cnt
+ * FROM t1
+ * GROUP BY c1
+ * ORDER BY c1
+ * */
+ case a: Aggregate if a.aggregateExpressions
+ .exists(x => x.resolved && x.name.equals("aggOrder")) =>
markChildAggregate(a)
+ plan
+
+ case _ => plan.children.foreach { c =>
+ c.foreach {
+ case agg: Aggregate => markChildAggregate(agg)
+ case _ => Unit
+ }
+ }
+ plan
Review comment:
It seems the style is wrong?
##########
File path:
dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/watchdog/ForcedMaxOutputRowsRule.scala
##########
@@ -45,21 +52,36 @@ import org.apache.kyuubi.sql.KyuubiSQLConf
* */
case class ForcedMaxOutputRowsRule(session: SparkSession) extends
Rule[LogicalPlan] {
+ private def isChildAggregate(a: Aggregate): Boolean = a
+ .aggregateExpressions.exists(p =>
p.getTagValue(ForcedMaxOutputRowsConstraint.CHILD_AGGREGATE)
+ .contains(ForcedMaxOutputRowsConstraint.CHILD_AGGREGATE_FLAG))
+
+ private def canInsertLimitInner(p: LogicalPlan): Boolean = p match {
+
+ case agg: Aggregate => agg match {
+ case Aggregate(_, Alias(_, "havingCondition")::Nil, _) => false
+ case agg: Aggregate => !isChildAggregate(agg)
+ }
+ case _: Distinct => true
+ case _: Filter => true
+ case _: Project => true
+ case Limit(_, _) => true
+ case _: Sort => true
+ case _: Union => true
+ case _ => false
+
+ }
+
private def canInsertLimit(p: LogicalPlan, maxOutputRowsOpt: Option[Int]):
Boolean = {
maxOutputRowsOpt match {
- case Some(forcedMaxOutputRows) => val supported = p match {
- case _: Project => true
- case _: Aggregate => true
- case Limit(_, _) => true
- case _ => false
- }
+ case Some(forcedMaxOutputRows) => val supported = canInsertLimitInner(p)
supported && !p.maxRows.exists(_ <= forcedMaxOutputRows)
Review comment:
nit: `canInsertLimitInner(p) && !p.maxRows.exists(_ <=
forcedMaxOutputRows)`
##########
File path:
dev/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/watchdog/ForcedMaxOutputRowsRule.scala
##########
@@ -45,21 +52,36 @@ import org.apache.kyuubi.sql.KyuubiSQLConf
* */
case class ForcedMaxOutputRowsRule(session: SparkSession) extends
Rule[LogicalPlan] {
+ private def isChildAggregate(a: Aggregate): Boolean = a
+ .aggregateExpressions.exists(p =>
p.getTagValue(ForcedMaxOutputRowsConstraint.CHILD_AGGREGATE)
+ .contains(ForcedMaxOutputRowsConstraint.CHILD_AGGREGATE_FLAG))
+
+ private def canInsertLimitInner(p: LogicalPlan): Boolean = p match {
+
+ case agg: Aggregate => agg match {
+ case Aggregate(_, Alias(_, "havingCondition")::Nil, _) => false
+ case agg: Aggregate => !isChildAggregate(agg)
+ }
Review comment:
```scala
case Aggregate(_, Alias(_, "havingCondition")::Nil, _) => false
case agg: Aggregate => !isChildAggregate(agg)
...
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]