cloud-fan commented on a change in pull request #32816:
URL: https://github.com/apache/spark/pull/32816#discussion_r696358035



##########
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala
##########
@@ -35,15 +36,48 @@ case class SimpleCost(value: Long) extends Cost {
 }
 
 /**
- * A simple implementation of [[CostEvaluator]], which counts the number of
- * [[ShuffleExchangeLike]] nodes in the plan.
+ * A skew join aware implementation of [[Cost]], which considers shuffle number 
and skew join number
  */
-object SimpleCostEvaluator extends CostEvaluator {
+case class SkewJoinAwareCost(
+    numShuffles: Int,
+    numSkewJoins: Int) extends Cost {
+  override def compare(that: Cost): Int = that match {
+    case other: SkewJoinAwareCost =>
+      // If more skew joins are optimized or fewer shuffle nodes, it means the 
cost is lower
+      if (numSkewJoins > other.numSkewJoins) {
+        -1
+      } else if (numSkewJoins < other.numSkewJoins) {
+        1
+      } else if (numShuffles < other.numShuffles) {
+        -1
+      } else if (numShuffles > other.numShuffles) {
+        1
+      } else {
+        0
+      }
 
+    case _ =>
+      throw 
QueryExecutionErrors.cannotCompareCostWithTargetCostError(that.toString)
+  }
+}
+
+/**
+ * A skew join aware implementation of [[CostEvaluator]], which counts the 
number of
+ * [[ShuffleExchangeLike]] nodes and skew join nodes in the plan.
+ */
+case class SimpleCostEvaluator(forceOptimizeSkewedJoin: Boolean) extends 
CostEvaluator {
   override def evaluateCost(plan: SparkPlan): Cost = {
-    val cost = plan.collect {
+    val shuffleNumber = plan.collect {
       case s: ShuffleExchangeLike => s
     }.size
-    SimpleCost(cost)
+
+    if (forceOptimizeSkewedJoin) {
+      val skewJoinNumber = plan.collect {

Review comment:
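       nit: rename the local vals `shuffleNumber` and `skewJoinNumber` to `numShuffles` and `numSkewJoins`, matching the field names of `SkewJoinAwareCost`.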




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to