ulysses-you commented on a change in pull request #32883:
URL: https://github.com/apache/spark/pull/32883#discussion_r649828089



##########
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala
##########
@@ -91,18 +91,3 @@ case class CoalesceShufflePartitions(session: SparkSession) 
extends CustomShuffl
     s.outputPartitioning != SinglePartition && 
supportedShuffleOrigins.contains(s.shuffleOrigin)
   }
 }
-
-private class ShuffleStageInfo(
-    val shuffleStage: ShuffleQueryStageExec,
-    val partitionSpecs: Option[Seq[ShufflePartitionSpec]])
-
-private object ShuffleStageInfo {
-  def unapply(plan: SparkPlan)
-  : Option[(ShuffleQueryStageExec, Option[Seq[ShufflePartitionSpec]])] = plan 
match {
-    case stage: ShuffleQueryStageExec =>
-      Some((stage, None))
-    case CustomShuffleReaderExec(s: ShuffleQueryStageExec, partitionSpecs) =>
-      Some((s, Some(partitionSpecs)))
-    case _ => None
-  }
-}

Review comment:
       this code is moved to `ShufflePartitionsUtil` so that the new rule 
`ExpandShufflePartitions` can use it.

##########
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala
##########
@@ -90,41 +89,6 @@ object OptimizeSkewedJoin extends CustomShuffleReaderRule {
     }
   }
 
-  /**
-   * Get the map size of the specific reduce shuffle Id.
-   */
-  private def getMapSizesForReduceId(shuffleId: Int, partitionId: Int): 
Array[Long] = {
-    val mapOutputTracker = 
SparkEnv.get.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]
-    
mapOutputTracker.shuffleStatuses(shuffleId).mapStatuses.map{_.getSizeForBlock(partitionId)}
-  }
-
-  /**
-   * Splits the skewed partition based on the map size and the target 
partition size
-   * after split, and create a list of `PartialMapperPartitionSpec`. Returns 
None if can't split.
-   */
-  private def createSkewPartitionSpecs(
-      shuffleId: Int,
-      reducerId: Int,
-      targetSize: Long): Option[Seq[PartialReducerPartitionSpec]] = {
-    val mapPartitionSizes = getMapSizesForReduceId(shuffleId, reducerId)
-    val mapStartIndices = ShufflePartitionsUtil.splitSizeListByTargetSize(
-      mapPartitionSizes, targetSize)
-    if (mapStartIndices.length > 1) {
-      Some(mapStartIndices.indices.map { i =>
-        val startMapIndex = mapStartIndices(i)
-        val endMapIndex = if (i == mapStartIndices.length - 1) {
-          mapPartitionSizes.length
-        } else {
-          mapStartIndices(i + 1)
-        }
-        val dataSize = 
startMapIndex.until(endMapIndex).map(mapPartitionSizes(_)).sum
-        PartialReducerPartitionSpec(reducerId, startMapIndex, endMapIndex, 
dataSize)
-      })
-    } else {
-      None
-    }
-  }
-

Review comment:
       this code is moved to ShufflePartitionsUtil so that the new rule 
ExpandShufflePartitions can use it.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to