advancedxy commented on a change in pull request #26040: [SPARK-9853][Core] 
Optimize shuffle fetch of continuous partition IDs
URL: https://github.com/apache/spark/pull/26040#discussion_r333848114
 
 

 ##########
 File path: 
core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
 ##########
 @@ -335,6 +341,51 @@ final class ShuffleBlockFetcherIterator(
     remoteRequests
   }
 
+  private[this] def mergeContinuousShuffleBlockIdsIfNeeded(
+      blocks: ArrayBuffer[FetchBlockInfo]): ArrayBuffer[FetchBlockInfo] = {
+
+    def mergeFetchBlockInfo(toBeMerged: ArrayBuffer[FetchBlockInfo]): 
FetchBlockInfo = {
+      val startBlockId = toBeMerged.head.blockId.asInstanceOf[ShuffleBlockId]
+      FetchBlockInfo(
+        ShuffleBlockBatchId(
+          startBlockId.shuffleId,
+          startBlockId.mapId,
+          startBlockId.reduceId,
+          toBeMerged.last.blockId.asInstanceOf[ShuffleBlockId].reduceId + 1),
+        toBeMerged.map(_.size).sum,
+        toBeMerged.head.mapIndex)
+    }
+
+    val result = if (fetchContinuousShuffleBlocksInBatch) {
+      var curBlocks = new ArrayBuffer[FetchBlockInfo]
+      val mergedBlockInfo = new ArrayBuffer[FetchBlockInfo]
+      val iter = blocks.iterator
+
+      while (iter.hasNext) {
+        val info = iter.next()
+        val curBlockId = info.blockId.asInstanceOf[ShuffleBlockId]
+        if (curBlocks.isEmpty) {
+          curBlocks += info
+        } else {
+          if (curBlockId.mapId != 
curBlocks.head.blockId.asInstanceOf[ShuffleBlockId].mapId) {
 
 Review comment:
   How about we track `preMapId`, `startReduceId`, `endReduceId` and 
`mergedBlockSize` instead of using `curBlocks: ArrayBuffer[FetchBlockInfo]`, 
since we don't need all the info in `curBlocks`?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to