cloud-fan commented on a change in pull request #27786: [SPARK-31034][CORE] 
ShuffleBlockFetcherIterator should always create request for last block group
URL: https://github.com/apache/spark/pull/27786#discussion_r387717872
 
 

 ##########
 File path: 
core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala
 ##########
 @@ -341,32 +341,86 @@ class ShuffleBlockFetcherIteratorSuite extends 
SparkFunSuite with PrivateMethodT
     assert(blockManager.hostLocalDirManager.get.getCachedHostLocalDirs().size 
=== 1)
   }
 
-  test("fetch continuous blocks in batch respects maxSize and maxBlocks") {
+  test("fetch continuous blocks in batch should respect maxBytesInFlight") {
     val blockManager = mock(classOf[BlockManager])
     val localBmId = BlockManagerId("test-client", "test-local-host", 1)
     doReturn(localBmId).when(blockManager).blockManagerId
 
     // Make sure remote blocks would return the merged block
-    val remoteBmId = BlockManagerId("test-client-1", "test-client-1", 2)
-    val remoteBlocks = Seq[BlockId](
+    val remoteBmId1 = BlockManagerId("test-client-1", "test-client-1", 1)
+    val remoteBmId2 = BlockManagerId("test-client-2", "test-client-2", 2)
+    val remoteBlocks1 = (0 until 15).map(ShuffleBlockId(0, 3, _))
+    val remoteBlocks2 = Seq[BlockId](ShuffleBlockId(0, 4, 0), 
ShuffleBlockId(0, 4, 1))
+    val mergedRemoteBlocks = Map[BlockId, ManagedBuffer](
+      ShuffleBlockBatchId(0, 3, 0, 3) -> createMockManagedBuffer(),
+      ShuffleBlockBatchId(0, 3, 3, 6) -> createMockManagedBuffer(),
+      ShuffleBlockBatchId(0, 3, 6, 9) -> createMockManagedBuffer(),
+      ShuffleBlockBatchId(0, 3, 9, 12) -> createMockManagedBuffer(),
+      ShuffleBlockBatchId(0, 3, 12, 15) -> createMockManagedBuffer(),
+      ShuffleBlockBatchId(0, 4, 0, 2) -> createMockManagedBuffer())
+    val transfer = createMockTransfer(mergedRemoteBlocks)
+
+    val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])](
+      (remoteBmId1, remoteBlocks1.map(blockId => (blockId, 100L, 1))),
+      (remoteBmId2, remoteBlocks2.map(blockId => (blockId, 100L, 
1)))).toIterator
+
+    val taskContext = TaskContext.empty()
+    val metrics = taskContext.taskMetrics.createTempShuffleReadMetrics()
+    val iterator = new ShuffleBlockFetcherIterator(
+      taskContext,
+      transfer,
+      blockManager,
+      blocksByAddress,
+      (_, in) => in,
+      1500,
+      Int.MaxValue,
+      Int.MaxValue,
+      Int.MaxValue,
+      true,
+      false,
+      metrics,
+      true)
+
+    var numResults = 0
+    // After initialize(), there will be 6 FetchRequests. And each of the 
first 5 requests
+    // includes 1 merged block which is merged from 3 shuffle blocks. The last 
request has 1 merged
+    // block which is merged from 2 shuffle blocks. So, only the first 5 
requests (5 * 3 * 100 >= 1500)
+    // can be sent. The second FetchRequest will hit 
maxBlocksInFlightPerAddress so it won't
 
 Review comment:
   `The second` -> `The 6th`?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to