attilapiros commented on a change in pull request #28708:
URL: https://github.com/apache/spark/pull/28708#discussion_r436664435



##########
File path: core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
##########
@@ -148,6 +170,87 @@ private[spark] class IndexShuffleBlockResolver(
     }
   }
 
+  /**
+   * Write a provided shuffle block as a stream. Used for block migrations.
+   * ShuffleBlockBatchIds must contain the full range represented in the ShuffleIndexBlock.
+   * Requires the caller to delete any shuffle index blocks where the shuffle block fails to put.
+   */
+  override def putShuffleBlockAsStream(blockId: BlockId, serializerManager: SerializerManager):
+      StreamCallbackWithID = {
+    val file = blockId match {
+      case ShuffleIndexBlockId(shuffleId, mapId, _) =>
+        getIndexFile(shuffleId, mapId)
+      case ShuffleDataBlockId(shuffleId, mapId, _) =>
+        getDataFile(shuffleId, mapId)
+      case _ =>
+        throw new Exception(s"Unexpected shuffle block transfer ${blockId} as " +
+          s"${blockId.getClass().getSimpleName()}")
+    }
+    val fileTmp = Utils.tempFileWith(file)
+    val channel = Channels.newChannel(
+      serializerManager.wrapStream(blockId,
+        new FileOutputStream(fileTmp)))
+
+    new StreamCallbackWithID {
+
+      override def getID: String = blockId.name
+
+      override def onData(streamId: String, buf: ByteBuffer): Unit = {
+        while (buf.hasRemaining) {
+          channel.write(buf)
+        }
+      }
+
+      override def onComplete(streamId: String): Unit = {
+        logTrace(s"Done receiving shuffle block $blockId, now storing on local 
disk.")
+        channel.close()
+        val diskSize = fileTmp.length()
+        this.synchronized {
+          if (file.exists()) {
+            file.delete()
+          }
+          if (!fileTmp.renameTo(file)) {
+            throw new IOException(s"fail to rename file ${fileTmp} to ${file}")
+          }
+        }
+        blockManager.reportBlockStatus(blockId, BlockStatus(
+          StorageLevel(
+            useDisk = true,
+            useMemory = false,
+            useOffHeap = false,
+            deserialized = false,
+            replication = 0)
+          , 0, diskSize))

Review comment:
   ```suggestion
       blockManager.reportBlockStatus(blockId, BlockStatus(StorageLevel.DISK_ONLY, 0, diskSize))
   ```
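
For reference, `StorageLevel.DISK_ONLY` is the predefined disk-only level and carries `replication = 1`, so this suggestion also normalizes the `replication = 0` used in the hunk above. A minimal standalone sketch (the `DiskOnlyEquivalence` object is hypothetical, not part of the PR):

```scala
import org.apache.spark.storage.StorageLevel

object DiskOnlyEquivalence {
  def main(args: Array[String]): Unit = {
    // Spell out the level the long way, as the hunk above does...
    val expanded = StorageLevel(
      useDisk = true,
      useMemory = false,
      useOffHeap = false,
      deserialized = false,
      replication = 1)
    // ...and compare it with the predefined constant. StorageLevel
    // overrides equals on all five fields, so this prints true.
    println(StorageLevel.DISK_ONLY == expanded)
  }
}
```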

##########
File path: core/src/main/scala/org/apache/spark/storage/BlockManager.scala
##########
@@ -1790,6 +1822,106 @@ private[spark] class BlockManager(
     }
   }
 
+  private class ShuffleMigrationRunnable(peer: BlockManagerId) extends Runnable {
+    @volatile var running = true
+    override def run(): Unit = {
+      var migrating: Option[(Int, Long)] = None
+      val storageLevel = StorageLevel(
+        useDisk = true,
+        useMemory = false,
+        useOffHeap = false,
+        deserialized = false,
+        replication = 1)

Review comment:
   `StorageLevel.DISK_ONLY` can be used instead:
   ```suggestion
   ```
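
(The empty suggestion body deletes the quoted `val storageLevel` lines entirely; the next comment on the same file then suggests passing `StorageLevel.DISK_ONLY` directly at the `uploadBlockSync` call site.)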

##########
File path: core/src/main/scala/org/apache/spark/storage/BlockManager.scala
##########
@@ -1790,6 +1822,106 @@ private[spark] class BlockManager(
     }
   }
 
+  private class ShuffleMigrationRunnable(peer: BlockManagerId) extends Runnable {
+    @volatile var running = true
+    override def run(): Unit = {
+      var migrating: Option[(Int, Long)] = None
+      val storageLevel = StorageLevel(
+        useDisk = true,
+        useMemory = false,
+        useOffHeap = false,
+        deserialized = false,
+        replication = 1)
+      logInfo(s"Starting migration thread for ${peer}")
+      // Once a block fails to transfer to an executor, stop trying to transfer more blocks
+      try {
+        while (running) {
+          val migrating = Option(shufflesToMigrate.poll())
+          migrating match {
+            case None =>
+              logInfo("Nothing to migrate")
+              // Nothing to do right now, but maybe a transfer will fail or a new block
+              // will finish being committed.
+              val SLEEP_TIME_SECS = 1
+              Thread.sleep(SLEEP_TIME_SECS * 1000L)
+            case Some((shuffleId, mapId)) =>
+              logInfo(s"Trying to migrate shuffle ${shuffleId},${mapId} to 
${peer}")
+              val blocks =
+                migratableResolver.getMigrationBlocks(shuffleId, mapId)
+              logInfo(s"Got migration sub-blocks ${blocks}")
+              blocks.foreach { case (blockId, buffer) =>
+                logInfo(s"Migrating sub-block ${blockId}")
+                blockTransferService.uploadBlockSync(
+                  peer.host,
+                  peer.port,
+                  peer.executorId,
+                  blockId,
+                  buffer,
+                  storageLevel,

Review comment:
   ```suggestion
                    StorageLevel.DISK_ONLY,
   ```
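
For context, the runnable in this hunk follows a simple poll-or-sleep worker pattern. A self-contained sketch of that pattern (the `PollingWorker` and `PollingWorkerDemo` names are hypothetical; a plain `String` queue stands in for `shufflesToMigrate`, and `println` stands in for `uploadBlockSync`):

```scala
import java.util.concurrent.ConcurrentLinkedQueue

// Hypothetical sketch (not the PR code) of the poll-or-sleep loop used by
// ShuffleMigrationRunnable.
class PollingWorker(queue: ConcurrentLinkedQueue[String]) extends Runnable {
  @volatile var running = true

  override def run(): Unit = {
    try {
      while (running && !Thread.interrupted()) {
        Option(queue.poll()) match {
          case None =>
            // Queue empty: back off briefly, then re-check for new work.
            Thread.sleep(1000L)
          case Some(item) =>
            // In the PR this is where the shuffle blocks are uploaded to the peer.
            println(s"Processing $item")
        }
      }
    } catch {
      case _: InterruptedException => // stop was requested while sleeping
    }
  }
}

object PollingWorkerDemo {
  def main(args: Array[String]): Unit = {
    val queue = new ConcurrentLinkedQueue[String]()
    queue.add("shuffle_0_map_1")
    val thread = new Thread(new PollingWorker(queue))
    thread.start()
    Thread.sleep(1500L) // let the worker drain the queue and idle once
    thread.interrupt()  // cooperative shutdown, as in the PR's loop condition
    thread.join()
  }
}
```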

##########
File path: core/src/main/scala/org/apache/spark/storage/BlockManager.scala
##########
@@ -1790,6 +1822,110 @@ private[spark] class BlockManager(
     }
   }
 
+  private class ShuffleMigrationRunnable(peer: BlockManagerId) extends Runnable {
+    @volatile var running = true
+    override def run(): Unit = {
+      var migrating: Option[(Int, Long)] = None
+      logInfo(s"Starting migration thread for ${peer}")
+      // Once a block fails to transfer to an executor, stop trying to transfer more blocks
+      try {
+        while (running && !Thread.interrupted()) {
+          val migrating = Option(shufflesToMigrate.poll())
+          migrating match {
+            case None =>
+              logInfo("Nothing to migrate")
+              // Nothing to do right now, but maybe a transfer will fail or a new block
+              // will finish being committed.
+              val SLEEP_TIME_SECS = 1
+              Thread.sleep(SLEEP_TIME_SECS * 1000L)
+            case Some((shuffleId, mapId)) =>
+              logInfo(s"Trying to migrate shuffle ${shuffleId},${mapId} to 
${peer}")
+              val blocks =
+                migratableResolver.getMigrationBlocks(shuffleId, mapId)
+              logInfo(s"Got migration sub-blocks ${blocks}")
+              blocks.foreach { case (blockId, buffer) =>
+                logInfo(s"Migrating sub-block ${blockId}")
+                blockTransferService.uploadBlockSync(
+                  peer.host,
+                  peer.port,
+                  peer.executorId,
+                  blockId,
+                  buffer,
+                  StorageLevel.DISK_ONLY,
+                  null) // class tag, we don't need for shuffle
+                logInfo(s"Migrated sub block ${blockId}")
+              }
+              logInfo(s"Migrated ${shuffleId},${mapId} to ${peer}")
+          }
+        }
+        // This catch is intentionally outside of the while running block.
+        // If we encounter errors migrating to an executor, we want to stop.
+      } catch {
+        case e: Exception =>
+          migrating match {
+            case Some(shuffleMap) =>
+              logError("Error ${e} during migration, adding ${shuffleMap} back 
to migration queue")

Review comment:
   ```suggestion
              logError(s"Error ${e} during migration, adding ${shuffleMap} back to migration queue")
   ```
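
The missing `s` prefix is the whole bug here: without it, Scala treats `${e}` and `${shuffleMap}` as literal text instead of interpolating them. A quick demo (the `InterpolatorDemo` object is hypothetical):

```scala
object InterpolatorDemo {
  def main(args: Array[String]): Unit = {
    val e = new RuntimeException("boom")
    val shuffleMap = (0, 1L)
    // Plain string literal: the placeholders are NOT expanded.
    println("Error ${e} during migration, adding ${shuffleMap} back to migration queue")
    // s-interpolated string: ${e} and ${shuffleMap} are substituted.
    println(s"Error ${e} during migration, adding ${shuffleMap} back to migration queue")
  }
}
```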



