Github user shivaram commented on a diff in the pull request:

    https://github.com/apache/spark/pull/1907#discussion_r16214044
  
    --- Diff: 
core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala ---
    @@ -262,77 +270,51 @@ object BlockFetcherIterator {
           readMetrics: ShuffleReadMetrics)
         extends BasicBlockFetcherIterator(blockManager, blocksByAddress, 
serializer, readMetrics) {
     
    -    import blockManager._
    +    override protected def sendRequest(req: FetchRequest) {
    +      logDebug("Sending request for %d blocks (%s) from %s".format(
    +        req.blocks.size, Utils.bytesToString(req.size), 
req.address.hostPort))
    +      val cmId = new ConnectionManagerId(req.address.host, 
req.address.port)
     
    -    val fetchRequestsSync = new LinkedBlockingQueue[FetchRequest]
    -
    -    private def startCopiers(numCopiers: Int): List[_ <: Thread] = {
    -      (for ( i <- Range(0,numCopiers) ) yield {
    -        val copier = new Thread {
    -          override def run(){
    -            try {
    -              while(!isInterrupted && !fetchRequestsSync.isEmpty) {
    -                sendRequest(fetchRequestsSync.take())
    -              }
    -            } catch {
    -              case x: InterruptedException => logInfo("Copier Interrupted")
    -              // case _ => throw new SparkException("Exception Throw in 
Shuffle Copier")
    +      bytesInFlight += req.size
    +      val sizeMap = req.blocks.toMap // so we can look up the size of each 
blockID
    +      val client = blockManager.nettyBlockClientFactory.createClient(
    +        cmId.host, req.address.nettyPort)
    +      val blocks = req.blocks.map(_._1.toString)
    +
    +      client.fetchBlocks(
    +        blocks,
    +        (blockId: String, refBuf: ReferenceCountedBuffer) => {
    +          // Increment the reference count so the buffer won't be recycled.
    +          // TODO: This could result in memory leaks when the task is 
stopped due to exception
    +          // before the iterator is exhausted.
    +          refBuf.retain()
    +          val buf = refBuf.byteBuffer()
    +          val blockSize = buf.remaining()
    +          val bid = BlockId(blockId)
    +
    +          // TODO: remove code duplication between here and 
BlockManager.dataDeserialization.
    +          results.put(new FetchResult(bid, sizeMap(bid), () => {
    +            def createIterator: Iterator[Any] = {
    +              val stream = blockManager.wrapForCompression(bid, 
refBuf.inputStream())
    +              serializer.newInstance().deserializeStream(stream).asIterator
    +            }
    +            new LazyInitIterator(createIterator) {
    +              // Release the buffer when we are done traversing it.
    +              override def close(): Unit = refBuf.release()
                 }
    +          }))
    +
    +          readMetrics.remoteBytesRead += blockSize
    --- End diff --
    
    Could this function be run from multiple threads? There is a TODO a little 
further ahead in the file noting that this needs to be fixed.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it to be, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to