Github user HeartSaVioR commented on a diff in the pull request: https://github.com/apache/spark/pull/21385#discussion_r190131693 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/shuffle/UnsafeRowReceiver.scala --- @@ -56,20 +69,73 @@ private[shuffle] class UnsafeRowReceiver( override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case r: UnsafeRowReceiverMessage => - queue.put(r) + queues(r.writerId).put(r) context.reply(()) } override def read(): Iterator[UnsafeRow] = { new NextIterator[UnsafeRow] { - override def getNext(): UnsafeRow = queue.take() match { - case ReceiverRow(r) => r - case ReceiverEpochMarker() => - finished = true - null + // An array of flags for whether each writer ID has gotten an epoch marker. + private val writerEpochMarkersReceived = + mutable.Map.empty[Int, Boolean].withDefaultValue(false) + + private val executor = Executors.newFixedThreadPool(numShuffleWriters) --- End diff -- And I'm also now seeing this approach as alternative to deal with alignment (not buffer rows explicitly but just don't read after epoch comes in). Nice approach.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org