Github user srowen commented on a diff in the pull request: https://github.com/apache/spark/pull/20292#discussion_r162054687 --- Diff: core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala --- @@ -463,21 +463,21 @@ class ExternalAppendOnlyMap[K, V, C]( // An intermediate stream that reads from exactly one batch // This guards against pre-fetching and other arbitrary behavior of higher level streams - private var deserializeStream = nextBatchStream() + private var deserializeStream = null.asInstanceOf[Option[DeserializationStream]] private var nextItem: (K, C) = null private var objectsRead = 0 /** * Construct a stream that reads only from the next batch. */ - private def nextBatchStream(): DeserializationStream = { + private def nextBatchStream(): Option[DeserializationStream] = { // Note that batchOffsets.length = numBatches + 1 since we did a scan above; check whether // we're still in a valid batch. - if (batchIndex < batchOffsets.length - 1) { + if (batchIndex < batchOffsets.length - 1 && deserializeStream.isDefined) { if (deserializeStream != null) { --- End diff -- This check doesn't make sense then. You probably want a `deserializeStream.foreach { ... }` construct anyway
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org