Github user eyalfa commented on a diff in the pull request:
https://github.com/apache/spark/pull/21369#discussion_r190542635
--- Diff:
core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala
---
@@ -414,6 +415,99 @@ class ExternalAppendOnlyMapSuite extends SparkFunSuite
with LocalSparkContext {
sc.stop()
}
+ test("spill during iteration") {
+ val size = 1000
+ val conf = createSparkConf(loadDefaults = true)
+ sc = new SparkContext("local-cluster[1,1,1024]", "test", conf)
+ val map = createExternalMap[Int]
+
+ map.insertAll((0 until size).iterator.map(i => (i / 10, i)))
+ assert(map.numSpills == 0, "map was not supposed to spill")
+
+ val it = map.iterator
+ assert( it.isInstanceOf[CompletionIterator[_, _]])
+ val underlyingIt = map.readingIterator
+ assert( underlyingIt != null )
+ val underlyingMapIterator = underlyingIt.upstream
+ assert(underlyingMapIterator != null)
+ val underlyingMapIteratorClass = underlyingMapIterator.getClass
+ assert(underlyingMapIteratorClass.getEnclosingClass ==
classOf[AppendOnlyMap[_, _]])
+
+ val underlyingMap = map.currentMap
+ assert(underlyingMap != null)
+
+ val first50Keys = for ( _ <- 0 until 50) yield {
+ val (k, vs) = it.next
+ val sortedVs = vs.sorted
+ assert(sortedVs.seq == (0 until 10).map(10 * k + _))
+ k
+ }
+ assert( map.numSpills == 0 )
+ map.spill(Long.MaxValue, null)
+ // these asserts try to show that we're no longer holding references
to the underlying map.
+ // it'd be nice to use something like
+ //
https://github.com/scala/scala/blob/2.13.x/test/junit/scala/tools/testing/AssertUtil.scala
+ // (lines 69-89)
+ assert(map.currentMap == null)
+ assert(underlyingIt.upstream ne underlyingMapIterator)
+ assert(underlyingIt.upstream.getClass != underlyingMapIteratorClass)
+ assert(underlyingIt.upstream.getClass.getEnclosingClass !=
classOf[AppendOnlyMap[_, _]])
--- End diff --
hmm, we can in line 508 but not in this test.
in this test we look at the iterator immediately after a spill, at this
point upstream is supposed to be replaced by a `DiskMapIterator`, I guess we
can check for this directly (after relaxing its visibility to package private).
in line 508, we can simply compare with Iterator.empty
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]