dongjoon-hyun commented on a change in pull request #22617:
[SPARK-25484][SQL][TEST] Refactor ExternalAppendOnlyUnsafeRowArrayBenchmark
URL: https://github.com/apache/spark/pull/22617#discussion_r246280459
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArrayBenchmark.scala
##########
@@ -158,80 +176,23 @@ object ExternalAppendOnlyUnsafeRowArrayBenchmark {
}
}
- val conf = new SparkConf(false)
- // Make the Java serializer write a reset instruction (TC_RESET) after
each object to test
- // for a bug we had with bytes written past the last object in a batch
(SPARK-2792)
- conf.set("spark.serializer.objectStreamReset", "1")
- conf.set("spark.serializer", "org.apache.spark.serializer.JavaSerializer")
-
- val sc = new SparkContext("local", "test", conf)
- val taskContext = MemoryTestingUtils.fakeTaskContext(SparkEnv.get)
- TaskContext.setTaskContext(taskContext)
- benchmark.run()
- sc.stop()
+ withFakeTaskContext {
+ benchmark.run()
+ }
}
- def main(args: Array[String]): Unit = {
-
- //
=========================================================================================
//
- // WITHOUT SPILL
- //
=========================================================================================
//
-
- val spillThreshold = 100 * 1000
-
- /*
- Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
-
- Array with 1000 rows: Best/Avg Time(ms) Rate(M/s)
Per Row(ns) Relative
-
------------------------------------------------------------------------------------------------
- ArrayBuffer 7821 / 7941 33.5
29.8 1.0X
- ExternalAppendOnlyUnsafeRowArray 8798 / 8819 29.8
33.6 0.9X
- */
- testAgainstRawArrayBuffer(spillThreshold, 1000, 1 << 18)
-
- /*
- Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
-
- Array with 30000 rows: Best/Avg Time(ms) Rate(M/s)
Per Row(ns) Relative
-
------------------------------------------------------------------------------------------------
- ArrayBuffer 19200 / 19206 25.6
39.1 1.0X
- ExternalAppendOnlyUnsafeRowArray 19558 / 19562 25.1
39.8 1.0X
- */
- testAgainstRawArrayBuffer(spillThreshold, 30 * 1000, 1 << 14)
-
- /*
- Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
-
- Array with 100000 rows: Best/Avg Time(ms) Rate(M/s)
Per Row(ns) Relative
-
------------------------------------------------------------------------------------------------
- ArrayBuffer 5949 / 6028 17.2
58.1 1.0X
- ExternalAppendOnlyUnsafeRowArray 6078 / 6138 16.8
59.4 1.0X
- */
- testAgainstRawArrayBuffer(spillThreshold, 100 * 1000, 1 << 10)
-
- //
=========================================================================================
//
- // WITH SPILL
- //
=========================================================================================
//
-
- /*
- Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
-
- Spilling with 1000 rows: Best/Avg Time(ms) Rate(M/s)
Per Row(ns) Relative
-
------------------------------------------------------------------------------------------------
- UnsafeExternalSorter 9239 / 9470 28.4
35.2 1.0X
- ExternalAppendOnlyUnsafeRowArray 8857 / 8909 29.6
33.8 1.0X
- */
- testAgainstRawUnsafeExternalSorter(100 * 1000, 1000, 1 << 18)
-
- /*
- Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
-
- Spilling with 10000 rows: Best/Avg Time(ms) Rate(M/s)
Per Row(ns) Relative
-
------------------------------------------------------------------------------------------------
- UnsafeExternalSorter 4 / 5 39.3
25.5 1.0X
- ExternalAppendOnlyUnsafeRowArray 5 / 6 29.8
33.5 0.8X
- */
- testAgainstRawUnsafeExternalSorter(
-
config.SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD.defaultValue.get, 10 *
1000, 1 << 4)
+ override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+ runBenchmark("WITHOUT SPILL") {
+ val spillThreshold = 100 * 1000
+ testAgainstRawArrayBuffer(spillThreshold, 100 * 1000, 1 << 10)
+ testAgainstRawArrayBuffer(spillThreshold, 1000, 1 << 18)
+ testAgainstRawArrayBuffer(spillThreshold, 30 * 1000, 1 << 14)
Review comment:
Let's keep the original sequence: `1000` -> `30 * 1000` -> `100 * 1000`.
Increasing order is more intuitive.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]