This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 5031e52f9e0 [SPARK-46217][CORE][TESTS] Include `Driver/App` data in `PersistenceEngineBenchmark` 5031e52f9e0 is described below commit 5031e52f9e032e8e450af9fcd294f5b53e2c4cfd Author: Dongjoon Hyun <dh...@apple.com> AuthorDate: Sun Dec 3 15:50:09 2023 -0800 [SPARK-46217][CORE][TESTS] Include `Driver/App` data in `PersistenceEngineBenchmark` ### What changes were proposed in this pull request? This PR aims to include `DriverInfo` and `ApplicationInfo` data in `PersistenceEngineBenchmark`. ### Why are the changes needed? `PersistenceEngine` recovers three kinds of information. Previously, `PersistenceEngineBenchmark` focused on `WorkerInfo` only. This PR adds the other two kinds of information to make the benchmark more complete. https://github.com/apache/spark/blob/3da2e5c632468ec7cf7001255c1a44197b46ce30/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala#L56-L78 ### Does this PR introduce _any_ user-facing change? No. This is a test improvement. ### How was this patch tested? Manual tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44130 from dongjoon-hyun/SPARK-46217. 
Authored-by: Dongjoon Hyun <dh...@apple.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../PersistenceEngineBenchmark-jdk21-results.txt | 28 +++++----- .../PersistenceEngineBenchmark-results.txt | 28 +++++----- .../deploy/master/PersistenceEngineBenchmark.scala | 65 +++++++++++++++++----- 3 files changed, 80 insertions(+), 41 deletions(-) diff --git a/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt b/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt index 38e74ed6b53..314fb6958b6 100644 --- a/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt +++ b/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt @@ -4,20 +4,20 @@ PersistenceEngineBenchmark OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1051-azure AMD EPYC 7763 64-Core Processor -2000 Workers: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +1000 Workers: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -ZooKeeperPersistenceEngine with JavaSerializer 2254 2329 119 0.0 1126867.1 1.0X -ZooKeeperPersistenceEngine with KryoSerializer 1911 1912 1 0.0 955667.1 1.2X -FileSystemPersistenceEngine with JavaSerializer 438 448 15 0.0 218868.1 5.1X -FileSystemPersistenceEngine with JavaSerializer (lz4) 187 195 8 0.0 93337.8 12.1X -FileSystemPersistenceEngine with JavaSerializer (lzf) 193 216 20 0.0 96678.8 11.7X -FileSystemPersistenceEngine with JavaSerializer (snappy) 175 183 10 0.0 87652.3 12.9X -FileSystemPersistenceEngine with JavaSerializer (zstd) 243 255 14 0.0 121695.2 9.3X -FileSystemPersistenceEngine with KryoSerializer 150 160 15 0.0 75089.7 15.0X -FileSystemPersistenceEngine with KryoSerializer (lz4) 170 177 10 0.0 84996.7 13.3X -FileSystemPersistenceEngine with KryoSerializer (lzf) 192 203 12 0.0 96019.1 11.7X -FileSystemPersistenceEngine with KryoSerializer (snappy) 184 202 16 0.0 
92241.3 12.2X -FileSystemPersistenceEngine with KryoSerializer (zstd) 232 238 5 0.0 116075.2 9.7X -BlackHolePersistenceEngine 0 0 0 27.3 36.6 30761.0X +ZooKeeperPersistenceEngine with JavaSerializer 5402 5546 233 0.0 5402030.8 1.0X +ZooKeeperPersistenceEngine with KryoSerializer 4185 4220 32 0.0 4184623.1 1.3X +FileSystemPersistenceEngine with JavaSerializer 1591 1634 37 0.0 1590836.4 3.4X +FileSystemPersistenceEngine with JavaSerializer (lz4) 611 623 14 0.0 611256.6 8.8X +FileSystemPersistenceEngine with JavaSerializer (lzf) 626 640 13 0.0 626072.2 8.6X +FileSystemPersistenceEngine with JavaSerializer (snappy) 595 628 29 0.0 594744.4 9.1X +FileSystemPersistenceEngine with JavaSerializer (zstd) 755 774 21 0.0 754604.4 7.2X +FileSystemPersistenceEngine with KryoSerializer 479 489 8 0.0 479404.7 11.3X +FileSystemPersistenceEngine with KryoSerializer (lz4) 392 406 12 0.0 392165.7 13.8X +FileSystemPersistenceEngine with KryoSerializer (lzf) 525 536 14 0.0 524916.7 10.3X +FileSystemPersistenceEngine with KryoSerializer (snappy) 519 533 14 0.0 518569.3 10.4X +FileSystemPersistenceEngine with KryoSerializer (zstd) 627 663 31 0.0 627233.2 8.6X +BlackHolePersistenceEngine 0 0 0 6.0 166.0 32541.8X diff --git a/core/benchmarks/PersistenceEngineBenchmark-results.txt b/core/benchmarks/PersistenceEngineBenchmark-results.txt index fc743da46a8..64c8bb44084 100644 --- a/core/benchmarks/PersistenceEngineBenchmark-results.txt +++ b/core/benchmarks/PersistenceEngineBenchmark-results.txt @@ -4,20 +4,20 @@ PersistenceEngineBenchmark OpenJDK 64-Bit Server VM 17.0.9+9-LTS on Linux 5.15.0-1051-azure AMD EPYC 7763 64-Core Processor -2000 Workers: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +1000 Workers: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -ZooKeeperPersistenceEngine with JavaSerializer 2276 2360 115 
0.0 1137909.6 1.0X -ZooKeeperPersistenceEngine with KryoSerializer 1883 1906 34 0.0 941364.2 1.2X -FileSystemPersistenceEngine with JavaSerializer 431 436 7 0.0 215436.9 5.3X -FileSystemPersistenceEngine with JavaSerializer (lz4) 209 216 9 0.0 104404.1 10.9X -FileSystemPersistenceEngine with JavaSerializer (lzf) 199 202 2 0.0 99489.5 11.4X -FileSystemPersistenceEngine with JavaSerializer (snappy) 192 199 9 0.0 95872.9 11.9X -FileSystemPersistenceEngine with JavaSerializer (zstd) 258 264 6 0.0 129249.4 8.8X -FileSystemPersistenceEngine with KryoSerializer 139 151 13 0.0 69374.5 16.4X -FileSystemPersistenceEngine with KryoSerializer (lz4) 159 165 8 0.0 79588.9 14.3X -FileSystemPersistenceEngine with KryoSerializer (lzf) 180 195 18 0.0 89844.0 12.7X -FileSystemPersistenceEngine with KryoSerializer (snappy) 164 183 18 0.0 82016.0 13.9X -FileSystemPersistenceEngine with KryoSerializer (zstd) 206 218 11 0.0 102838.9 11.1X -BlackHolePersistenceEngine 0 0 0 35.1 28.5 39908.5X +ZooKeeperPersistenceEngine with JavaSerializer 5463 5745 246 0.0 5463433.7 1.0X +ZooKeeperPersistenceEngine with KryoSerializer 4478 4513 31 0.0 4477926.8 1.2X +FileSystemPersistenceEngine with JavaSerializer 1650 1670 19 0.0 1649557.9 3.3X +FileSystemPersistenceEngine with JavaSerializer (lz4) 628 649 19 0.0 628265.9 8.7X +FileSystemPersistenceEngine with JavaSerializer (lzf) 545 575 28 0.0 544502.0 10.0X +FileSystemPersistenceEngine with JavaSerializer (snappy) 608 625 27 0.0 607926.2 9.0X +FileSystemPersistenceEngine with JavaSerializer (zstd) 805 814 8 0.0 804867.1 6.8X +FileSystemPersistenceEngine with KryoSerializer 420 461 40 0.0 420137.2 13.0X +FileSystemPersistenceEngine with KryoSerializer (lz4) 380 452 64 0.0 379742.4 14.4X +FileSystemPersistenceEngine with KryoSerializer (lzf) 518 543 31 0.0 518217.1 10.5X +FileSystemPersistenceEngine with KryoSerializer (snappy) 483 513 31 0.0 483112.6 11.3X +FileSystemPersistenceEngine with KryoSerializer (zstd) 668 679 9 0.0 668335.1 8.2X 
+BlackHolePersistenceEngine 0 0 0 5.8 172.5 31668.2X diff --git a/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineBenchmark.scala b/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineBenchmark.scala index a1df75b846d..f538a1a06f6 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineBenchmark.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineBenchmark.scala @@ -18,12 +18,14 @@ package org.apache.spark.deploy.master import java.net.ServerSocket +import java.util.Date import java.util.concurrent.ThreadLocalRandom import org.apache.curator.test.TestingServer import org.apache.spark.SparkConf import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.deploy.{DeployTestUtils, DriverDescription} import org.apache.spark.internal.config.Deploy.ZOOKEEPER_URL import org.apache.spark.io.CompressionCodec import org.apache.spark.resource.ResourceUtils.{FPGA, GPU} @@ -53,11 +55,27 @@ object PersistenceEngineBenchmark extends BenchmarkBase { override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val numIters = 3 - val numWorkers = 2000 + val numWorkers = 1000 + val numDrivers = 2000 + val numApplications = 2000 val workers = (1 to numWorkers).map(createWorkerInfo).toArray + val drivers = (1 to numDrivers).map(createDriverInfo).toArray + val applications = (1 to numApplications).map(createApplicationInfo).toArray conf.set(ZOOKEEPER_URL, zkTestServer.getConnectString) + def writeAndRead(engine: PersistenceEngine): Unit = { + workers.foreach(engine.addWorker) + drivers.foreach(engine.addDriver) + applications.foreach(engine.addApplication) + engine.read[WorkerInfo]("worker_") + engine.read[DriverInfo]("driver_") + engine.read[ApplicationInfo]("app_") + applications.foreach(engine.removeApplication) + drivers.foreach(engine.removeDriver) + workers.foreach(engine.removeWorker) + } + runBenchmark("PersistenceEngineBenchmark") { val benchmark = new 
Benchmark(s"$numWorkers Workers", numWorkers, output = output) @@ -65,9 +83,7 @@ object PersistenceEngineBenchmark extends BenchmarkBase { val serializerName = serializer.getClass.getSimpleName benchmark.addCase(s"ZooKeeperPersistenceEngine with $serializerName", numIters) { _ => val engine = new ZooKeeperPersistenceEngine(conf, serializer) - workers.foreach(engine.addWorker) - engine.read[WorkerInfo]("worker_") - workers.foreach(engine.removeWorker) + writeAndRead(engine) engine.close() } } @@ -78,9 +94,7 @@ object PersistenceEngineBenchmark extends BenchmarkBase { benchmark.addCase(name, numIters) { _ => val dir = Utils.createTempDir().getAbsolutePath val engine = new FileSystemPersistenceEngine(dir, serializer) - workers.foreach(engine.addWorker) - engine.read[WorkerInfo]("worker_") - workers.foreach(engine.removeWorker) + writeAndRead(engine) engine.close() } CompressionCodec.ALL_COMPRESSION_CODECS.foreach { c => @@ -90,9 +104,7 @@ object PersistenceEngineBenchmark extends BenchmarkBase { benchmark.addCase(name, numIters) { _ => val dir = Utils.createTempDir().getAbsolutePath val engine = new FileSystemPersistenceEngine(dir, serializer, Some(codec)) - workers.foreach(engine.addWorker) - engine.read[WorkerInfo]("worker_") - workers.foreach(engine.removeWorker) + writeAndRead(engine) engine.close() } } @@ -100,9 +112,7 @@ object PersistenceEngineBenchmark extends BenchmarkBase { benchmark.addCase("BlackHolePersistenceEngine", numIters) { _ => val engine = new BlackHolePersistenceEngine() - workers.foreach(engine.addWorker) - engine.read[WorkerInfo]("worker_") - workers.foreach(engine.removeWorker) + writeAndRead(engine) engine.close() } @@ -124,6 +134,35 @@ object PersistenceEngineBenchmark extends BenchmarkBase { workerInfo } + private def createDriverInfo(id: Int): DriverInfo = { + val now = System.currentTimeMillis() + val date = new Date(now) + new DriverInfo(now, f"driver-20240101000000-$id%04d", DriverDescription( + jarUrl = 
"local:///opt/spark/examples/jars/spark-examples.jar", + mem = 1024, + cores = 1, + supervise = false, + command = DeployTestUtils.createDriverCommand() + ), date) + } + + private def createApplicationInfo(id: Int): ApplicationInfo = { + val now = System.currentTimeMillis() + val submitDate = new Date(now) + val customResources = Map( + GPU -> 3, + FPGA -> 3) + val appDesc = DeployTestUtils.createAppDesc(customResources) + val appInfo = new ApplicationInfo( + now.toLong, + f"app-20231031224509-$id%04d", + appDesc, submitDate, + null, + Int.MaxValue) + appInfo.endTime = now + 1000 // Elapsed 1s from submitDate in order to give different values + appInfo + } + def findFreePort(conf: SparkConf): Int = { val candidatePort = ThreadLocalRandom.current().nextInt(1024, 65536) Utils.startServiceOnPort(candidatePort, (trialPort: Int) => { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org