LuciferYang commented on PR #37999: URL: https://github.com/apache/spark/pull/37999#issuecomment-1257719214
Wrote a micro-benchmark to test Jackson ObjectMapper read and write: https://github.com/LuciferYang/spark/blob/objectMapper/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/JacksonBenchmark.scala ```scala def testReadJsonToMap(valuesPerIteration: Int): Unit = { val input = """ |{"mergeDir":"/a/b/c/mergeDirName","attemptId":"appattempt_1648454518011_994053_000001"} """.stripMargin val benchmark = new Benchmark("Test read json to map", valuesPerIteration, output = output) benchmark.addCase("Test Multiple") { _: Int => for (_ <- 0L until valuesPerIteration) { val mapper = new ObjectMapper() mapper.registerModule(DefaultScalaModule) mapper.readValue(input, classOf[mutable.HashMap[String, String]]) } } val mapper = new ObjectMapper() mapper.registerModule(DefaultScalaModule) benchmark.addCase("Test Single") { _: Int => for (_ <- 0L until valuesPerIteration) { mapper.readValue(input, classOf[mutable.HashMap[String, String]]) } } benchmark.run() } def testWriteMapToJson(valuesPerIteration: Int): Unit = { val map: mutable.HashMap[String, String] = new mutable.HashMap[String, String]() map.put("mergeDir", "/a/b/c/mergeDirName") map.put("attemptId", "yarn_appattempt_1648454518011_994053_000001") val benchmark = new Benchmark("Test write map to json", valuesPerIteration, output = output) benchmark.addCase("Test Multiple") { _: Int => for (_ <- 0L until valuesPerIteration) { val mapper = new ObjectMapper() mapper.registerModule(DefaultScalaModule) mapper.writeValueAsString(map) } } val mapper = new ObjectMapper() mapper.registerModule(DefaultScalaModule) benchmark.addCase("Test Single") { _: Int => for (_ <- 0L until valuesPerIteration) { mapper.writeValueAsString(map) } } benchmark.run() } def testCreateObjectMapper(valuesPerIteration: Int): Unit = { val benchmark = new Benchmark("Test create ObjectMapper", valuesPerIteration, output = output) benchmark.addCase("Test create ObjectMapper") { _: Int => for (_ <- 0L until valuesPerIteration) { val mapper = new 
ObjectMapper() mapper.registerModule(DefaultScalaModule) } } benchmark.run() } override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val valuesPerIteration = 10000 testCreateObjectMapper(valuesPerIteration = valuesPerIteration) testWriteMapToJson(valuesPerIteration = valuesPerIteration) testReadJsonToMap(valuesPerIteration = valuesPerIteration) } ``` and ran this using GitHub Actions (GA): ``` ------------------------------------------------------------------------------------------------------------------------ Test create ObjectMapper 648 652 4 0.0 64819.0 1.0XOpenJDK 64-Bit Server VM 1.8.0_332-b09 on Linux 5.13.0-1022-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test write map to json: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ Test Multiple 2116 2127 15 0.0 211556.5 1.0X Test Single 4 4 0 2.4 416.1 508.4XOpenJDK 64-Bit Server VM 1.8.0_332-b09 on Linux 5.13.0-1022-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Test read json to map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ Test Multiple 8848 8867 27 0.0 884776.2 1.0X Test Single ``` From the test results, we should use a singleton Jackson ObjectMapper, because creating a new ObjectMapper instance appears to be expensive. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
