LuciferYang commented on PR #37999:
URL: https://github.com/apache/spark/pull/37999#issuecomment-1257719214

   Wrote a micro-benchmark to test Jackson ObjectMapper read and write:
   
   
https://github.com/LuciferYang/spark/blob/objectMapper/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/JacksonBenchmark.scala
   
   ```scala
   
     /**
      * Benchmarks parsing a small JSON document into a mutable map with Jackson.
      *
      * Two cases are added to the same benchmark: "Test Multiple" builds a new
      * ObjectMapper (with DefaultScalaModule registered) for every parse, while
      * "Test Single" reuses one mapper that is created once up front.
      *
      * @param valuesPerIteration number of readValue calls performed per case run
      */
     def testReadJsonToMap(valuesPerIteration: Int): Unit = {
       // The margin bar has to follow its own indentation on the same line for
       // stripMargin to strip it; a line-wrapped literal leaves a stray
       // whitespace-only line in the input.
       val input =
         """
           |{"mergeDir":"/a/b/c/mergeDirName","attemptId":"appattempt_1648454518011_994053_000001"}
         """.stripMargin

       val benchmark = new Benchmark("Test read json to map",
         valuesPerIteration, output = output)

       // Case 1: pay the mapper construction + module registration cost on
       // every single parse.
       benchmark.addCase("Test Multiple") { _: Int =>
         for (_ <- 0L until valuesPerIteration) {
           val mapper = new ObjectMapper()
           mapper.registerModule(DefaultScalaModule)
           mapper.readValue(input, classOf[mutable.HashMap[String, String]])
         }
       }

       // Case 2: construct the mapper once, outside the measured closure, and
       // share it across all parses.
       val mapper = new ObjectMapper()
       mapper.registerModule(DefaultScalaModule)
       benchmark.addCase("Test Single") { _: Int =>
         for (_ <- 0L until valuesPerIteration) {
           mapper.readValue(input, classOf[mutable.HashMap[String, String]])
         }
       }

       benchmark.run()
     }
   
     /**
      * Benchmarks serializing a two-entry mutable map to a JSON string.
      *
      * "Test Multiple" creates and configures a new ObjectMapper for each
      * write; "Test Single" performs every write through one shared mapper.
      *
      * @param valuesPerIteration number of writeValueAsString calls per case run
      */
     def testWriteMapToJson(valuesPerIteration: Int): Unit = {
       // Fixture that gets serialized by both cases.
       val payload = new mutable.HashMap[String, String]()
       payload("mergeDir") = "/a/b/c/mergeDirName"
       payload("attemptId") = "yarn_appattempt_1648454518011_994053_000001"

       // Built once here and reused by the "Test Single" case below.
       val sharedMapper = new ObjectMapper()
       sharedMapper.registerModule(DefaultScalaModule)

       val benchmark =
         new Benchmark("Test write map to json", valuesPerIteration, output = output)

       benchmark.addCase("Test Multiple") { _: Int =>
         (0L until valuesPerIteration).foreach { _ =>
           val freshMapper = new ObjectMapper()
           freshMapper.registerModule(DefaultScalaModule)
           freshMapper.writeValueAsString(payload)
         }
       }

       benchmark.addCase("Test Single") { _: Int =>
         (0L until valuesPerIteration).foreach { _ =>
           sharedMapper.writeValueAsString(payload)
         }
       }

       benchmark.run()
     }
   
     /**
      * Benchmarks only the cost of constructing an ObjectMapper and registering
      * DefaultScalaModule on it; nothing is read or written.
      *
      * @param valuesPerIteration number of mappers created per case run
      */
     def testCreateObjectMapper(valuesPerIteration: Int): Unit = {
       val caseName = "Test create ObjectMapper"
       val benchmark = new Benchmark(caseName, valuesPerIteration, output = output)

       benchmark.addCase(caseName) { _: Int =>
         (0L until valuesPerIteration).foreach { _ =>
           val created = new ObjectMapper()
           created.registerModule(DefaultScalaModule)
         }
       }

       benchmark.run()
     }
   
     /**
      * Entry point of the suite: runs the create, write and read benchmarks, in
      * that order, each with the same iteration count.
      *
      * @param mainArgs command-line arguments; not used by this suite
      */
     override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
       val iterations = 10000

       // Order matters for the printed report: create, then write, then read.
       val suites = Seq[Int => Unit](
         testCreateObjectMapper,
         testWriteMapToJson,
         testReadJsonToMap)

       suites.foreach(run => run(iterations))
     }
   ```
   
   and ran this using GitHub Actions (GA):
   
   
   ```
   
   ------------------------------------------------------------------------------------------------------------------------
   Test create ObjectMapper                            648            652           4          0.0       64819.0       1.0X

   OpenJDK 64-Bit Server VM 1.8.0_332-b09 on Linux 5.13.0-1022-azure
   Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
   Test write map to json:                   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   ------------------------------------------------------------------------------------------------------------------------
   Test Multiple                                      2116           2127          15          0.0      211556.5       1.0X
   Test Single                                           4              4           0          2.4         416.1     508.4X

   OpenJDK 64-Bit Server VM 1.8.0_332-b09 on Linux 5.13.0-1022-azure
   Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
   Test read json to map:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   ------------------------------------------------------------------------------------------------------------------------
   Test Multiple                                      8848           8867          27          0.0      884776.2       1.0X
   Test Single   
   ```
   
   From the test results, we should use a singleton Jackson ObjectMapper,
   because creating a new ObjectMapper instance appears to be expensive.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to