This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a2e3188b4997 [SPARK-50640][CORE][TESTS] Update `ChecksumBenchmark` by 
removing `PureJavaCrc32C` and setting `Adler32` as a baseline
a2e3188b4997 is described below

commit a2e3188b4997001f4dbc1eb364d61ca55d438208
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Fri Dec 20 14:28:32 2024 -0800

    [SPARK-50640][CORE][TESTS] Update `ChecksumBenchmark` by removing 
`PureJavaCrc32C` and setting `Adler32` as a baseline
    
    ### What changes were proposed in this pull request?
    
    This PR aims to update `ChecksumBenchmark` by
    - Removing `PureJavaCrc32C`
    - Setting `Adler32` as a baseline
    
    ### Why are the changes needed?
    
    Like Apache Spark, the Apache Hadoop community has not used that legacy 
code on Java 9+ since Hadoop 3.1.0 in 2018 (HADOOP-15033).
    - https://github.com/apache/hadoop/pull/291
    
    We can save resources by removing the obsolete code from the benchmark and 
focusing on the options that are actually available.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Manual review.
    
    I also attached the Apple Silicon results.
    
    **Java 17**
    ```
    [info] OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Mac OS X 15.3
    [info] Apple M3 Max
    [info] Checksum Algorithms:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
    [info] 
----------------------------------------------------------------------------------------------------
    [info] Adler32                        8689           8709          28       
   0.0     8485001.2       1.0X
    [info] CRC32                          3201           3205           4       
   0.0     3125877.4       2.7X
    [info] CRC32C                         3199           3205           5       
   0.0     3124264.6       2.7X
    ```
    
    **Java 21**
    ```
    [info] OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Mac OS X 15.3
    [info] Apple M3 Max
    [info] Checksum Algorithms: Best Time(ms)   Avg Time(ms)   Stdev(ms)    
Rate(M/s)   Per Row(ns)   Relative
    [info] 
---------------------------------------------------------------------------------------------------
    [info] Adler32                       9208           9226          20        
  0.0     8991732.4       1.0X
    [info] CRC32                         3238           3357         105        
  0.0     3162007.9       2.8X
    [info] CRC32C                        3224           3351         110        
  0.0     3147966.1       2.9X
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #49258 from dongjoon-hyun/SPARK-50640.
    
    Authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 core/benchmarks/ChecksumBenchmark-jdk21-results.txt           |  9 ++++-----
 core/benchmarks/ChecksumBenchmark-results.txt                 |  9 ++++-----
 .../scala/org/apache/spark/shuffle/ChecksumBenchmark.scala    | 11 +++--------
 3 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/core/benchmarks/ChecksumBenchmark-jdk21-results.txt 
b/core/benchmarks/ChecksumBenchmark-jdk21-results.txt
index 85370450f355..8230f15aa659 100644
--- a/core/benchmarks/ChecksumBenchmark-jdk21-results.txt
+++ b/core/benchmarks/ChecksumBenchmark-jdk21-results.txt
@@ -2,13 +2,12 @@
 Benchmark Checksum Algorithms
 
================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure
 AMD EPYC 7763 64-Core Processor
 Checksum Algorithms:                      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-CRC32                                              2743           2746         
  3          0.0     2678409.9       1.0X
-CRC32C                                             1974           2055         
 70          0.0     1928129.2       1.4X
-Adler32                                           12689          12709         
 17          0.0    12391425.9       0.2X
-hadoop PureJavaCrc32C                             23027          23041         
 13          0.0    22487098.9       0.1X
+Adler32                                           11116          11123         
  7          0.0    10855585.4       1.0X
+CRC32                                              2774           2777         
  4          0.0     2709448.1       4.0X
+CRC32C                                             2083           2148         
 65          0.0     2034177.5       5.3X
 
 
diff --git a/core/benchmarks/ChecksumBenchmark-results.txt 
b/core/benchmarks/ChecksumBenchmark-results.txt
index cce5a61abf63..bf4e6182edfd 100644
--- a/core/benchmarks/ChecksumBenchmark-results.txt
+++ b/core/benchmarks/ChecksumBenchmark-results.txt
@@ -2,13 +2,12 @@
 Benchmark Checksum Algorithms
 
================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure
 AMD EPYC 7763 64-Core Processor
 Checksum Algorithms:                      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-CRC32                                              2757           2758         
  1          0.0     2692250.2       1.0X
-CRC32C                                             2142           2244         
116          0.0     2091901.8       1.3X
-Adler32                                           12699          12712         
 15          0.0    12401205.6       0.2X
-hadoop PureJavaCrc32C                             23049          23066         
 15          0.0    22508320.3       0.1X
+Adler32                                           11112          11130         
 20          0.0    10851949.2       1.0X
+CRC32                                              2765           2767         
  2          0.0     2699749.0       4.0X
+CRC32C                                             2101           2159         
 54          0.0     2051565.3       5.3X
 
 
diff --git 
a/core/src/test/scala/org/apache/spark/shuffle/ChecksumBenchmark.scala 
b/core/src/test/scala/org/apache/spark/shuffle/ChecksumBenchmark.scala
index 16a50fabb7ff..4eb167930b0d 100644
--- a/core/src/test/scala/org/apache/spark/shuffle/ChecksumBenchmark.scala
+++ b/core/src/test/scala/org/apache/spark/shuffle/ChecksumBenchmark.scala
@@ -19,8 +19,6 @@ package org.apache.spark.shuffle
 
 import java.util.zip.{Adler32, CRC32, CRC32C}
 
-import org.apache.hadoop.util.PureJavaCrc32C
-
 import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
 
 /**
@@ -41,18 +39,15 @@ object ChecksumBenchmark extends BenchmarkBase {
     runBenchmark("Benchmark Checksum Algorithms") {
       val data: Array[Byte] = (1 until 32 * 1024 * 1024).map(_.toByte).toArray
       val benchmark = new Benchmark("Checksum Algorithms", N, 3, output = 
output)
+      benchmark.addCase(s"Adler32") { _ =>
+        (1 to N).foreach(_ => new Adler32().update(data))
+      }
       benchmark.addCase("CRC32") { _ =>
         (1 to N).foreach(_ => new CRC32().update(data))
       }
       benchmark.addCase(s"CRC32C") { _ =>
         (1 to N).foreach(_ => new CRC32C().update(data))
       }
-      benchmark.addCase(s"Adler32") { _ =>
-        (1 to N).foreach(_ => new Adler32().update(data))
-      }
-      benchmark.addCase(s"hadoop PureJavaCrc32C") { _ =>
-        (1 to N).foreach(_ => new PureJavaCrc32C().update(data))
-      }
       benchmark.run()
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to