This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 95faa0283e18 [SPARK-49490][SQL] Add benchmarks for initCap
95faa0283e18 is described below
commit 95faa0283e18343adba4b39083829d3c3724c035
Author: Mark Andreev <[email protected]>
AuthorDate: Thu Nov 21 09:14:06 2024 +0100
[SPARK-49490][SQL] Add benchmarks for initCap
### What changes were proposed in this pull request?
Add benchmarks for all code paths of initCap, namely the paths that call:
- execBinaryICU
- execBinary
- execLowercase
- execICU
### Why are the changes needed?
Requested by Jira ticket SPARK-49490.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
The benchmark was tested locally by performing a manual run.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #48501 from mrk-andreev/SPARK-49490.
Authored-by: Mark Andreev <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../CollationBenchmark-jdk21-results.txt | 106 ++++++++++++++-------
sql/core/benchmarks/CollationBenchmark-results.txt | 106 ++++++++++++++-------
.../CollationNonASCIIBenchmark-jdk21-results.txt | 106 ++++++++++++++-------
.../CollationNonASCIIBenchmark-results.txt | 106 ++++++++++++++-------
.../execution/benchmark/CollationBenchmark.scala | 44 +++++++++
5 files changed, 324 insertions(+), 144 deletions(-)
diff --git a/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt b/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt
index e31b45b48f8f..88db9ebfa1e3 100644
--- a/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt
+++ b/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt
@@ -1,54 +1,88 @@
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
--------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 1353 1357
5 0.1 13532.2 1.0X
-UTF8_LCASE 2601 2602
2 0.0 26008.0 1.9X
-UNICODE 16745 16756
16 0.0 167450.9 12.4X
-UNICODE_CI 16590 16627
52 0.0 165904.8 12.3X
+UTF8_BINARY 1193 1194
1 0.1 11929.0 1.0X
+UTF8_LCASE 2717 2721
6 0.0 27168.5 2.3X
+UNICODE 17991 17993
2 0.0 179913.6 15.1X
+UNICODE_CI 17837 17842
7 0.0 178369.9 15.0X
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
---------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 1746 1746
0 0.1 17462.6 1.0X
-UTF8_LCASE 2629 2630
1 0.0 26294.8 1.5X
-UNICODE 16744 16744
0 0.0 167438.6 9.6X
-UNICODE_CI 16518 16521
4 0.0 165180.2 9.5X
+UTF8_BINARY 1523 1523
0 0.1 15233.9 1.0X
+UTF8_LCASE 2441 2441
0 0.0 24407.9 1.6X
+UNICODE 17875 17884
13 0.0 178749.6 11.7X
+UNICODE_CI 17701 17703
2 0.0 177013.8 11.6X
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 2808 2808
1 0.0 28076.2 1.0X
-UTF8_LCASE 5409 5410
0 0.0 54093.0 1.9X
-UNICODE 67930 67957
38 0.0 679296.7 24.2X
-UNICODE_CI 56004 56005
1 0.0 560044.2 19.9X
+UTF8_BINARY 2660 2666
9 0.0 26601.1 1.0X
+UTF8_LCASE 5013 5016
3 0.0 50134.0 1.9X
+UNICODE 75622 75623
1 0.0 756217.3 28.4X
+UNICODE_CI 63036 63042
9 0.0 630360.9 23.7X
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - contains: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 1612 1614
2 0.1 16118.8 1.0X
-UTF8_LCASE 14509 14526
23 0.0 145092.7 9.0X
-UNICODE 308136 308631
700 0.0 3081364.6 191.2X
-UNICODE_CI 314612 314846
330 0.0 3146120.0 195.2X
+UTF8_BINARY 2121 2122
0 0.0 21214.2 1.0X
+UTF8_LCASE 27635 27636
1 0.0 276347.7 13.0X
+UNICODE 523746 524012
376 0.0 5237460.5 246.9X
+UNICODE_CI 520134 520227
131 0.0 5201343.3 245.2X
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 1913 1914
1 0.1 19131.3 1.0X
-UTF8_LCASE 9785 9788
5 0.0 97847.7 5.1X
-UNICODE 311517 311580
89 0.0 3115167.2 162.8X
-UNICODE_CI 316517 316660
201 0.0 3165173.7 165.4X
+UTF8_BINARY 2767 2769
4 0.0 27666.3 1.0X
+UTF8_LCASE 26861 26861
1 0.0 268606.4 9.7X
+UNICODE 518540 518815
389 0.0 5185401.3 187.4X
+UNICODE_CI 521156 521261
148 0.0 5211559.5 188.4X
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 1891 1891
0 0.1 18912.1 1.0X
-UTF8_LCASE 10089 10093
5 0.0 100893.6 5.3X
-UNICODE 336905 336931
36 0.0 3369051.8 178.1X
-UNICODE_CI 339944 340585
907 0.0 3399439.0 179.7X
+UTF8_BINARY 2919 2921
3 0.0 29190.2 1.0X
+UTF8_LCASE 26862 26862
1 0.0 268618.0 9.2X
+UNICODE 504534 504927
556 0.0 5045340.3 172.8X
+UNICODE_CI 506542 506565
32 0.0 5065423.0 173.5X
+
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execICU: Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------
+UNICODE 419
425 5 0.2 4189.2 1.0X
+UNICODE_CI 416
426 6 0.2 4163.2 1.0X
+
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execBinaryICU: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 575
576 0 0.2 5754.0 1.0X
+UTF8_LCASE 575
576 1 0.2 5747.8 1.0X
+UNICODE 576
576 0 0.2 5761.5 1.0X
+UNICODE_CI 576
578 2 0.2 5758.0 1.0X
+
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execBinary: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+-----------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 159
159 1 0.6 1587.6 1.0X
+UTF8_LCASE 159
159 0 0.6 1586.6 1.0X
+UNICODE 158
159 1 0.6 1584.9 1.0X
+UNICODE_CI 159
160 1 0.6 1586.1 1.0X
+
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execLowercase: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 397
405 5 0.3 3974.4 1.0X
+UTF8_LCASE 401
405 5 0.2 4009.5 1.0X
+UNICODE 395
399 3 0.3 3953.9 1.0X
+UNICODE_CI 395
400 3 0.3 3952.0 1.0X
diff --git a/sql/core/benchmarks/CollationBenchmark-results.txt b/sql/core/benchmarks/CollationBenchmark-results.txt
index eb8ae040a46f..8402a2db6d86 100644
--- a/sql/core/benchmarks/CollationBenchmark-results.txt
+++ b/sql/core/benchmarks/CollationBenchmark-results.txt
@@ -1,54 +1,88 @@
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
--------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 1373 1373
0 0.1 13730.8 1.0X
-UTF8_LCASE 3311 3311
0 0.0 33106.6 2.4X
-UNICODE 19067 19100
46 0.0 190672.9 13.9X
-UNICODE_CI 18704 18795
129 0.0 187040.2 13.6X
+UTF8_BINARY 1223 1224
1 0.1 12231.5 1.0X
+UTF8_LCASE 3280 3281
1 0.0 32803.3 2.7X
+UNICODE 17207 17207
0 0.0 172065.7 14.1X
+UNICODE_CI 16560 16565
7 0.0 165604.3 13.5X
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
---------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 1706 1708
3 0.1 17060.4 1.0X
-UTF8_LCASE 3958 3965
10 0.0 39575.4 2.3X
-UNICODE 18831 18865
48 0.0 188311.2 11.0X
-UNICODE_CI 18818 18825
9 0.0 188181.7 11.0X
+UTF8_BINARY 1656 1657
0 0.1 16564.0 1.0X
+UTF8_LCASE 3320 3321
0 0.0 33203.0 2.0X
+UNICODE 16392 16393
2 0.0 163921.3 9.9X
+UNICODE_CI 16314 16319
6 0.0 163143.3 9.8X
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 3092 3093
1 0.0 30918.5 1.0X
-UTF8_LCASE 6273 6289
23 0.0 62734.3 2.0X
-UNICODE 66953 66962
13 0.0 669525.2 21.7X
-UNICODE_CI 53934 53946
17 0.0 539338.7 17.4X
+UTF8_BINARY 2812 2813
1 0.0 28119.0 1.0X
+UTF8_LCASE 5682 5685
4 0.0 56823.2 2.0X
+UNICODE 71678 71685
10 0.0 716777.4 25.5X
+UNICODE_CI 60660 60670
15 0.0 606597.4 21.6X
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - contains: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 1643 1644
1 0.1 16431.2 1.0X
-UTF8_LCASE 17241 17273
45 0.0 172411.1 10.5X
-UNICODE 304878 307207
3294 0.0 3048780.8 185.5X
-UNICODE_CI 317341 320620
4637 0.0 3173412.3 193.1X
+UTF8_BINARY 2528 2528
1 0.0 25276.8 1.0X
+UTF8_LCASE 28034 28050
24 0.0 280335.5 11.1X
+UNICODE 521518 521690
242 0.0 5215184.7 206.3X
+UNICODE_CI 508188 508312
176 0.0 5081880.5 201.0X
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 1973 1977
6 0.1 19726.2 1.0X
-UTF8_LCASE 17070 17119
70 0.0 170697.7 8.7X
-UNICODE 306091 306797
999 0.0 3060911.4 155.2X
-UNICODE_CI 306558 307812
1774 0.0 3065581.4 155.4X
+UTF8_BINARY 2772 2774
4 0.0 27715.0 1.0X
+UTF8_LCASE 27387 27390
4 0.0 273872.8 9.9X
+UNICODE 501025 501076
72 0.0 5010249.5 180.8X
+UNICODE_CI 506654 506666
16 0.0 5066544.6 182.8X
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 2064 2064
0 0.0 20640.6 1.0X
-UTF8_LCASE 16883 16899
23 0.0 168829.3 8.2X
-UNICODE 309882 310702
1160 0.0 3098819.7 150.1X
-UNICODE_CI 313599 314798
1695 0.0 3135994.6 151.9X
+UTF8_BINARY 2886 2888
3 0.0 28858.9 1.0X
+UTF8_LCASE 27433 27445
17 0.0 274326.2 9.5X
+UNICODE 501068 501186
168 0.0 5010676.2 173.6X
+UNICODE_CI 506619 506655
52 0.0 5066185.6 175.6X
+
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execICU: Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------
+UNICODE 407
411 4 0.2 4065.4 1.0X
+UNICODE_CI 419
423 3 0.2 4194.1 1.0X
+
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execBinaryICU: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 564
565 2 0.2 5639.2 1.0X
+UTF8_LCASE 563
563 0 0.2 5629.0 1.0X
+UNICODE 563
565 2 0.2 5634.3 1.0X
+UNICODE_CI 564
564 0 0.2 5640.9 1.0X
+
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execBinary: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+-----------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 165
166 1 0.6 1647.3 1.0X
+UTF8_LCASE 165
165 1 0.6 1646.7 1.0X
+UNICODE 165
165 1 0.6 1646.5 1.0X
+UNICODE_CI 165
166 1 0.6 1648.7 1.0X
+
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execLowercase: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 391
399 6 0.3 3912.1 1.0X
+UTF8_LCASE 389
399 7 0.3 3894.2 1.0X
+UNICODE 383
391 6 0.3 3828.6 1.0X
+UNICODE_CI 383
387 2 0.3 3833.0 1.0X
diff --git a/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt b/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt
index 58ceaecb795d..4da64ade11d6 100644
--- a/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt
+++ b/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt
@@ -1,54 +1,88 @@
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
--------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 170 171
2 0.2 4260.8 1.0X
-UTF8_LCASE 7006 7009
5 0.0 175152.5 41.1X
-UNICODE 5152 5153
2 0.0 128798.5 30.2X
-UNICODE_CI 5307 5319
18 0.0 132666.4 31.1X
+UTF8_BINARY 156 156
0 0.3 3887.8 1.0X
+UTF8_LCASE 9717 9729
18 0.0 242914.7 62.5X
+UNICODE 5026 5027
2 0.0 125640.1 32.3X
+UNICODE_CI 4969 4972
4 0.0 124224.9 32.0X
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
---------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 310 311
1 0.1 7748.1 1.0X
-UTF8_LCASE 6971 6977
8 0.0 174277.9 22.5X
-UNICODE 5788 5794
9 0.0 144708.3 18.7X
-UNICODE_CI 5710 5715
7 0.0 142754.1 18.4X
+UTF8_BINARY 279 279
0 0.1 6969.5 1.0X
+UTF8_LCASE 9624 9628
5 0.0 240611.6 34.5X
+UNICODE 5243 5244
0 0.0 131080.1 18.8X
+UNICODE_CI 5173 5173
0 0.0 129322.8 18.6X
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 379 380
1 0.1 9479.8 1.0X
-UTF8_LCASE 3541 3547
7 0.0 88536.4 9.3X
-UNICODE 15014 15030
22 0.0 375356.2 39.6X
-UNICODE_CI 12226 12231
6 0.0 305661.7 32.2X
+UTF8_BINARY 383 383
0 0.1 9576.7 1.0X
+UTF8_LCASE 4927 4931
6 0.0 123170.3 12.9X
+UNICODE 17244 17261
24 0.0 431096.6 45.0X
+UNICODE_CI 12968 12970
3 0.0 324194.1 33.9X
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - contains: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 315 315
1 0.1 7864.6 1.0X
-UTF8_LCASE 7995 8005
14 0.0 199880.8 25.4X
-UNICODE 58712 58801
125 0.0 1467803.8 186.6X
-UNICODE_CI 58777 58784
9 0.0 1469433.1 186.8X
+UTF8_BINARY 535 536
2 0.1 13371.6 1.0X
+UTF8_LCASE 9479 9480
2 0.0 236964.5 17.7X
+UNICODE 93629 93676
66 0.0 2340726.5 175.1X
+UNICODE_CI 93222 93309
124 0.0 2330541.2 174.3X
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 284 284
1 0.1 7093.4 1.0X
-UTF8_LCASE 5169 5171
4 0.0 129215.5 18.2X
-UNICODE 57857 57897
57 0.0 1446425.3 203.9X
-UNICODE_CI 58803 58826
32 0.0 1470086.9 207.2X
+UTF8_BINARY 430 431
1 0.1 10755.8 1.0X
+UTF8_LCASE 6550 6551
2 0.0 163753.7 15.2X
+UNICODE 87435 87467
45 0.0 2185886.8 203.2X
+UNICODE_CI 90113 90255
201 0.0 2252836.0 209.5X
-OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 284 284
0 0.1 7090.7 1.0X
-UTF8_LCASE 5187 5187
1 0.0 129665.9 18.3X
-UNICODE 64562 64565
5 0.0 1614041.8 227.6X
-UNICODE_CI 63633 63686
75 0.0 1590826.2 224.4X
+UTF8_BINARY 455 456
2 0.1 11369.5 1.0X
+UTF8_LCASE 7108 7115
9 0.0 177705.2 15.6X
+UNICODE 101835 101866
43 0.0 2545883.9 223.9X
+UNICODE_CI 100962 101026
91 0.0 2524045.2 222.0X
+
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execICU: Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------
+UNICODE 254
255 1 0.2 6346.5 1.0X
+UNICODE_CI 254
254 0 0.2 6348.1 1.0X
+
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execBinaryICU: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 322
323 1 0.1 8046.3 1.0X
+UTF8_LCASE 322
324 2 0.1 8059.0 1.0X
+UNICODE 322
323 1 0.1 8050.7 1.0X
+UNICODE_CI 322
325 4 0.1 8062.4 1.0X
+
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execBinary: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+-----------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 119
120 1 0.3 2972.1 1.0X
+UTF8_LCASE 119
120 1 0.3 2971.9 1.0X
+UNICODE 119
120 1 0.3 2970.3 1.0X
+UNICODE_CI 119
120 1 0.3 2968.6 1.0X
+
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execLowercase: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 254
255 1 0.2 6345.2 1.0X
+UTF8_LCASE 254
255 0 0.2 6351.8 1.0X
+UNICODE 254
255 0 0.2 6352.9 1.0X
+UNICODE_CI 254
254 0 0.2 6341.2 1.0X
diff --git a/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt b/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt
index 2920ffbeac6a..fba59f3893e2 100644
--- a/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt
+++ b/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt
@@ -1,54 +1,88 @@
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
--------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 133 133
1 0.3 3314.1 1.0X
-UTF8_LCASE 7149 7159
15 0.0 178721.0 53.9X
-UNICODE 5693 5699
8 0.0 142326.0 42.9X
-UNICODE_CI 6228 6231
5 0.0 155690.1 47.0X
+UTF8_BINARY 125 126
1 0.3 3128.6 1.0X
+UTF8_LCASE 10335 10345
14 0.0 258377.4 82.6X
+UNICODE 5604 5610
8 0.0 140110.8 44.8X
+UNICODE_CI 5570 5577
9 0.0 139252.7 44.5X
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
---------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 472 475
3 0.1 11799.9 1.0X
-UTF8_LCASE 7398 7423
37 0.0 184940.1 15.7X
-UNICODE 6079 6083
5 0.0 151983.2 12.9X
-UNICODE_CI 6032 6034
2 0.0 150811.6 12.8X
+UTF8_BINARY 293 294
2 0.1 7326.8 1.0X
+UTF8_LCASE 10035 10035
1 0.0 250865.2 34.2X
+UNICODE 5578 5580
3 0.0 139455.8 19.0X
+UNICODE_CI 5539 5541
2 0.0 138483.8 18.9X
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 408 411
2 0.1 10203.7 1.0X
-UTF8_LCASE 3642 3644
4 0.0 91040.2 8.9X
-UNICODE 13471 13477
9 0.0 336766.1 33.0X
-UNICODE_CI 11242 11249
10 0.0 281047.3 27.5X
+UTF8_BINARY 388 388
0 0.1 9699.6 1.0X
+UTF8_LCASE 4965 4967
3 0.0 124121.3 12.8X
+UNICODE 15750 15753
5 0.0 393740.9 40.6X
+UNICODE_CI 12509 12511
2 0.0 312735.5 32.2X
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - contains: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 330 331
1 0.1 8249.5 1.0X
-UTF8_LCASE 8380 8388
12 0.0 209490.6 25.4X
-UNICODE 59720 59750
43 0.0 1493005.1 181.0X
-UNICODE_CI 57747 57748
2 0.0 1443681.8 175.0X
+UTF8_BINARY 421 422
2 0.1 10512.9 1.0X
+UTF8_LCASE 10793 10796
5 0.0 269819.0 25.7X
+UNICODE 94324 94330
9 0.0 2358090.9 224.3X
+UNICODE_CI 91647 91748
143 0.0 2291174.6 217.9X
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 312 312
1 0.1 7798.9 1.0X
-UTF8_LCASE 5300 5300
1 0.0 132491.4 17.0X
-UNICODE 57388 57515
179 0.0 1434709.7 184.0X
-UNICODE_CI 57642 57764
171 0.0 1441059.6 184.8X
+UTF8_BINARY 452 453
0 0.1 11307.9 1.0X
+UTF8_LCASE 6871 6872
2 0.0 171782.0 15.2X
+UNICODE 90881 90924
60 0.0 2272034.5 200.9X
+UNICODE_CI 91333 91363
42 0.0 2283331.3 201.9X
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure
-AMD EPYC 7763 64-Core Processor
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative time
------------------------------------------------------------------------------------------------------------------------
-UTF8_BINARY 292 293
0 0.1 7311.6 1.0X
-UTF8_LCASE 5375 5375
1 0.0 134375.2 18.4X
-UNICODE 62773 63030
363 0.0 1569337.4 214.6X
-UNICODE_CI 61268 61382
161 0.0 1531693.6 209.5X
+UTF8_BINARY 451 452
2 0.1 11268.1 1.0X
+UTF8_LCASE 6685 6686
2 0.0 167120.8 14.8X
+UNICODE 99387 99484
138 0.0 2484672.5 220.5X
+UNICODE_CI 98525 98597
101 0.0 2463132.9 218.6X
+
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execICU: Best Time(ms) Avg
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------
+UNICODE 231
232 0 0.2 5784.5 1.0X
+UNICODE_CI 231
232 1 0.2 5780.4 1.0X
+
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execBinaryICU: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 312
314 1 0.1 7811.2 1.0X
+UTF8_LCASE 313
314 2 0.1 7822.9 1.0X
+UNICODE 313
314 1 0.1 7815.5 1.0X
+UNICODE_CI 313
315 4 0.1 7825.7 1.0X
+
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execBinary: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+-----------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 132
133 0 0.3 3302.0 1.0X
+UTF8_LCASE 132
132 0 0.3 3297.5 1.0X
+UNICODE 132
133 1 0.3 3296.9 1.0X
+UNICODE_CI 132
132 0 0.3 3298.1 1.0X
+
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws
+Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz
+collation unit benchmarks - initCap using impl execLowercase: Best Time(ms)
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time
+--------------------------------------------------------------------------------------------------------------------------------------------
+UTF8_BINARY 231
231 0 0.2 5770.4 1.0X
+UTF8_LCASE 231
232 1 0.2 5776.4 1.0X
+UNICODE 231
231 0 0.2 5767.5 1.0X
+UNICODE_CI 231
232 1 0.2 5770.2 1.0X
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/CollationBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/CollationBenchmark.scala
index 21a5a4a979eb..37297f6aa94c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/CollationBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/CollationBenchmark.scala
@@ -185,6 +185,48 @@ abstract class CollationBenchmarkBase extends BenchmarkBase {
}
benchmark.run(relativeTime = true)
}
+
+ def benchmarkInitCap(
+ collationTypes: Seq[String],
+ utf8Strings: Seq[UTF8String]): Unit = {
+ type CollationId = Int
+ type InitCapEstimator = (UTF8String, CollationId) => Unit
+ def skipCollationTypeFilter: Any => Boolean = _ => true
+ def createBenchmark(
+ implName: String,
+ impl: InitCapEstimator,
+ collationTypeFilter: String => Boolean): Unit = {
+ val benchmark = new Benchmark(
+ s"collation unit benchmarks - initCap using impl $implName",
+ utf8Strings.size * 10,
+ warmupTime = 10.seconds,
+ output = output)
+ collationTypes.filter(collationTypeFilter).foreach { collationType => {
+ val collationId = CollationFactory.collationNameToId(collationType)
+ benchmark.addCase(collationType) { _ =>
+ utf8Strings.foreach { s =>
+ impl(s.repeat(1_000), collationId)
+ }
+ }
+ }
+ }
+ benchmark.run(relativeTime = true)
+ }
+
+ createBenchmark(
+ "execICU",
+ (s, collationId) => CollationSupport.InitCap.execICU(s, collationId),
+ collationType => CollationFactory.fetchCollation(collationType).collator
!= null)
+ createBenchmark(
+ "execBinaryICU",
+ (s, _) => CollationSupport.InitCap.execBinaryICU(s),
skipCollationTypeFilter)
+ createBenchmark(
+ "execBinary",
+ (s, _) => CollationSupport.InitCap.execBinary(s),
skipCollationTypeFilter)
+ createBenchmark(
+ "execLowercase",
+ (s, _) => CollationSupport.InitCap.execLowercase(s),
skipCollationTypeFilter)
+ }
}
/**
@@ -219,6 +261,7 @@ object CollationBenchmark extends CollationBenchmarkBase {
benchmarkContains(collationTypes, inputs)
benchmarkStartsWith(collationTypes, inputs)
benchmarkEndsWith(collationTypes, inputs)
+ benchmarkInitCap(collationTypes, inputs)
}
}
@@ -248,5 +291,6 @@ object CollationNonASCIIBenchmark extends CollationBenchmarkBase {
benchmarkContains(collationTypes, inputs)
benchmarkStartsWith(collationTypes, inputs)
benchmarkEndsWith(collationTypes, inputs)
+ benchmarkInitCap(collationTypes, inputs)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]