This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 879aae39409 [SPARK-38920][SQL][TEST] Add ORC blockSize tests to
BloomFilterBenchmark
879aae39409 is described below
commit 879aae39409ae92f434c3bb4101d66334f9833dd
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sat Apr 16 19:05:22 2022 -0700
[SPARK-38920][SQL][TEST] Add ORC blockSize tests to BloomFilterBenchmark
### What changes were proposed in this pull request?
This PR aims to improve `BloomFilterBenchmark` by adding more `blockSize`
combination tests for ORC.
- Java 8: https://github.com/dongjoon-hyun/spark/actions/runs/2178431204
- Java 11: https://github.com/dongjoon-hyun/spark/actions/runs/2178432284
- Java 17: https://github.com/dongjoon-hyun/spark/actions/runs/2178432661
### Why are the changes needed?
For Parquet, we already had this benchmark. Adding the same `blockSize`
tests for ORC provides feature parity for the comparison.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Tested manually, because this is a benchmark.
Closes #36218 from dongjoon-hyun/SPARK-38920.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../BloomFilterBenchmark-jdk11-results.txt | 112 +++++++++++++----
.../BloomFilterBenchmark-jdk17-results.txt | 132 ++++++++++++++++-----
.../benchmarks/BloomFilterBenchmark-results.txt | 112 +++++++++++++----
.../execution/benchmark/BloomFilterBenchmark.scala | 30 +++--
4 files changed, 304 insertions(+), 82 deletions(-)
diff --git a/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt
b/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt
index fab16b64870..1bd32b0e7a9 100644
--- a/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt
+++ b/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt
@@ -6,8 +6,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Write 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter 20453 20495
60 4.9 204.5 1.0X
-With bloom filter 22539 22694
218 4.4 225.4 0.9X
+Without bloom filter 15574 15579
6 6.4 155.7 1.0X
+With bloom filter 17915 17972
80 5.6 179.2 0.9X
================================================================================================
@@ -18,8 +18,80 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter 1708 1800
129 58.5 17.1 1.0X
-With bloom filter 1324 1357
47 75.5 13.2 1.3X
+Without bloom filter, blocksize: 2097152 1667 1675
11 60.0 16.7 1.0X
+With bloom filter, blocksize: 2097152 1098 1134
50 91.1 11.0 1.5X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 4194304 1446 1514
97 69.2 14.5 1.0X
+With bloom filter, blocksize: 4194304 1069 1145
108 93.6 10.7 1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 6291456 1436 1468
46 69.6 14.4 1.0X
+With bloom filter, blocksize: 6291456 1035 1060
36 96.6 10.3 1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 8388608 1451 1488
52 68.9 14.5 1.0X
+With bloom filter, blocksize: 8388608 1016 1027
15 98.4 10.2 1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 12582912 1463 1463
1 68.4 14.6 1.0X
+With bloom filter, blocksize: 12582912 1023 1041
24 97.7 10.2 1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 16777216 1473 1505
46 67.9 14.7 1.0X
+With bloom filter, blocksize: 16777216 997 1016
26 100.3 10.0 1.5X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 33554432 1440 1482
59 69.4 14.4 1.0X
+With bloom filter, blocksize: 33554432 1037 1065
40 96.4 10.4 1.4X
================================================================================================
@@ -30,8 +102,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Write 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter 17586 17666
112 5.7 175.9 1.0X
-With bloom filter 21429 21507
111 4.7 214.3 0.8X
+Without bloom filter 16645 16907
371 6.0 166.4 1.0X
+With bloom filter 20968 21145
250 4.8 209.7 0.8X
================================================================================================
@@ -42,8 +114,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 2097152 1219 1246
39 82.1 12.2 1.0X
-With bloom filter, blocksize: 2097152 379 420
48 264.0 3.8 3.2X
+Without bloom filter, blocksize: 2097152 1101 1106
7 90.8 11.0 1.0X
+With bloom filter, blocksize: 2097152 308 365
37 325.2 3.1 3.6X
================================================================================================
@@ -54,8 +126,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 4194304 1015 1023
11 98.5 10.2 1.0X
-With bloom filter, blocksize: 4194304 237 267
25 422.1 2.4 4.3X
+Without bloom filter, blocksize: 4194304 933 970
33 107.2 9.3 1.0X
+With bloom filter, blocksize: 4194304 269 302
32 371.1 2.7 3.5X
================================================================================================
@@ -66,8 +138,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 6291456 997 1008
16 100.3 10.0 1.0X
-With bloom filter, blocksize: 6291456 268 290
11 372.6 2.7 3.7X
+Without bloom filter, blocksize: 6291456 977 1026
69 102.3 9.8 1.0X
+With bloom filter, blocksize: 6291456 358 379
14 279.6 3.6 2.7X
================================================================================================
@@ -78,8 +150,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 8388608 957 1014
81 104.5 9.6 1.0X
-With bloom filter, blocksize: 8388608 361 401
33 277.0 3.6 2.6X
+Without bloom filter, blocksize: 8388608 1009 1026
24 99.2 10.1 1.0X
+With bloom filter, blocksize: 8388608 371 395
28 269.3 3.7 2.7X
================================================================================================
@@ -90,8 +162,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 12582912 1015 1033
26 98.5 10.1 1.0X
-With bloom filter, blocksize: 12582912 606 635
29 165.2 6.1 1.7X
+Without bloom filter, blocksize: 12582912 972 977
6 102.9 9.7 1.0X
+With bloom filter, blocksize: 12582912 695 725
30 143.9 6.9 1.4X
================================================================================================
@@ -102,8 +174,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 16777216 1038 1055
23 96.3 10.4 1.0X
-With bloom filter, blocksize: 16777216 901 905
4 111.0 9.0 1.2X
+Without bloom filter, blocksize: 16777216 938 946
8 106.6 9.4 1.0X
+With bloom filter, blocksize: 16777216 833 870
45 120.0 8.3 1.1X
================================================================================================
@@ -114,7 +186,7 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 33554432 941 955
13 106.3 9.4 1.0X
-With bloom filter, blocksize: 33554432 888 930
39 112.6 8.9 1.1X
+Without bloom filter, blocksize: 33554432 929 955
24 107.6 9.3 1.0X
+With bloom filter, blocksize: 33554432 1003 1010
10 99.7 10.0 0.9X
diff --git a/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt
b/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt
index 1288a06ec53..cd3ec1b3660 100644
--- a/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt
+++ b/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt
@@ -3,11 +3,11 @@ ORC Write
================================================================================================
OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Write 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter 19097 19523
602 5.2 191.0 1.0X
-With bloom filter 22213 22402
267 4.5 222.1 0.9X
+Without bloom filter 15778 15792
19 6.3 157.8 1.0X
+With bloom filter 17951 18076
178 5.6 179.5 0.9X
================================================================================================
@@ -15,11 +15,83 @@ ORC Read
================================================================================================
OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter 1528 1628
142 65.4 15.3 1.0X
-With bloom filter 1370 1391
29 73.0 13.7 1.1X
+Without bloom filter, blocksize: 2097152 1327 1404
109 75.4 13.3 1.0X
+With bloom filter, blocksize: 2097152 929 943
24 107.6 9.3 1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 4194304 1342 1348
8 74.5 13.4 1.0X
+With bloom filter, blocksize: 4194304 1085 1087
2 92.1 10.9 1.2X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 6291456 1325 1332
10 75.5 13.3 1.0X
+With bloom filter, blocksize: 6291456 1115 1117
3 89.7 11.1 1.2X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 8388608 1203 1213
14 83.1 12.0 1.0X
+With bloom filter, blocksize: 8388608 1168 1171
4 85.6 11.7 1.0X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 12582912 1774 1781
10 56.4 17.7 1.0X
+With bloom filter, blocksize: 12582912 1171 1182
15 85.4 11.7 1.5X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 16777216 1723 1728
7 58.0 17.2 1.0X
+With bloom filter, blocksize: 16777216 1329 1344
20 75.2 13.3 1.3X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 33554432 1847 1858
15 54.1 18.5 1.0X
+With bloom filter, blocksize: 33554432 1222 1312
126 81.8 12.2 1.5X
================================================================================================
@@ -27,11 +99,11 @@ Parquet Write
================================================================================================
OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Write 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter 17846 17981
191 5.6 178.5 1.0X
-With bloom filter 22883 22982
140 4.4 228.8 0.8X
+Without bloom filter 16902 16907
8 5.9 169.0 1.0X
+With bloom filter 28237 28266
41 3.5 282.4 0.6X
================================================================================================
@@ -39,11 +111,11 @@ Parquet Read
================================================================================================
OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 2097152 947 1012
92 105.6 9.5 1.0X
-With bloom filter, blocksize: 2097152 311 325
12 322.0 3.1 3.0X
+Without bloom filter, blocksize: 2097152 763 796
31 131.1 7.6 1.0X
+With bloom filter, blocksize: 2097152 248 261
13 403.2 2.5 3.1X
================================================================================================
@@ -51,11 +123,11 @@ Parquet Read
================================================================================================
OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 4194304 824 837
15 121.3 8.2 1.0X
-With bloom filter, blocksize: 4194304 210 222
10 475.8 2.1 3.9X
+Without bloom filter, blocksize: 4194304 1020 1020
0 98.0 10.2 1.0X
+With bloom filter, blocksize: 4194304 193 201
11 517.5 1.9 5.3X
================================================================================================
@@ -63,11 +135,11 @@ Parquet Read
================================================================================================
OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 6291456 1210 1327
166 82.6 12.1 1.0X
-With bloom filter, blocksize: 6291456 302 315
9 331.1 3.0 4.0X
+Without bloom filter, blocksize: 6291456 1023 1023
1 97.8 10.2 1.0X
+With bloom filter, blocksize: 6291456 298 306
8 336.0 3.0 3.4X
================================================================================================
@@ -75,11 +147,11 @@ Parquet Read
================================================================================================
OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 8388608 1244 1251
10 80.4 12.4 1.0X
-With bloom filter, blocksize: 8388608 504 513
8 198.4 5.0 2.5X
+Without bloom filter, blocksize: 8388608 1033 1038
6 96.8 10.3 1.0X
+With bloom filter, blocksize: 8388608 459 467
4 217.7 4.6 2.2X
================================================================================================
@@ -87,11 +159,11 @@ Parquet Read
================================================================================================
OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 12582912 1384 1408
34 72.3 13.8 1.0X
-With bloom filter, blocksize: 12582912 900 955
49 111.1 9.0 1.5X
+Without bloom filter, blocksize: 12582912 1077 1084
11 92.9 10.8 1.0X
+With bloom filter, blocksize: 12582912 734 741
9 136.2 7.3 1.5X
================================================================================================
@@ -99,11 +171,11 @@ Parquet Read
================================================================================================
OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 16777216 1468 1482
19 68.1 14.7 1.0X
-With bloom filter, blocksize: 16777216 1302 1313
15 76.8 13.0 1.1X
+Without bloom filter, blocksize: 16777216 1044 1047
4 95.7 10.4 1.0X
+With bloom filter, blocksize: 16777216 825 835
11 121.2 8.2 1.3X
================================================================================================
@@ -111,10 +183,10 @@ Parquet Read
================================================================================================
OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 33554432 1444 1451
9 69.2 14.4 1.0X
-With bloom filter, blocksize: 33554432 1458 1461
3 68.6 14.6 1.0X
+Without bloom filter, blocksize: 33554432 1232 1237
6 81.2 12.3 1.0X
+With bloom filter, blocksize: 33554432 1152 1199
67 86.8 11.5 1.1X
diff --git a/sql/core/benchmarks/BloomFilterBenchmark-results.txt
b/sql/core/benchmarks/BloomFilterBenchmark-results.txt
index de191252961..745249db930 100644
--- a/sql/core/benchmarks/BloomFilterBenchmark-results.txt
+++ b/sql/core/benchmarks/BloomFilterBenchmark-results.txt
@@ -6,8 +6,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
Write 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter 17889 18038
211 5.6 178.9 1.0X
-With bloom filter 20780 20941
228 4.8 207.8 0.9X
+Without bloom filter 18682 18792
156 5.4 186.8 1.0X
+With bloom filter 21347 21396
69 4.7 213.5 0.9X
================================================================================================
@@ -18,8 +18,80 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter 1690 1694
6 59.2 16.9 1.0X
-With bloom filter 1274 1291
24 78.5 12.7 1.3X
+Without bloom filter, blocksize: 2097152 1656 1666
14 60.4 16.6 1.0X
+With bloom filter, blocksize: 2097152 1203 1230
37 83.1 12.0 1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 4194304 1625 1632
10 61.6 16.2 1.0X
+With bloom filter, blocksize: 4194304 1224 1284
85 81.7 12.2 1.3X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 6291456 1599 1601
3 62.5 16.0 1.0X
+With bloom filter, blocksize: 6291456 1162 1179
24 86.0 11.6 1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 8388608 1623 1638
21 61.6 16.2 1.0X
+With bloom filter, blocksize: 8388608 1181 1197
23 84.7 11.8 1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 12582912 1631 1643
18 61.3 16.3 1.0X
+With bloom filter, blocksize: 12582912 1159 1183
34 86.3 11.6 1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 16777216 1586 1597
16 63.0 15.9 1.0X
+With bloom filter, blocksize: 16777216 1170 1175
7 85.5 11.7 1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 33554432 1646 1649
4 60.8 16.5 1.0X
+With bloom filter, blocksize: 33554432 1186 1187
1 84.3 11.9 1.4X
================================================================================================
@@ -30,8 +102,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
Write 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter 15998 16218
311 6.3 160.0 1.0X
-With bloom filter 29974 30180
291 3.3 299.7 0.5X
+Without bloom filter 16633 16773
197 6.0 166.3 1.0X
+With bloom filter 23442 23538
136 4.3 234.4 0.7X
================================================================================================
@@ -42,8 +114,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 2097152 1032 1039
10 96.9 10.3 1.0X
-With bloom filter, blocksize: 2097152 285 302
22 350.7 2.9 3.6X
+Without bloom filter, blocksize: 2097152 955 965
13 104.7 9.6 1.0X
+With bloom filter, blocksize: 2097152 271 289
17 368.8 2.7 3.5X
================================================================================================
@@ -54,8 +126,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 4194304 927 939
11 107.9 9.3 1.0X
-With bloom filter, blocksize: 4194304 239 263
27 418.3 2.4 3.9X
+Without bloom filter, blocksize: 4194304 897 907
9 111.4 9.0 1.0X
+With bloom filter, blocksize: 4194304 242 255
19 412.6 2.4 3.7X
================================================================================================
@@ -66,8 +138,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 6291456 958 968
11 104.3 9.6 1.0X
-With bloom filter, blocksize: 6291456 279 302
17 357.9 2.8 3.4X
+Without bloom filter, blocksize: 6291456 923 934
11 108.3 9.2 1.0X
+With bloom filter, blocksize: 6291456 271 283
11 369.0 2.7 3.4X
================================================================================================
@@ -78,8 +150,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 8388608 898 930
31 111.4 9.0 1.0X
-With bloom filter, blocksize: 8388608 360 380
13 278.0 3.6 2.5X
+Without bloom filter, blocksize: 8388608 916 920
3 109.1 9.2 1.0X
+With bloom filter, blocksize: 8388608 442 448
9 226.4 4.4 2.1X
================================================================================================
@@ -90,8 +162,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 12582912 937 954
25 106.7 9.4 1.0X
-With bloom filter, blocksize: 12582912 650 696
40 153.9 6.5 1.4X
+Without bloom filter, blocksize: 12582912 899 917
15 111.2 9.0 1.0X
+With bloom filter, blocksize: 12582912 676 682
7 148.0 6.8 1.3X
================================================================================================
@@ -102,8 +174,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 16777216 934 949
14 107.1 9.3 1.0X
-With bloom filter, blocksize: 16777216 925 935
9 108.2 9.2 1.0X
+Without bloom filter, blocksize: 16777216 894 913
17 111.8 8.9 1.0X
+With bloom filter, blocksize: 16777216 866 890
26 115.4 8.7 1.0X
================================================================================================
@@ -114,7 +186,7 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux
5.13.0-1021-azure
Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
Read a row from 100M rows: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 33554432 910 931
18 109.8 9.1 1.0X
-With bloom filter, blocksize: 33554432 910 927
15 109.9 9.1 1.0X
+Without bloom filter, blocksize: 33554432 896 921
22 111.6 9.0 1.0X
+With bloom filter, blocksize: 33554432 909 924
20 110.1 9.1 1.0X
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala
index ccb65c7d3ac..5f193109ca2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala
@@ -61,21 +61,27 @@ object BloomFilterBenchmark extends SqlBasedBenchmark {
}
private def readORCBenchmark(): Unit = {
- withTempPath { dir =>
- val path = dir.getCanonicalPath
+ val blockSizes = Seq(2 * 1024 * 1024, 4 * 1024 * 1024, 6 * 1024 * 1024, 8 * 1024 * 1024,
+ 12 * 1024 * 1024, 16 * 1024 * 1024, 32 * 1024 * 1024)
+ for (blocksize <- blockSizes) {
+ withTempPath { dir =>
+ val path = dir.getCanonicalPath
- df.write.orc(path + "/withoutBF")
- df.write.option("orc.bloom.filter.columns", "value").orc(path + "/withBF")
+ df.write.option("orc.block.size", blocksize).orc(path + "/withoutBF")
+ df.write
+ .option("orc.block.size", blocksize)
+ .option("orc.bloom.filter.columns", "value").orc(path + "/withBF")
- runBenchmark(s"ORC Read") {
- val benchmark = new Benchmark(s"Read a row from ${scaleFactor}M rows", N, output = output)
- benchmark.addCase("Without bloom filter") { _ =>
- spark.read.orc(path + "/withoutBF").where("value = 0").noop()
- }
- benchmark.addCase("With bloom filter") { _ =>
- spark.read.orc(path + "/withBF").where("value = 0").noop()
+ runBenchmark(s"ORC Read") {
+ val benchmark = new Benchmark(s"Read a row from ${scaleFactor}M rows", N, output = output)
+ benchmark.addCase("Without bloom filter, blocksize: " + blocksize) { _ =>
+ spark.read.orc(path + "/withoutBF").where("value = 0").noop()
+ }
+ benchmark.addCase("With bloom filter, blocksize: " + blocksize) { _ =>
+ spark.read.orc(path + "/withBF").where("value = 0").noop()
+ }
+ benchmark.run()
}
- benchmark.run()
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]