This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 879aae39409 [SPARK-38920][SQL][TEST] Add ORC blockSize tests to BloomFilterBenchmark
879aae39409 is described below

commit 879aae39409ae92f434c3bb4101d66334f9833dd
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Sat Apr 16 19:05:22 2022 -0700

    [SPARK-38920][SQL][TEST] Add ORC blockSize tests to BloomFilterBenchmark
    
    ### What changes were proposed in this pull request?
    
    This PR aims to improve `BloomFilterBenchmark` by adding more `blockSize` combination tests for ORC.
    
    - Java 8: https://github.com/dongjoon-hyun/spark/actions/runs/2178431204
    - Java 11: https://github.com/dongjoon-hyun/spark/actions/runs/2178432284
    - Java 17: https://github.com/dongjoon-hyun/spark/actions/runs/2178432661
    
    ### Why are the changes needed?
    
    Parquet already had this benchmark. Adding the same tests for ORC provides
    feature parity, so the two formats can be compared directly.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Manual test because this is a benchmark.
    
    Closes #36218 from dongjoon-hyun/SPARK-38920.
    
    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../BloomFilterBenchmark-jdk11-results.txt         | 112 +++++++++++++----
 .../BloomFilterBenchmark-jdk17-results.txt         | 132 ++++++++++++++++-----
 .../benchmarks/BloomFilterBenchmark-results.txt    | 112 +++++++++++++----
 .../execution/benchmark/BloomFilterBenchmark.scala |  30 +++--
 4 files changed, 304 insertions(+), 82 deletions(-)

diff --git a/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt 
b/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt
index fab16b64870..1bd32b0e7a9 100644
--- a/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt
+++ b/sql/core/benchmarks/BloomFilterBenchmark-jdk11-results.txt
@@ -6,8 +6,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Write 100M rows:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter                              20453          20495         
 60          4.9         204.5       1.0X
-With bloom filter                                 22539          22694         
218          4.4         225.4       0.9X
+Without bloom filter                              15574          15579         
  6          6.4         155.7       1.0X
+With bloom filter                                 17915          17972         
 80          5.6         179.2       0.9X
 
 
 
================================================================================================
@@ -18,8 +18,80 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter                               1708           1800         
129         58.5          17.1       1.0X
-With bloom filter                                  1324           1357         
 47         75.5          13.2       1.3X
+Without bloom filter, blocksize: 2097152           1667           1675         
 11         60.0          16.7       1.0X
+With bloom filter, blocksize: 2097152              1098           1134         
 50         91.1          11.0       1.5X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 4194304           1446           1514         
 97         69.2          14.5       1.0X
+With bloom filter, blocksize: 4194304              1069           1145         
108         93.6          10.7       1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 6291456           1436           1468         
 46         69.6          14.4       1.0X
+With bloom filter, blocksize: 6291456              1035           1060         
 36         96.6          10.3       1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 8388608           1451           1488         
 52         68.9          14.5       1.0X
+With bloom filter, blocksize: 8388608              1016           1027         
 15         98.4          10.2       1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 12582912           1463           1463        
   1         68.4          14.6       1.0X
+With bloom filter, blocksize: 12582912              1023           1041        
  24         97.7          10.2       1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 16777216           1473           1505        
  46         67.9          14.7       1.0X
+With bloom filter, blocksize: 16777216               997           1016        
  26        100.3          10.0       1.5X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 33554432           1440           1482        
  59         69.4          14.4       1.0X
+With bloom filter, blocksize: 33554432              1037           1065        
  40         96.4          10.4       1.4X
 
 
 
================================================================================================
@@ -30,8 +102,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Write 100M rows:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter                              17586          17666         
112          5.7         175.9       1.0X
-With bloom filter                                 21429          21507         
111          4.7         214.3       0.8X
+Without bloom filter                              16645          16907         
371          6.0         166.4       1.0X
+With bloom filter                                 20968          21145         
250          4.8         209.7       0.8X
 
 
 
================================================================================================
@@ -42,8 +114,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 2097152           1219           1246         
 39         82.1          12.2       1.0X
-With bloom filter, blocksize: 2097152               379            420         
 48        264.0           3.8       3.2X
+Without bloom filter, blocksize: 2097152           1101           1106         
  7         90.8          11.0       1.0X
+With bloom filter, blocksize: 2097152               308            365         
 37        325.2           3.1       3.6X
 
 
 
================================================================================================
@@ -54,8 +126,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 4194304           1015           1023         
 11         98.5          10.2       1.0X
-With bloom filter, blocksize: 4194304               237            267         
 25        422.1           2.4       4.3X
+Without bloom filter, blocksize: 4194304            933            970         
 33        107.2           9.3       1.0X
+With bloom filter, blocksize: 4194304               269            302         
 32        371.1           2.7       3.5X
 
 
 
================================================================================================
@@ -66,8 +138,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 6291456            997           1008         
 16        100.3          10.0       1.0X
-With bloom filter, blocksize: 6291456               268            290         
 11        372.6           2.7       3.7X
+Without bloom filter, blocksize: 6291456            977           1026         
 69        102.3           9.8       1.0X
+With bloom filter, blocksize: 6291456               358            379         
 14        279.6           3.6       2.7X
 
 
 
================================================================================================
@@ -78,8 +150,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 8388608            957           1014         
 81        104.5           9.6       1.0X
-With bloom filter, blocksize: 8388608               361            401         
 33        277.0           3.6       2.6X
+Without bloom filter, blocksize: 8388608           1009           1026         
 24         99.2          10.1       1.0X
+With bloom filter, blocksize: 8388608               371            395         
 28        269.3           3.7       2.7X
 
 
 
================================================================================================
@@ -90,8 +162,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 12582912           1015           1033        
  26         98.5          10.1       1.0X
-With bloom filter, blocksize: 12582912               606            635        
  29        165.2           6.1       1.7X
+Without bloom filter, blocksize: 12582912            972            977        
   6        102.9           9.7       1.0X
+With bloom filter, blocksize: 12582912               695            725        
  30        143.9           6.9       1.4X
 
 
 
================================================================================================
@@ -102,8 +174,8 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 16777216           1038           1055        
  23         96.3          10.4       1.0X
-With bloom filter, blocksize: 16777216               901            905        
   4        111.0           9.0       1.2X
+Without bloom filter, blocksize: 16777216            938            946        
   8        106.6           9.4       1.0X
+With bloom filter, blocksize: 16777216               833            870        
  45        120.0           8.3       1.1X
 
 
 
================================================================================================
@@ -114,7 +186,7 @@ OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 33554432            941            955        
  13        106.3           9.4       1.0X
-With bloom filter, blocksize: 33554432               888            930        
  39        112.6           8.9       1.1X
+Without bloom filter, blocksize: 33554432            929            955        
  24        107.6           9.3       1.0X
+With bloom filter, blocksize: 33554432              1003           1010        
  10         99.7          10.0       0.9X
 
 
diff --git a/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt 
b/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt
index 1288a06ec53..cd3ec1b3660 100644
--- a/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt
+++ b/sql/core/benchmarks/BloomFilterBenchmark-jdk17-results.txt
@@ -3,11 +3,11 @@ ORC Write
 
================================================================================================
 
 OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Write 100M rows:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter                              19097          19523         
602          5.2         191.0       1.0X
-With bloom filter                                 22213          22402         
267          4.5         222.1       0.9X
+Without bloom filter                              15778          15792         
 19          6.3         157.8       1.0X
+With bloom filter                                 17951          18076         
178          5.6         179.5       0.9X
 
 
 
================================================================================================
@@ -15,11 +15,83 @@ ORC Read
 
================================================================================================
 
 OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter                               1528           1628         
142         65.4          15.3       1.0X
-With bloom filter                                  1370           1391         
 29         73.0          13.7       1.1X
+Without bloom filter, blocksize: 2097152           1327           1404         
109         75.4          13.3       1.0X
+With bloom filter, blocksize: 2097152               929            943         
 24        107.6           9.3       1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 4194304           1342           1348         
  8         74.5          13.4       1.0X
+With bloom filter, blocksize: 4194304              1085           1087         
  2         92.1          10.9       1.2X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 6291456           1325           1332         
 10         75.5          13.3       1.0X
+With bloom filter, blocksize: 6291456              1115           1117         
  3         89.7          11.1       1.2X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 8388608           1203           1213         
 14         83.1          12.0       1.0X
+With bloom filter, blocksize: 8388608              1168           1171         
  4         85.6          11.7       1.0X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 12582912           1774           1781        
  10         56.4          17.7       1.0X
+With bloom filter, blocksize: 12582912              1171           1182        
  15         85.4          11.7       1.5X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 16777216           1723           1728        
   7         58.0          17.2       1.0X
+With bloom filter, blocksize: 16777216              1329           1344        
  20         75.2          13.3       1.3X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 33554432           1847           1858        
  15         54.1          18.5       1.0X
+With bloom filter, blocksize: 33554432              1222           1312        
 126         81.8          12.2       1.5X
 
 
 
================================================================================================
@@ -27,11 +99,11 @@ Parquet Write
 
================================================================================================
 
 OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Write 100M rows:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter                              17846          17981         
191          5.6         178.5       1.0X
-With bloom filter                                 22883          22982         
140          4.4         228.8       0.8X
+Without bloom filter                              16902          16907         
  8          5.9         169.0       1.0X
+With bloom filter                                 28237          28266         
 41          3.5         282.4       0.6X
 
 
 
================================================================================================
@@ -39,11 +111,11 @@ Parquet Read
 
================================================================================================
 
 OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 2097152            947           1012         
 92        105.6           9.5       1.0X
-With bloom filter, blocksize: 2097152               311            325         
 12        322.0           3.1       3.0X
+Without bloom filter, blocksize: 2097152            763            796         
 31        131.1           7.6       1.0X
+With bloom filter, blocksize: 2097152               248            261         
 13        403.2           2.5       3.1X
 
 
 
================================================================================================
@@ -51,11 +123,11 @@ Parquet Read
 
================================================================================================
 
 OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 4194304            824            837         
 15        121.3           8.2       1.0X
-With bloom filter, blocksize: 4194304               210            222         
 10        475.8           2.1       3.9X
+Without bloom filter, blocksize: 4194304           1020           1020         
  0         98.0          10.2       1.0X
+With bloom filter, blocksize: 4194304               193            201         
 11        517.5           1.9       5.3X
 
 
 
================================================================================================
@@ -63,11 +135,11 @@ Parquet Read
 
================================================================================================
 
 OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 6291456           1210           1327         
166         82.6          12.1       1.0X
-With bloom filter, blocksize: 6291456               302            315         
  9        331.1           3.0       4.0X
+Without bloom filter, blocksize: 6291456           1023           1023         
  1         97.8          10.2       1.0X
+With bloom filter, blocksize: 6291456               298            306         
  8        336.0           3.0       3.4X
 
 
 
================================================================================================
@@ -75,11 +147,11 @@ Parquet Read
 
================================================================================================
 
 OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 8388608           1244           1251         
 10         80.4          12.4       1.0X
-With bloom filter, blocksize: 8388608               504            513         
  8        198.4           5.0       2.5X
+Without bloom filter, blocksize: 8388608           1033           1038         
  6         96.8          10.3       1.0X
+With bloom filter, blocksize: 8388608               459            467         
  4        217.7           4.6       2.2X
 
 
 
================================================================================================
@@ -87,11 +159,11 @@ Parquet Read
 
================================================================================================
 
 OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 12582912           1384           1408        
  34         72.3          13.8       1.0X
-With bloom filter, blocksize: 12582912               900            955        
  49        111.1           9.0       1.5X
+Without bloom filter, blocksize: 12582912           1077           1084        
  11         92.9          10.8       1.0X
+With bloom filter, blocksize: 12582912               734            741        
   9        136.2           7.3       1.5X
 
 
 
================================================================================================
@@ -99,11 +171,11 @@ Parquet Read
 
================================================================================================
 
 OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 16777216           1468           1482        
  19         68.1          14.7       1.0X
-With bloom filter, blocksize: 16777216              1302           1313        
  15         76.8          13.0       1.1X
+Without bloom filter, blocksize: 16777216           1044           1047        
   4         95.7          10.4       1.0X
+With bloom filter, blocksize: 16777216               825            835        
  11        121.2           8.2       1.3X
 
 
 
================================================================================================
@@ -111,10 +183,10 @@ Parquet Read
 
================================================================================================
 
 OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure
-Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 33554432           1444           1451        
   9         69.2          14.4       1.0X
-With bloom filter, blocksize: 33554432              1458           1461        
   3         68.6          14.6       1.0X
+Without bloom filter, blocksize: 33554432           1232           1237        
   6         81.2          12.3       1.0X
+With bloom filter, blocksize: 33554432              1152           1199        
  67         86.8          11.5       1.1X
 
 
diff --git a/sql/core/benchmarks/BloomFilterBenchmark-results.txt 
b/sql/core/benchmarks/BloomFilterBenchmark-results.txt
index de191252961..745249db930 100644
--- a/sql/core/benchmarks/BloomFilterBenchmark-results.txt
+++ b/sql/core/benchmarks/BloomFilterBenchmark-results.txt
@@ -6,8 +6,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Write 100M rows:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter                              17889          18038         
211          5.6         178.9       1.0X
-With bloom filter                                 20780          20941         
228          4.8         207.8       0.9X
+Without bloom filter                              18682          18792         
156          5.4         186.8       1.0X
+With bloom filter                                 21347          21396         
 69          4.7         213.5       0.9X
 
 
 
================================================================================================
@@ -18,8 +18,80 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter                               1690           1694         
  6         59.2          16.9       1.0X
-With bloom filter                                  1274           1291         
 24         78.5          12.7       1.3X
+Without bloom filter, blocksize: 2097152           1656           1666         
 14         60.4          16.6       1.0X
+With bloom filter, blocksize: 2097152              1203           1230         
 37         83.1          12.0       1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 4194304           1625           1632         
 10         61.6          16.2       1.0X
+With bloom filter, blocksize: 4194304              1224           1284         
 85         81.7          12.2       1.3X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 6291456           1599           1601         
  3         62.5          16.0       1.0X
+With bloom filter, blocksize: 6291456              1162           1179         
 24         86.0          11.6       1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 8388608           1623           1638         
 21         61.6          16.2       1.0X
+With bloom filter, blocksize: 8388608              1181           1197         
 23         84.7          11.8       1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 12582912           1631           1643        
  18         61.3          16.3       1.0X
+With bloom filter, blocksize: 12582912              1159           1183        
  34         86.3          11.6       1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 16777216           1586           1597        
  16         63.0          15.9       1.0X
+With bloom filter, blocksize: 16777216              1170           1175        
   7         85.5          11.7       1.4X
+
+
+================================================================================================
+ORC Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-------------------------------------------------------------------------------------------------------------------------
+Without bloom filter, blocksize: 33554432           1646           1649        
   4         60.8          16.5       1.0X
+With bloom filter, blocksize: 33554432              1186           1187        
   1         84.3          11.9       1.4X
 
 
 
================================================================================================
@@ -30,8 +102,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Write 100M rows:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter                              15998          16218         
311          6.3         160.0       1.0X
-With bloom filter                                 29974          30180         
291          3.3         299.7       0.5X
+Without bloom filter                              16633          16773         
197          6.0         166.3       1.0X
+With bloom filter                                 23442          23538         
136          4.3         234.4       0.7X
 
 
 
================================================================================================
@@ -42,8 +114,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 2097152           1032           1039         
 10         96.9          10.3       1.0X
-With bloom filter, blocksize: 2097152               285            302         
 22        350.7           2.9       3.6X
+Without bloom filter, blocksize: 2097152            955            965         
 13        104.7           9.6       1.0X
+With bloom filter, blocksize: 2097152               271            289         
 17        368.8           2.7       3.5X
 
 
 
================================================================================================
@@ -54,8 +126,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 4194304            927            939         
 11        107.9           9.3       1.0X
-With bloom filter, blocksize: 4194304               239            263         
 27        418.3           2.4       3.9X
+Without bloom filter, blocksize: 4194304            897            907         
  9        111.4           9.0       1.0X
+With bloom filter, blocksize: 4194304               242            255         
 19        412.6           2.4       3.7X
 
 
 
================================================================================================
@@ -66,8 +138,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 6291456            958            968         
 11        104.3           9.6       1.0X
-With bloom filter, blocksize: 6291456               279            302         
 17        357.9           2.8       3.4X
+Without bloom filter, blocksize: 6291456            923            934         
 11        108.3           9.2       1.0X
+With bloom filter, blocksize: 6291456               271            283         
 11        369.0           2.7       3.4X
 
 
 
================================================================================================
@@ -78,8 +150,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Read a row from 100M rows:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 8388608            898            930         
 31        111.4           9.0       1.0X
-With bloom filter, blocksize: 8388608               360            380         
 13        278.0           3.6       2.5X
+Without bloom filter, blocksize: 8388608            916            920         
  3        109.1           9.2       1.0X
+With bloom filter, blocksize: 8388608               442            448         
  9        226.4           4.4       2.1X
 
 
 
================================================================================================
@@ -90,8 +162,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 12582912            937            954        
  25        106.7           9.4       1.0X
-With bloom filter, blocksize: 12582912               650            696        
  40        153.9           6.5       1.4X
+Without bloom filter, blocksize: 12582912            899            917        
  15        111.2           9.0       1.0X
+With bloom filter, blocksize: 12582912               676            682        
   7        148.0           6.8       1.3X
 
 
 
================================================================================================
@@ -102,8 +174,8 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 16777216            934            949        
  14        107.1           9.3       1.0X
-With bloom filter, blocksize: 16777216               925            935        
   9        108.2           9.2       1.0X
+Without bloom filter, blocksize: 16777216            894            913        
  17        111.8           8.9       1.0X
+With bloom filter, blocksize: 16777216               866            890        
  26        115.4           8.7       1.0X
 
 
 
================================================================================================
@@ -114,7 +186,7 @@ OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 
5.13.0-1021-azure
 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Read a row from 100M rows:                 Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
-------------------------------------------------------------------------------------------------------------------------
-Without bloom filter, blocksize: 33554432            910            931        
  18        109.8           9.1       1.0X
-With bloom filter, blocksize: 33554432               910            927        
  15        109.9           9.1       1.0X
+Without bloom filter, blocksize: 33554432            896            921        
  22        111.6           9.0       1.0X
+With bloom filter, blocksize: 33554432               909            924        
  20        110.1           9.1       1.0X
 
 
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala
index ccb65c7d3ac..5f193109ca2 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala
@@ -61,21 +61,27 @@ object BloomFilterBenchmark extends SqlBasedBenchmark {
   }
 
   private def readORCBenchmark(): Unit = {
-    withTempPath { dir =>
-      val path = dir.getCanonicalPath
+    val blockSizes = Seq(2 * 1024 * 1024, 4 * 1024 * 1024, 6 * 1024 * 1024, 8 
* 1024 * 1024,
+      12 * 1024 * 1024, 16 * 1024 * 1024, 32 * 1024 * 1024)
+    for (blocksize <- blockSizes) {
+      withTempPath { dir =>
+        val path = dir.getCanonicalPath
 
-      df.write.orc(path + "/withoutBF")
-      df.write.option("orc.bloom.filter.columns", "value").orc(path + 
"/withBF")
+        df.write.option("orc.block.size", blocksize).orc(path + "/withoutBF")
+        df.write
+          .option("orc.block.size", blocksize)
+          .option("orc.bloom.filter.columns", "value").orc(path + "/withBF")
 
-      runBenchmark(s"ORC Read") {
-        val benchmark = new Benchmark(s"Read a row from ${scaleFactor}M rows", 
N, output = output)
-        benchmark.addCase("Without bloom filter") { _ =>
-          spark.read.orc(path + "/withoutBF").where("value = 0").noop()
-        }
-        benchmark.addCase("With bloom filter") { _ =>
-          spark.read.orc(path + "/withBF").where("value = 0").noop()
+        runBenchmark(s"ORC Read") {
+          val benchmark = new Benchmark(s"Read a row from ${scaleFactor}M 
rows", N, output = output)
+          benchmark.addCase("Without bloom filter, blocksize: " + blocksize) { 
_ =>
+            spark.read.orc(path + "/withoutBF").where("value = 0").noop()
+          }
+          benchmark.addCase("With bloom filter, blocksize: " + blocksize) { _ 
=>
+            spark.read.orc(path + "/withBF").where("value = 0").noop()
+          }
+          benchmark.run()
         }
-        benchmark.run()
       }
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to