This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 4c59a83 [SPARK-37921][TESTS] Update OrcReadBenchmark to use Hive ORC reader as the basis 4c59a83 is described below commit 4c59a830a6a235400d0184fb6ce24c9e054d3e4b Author: William Hyun <will...@apache.org> AuthorDate: Sat Jan 15 21:52:31 2022 -0800 [SPARK-37921][TESTS] Update OrcReadBenchmark to use Hive ORC reader as the basis ### What changes were proposed in this pull request? This PR aims to update `OrcReadBenchmark` to use Hive ORC reader as the basis for comparison. ### Why are the changes needed? This will improve the visibility of native ORC reader's improvement because currently the new improvements are shown as `1.0x`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually review. Closes #35219 from williamhyun/benchmark. Authored-by: William Hyun <will...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../benchmarks/OrcReadBenchmark-jdk11-results.txt | 188 ++++++++--------- .../benchmarks/OrcReadBenchmark-jdk17-results.txt | 188 ++++++++--------- sql/hive/benchmarks/OrcReadBenchmark-results.txt | 232 ++++++++++----------- .../spark/sql/hive/orc/OrcReadBenchmark.scala | 74 +++---- 4 files changed, 341 insertions(+), 341 deletions(-) diff --git a/sql/hive/benchmarks/OrcReadBenchmark-jdk11-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-jdk11-results.txt index 3f9e63f..f9ab5dd 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-jdk11-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-jdk11-results.txt @@ -2,221 +2,221 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1064 1070 9 14.8 67.6 1.0X -Native ORC Vectorized 237 326 73 66.3 15.1 4.5X -Hive built-in ORC 1232 1330 139 12.8 78.3 0.9X +Hive built-in ORC 1137 1138 1 13.8 72.3 1.0X +Native ORC MR 962 982 17 16.3 61.2 1.2X +Native ORC Vectorized 225 298 65 69.9 14.3 5.1X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 947 1056 155 16.6 60.2 1.0X -Native ORC Vectorized 232 311 56 67.7 14.8 4.1X -Hive built-in ORC 1317 1330 19 11.9 83.7 0.7X +Hive built-in ORC 1250 1253 4 12.6 79.5 1.0X +Native ORC MR 1038 1135 136 15.1 66.0 1.2X +Native ORC Vectorized 232 307 47 67.9 14.7 5.4X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 964 1070 150 16.3 61.3 1.0X -Native ORC Vectorized 275 304 32 57.2 17.5 3.5X -Hive built-in ORC 1328 1336 11 11.8 84.4 0.7X +Hive built-in ORC 1360 1399 55 11.6 86.5 1.0X +Native ORC MR 1047 1107 85 15.0 66.5 1.3X +Native ORC Vectorized 273 291 20 57.7 17.3 5.0X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1006 1066 84 15.6 64.0 1.0X -Native ORC Vectorized 342 353 12 46.0 21.7 2.9X -Hive built-in ORC 1361 1386 36 11.6 86.5 0.7X +Hive built-in ORC 1381 1425 62 11.4 87.8 1.0X +Native ORC MR 1136 1138 4 13.9 72.2 1.2X +Native ORC Vectorized 336 377 31 46.8 21.4 4.1X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1020 1026 8 15.4 64.8 1.0X -Native ORC Vectorized 352 381 23 44.7 22.4 2.9X -Hive built-in ORC 1457 1457 0 10.8 92.7 0.7X +Hive built-in ORC 1425 1425 1 11.0 90.6 1.0X +Native ORC MR 1090 1093 4 14.4 69.3 1.3X +Native ORC Vectorized 349 381 47 45.1 22.2 4.1X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1036 1056 28 15.2 65.9 1.0X -Native ORC Vectorized 387 403 15 40.6 24.6 2.7X -Hive built-in ORC 1409 1417 11 11.2 89.6 0.7X +Hive built-in ORC 1434 1477 61 11.0 91.2 1.0X +Native ORC MR 1116 1125 12 14.1 71.0 1.3X +Native ORC Vectorized 366 388 18 43.0 23.2 3.9X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1993 2094 144 5.3 190.0 1.0X -Native ORC Vectorized 1290 1348 83 8.1 123.0 1.5X -Hive built-in ORC 2336 2426 127 4.5 222.8 0.9X +Hive built-in ORC 2442 2543 143 4.3 232.8 1.0X +Native ORC MR 2030 2048 25 5.2 193.6 1.2X +Native ORC Vectorized 1261 1266 8 8.3 120.2 1.9X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column - Native ORC MR 1369 1384 22 11.5 87.0 1.0X -Data column - Native ORC Vectorized 406 428 20 38.7 25.8 3.4X -Data column - Hive built-in ORC 1444 1527 118 10.9 91.8 0.9X -Partition column - Native ORC MR 745 796 45 21.1 47.4 1.8X -Partition column - Native ORC Vectorized 70 96 28 223.2 4.5 19.4X -Partition column - Hive built-in ORC 1035 1063 39 15.2 65.8 1.3X -Both columns - Native ORC MR 1245 1306 86 12.6 79.2 1.1X -Both columns - Native ORC Vectorized 385 424 35 40.9 24.5 3.6X -Both columns - Hive built-in ORC 1481 1566 120 10.6 94.2 0.9X +Data column - Hive built-in ORC 1615 1617 3 9.7 102.7 1.0X +Data column - Native ORC MR 1330 1373 61 11.8 84.6 1.2X +Data column - Native ORC Vectorized 343 404 83 45.8 21.8 4.7X +Partition column - Hive built-in ORC 1087 1099 18 14.5 69.1 1.5X +Partition column - Native ORC MR 912 922 12 17.2 58.0 1.8X +Partition column - Native ORC Vectorized 67 94 33 234.6 4.3 24.1X +Both columns - Hive built-in ORC 1743 1748 7 9.0 110.8 0.9X +Both columns - Native ORC MR 1454 1459 6 10.8 92.5 1.1X +Both columns - Native ORC Vectorized 354 414 57 44.4 22.5 4.6X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1102 1261 224 9.5 105.1 1.0X -Native ORC Vectorized 216 260 55 48.5 20.6 5.1X -Hive built-in ORC 1299 1427 181 8.1 123.9 0.8X +Hive built-in ORC 1331 1342 16 7.9 126.9 1.0X +Native ORC MR 901 910 12 11.6 85.9 1.5X +Native ORC Vectorized 228 291 72 45.9 21.8 5.8X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1632 1653 30 6.4 155.6 1.0X -Native ORC Vectorized 689 698 8 15.2 65.7 2.4X -Hive built-in ORC 2224 2254 43 4.7 212.1 0.7X +Hive built-in ORC 2295 2298 4 4.6 218.9 1.0X +Native ORC MR 1711 1743 46 6.1 163.1 1.3X +Native ORC Vectorized 686 692 8 15.3 65.4 3.3X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1516 1555 54 6.9 144.6 1.0X -Native ORC Vectorized 782 801 19 13.4 74.6 1.9X -Hive built-in ORC 2023 2110 123 5.2 192.9 0.7X +Hive built-in ORC 2045 2107 88 5.1 195.0 1.0X +Native ORC MR 1577 1585 11 6.6 150.4 1.3X +Native ORC Vectorized 801 804 5 13.1 76.4 2.6X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 879 931 48 11.9 83.8 1.0X -Native ORC Vectorized 250 342 85 42.0 23.8 3.5X -Hive built-in ORC 1204 1219 20 8.7 114.9 0.7X +Hive built-in ORC 1254 1261 10 8.4 119.6 1.0X +Native ORC MR 944 962 15 11.1 90.1 1.3X +Native ORC Vectorized 262 334 103 40.1 25.0 4.8X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 159 192 24 6.6 151.4 1.0X -Native ORC Vectorized 85 116 32 12.3 81.0 1.9X -Hive built-in ORC 790 853 99 1.3 753.9 0.2X +Hive built-in ORC 954 1002 68 1.1 909.8 1.0X +Native ORC MR 149 188 30 7.0 141.9 6.4X +Native ORC Vectorized 83 108 30 12.7 78.7 11.6X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 161 196 40 6.5 153.9 1.0X -Native ORC Vectorized 110 139 28 9.6 104.6 1.5X -Hive built-in ORC 1549 1585 51 0.7 1476.8 0.1X +Hive built-in ORC 1939 1994 78 0.5 1848.9 1.0X +Native ORC MR 187 259 57 5.6 178.2 10.4X +Native ORC Vectorized 117 193 46 9.0 111.2 16.6X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 201 221 14 5.2 191.8 1.0X -Native ORC Vectorized 135 163 23 7.8 128.6 1.5X -Hive built-in ORC 2166 2172 8 0.5 2065.6 0.1X +Hive built-in ORC 2759 2827 96 0.4 2631.6 1.0X +Native ORC MR 328 368 50 3.2 312.5 8.4X +Native ORC Vectorized 149 210 68 7.0 141.9 18.5X ================================================================================================ Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 473 522 41 2.2 451.4 1.0X -Native ORC Vectorized 234 351 58 4.5 222.9 2.0X -Hive built-in ORC 472 601 116 2.2 449.8 1.0X +Hive built-in ORC 681 696 17 1.5 649.0 1.0X +Native ORC MR 484 497 9 2.2 461.7 1.4X +Native ORC Vectorized 303 371 59 3.5 289.3 2.2X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 100 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 3238 3394 221 0.3 3087.5 1.0X -Native ORC Vectorized 2724 2844 169 0.4 2598.2 1.2X -Hive built-in ORC 3898 3934 52 0.3 3717.0 0.8X +Hive built-in ORC 3762 4091 465 0.3 3588.1 1.0X +Native ORC MR 3503 3577 104 0.3 3340.7 1.1X +Native ORC Vectorized 2296 2415 168 0.5 2189.9 1.6X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 300 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 10723 10890 236 0.1 10226.4 1.0X -Native ORC Vectorized 9966 10091 177 0.1 9503.9 1.1X -Hive built-in ORC 12360 12482 172 0.1 11787.4 0.9X +Hive built-in ORC 11058 11109 72 0.1 10545.5 1.0X +Native ORC MR 11323 11354 44 0.1 10798.4 1.0X +Native ORC Vectorized 11246 11315 97 0.1 10725.2 1.0X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 600 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 24875 25382 717 0.0 23722.6 1.0X -Native ORC Vectorized 22763 22830 95 0.0 21708.5 1.1X -Hive built-in ORC 27783 28079 419 0.0 26496.0 0.9X +Hive built-in ORC 25265 29571 441 0.0 24094.4 1.0X +Native ORC MR 26980 27178 280 0.0 25730.4 0.9X +Native ORC Vectorized 26603 26976 527 0.0 25370.3 0.9X ================================================================================================ Nested Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 10 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 4175 4184 12 0.3 3982.0 1.0X -Native ORC Vectorized 1476 1483 9 0.7 1407.9 2.8X -Hive built-in ORC 4128 4150 31 0.3 3936.6 1.0X +Hive built-in ORC 4354 4453 140 0.2 4152.1 1.0X +Native ORC MR 3674 4025 497 0.3 3503.4 1.2X +Native ORC Vectorized 1000 1014 21 1.0 953.4 4.4X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 30 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 9819 9945 178 0.1 9364.0 1.0X -Native ORC Vectorized 3771 3809 54 0.3 3596.0 2.6X -Hive built-in ORC 11067 11090 32 0.1 10554.8 0.9X +Hive built-in ORC 11727 11762 50 0.1 11183.8 1.0X +Native ORC MR 8861 8862 1 0.1 8450.8 1.3X +Native ORC Vectorized 2441 2497 79 0.4 2327.9 4.8X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 10 Elements, 30 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 10779 10781 3 0.1 10279.7 1.0X -Native ORC Vectorized 7162 7392 325 0.1 6830.7 1.5X -Hive built-in ORC 8417 8553 192 0.1 8027.5 1.3X +Hive built-in ORC 9604 9616 17 0.1 9159.4 1.0X +Native ORC MR 9501 9535 47 0.1 9061.0 1.0X +Native ORC Vectorized 4418 4582 232 0.2 4213.6 2.2X diff --git a/sql/hive/benchmarks/OrcReadBenchmark-jdk17-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-jdk17-results.txt index 836b563..b24cef4 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-jdk17-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-jdk17-results.txt @@ -2,221 +2,221 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 803 838 38 19.6 51.1 1.0X -Native ORC Vectorized 147 173 21 107.1 9.3 5.5X -Hive built-in ORC 1098 1115 23 14.3 69.8 0.7X +Hive built-in ORC 933 962 48 16.9 59.3 1.0X +Native ORC MR 864 910 76 18.2 54.9 1.1X +Native ORC Vectorized 144 172 22 108.9 9.2 6.5X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 856 927 81 18.4 54.4 1.0X -Native ORC Vectorized 136 161 15 115.3 8.7 6.3X -Hive built-in ORC 1188 1328 198 13.2 75.5 0.7X +Hive built-in ORC 1203 1301 139 13.1 76.5 1.0X +Native ORC MR 848 875 27 18.5 53.9 1.4X +Native ORC Vectorized 117 139 17 134.3 7.4 10.3X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 813 875 105 19.3 51.7 1.0X -Native ORC Vectorized 138 158 15 113.9 8.8 5.9X -Hive built-in ORC 1158 1158 0 13.6 73.6 0.7X +Hive built-in ORC 1252 1257 6 12.6 79.6 1.0X +Native ORC MR 873 939 92 18.0 55.5 1.4X +Native ORC Vectorized 127 146 17 124.0 8.1 9.9X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 839 844 7 18.8 53.3 1.0X -Native ORC Vectorized 180 207 30 87.4 11.4 4.7X -Hive built-in ORC 1358 1394 52 11.6 86.3 0.6X +Hive built-in ORC 1286 1299 19 12.2 81.8 1.0X +Native ORC MR 948 966 17 16.6 60.3 1.4X +Native ORC Vectorized 171 203 24 91.9 10.9 7.5X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 906 968 58 17.4 57.6 1.0X -Native ORC Vectorized 237 292 56 66.3 15.1 3.8X -Hive built-in ORC 1395 1416 30 11.3 88.7 0.6X +Hive built-in ORC 1234 1243 13 12.7 78.4 1.0X +Native ORC MR 1019 1048 41 15.4 64.8 1.2X +Native ORC Vectorized 219 235 15 71.8 13.9 5.6X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1041 1060 27 15.1 66.2 1.0X -Native ORC Vectorized 265 320 44 59.4 16.8 3.9X -Hive built-in ORC 1339 1374 49 11.7 85.2 0.8X +Hive built-in ORC 1304 1309 6 12.1 82.9 1.0X +Native ORC MR 1007 1022 22 15.6 64.0 1.3X +Native ORC Vectorized 253 274 16 62.2 16.1 5.2X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 2091 2136 63 5.0 199.5 1.0X -Native ORC Vectorized 1253 1260 10 8.4 119.5 1.7X -Hive built-in ORC 2384 2391 9 4.4 227.4 0.9X +Hive built-in ORC 2178 2250 102 4.8 207.7 1.0X +Native ORC MR 1816 1821 7 5.8 173.2 1.2X +Native ORC Vectorized 1003 1025 31 10.5 95.6 2.2X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column - Native ORC MR 1549 1631 116 10.2 98.5 1.0X -Data column - Native ORC Vectorized 295 346 45 53.3 18.8 5.3X -Data column - Hive built-in ORC 1851 1896 64 8.5 117.7 0.8X -Partition column - Native ORC MR 850 868 19 18.5 54.1 1.8X -Partition column - Native ORC Vectorized 54 67 9 288.7 3.5 28.4X -Partition column - Hive built-in ORC 1131 1174 60 13.9 71.9 1.4X -Both columns - Native ORC MR 1069 1077 10 14.7 68.0 1.4X -Both columns - Native ORC Vectorized 208 226 18 75.6 13.2 7.4X -Both columns - Hive built-in ORC 1811 1812 1 8.7 115.2 0.9X +Data column - Hive built-in ORC 1442 1449 9 10.9 91.7 1.0X +Data column - Native ORC MR 1171 1186 20 13.4 74.5 1.2X +Data column - Native ORC Vectorized 179 197 20 87.8 11.4 8.1X +Partition column - Hive built-in ORC 1022 1045 32 15.4 65.0 1.4X +Partition column - Native ORC MR 848 887 43 18.5 53.9 1.7X +Partition column - Native ORC Vectorized 54 64 8 293.9 3.4 26.9X +Both columns - Hive built-in ORC 1513 1548 50 10.4 96.2 1.0X +Both columns - Native ORC MR 1189 1204 21 13.2 75.6 1.2X +Both columns - Native ORC Vectorized 197 225 24 79.7 12.6 7.3X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 825 830 5 12.7 78.6 1.0X -Native ORC Vectorized 199 207 10 52.8 18.9 4.2X -Hive built-in ORC 1206 1210 6 8.7 115.0 0.7X +Hive built-in ORC 1259 1271 17 8.3 120.1 1.0X +Native ORC MR 842 864 21 12.5 80.3 1.5X +Native ORC Vectorized 187 199 13 56.2 17.8 6.7X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1542 1572 42 6.8 147.1 1.0X -Native ORC Vectorized 523 582 66 20.1 49.8 3.0X -Hive built-in ORC 2190 2190 0 4.8 208.9 0.7X +Hive built-in ORC 2140 2155 21 4.9 204.1 1.0X +Native ORC MR 1559 1563 6 6.7 148.7 1.4X +Native ORC Vectorized 512 535 34 20.5 48.9 4.2X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1490 1499 13 7.0 142.1 1.0X -Native ORC Vectorized 630 695 97 16.7 60.1 2.4X -Hive built-in ORC 2112 2121 13 5.0 201.4 0.7X +Hive built-in ORC 1880 1920 56 5.6 179.3 1.0X +Native ORC MR 1467 1484 24 7.1 139.9 1.3X +Native ORC Vectorized 608 624 11 17.2 58.0 3.1X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 815 830 23 12.9 77.7 1.0X -Native ORC Vectorized 225 249 26 46.5 21.5 3.6X -Hive built-in ORC 1247 1259 16 8.4 119.0 0.7X +Hive built-in ORC 1195 1209 20 8.8 113.9 1.0X +Native ORC MR 857 895 34 12.2 81.7 1.4X +Native ORC Vectorized 218 233 15 48.1 20.8 5.5X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 141 173 19 7.5 134.0 1.0X -Native ORC Vectorized 77 91 9 13.7 73.2 1.8X -Hive built-in ORC 758 776 16 1.4 722.9 0.2X +Hive built-in ORC 884 924 43 1.2 842.7 1.0X +Native ORC MR 122 145 18 8.6 116.7 7.2X +Native ORC Vectorized 67 82 13 15.7 63.9 13.2X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 190 232 29 5.5 181.4 1.0X -Native ORC Vectorized 118 149 41 8.9 112.7 1.6X -Hive built-in ORC 1537 1558 30 0.7 1465.7 0.1X +Hive built-in ORC 1473 1520 67 0.7 1404.6 1.0X +Native ORC MR 161 177 16 6.5 153.4 9.2X +Native ORC Vectorized 107 126 14 9.8 102.0 13.8X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 237 268 28 4.4 226.0 1.0X -Native ORC Vectorized 165 188 17 6.4 157.2 1.4X -Hive built-in ORC 2103 2171 96 0.5 2005.3 0.1X +Hive built-in ORC 1988 2050 87 0.5 1896.3 1.0X +Native ORC MR 210 237 27 5.0 199.9 9.5X +Native ORC Vectorized 149 166 16 7.0 142.0 13.4X ================================================================================================ Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 278 294 12 3.8 265.5 1.0X -Native ORC Vectorized 213 246 41 4.9 202.9 1.3X -Hive built-in ORC 536 586 40 2.0 511.0 0.5X +Hive built-in ORC 477 498 14 2.2 454.9 1.0X +Native ORC MR 323 329 5 3.2 307.7 1.5X +Native ORC Vectorized 169 206 49 6.2 161.6 2.8X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 100 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 2235 2244 13 0.5 2131.8 1.0X -Native ORC Vectorized 3154 3159 7 0.3 3007.6 0.7X -Hive built-in ORC 3740 4089 493 0.3 3567.0 0.6X +Hive built-in ORC 3006 3007 1 0.3 2867.0 1.0X +Native ORC MR 2469 2707 337 0.4 2354.2 1.2X +Native ORC Vectorized 1407 1422 22 0.7 1341.4 2.1X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 300 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 7350 8577 1735 0.1 7009.2 1.0X -Native ORC Vectorized 7161 8481 1867 0.1 6829.0 1.0X -Hive built-in ORC 10307 10909 851 0.1 9829.6 0.7X +Hive built-in ORC 8820 8867 67 0.1 8411.4 1.0X +Native ORC MR 7301 7422 171 0.1 6962.8 1.2X +Native ORC Vectorized 7286 7300 20 0.1 6948.6 1.2X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 600 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 15931 18238 NaN 0.1 15192.6 1.0X -Native ORC Vectorized 15192 16500 1851 0.1 14487.9 1.0X -Hive built-in ORC 29853 30027 247 0.0 28469.9 0.5X +Hive built-in ORC 24634 27218 NaN 0.0 23492.4 1.0X +Native ORC MR 19304 19441 195 0.1 18409.3 1.3X +Native ORC Vectorized 19081 19091 14 0.1 18197.3 1.3X ================================================================================================ Nested Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 10 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 3399 3463 90 0.3 3241.5 1.0X -Native ORC Vectorized 1513 1630 166 0.7 1442.7 2.2X -Hive built-in ORC 3953 3960 10 0.3 3770.0 0.9X +Hive built-in ORC 4044 4112 96 0.3 3857.0 1.0X +Native ORC MR 4086 4092 8 0.3 3897.0 1.0X +Native ORC Vectorized 977 1007 43 1.1 931.5 4.1X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 30 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 7667 7684 24 0.1 7311.9 1.0X -Native ORC Vectorized 3865 3881 22 0.3 3685.8 2.0X -Hive built-in ORC 11223 11246 32 0.1 10703.5 0.7X +Hive built-in ORC 10733 10785 73 0.1 10236.0 1.0X +Native ORC MR 7707 7707 0 0.1 7349.8 1.4X +Native ORC Vectorized 2260 2318 82 0.5 2155.3 4.7X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1022-azure +OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 10 Elements, 30 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 9506 9633 181 0.1 9065.4 1.0X -Native ORC Vectorized 4170 4320 212 0.3 3976.4 2.3X -Hive built-in ORC 12756 13821 1506 0.1 12164.7 0.7X +Hive built-in ORC 7851 8136 403 0.1 7487.6 1.0X +Native ORC MR 9074 9180 150 0.1 8653.9 0.9X +Native ORC Vectorized 2485 2588 146 0.4 2369.7 3.2X diff --git a/sql/hive/benchmarks/OrcReadBenchmark-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-results.txt index a08c349..137bfcc 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-results.txt @@ -2,221 +2,221 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1016 1068 74 15.5 64.6 1.0X -Native ORC Vectorized 220 252 33 71.4 14.0 4.6X -Hive built-in ORC 1274 1290 22 12.3 81.0 0.8X +Hive built-in ORC 1138 1191 76 13.8 72.3 1.0X +Native ORC MR 999 1115 164 15.7 63.5 1.1X +Native ORC Vectorized 155 183 23 101.7 9.8 7.4X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1117 1142 36 14.1 71.0 1.0X -Native ORC Vectorized 157 189 20 100.4 10.0 7.1X -Hive built-in ORC 1369 1399 42 11.5 87.1 0.8X +Hive built-in ORC 1034 1056 30 15.2 65.8 1.0X +Native ORC MR 859 878 19 18.3 54.6 1.2X +Native ORC Vectorized 130 155 22 121.1 8.3 8.0X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1064 1189 177 14.8 67.6 1.0X -Native ORC Vectorized 179 204 25 87.9 11.4 5.9X -Hive built-in ORC 1454 1468 20 10.8 92.4 0.7X +Hive built-in ORC 1056 1081 35 14.9 67.1 1.0X +Native ORC MR 946 1015 96 16.6 60.2 1.1X +Native ORC Vectorized 152 173 25 103.5 9.7 6.9X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1070 1196 177 14.7 68.1 1.0X -Native ORC Vectorized 216 232 14 72.8 13.7 5.0X -Hive built-in ORC 1484 1533 69 10.6 94.4 0.7X +Hive built-in ORC 1619 1776 222 9.7 103.0 1.0X +Native ORC MR 913 1015 145 17.2 58.0 1.8X +Native ORC Vectorized 187 207 19 84.3 11.9 8.7X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1164 1181 24 13.5 74.0 1.0X -Native ORC Vectorized 264 290 24 59.6 16.8 4.4X -Hive built-in ORC 1536 1572 51 10.2 97.7 0.8X +Hive built-in ORC 1117 1138 30 14.1 71.0 1.0X +Native ORC MR 909 921 20 17.3 57.8 1.2X +Native ORC Vectorized 202 224 36 78.0 12.8 5.5X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1127 1174 67 14.0 71.7 1.0X -Native ORC Vectorized 285 302 17 55.2 18.1 4.0X -Hive built-in ORC 1571 1582 16 10.0 99.9 0.7X +Hive built-in ORC 1123 1124 2 14.0 71.4 1.0X +Native ORC MR 933 951 22 16.9 59.3 1.2X +Native ORC Vectorized 231 247 34 68.1 14.7 4.9X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 2329 2413 119 4.5 222.1 1.0X -Native ORC Vectorized 1274 1282 12 8.2 121.5 1.8X -Hive built-in ORC 2622 2692 99 4.0 250.0 0.9X +Hive built-in ORC 2149 2163 21 4.9 204.9 1.0X +Native ORC MR 1844 1863 27 5.7 175.9 1.2X +Native ORC Vectorized 1059 1071 18 9.9 101.0 2.0X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column - Native ORC MR 1304 1309 8 12.1 82.9 1.0X -Data column - Native ORC Vectorized 221 259 25 71.1 14.1 5.9X -Data column - Hive built-in ORC 1586 1606 28 9.9 100.8 0.8X -Partition column - Native ORC MR 868 889 29 18.1 55.2 1.5X -Partition column - Native ORC Vectorized 71 85 18 222.3 4.5 18.4X -Partition column - Hive built-in ORC 1210 1241 43 13.0 77.0 1.1X -Both columns - Native ORC MR 1397 1435 54 11.3 88.8 0.9X -Both columns - Native ORC Vectorized 236 257 22 66.5 15.0 5.5X -Both columns - Hive built-in ORC 1723 1726 4 9.1 109.6 0.8X +Data column - Hive built-in ORC 1218 1220 3 12.9 77.4 1.0X +Data column - Native ORC MR 1110 1113 4 14.2 70.6 1.1X +Data column - Native ORC Vectorized 185 205 19 85.1 11.7 6.6X +Partition column - Hive built-in ORC 884 897 18 17.8 56.2 1.4X +Partition column - Native ORC MR 701 745 71 22.4 44.6 1.7X +Partition column - Native ORC Vectorized 56 65 6 281.7 3.5 21.8X +Both columns - Hive built-in ORC 1206 1225 26 13.0 76.7 1.0X +Both columns - Native ORC MR 1103 1164 86 14.3 70.1 1.1X +Both columns - Native ORC Vectorized 201 240 47 78.4 12.8 6.1X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1074 1089 21 9.8 102.4 1.0X -Native ORC Vectorized 221 254 33 47.5 21.0 4.9X -Hive built-in ORC 1435 1437 2 7.3 136.9 0.7X +Hive built-in ORC 1124 1136 17 9.3 107.2 1.0X +Native ORC MR 854 867 17 12.3 81.5 1.3X +Native ORC Vectorized 173 179 6 60.5 16.5 6.5X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1948 1964 21 5.4 185.8 1.0X -Native ORC Vectorized 666 687 31 15.7 63.5 2.9X -Hive built-in ORC 2454 2489 50 4.3 234.0 0.8X +Hive built-in ORC 1985 1985 0 5.3 189.3 1.0X +Native ORC MR 1557 1561 5 6.7 148.5 1.3X +Native ORC Vectorized 470 486 22 22.3 44.8 4.2X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 1744 1756 16 6.0 166.4 1.0X -Native ORC Vectorized 707 736 38 14.8 67.4 2.5X -Hive built-in ORC 2225 2259 48 4.7 212.2 0.8X +Hive built-in ORC 1857 1891 49 5.6 177.1 1.0X +Native ORC MR 1508 1518 14 7.0 143.8 1.2X +Native ORC Vectorized 646 660 11 16.2 61.6 2.9X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 996 1101 149 10.5 95.0 1.0X -Native ORC Vectorized 282 311 18 37.1 26.9 3.5X -Hive built-in ORC 1405 1420 20 7.5 134.0 0.7X +Hive built-in ORC 1066 1084 25 9.8 101.7 1.0X +Native ORC MR 834 851 14 12.6 79.6 1.3X +Native ORC Vectorized 242 269 36 43.3 23.1 4.4X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 153 180 17 6.8 146.2 1.0X -Native ORC Vectorized 85 99 18 12.3 81.4 1.8X -Hive built-in ORC 912 971 97 1.2 869.4 0.2X +Hive built-in ORC 912 1006 133 1.2 869.3 1.0X +Native ORC MR 125 144 19 8.4 119.4 7.3X +Native ORC Vectorized 74 83 14 14.2 70.3 12.4X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 254 272 15 4.1 242.5 1.0X -Native ORC Vectorized 122 138 15 8.6 116.6 2.1X -Hive built-in ORC 1772 1819 67 0.6 1689.5 0.1X +Hive built-in ORC 1502 1531 40 0.7 1432.7 1.0X +Native ORC MR 160 174 17 6.6 152.3 9.4X +Native ORC Vectorized 110 125 20 9.5 105.3 13.6X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 233 271 31 4.5 222.5 1.0X -Native ORC Vectorized 162 184 25 6.5 154.8 1.4X -Hive built-in ORC 2591 2602 16 0.4 2470.6 0.1X +Hive built-in ORC 2184 2191 9 0.5 2082.9 1.0X +Native ORC MR 215 233 19 4.9 204.6 10.2X +Native ORC Vectorized 160 172 18 6.5 152.7 13.6X ================================================================================================ Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 369 415 54 2.8 351.7 1.0X -Native ORC Vectorized 201 214 9 5.2 191.3 1.8X -Hive built-in ORC 712 719 6 1.5 679.0 0.5X +Hive built-in ORC 513 558 70 2.0 489.3 1.0X +Native ORC MR 316 327 11 3.3 301.6 1.6X +Native ORC Vectorized 171 189 28 6.1 163.3 3.0X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 100 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 2764 2834 99 0.4 2636.2 1.0X -Native ORC Vectorized 1651 1669 26 0.6 1574.2 1.7X -Hive built-in ORC 3957 3998 58 0.3 3774.0 0.7X +Hive built-in ORC 3081 3260 254 0.3 2938.2 1.0X +Native ORC MR 2552 2627 105 0.4 2434.1 1.2X +Native ORC Vectorized 1473 1610 193 0.7 1404.8 2.1X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 300 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 9368 11693 NaN 0.1 8934.4 1.0X -Native ORC Vectorized 9324 9737 585 0.1 8891.6 1.0X -Hive built-in ORC 13303 13665 512 0.1 12687.2 0.7X +Hive built-in ORC 9531 10232 991 0.1 9089.8 1.0X +Native ORC MR 9412 9496 119 0.1 8975.6 1.0X +Native ORC Vectorized 9434 9483 69 0.1 8997.0 1.0X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Struct Column Scan with 600 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Native ORC MR 32403 35146 NaN 0.0 30902.3 1.0X -Native ORC Vectorized 38268 39336 1511 0.0 36495.2 0.8X -Hive built-in ORC 47590 48669 1525 0.0 45385.7 0.7X +Hive built-in ORC 34314 35490 1663 0.0 32724.4 1.0X +Native ORC MR 36051 36191 197 0.0 34381.3 1.0X +Native ORC Vectorized 36014 37273 1780 0.0 34346.1 1.0X ================================================================================================ Nested Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 10 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 5127 5720 838 0.2 4889.8 1.0X -Native ORC Vectorized 1064 1067 4 1.0 1014.8 4.8X -Hive built-in ORC 4622 4647 36 0.2 4407.6 1.1X +Hive built-in ORC 3492 3768 390 0.3 3330.1 1.0X +Native ORC MR 3918 3932 20 0.3 3736.1 0.9X +Native ORC Vectorized 893 911 17 1.2 851.7 3.9X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 30 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 11342 11343 2 0.1 10816.3 1.0X -Native ORC Vectorized 2889 2891 4 0.4 2755.1 3.9X -Hive built-in ORC 12754 12890 192 0.1 12163.6 0.9X +Hive built-in ORC 9499 10127 888 0.1 9058.7 1.0X +Native ORC MR 9227 9234 9 0.1 8799.9 1.0X +Native ORC Vectorized 2326 2389 89 0.5 2218.2 4.1X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1022-azure -Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Nested Struct Scan with 10 Elements, 30 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Native ORC MR 12483 12602 167 0.1 11905.1 1.0X -Native ORC Vectorized 3522 3615 132 0.3 3358.5 3.5X -Hive built-in ORC 9775 9784 12 0.1 9322.4 1.3X +Hive built-in ORC 8315 8552 335 0.1 7929.5 1.0X +Native ORC MR 11559 12147 832 0.1 11023.1 0.7X +Native ORC Vectorized 2808 2965 222 0.4 2678.2 3.0X diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala index 990b34c..61a9360 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala @@ -90,6 +90,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable(dir, spark.sql(s"SELECT CAST(value as ${dataType.sql}) id FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() @@ -100,10 +104,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -121,6 +121,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { dir, spark.sql("SELECT CAST(value AS INT) AS c1, CAST(value as STRING) AS c2 FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT sum(c1), sum(length(c2)) FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT sum(c1), sum(length(c2)) FROM nativeOrcTable").noop() @@ -131,10 +135,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(c1), sum(length(c2)) FROM nativeOrcTable").noop() } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql("SELECT sum(c1), sum(length(c2)) FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -150,6 +150,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable(dir, spark.sql("SELECT value % 2 AS p, value AS id FROM t1"), Some("p")) + benchmark.addCase("Data column - Hive built-in ORC") { _ => + spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() + } + benchmark.addCase("Data column - Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() @@ -160,8 +164,8 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() } - benchmark.addCase("Data column - Hive built-in ORC") { _ => - spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() + benchmark.addCase("Partition column - Hive built-in ORC") { _ => + spark.sql("SELECT sum(p) FROM hiveOrcTable").noop() } benchmark.addCase("Partition column - Native ORC MR") { _ => @@ -174,8 +178,8 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(p) FROM nativeOrcTable").noop() } - benchmark.addCase("Partition column - Hive built-in ORC") { _ => - spark.sql("SELECT sum(p) FROM hiveOrcTable").noop() + benchmark.addCase("Both columns - Hive built-in ORC") { _ => + spark.sql("SELECT sum(p), sum(id) FROM hiveOrcTable").noop() } benchmark.addCase("Both columns - Native ORC MR") { _ => @@ -188,10 +192,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(p), sum(id) FROM nativeOrcTable").noop() } - benchmark.addCase("Both columns - Hive built-in ORC") { _ => - spark.sql("SELECT sum(p), sum(id) FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -206,6 +206,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable(dir, spark.sql("SELECT CAST((id % 200) + 10000 as STRING) AS c1 FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT sum(length(c1)) FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT sum(length(c1)) FROM nativeOrcTable").noop() @@ -216,10 +220,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql("SELECT sum(length(c1)) FROM nativeOrcTable").noop() } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql("SELECT sum(length(c1)) FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -240,6 +240,11 @@ object OrcReadBenchmark extends SqlBasedBenchmark { val benchmark = new Benchmark(s"String with Nulls Scan ($percentageOfNulls%)", values, output = output) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql("SELECT SUM(LENGTH(c2)) FROM hiveOrcTable " + + "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT SUM(LENGTH(c2)) FROM nativeOrcTable " + @@ -252,11 +257,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").noop() } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql("SELECT SUM(LENGTH(c2)) FROM hiveOrcTable " + - "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").noop() - } - benchmark.run() } } @@ -275,6 +275,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable(dir, spark.sql("SELECT * FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql(s"SELECT sum(c$middle) FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql(s"SELECT sum(c$middle) FROM nativeOrcTable").noop() @@ -285,10 +289,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.sql(s"SELECT sum(c$middle) FROM nativeOrcTable").noop() } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql(s"SELECT sum(c$middle) FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -307,6 +307,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable(dir, spark.sql("SELECT * FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql(s"SELECT * FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql(s"SELECT * FROM nativeOrcTable").noop() @@ -319,10 +323,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { } } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql(s"SELECT * FROM hiveOrcTable").noop() - } - benchmark.run() } } @@ -346,6 +346,10 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable(dir, spark.sql("SELECT * FROM t1")) + benchmark.addCase("Hive built-in ORC") { _ => + spark.sql(s"SELECT * FROM hiveOrcTable").noop() + } + benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql(s"SELECT * FROM nativeOrcTable").noop() @@ -358,10 +362,6 @@ object OrcReadBenchmark extends SqlBasedBenchmark { } } - benchmark.addCase("Hive built-in ORC") { _ => - spark.sql(s"SELECT * FROM hiveOrcTable").noop() - } - benchmark.run() } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org