This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new bd669a927f0 [SPARK-43443][SQL] Add benchmark for Timestamp type 
inference when use invalid value
bd669a927f0 is described below

commit bd669a927f09f271afd6a1058493c23d8a0e3c04
Author: Hisoka <[email protected]>
AuthorDate: Thu May 11 13:35:25 2023 +0300

    [SPARK-43443][SQL] Add benchmark for Timestamp type inference when use 
invalid value
    
    ### What changes were proposed in this pull request?
    
    When we try to speed up Timestamp type inference with a format (PRs #36562, 
#41078, #41091), there is no way to judge whether the change has actually 
improved the speed of Timestamp type inference.
    
    So we need a benchmark to measure whether our optimization of Timestamp 
type inference is useful. We currently have a benchmark for valid Timestamp 
values, but not for invalid Timestamp values during Timestamp type inference.
    
    ### Why are the changes needed?
    
    Add a new benchmark for Timestamp type inference with invalid values, to 
make sure our speed-up PRs work as intended.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    The benchmarks are themselves test code.
    
    Closes #41131 from Hisoka-X/add_banchmarks.
    
    Authored-by: Hisoka <[email protected]>
    Signed-off-by: Max Gekk <[email protected]>
---
 sql/core/benchmarks/CSVBenchmark-jdk11-results.txt |  95 ++++++-------
 sql/core/benchmarks/CSVBenchmark-jdk17-results.txt |  95 ++++++-------
 sql/core/benchmarks/CSVBenchmark-results.txt       |  95 ++++++-------
 .../benchmarks/JsonBenchmark-jdk11-results.txt     | 123 ++++++++---------
 .../benchmarks/JsonBenchmark-jdk17-results.txt     | 123 ++++++++---------
 sql/core/benchmarks/JsonBenchmark-results.txt      | 147 +++++++++++----------
 .../execution/datasources/csv/CSVBenchmark.scala   |  30 +++++
 .../execution/datasources/json/JsonBenchmark.scala |  26 ++++
 8 files changed, 404 insertions(+), 330 deletions(-)

diff --git a/sql/core/benchmarks/CSVBenchmark-jdk11-results.txt 
b/sql/core/benchmarks/CSVBenchmark-jdk11-results.txt
index ca33c059b3a..0185251877e 100644
--- a/sql/core/benchmarks/CSVBenchmark-jdk11-results.txt
+++ b/sql/core/benchmarks/CSVBenchmark-jdk11-results.txt
@@ -2,66 +2,69 @@
 Benchmark to measure CSV read/write performance
 
================================================================================================
 
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 Parsing quoted values:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-One quoted string                                 36620          36718         
168          0.0      732395.8       1.0X
+One quoted string                                 30782          30948         
229          0.0      615635.9       1.0X
 
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 Wide rows with 1000 columns:              Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 1000 columns                               86305          86907        
1033          0.0       86305.2       1.0X
-Select 100 columns                                38778          38792         
 15          0.0       38778.3       2.2X
-Select one column                                 31901          31913         
 12          0.0       31901.0       2.7X
-count()                                            6971           7033         
 61          0.1        6970.9      12.4X
-Select 100 columns, one bad input field           51175          51195         
 26          0.0       51174.8       1.7X
-Select 100 columns, corrupt record field          56219          56283         
 60          0.0       56219.3       1.5X
+Select 1000 columns                               74038          74677        
1024          0.0       74038.3       1.0X
+Select 100 columns                                33611          33625         
 12          0.0       33611.1       2.2X
+Select one column                                 29350          29428         
 73          0.0       29349.7       2.5X
+count()                                            4909           4934         
 26          0.2        4908.8      15.1X
+Select 100 columns, one bad input field           50774          50800         
 25          0.0       50773.6       1.5X
+Select 100 columns, corrupt record field          55746          55812         
 79          0.0       55746.5       1.3X
 
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 Count a dataset with 10 columns:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns + count()                       15004          15047         
 38          0.7        1500.4       1.0X
-Select 1 column + count()                          9955           9980         
 26          1.0         995.5       1.5X
-count()                                            3335           3370         
 37          3.0         333.5       4.5X
+Select 10 columns + count()                       13624          13630         
  8          0.7        1362.4       1.0X
+Select 1 column + count()                         12076          12105         
 28          0.8        1207.6       1.1X
+count()                                            2753           2767         
 19          3.6         275.3       4.9X
 
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                     1445           1467         
 19          6.9         144.5       1.0X
-to_csv(timestamp)                                 10173          10216         
 37          1.0        1017.3       0.1X
-write timestamps to files                          9261           9265         
  5          1.1         926.1       0.2X
-Create a dataset of dates                          1668           1677         
 14          6.0         166.8       0.9X
-to_csv(date)                                       6974           6983         
  8          1.4         697.4       0.2X
-write dates to files                               5645           5646         
  1          1.8         564.5       0.3X
+Create a dataset of timestamps                     1312           1318         
  7          7.6         131.2       1.0X
+to_csv(timestamp)                                 11054          11073         
 24          0.9        1105.4       0.1X
+write timestamps to files                          9917           9927         
 12          1.0         991.7       0.1X
+Create a dataset of dates                          1455           1467         
 10          6.9         145.5       0.9X
+to_csv(date)                                       6987           7019         
 31          1.4         698.7       0.2X
+write dates to files                               5710           5728         
 17          1.8         571.0       0.2X
 
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
-Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                     2304           2317         
 12          4.3         230.4       1.0X
-read timestamps from files                        27615          27644         
 26          0.4        2761.5       0.1X
-infer timestamps from files                       54215          54288         
 88          0.2        5421.5       0.0X
-read date text from files                          2108           2115         
  6          4.7         210.8       1.1X
-read date from files                              14103          14187         
 74          0.7        1410.3       0.2X
-infer date from files                             28103          28147         
 43          0.4        2810.3       0.1X
-timestamp strings                                  2288           2306         
 26          4.4         228.8       1.0X
-parse timestamps from Dataset[String]             29895          29929         
 31          0.3        2989.5       0.1X
-infer timestamps from Dataset[String]             57020          57120         
 87          0.2        5702.0       0.0X
-date strings                                       2481           2498         
 24          4.0         248.1       0.9X
-parse dates from Dataset[String]                  15589          15653         
 55          0.6        1558.9       0.1X
-from_csv(timestamp)                               28382          28515         
115          0.4        2838.2       0.1X
-from_csv(date)                                    14770          14776         
  8          0.7        1477.0       0.2X
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+Read dates and timestamps:                                             Best 
Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------------------------------
+read timestamp text from files                                                 
 1686           1703          19          5.9         168.6       1.0X
+read timestamps from files                                                     
26971          26992          18          0.4        2697.1       0.1X
+infer timestamps from files                                                    
53617          53773         194          0.2        5361.7       0.0X
+read date text from files                                                      
 1564           1570           7          6.4         156.4       1.1X
+read date from files                                                           
14001          14028          34          0.7        1400.1       0.1X
+infer date from files                                                          
28967          29034          79          0.3        2896.7       0.1X
+timestamp strings                                                              
 2187           2197          16          4.6         218.7       0.8X
+parse timestamps from Dataset[String]                                          
29704          29731          24          0.3        2970.4       0.1X
+infer timestamps from Dataset[String]                                          
57380          57403          21          0.2        5738.0       0.0X
+date strings                                                                   
 2498           2504           6          4.0         249.8       0.7X
+parse dates from Dataset[String]                                               
16172          16193          25          0.6        1617.2       0.1X
+from_csv(timestamp)                                                            
28265          28266           1          0.4        2826.5       0.1X
+from_csv(date)                                                                 
15885          15903          18          0.6        1588.5       0.1X
+infer error timestamps from Dataset[String] with default format                
17608          17654          53          0.6        1760.8       0.1X
+infer error timestamps from Dataset[String] with user-provided format          
17606          17647          42          0.6        1760.6       0.1X
+infer error timestamps from Dataset[String] with legacy format                 
17614          17641          31          0.6        1761.4       0.1X
 
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 Filters pushdown:                         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-w/o filters                                       18446          18451         
  8          0.0      184461.7       1.0X
-pushdown disabled                                 18480          18487         
  6          0.0      184800.1       1.0X
-w/ filters                                         1296           1317         
 22          0.1       12957.9      14.2X
+w/o filters                                       17470          17494         
 31          0.0      174701.6       1.0X
+pushdown disabled                                 17563          17579         
 15          0.0      175635.0       1.0X
+w/ filters                                         1003           1019         
 15          0.1       10027.1      17.4X
 
 
diff --git a/sql/core/benchmarks/CSVBenchmark-jdk17-results.txt 
b/sql/core/benchmarks/CSVBenchmark-jdk17-results.txt
index 3fa769c3ff1..24818d20437 100644
--- a/sql/core/benchmarks/CSVBenchmark-jdk17-results.txt
+++ b/sql/core/benchmarks/CSVBenchmark-jdk17-results.txt
@@ -2,66 +2,69 @@
 Benchmark to measure CSV read/write performance
 
================================================================================================
 
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1037-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 Parsing quoted values:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-One quoted string                                 43617          43739         
166          0.0      872348.0       1.0X
+One quoted string                                 32457          33105         
972          0.0      649138.0       1.0X
 
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1037-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 Wide rows with 1000 columns:              Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 1000 columns                               69859          70316         
787          0.0       69858.8       1.0X
-Select 100 columns                                33296          33363         
 58          0.0       33296.3       2.1X
-Select one column                                 28650          28705         
 67          0.0       28649.7       2.4X
-count()                                            4781           4788         
  6          0.2        4781.4      14.6X
-Select 100 columns, one bad input field           52076          52199         
109          0.0       52076.1       1.3X
-Select 100 columns, corrupt record field          56995          57069         
 87          0.0       56994.7       1.2X
+Select 1000 columns                               86590          87162         
877          0.0       86589.6       1.0X
+Select 100 columns                                29806          29895         
 79          0.0       29806.3       2.9X
+Select one column                                 25109          25196         
136          0.0       25109.4       3.4X
+count()                                            5329           5392         
 57          0.2        5328.5      16.3X
+Select 100 columns, one bad input field           53502          53981         
660          0.0       53502.2       1.6X
+Select 100 columns, corrupt record field          61580          61799         
219          0.0       61580.2       1.4X
 
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1037-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 Count a dataset with 10 columns:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns + count()                       18758          18825         
 81          0.5        1875.8       1.0X
-Select 1 column + count()                         12676          12715         
 37          0.8        1267.6       1.5X
-count()                                            2904           2921         
 16          3.4         290.4       6.5X
+Select 10 columns + count()                       11033          11072         
 67          0.9        1103.3       1.0X
+Select 1 column + count()                          7731           7780         
 61          1.3         773.1       1.4X
+count()                                            2373           2384         
 15          4.2         237.3       4.7X
 
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1037-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                     1266           1279         
 19          7.9         126.6       1.0X
-to_csv(timestamp)                                 11788          11805         
 15          0.8        1178.8       0.1X
-write timestamps to files                         11123          11128         
  8          0.9        1112.3       0.1X
-Create a dataset of dates                          1626           1632         
  8          6.2         162.6       0.8X
-to_csv(date)                                       6637           6647         
 10          1.5         663.7       0.2X
-write dates to files                               5362           5372         
 16          1.9         536.2       0.2X
+Create a dataset of timestamps                     1402           1407         
  4          7.1         140.2       1.0X
+to_csv(timestamp)                                  8749           8780         
 42          1.1         874.9       0.2X
+write timestamps to files                          7990           8016         
 23          1.3         799.0       0.2X
+Create a dataset of dates                          1569           1604         
 32          6.4         156.9       0.9X
+to_csv(date)                                       6087           6119         
 29          1.6         608.7       0.2X
+write dates to files                               5175           5216         
 37          1.9         517.5       0.3X
 
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
-Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                     1786           1810         
 20          5.6         178.6       1.0X
-read timestamps from files                        29363          29380         
 19          0.3        2936.3       0.1X
-infer timestamps from files                       58705          58773         
111          0.2        5870.5       0.0X
-read date text from files                          1825           1841         
 14          5.5         182.5       1.0X
-read date from files                              16948          16961         
 20          0.6        1694.8       0.1X
-infer date from files                             33431          33491         
 55          0.3        3343.1       0.1X
-timestamp strings                                  2033           2044         
 16          4.9         203.3       0.9X
-parse timestamps from Dataset[String]             31981          32007         
 25          0.3        3198.1       0.1X
-infer timestamps from Dataset[String]             61788          61842         
 74          0.2        6178.8       0.0X
-date strings                                       2393           2432         
 38          4.2         239.3       0.7X
-parse dates from Dataset[String]                  20352          20358         
  6          0.5        2035.2       0.1X
-from_csv(timestamp)                               30370          30376         
  6          0.3        3037.0       0.1X
-from_csv(date)                                    19089          19124         
 31          0.5        1908.9       0.1X
+OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1037-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
+Read dates and timestamps:                                             Best 
Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------------------------------
+read timestamp text from files                                                 
 1852           1875          24          5.4         185.2       1.0X
+read timestamps from files                                                     
21698          21727          31          0.5        2169.8       0.1X
+infer timestamps from files                                                    
43973          44067         139          0.2        4397.3       0.0X
+read date text from files                                                      
 1689           1692           2          5.9         168.9       1.1X
+read date from files                                                           
12019          12053          30          0.8        1201.9       0.2X
+infer date from files                                                          
24792          25462         671          0.4        2479.2       0.1X
+timestamp strings                                                              
 2165           2182          22          4.6         216.5       0.9X
+parse timestamps from Dataset[String]                                          
25100          25228         117          0.4        2510.0       0.1X
+infer timestamps from Dataset[String]                                          
47447          47712         272          0.2        4744.7       0.0X
+date strings                                                                   
 2459           2478          22          4.1         245.9       0.8X
+parse dates from Dataset[String]                                               
14796          14877          73          0.7        1479.6       0.1X
+from_csv(timestamp)                                                            
22954          23133         221          0.4        2295.4       0.1X
+from_csv(date)                                                                 
13271          13558         293          0.8        1327.1       0.1X
+infer error timestamps from Dataset[String] with default format                
15398          15420          26          0.6        1539.8       0.1X
+infer error timestamps from Dataset[String] with user-provided format          
15522          15567          45          0.6        1552.2       0.1X
+infer error timestamps from Dataset[String] with legacy format                 
15390          15427          47          0.6        1539.0       0.1X
 
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
+OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1037-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 Filters pushdown:                         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-w/o filters                                       15895          15908         
 12          0.0      158949.9       1.0X
-pushdown disabled                                 15860          15878         
 16          0.0      158599.7       1.0X
-w/ filters                                         1173           1184         
 10          0.1       11734.4      13.5X
+w/o filters                                       15358          15477         
150          0.0      153576.7       1.0X
+pushdown disabled                                 15305          15375         
 61          0.0      153054.0       1.0X
+w/ filters                                         1137           1157         
 18          0.1       11366.1      13.5X
 
 
diff --git a/sql/core/benchmarks/CSVBenchmark-results.txt 
b/sql/core/benchmarks/CSVBenchmark-results.txt
index 685314823b1..f01a94562e0 100644
--- a/sql/core/benchmarks/CSVBenchmark-results.txt
+++ b/sql/core/benchmarks/CSVBenchmark-results.txt
@@ -2,66 +2,69 @@
 Benchmark to measure CSV read/write performance
 
================================================================================================
 
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Parsing quoted values:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-One quoted string                                 51029          51429         
347          0.0     1020572.6       1.0X
+One quoted string                                 57475          58232         
656          0.0     1149508.4       1.0X
 
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Wide rows with 1000 columns:              Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 1000 columns                               92368          95831         
NaN          0.0       92368.2       1.0X
-Select 100 columns                                42543          42870         
365          0.0       42542.9       2.2X
-Select one column                                 37891          39311        
1459          0.0       37891.3       2.4X
-count()                                            8267           8498         
363          0.1        8267.1      11.2X
-Select 100 columns, one bad input field           54075          54747         
584          0.0       54075.2       1.7X
-Select 100 columns, corrupt record field          58613          59802        
1561          0.0       58613.1       1.6X
+Select 1000 columns                              132598         136307        
1948          0.0      132598.0       1.0X
+Select 100 columns                                49929          53277        
1633          0.0       49929.3       2.7X
+Select one column                                 42134          43249        
1654          0.0       42134.2       3.1X
+count()                                           10570          11359         
805          0.1       10570.3      12.5X
+Select 100 columns, one bad input field           72705          73396         
608          0.0       72704.8       1.8X
+Select 100 columns, corrupt record field          78021          79071        
1078          0.0       78020.5       1.7X
 
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Count a dataset with 10 columns:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns + count()                       15170          15199         
 33          0.7        1517.0       1.0X
-Select 1 column + count()                          9307           9311         
  6          1.1         930.7       1.6X
-count()                                            3125           3128         
  3          3.2         312.5       4.9X
+Select 10 columns + count()                       16357          16437         
115          0.6        1635.7       1.0X
+Select 1 column + count()                         10360          10615         
253          1.0        1036.0       1.6X
+count()                                            3333           3367         
 56          3.0         333.3       4.9X
 
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                     1243           1286         
 49          8.0         124.3       1.0X
-to_csv(timestamp)                                 11040          11075         
 31          0.9        1104.0       0.1X
-write timestamps to files                          9767           9778         
 10          1.0         976.7       0.1X
-Create a dataset of dates                          1551           1559         
 12          6.4         155.1       0.8X
-to_csv(date)                                       7114           7189         
129          1.4         711.4       0.2X
-write dates to files                               5607           5613         
  6          1.8         560.7       0.2X
+Create a dataset of timestamps                     1556           1606         
 66          6.4         155.6       1.0X
+to_csv(timestamp)                                 12943          12999         
 90          0.8        1294.3       0.1X
+write timestamps to files                         11018          11054         
 56          0.9        1101.8       0.1X
+Create a dataset of dates                          1900           1903         
  3          5.3         190.0       0.8X
+to_csv(date)                                       8235           8243         
 14          1.2         823.5       0.2X
+write dates to files                               6582           6692         
139          1.5         658.2       0.2X
 
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
-Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                     1938           1950         
 15          5.2         193.8       1.0X
-read timestamps from files                        26964          27056         
 81          0.4        2696.4       0.1X
-infer timestamps from files                       52679          52703         
 21          0.2        5267.9       0.0X
-read date text from files                          1753           1757         
  3          5.7         175.3       1.1X
-read date from files                              15439          15450         
 16          0.6        1543.9       0.1X
-infer date from files                             30228          30300         
 63          0.3        3022.8       0.1X
-timestamp strings                                  2659           2670         
 12          3.8         265.9       0.7X
-parse timestamps from Dataset[String]             29391          29434         
 56          0.3        2939.1       0.1X
-infer timestamps from Dataset[String]             55049          55129         
 69          0.2        5504.9       0.0X
-date strings                                       2760           2767         
  8          3.6         276.0       0.7X
-parse dates from Dataset[String]                  17604          17651         
 50          0.6        1760.4       0.1X
-from_csv(timestamp)                               27749          27961         
188          0.4        2774.9       0.1X
-from_csv(date)                                    16323          16359         
 42          0.6        1632.3       0.1X
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
+Read dates and timestamps:                                             Best 
Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------------------------------
+read timestamp text from files                                                 
 2226           2249          30          4.5         222.6       1.0X
+read timestamps from files                                                     
30143          30372         261          0.3        3014.3       0.1X
+infer timestamps from files                                                    
60276          60371         149          0.2        6027.6       0.0X
+read date text from files                                                      
 1944           1984          35          5.1         194.4       1.1X
+read date from files                                                           
16159          16379         191          0.6        1615.9       0.1X
+infer date from files                                                          
31909          32249         296          0.3        3190.9       0.1X
+timestamp strings                                                              
 3178           3200          23          3.1         317.8       0.7X
+parse timestamps from Dataset[String]                                          
33062          33394         513          0.3        3306.2       0.1X
+infer timestamps from Dataset[String]                                          
63364          63523         162          0.2        6336.4       0.0X
+date strings                                                                   
 3365           3396          29          3.0         336.5       0.7X
+parse dates from Dataset[String]                                               
17453          17749         368          0.6        1745.3       0.1X
+from_csv(timestamp)                                                            
30051          30266         252          0.3        3005.1       0.1X
+from_csv(date)                                                                 
16519          17018         434          0.6        1651.9       0.1X
+infer error timestamps from Dataset[String] with default format                
20660          21094         382          0.5        2066.0       0.1X
+infer error timestamps from Dataset[String] with user-provided format          
20132          20573         382          0.5        2013.2       0.1X
+infer error timestamps from Dataset[String] with legacy format                 
20576          20924         411          0.5        2057.6       0.1X
 
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz
 Filters pushdown:                         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-w/o filters                                       17040          17047         
 11          0.0      170398.1       1.0X
-pushdown disabled                                 16951          16979         
 38          0.0      169511.8       1.0X
-w/ filters                                         1627           1640         
 11          0.1       16269.1      10.5X
+w/o filters                                       20229          20361         
121          0.0      202293.3       1.0X
+pushdown disabled                                 20203          20451         
249          0.0      202026.7       1.0X
+w/ filters                                         2228           2250         
 25          0.0       22283.5       9.1X
 
 
diff --git a/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt 
b/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
index 0919ba4aea1..d02d313b09c 100644
--- a/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
+++ b/sql/core/benchmarks/JsonBenchmark-jdk11-results.txt
@@ -3,118 +3,121 @@ Benchmark for performance of JSON parsing
 
================================================================================================
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 JSON schema inferring:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                        3733           3757         
 27          1.3         746.6       1.0X
-UTF-8 is set                                       5090           5097         
 10          1.0        1018.1       0.7X
+No encoding                                        3582           3790         
182          1.4         716.4       1.0X
+UTF-8 is set                                       5285           5306         
 36          0.9        1056.9       0.7X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 count a short column:                     Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                        2562           2606         
 60          2.0         512.4       1.0X
-UTF-8 is set                                       3887           3897         
 15          1.3         777.4       0.7X
+No encoding                                        2450           2468         
 27          2.0         489.9       1.0X
+UTF-8 is set                                       4008           4024         
 20          1.2         801.6       0.6X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 count a wide column:                      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                        4124           4162         
 55          0.2        4124.0       1.0X
-UTF-8 is set                                       6521           6533         
 19          0.2        6520.8       0.6X
+No encoding                                        4942           5166         
216          0.2        4941.8       1.0X
+UTF-8 is set                                       6957           6974         
 22          0.1        6957.5       0.7X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 select wide row:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       12911          13013         
158          0.0      258211.1       1.0X
-UTF-8 is set                                      14453          14513         
 71          0.0      289059.0       0.9X
+No encoding                                       13403          13470         
 61          0.0      268054.2       1.0X
+UTF-8 is set                                      15129          15172         
 60          0.0      302583.0       0.9X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 Select a subset of 10 columns:            Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns                                  2190           2202         
 14          0.5        2189.9       1.0X
-Select 1 column                                    2155           2160         
  6          0.5        2154.6       1.0X
+Select 10 columns                                  2246           2270         
 23          0.4        2245.7       1.0X
+Select 1 column                                    1445           1457         
 15          0.7        1445.5       1.6X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 creation of JSON parser per line:         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Short column without encoding                       804            805         
  1          1.2         804.1       1.0X
-Short column with UTF-8                            1043           1055         
 12          1.0        1042.7       0.8X
-Wide column without encoding                       6965           7014         
 70          0.1        6964.9       0.1X
-Wide column with UTF-8                             9173           9228         
 49          0.1        9173.2       0.1X
+Short column without encoding                       764            775         
 10          1.3         764.1       1.0X
+Short column with UTF-8                            1048           1067         
 17          1.0        1048.1       0.7X
+Wide column without encoding                       7143           7195         
 66          0.1        7142.9       0.1X
+Wide column with UTF-8                            11115          11123         
 10          0.1       11115.1       0.1X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 JSON functions:                           Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                            96             98         
  2         10.4          96.0       1.0X
-from_json                                          1518           1522         
  4          0.7        1517.6       0.1X
-json_tuple                                         1771           1784         
 17          0.6        1771.1       0.1X
-get_json_object                                    1549           1555         
  9          0.6        1548.8       0.1X
+Text read                                            90             94         
  4         11.1          90.4       1.0X
+from_json                                          1533           1537         
  5          0.7        1532.6       0.1X
+json_tuple                                         1796           1822         
 30          0.6        1795.8       0.1X
+get_json_object                                    1530           1532         
  2          0.7        1530.4       0.1X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 Dataset of json strings:                  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                           346            348         
  2         14.5          69.1       1.0X
-schema inferring                                   2948           2959         
 16          1.7         589.7       0.1X
-parsing                                            3348           3353         
  5          1.5         669.6       0.1X
+Text read                                           339            346         
  7         14.8          67.7       1.0X
+schema inferring                                   2695           2718         
 22          1.9         539.0       0.1X
+parsing                                            3240           3253         
 12          1.5         648.0       0.1X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 Json files in the per-line mode:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                           887            892         
  9          5.6         177.5       1.0X
-Schema inferring                                   3637           3644         
  7          1.4         727.3       0.2X
-Parsing without charset                            3784           3786         
  3          1.3         756.7       0.2X
-Parsing with UTF-8                                 5095           5107         
 16          1.0        1019.0       0.2X
+Text read                                           835            842         
 11          6.0         167.1       1.0X
+Schema inferring                                   3437           3444         
  6          1.5         687.4       0.2X
+Parsing without charset                            3682           3687         
  6          1.4         736.5       0.2X
+Parsing with UTF-8                                 5272           5277         
  6          0.9        1054.4       0.2X
 
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                      158            166         
  8          6.3         158.0       1.0X
-to_json(timestamp)                                 1302           1314         
 11          0.8        1301.9       0.1X
-write timestamps to files                          1029           1031         
  2          1.0        1029.1       0.2X
-Create a dataset of dates                           176            182         
  9          5.7         176.2       0.9X
-to_json(date)                                       900            905         
  5          1.1         900.4       0.2X
-write dates to files                                633            640         
 11          1.6         633.5       0.2X
+Create a dataset of timestamps                      156            173         
 15          6.4         156.2       1.0X
+to_json(timestamp)                                 1124           1140         
 19          0.9        1123.6       0.1X
+write timestamps to files                          1014           1023         
 13          1.0        1013.5       0.2X
+Create a dataset of dates                           178            184         
  8          5.6         177.7       0.9X
+to_json(date)                                       779            791         
 11          1.3         778.8       0.2X
+write dates to files                                617            623         
  8          1.6         616.8       0.3X
 
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
-Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                      216            217         
  2          4.6         216.0       1.0X
-read timestamps from files                         2933           2941         
 11          0.3        2932.8       0.1X
-infer timestamps from files                        7218           7220         
  3          0.1        7218.5       0.0X
-read date text from files                           204            219         
 25          4.9         204.4       1.1X
-read date from files                                925            939         
 12          1.1         925.1       0.2X
-timestamp strings                                   268            275         
 13          3.7         267.9       0.8X
-parse timestamps from Dataset[String]              3151           3155         
  6          0.3        3150.8       0.1X
-infer timestamps from Dataset[String]              7482           7487         
  4          0.1        7481.9       0.0X
-date strings                                        362            368         
 11          2.8         361.8       0.6X
-parse dates from Dataset[String]                   1322           1327         
  8          0.8        1321.7       0.2X
-from_json(timestamp)                               3900           3906         
  5          0.3        3900.4       0.1X
-from_json(date)                                    2042           2060         
 21          0.5        2042.1       0.1X
+Read dates and timestamps:                                             Best 
Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------------------------------
+read timestamp text from files                                                 
  212            233          27          4.7         212.2       1.0X
+read timestamps from files                                                     
 2852           2860          10          0.4        2852.0       0.1X
+infer timestamps from files                                                    
 7126           7168          37          0.1        7126.0       0.0X
+read date text from files                                                      
  196            208          18          5.1         195.6       1.1X
+read date from files                                                           
  916            930          12          1.1         915.7       0.2X
+timestamp strings                                                              
  258            266          12          3.9         258.3       0.8X
+parse timestamps from Dataset[String]                                          
 3072           3083          12          0.3        3071.9       0.1X
+infer timestamps from Dataset[String]                                          
 7294           7296           4          0.1        7293.9       0.0X
+date strings                                                                   
  349            356          11          2.9         349.1       0.6X
+parse dates from Dataset[String]                                               
 1291           1302          16          0.8        1291.4       0.2X
+from_json(timestamp)                                                           
 3898           3918          22          0.3        3897.6       0.1X
+from_json(date)                                                                
 2083           2098          13          0.5        2082.7       0.1X
+infer error timestamps from Dataset[String] with default format                
 2009           2021          14          0.5        2008.6       0.1X
+infer error timestamps from Dataset[String] with user-provided format          
16451          16463          16          0.1       16450.8       0.0X
+infer error timestamps from Dataset[String] with legacy format                 
 5495           5502          10          0.2        5494.5       0.0X
 
-OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 11.0.18+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
 Filters pushdown:                         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-w/o filters                                       21342          21358         
 14          0.0      213417.3       1.0X
-pushdown disabled                                 20319          20339         
 19          0.0      203192.2       1.1X
-w/ filters                                          515            523         
 10          0.2        5147.0      41.5X
+w/o filters                                       20703          20718         
 18          0.0      207032.9       1.0X
+pushdown disabled                                 19676          19688         
 10          0.0      196764.1       1.1X
+w/ filters                                          573            581         
  8          0.2        5734.6      36.1X
 
 
diff --git a/sql/core/benchmarks/JsonBenchmark-jdk17-results.txt 
b/sql/core/benchmarks/JsonBenchmark-jdk17-results.txt
index 70e65be2f70..0f05f28d61b 100644
--- a/sql/core/benchmarks/JsonBenchmark-jdk17-results.txt
+++ b/sql/core/benchmarks/JsonBenchmark-jdk17-results.txt
@@ -3,118 +3,121 @@ Benchmark for performance of JSON parsing
 
================================================================================================
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 JSON schema inferring:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                        3315           3502         
162          1.5         663.1       1.0X
-UTF-8 is set                                       4406           4410         
  8          1.1         881.2       0.8X
+No encoding                                        2933           2997         
 99          1.7         586.6       1.0X
+UTF-8 is set                                       4364           4377         
 12          1.1         872.9       0.7X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 count a short column:                     Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                        2261           2282         
 19          2.2         452.3       1.0X
-UTF-8 is set                                       3681           3690         
  8          1.4         736.3       0.6X
+No encoding                                        2316           2338         
 28          2.2         463.1       1.0X
+UTF-8 is set                                       3781           3793         
 17          1.3         756.2       0.6X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 count a wide column:                      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                        6174           6198         
 40          0.2        6174.0       1.0X
-UTF-8 is set                                       5829           5848         
 18          0.2        5828.7       1.1X
+No encoding                                        4311           4401         
117          0.2        4310.8       1.0X
+UTF-8 is set                                       5871           5893         
 21          0.2        5871.3       0.7X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 select wide row:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       13380          13527         
149          0.0      267608.0       1.0X
-UTF-8 is set                                      14322          14371         
 78          0.0      286442.2       0.9X
+No encoding                                       12641          12897         
242          0.0      252822.3       1.0X
+UTF-8 is set                                      14120          14160         
 48          0.0      282391.8       0.9X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Select a subset of 10 columns:            Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns                                  2019           2023         
  5          0.5        2019.1       1.0X
-Select 1 column                                    2408           2412         
  4          0.4        2408.5       0.8X
+Select 10 columns                                  1899           1911         
 20          0.5        1898.6       1.0X
+Select 1 column                                    1414           1423         
 12          0.7        1414.3       1.3X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 creation of JSON parser per line:         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Short column without encoding                       660            662         
  2          1.5         660.5       1.0X
-Short column with UTF-8                             909            913         
  6          1.1         908.8       0.7X
-Wide column without encoding                       7378           7417         
 62          0.1        7378.2       0.1X
-Wide column with UTF-8                             8489           8514         
 27          0.1        8489.2       0.1X
+Short column without encoding                       661            664         
  2          1.5         661.1       1.0X
+Short column with UTF-8                             918            931         
 20          1.1         917.8       0.7X
+Wide column without encoding                       6706           6736         
 49          0.1        6705.7       0.1X
+Wide column with UTF-8                             9911           9924         
 16          0.1        9911.1       0.1X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 JSON functions:                           Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                            97            102         
  4         10.3          96.7       1.0X
-from_json                                          1621           1632         
 10          0.6        1620.9       0.1X
-json_tuple                                         1990           1999         
 13          0.5        1989.6       0.0X
-get_json_object                                    1827           1830         
  4          0.5        1827.2       0.1X
+Text read                                            90             93         
  2         11.1          90.4       1.0X
+from_json                                          1561           1568         
 12          0.6        1561.0       0.1X
+json_tuple                                         1875           1913         
 63          0.5        1874.8       0.0X
+get_json_object                                    1700           1734         
 30          0.6        1699.6       0.1X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Dataset of json strings:                  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                           434            437         
  4         11.5          86.8       1.0X
-schema inferring                                   2621           2625         
  4          1.9         524.2       0.2X
-parsing                                            2834           2844         
  9          1.8         566.9       0.2X
+Text read                                           375            378         
  4         13.3          74.9       1.0X
+schema inferring                                   2356           2357         
  1          2.1         471.2       0.2X
+parsing                                            2825           2827         
  2          1.8         565.0       0.1X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Json files in the per-line mode:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                           966            970         
  5          5.2         193.2       1.0X
-Schema inferring                                   3274           3287         
 14          1.5         654.7       0.3X
-Parsing without charset                            3286           3291         
  7          1.5         657.2       0.3X
-Parsing with UTF-8                                 4499           4508         
  9          1.1         899.8       0.2X
+Text read                                           906            916         
  8          5.5         181.3       1.0X
+Schema inferring                                   3068           3073         
  5          1.6         613.5       0.3X
+Parsing without charset                            3264           3270         
  5          1.5         652.9       0.3X
+Parsing with UTF-8                                 4837           4838         
  2          1.0         967.3       0.2X
 
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                      195            196         
  1          5.1         194.7       1.0X
-to_json(timestamp)                                 1103           1124         
 30          0.9        1103.4       0.2X
-write timestamps to files                           954            957         
  4          1.0         954.1       0.2X
-Create a dataset of dates                           189            193         
  5          5.3         188.6       1.0X
-to_json(date)                                       763            767         
  3          1.3         763.1       0.3X
-write dates to files                                616            626         
 12          1.6         615.6       0.3X
+Create a dataset of timestamps                      172            188         
 17          5.8         171.7       1.0X
+to_json(timestamp)                                 1052           1063         
 13          1.0        1051.6       0.2X
+write timestamps to files                           893            901         
 14          1.1         892.7       0.2X
+Create a dataset of dates                           191            196         
  5          5.2         190.7       0.9X
+to_json(date)                                       806            809         
  2          1.2         806.0       0.2X
+write dates to files                                613            628         
 22          1.6         613.2       0.3X
 
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
-Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                      242            247         
  9          4.1         241.7       1.0X
-read timestamps from files                         2547           2592         
 73          0.4        2546.7       0.1X
-infer timestamps from files                        6289           6293         
  3          0.2        6289.4       0.0X
-read date text from files                           215            224         
  9          4.6         215.1       1.1X
-read date from files                                791            792         
  1          1.3         791.0       0.3X
-timestamp strings                                   262            274         
 17          3.8         262.1       0.9X
-parse timestamps from Dataset[String]              2668           2672         
  5          0.4        2668.2       0.1X
-infer timestamps from Dataset[String]              6392           6398         
 10          0.2        6392.0       0.0X
-date strings                                        331            332         
  1          3.0         331.1       0.7X
-parse dates from Dataset[String]                   1046           1053         
  8          1.0        1045.5       0.2X
-from_json(timestamp)                               3597           3614         
 25          0.3        3596.9       0.1X
-from_json(date)                                    2033           2036         
  4          0.5        2032.7       0.1X
+Read dates and timestamps:                                             Best 
Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------------------------------
+read timestamp text from files                                                 
  227            230           4          4.4         226.7       1.0X
+read timestamps from files                                                     
 2360           2364           3          0.4        2359.9       0.1X
+infer timestamps from files                                                    
 5955           5960           5          0.2        5955.3       0.0X
+read date text from files                                                      
  204            208           7          4.9         204.0       1.1X
+read date from files                                                           
  863            866           5          1.2         862.6       0.3X
+timestamp strings                                                              
  268            280          15          3.7         267.9       0.8X
+parse timestamps from Dataset[String]                                          
 2632           2639           9          0.4        2632.0       0.1X
+infer timestamps from Dataset[String]                                          
 6208           6214           6          0.2        6207.8       0.0X
+date strings                                                                   
  319            324           5          3.1         318.6       0.7X
+parse dates from Dataset[String]                                               
 1080           1085           5          0.9        1079.8       0.2X
+from_json(timestamp)                                                           
 3640           3647           8          0.3        3639.5       0.1X
+from_json(date)                                                                
 2055           2060           5          0.5        2054.8       0.1X
+infer error timestamps from Dataset[String] with default format                
 1738           1740           2          0.6        1737.5       0.1X
+infer error timestamps from Dataset[String] with user-provided format          
18441          18858         363          0.1       18440.9       0.0X
+infer error timestamps from Dataset[String] with legacy format                 
 5622           5648          26          0.2        5622.0       0.0X
 
-OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1031-azure
+OpenJDK 64-Bit Server VM 17.0.6+10 on Linux 5.15.0-1036-azure
 Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
 Filters pushdown:                         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-w/o filters                                       18281          18287         
  5          0.0      182808.6       1.0X
-pushdown disabled                                 17483          17486         
  3          0.0      174828.7       1.0X
-w/ filters                                          912            935         
 38          0.1        9116.7      20.1X
+w/o filters                                       19240          19283         
 39          0.0      192400.7       1.0X
+pushdown disabled                                 18420          18422         
  1          0.0      184201.3       1.0X
+w/ filters                                          597            599         
  4          0.2        5965.2      32.3X
 
 
diff --git a/sql/core/benchmarks/JsonBenchmark-results.txt 
b/sql/core/benchmarks/JsonBenchmark-results.txt
index a0f1d4f81a8..c2afd5b8293 100644
--- a/sql/core/benchmarks/JsonBenchmark-results.txt
+++ b/sql/core/benchmarks/JsonBenchmark-results.txt
@@ -3,118 +3,121 @@ Benchmark for performance of JSON parsing
 
================================================================================================
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 JSON schema inferring:                    Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                        2998           3032         
 52          1.7         599.7       1.0X
-UTF-8 is set                                       4735           4762         
 27          1.1         946.9       0.6X
+No encoding                                        2973           3233         
291          1.7         594.7       1.0X
+UTF-8 is set                                       4375           4796         
430          1.1         874.9       0.7X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 count a short column:                     Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                        2186           2199         
 17          2.3         437.1       1.0X
-UTF-8 is set                                       3496           3502         
  6          1.4         699.3       0.6X
+No encoding                                        2359           2404         
 39          2.1         471.8       1.0X
+UTF-8 is set                                       3814           3885         
101          1.3         762.8       0.6X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 count a wide column:                      Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                        4982           5288         
530          0.2        4982.2       1.0X
-UTF-8 is set                                       8619           8660         
 36          0.1        8619.1       0.6X
+No encoding                                        4630           4969         
347          0.2        4630.4       1.0X
+UTF-8 is set                                       8963           9040         
 82          0.1        8963.4       0.5X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 select wide row:                          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-No encoding                                       13048          13132         
107          0.0      260965.2       1.0X
-UTF-8 is set                                      14475          14539         
 69          0.0      289493.6       0.9X
+No encoding                                       15252          15481         
329          0.0      305030.9       1.0X
+UTF-8 is set                                      16349          16961         
627          0.0      326988.8       0.9X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 Select a subset of 10 columns:            Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Select 10 columns                                  2111           2117         
  5          0.5        2110.8       1.0X
-Select 1 column                                    2268           2271         
  5          0.4        2268.1       0.9X
+Select 10 columns                                  2290           2296         
  6          0.4        2289.6       1.0X
+Select 1 column                                    1636           1652         
 15          0.6        1635.6       1.4X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 creation of JSON parser per line:         Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Short column without encoding                       683            690         
  8          1.5         683.2       1.0X
-Short column with UTF-8                             969            975         
  6          1.0         968.7       0.7X
-Wide column without encoding                      10784          10896         
 99          0.1       10783.9       0.1X
-Wide column with UTF-8                            14434          14439         
  5          0.1       14433.6       0.0X
+Short column without encoding                       661            673         
 12          1.5         661.1       1.0X
+Short column with UTF-8                             950            978         
 26          1.1         950.1       0.7X
+Wide column without encoding                      11106          11297         
179          0.1       11106.4       0.1X
+Wide column with UTF-8                            13743          13762         
 18          0.1       13743.3       0.0X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 JSON functions:                           Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                           145            145         
  0          6.9         144.7       1.0X
-from_json                                          2019           2027         
  9          0.5        2018.8       0.1X
-json_tuple                                         2349           2360         
 10          0.4        2348.9       0.1X
-get_json_object                                    2110           2114         
  6          0.5        2109.6       0.1X
+Text read                                           119            131         
 15          8.4         119.5       1.0X
+from_json                                          2475           2493         
 18          0.4        2474.9       0.0X
+json_tuple                                         2680           2745         
 57          0.4        2680.3       0.0X
+get_json_object                                    2549           2630         
 88          0.4        2549.3       0.0X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 Dataset of json strings:                  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                           584            586         
  3          8.6         116.8       1.0X
-schema inferring                                   2762           2763         
  1          1.8         552.4       0.2X
-parsing                                            2671           2690         
 17          1.9         534.2       0.2X
+Text read                                           545            567         
 29          9.2         109.0       1.0X
+schema inferring                                   2460           2498         
 42          2.0         492.1       0.2X
+parsing                                            2618           2656         
 36          1.9         523.6       0.2X
 
 Preparing data for benchmarking ...
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 Json files in the per-line mode:          Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Text read                                           861            864         
  4          5.8         172.2       1.0X
-Schema inferring                                   3146           3148         
  2          1.6         629.1       0.3X
-Parsing without charset                            3297           3303         
 10          1.5         659.4       0.3X
-Parsing with UTF-8                                 4522           4548         
 24          1.1         904.5       0.2X
+Text read                                           884            897         
 16          5.7         176.8       1.0X
+Schema inferring                                   3016           3029         
 21          1.7         603.2       0.3X
+Parsing without charset                            3251           3267         
 14          1.5         650.2       0.3X
+Parsing with UTF-8                                 4892           5020         
118          1.0         978.3       0.2X
 
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 Write dates and timestamps:               Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-Create a dataset of timestamps                      155            159         
  4          6.5         154.8       1.0X
-to_json(timestamp)                                 1133           1150         
 15          0.9        1133.2       0.1X
-write timestamps to files                           991           1003         
 11          1.0         991.1       0.2X
-Create a dataset of dates                           179            183         
  4          5.6         178.9       0.9X
-to_json(date)                                       772            777         
  5          1.3         771.7       0.2X
-write dates to files                                595            605         
 13          1.7         594.9       0.3X
+Create a dataset of timestamps                      163            164         
  2          6.1         162.6       1.0X
+to_json(timestamp)                                 1307           1383         
 92          0.8        1307.4       0.1X
+write timestamps to files                          1044           1090         
 40          1.0        1044.5       0.2X
+Create a dataset of dates                           195            207         
 10          5.1         195.2       0.8X
+to_json(date)                                       915            934         
 19          1.1         914.8       0.2X
+write dates to files                                717            727         
  9          1.4         717.3       0.2X
 
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
-Read dates and timestamps:                Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------------------------------------
-read timestamp text from files                      229            232         
  3          4.4         229.2       1.0X
-read timestamps from files                         2397           2420         
 20          0.4        2396.9       0.1X
-infer timestamps from files                        6187           6191         
  7          0.2        6186.8       0.0X
-read date text from files                           205            206         
  2          4.9         205.0       1.1X
-read date from files                                821            825         
  4          1.2         820.9       0.3X
-timestamp strings                                   321            338         
 21          3.1         320.5       0.7X
-parse timestamps from Dataset[String]              2694           2710         
 23          0.4        2693.6       0.1X
-infer timestamps from Dataset[String]              6537           6647         
143          0.2        6536.6       0.0X
-date strings                                        364            377         
 12          2.7         364.1       0.6X
-parse dates from Dataset[String]                   1162           1168         
  6          0.9        1162.2       0.2X
-from_json(timestamp)                               3981           3986         
  5          0.3        3980.6       0.1X
-from_json(date)                                    2549           2556         
  9          0.4        2549.2       0.1X
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
+Read dates and timestamps:                                             Best 
Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+-----------------------------------------------------------------------------------------------------------------------------------------------------
+read timestamp text from files                                                 
  270            280           9          3.7         270.4       1.0X
+read timestamps from files                                                     
 2623           2789         159          0.4        2623.1       0.1X
+infer timestamps from files                                                    
 6416           7147         703          0.2        6415.7       0.0X
+read date text from files                                                      
  233            234           1          4.3         233.3       1.2X
+read date from files                                                           
  948            969          24          1.1         948.2       0.3X
+timestamp strings                                                              
  335            347          14          3.0         334.9       0.8X
+parse timestamps from Dataset[String]                                          
 2961           2993          41          0.3        2960.6       0.1X
+infer timestamps from Dataset[String]                                          
 7139           7314         158          0.1        7139.1       0.0X
+date strings                                                                   
  384            397          15          2.6         383.6       0.7X
+parse dates from Dataset[String]                                               
 1325           1347          24          0.8        1325.0       0.2X
+from_json(timestamp)                                                           
 4774           4788          13          0.2        4773.6       0.1X
+from_json(date)                                                                
 3078           3090          11          0.3        3078.5       0.1X
+infer error timestamps from Dataset[String] with default format                
 2025           2058          28          0.5        2025.0       0.1X
+infer error timestamps from Dataset[String] with user-provided format          
20261          20338          95          0.0       20260.6       0.0X
+infer error timestamps from Dataset[String] with legacy format                 
 5495           5528          38          0.2        5495.4       0.0X
 
-OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1031-azure
-Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
+OpenJDK 64-Bit Server VM 1.8.0_362-b09 on Linux 5.15.0-1036-azure
+Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
 Filters pushdown:                         Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-w/o filters                                       18061          18073         
 11          0.0      180612.7       1.0X
-pushdown disabled                                 17413          17427         
 15          0.0      174125.9       1.0X
-w/ filters                                          717            719         
  2          0.1        7166.6      25.2X
+w/o filters                                       19710          19942         
208          0.0      197100.3       1.0X
+pushdown disabled                                 19128          19422         
467          0.0      191281.3       1.0X
+w/ filters                                          676            709         
 29          0.1        6760.3      29.2X
 
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala
index 7e2a2688e65..eb561e13fc6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala
@@ -292,6 +292,36 @@ object CSVBenchmark extends SqlBasedBenchmark {
         ds.noop()
       }
 
+      def errorTimestampStr: Dataset[String] = {
+        spark.range(0, rowsNum, 1, 1).mapPartitions { iter =>
+          iter.map {
+            i => s"data${i % 200}"
+          }
+        }.select($"value".as("timestamp")).as[String]
+      }
+
+      readBench.addCase("infer error timestamps from Dataset[String] with default format",
+        numIters) { _ =>
+        spark.read.option("header", false)
+          .option("inferSchema", true)
+          .csv(errorTimestampStr).noop()
+      }
+
+      readBench.addCase("infer error timestamps from Dataset[String] with user-provided format",
+        numIters) { _ =>
+        spark.read.option("header", false)
+          .option("inferSchema", true).option("timestampFormat",
+          "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'").csv(errorTimestampStr).noop()
+      }
+
+      readBench.addCase("infer error timestamps from Dataset[String] with legacy format",
+        numIters) { _ =>
+        withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "LEGACY") {
+          spark.read.option("header", false)
+            .option("inferSchema", true).csv(errorTimestampStr).noop()
+        }
+      }
+
       readBench.run()
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
index a7794848434..c522378a65d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala
@@ -473,6 +473,32 @@ object JsonBenchmark extends SqlBasedBenchmark {
         ds.noop()
       }
 
+      def errorTimestampStr: Dataset[String] = {
+        spark.range(0, rowsNum, 1, 1).mapPartitions { iter =>
+          iter.map { i =>
+            s"""{"timestamp":"data${i % 200}"}"""
+          }
+        }.select($"value".as("timestamp")).as[String]
+      }
+
+      readBench.addCase("infer error timestamps from Dataset[String] with default format",
+        numIters) { _ =>
+        spark.read.option("inferTimestamp", true).json(errorTimestampStr).noop()
+      }
+
+      readBench.addCase("infer error timestamps from Dataset[String] with user-provided format",
+        numIters) { _ =>
+        spark.read.option("inferTimestamp", true).option("timestampFormat",
+          "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'").json(errorTimestampStr).noop()
+      }
+
+      readBench.addCase("infer error timestamps from Dataset[String] with legacy format",
+        numIters) { _ =>
+        withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> "LEGACY") {
+          spark.read.option("inferTimestamp", true).json(errorTimestampStr).noop()
+        }
+      }
+
       readBench.run()
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to