This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new e2b9b1afe Support Absolute Timestamps in CSV Schema Inference (#4131) (#4217)
e2b9b1afe is described below

commit e2b9b1afebd45ab0dcdef260d662733bb3ac7d82
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Mon May 22 11:53:12 2023 +0100

    Support Absolute Timestamps in CSV Schema Inference (#4131) (#4217)
---
 arrow-csv/src/reader/mod.rs | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
index 0ab1664f5..328c2cd41 100644
--- a/arrow-csv/src/reader/mod.rs
+++ b/arrow-csv/src/reader/mod.rs
@@ -150,10 +150,10 @@ lazy_static! {
         r"^-?(\d+)$", //INTEGER
         r"^-?((\d*\.\d+|\d+\.\d*)([eE]-?\d+)?|\d+([eE]-?\d+))$", //DECIMAL
         r"^\d{4}-\d\d-\d\d$", //DATE32
-        r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d$", //Timestamp(Second)
-        r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d.\d{1,3}$", //Timestamp(Millisecond)
-        r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d.\d{1,6}$", //Timestamp(Microsecond)
-        r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d.\d{1,9}$", //Timestamp(Nanosecond)
+        r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d(?:[^\d\.].*)?$", //Timestamp(Second)
+        r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d\.\d{1,3}(?:[^\d].*)?$", //Timestamp(Millisecond)
+        r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d\.\d{1,6}(?:[^\d].*)?$", //Timestamp(Microsecond)
+        r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d\.\d{1,9}(?:[^\d].*)?$", //Timestamp(Nanosecond)
     ]).unwrap();
 }
 
@@ -2165,6 +2165,19 @@ mod tests {
                 ],
                 DataType::Timestamp(TimeUnit::Microsecond, None),
             ),
+            (
+                &["2020-03-19 02:00:00+02:00", "2020-03-19 02:00:00Z"],
+                DataType::Timestamp(TimeUnit::Second, None),
+            ),
+            (
+                &[
+                    "2020-03-19",
+                    "2020-03-19 02:00:00+02:00",
+                    "2020-03-19 02:00:00Z",
+                    "2020-03-19 02:00:00.12Z",
+                ],
+                DataType::Timestamp(TimeUnit::Millisecond, None),
+            ),
             (
                 &[
                     "2020-03-19",

Reply via email to