This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new e2b9b1afe Support Absolute Timestamps in CSV Schema Inference (#4131) (#4217)
e2b9b1afe is described below
commit e2b9b1afebd45ab0dcdef260d662733bb3ac7d82
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Mon May 22 11:53:12 2023 +0100
Support Absolute Timestamps in CSV Schema Inference (#4131) (#4217)
---
arrow-csv/src/reader/mod.rs | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
index 0ab1664f5..328c2cd41 100644
--- a/arrow-csv/src/reader/mod.rs
+++ b/arrow-csv/src/reader/mod.rs
@@ -150,10 +150,10 @@ lazy_static! {
r"^-?(\d+)$", //INTEGER
r"^-?((\d*\.\d+|\d+\.\d*)([eE]-?\d+)?|\d+([eE]-?\d+))$", //DECIMAL
r"^\d{4}-\d\d-\d\d$", //DATE32
- r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d$", //Timestamp(Second)
- r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d.\d{1,3}$",
//Timestamp(Millisecond)
- r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d.\d{1,6}$",
//Timestamp(Microsecond)
- r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d.\d{1,9}$", //Timestamp(Nanosecond)
+ r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d(?:[^\d\.].*)?$",
//Timestamp(Second)
+ r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d\.\d{1,3}(?:[^\d].*)?$",
//Timestamp(Millisecond)
+ r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d\.\d{1,6}(?:[^\d].*)?$",
//Timestamp(Microsecond)
+ r"^\d{4}-\d\d-\d\d[T ]\d\d:\d\d:\d\d\.\d{1,9}(?:[^\d].*)?$",
//Timestamp(Nanosecond)
]).unwrap();
}
@@ -2165,6 +2165,19 @@ mod tests {
],
DataType::Timestamp(TimeUnit::Microsecond, None),
),
+ (
+ &["2020-03-19 02:00:00+02:00", "2020-03-19 02:00:00Z"],
+ DataType::Timestamp(TimeUnit::Second, None),
+ ),
+ (
+ &[
+ "2020-03-19",
+ "2020-03-19 02:00:00+02:00",
+ "2020-03-19 02:00:00Z",
+ "2020-03-19 02:00:00.12Z",
+ ],
+ DataType::Timestamp(TimeUnit::Millisecond, None),
+ ),
(
&[
"2020-03-19",