This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 72d8a78317 Extend string parsing support for Date32 to encompass the timestamp format (#5282)
72d8a78317 is described below
commit 72d8a783176219f0864022daba70e84ceab7e221
Author: Marko Grujic <[email protected]>
AuthorDate: Thu Jan 11 12:00:12 2024 +0100
Extend string parsing support for Date32 to encompass the timestamp format (#5282)
---
arrow-cast/src/cast.rs | 12 +++++++-----
arrow-cast/src/parse.rs | 17 ++++++++++++++---
2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index 92b9071a67..bd35096e06 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -7482,9 +7482,9 @@ mod tests {
let a = StringArray::from(vec![
"2000-01-01", // valid date with leading 0s
+ "2000-01-01T12:00:00", // valid datetime, will throw away the time part
"2000-2-2", // valid date without leading 0s
"2000-00-00", // invalid month and day
- "2000-01-01T12:00:00", // date + time is invalid
"2000", // just a year is invalid
]);
let array = Arc::new(a) as ArrayRef;
@@ -7500,17 +7500,19 @@ mod tests {
assert!(c.is_valid(0)); // "2000-01-01"
assert_eq!(date_value, c.value(0));
+ assert!(c.is_valid(1)); // "2000-01-01T12:00:00"
+ assert_eq!(date_value, c.value(1));
+
let date_value = since(
NaiveDate::from_ymd_opt(2000, 2, 2).unwrap(),
from_ymd(1970, 1, 1).unwrap(),
)
.num_days() as i32;
- assert!(c.is_valid(1)); // "2000-2-2"
- assert_eq!(date_value, c.value(1));
+ assert!(c.is_valid(2)); // "2000-2-2"
+ assert_eq!(date_value, c.value(2));
// test invalid inputs
- assert!(!c.is_valid(2)); // "2000-00-00"
- assert!(!c.is_valid(3)); // "2000-01-01T12:00:00"
+ assert!(!c.is_valid(3)); // "2000-00-00"
assert!(!c.is_valid(4)); // "2000"
}
diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
index 3d2e47ed95..50e9fda672 100644
--- a/arrow-cast/src/parse.rs
+++ b/arrow-cast/src/parse.rs
@@ -546,8 +546,11 @@ const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented a
fn parse_date(string: &str) -> Option<NaiveDate> {
if string.len() > 10 {
- return None;
- }
+ // Try to parse as datetime and return just the date part
+ return string_to_datetime(&Utc, string)
+ .map(|dt| dt.date_naive())
+ .ok();
+ };
let mut digits = [0; 10];
let mut mask = 0;
@@ -1488,10 +1491,13 @@ mod tests {
"2020-9-08",
"2020-12-1",
"1690-2-5",
+ "2020-09-08 01:02:03",
];
for case in cases {
let v =
date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap();
- let expected: NaiveDate = case.parse().unwrap();
+ let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d")
+ .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S"))
+ .unwrap();
assert_eq!(v.date(), expected);
}
@@ -1503,6 +1509,11 @@ mod tests {
"2020-09-08-03",
"2020--04-03",
"2020--",
+ "2020-09-08 01",
+ "2020-09-08 01:02",
+ "2020-09-08 01-02-03",
+ "2020-9-8 01:02:03",
+ "2020-09-08 1:2:3",
];
for case in err_cases {
assert_eq!(Date32Type::parse(case), None);