alamb commented on code in PR #3860:
URL: https://github.com/apache/arrow-rs/pull/3860#discussion_r1138623431
##########
arrow-cast/src/parse.rs:
##########
@@ -1382,6 +1429,62 @@ mod tests {
);
}
+ #[test]
+ fn test_string_to_time_invalid() {
+ let cases = [
+ "25:00",
Review Comment:
I recommend also testing space and leading colon
````
" 9:00:00",
":09:00",
```
Perhaps also with a leading T (in case someone splits a RFC timestamp
incorrectly)
```
"T9:00:00",
```
##########
arrow-cast/src/parse.rs:
##########
@@ -1382,6 +1429,62 @@ mod tests {
);
}
+ #[test]
+ fn test_string_to_time_invalid() {
+ let cases = [
+ "25:00",
+ "9:00:",
+ "009:00",
+ "09:0:00",
+ "25:00:00",
+ "13:00 AM",
+ "13:00 PM",
+ "12:00. AM",
+ "09:0:00",
+ "09:01:0",
+ "09:01:1",
+ "9:1:0",
+ "09:01:0",
+ "1:00.123",
+ "1:00:00.123f",
+ ];
+ for case in cases {
+ assert!(string_to_time(case).is_none(), "{case}");
+ }
+ }
+
+ #[test]
+ fn test_string_to_time_chrono() {
+ let cases = [
+ ("1:00", "%H:%M"),
+ ("12:00", "%H:%M"),
+ ("13:00", "%H:%M"),
+ ("24:00", "%H:%M"),
+ ("1:00:00", "%H:%M:%S"),
+ ("12:00:30", "%H:%M:%S"),
+ ("13:00:59", "%H:%M:%S"),
+ ("24:00:60", "%H:%M:%S"),
+ ("09:00:00", "%H:%M:%S%.f"),
+ ("0:00:30.123456", "%H:%M:%S%.f"),
+ ("0:00 AM", "%I:%M %P"),
+ ("1:00 AM", "%I:%M %P"),
+ ("12:00 AM", "%I:%M %P"),
+ ("13:00 AM", "%I:%M %P"),
+ ("0:00 PM", "%I:%M %P"),
+ ("1:00 PM", "%I:%M %P"),
+ ("12:00 PM", "%I:%M %P"),
+ ("13:00 PM", "%I:%M %P"),
+ ("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
+ ("1:00:30.123456789 PM", "%I:%M:%S%.f %P"),
+ ("1:00:30.123456789123 PM", "%I:%M:%S%.f %P"),
Review Comment:
Given the experience with https://github.com/apache/arrow-rs/issues/3859, I
think it would be valuable to include a time that has 4 and 7 significant digit
(aka not a multiple of three)
```
("1:00:30.1234 PM", "%I:%M:%S%.f %P"),
("1:00:30.123456 PM", "%I:%M:%S%.f %P"),
```
Perhaps?
##########
arrow-cast/src/parse.rs:
##########
@@ -1382,6 +1429,62 @@ mod tests {
);
}
+ #[test]
+ fn test_string_to_time_invalid() {
+ let cases = [
+ "25:00",
Review Comment:
Also intermediate non digits:
```
"1:00:30.12F456 PM",
```
##########
arrow-cast/src/parse.rs:
##########
@@ -1382,6 +1429,62 @@ mod tests {
);
}
+ #[test]
+ fn test_string_to_time_invalid() {
+ let cases = [
+ "25:00",
Review Comment:
Also, very long subseconds:
```
("1:00:30.123456789123456789 PM")
```
##########
arrow-cast/src/parse.rs:
##########
@@ -291,79 +292,118 @@ fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64,
ArrowError> {
/// This function does not support parsing strings with a timezone
/// or offset specified, as it considers only time since midnight.
pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
- // colon count, presence of decimal, presence of whitespace
- fn preprocess_time_string(string: &str) -> (usize, bool, bool) {
- string
- .as_bytes()
- .iter()
- .fold((0, false, false), |tup, char| match char {
- b':' => (tup.0 + 1, tup.1, tup.2),
- b'.' => (tup.0, true, tup.2),
- b' ' => (tup.0, tup.1, true),
- _ => tup,
- })
+ let nt = string_to_time(s).ok_or_else(|| {
+ ArrowError::ParseError(format!("Failed to parse \'{s}\' as time"))
+ })?;
+ Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond()
as i64)
+}
+
+fn string_to_time(s: &str) -> Option<NaiveTime> {
+ let bytes = s.as_bytes();
+ if bytes.len() < 4 {
+ return None;
}
- // Do a preprocess pass of the string to prune which formats to attempt
parsing for
- let formats: &[&str] = match preprocess_time_string(s.trim()) {
- // 24-hour clock, with hour, minutes, seconds and fractions of a
second specified
- // Examples:
- // * 09:50:12.123456789
- // * 9:50:12.123456789
- (2, true, false) => &["%H:%M:%S%.f", "%k:%M:%S%.f"],
-
- // 12-hour clock, with hour, minutes, seconds and fractions of a
second specified
- // Examples:
- // * 09:50:12.123456789 PM
- // * 09:50:12.123456789 pm
- // * 9:50:12.123456789 AM
- // * 9:50:12.123456789 am
- (2, true, true) => &[
- "%I:%M:%S%.f %P",
- "%I:%M:%S%.f %p",
- "%l:%M:%S%.f %P",
- "%l:%M:%S%.f %p",
- ],
-
- // 24-hour clock, with hour, minutes and seconds specified
- // Examples:
- // * 09:50:12
- // * 9:50:12
- (2, false, false) => &["%H:%M:%S", "%k:%M:%S"],
-
- // 12-hour clock, with hour, minutes and seconds specified
- // Examples:
- // * 09:50:12 PM
- // * 09:50:12 pm
- // * 9:50:12 AM
- // * 9:50:12 am
- (2, false, true) => &["%I:%M:%S %P", "%I:%M:%S %p", "%l:%M:%S %P",
"%l:%M:%S %p"],
-
- // 24-hour clock, with hour and minutes specified
- // Examples:
- // * 09:50
- // * 9:50
- (1, false, false) => &["%H:%M", "%k:%M"],
-
- // 12-hour clock, with hour and minutes specified
- // Examples:
- // * 09:50 PM
- // * 09:50 pm
- // * 9:50 AM
- // * 9:50 am
- (1, false, true) => &["%I:%M %P", "%I:%M %p", "%l:%M %P", "%l:%M %p"],
-
- _ => &[],
+ let (am, bytes) = match bytes.get(bytes.len() - 3..) {
+ Some(b" AM") => (Some(true), &bytes[..bytes.len() - 3]),
Review Comment:
Are `Am` and `Pm` valid? If not, perhaps we can add a test showing that
##########
arrow-cast/src/parse.rs:
##########
@@ -1382,6 +1429,62 @@ mod tests {
);
}
+ #[test]
+ fn test_string_to_time_invalid() {
+ let cases = [
+ "25:00",
Review Comment:
Also just AM
```
"AM",
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]