tustvold commented on code in PR #3101:
URL: https://github.com/apache/arrow-rs/pull/3101#discussion_r1023191009
##########
arrow-cast/src/parse.rs:
##########
@@ -132,6 +132,106 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64,
ArrowError> {
)))
}
+/// Accepts a string in ISO8601 standard format and some
+/// variants and converts it to nanoseconds since midnight.
+///
+/// Examples of accepted inputs:
+/// * `09:26:56.123 AM`
+/// * `23:59:59`
+/// * `6:00 pm`
+//
+/// Internally, this function uses the `chrono` library for the
+/// time parsing
+///
+/// ## Timezone / Offset Handling
+///
+/// This function does not support parsing strings with a timezone
+/// or offset specified, as it considers only time since midnight.
+pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
+ // colon count, presence of decimal, presence of whitespace
+ fn preprocess_time_string(string: &str) -> (u8, bool, bool) {
+ string
+ .chars()
+ .fold((0, false, false), |tup, char| match char {
+ ':' => (tup.0.saturating_add(1), tup.1, tup.2),
Review Comment:
```suggestion
.fold((0_usize, false, false), |tup, char| match char {
':' => (tup.0 + 1, tup.1, tup.2),
```
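Using a `usize` means this can't actually overflow: the colon count can never
exceed the length of the string in bytes, which always fits in a `usize`, so
the `saturating_add` is unnecessary.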
##########
arrow-cast/src/parse.rs:
##########
@@ -411,4 +577,265 @@ mod tests {
parse_timestamp("2020-09-08 13:42:29").unwrap()
);
}
+
+ #[test]
+ fn parse_time64_nanos() {
+ assert_eq!(
+ Time64NanosecondType::parse("02:10:01.1234567899999999"),
+ Some(7_801_123_456_789)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("02:10:01.1234567"),
+ Some(7_801_123_456_700)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("2:10:01.1234567"),
+ Some(7_801_123_456_700)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("12:10:01.123456789 AM"),
+ Some(601_123_456_789)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("12:10:01.123456789 am"),
+ Some(601_123_456_789)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("2:10:01.12345678 PM"),
+ Some(51_001_123_456_780)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("2:10:01.12345678 pm"),
+ Some(51_001_123_456_780)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("02:10:01"),
+ Some(7_801_000_000_000)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("2:10:01"),
+ Some(7_801_000_000_000)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("12:10:01 AM"),
+ Some(601_000_000_000)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("12:10:01 am"),
+ Some(601_000_000_000)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("2:10:01 PM"),
+ Some(51_001_000_000_000)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("2:10:01 pm"),
+ Some(51_001_000_000_000)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("02:10"),
+ Some(7_800_000_000_000)
+ );
+ assert_eq!(Time64NanosecondType::parse("2:10"),
Some(7_800_000_000_000));
+ assert_eq!(
+ Time64NanosecondType::parse("12:10 AM"),
+ Some(600_000_000_000)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("12:10 am"),
+ Some(600_000_000_000)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("2:10 PM"),
+ Some(51_000_000_000_000)
+ );
+ assert_eq!(
+ Time64NanosecondType::parse("2:10 pm"),
+ Some(51_000_000_000_000)
+ );
+
+ // parse directly as nanoseconds
+ assert_eq!(Time64NanosecondType::parse("1"), Some(1));
+
+ // colon overflow
+ assert_eq!(
+ Time64NanosecondType::parse(&(0..=256).map(|_|
':').collect::<String>()),
+ None
+ );
+
+ // leap second
+ assert_eq!(
+ Time64NanosecondType::parse("23:59:60"),
+ Some(86_400_000_000_000)
+ );
+
+ // custom format
Review Comment:
:+1:
##########
arrow-cast/src/parse.rs:
##########
@@ -132,6 +132,106 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64,
ArrowError> {
)))
}
+/// Accepts a string in ISO8601 standard format and some
+/// variants and converts it to nanoseconds since midnight.
+///
+/// Examples of accepted inputs:
+/// * `09:26:56.123 AM`
+/// * `23:59:59`
+/// * `6:00 pm`
+//
+/// Internally, this function uses the `chrono` library for the
+/// time parsing
+///
+/// ## Timezone / Offset Handling
+///
+/// This function does not support parsing strings with a timezone
+/// or offset specified, as it considers only time since midnight.
+pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
+ // colon count, presence of decimal, presence of whitespace
+ fn preprocess_time_string(string: &str) -> (u8, bool, bool) {
+ string
+ .chars()
Review Comment:
```suggestion
.as_bytes()
.iter()
```
And then match using `b':'` (and likewise `b'.'` and `b' '`).
We don't actually need to use `chars` here: UTF-8 is designed so that the
bytes of a multi-byte sequence are never in the ASCII range, so ASCII
characters can be compared byte-by-byte without ambiguity -
https://en.wikipedia.org/wiki/UTF-8#Encoding
##########
arrow-cast/src/parse.rs:
##########
@@ -132,6 +132,106 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64,
ArrowError> {
)))
}
+/// Accepts a string in ISO8601 standard format and some
+/// variants and converts it to nanoseconds since midnight.
+///
+/// Examples of accepted inputs:
+/// * `09:26:56.123 AM`
+/// * `23:59:59`
+/// * `6:00 pm`
+//
+/// Internally, this function uses the `chrono` library for the
+/// time parsing
+///
+/// ## Timezone / Offset Handling
+///
+/// This function does not support parsing strings with a timezone
+/// or offset specified, as it considers only time since midnight.
+pub fn string_to_time_nanoseconds(s: &str) -> Result<i64, ArrowError> {
+ // colon count, presence of decimal, presence of whitespace
+ fn preprocess_time_string(string: &str) -> (u8, bool, bool) {
+ string
+ .chars()
+ .fold((0, false, false), |tup, char| match char {
+ ':' => (tup.0.saturating_add(1), tup.1, tup.2),
+ '.' => (tup.0, true, tup.2),
+ ' ' => (tup.0, tup.1, true),
+ _ => tup,
+ })
+ }
+
+ fn naive_time_parser(string: &str, formats: &[&str]) -> Option<NaiveTime> {
Review Comment:
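It might be cleaner to have the match block evaluate to the list of candidate
formats (`&[&str]`) and then run the parsing step once, after the match,
instead of calling this helper from every match arm.
e.g. something like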
```
let s = s.trim();
let formats = match preprocess_time_string(s) {
...
};
formats
.iter()
.find_map(|f| NaiveTime::parse_from_str(s, f).ok())
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]