alamb commented on code in PR #2251:
URL: https://github.com/apache/arrow-rs/pull/2251#discussion_r934903273
##########
arrow/src/compute/kernels/cast.rs:
##########
@@ -1584,6 +1625,303 @@ fn cast_string_to_date64<Offset: OffsetSizeTrait>(
Ok(Arc::new(array) as ArrayRef)
}
+fn seconds_since_midnight(time: &chrono::NaiveTime) -> i32 {
+ let sec = time.num_seconds_from_midnight();
+ let frac = time.nanosecond();
+ let adjust = if frac < 1_000_000_000 { 0 } else { 1 };
Review Comment:
It took me a while to grok this -- I think it is leap second handling
https://docs.rs/chrono/0.4.19/chrono/trait.Timelike.html#tymethod.nanosecond
Maybe we could add a comment explaining what was going on
```suggestion
// handle leap second
// see
https://docs.rs/chrono/0.4.19/chrono/trait.Timelike.html#tymethod.nanosecond
let adjust = if frac < 1_000_000_000 { 0 } else { 1 };
```
##########
arrow/src/compute/kernels/cast.rs:
##########
@@ -1584,6 +1625,303 @@ fn cast_string_to_date64<Offset: OffsetSizeTrait>(
Ok(Arc::new(array) as ArrayRef)
}
+fn seconds_since_midnight(time: &chrono::NaiveTime) -> i32 {
Review Comment:
https://github.com/apache/arrow-rs/blob/master/arrow/src/temporal_conversions.rs
may be a good place to put these functions too (so they have a chance of being
found / reused)
##########
arrow/src/compute/kernels/cast.rs:
##########
@@ -136,9 +137,25 @@ pub fn can_cast_types(from_type: &DataType, to_type:
&DataType) -> bool {
(Utf8, LargeUtf8) => true,
(LargeUtf8, Utf8) => true,
- (Utf8, Date32 | Date64 | Timestamp(TimeUnit::Nanosecond, None)) =>
true,
+ (Utf8,
+ Date32
+ | Date64
+ | Time32(TimeUnit::Second)
Review Comment:
❤️
##########
arrow/src/compute/kernels/cast.rs:
##########
@@ -1584,6 +1625,303 @@ fn cast_string_to_date64<Offset: OffsetSizeTrait>(
Ok(Arc::new(array) as ArrayRef)
}
+fn seconds_since_midnight(time: &chrono::NaiveTime) -> i32 {
+ let sec = time.num_seconds_from_midnight();
+ let frac = time.nanosecond();
+ let adjust = if frac < 1_000_000_000 { 0 } else { 1 };
+ (sec + adjust) as i32
+}
+
+fn milliseconds_since_midnight(time: &chrono::NaiveTime) -> i32 {
+ /// The number of nanoseconds per millisecond.
+ const NANOS_PER_MILLI: u32 = 1_000_000;
+ /// The number of milliseconds per second.
+ const MILLIS_PER_SEC: u32 = 1_000;
+
+ let sec = time.num_seconds_from_midnight() * MILLIS_PER_SEC;
+ let frac = time.nanosecond();
+ let (frac, adjust) = if frac < 1_000_000_000 {
+ (frac, 0)
+ } else {
+ (frac - 1_000_000_000, MILLIS_PER_SEC)
+ };
+ (sec + adjust + frac / NANOS_PER_MILLI) as i32
Review Comment:
There is probably a good reason, but my feeble mind can't figure it out. Why
is it important to break up `frac` and `adjust`?
For example, isn't this equivalent?
```suggestion
(sec + frac / NANOS_PER_MILLI) as i32
```
##########
arrow/src/compute/kernels/cast.rs:
##########
@@ -1584,6 +1625,303 @@ fn cast_string_to_date64<Offset: OffsetSizeTrait>(
Ok(Arc::new(array) as ArrayRef)
}
+fn seconds_since_midnight(time: &chrono::NaiveTime) -> i32 {
+ let sec = time.num_seconds_from_midnight();
+ let frac = time.nanosecond();
+ let adjust = if frac < 1_000_000_000 { 0 } else { 1 };
+ (sec + adjust) as i32
+}
+
+fn milliseconds_since_midnight(time: &chrono::NaiveTime) -> i32 {
+ /// The number of nanoseconds per millisecond.
+ const NANOS_PER_MILLI: u32 = 1_000_000;
+ /// The number of milliseconds per second.
+ const MILLIS_PER_SEC: u32 = 1_000;
+
+ let sec = time.num_seconds_from_midnight() * MILLIS_PER_SEC;
+ let frac = time.nanosecond();
+ let (frac, adjust) = if frac < 1_000_000_000 {
+ (frac, 0)
+ } else {
+ (frac - 1_000_000_000, MILLIS_PER_SEC)
+ };
+ (sec + adjust + frac / NANOS_PER_MILLI) as i32
+}
+
+/// Casts generic string arrays to `Time32SecondArray`
+fn cast_string_to_time32second<Offset: OffsetSizeTrait>(
+ array: &dyn Array,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef> {
+ let string_array = array
+ .as_any()
+ .downcast_ref::<GenericStringArray<Offset>>()
+ .unwrap();
+
+ let array = if cast_options.safe {
+ let iter = (0..string_array.len()).map(|i| {
+ if string_array.is_null(i) {
+ None
+ } else {
+ string_array
+ .value(i)
+ .parse::<chrono::NaiveTime>()
+ .map(|time| seconds_since_midnight(&time))
+ .ok()
+ }
+ });
+
+ // Benefit:
+ // 20% performance improvement
+ // Soundness:
+ // The iterator is trustedLen because it comes from an
`StringArray`.
+ unsafe { Time32SecondArray::from_trusted_len_iter(iter) }
+ } else {
+ let vec = (0..string_array.len())
+ .map(|i| {
+ if string_array.is_null(i) {
+ Ok(None)
+ } else {
+ let string = string_array
+ .value(i);
+ chrono::Duration::days(3);
Review Comment:
The
```
chrono::Duration::days(3);
```
Seems like a leftover?
##########
arrow/src/compute/kernels/cast.rs:
##########
@@ -1584,6 +1625,303 @@ fn cast_string_to_date64<Offset: OffsetSizeTrait>(
Ok(Arc::new(array) as ArrayRef)
}
+fn seconds_since_midnight(time: &chrono::NaiveTime) -> i32 {
+ let sec = time.num_seconds_from_midnight();
+ let frac = time.nanosecond();
+ let adjust = if frac < 1_000_000_000 { 0 } else { 1 };
+ (sec + adjust) as i32
+}
+
+fn milliseconds_since_midnight(time: &chrono::NaiveTime) -> i32 {
+ /// The number of nanoseconds per millisecond.
+ const NANOS_PER_MILLI: u32 = 1_000_000;
+ /// The number of milliseconds per second.
+ const MILLIS_PER_SEC: u32 = 1_000;
+
+ let sec = time.num_seconds_from_midnight() * MILLIS_PER_SEC;
+ let frac = time.nanosecond();
+ let (frac, adjust) = if frac < 1_000_000_000 {
+ (frac, 0)
+ } else {
+ (frac - 1_000_000_000, MILLIS_PER_SEC)
+ };
+ (sec + adjust + frac / NANOS_PER_MILLI) as i32
+}
+
+/// Casts generic string arrays to `Time32SecondArray`
+fn cast_string_to_time32second<Offset: OffsetSizeTrait>(
+ array: &dyn Array,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef> {
+ let string_array = array
+ .as_any()
+ .downcast_ref::<GenericStringArray<Offset>>()
+ .unwrap();
+
+ let array = if cast_options.safe {
+ let iter = (0..string_array.len()).map(|i| {
+ if string_array.is_null(i) {
+ None
+ } else {
+ string_array
+ .value(i)
+ .parse::<chrono::NaiveTime>()
+ .map(|time| seconds_since_midnight(&time))
+ .ok()
+ }
+ });
Review Comment:
I think the same type of transformation can be applied to the iterator below
this as well.
##########
arrow/src/compute/kernels/cast.rs:
##########
@@ -1584,6 +1625,303 @@ fn cast_string_to_date64<Offset: OffsetSizeTrait>(
Ok(Arc::new(array) as ArrayRef)
}
+fn seconds_since_midnight(time: &chrono::NaiveTime) -> i32 {
+ let sec = time.num_seconds_from_midnight();
+ let frac = time.nanosecond();
+ let adjust = if frac < 1_000_000_000 { 0 } else { 1 };
+ (sec + adjust) as i32
+}
+
+fn milliseconds_since_midnight(time: &chrono::NaiveTime) -> i32 {
+ /// The number of nanoseconds per millisecond.
+ const NANOS_PER_MILLI: u32 = 1_000_000;
+ /// The number of milliseconds per second.
+ const MILLIS_PER_SEC: u32 = 1_000;
+
+ let sec = time.num_seconds_from_midnight() * MILLIS_PER_SEC;
+ let frac = time.nanosecond();
+ let (frac, adjust) = if frac < 1_000_000_000 {
+ (frac, 0)
+ } else {
+ (frac - 1_000_000_000, MILLIS_PER_SEC)
+ };
+ (sec + adjust + frac / NANOS_PER_MILLI) as i32
+}
+
+/// Casts generic string arrays to `Time32SecondArray`
+fn cast_string_to_time32second<Offset: OffsetSizeTrait>(
+ array: &dyn Array,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef> {
+ let string_array = array
+ .as_any()
+ .downcast_ref::<GenericStringArray<Offset>>()
+ .unwrap();
+
+ let array = if cast_options.safe {
+ let iter = (0..string_array.len()).map(|i| {
+ if string_array.is_null(i) {
+ None
+ } else {
+ string_array
+ .value(i)
+ .parse::<chrono::NaiveTime>()
+ .map(|time| seconds_since_midnight(&time))
+ .ok()
+ }
+ });
Review Comment:
I think this will be faster as it will not require checking bounds for calls
to `is_null` or `value`:
```suggestion
let iter = string_array
.iter()
.flat_map(|v| {
v.map(|v| {
v.parse::<chrono::NaiveTime>()
.map(|time| seconds_since_midnight(&time))
.ok()
})
});
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]