tustvold commented on code in PR #3016:
URL: https://github.com/apache/arrow-rs/pull/3016#discussion_r1014498040
##########
arrow/src/compute/kernels/cast.rs:
##########
@@ -1413,6 +1422,71 @@ pub fn cast_with_options(
as_primitive_array::<TimestampNanosecondType>(array)
.unary::<_, Date64Type>(|x| x / (NANOSECONDS / MILLISECONDS)),
)),
+ (Timestamp(TimeUnit::Second, _), Time64(TimeUnit::Microsecond)) =>
Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64MicrosecondType>(|x| {
+ x * MICROSECONDS % MICROSECONDS_IN_DAY
Review Comment:
x is already in microseconds, so I don't think we need to multiply by
MICROSECONDS
I wonder if we could use `as_datetime` and `as_datetime_with_timezone`, to
obtain a NaiveDateTime (when no timezone) or `DateTime<Tz>` when there is a
timezone, and then use
https://docs.rs/chrono/latest/chrono/naive/struct.NaiveDateTime.html#method.time
and https://docs.rs/chrono/latest/chrono/struct.DateTime.html#method.time to
obtain the time. Followed by
https://docs.rs/chrono/latest/chrono/trait.Timelike.html to obtain the correct
value for the time unit
##########
arrow/src/compute/kernels/cast.rs:
##########
@@ -1413,6 +1422,71 @@ pub fn cast_with_options(
as_primitive_array::<TimestampNanosecondType>(array)
.unary::<_, Date64Type>(|x| x / (NANOSECONDS / MILLISECONDS)),
)),
+ (Timestamp(TimeUnit::Second, _), Time64(TimeUnit::Microsecond)) =>
Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64MicrosecondType>(|x| {
+ x * MICROSECONDS % MICROSECONDS_IN_DAY
+ }),
+ )),
+ (Timestamp(TimeUnit::Second, _), Time64(TimeUnit::Nanosecond)) =>
Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64NanosecondType>(|x| {
+ x * NANOSECONDS % NANOSECONDS_IN_DAY
+ }),
+ )),
+ (Timestamp(TimeUnit::Millisecond, _), Time64(TimeUnit::Microsecond))
=> {
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64MicrosecondType>(|x| {
+ x / MILLISECONDS % MICROSECONDS_IN_DAY
+ }),
+ ))
+ }
+ (Timestamp(TimeUnit::Millisecond, _), Time64(TimeUnit::Nanosecond)) =>
{
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64NanosecondType>(|x| {
+ x / MILLISECONDS % NANOSECONDS_IN_DAY
+ }),
+ ))
+ }
+ (Timestamp(TimeUnit::Microsecond, _), Time64(TimeUnit::Microsecond))
=> {
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64MicrosecondType>(|x| {
+ x / MICROSECONDS % MICROSECONDS_IN_DAY
+ }),
+ ))
+ }
+ (Timestamp(TimeUnit::Microsecond, _), Time64(TimeUnit::Nanosecond)) =>
{
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64NanosecondType>(|x| {
+ x / MICROSECONDS % NANOSECONDS_IN_DAY
+ }),
+ ))
+ }
+ (Timestamp(TimeUnit::Nanosecond, _), Time64(TimeUnit::Microsecond)) =>
{
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64MicrosecondType>(|x| {
+ x / NANOSECONDS % MICROSECONDS_IN_DAY
+ }),
+ ))
+ }
+ (Timestamp(TimeUnit::Nanosecond, _), Time64(TimeUnit::Nanosecond)) => {
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64NanosecondType>(|x| {
+ x / NANOSECONDS % NANOSECONDS_IN_DAY
+ }),
+ ))
+ }
+ (Timestamp(_, _), Time32(_)) => cast_with_options(
+ &cast_with_options(array, &Time64(TimeUnit::Microsecond),
cast_options)?,
+ to_type,
+ cast_options,
+ ),
Review Comment:
I think this could overflow the Time64 if the timestamp is in Seconds, when
a direct conversion wouldn't. Not 100% sure of this though
##########
arrow/src/compute/kernels/cast.rs:
##########
@@ -1413,6 +1422,71 @@ pub fn cast_with_options(
as_primitive_array::<TimestampNanosecondType>(array)
.unary::<_, Date64Type>(|x| x / (NANOSECONDS / MILLISECONDS)),
)),
+ (Timestamp(TimeUnit::Second, _), Time64(TimeUnit::Microsecond)) =>
Ok(Arc::new(
Review Comment:
I'm not sure this correctly handles timezones, in particular the value in
the TimestampArray is relative to UTC epoch. I would expect the Time64
conversion to compute the time relative to the encoded timezone if any.
I think just handling timezones of None would be fine for a first pass
##########
arrow/src/compute/kernels/cast.rs:
##########
@@ -1413,6 +1422,71 @@ pub fn cast_with_options(
as_primitive_array::<TimestampNanosecondType>(array)
.unary::<_, Date64Type>(|x| x / (NANOSECONDS / MILLISECONDS)),
)),
+ (Timestamp(TimeUnit::Second, _), Time64(TimeUnit::Microsecond)) =>
Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64MicrosecondType>(|x| {
+ x * MICROSECONDS % MICROSECONDS_IN_DAY
+ }),
+ )),
+ (Timestamp(TimeUnit::Second, _), Time64(TimeUnit::Nanosecond)) =>
Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64NanosecondType>(|x| {
+ x * NANOSECONDS % NANOSECONDS_IN_DAY
+ }),
+ )),
+ (Timestamp(TimeUnit::Millisecond, _), Time64(TimeUnit::Microsecond))
=> {
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64MicrosecondType>(|x| {
+ x / MILLISECONDS % MICROSECONDS_IN_DAY
+ }),
+ ))
+ }
+ (Timestamp(TimeUnit::Millisecond, _), Time64(TimeUnit::Nanosecond)) =>
{
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64NanosecondType>(|x| {
+ x / MILLISECONDS % NANOSECONDS_IN_DAY
+ }),
+ ))
+ }
+ (Timestamp(TimeUnit::Microsecond, _), Time64(TimeUnit::Microsecond))
=> {
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64MicrosecondType>(|x| {
+ x / MICROSECONDS % MICROSECONDS_IN_DAY
+ }),
+ ))
+ }
+ (Timestamp(TimeUnit::Microsecond, _), Time64(TimeUnit::Nanosecond)) =>
{
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64NanosecondType>(|x| {
+ x / MICROSECONDS % NANOSECONDS_IN_DAY
+ }),
+ ))
+ }
+ (Timestamp(TimeUnit::Nanosecond, _), Time64(TimeUnit::Microsecond)) =>
{
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64MicrosecondType>(|x| {
+ x / NANOSECONDS % MICROSECONDS_IN_DAY
+ }),
+ ))
+ }
+ (Timestamp(TimeUnit::Nanosecond, _), Time64(TimeUnit::Nanosecond)) => {
+ Ok(Arc::new(
+ as_primitive_array::<TimestampSecondType>(array)
+ .unary::<_, Time64NanosecondType>(|x| {
+ x / NANOSECONDS % NANOSECONDS_IN_DAY
+ }),
+ ))
+ }
Review Comment:
I think either approach is fine, whichever makes the code easier to read
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]