This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new cf59b6cd826 Structured interval types for `IntervalMonthDayNano` or
`IntervalDayTime` (#3125) (#5654) (#5769)
cf59b6cd826 is described below
commit cf59b6cd826412635dc391d4cf0f9d8310f5a226
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Mon May 20 12:03:48 2024 +0100
Structured interval types for `IntervalMonthDayNano` or `IntervalDayTime`
(#3125) (#5654) (#5769)
* Structured interval type (#3125) (#5654)
* Update integration-test
* Fix 32-bit build
* Review feedback
---
arrow-arith/src/numeric.rs | 43 +--
arrow-array/src/arithmetic.rs | 14 +-
arrow-array/src/array/dictionary_array.rs | 2 +-
arrow-array/src/array/primitive_array.rs | 52 ++-
arrow-array/src/types.rs | 77 +---
arrow-buffer/src/arith.rs | 63 +++
arrow-buffer/src/bigint/mod.rs | 55 +--
arrow-buffer/src/interval.rs | 424 +++++++++++++++++++++
arrow-buffer/src/lib.rs | 11 +-
arrow-buffer/src/native.rs | 70 +++-
arrow-cast/src/cast/mod.rs | 111 +++---
arrow-cast/src/display.rs | 25 +-
arrow-cast/src/pretty.rs | 40 +-
arrow-data/src/data.rs | 10 +-
arrow-integration-test/src/lib.rs | 67 ++--
arrow-ord/src/comparison.rs | 71 +++-
arrow-ord/src/ord.rs | 20 +-
arrow-row/src/fixed.rs | 42 +-
arrow-select/src/take.rs | 15 +-
arrow/benches/comparison_kernels.rs | 10 +-
arrow/src/util/bench_util.rs | 20 +-
arrow/tests/array_cast.rs | 12 +-
.../src/arrow/array_reader/fixed_len_byte_array.rs | 11 +-
parquet/src/arrow/arrow_reader/mod.rs | 10 +-
parquet/src/arrow/arrow_writer/mod.rs | 26 +-
25 files changed, 972 insertions(+), 329 deletions(-)
diff --git a/arrow-arith/src/numeric.rs b/arrow-arith/src/numeric.rs
index b2c87bba514..17b794762b9 100644
--- a/arrow-arith/src/numeric.rs
+++ b/arrow-arith/src/numeric.rs
@@ -25,7 +25,7 @@ use arrow_array::cast::AsArray;
use arrow_array::timezone::Tz;
use arrow_array::types::*;
use arrow_array::*;
-use arrow_buffer::ArrowNativeType;
+use arrow_buffer::{ArrowNativeType, IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit};
use crate::arity::{binary, try_binary};
@@ -343,12 +343,12 @@ trait TimestampOp: ArrowTimestampType {
type Duration: ArrowPrimitiveType<Native = i64>;
fn add_year_month(timestamp: i64, delta: i32, tz: Tz) -> Option<i64>;
- fn add_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option<i64>;
- fn add_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option<i64>;
+ fn add_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) ->
Option<i64>;
+ fn add_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz)
-> Option<i64>;
fn sub_year_month(timestamp: i64, delta: i32, tz: Tz) -> Option<i64>;
- fn sub_day_time(timestamp: i64, delta: i64, tz: Tz) -> Option<i64>;
- fn sub_month_day_nano(timestamp: i64, delta: i128, tz: Tz) -> Option<i64>;
+ fn sub_day_time(timestamp: i64, delta: IntervalDayTime, tz: Tz) ->
Option<i64>;
+ fn sub_month_day_nano(timestamp: i64, delta: IntervalMonthDayNano, tz: Tz)
-> Option<i64>;
}
macro_rules! timestamp {
@@ -360,11 +360,11 @@ macro_rules! timestamp {
Self::add_year_months(left, right, tz)
}
- fn add_day_time(left: i64, right: i64, tz: Tz) -> Option<i64> {
+ fn add_day_time(left: i64, right: IntervalDayTime, tz: Tz) ->
Option<i64> {
Self::add_day_time(left, right, tz)
}
- fn add_month_day_nano(left: i64, right: i128, tz: Tz) ->
Option<i64> {
+ fn add_month_day_nano(left: i64, right: IntervalMonthDayNano, tz:
Tz) -> Option<i64> {
Self::add_month_day_nano(left, right, tz)
}
@@ -372,11 +372,11 @@ macro_rules! timestamp {
Self::subtract_year_months(left, right, tz)
}
- fn sub_day_time(left: i64, right: i64, tz: Tz) -> Option<i64> {
+ fn sub_day_time(left: i64, right: IntervalDayTime, tz: Tz) ->
Option<i64> {
Self::subtract_day_time(left, right, tz)
}
- fn sub_month_day_nano(left: i64, right: i128, tz: Tz) ->
Option<i64> {
+ fn sub_month_day_nano(left: i64, right: IntervalMonthDayNano, tz:
Tz) -> Option<i64> {
Self::subtract_month_day_nano(left, right, tz)
}
}
@@ -506,12 +506,12 @@ fn timestamp_op<T: TimestampOp>(
/// Note: these should be fallible (#4456)
trait DateOp: ArrowTemporalType {
fn add_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
- fn add_day_time(timestamp: Self::Native, delta: i64) -> Self::Native;
- fn add_month_day_nano(timestamp: Self::Native, delta: i128) ->
Self::Native;
+ fn add_day_time(timestamp: Self::Native, delta: IntervalDayTime) ->
Self::Native;
+ fn add_month_day_nano(timestamp: Self::Native, delta:
IntervalMonthDayNano) -> Self::Native;
fn sub_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
- fn sub_day_time(timestamp: Self::Native, delta: i64) -> Self::Native;
- fn sub_month_day_nano(timestamp: Self::Native, delta: i128) ->
Self::Native;
+ fn sub_day_time(timestamp: Self::Native, delta: IntervalDayTime) ->
Self::Native;
+ fn sub_month_day_nano(timestamp: Self::Native, delta:
IntervalMonthDayNano) -> Self::Native;
}
macro_rules! date {
@@ -521,11 +521,11 @@ macro_rules! date {
Self::add_year_months(left, right)
}
- fn add_day_time(left: Self::Native, right: i64) -> Self::Native {
+ fn add_day_time(left: Self::Native, right: IntervalDayTime) ->
Self::Native {
Self::add_day_time(left, right)
}
- fn add_month_day_nano(left: Self::Native, right: i128) ->
Self::Native {
+ fn add_month_day_nano(left: Self::Native, right:
IntervalMonthDayNano) -> Self::Native {
Self::add_month_day_nano(left, right)
}
@@ -533,11 +533,11 @@ macro_rules! date {
Self::subtract_year_months(left, right)
}
- fn sub_day_time(left: Self::Native, right: i64) -> Self::Native {
+ fn sub_day_time(left: Self::Native, right: IntervalDayTime) ->
Self::Native {
Self::subtract_day_time(left, right)
}
- fn sub_month_day_nano(left: Self::Native, right: i128) ->
Self::Native {
+ fn sub_month_day_nano(left: Self::Native, right:
IntervalMonthDayNano) -> Self::Native {
Self::subtract_month_day_nano(left, right)
}
}
@@ -1346,13 +1346,10 @@ mod tests {
IntervalMonthDayNanoType::make_value(35, -19,
41899000000000000)
])
);
- let a = IntervalMonthDayNanoArray::from(vec![i64::MAX as i128]);
- let b = IntervalMonthDayNanoArray::from(vec![1]);
+ let a =
IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::MAX]);
+ let b =
IntervalMonthDayNanoArray::from(vec![IntervalMonthDayNano::ONE]);
let err = add(&a, &b).unwrap_err().to_string();
- assert_eq!(
- err,
- "Compute error: Overflow happened on: 9223372036854775807 + 1"
- );
+ assert_eq!(err, "Compute error: Overflow happened on: 2147483647 + 1");
}
fn test_duration_impl<T: ArrowPrimitiveType<Native = i64>>() {
diff --git a/arrow-array/src/arithmetic.rs b/arrow-array/src/arithmetic.rs
index 59053619030..72989ad7d5e 100644
--- a/arrow-array/src/arithmetic.rs
+++ b/arrow-array/src/arithmetic.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use arrow_buffer::{i256, ArrowNativeType};
+use arrow_buffer::{i256, ArrowNativeType, IntervalDayTime,
IntervalMonthDayNano};
use arrow_schema::ArrowError;
use half::f16;
use num::complex::ComplexFloat;
@@ -139,7 +139,10 @@ pub trait ArrowNativeTypeOp: ArrowNativeType {
macro_rules! native_type_op {
($t:tt) => {
- native_type_op!($t, 0, 1, $t::MIN, $t::MAX);
+ native_type_op!($t, 0, 1);
+ };
+ ($t:tt, $zero:expr, $one: expr) => {
+ native_type_op!($t, $zero, $one, $t::MIN, $t::MAX);
};
($t:tt, $zero:expr, $one: expr, $min: expr, $max: expr) => {
impl ArrowNativeTypeOp for $t {
@@ -284,6 +287,13 @@ native_type_op!(u32);
native_type_op!(u64);
native_type_op!(i256, i256::ZERO, i256::ONE, i256::MIN, i256::MAX);
+native_type_op!(IntervalDayTime, IntervalDayTime::ZERO, IntervalDayTime::ONE);
+native_type_op!(
+ IntervalMonthDayNano,
+ IntervalMonthDayNano::ZERO,
+ IntervalMonthDayNano::ONE
+);
+
macro_rules! native_type_float_op {
($t:tt, $zero:expr, $one:expr, $min:expr, $max:expr) => {
impl ArrowNativeTypeOp for $t {
diff --git a/arrow-array/src/array/dictionary_array.rs
b/arrow-array/src/array/dictionary_array.rs
index 763e340b792..045917a1bfb 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -946,7 +946,7 @@ where
/// return Ok(d.with_values(r));
/// }
/// downcast_primitive_array! {
-/// a => Ok(Arc::new(a.iter().map(|x| x.map(|x|
x.to_string())).collect::<StringArray>())),
+/// a => Ok(Arc::new(a.iter().map(|x| x.map(|x|
format!("{x:?}"))).collect::<StringArray>())),
/// d => Err(ArrowError::InvalidArgumentError(format!("{d:?} not
supported")))
/// }
/// }
diff --git a/arrow-array/src/array/primitive_array.rs
b/arrow-array/src/array/primitive_array.rs
index 924cab1ac83..919a1010116 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -1502,6 +1502,7 @@ mod tests {
use crate::builder::{Decimal128Builder, Decimal256Builder};
use crate::cast::downcast_array;
use crate::BooleanArray;
+ use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::TimeUnit;
#[test]
@@ -1624,33 +1625,46 @@ mod tests {
assert_eq!(-5, arr.value(2));
assert_eq!(-5, arr.values()[2]);
- // a day_time interval contains days and milliseconds, but we do not
yet have accessors for the values
- let arr = IntervalDayTimeArray::from(vec![Some(1), None, Some(-5)]);
+ let v0 = IntervalDayTime {
+ days: 34,
+ milliseconds: 1,
+ };
+ let v2 = IntervalDayTime {
+ days: -2,
+ milliseconds: -5,
+ };
+
+ let arr = IntervalDayTimeArray::from(vec![Some(v0), None, Some(v2)]);
+
assert_eq!(3, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(1, arr.null_count());
- assert_eq!(1, arr.value(0));
- assert_eq!(1, arr.values()[0]);
+ assert_eq!(v0, arr.value(0));
+ assert_eq!(v0, arr.values()[0]);
assert!(arr.is_null(1));
- assert_eq!(-5, arr.value(2));
- assert_eq!(-5, arr.values()[2]);
+ assert_eq!(v2, arr.value(2));
+ assert_eq!(v2, arr.values()[2]);
- // a month_day_nano interval contains months, days and nanoseconds,
- // but we do not yet have accessors for the values.
- // TODO: implement month, day, and nanos access method for
month_day_nano.
- let arr = IntervalMonthDayNanoArray::from(vec![
- Some(100000000000000000000),
- None,
- Some(-500000000000000000000),
- ]);
+ let v0 = IntervalMonthDayNano {
+ months: 2,
+ days: 34,
+ nanoseconds: -1,
+ };
+ let v2 = IntervalMonthDayNano {
+ months: -3,
+ days: -2,
+ nanoseconds: 4,
+ };
+
+ let arr = IntervalMonthDayNanoArray::from(vec![Some(v0), None,
Some(v2)]);
assert_eq!(3, arr.len());
assert_eq!(0, arr.offset());
assert_eq!(1, arr.null_count());
- assert_eq!(100000000000000000000, arr.value(0));
- assert_eq!(100000000000000000000, arr.values()[0]);
+ assert_eq!(v0, arr.value(0));
+ assert_eq!(v0, arr.values()[0]);
assert!(arr.is_null(1));
- assert_eq!(-500000000000000000000, arr.value(2));
- assert_eq!(-500000000000000000000, arr.values()[2]);
+ assert_eq!(v2, arr.value(2));
+ assert_eq!(v2, arr.values()[2]);
}
#[test]
@@ -2460,7 +2474,7 @@ mod tests {
expected = "PrimitiveArray expected data type Interval(MonthDayNano)
got Interval(DayTime)"
)]
fn test_invalid_interval_type() {
- let array = IntervalDayTimeArray::from(vec![1, 2, 3]);
+ let array = IntervalDayTimeArray::from(vec![IntervalDayTime::ZERO]);
let _ = IntervalMonthDayNanoArray::from(array.into_data());
}
diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs
index 038b2a291f5..198a11cb697 100644
--- a/arrow-array/src/types.rs
+++ b/arrow-array/src/types.rs
@@ -23,7 +23,7 @@ use crate::delta::{
use crate::temporal_conversions::as_datetime_with_timezone;
use crate::timezone::Tz;
use crate::{ArrowNativeTypeOp, OffsetSizeTrait};
-use arrow_buffer::{i256, Buffer, OffsetBuffer};
+use arrow_buffer::{i256, Buffer, IntervalDayTime, IntervalMonthDayNano,
OffsetBuffer};
use arrow_data::decimal::{validate_decimal256_precision,
validate_decimal_precision};
use arrow_data::{validate_binary_view, validate_string_view};
use arrow_schema::{
@@ -220,7 +220,7 @@ make_type!(
);
make_type!(
IntervalDayTimeType,
- i64,
+ IntervalDayTime,
DataType::Interval(IntervalUnit::DayTime),
r#"A “calendar” interval type in days and milliseconds.
@@ -247,7 +247,7 @@ which can lead to surprising results. Please see the
description of ordering on
);
make_type!(
IntervalMonthDayNanoType,
- i128,
+ IntervalMonthDayNano,
DataType::Interval(IntervalUnit::MonthDayNano),
r#"A “calendar” interval type in months, days, and nanoseconds.
@@ -264,11 +264,11 @@ Each field is independent (e.g. there is no constraint
that the quantity of
nanoseconds represents less than a day's worth of time).
```text
-┌──────────────────────────────┬─────────────┬──────────────┐
-│ Nanos │ Days │ Months │
-│ (64 bits) │ (32 bits) │ (32 bits) │
-└──────────────────────────────┴─────────────┴──────────────┘
- 0 63 95 127 bit offset
+┌───────────────┬─────────────┬─────────────────────────────┐
+│ Months │ Days │ Nanos │
+│ (32 bits) │ (32 bits) │ (64 bits) │
+└───────────────┴─────────────┴─────────────────────────────┘
+ 0 32 64 128 bit offset
```
Please see the [Arrow
Spec](https://github.com/apache/arrow/blob/081b4022fe6f659d8765efc82b3f4787c5039e3c/format/Schema.fbs#L409-L415)
for more details
@@ -917,25 +917,8 @@ impl IntervalDayTimeType {
/// * `days` - The number of days (+/-) represented in this interval
/// * `millis` - The number of milliseconds (+/-) represented in this
interval
#[inline]
- pub fn make_value(
- days: i32,
- millis: i32,
- ) -> <IntervalDayTimeType as ArrowPrimitiveType>::Native {
- /*
-
https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1433
- struct DayMilliseconds {
- int32_t days = 0;
- int32_t milliseconds = 0;
- ...
- }
- 64 56 48 40 32 24 16 8 0
- +-------+-------+-------+-------+-------+-------+-------+-------+
- | days | milliseconds |
- +-------+-------+-------+-------+-------+-------+-------+-------+
- */
- let m = millis as u64 & u32::MAX as u64;
- let d = (days as u64 & u32::MAX as u64) << 32;
- (m | d) as <IntervalDayTimeType as ArrowPrimitiveType>::Native
+ pub fn make_value(days: i32, milliseconds: i32) -> IntervalDayTime {
+ IntervalDayTime { days, milliseconds }
}
/// Turns a IntervalDayTimeType into a tuple of (days, milliseconds)
@@ -944,10 +927,8 @@ impl IntervalDayTimeType {
///
/// * `i` - The IntervalDayTimeType to convert
#[inline]
- pub fn to_parts(i: <IntervalDayTimeType as ArrowPrimitiveType>::Native) ->
(i32, i32) {
- let days = (i >> 32) as i32;
- let ms = i as i32;
- (days, ms)
+ pub fn to_parts(i: IntervalDayTime) -> (i32, i32) {
+ (i.days, i.milliseconds)
}
}
@@ -960,27 +941,12 @@ impl IntervalMonthDayNanoType {
/// * `days` - The number of days (+/-) represented in this interval
/// * `nanos` - The number of nanoseconds (+/-) represented in this
interval
#[inline]
- pub fn make_value(
- months: i32,
- days: i32,
- nanos: i64,
- ) -> <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native {
- /*
-
https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1475
- struct MonthDayNanos {
- int32_t months;
- int32_t days;
- int64_t nanoseconds;
+ pub fn make_value(months: i32, days: i32, nanoseconds: i64) ->
IntervalMonthDayNano {
+ IntervalMonthDayNano {
+ months,
+ days,
+ nanoseconds,
}
- 128 112 96 80 64 48 32 16 0
- +-------+-------+-------+-------+-------+-------+-------+-------+
- | months | days | nanos |
- +-------+-------+-------+-------+-------+-------+-------+-------+
- */
- let m = (months as u128 & u32::MAX as u128) << 96;
- let d = (days as u128 & u32::MAX as u128) << 64;
- let n = nanos as u128 & u64::MAX as u128;
- (m | d | n) as <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native
}
/// Turns a IntervalMonthDayNanoType into a tuple of (months, days, nanos)
@@ -989,13 +955,8 @@ impl IntervalMonthDayNanoType {
///
/// * `i` - The IntervalMonthDayNanoType to convert
#[inline]
- pub fn to_parts(
- i: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
- ) -> (i32, i32, i64) {
- let months = (i >> 96) as i32;
- let days = (i >> 64) as i32;
- let nanos = i as i64;
- (months, days, nanos)
+ pub fn to_parts(i: IntervalMonthDayNano) -> (i32, i32, i64) {
+ (i.months, i.days, i.nanoseconds)
}
}
diff --git a/arrow-buffer/src/arith.rs b/arrow-buffer/src/arith.rs
new file mode 100644
index 00000000000..ca693c3607d
--- /dev/null
+++ b/arrow-buffer/src/arith.rs
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// Derives `std::ops::$t` for `$ty`, with `$op` calling the `$checked` variant
+/// when debug_assertions are enabled and the `$wrapping` variant otherwise
+macro_rules! derive_arith {
+ ($ty:ty, $t:ident, $op:ident, $wrapping:ident, $checked:ident) => {
+ impl std::ops::$t for $ty {
+ type Output = $ty;
+
+ #[cfg(debug_assertions)]
+ fn $op(self, rhs: Self) -> Self::Output {
+ self.$checked(rhs)
+ .expect(concat!(stringify!($ty), " overflow"))
+ }
+
+ #[cfg(not(debug_assertions))]
+ fn $op(self, rhs: Self) -> Self::Output {
+ self.$wrapping(rhs)
+ }
+ }
+
+ impl<'a> std::ops::$t<$ty> for &'a $ty {
+ type Output = $ty;
+
+ fn $op(self, rhs: $ty) -> Self::Output {
+ (*self).$op(rhs)
+ }
+ }
+
+ impl<'a> std::ops::$t<&'a $ty> for $ty {
+ type Output = $ty;
+
+ fn $op(self, rhs: &'a $ty) -> Self::Output {
+ self.$op(*rhs)
+ }
+ }
+
+ impl<'a, 'b> std::ops::$t<&'b $ty> for &'a $ty {
+ type Output = $ty;
+
+ fn $op(self, rhs: &'b $ty) -> Self::Output {
+ (*self).$op(*rhs)
+ }
+ }
+ };
+}
+
+pub(crate) use derive_arith;
diff --git a/arrow-buffer/src/bigint/mod.rs b/arrow-buffer/src/bigint/mod.rs
index a8aaff13cd2..bbe65b073aa 100644
--- a/arrow-buffer/src/bigint/mod.rs
+++ b/arrow-buffer/src/bigint/mod.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+use crate::arith::derive_arith;
use crate::bigint::div::div_rem;
use num::cast::AsPrimitive;
use num::{BigInt, FromPrimitive, ToPrimitive};
@@ -638,55 +639,13 @@ fn mulx(a: u128, b: u128) -> (u128, u128) {
(low, high)
}
-macro_rules! derive_op {
- ($t:ident, $op:ident, $wrapping:ident, $checked:ident) => {
- impl std::ops::$t for i256 {
- type Output = i256;
+derive_arith!(i256, Add, add, wrapping_add, checked_add);
+derive_arith!(i256, Sub, sub, wrapping_sub, checked_sub);
+derive_arith!(i256, Mul, mul, wrapping_mul, checked_mul);
+derive_arith!(i256, Div, div, wrapping_div, checked_div);
+derive_arith!(i256, Rem, rem, wrapping_rem, checked_rem);
- #[cfg(debug_assertions)]
- fn $op(self, rhs: Self) -> Self::Output {
- self.$checked(rhs).expect("i256 overflow")
- }
-
- #[cfg(not(debug_assertions))]
- fn $op(self, rhs: Self) -> Self::Output {
- self.$wrapping(rhs)
- }
- }
-
- impl<'a> std::ops::$t<i256> for &'a i256 {
- type Output = i256;
-
- fn $op(self, rhs: i256) -> Self::Output {
- (*self).$op(rhs)
- }
- }
-
- impl<'a> std::ops::$t<&'a i256> for i256 {
- type Output = i256;
-
- fn $op(self, rhs: &'a i256) -> Self::Output {
- self.$op(*rhs)
- }
- }
-
- impl<'a, 'b> std::ops::$t<&'b i256> for &'a i256 {
- type Output = i256;
-
- fn $op(self, rhs: &'b i256) -> Self::Output {
- (*self).$op(*rhs)
- }
- }
- };
-}
-
-derive_op!(Add, add, wrapping_add, checked_add);
-derive_op!(Sub, sub, wrapping_sub, checked_sub);
-derive_op!(Mul, mul, wrapping_mul, checked_mul);
-derive_op!(Div, div, wrapping_div, checked_div);
-derive_op!(Rem, rem, wrapping_rem, checked_rem);
-
-impl std::ops::Neg for i256 {
+impl Neg for i256 {
type Output = i256;
#[cfg(debug_assertions)]
diff --git a/arrow-buffer/src/interval.rs b/arrow-buffer/src/interval.rs
new file mode 100644
index 00000000000..7e8043e9a72
--- /dev/null
+++ b/arrow-buffer/src/interval.rs
@@ -0,0 +1,424 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::arith::derive_arith;
+use std::ops::Neg;
+
+/// Value of an IntervalMonthDayNano array
+#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
+#[repr(C)]
+pub struct IntervalMonthDayNano {
+ pub months: i32,
+ pub days: i32,
+ pub nanoseconds: i64,
+}
+
+impl IntervalMonthDayNano {
+ /// The additive identity i.e. `0`.
+ pub const ZERO: Self = Self::new(0, 0, 0);
+
+ /// The multiplicative identity, i.e. `1`.
+ pub const ONE: Self = Self::new(1, 1, 1);
+
+ /// Negative one, i.e. `-1` in every field.
+ pub const MINUS_ONE: Self = Self::new(-1, -1, -1);
+
+ /// The maximum value that can be represented
+ pub const MAX: Self = Self::new(i32::MAX, i32::MAX, i64::MAX);
+
+ /// The minimum value that can be represented
+ pub const MIN: Self = Self::new(i32::MIN, i32::MIN, i64::MIN);
+
+ /// Create a new [`IntervalMonthDayNano`]
+ #[inline]
+ pub const fn new(months: i32, days: i32, nanoseconds: i64) -> Self {
+ Self {
+ months,
+ days,
+ nanoseconds,
+ }
+ }
+
+ /// Computes the absolute value
+ #[inline]
+ pub fn wrapping_abs(self) -> Self {
+ Self {
+ months: self.months.wrapping_abs(),
+ days: self.days.wrapping_abs(),
+ nanoseconds: self.nanoseconds.wrapping_abs(),
+ }
+ }
+
+ /// Computes the absolute value of each field, returning `None` if any field overflows
+ #[inline]
+ pub fn checked_abs(self) -> Option<Self> {
+ Some(Self {
+ months: self.months.checked_abs()?,
+ days: self.days.checked_abs()?,
+ nanoseconds: self.nanoseconds.checked_abs()?,
+ })
+ }
+
+ /// Negates the value
+ #[inline]
+ pub fn wrapping_neg(self) -> Self {
+ Self {
+ months: self.months.wrapping_neg(),
+ days: self.days.wrapping_neg(),
+ nanoseconds: self.nanoseconds.wrapping_neg(),
+ }
+ }
+
+ /// Negates each field, returning `None` if any field overflows
+ #[inline]
+ pub fn checked_neg(self) -> Option<Self> {
+ Some(Self {
+ months: self.months.checked_neg()?,
+ days: self.days.checked_neg()?,
+ nanoseconds: self.nanoseconds.checked_neg()?,
+ })
+ }
+
+ /// Performs wrapping addition
+ #[inline]
+ pub fn wrapping_add(self, other: Self) -> Self {
+ Self {
+ months: self.months.wrapping_add(other.months),
+ days: self.days.wrapping_add(other.days),
+ nanoseconds: self.nanoseconds.wrapping_add(other.nanoseconds),
+ }
+ }
+
+ /// Performs checked addition
+ #[inline]
+ pub fn checked_add(self, other: Self) -> Option<Self> {
+ Some(Self {
+ months: self.months.checked_add(other.months)?,
+ days: self.days.checked_add(other.days)?,
+ nanoseconds: self.nanoseconds.checked_add(other.nanoseconds)?,
+ })
+ }
+
+ /// Performs wrapping subtraction
+ #[inline]
+ pub fn wrapping_sub(self, other: Self) -> Self {
+ Self {
+ months: self.months.wrapping_sub(other.months),
+ days: self.days.wrapping_sub(other.days),
+ nanoseconds: self.nanoseconds.wrapping_sub(other.nanoseconds),
+ }
+ }
+
+ /// Performs checked subtraction
+ #[inline]
+ pub fn checked_sub(self, other: Self) -> Option<Self> {
+ Some(Self {
+ months: self.months.checked_sub(other.months)?,
+ days: self.days.checked_sub(other.days)?,
+ nanoseconds: self.nanoseconds.checked_sub(other.nanoseconds)?,
+ })
+ }
+
+ /// Performs wrapping multiplication
+ #[inline]
+ pub fn wrapping_mul(self, other: Self) -> Self {
+ Self {
+ months: self.months.wrapping_mul(other.months),
+ days: self.days.wrapping_mul(other.days),
+ nanoseconds: self.nanoseconds.wrapping_mul(other.nanoseconds),
+ }
+ }
+
+ /// Performs checked multiplication
+ pub fn checked_mul(self, other: Self) -> Option<Self> {
+ Some(Self {
+ months: self.months.checked_mul(other.months)?,
+ days: self.days.checked_mul(other.days)?,
+ nanoseconds: self.nanoseconds.checked_mul(other.nanoseconds)?,
+ })
+ }
+
+ /// Performs wrapping division
+ #[inline]
+ pub fn wrapping_div(self, other: Self) -> Self {
+ Self {
+ months: self.months.wrapping_div(other.months),
+ days: self.days.wrapping_div(other.days),
+ nanoseconds: self.nanoseconds.wrapping_div(other.nanoseconds),
+ }
+ }
+
+ /// Performs checked division
+ pub fn checked_div(self, other: Self) -> Option<Self> {
+ Some(Self {
+ months: self.months.checked_div(other.months)?,
+ days: self.days.checked_div(other.days)?,
+ nanoseconds: self.nanoseconds.checked_div(other.nanoseconds)?,
+ })
+ }
+
+ /// Performs wrapping remainder
+ #[inline]
+ pub fn wrapping_rem(self, other: Self) -> Self {
+ Self {
+ months: self.months.wrapping_rem(other.months),
+ days: self.days.wrapping_rem(other.days),
+ nanoseconds: self.nanoseconds.wrapping_rem(other.nanoseconds),
+ }
+ }
+
+ /// Performs checked remainder
+ pub fn checked_rem(self, other: Self) -> Option<Self> {
+ Some(Self {
+ months: self.months.checked_rem(other.months)?,
+ days: self.days.checked_rem(other.days)?,
+ nanoseconds: self.nanoseconds.checked_rem(other.nanoseconds)?,
+ })
+ }
+
+ /// Performs wrapping exponentiation
+ #[inline]
+ pub fn wrapping_pow(self, exp: u32) -> Self {
+ Self {
+ months: self.months.wrapping_pow(exp),
+ days: self.days.wrapping_pow(exp),
+ nanoseconds: self.nanoseconds.wrapping_pow(exp),
+ }
+ }
+
+ /// Performs checked exponentiation
+ #[inline]
+ pub fn checked_pow(self, exp: u32) -> Option<Self> {
+ Some(Self {
+ months: self.months.checked_pow(exp)?,
+ days: self.days.checked_pow(exp)?,
+ nanoseconds: self.nanoseconds.checked_pow(exp)?,
+ })
+ }
+}
+
+impl Neg for IntervalMonthDayNano {
+ type Output = Self;
+
+ #[cfg(debug_assertions)]
+ fn neg(self) -> Self::Output {
+ self.checked_neg().expect("IntervalMonthDayNano overflow")
+ }
+
+ #[cfg(not(debug_assertions))]
+ fn neg(self) -> Self::Output {
+ self.wrapping_neg()
+ }
+}
+
+derive_arith!(IntervalMonthDayNano, Add, add, wrapping_add, checked_add);
+derive_arith!(IntervalMonthDayNano, Sub, sub, wrapping_sub, checked_sub);
+derive_arith!(IntervalMonthDayNano, Mul, mul, wrapping_mul, checked_mul);
+derive_arith!(IntervalMonthDayNano, Div, div, wrapping_div, checked_div);
+derive_arith!(IntervalMonthDayNano, Rem, rem, wrapping_rem, checked_rem);
+
+/// Value of an IntervalDayTime array
+#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
+#[repr(C)]
+pub struct IntervalDayTime {
+ pub days: i32,
+ pub milliseconds: i32,
+}
+
+impl IntervalDayTime {
+ /// The additive identity i.e. `0`.
+ pub const ZERO: Self = Self::new(0, 0);
+
+ /// The multiplicative identity, i.e. `1`.
+ pub const ONE: Self = Self::new(1, 1);
+
+ /// Negative one, i.e. `-1` in every field.
+ pub const MINUS_ONE: Self = Self::new(-1, -1);
+
+ /// The maximum value that can be represented
+ pub const MAX: Self = Self::new(i32::MAX, i32::MAX);
+
+ /// The minimum value that can be represented
+ pub const MIN: Self = Self::new(i32::MIN, i32::MIN);
+
+ /// Create a new [`IntervalDayTime`]
+ #[inline]
+ pub const fn new(days: i32, milliseconds: i32) -> Self {
+ Self { days, milliseconds }
+ }
+
+ /// Computes the absolute value
+ #[inline]
+ pub fn wrapping_abs(self) -> Self {
+ Self {
+ days: self.days.wrapping_abs(),
+ milliseconds: self.milliseconds.wrapping_abs(),
+ }
+ }
+
+ /// Computes the absolute value of each field, returning `None` if any field overflows
+ #[inline]
+ pub fn checked_abs(self) -> Option<Self> {
+ Some(Self {
+ days: self.days.checked_abs()?,
+ milliseconds: self.milliseconds.checked_abs()?,
+ })
+ }
+
+ /// Negates the value
+ #[inline]
+ pub fn wrapping_neg(self) -> Self {
+ Self {
+ days: self.days.wrapping_neg(),
+ milliseconds: self.milliseconds.wrapping_neg(),
+ }
+ }
+
+ /// Negates each field, returning `None` if any field overflows
+ #[inline]
+ pub fn checked_neg(self) -> Option<Self> {
+ Some(Self {
+ days: self.days.checked_neg()?,
+ milliseconds: self.milliseconds.checked_neg()?,
+ })
+ }
+
+ /// Performs wrapping addition
+ #[inline]
+ pub fn wrapping_add(self, other: Self) -> Self {
+ Self {
+ days: self.days.wrapping_add(other.days),
+ milliseconds: self.milliseconds.wrapping_add(other.milliseconds),
+ }
+ }
+
+ /// Performs checked addition
+ #[inline]
+ pub fn checked_add(self, other: Self) -> Option<Self> {
+ Some(Self {
+ days: self.days.checked_add(other.days)?,
+ milliseconds: self.milliseconds.checked_add(other.milliseconds)?,
+ })
+ }
+
+ /// Performs wrapping subtraction
+ #[inline]
+ pub fn wrapping_sub(self, other: Self) -> Self {
+ Self {
+ days: self.days.wrapping_sub(other.days),
+ milliseconds: self.milliseconds.wrapping_sub(other.milliseconds),
+ }
+ }
+
+ /// Performs checked subtraction
+ #[inline]
+ pub fn checked_sub(self, other: Self) -> Option<Self> {
+ Some(Self {
+ days: self.days.checked_sub(other.days)?,
+ milliseconds: self.milliseconds.checked_sub(other.milliseconds)?,
+ })
+ }
+
+ /// Performs wrapping multiplication
+ #[inline]
+ pub fn wrapping_mul(self, other: Self) -> Self {
+ Self {
+ days: self.days.wrapping_mul(other.days),
+ milliseconds: self.milliseconds.wrapping_mul(other.milliseconds),
+ }
+ }
+
+ /// Performs checked multiplication
+ pub fn checked_mul(self, other: Self) -> Option<Self> {
+ Some(Self {
+ days: self.days.checked_mul(other.days)?,
+ milliseconds: self.milliseconds.checked_mul(other.milliseconds)?,
+ })
+ }
+
+ /// Performs wrapping division
+ #[inline]
+ pub fn wrapping_div(self, other: Self) -> Self {
+ Self {
+ days: self.days.wrapping_div(other.days),
+ milliseconds: self.milliseconds.wrapping_div(other.milliseconds),
+ }
+ }
+
+ /// Performs checked division
+ pub fn checked_div(self, other: Self) -> Option<Self> {
+ Some(Self {
+ days: self.days.checked_div(other.days)?,
+ milliseconds: self.milliseconds.checked_div(other.milliseconds)?,
+ })
+ }
+
+ /// Performs wrapping remainder
+ #[inline]
+ pub fn wrapping_rem(self, other: Self) -> Self {
+ Self {
+ days: self.days.wrapping_rem(other.days),
+ milliseconds: self.milliseconds.wrapping_rem(other.milliseconds),
+ }
+ }
+
+ /// Performs checked remainder
+ pub fn checked_rem(self, other: Self) -> Option<Self> {
+ Some(Self {
+ days: self.days.checked_rem(other.days)?,
+ milliseconds: self.milliseconds.checked_rem(other.milliseconds)?,
+ })
+ }
+
+ /// Performs wrapping exponentiation
+ #[inline]
+ pub fn wrapping_pow(self, exp: u32) -> Self {
+ Self {
+ days: self.days.wrapping_pow(exp),
+ milliseconds: self.milliseconds.wrapping_pow(exp),
+ }
+ }
+
+ /// Performs checked exponentiation
+ #[inline]
+ pub fn checked_pow(self, exp: u32) -> Option<Self> {
+ Some(Self {
+ days: self.days.checked_pow(exp)?,
+ milliseconds: self.milliseconds.checked_pow(exp)?,
+ })
+ }
+}
+
+impl Neg for IntervalDayTime {
+ type Output = Self;
+
+ #[cfg(debug_assertions)]
+ fn neg(self) -> Self::Output {
+ self.checked_neg().expect("IntervalDayMillisecond overflow")
+ }
+
+ #[cfg(not(debug_assertions))]
+ fn neg(self) -> Self::Output {
+ self.wrapping_neg()
+ }
+}
+
+derive_arith!(IntervalDayTime, Add, add, wrapping_add, checked_add);
+derive_arith!(IntervalDayTime, Sub, sub, wrapping_sub, checked_sub);
+derive_arith!(IntervalDayTime, Mul, mul, wrapping_mul, checked_mul);
+derive_arith!(IntervalDayTime, Div, div, wrapping_div, checked_div);
+derive_arith!(IntervalDayTime, Rem, rem, wrapping_rem, checked_rem);
diff --git a/arrow-buffer/src/lib.rs b/arrow-buffer/src/lib.rs
index 612897af9be..a7bf93ed0c1 100644
--- a/arrow-buffer/src/lib.rs
+++ b/arrow-buffer/src/lib.rs
@@ -28,10 +28,17 @@ pub mod builder;
pub use builder::*;
mod bigint;
-mod bytes;
-mod native;
pub use bigint::i256;
+mod bytes;
+
+mod native;
pub use native::*;
+
mod util;
pub use util::*;
+
+mod interval;
+pub use interval::*;
+
+mod arith;
diff --git a/arrow-buffer/src/native.rs b/arrow-buffer/src/native.rs
index de665d4e387..e05c1311ff3 100644
--- a/arrow-buffer/src/native.rs
+++ b/arrow-buffer/src/native.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use crate::i256;
+use crate::{i256, IntervalDayTime, IntervalMonthDayNano};
use half::f16;
mod private {
@@ -239,6 +239,60 @@ impl ArrowNativeType for i256 {
}
}
+impl private::Sealed for IntervalMonthDayNano {}
+impl ArrowNativeType for IntervalMonthDayNano {
+ fn from_usize(_: usize) -> Option<Self> {
+ None
+ }
+
+ fn as_usize(self) -> usize {
+ ((self.months as u64) | ((self.days as u64) << 32)) as usize
+ }
+
+ fn usize_as(i: usize) -> Self {
+ Self::new(i as _, ((i as u64) >> 32) as _, 0)
+ }
+
+ fn to_usize(self) -> Option<usize> {
+ None
+ }
+
+ fn to_isize(self) -> Option<isize> {
+ None
+ }
+
+ fn to_i64(self) -> Option<i64> {
+ None
+ }
+}
+
+impl private::Sealed for IntervalDayTime {}
+impl ArrowNativeType for IntervalDayTime {
+ fn from_usize(_: usize) -> Option<Self> {
+ None
+ }
+
+ fn as_usize(self) -> usize {
+ ((self.days as u64) | ((self.milliseconds as u64) << 32)) as usize
+ }
+
+ fn usize_as(i: usize) -> Self {
+ Self::new(i as _, ((i as u64) >> 32) as _)
+ }
+
+ fn to_usize(self) -> Option<usize> {
+ None
+ }
+
+ fn to_isize(self) -> Option<isize> {
+ None
+ }
+
+ fn to_i64(self) -> Option<i64> {
+ None
+ }
+}
+
/// Allows conversion from supported Arrow types to a byte slice.
pub trait ToByteSlice {
/// Converts this instance into a byte slice
@@ -282,4 +336,18 @@ mod tests {
assert!(a.to_usize().is_none());
assert_eq!(a.to_isize().unwrap(), -1);
}
+
+ #[test]
+ fn test_interval_usize() {
+ assert_eq!(IntervalDayTime::new(1, 0).as_usize(), 1);
+ assert_eq!(IntervalMonthDayNano::new(1, 0, 0).as_usize(), 1);
+
+ let a = IntervalDayTime::new(23, 53);
+ let b = IntervalDayTime::usize_as(a.as_usize());
+ assert_eq!(a, b);
+
+ let a = IntervalMonthDayNano::new(23, 53, 0);
+ let b = IntervalMonthDayNano::usize_as(a.as_usize());
+ assert_eq!(a, b);
+ }
}
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index f2fdfd04497..d03c46ae46a 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -46,7 +46,7 @@ use crate::cast::dictionary::*;
use crate::cast::list::*;
use crate::cast::string::*;
-use arrow_buffer::ScalarBuffer;
+use arrow_buffer::{IntervalMonthDayNano, ScalarBuffer};
use arrow_data::ByteView;
use chrono::{NaiveTime, Offset, TimeZone, Utc};
use std::cmp::Ordering;
@@ -277,11 +277,6 @@ pub fn can_cast_types(from_type: &DataType, to_type:
&DataType) -> bool {
DayTime => false,
MonthDayNano => false,
},
- (Int64, Interval(to_type)) => match to_type {
- YearMonth => false,
- DayTime => true,
- MonthDayNano => false,
- },
(Duration(_), Interval(MonthDayNano)) => true,
(Interval(MonthDayNano), Duration(_)) => true,
(Interval(YearMonth), Interval(MonthDayNano)) => true,
@@ -394,9 +389,9 @@ fn cast_month_day_nano_to_duration<D:
ArrowTemporalType<Native = i64>>(
};
if cast_options.safe {
- let iter = array
- .iter()
- .map(|v| v.and_then(|v| (v >> 64 == 0).then_some((v as i64) /
scale)));
+ let iter = array.iter().map(|v| {
+ v.and_then(|v| (v.days == 0 && v.months ==
0).then_some(v.nanoseconds / scale))
+ });
Ok(Arc::new(unsafe {
PrimitiveArray::<D>::from_trusted_len_iter(iter)
}))
@@ -404,8 +399,8 @@ fn cast_month_day_nano_to_duration<D:
ArrowTemporalType<Native = i64>>(
let vec = array
.iter()
.map(|v| {
- v.map(|v| match v >> 64 {
- 0 => Ok((v as i64) / scale),
+ v.map(|v| match v.days == 0 && v.months == 0 {
+ true => Ok((v.nanoseconds) / scale),
_ => Err(ArrowError::ComputeError(
"Cannot convert interval containing non-zero months or
days to duration"
.to_string(),
@@ -444,9 +439,12 @@ fn cast_duration_to_interval<D: ArrowTemporalType<Native =
i64>>(
};
if cast_options.safe {
- let iter = array
- .iter()
- .map(|v| v.and_then(|v| v.checked_mul(scale).map(|v| v as i128)));
+ let iter = array.iter().map(|v| {
+ v.and_then(|v| {
+ v.checked_mul(scale)
+ .map(|v| IntervalMonthDayNano::new(0, 0, v))
+ })
+ });
Ok(Arc::new(unsafe {
PrimitiveArray::<IntervalMonthDayNanoType>::from_trusted_len_iter(iter)
}))
@@ -456,7 +454,7 @@ fn cast_duration_to_interval<D: ArrowTemporalType<Native =
i64>>(
.map(|v| {
v.map(|v| {
if let Ok(v) = v.mul_checked(scale) {
- Ok(v as i128)
+ Ok(IntervalMonthDayNano::new(0, 0, v))
} else {
Err(ArrowError::ComputeError(format!(
"Cannot cast to {:?}. Overflowing on {:?}",
@@ -1964,18 +1962,9 @@ pub fn cast_with_options(
(Interval(IntervalUnit::DayTime),
Interval(IntervalUnit::MonthDayNano)) => {
cast_interval_day_time_to_interval_month_day_nano(array,
cast_options)
}
- (Interval(IntervalUnit::YearMonth), Int64) => {
- cast_numeric_arrays::<IntervalYearMonthType, Int64Type>(array,
cast_options)
- }
- (Interval(IntervalUnit::DayTime), Int64) => {
- cast_reinterpret_arrays::<IntervalDayTimeType, Int64Type>(array)
- }
(Int32, Interval(IntervalUnit::YearMonth)) => {
cast_reinterpret_arrays::<Int32Type, IntervalYearMonthType>(array)
}
- (Int64, Interval(IntervalUnit::DayTime)) => {
- cast_reinterpret_arrays::<Int64Type, IntervalDayTimeType>(array)
- }
(_, _) => Err(ArrowError::CastError(format!(
"Casting from {from_type:?} to {to_type:?} not supported",
))),
@@ -2340,7 +2329,7 @@ where
#[cfg(test)]
mod tests {
- use arrow_buffer::{Buffer, NullBuffer};
+ use arrow_buffer::{Buffer, IntervalDayTime, NullBuffer};
use chrono::NaiveDate;
use half::f16;
@@ -5064,25 +5053,6 @@ mod tests {
}
}
- #[test]
- fn test_cast_interval_to_i64() {
- let base = vec![5, 6, 7, 8];
-
- let interval_arrays = vec![
- Arc::new(IntervalDayTimeArray::from(base.clone())) as ArrayRef,
- Arc::new(IntervalYearMonthArray::from(
- base.iter().map(|x| *x as i32).collect::<Vec<i32>>(),
- )) as ArrayRef,
- ];
-
- for arr in interval_arrays {
- assert!(can_cast_types(arr.data_type(), &DataType::Int64));
- let result = cast(&arr, &DataType::Int64).unwrap();
- let result = result.as_primitive::<Int64Type>();
- assert_eq!(base.as_slice(), result.values());
- }
- }
-
#[test]
fn test_cast_to_strings() {
let a = Int32Array::from(vec![1, 2, 3]);
@@ -8491,7 +8461,10 @@ mod tests {
casted_array.data_type(),
&DataType::Interval(IntervalUnit::MonthDayNano)
);
- assert_eq!(casted_array.value(0), 1234567000000000);
+ assert_eq!(
+ casted_array.value(0),
+ IntervalMonthDayNano::new(0, 0, 1234567000000000)
+ );
let array = vec![i64::MAX];
let casted_array =
cast_from_duration_to_interval::<DurationSecondType>(
@@ -8521,7 +8494,10 @@ mod tests {
casted_array.data_type(),
&DataType::Interval(IntervalUnit::MonthDayNano)
);
- assert_eq!(casted_array.value(0), 1234567000000);
+ assert_eq!(
+ casted_array.value(0),
+ IntervalMonthDayNano::new(0, 0, 1234567000000)
+ );
let array = vec![i64::MAX];
let casted_array =
cast_from_duration_to_interval::<DurationMillisecondType>(
@@ -8551,7 +8527,10 @@ mod tests {
casted_array.data_type(),
&DataType::Interval(IntervalUnit::MonthDayNano)
);
- assert_eq!(casted_array.value(0), 1234567000);
+ assert_eq!(
+ casted_array.value(0),
+ IntervalMonthDayNano::new(0, 0, 1234567000)
+ );
let array = vec![i64::MAX];
let casted_array =
cast_from_duration_to_interval::<DurationMicrosecondType>(
@@ -8581,7 +8560,10 @@ mod tests {
casted_array.data_type(),
&DataType::Interval(IntervalUnit::MonthDayNano)
);
- assert_eq!(casted_array.value(0), 1234567);
+ assert_eq!(
+ casted_array.value(0),
+ IntervalMonthDayNano::new(0, 0, 1234567)
+ );
let array = vec![i64::MAX];
let casted_array =
cast_from_duration_to_interval::<DurationNanosecondType>(
@@ -8592,7 +8574,10 @@ mod tests {
},
)
.unwrap();
- assert_eq!(casted_array.value(0), 9223372036854775807);
+ assert_eq!(
+ casted_array.value(0),
+ IntervalMonthDayNano::new(0, 0, i64::MAX)
+ );
}
/// helper function to test casting from interval to duration
@@ -8617,14 +8602,15 @@ mod tests {
safe: false,
format_options: FormatOptions::default(),
};
+ let v = IntervalMonthDayNano::new(0, 0, 1234567);
// from interval month day nano to duration second
- let array = vec![1234567].into();
+ let array = vec![v].into();
let casted_array: DurationSecondArray =
cast_from_interval_to_duration(&array, &nullable).unwrap();
assert_eq!(casted_array.value(0), 0);
- let array = vec![i128::MAX].into();
+ let array = vec![IntervalMonthDayNano::MAX].into();
let casted_array: DurationSecondArray =
cast_from_interval_to_duration(&array, &nullable).unwrap();
assert!(!casted_array.is_valid(0));
@@ -8633,12 +8619,12 @@ mod tests {
assert!(res.is_err());
// from interval month day nano to duration millisecond
- let array = vec![1234567].into();
+ let array = vec![v].into();
let casted_array: DurationMillisecondArray =
cast_from_interval_to_duration(&array, &nullable).unwrap();
assert_eq!(casted_array.value(0), 1);
- let array = vec![i128::MAX].into();
+ let array = vec![IntervalMonthDayNano::MAX].into();
let casted_array: DurationMillisecondArray =
cast_from_interval_to_duration(&array, &nullable).unwrap();
assert!(!casted_array.is_valid(0));
@@ -8647,12 +8633,12 @@ mod tests {
assert!(res.is_err());
// from interval month day nano to duration microsecond
- let array = vec![1234567].into();
+ let array = vec![v].into();
let casted_array: DurationMicrosecondArray =
cast_from_interval_to_duration(&array, &nullable).unwrap();
assert_eq!(casted_array.value(0), 1234);
- let array = vec![i128::MAX].into();
+ let array = vec![IntervalMonthDayNano::MAX].into();
let casted_array =
cast_from_interval_to_duration::<DurationMicrosecondType>(&array,
&nullable).unwrap();
assert!(!casted_array.is_valid(0));
@@ -8662,12 +8648,12 @@ mod tests {
assert!(casted_array.is_err());
// from interval month day nano to duration nanosecond
- let array = vec![1234567].into();
+ let array = vec![v].into();
let casted_array: DurationNanosecondArray =
cast_from_interval_to_duration(&array, &nullable).unwrap();
assert_eq!(casted_array.value(0), 1234567);
- let array = vec![i128::MAX].into();
+ let array = vec![IntervalMonthDayNano::MAX].into();
let casted_array: DurationNanosecondArray =
cast_from_interval_to_duration(&array, &nullable).unwrap();
assert!(!casted_array.is_valid(0));
@@ -8730,12 +8716,15 @@ mod tests {
casted_array.data_type(),
&DataType::Interval(IntervalUnit::MonthDayNano)
);
- assert_eq!(casted_array.value(0), 97812474910747780469848774134464512);
+ assert_eq!(
+ casted_array.value(0),
+ IntervalMonthDayNano::new(1234567, 0, 0)
+ );
}
/// helper function to test casting from interval day time to interval
month day nano
fn cast_from_interval_day_time_to_interval_month_day_nano(
- array: Vec<i64>,
+ array: Vec<IntervalDayTime>,
cast_options: &CastOptions,
) -> Result<PrimitiveArray<IntervalMonthDayNanoType>, ArrowError> {
let array = PrimitiveArray::<IntervalDayTimeType>::from(array);
@@ -8753,7 +8742,7 @@ mod tests {
#[test]
fn test_cast_from_interval_day_time_to_interval_month_day_nano() {
// from interval day time to interval month day nano
- let array = vec![123];
+ let array = vec![IntervalDayTime::new(123, 0)];
let casted_array =
cast_from_interval_day_time_to_interval_month_day_nano(array,
&CastOptions::default())
.unwrap();
@@ -8761,7 +8750,7 @@ mod tests {
casted_array.data_type(),
&DataType::Interval(IntervalUnit::MonthDayNano)
);
- assert_eq!(casted_array.value(0), 123000000);
+ assert_eq!(casted_array.value(0), IntervalMonthDayNano::new(0, 123,
0));
}
#[test]
diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs
index a5f69b66094..edde288e9c3 100644
--- a/arrow-cast/src/display.rs
+++ b/arrow-cast/src/display.rs
@@ -660,19 +660,16 @@ impl<'a> DisplayIndex for &'a
PrimitiveArray<IntervalYearMonthType> {
impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalDayTimeType> {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
- let value: u64 = self.value(idx) as u64;
+ let value = self.value(idx);
- let days_parts: i32 = ((value & 0xFFFFFFFF00000000) >> 32) as i32;
- let milliseconds_part: i32 = (value & 0xFFFFFFFF) as i32;
-
- let secs = milliseconds_part / 1_000;
+ let secs = value.milliseconds / 1_000;
let mins = secs / 60;
let hours = mins / 60;
let secs = secs - (mins * 60);
let mins = mins - (hours * 60);
- let milliseconds = milliseconds_part % 1_000;
+ let milliseconds = value.milliseconds % 1_000;
let secs_sign = if secs < 0 || milliseconds < 0 {
"-"
@@ -683,7 +680,7 @@ impl<'a> DisplayIndex for &'a
PrimitiveArray<IntervalDayTimeType> {
write!(
f,
"0 years 0 mons {} days {} hours {} mins {}{}.{:03} secs",
- days_parts,
+ value.days,
hours,
mins,
secs_sign,
@@ -696,28 +693,24 @@ impl<'a> DisplayIndex for &'a
PrimitiveArray<IntervalDayTimeType> {
impl<'a> DisplayIndex for &'a PrimitiveArray<IntervalMonthDayNanoType> {
fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
- let value: u128 = self.value(idx) as u128;
-
- let months_part: i32 = ((value & 0xFFFFFFFF000000000000000000000000)
>> 96) as i32;
- let days_part: i32 = ((value & 0xFFFFFFFF0000000000000000) >> 64) as
i32;
- let nanoseconds_part: i64 = (value & 0xFFFFFFFFFFFFFFFF) as i64;
+ let value = self.value(idx);
- let secs = nanoseconds_part / 1_000_000_000;
+ let secs = value.nanoseconds / 1_000_000_000;
let mins = secs / 60;
let hours = mins / 60;
let secs = secs - (mins * 60);
let mins = mins - (hours * 60);
- let nanoseconds = nanoseconds_part % 1_000_000_000;
+ let nanoseconds = value.nanoseconds % 1_000_000_000;
let secs_sign = if secs < 0 || nanoseconds < 0 { "-" } else { "" };
write!(
f,
"0 years {} mons {} days {} hours {} mins {}{}.{:09} secs",
- months_part,
- days_part,
+ value.months,
+ value.days,
hours,
mins,
secs_sign,
diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs
index 00bba928114..49fb359b9d4 100644
--- a/arrow-cast/src/pretty.rs
+++ b/arrow-cast/src/pretty.rs
@@ -142,7 +142,7 @@ mod tests {
use arrow_array::builder::*;
use arrow_array::types::*;
use arrow_array::*;
- use arrow_buffer::ScalarBuffer;
+ use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer};
use arrow_schema::*;
use crate::display::array_value_to_string;
@@ -963,12 +963,12 @@ mod tests {
#[test]
fn test_pretty_format_interval_day_time() {
let arr = Arc::new(arrow_array::IntervalDayTimeArray::from(vec![
- Some(-600000),
- Some(4294966295),
- Some(4294967295),
- Some(1),
- Some(10),
- Some(100),
+ Some(IntervalDayTime::new(-1, -600_000)),
+ Some(IntervalDayTime::new(0, -1001)),
+ Some(IntervalDayTime::new(0, -1)),
+ Some(IntervalDayTime::new(0, 1)),
+ Some(IntervalDayTime::new(0, 10)),
+ Some(IntervalDayTime::new(0, 100)),
]));
let schema = Arc::new(Schema::new(vec![Field::new(
@@ -1002,19 +1002,19 @@ mod tests {
#[test]
fn test_pretty_format_interval_month_day_nano_array() {
let arr = Arc::new(arrow_array::IntervalMonthDayNanoArray::from(vec![
- Some(-600000000000),
- Some(18446744072709551615),
- Some(18446744073709551615),
- Some(1),
- Some(10),
- Some(100),
- Some(1_000),
- Some(10_000),
- Some(100_000),
- Some(1_000_000),
- Some(10_000_000),
- Some(100_000_000),
- Some(1_000_000_000),
+ Some(IntervalMonthDayNano::new(-1, -1, -600_000_000_000)),
+ Some(IntervalMonthDayNano::new(0, 0, -1_000_000_001)),
+ Some(IntervalMonthDayNano::new(0, 0, -1)),
+ Some(IntervalMonthDayNano::new(0, 0, 1)),
+ Some(IntervalMonthDayNano::new(0, 0, 10)),
+ Some(IntervalMonthDayNano::new(0, 0, 100)),
+ Some(IntervalMonthDayNano::new(0, 0, 1_000)),
+ Some(IntervalMonthDayNano::new(0, 0, 10_000)),
+ Some(IntervalMonthDayNano::new(0, 0, 100_000)),
+ Some(IntervalMonthDayNano::new(0, 0, 1_000_000)),
+ Some(IntervalMonthDayNano::new(0, 0, 10_000_000)),
+ Some(IntervalMonthDayNano::new(0, 0, 100_000_000)),
+ Some(IntervalMonthDayNano::new(0, 0, 1_000_000_000)),
]));
let schema = Arc::new(Schema::new(vec![Field::new(
diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index d092fd049d7..5ee96639488 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -20,7 +20,9 @@
use crate::bit_iterator::BitSliceIterator;
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
-use arrow_buffer::{bit_util, i256, ArrowNativeType, Buffer, MutableBuffer};
+use arrow_buffer::{
+ bit_util, i256, ArrowNativeType, Buffer, IntervalDayTime,
IntervalMonthDayNano, MutableBuffer,
+};
use arrow_schema::{ArrowError, DataType, UnionMode};
use std::ops::Range;
use std::sync::Arc;
@@ -1568,8 +1570,10 @@ pub fn layout(data_type: &DataType) -> DataTypeLayout {
DataType::Time32(_) => DataTypeLayout::new_fixed_width::<i32>(),
DataType::Time64(_) => DataTypeLayout::new_fixed_width::<i64>(),
DataType::Interval(YearMonth) =>
DataTypeLayout::new_fixed_width::<i32>(),
- DataType::Interval(DayTime) =>
DataTypeLayout::new_fixed_width::<i64>(),
- DataType::Interval(MonthDayNano) =>
DataTypeLayout::new_fixed_width::<i128>(),
+ DataType::Interval(DayTime) =>
DataTypeLayout::new_fixed_width::<IntervalDayTime>(),
+ DataType::Interval(MonthDayNano) => {
+ DataTypeLayout::new_fixed_width::<IntervalMonthDayNano>()
+ }
DataType::Duration(_) => DataTypeLayout::new_fixed_width::<i64>(),
DataType::Decimal128(_, _) =>
DataTypeLayout::new_fixed_width::<i128>(),
DataType::Decimal256(_, _) =>
DataTypeLayout::new_fixed_width::<i256>(),
diff --git a/arrow-integration-test/src/lib.rs
b/arrow-integration-test/src/lib.rs
index 30f0ccfbe12..66fa9f3320e 100644
--- a/arrow-integration-test/src/lib.rs
+++ b/arrow-integration-test/src/lib.rs
@@ -21,7 +21,7 @@
//!
//! This is not a canonical format, but provides a human-readable way of
verifying language implementations
-use arrow_buffer::ScalarBuffer;
+use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer};
use hex::decode;
use num::BigInt;
use num::Signed;
@@ -32,7 +32,6 @@ use std::sync::Arc;
use arrow::array::*;
use arrow::buffer::{Buffer, MutableBuffer};
-use arrow::compute;
use arrow::datatypes::*;
use arrow::error::{ArrowError, Result};
use arrow::util::bit_util;
@@ -349,10 +348,7 @@ pub fn array_from_json(
}
Ok(Arc::new(b.finish()))
}
- DataType::Int32
- | DataType::Date32
- | DataType::Time32(_)
- | DataType::Interval(IntervalUnit::YearMonth) => {
+ DataType::Int32 | DataType::Date32 | DataType::Time32(_) => {
let mut b = Int32Builder::with_capacity(json_col.count);
for (is_valid, value) in json_col
.validity
@@ -367,14 +363,29 @@ pub fn array_from_json(
};
}
let array = Arc::new(b.finish()) as ArrayRef;
- compute::cast(&array, field.data_type())
+ arrow::compute::cast(&array, field.data_type())
+ }
+ DataType::Interval(IntervalUnit::YearMonth) => {
+ let mut b =
IntervalYearMonthBuilder::with_capacity(json_col.count);
+ for (is_valid, value) in json_col
+ .validity
+ .as_ref()
+ .unwrap()
+ .iter()
+ .zip(json_col.data.unwrap())
+ {
+ match is_valid {
+ 1 => b.append_value(value.as_i64().unwrap() as i32),
+ _ => b.append_null(),
+ };
+ }
+ Ok(Arc::new(b.finish()))
}
DataType::Int64
| DataType::Date64
| DataType::Time64(_)
| DataType::Timestamp(_, _)
- | DataType::Duration(_)
- | DataType::Interval(IntervalUnit::DayTime) => {
+ | DataType::Duration(_) => {
let mut b = Int64Builder::with_capacity(json_col.count);
for (is_valid, value) in json_col
.validity
@@ -387,6 +398,25 @@ pub fn array_from_json(
1 => b.append_value(match value {
Value::Number(n) => n.as_i64().unwrap(),
Value::String(s) => s.parse().expect("Unable to parse
string as i64"),
+ _ => panic!("Unable to parse {value:?} as number"),
+ }),
+ _ => b.append_null(),
+ };
+ }
+ let array = Arc::new(b.finish()) as ArrayRef;
+ arrow::compute::cast(&array, field.data_type())
+ }
+ DataType::Interval(IntervalUnit::DayTime) => {
+ let mut b = IntervalDayTimeBuilder::with_capacity(json_col.count);
+ for (is_valid, value) in json_col
+ .validity
+ .as_ref()
+ .unwrap()
+ .iter()
+ .zip(json_col.data.unwrap())
+ {
+ match is_valid {
+ 1 => b.append_value(match value {
Value::Object(ref map)
if map.contains_key("days") &&
map.contains_key("milliseconds") =>
{
@@ -397,13 +427,9 @@ pub fn array_from_json(
match (days, milliseconds) {
(Value::Number(d), Value::Number(m))
=> {
- let mut bytes = [0_u8; 8];
- let m = (m.as_i64().unwrap() as
i32).to_le_bytes();
- let d = (d.as_i64().unwrap() as
i32).to_le_bytes();
-
- let c = [d, m].concat();
-
bytes.copy_from_slice(c.as_slice());
- i64::from_le_bytes(bytes)
+ let days = d.as_i64().unwrap() as
_;
+ let millis = m.as_i64().unwrap()
as _;
+ IntervalDayTime::new(days, millis)
}
_ => {
panic!("Unable to parse {value:?}
as interval daytime")
@@ -418,8 +444,7 @@ pub fn array_from_json(
_ => b.append_null(),
};
}
- let array = Arc::new(b.finish()) as ArrayRef;
- compute::cast(&array, field.data_type())
+ Ok(Arc::new(b.finish()))
}
DataType::UInt8 => {
let mut b = UInt8Builder::with_capacity(json_col.count);
@@ -523,11 +548,7 @@ pub fn array_from_json(
let months = months.as_i64().unwrap() as
i32;
let days = days.as_i64().unwrap() as i32;
let nanoseconds =
nanoseconds.as_i64().unwrap();
- let months_days_ns: i128 =
- ((nanoseconds as i128) &
0xFFFFFFFFFFFFFFFF) << 64
- | ((days as i128) & 0xFFFFFFFF) <<
32
- | ((months as i128) & 0xFFFFFFFF);
- months_days_ns
+ IntervalMonthDayNano::new(months, days,
nanoseconds)
}
(_, _, _) => {
panic!("Unable to parse {v:?} as
MonthDayNano")
diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs
index 4f56883eaeb..4197b610e7a 100644
--- a/arrow-ord/src/comparison.rs
+++ b/arrow-ord/src/comparison.rs
@@ -119,7 +119,7 @@ mod tests {
ListBuilder, PrimitiveDictionaryBuilder, StringBuilder,
StringDictionaryBuilder,
};
use arrow_array::types::*;
- use arrow_buffer::{i256, ArrowNativeType, Buffer};
+ use arrow_buffer::{i256, ArrowNativeType, Buffer, IntervalDayTime,
IntervalMonthDayNano};
use arrow_data::ArrayData;
use arrow_schema::{DataType, Field};
use half::f16;
@@ -856,26 +856,48 @@ mod tests {
#[test]
fn test_interval_array() {
- let a = IntervalDayTimeArray::from(vec![Some(0), Some(6), Some(834),
None, Some(3), None]);
- let b =
- IntervalDayTimeArray::from(vec![Some(70), Some(6), Some(833),
Some(6), Some(3), None]);
+ let a = IntervalDayTimeArray::from(vec![
+ Some(IntervalDayTime::new(0, 1)),
+ Some(IntervalDayTime::new(0, 6)),
+ Some(IntervalDayTime::new(4, 834)),
+ None,
+ Some(IntervalDayTime::new(2, 3)),
+ None
+ ]);
+ let b = IntervalDayTimeArray::from(vec![
+ Some(IntervalDayTime::new(0, 4)),
+ Some(IntervalDayTime::new(0, 6)),
+ Some(IntervalDayTime::new(0, 834)),
+ None,
+ Some(IntervalDayTime::new(2, 3)),
+ None
+ ]);
let res = crate::cmp::eq(&a, &b).unwrap();
assert_eq!(
&res,
&BooleanArray::from(vec![Some(false), Some(true), Some(false),
None, Some(true), None])
);
- let a =
- IntervalMonthDayNanoArray::from(vec![Some(0), Some(6), Some(834),
None, Some(3), None]);
- let b = IntervalMonthDayNanoArray::from(
- vec![Some(86), Some(5), Some(8), Some(6), Some(3), None],
- );
+ let a = IntervalMonthDayNanoArray::from(vec![
+ Some(IntervalMonthDayNano::new(0, 0, 6)),
+ Some(IntervalMonthDayNano::new(2, 0, 0)),
+ Some(IntervalMonthDayNano::new(2, -5, 0)),
+ None,
+ Some(IntervalMonthDayNano::new(0, 0, 2)),
+ Some(IntervalMonthDayNano::new(5, 0, -23)),
+ ]);
+ let b = IntervalMonthDayNanoArray::from(vec![
+ Some(IntervalMonthDayNano::new(0, 0, 6)),
+ Some(IntervalMonthDayNano::new(2, 3, 0)),
+ Some(IntervalMonthDayNano::new(5, -5, 0)),
+ None,
+ Some(IntervalMonthDayNano::new(-1, 0, 2)),
+ None,
+ ]);
let res = crate::cmp::lt(&a, &b).unwrap();
assert_eq!(
&res,
- &BooleanArray::from(
- vec![Some(true), Some(false), Some(false), None, Some(false),
None]
- )
+ &BooleanArray::from(vec![Some(false), Some(true), Some(true),
None, Some(false), None])
);
let a =
@@ -1421,10 +1443,22 @@ mod tests {
#[test]
fn test_interval_dyn_scalar() {
- let array = IntervalDayTimeArray::from(vec![Some(1), None, Some(8),
None, Some(10)]);
+ let array = IntervalDayTimeArray::from(vec![
+ Some(IntervalDayTime::new(1, 0)),
+ None,
+ Some(IntervalDayTime::new(8, 0)),
+ None,
+ Some(IntervalDayTime::new(10, 0)),
+ ]);
test_primitive_dyn_scalar(array);
- let array = IntervalMonthDayNanoArray::from(vec![Some(1), None,
Some(8), None, Some(10)]);
+ let array = IntervalMonthDayNanoArray::from(vec![
+ Some(IntervalMonthDayNano::new(1, 0, 0)),
+ None,
+ Some(IntervalMonthDayNano::new(8, 0, 0)),
+ None,
+ Some(IntervalMonthDayNano::new(10, 0, 0)),
+ ]);
test_primitive_dyn_scalar(array);
let array = IntervalYearMonthArray::from(vec![Some(1), None, Some(8),
None, Some(10)]);
@@ -2054,11 +2088,16 @@ mod tests {
#[test]
fn test_eq_dyn_neq_dyn_dictionary_interval_array() {
- let values = IntervalDayTimeArray::from(vec![1, 6, 10, 2, 3, 5]);
+ let values = IntervalDayTimeArray::from(vec![
+ Some(IntervalDayTime::new(0, 1)),
+ Some(IntervalDayTime::new(0, 1)),
+ Some(IntervalDayTime::new(0, 6)),
+ Some(IntervalDayTime::new(4, 10)),
+ ]);
let values = Arc::new(values) as ArrayRef;
let keys1 = UInt64Array::from_iter_values([1_u64, 0, 3]);
- let keys2 = UInt64Array::from_iter_values([2_u64, 0, 3]);
+ let keys2 = UInt64Array::from_iter_values([2_u64, 1, 3]);
let dict_array1 = DictionaryArray::new(keys1, values.clone());
let dict_array2 = DictionaryArray::new(keys2, values.clone());
diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs
index e793038de92..8f21cd7c498 100644
--- a/arrow-ord/src/ord.rs
+++ b/arrow-ord/src/ord.rs
@@ -131,7 +131,7 @@ pub fn build_compare(left: &dyn Array, right: &dyn Array)
-> Result<DynComparato
#[cfg(test)]
pub mod tests {
use super::*;
- use arrow_buffer::{i256, OffsetBuffer};
+ use arrow_buffer::{i256, IntervalDayTime, OffsetBuffer};
use half::f16;
use std::sync::Arc;
@@ -396,21 +396,25 @@ pub mod tests {
#[test]
fn test_interval_dict() {
- let values = IntervalDayTimeArray::from(vec![1, 0, 2, 5]);
+ let v1 = IntervalDayTime::new(0, 1);
+ let v2 = IntervalDayTime::new(0, 2);
+ let v3 = IntervalDayTime::new(12, 2);
+
+ let values = IntervalDayTimeArray::from(vec![Some(v1), Some(v2), None,
Some(v3)]);
let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
let array1 = DictionaryArray::new(keys, Arc::new(values));
- let values = IntervalDayTimeArray::from(vec![2, 3, 4, 5]);
+ let values = IntervalDayTimeArray::from(vec![Some(v3), Some(v2), None,
Some(v1)]);
let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
let array2 = DictionaryArray::new(keys, Arc::new(values));
let cmp = build_compare(&array1, &array2).unwrap();
- assert_eq!(Ordering::Less, cmp(0, 0));
- assert_eq!(Ordering::Less, cmp(0, 3));
- assert_eq!(Ordering::Equal, cmp(3, 3));
- assert_eq!(Ordering::Greater, cmp(3, 1));
- assert_eq!(Ordering::Greater, cmp(3, 2));
+ assert_eq!(Ordering::Less, cmp(0, 0)); // v1 vs v3
+ assert_eq!(Ordering::Equal, cmp(0, 3)); // v1 vs v1
+ assert_eq!(Ordering::Greater, cmp(3, 3)); // v3 vs v1
+ assert_eq!(Ordering::Greater, cmp(3, 1)); // v3 vs v2
+ assert_eq!(Ordering::Greater, cmp(3, 2)); // v3 vs v2
}
#[test]
diff --git a/arrow-row/src/fixed.rs b/arrow-row/src/fixed.rs
index 831105bd5f1..0f3c3d0912f 100644
--- a/arrow-row/src/fixed.rs
+++ b/arrow-row/src/fixed.rs
@@ -19,7 +19,9 @@ use crate::array::PrimitiveArray;
use crate::null_sentinel;
use arrow_array::builder::BufferBuilder;
use arrow_array::{ArrowPrimitiveType, BooleanArray, FixedSizeBinaryArray};
-use arrow_buffer::{bit_util, i256, ArrowNativeType, Buffer, MutableBuffer};
+use arrow_buffer::{
+ bit_util, i256, ArrowNativeType, Buffer, IntervalDayTime,
IntervalMonthDayNano, MutableBuffer,
+};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{DataType, SortOptions};
use half::f16;
@@ -163,6 +165,44 @@ impl FixedLengthEncoding for f64 {
}
}
+impl FixedLengthEncoding for IntervalDayTime {
+ type Encoded = [u8; 8];
+
+ fn encode(self) -> Self::Encoded {
+ let mut out = [0_u8; 8];
+ out[..4].copy_from_slice(&self.days.encode());
+ out[4..].copy_from_slice(&self.milliseconds.encode());
+ out
+ }
+
+ fn decode(encoded: Self::Encoded) -> Self {
+ Self {
+ days: i32::decode(encoded[..4].try_into().unwrap()),
+ milliseconds: i32::decode(encoded[4..].try_into().unwrap()),
+ }
+ }
+}
+
+impl FixedLengthEncoding for IntervalMonthDayNano {
+ type Encoded = [u8; 16];
+
+ fn encode(self) -> Self::Encoded {
+ let mut out = [0_u8; 16];
+ out[..4].copy_from_slice(&self.months.encode());
+ out[4..8].copy_from_slice(&self.days.encode());
+ out[8..].copy_from_slice(&self.nanoseconds.encode());
+ out
+ }
+
+ fn decode(encoded: Self::Encoded) -> Self {
+ Self {
+ months: i32::decode(encoded[..4].try_into().unwrap()),
+ days: i32::decode(encoded[4..8].try_into().unwrap()),
+ nanoseconds: i64::decode(encoded[8..].try_into().unwrap()),
+ }
+ }
+}
+
/// Returns the total encoded length (including null byte) for a value of type
`T::Native`
pub const fn encoded_len<T>(_col: &PrimitiveArray<T>) -> usize
where
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index a4dd2470ab6..b8d59142db7 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -845,6 +845,7 @@ pub fn take_record_batch(
mod tests {
use super::*;
use arrow_array::builder::*;
+ use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano};
use arrow_schema::{Field, Fields, TimeUnit};
fn test_take_decimal_arrays(
@@ -1158,20 +1159,26 @@ mod tests {
.unwrap();
// interval_day_time
+ let v1 = IntervalDayTime::new(0, 0);
+ let v2 = IntervalDayTime::new(2, 0);
+ let v3 = IntervalDayTime::new(-15, 0);
test_take_primitive_arrays::<IntervalDayTimeType>(
- vec![Some(0), None, Some(2), Some(-15), None],
+ vec![Some(v1), None, Some(v2), Some(v3), None],
&index,
None,
- vec![Some(-15), None, None, Some(-15), Some(2)],
+ vec![Some(v3), None, None, Some(v3), Some(v2)],
)
.unwrap();
// interval_month_day_nano
+ let v1 = IntervalMonthDayNano::new(0, 0, 0);
+ let v2 = IntervalMonthDayNano::new(2, 0, 0);
+ let v3 = IntervalMonthDayNano::new(-15, 0, 0);
test_take_primitive_arrays::<IntervalMonthDayNanoType>(
- vec![Some(0), None, Some(2), Some(-15), None],
+ vec![Some(v1), None, Some(v2), Some(v3), None],
&index,
None,
- vec![Some(-15), None, None, Some(-15), Some(2)],
+ vec![Some(v3), None, None, Some(v3), Some(v2)],
)
.unwrap();
diff --git a/arrow/benches/comparison_kernels.rs
b/arrow/benches/comparison_kernels.rs
index a272144b52e..f330e1386cc 100644
--- a/arrow/benches/comparison_kernels.rs
+++ b/arrow/benches/comparison_kernels.rs
@@ -22,9 +22,9 @@ use criterion::Criterion;
extern crate arrow;
use arrow::compute::kernels::cmp::*;
-use arrow::datatypes::IntervalMonthDayNanoType;
use arrow::util::bench_util::*;
use arrow::{array::*, datatypes::Float32Type, datatypes::Int32Type};
+use arrow_buffer::IntervalMonthDayNano;
use arrow_string::like::*;
use arrow_string::regexp::regexp_is_match_utf8_scalar;
@@ -59,10 +59,8 @@ fn add_benchmark(c: &mut Criterion) {
let arr_a = create_primitive_array_with_seed::<Float32Type>(SIZE, 0.0, 42);
let arr_b = create_primitive_array_with_seed::<Float32Type>(SIZE, 0.0, 43);
- let arr_month_day_nano_a =
- create_primitive_array_with_seed::<IntervalMonthDayNanoType>(SIZE,
0.0, 43);
- let arr_month_day_nano_b =
- create_primitive_array_with_seed::<IntervalMonthDayNanoType>(SIZE,
0.0, 43);
+ let arr_month_day_nano_a = create_month_day_nano_array_with_seed(SIZE,
0.0, 43);
+ let arr_month_day_nano_b = create_month_day_nano_array_with_seed(SIZE,
0.0, 43);
let arr_string = create_string_array::<i32>(SIZE, 0.0);
let scalar = Float32Array::from(vec![1.0]);
@@ -134,7 +132,7 @@ fn add_benchmark(c: &mut Criterion) {
c.bench_function("eq MonthDayNano", |b| {
b.iter(|| eq(&arr_month_day_nano_a, &arr_month_day_nano_b))
});
- let scalar = IntervalMonthDayNanoArray::new_scalar(123);
+ let scalar =
IntervalMonthDayNanoArray::new_scalar(IntervalMonthDayNano::new(123, 0, 0));
c.bench_function("eq scalar MonthDayNano", |b| {
b.iter(|| eq(&arr_month_day_nano_b, &scalar).unwrap())
diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs
index 140c5bc9259..9fae8e6bab3 100644
--- a/arrow/src/util/bench_util.rs
+++ b/arrow/src/util/bench_util.rs
@@ -20,7 +20,7 @@
use crate::array::*;
use crate::datatypes::*;
use crate::util::test_util::seedable_rng;
-use arrow_buffer::Buffer;
+use arrow_buffer::{Buffer, IntervalMonthDayNano};
use rand::distributions::uniform::SampleUniform;
use rand::thread_rng;
use rand::Rng;
@@ -72,6 +72,24 @@ where
.collect()
}
+pub fn create_month_day_nano_array_with_seed(
+ size: usize,
+ null_density: f32,
+ seed: u64,
+) -> IntervalMonthDayNanoArray {
+ let mut rng = StdRng::seed_from_u64(seed);
+
+ (0..size)
+ .map(|_| {
+ if rng.gen::<f32>() < null_density {
+ None
+ } else {
+ Some(IntervalMonthDayNano::new(rng.gen(), rng.gen(), rng.gen()))
+ }
+ })
+ .collect()
+}
+
/// Creates an random (but fixed-seeded) array of a given size and null density
pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray
where
diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs
index 2d3167c928d..0fd89cc2bff 100644
--- a/arrow/tests/array_cast.rs
+++ b/arrow/tests/array_cast.rs
@@ -32,7 +32,7 @@ use arrow_array::{
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, UnionArray,
};
-use arrow_buffer::{i256, Buffer};
+use arrow_buffer::{i256, Buffer, IntervalDayTime, IntervalMonthDayNano};
use arrow_cast::pretty::pretty_format_columns;
use arrow_cast::{can_cast_types, cast};
use arrow_data::ArrayData;
@@ -249,8 +249,14 @@ fn get_arrays_of_all_types() -> Vec<ArrayRef> {
Arc::new(Time64MicrosecondArray::from(vec![1000, 2000])),
Arc::new(Time64NanosecondArray::from(vec![1000, 2000])),
Arc::new(IntervalYearMonthArray::from(vec![1000, 2000])),
- Arc::new(IntervalDayTimeArray::from(vec![1000, 2000])),
- Arc::new(IntervalMonthDayNanoArray::from(vec![1000, 2000])),
+ Arc::new(IntervalDayTimeArray::from(vec![
+ IntervalDayTime::new(0, 1000),
+ IntervalDayTime::new(0, 2000),
+ ])),
+ Arc::new(IntervalMonthDayNanoArray::from(vec![
+ IntervalMonthDayNano::new(0, 0, 1000),
+ IntervalMonthDayNano::new(0, 0, 1000),
+ ])),
Arc::new(DurationSecondArray::from(vec![1000, 2000])),
Arc::new(DurationMillisecondArray::from(vec![1000, 2000])),
Arc::new(DurationMicrosecondArray::from(vec![1000, 2000])),
diff --git a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
index a0d25d403c1..a9159bb4712 100644
--- a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
+++ b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
@@ -30,7 +30,7 @@ use arrow_array::{
ArrayRef, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array,
IntervalDayTimeArray, IntervalYearMonthArray,
};
-use arrow_buffer::{i256, Buffer};
+use arrow_buffer::{i256, Buffer, IntervalDayTime};
use arrow_data::ArrayDataBuilder;
use arrow_schema::{DataType as ArrowType, IntervalUnit};
use bytes::Bytes;
@@ -195,7 +195,14 @@ impl ArrayReader for FixedLenByteArrayReader {
IntervalUnit::DayTime => Arc::new(
binary
.iter()
- .map(|o| o.map(|b| i64::from_le_bytes(b[4..12].try_into().unwrap())))
+ .map(|o| {
+ o.map(|b| {
+ IntervalDayTime::new(
+ i32::from_le_bytes(b[4..8].try_into().unwrap()),
+ i32::from_le_bytes(b[8..12].try_into().unwrap()),
+ )
+ })
+ })
.collect::<IntervalDayTimeArray>(),
) as ArrayRef,
IntervalUnit::MonthDayNano => {
diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index a30bf168619..db75c54bf5d 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -750,7 +750,7 @@ mod tests {
Decimal128Type, Decimal256Type, DecimalType, Float16Type, Float32Type, Float64Type,
};
use arrow_array::*;
- use arrow_buffer::{i256, ArrowNativeType, Buffer};
+ use arrow_buffer::{i256, ArrowNativeType, Buffer, IntervalDayTime};
use arrow_data::ArrayDataBuilder;
use arrow_schema::{ArrowError, DataType as ArrowDataType, Field, Fields, Schema};
use arrow_select::concat::concat_batches;
@@ -1060,8 +1060,12 @@ mod tests {
Arc::new(
vals.iter()
.map(|x| {
- x.as_ref()
- .map(|b| i64::from_le_bytes(b.as_ref()[4..12].try_into().unwrap()))
+ x.as_ref().map(|b| IntervalDayTime {
+ days: i32::from_le_bytes(b.as_ref()[4..8].try_into().unwrap()),
+ milliseconds: i32::from_le_bytes(
+ b.as_ref()[8..12].try_into().unwrap(),
+ ),
+ })
})
.collect::<IntervalDayTimeArray>(),
)
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index bf4b88ac52d..60feda69e84 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -942,11 +942,11 @@ fn get_interval_dt_array_slice(
) -> Vec<FixedLenByteArray> {
let mut values = Vec::with_capacity(indices.len());
for i in indices {
- let mut prefix = vec![0; 4];
- let mut value = array.value(*i).to_le_bytes().to_vec();
- prefix.append(&mut value);
- debug_assert_eq!(prefix.len(), 12);
- values.push(FixedLenByteArray::from(ByteArray::from(prefix)));
+ let mut out = [0; 12];
+ let value = array.value(*i);
+ out[4..8].copy_from_slice(&value.days.to_le_bytes());
+ out[8..12].copy_from_slice(&value.milliseconds.to_le_bytes());
+ values.push(FixedLenByteArray::from(ByteArray::from(out.to_vec())));
}
values
}
@@ -1016,7 +1016,7 @@ mod tests {
use arrow::error::Result as ArrowResult;
use arrow::util::pretty::pretty_format_batches;
use arrow::{array::*, buffer::Buffer};
- use arrow_buffer::NullBuffer;
+ use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, NullBuffer};
use arrow_schema::Fields;
use crate::basic::Encoding;
@@ -2057,7 +2057,12 @@ mod tests {
#[test]
fn interval_day_time_single_column() {
- required_and_optional::<IntervalDayTimeArray, _>(0..SMALL_SIZE as i64);
+ required_and_optional::<IntervalDayTimeArray, _>(vec![
+ IntervalDayTime::new(0, 1),
+ IntervalDayTime::new(0, 3),
+ IntervalDayTime::new(3, -2),
+ IntervalDayTime::new(-200, 4),
+ ]);
}
#[test]
@@ -2065,7 +2070,12 @@ mod tests {
expected = "Attempting to write an Arrow interval type MonthDayNano to parquet that is not yet implemented"
)]
fn interval_month_day_nano_single_column() {
- required_and_optional::<IntervalMonthDayNanoArray, _>(0..SMALL_SIZE as i128);
+ required_and_optional::<IntervalMonthDayNanoArray, _>(vec![
+ IntervalMonthDayNano::new(0, 1, 5),
+ IntervalMonthDayNano::new(0, 3, 2),
+ IntervalMonthDayNano::new(3, -2, -5),
+ IntervalMonthDayNano::new(-200, 4, -1),
+ ]);
}
#[test]