This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 21a9a2ad5f Implement cast and other operations on decimal32 and
decimal64 (#7815)
21a9a2ad5f is described below
commit 21a9a2ad5f047d08883fd7589b2053cdd9b56626
Author: Curt Hagenlocher <[email protected]>
AuthorDate: Tue Aug 19 14:24:47 2025 -0700
Implement cast and other operations on decimal32 and decimal64 (#7815)
# Which issue does this PR close?
Part of addressing #6661 but does not close it; there is at least one
more PR for CSV/Parquet/JSON support.
# What changes are included in this PR?
This change adds cast operations for the recently-added `decimal32` and
`decimal64` types. It also adds tests to verify that sort and comparison
work correctly, and includes these types in benchmarks.
# Are these changes tested?
Yes.
# Are there any user-facing changes?
Casting to and from `decimal32` and `decimal64` is now supported.
---
arrow-array/src/cast.rs | 12 +
arrow-array/src/types.rs | 2 +
arrow-cast/src/cast/decimal.rs | 72 ++++++
arrow-cast/src/cast/mod.rs | 518 +++++++++++++++++++++++++++++++++++---
arrow-ord/src/comparison.rs | 211 ++++++++++++++++
arrow-ord/src/ord.rs | 28 ++-
arrow-ord/src/sort.rs | 20 ++
arrow-row/src/lib.rs | 60 +++++
arrow/benches/array_from_vec.rs | 46 ++++
arrow/benches/builder.rs | 38 +++
arrow/benches/cast_kernels.rs | 48 ++++
arrow/benches/decimal_validate.rs | 55 +++-
arrow/src/tensor.rs | 4 +
arrow/tests/array_cast.rs | 87 ++++++-
14 files changed, 1158 insertions(+), 43 deletions(-)
diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs
index 41fffc4bc8..de590ff87c 100644
--- a/arrow-array/src/cast.rs
+++ b/arrow-array/src/cast.rs
@@ -1132,6 +1132,18 @@ mod tests {
assert!(!as_string_array(&array).is_empty())
}
+ #[test]
+ fn test_decimal32array() {
+ let a = Decimal32Array::from_iter_values([1, 2, 4, 5]);
+ assert!(!as_primitive_array::<Decimal32Type>(&a).is_empty());
+ }
+
+ #[test]
+ fn test_decimal64array() {
+ let a = Decimal64Array::from_iter_values([1, 2, 4, 5]);
+ assert!(!as_primitive_array::<Decimal64Type>(&a).is_empty());
+ }
+
#[test]
fn test_decimal128array() {
let a = Decimal128Array::from_iter_values([1, 2, 4, 5]);
diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs
index 96c496a536..144de8dbec 100644
--- a/arrow-array/src/types.rs
+++ b/arrow-array/src/types.rs
@@ -1820,6 +1820,8 @@ mod tests {
test_layout::<Float16Type>();
test_layout::<Float32Type>();
test_layout::<Float64Type>();
+ test_layout::<Decimal32Type>();
+ test_layout::<Decimal64Type>();
test_layout::<Decimal128Type>();
test_layout::<Decimal256Type>();
test_layout::<TimestampNanosecondType>();
diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs
index 597f384fa4..00bfc57e12 100644
--- a/arrow-cast/src/cast/decimal.rs
+++ b/arrow-cast/src/cast/decimal.rs
@@ -20,6 +20,10 @@ use crate::cast::*;
/// A utility trait that provides checked conversions between
/// decimal types inspired by [`NumCast`]
pub(crate) trait DecimalCast: Sized {
+ fn to_i32(self) -> Option<i32>;
+
+ fn to_i64(self) -> Option<i64>;
+
fn to_i128(self) -> Option<i128>;
fn to_i256(self) -> Option<i256>;
@@ -29,7 +33,67 @@ pub(crate) trait DecimalCast: Sized {
fn from_f64(n: f64) -> Option<Self>;
}
+impl DecimalCast for i32 {
+ fn to_i32(self) -> Option<i32> {
+ Some(self)
+ }
+
+ fn to_i64(self) -> Option<i64> {
+ Some(self as i64)
+ }
+
+ fn to_i128(self) -> Option<i128> {
+ Some(self as i128)
+ }
+
+ fn to_i256(self) -> Option<i256> {
+ Some(i256::from_i128(self as i128))
+ }
+
+ fn from_decimal<T: DecimalCast>(n: T) -> Option<Self> {
+ n.to_i32()
+ }
+
+ fn from_f64(n: f64) -> Option<Self> {
+ n.to_i32()
+ }
+}
+
+impl DecimalCast for i64 {
+ fn to_i32(self) -> Option<i32> {
+ i32::try_from(self).ok()
+ }
+
+ fn to_i64(self) -> Option<i64> {
+ Some(self)
+ }
+
+ fn to_i128(self) -> Option<i128> {
+ Some(self as i128)
+ }
+
+ fn to_i256(self) -> Option<i256> {
+ Some(i256::from_i128(self as i128))
+ }
+
+ fn from_decimal<T: DecimalCast>(n: T) -> Option<Self> {
+ n.to_i64()
+ }
+
+ fn from_f64(n: f64) -> Option<Self> {
+ n.to_i64()
+ }
+}
+
impl DecimalCast for i128 {
+ fn to_i32(self) -> Option<i32> {
+ i32::try_from(self).ok()
+ }
+
+ fn to_i64(self) -> Option<i64> {
+ i64::try_from(self).ok()
+ }
+
fn to_i128(self) -> Option<i128> {
Some(self)
}
@@ -48,6 +112,14 @@ impl DecimalCast for i128 {
}
impl DecimalCast for i256 {
+ fn to_i32(self) -> Option<i32> {
+ self.to_i128().map(|x| i32::try_from(x).ok())?
+ }
+
+ fn to_i64(self) -> Option<i64> {
+ self.to_i128().map(|x| i64::try_from(x).ok())?
+ }
+
fn to_i128(self) -> Option<i128> {
self.to_i128()
}
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 8fb0c4fdd1..e2bb3db859 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -148,8 +148,8 @@ pub fn can_cast_types(from_type: &DataType, to_type:
&DataType) -> bool {
can_cast_types(list_from.data_type(), list_to.data_type())
}
(List(_), _) => false,
- (FixedSizeList(list_from,_), List(list_to)) |
- (FixedSizeList(list_from,_), LargeList(list_to)) => {
+ (FixedSizeList(list_from, _), List(list_to))
+ | (FixedSizeList(list_from, _), LargeList(list_to)) => {
can_cast_types(list_from.data_type(), list_to.data_type())
}
(FixedSizeList(inner, size), FixedSizeList(inner_to, size_to)) if size
== size_to => {
@@ -157,38 +157,66 @@ pub fn can_cast_types(from_type: &DataType, to_type:
&DataType) -> bool {
}
(_, List(list_to)) => can_cast_types(from_type, list_to.data_type()),
(_, LargeList(list_to)) => can_cast_types(from_type,
list_to.data_type()),
- (_, FixedSizeList(list_to,size)) if *size == 1 => {
- can_cast_types(from_type, list_to.data_type())},
- (FixedSizeList(list_from,size), _) if *size == 1 => {
- can_cast_types(list_from.data_type(), to_type)},
- (Map(from_entries,ordered_from), Map(to_entries, ordered_to)) if
ordered_from == ordered_to =>
- match (key_field(from_entries), key_field(to_entries),
value_field(from_entries), value_field(to_entries)) {
- (Some(from_key), Some(to_key), Some(from_value),
Some(to_value)) =>
- can_cast_types(from_key.data_type(), to_key.data_type())
&& can_cast_types(from_value.data_type(), to_value.data_type()),
- _ => false
- },
+ (_, FixedSizeList(list_to, size)) if *size == 1 => {
+ can_cast_types(from_type, list_to.data_type())
+ }
+ (FixedSizeList(list_from, size), _) if *size == 1 => {
+ can_cast_types(list_from.data_type(), to_type)
+ }
+ (Map(from_entries, ordered_from), Map(to_entries, ordered_to))
+ if ordered_from == ordered_to =>
+ {
+ match (
+ key_field(from_entries),
+ key_field(to_entries),
+ value_field(from_entries),
+ value_field(to_entries),
+ ) {
+ (Some(from_key), Some(to_key), Some(from_value),
Some(to_value)) => {
+ can_cast_types(from_key.data_type(), to_key.data_type())
+ && can_cast_types(from_value.data_type(),
to_value.data_type())
+ }
+ _ => false,
+ }
+ }
// cast one decimal type to another decimal type
- (Decimal128(_, _), Decimal128(_, _)) => true,
- (Decimal256(_, _), Decimal256(_, _)) => true,
- (Decimal128(_, _), Decimal256(_, _)) => true,
- (Decimal256(_, _), Decimal128(_, _)) => true,
+ (
+ Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) |
Decimal256(_, _),
+ Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) |
Decimal256(_, _),
+ ) => true,
// unsigned integer to decimal
- (UInt8 | UInt16 | UInt32 | UInt64, Decimal128(_, _)) |
- (UInt8 | UInt16 | UInt32 | UInt64, Decimal256(_, _)) |
+ (
+ UInt8 | UInt16 | UInt32 | UInt64,
+ Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) |
Decimal256(_, _),
+ ) => true,
// signed numeric to decimal
- (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64,
Decimal128(_, _)) |
- (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64,
Decimal256(_, _)) |
+ (
+ Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64,
+ Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) |
Decimal256(_, _),
+ ) => true,
// decimal to unsigned numeric
- (Decimal128(_, _) | Decimal256(_, _), UInt8 | UInt16 | UInt32 |
UInt64) |
+ (
+ Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) |
Decimal256(_, _),
+ UInt8 | UInt16 | UInt32 | UInt64,
+ ) => true,
// decimal to signed numeric
- (Decimal128(_, _) | Decimal256(_, _), Null | Int8 | Int16 | Int32 |
Int64 | Float32 | Float64) => true,
+ (
+ Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) |
Decimal256(_, _),
+ Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64,
+ ) => true,
// decimal to string
- (Decimal128(_, _) | Decimal256(_, _), Utf8View | Utf8 | LargeUtf8) =>
true,
+ (
+ Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) |
Decimal256(_, _),
+ Utf8View | Utf8 | LargeUtf8,
+ ) => true,
// string to decimal
- (Utf8View | Utf8 | LargeUtf8, Decimal128(_, _) | Decimal256(_, _)) =>
true,
+ (
+ Utf8View | Utf8 | LargeUtf8,
+ Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) |
Decimal256(_, _),
+ ) => true,
(Struct(from_fields), Struct(to_fields)) => {
- from_fields.len() == to_fields.len() &&
- from_fields.iter().zip(to_fields.iter()).all(|(f1, f2)| {
+ from_fields.len() == to_fields.len()
+ && from_fields.iter().zip(to_fields.iter()).all(|(f1, f2)| {
// Assume that nullability between two structs are
compatible, if not,
// cast kernel will return error.
can_cast_types(f1.data_type(), f2.data_type())
@@ -211,8 +239,12 @@ pub fn can_cast_types(from_type: &DataType, to_type:
&DataType) -> bool {
|| to_type == &LargeUtf8
}
- (Binary, LargeBinary | Utf8 | LargeUtf8 | FixedSizeBinary(_) |
BinaryView | Utf8View ) => true,
- (LargeBinary, Binary | Utf8 | LargeUtf8 | FixedSizeBinary(_) |
BinaryView | Utf8View ) => true,
+ (Binary, LargeBinary | Utf8 | LargeUtf8 | FixedSizeBinary(_) |
BinaryView | Utf8View) => {
+ true
+ }
+ (LargeBinary, Binary | Utf8 | LargeUtf8 | FixedSizeBinary(_) |
BinaryView | Utf8View) => {
+ true
+ }
(FixedSizeBinary(_), Binary | LargeBinary | BinaryView) => true,
(
Utf8 | LargeUtf8 | Utf8View,
@@ -243,8 +275,10 @@ pub fn can_cast_types(from_type: &DataType, to_type:
&DataType) -> bool {
// start numeric casts
(
- UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 |
Float16 | Float32 | Float64,
- UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 |
Float16 | Float32 | Float64,
+ UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 |
Float16 | Float32
+ | Float64,
+ UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 |
Float16 | Float32
+ | Float64,
) => true,
// end numeric casts
@@ -847,6 +881,26 @@ pub fn cast_with_options(
cast_map_values(array.as_map(), to_type, cast_options,
ordered1.to_owned())
}
// Decimal to decimal, same width
+ (Decimal32(p1, s1), Decimal32(p2, s2)) => {
+ cast_decimal_to_decimal_same_type::<Decimal32Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
+ (Decimal64(p1, s1), Decimal64(p2, s2)) => {
+ cast_decimal_to_decimal_same_type::<Decimal64Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
(Decimal128(p1, s1), Decimal128(p2, s2)) => {
cast_decimal_to_decimal_same_type::<Decimal128Type>(
array.as_primitive(),
@@ -868,6 +922,86 @@ pub fn cast_with_options(
)
}
// Decimal to decimal, different width
+ (Decimal32(p1, s1), Decimal64(p2, s2)) => {
+ cast_decimal_to_decimal::<Decimal32Type, Decimal64Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
+ (Decimal32(p1, s1), Decimal128(p2, s2)) => {
+ cast_decimal_to_decimal::<Decimal32Type, Decimal128Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
+ (Decimal32(p1, s1), Decimal256(p2, s2)) => {
+ cast_decimal_to_decimal::<Decimal32Type, Decimal256Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
+ (Decimal64(p1, s1), Decimal32(p2, s2)) => {
+ cast_decimal_to_decimal::<Decimal64Type, Decimal32Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
+ (Decimal64(p1, s1), Decimal128(p2, s2)) => {
+ cast_decimal_to_decimal::<Decimal64Type, Decimal128Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
+ (Decimal64(p1, s1), Decimal256(p2, s2)) => {
+ cast_decimal_to_decimal::<Decimal64Type, Decimal256Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
+ (Decimal128(p1, s1), Decimal32(p2, s2)) => {
+ cast_decimal_to_decimal::<Decimal128Type, Decimal32Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
+ (Decimal128(p1, s1), Decimal64(p2, s2)) => {
+ cast_decimal_to_decimal::<Decimal128Type, Decimal64Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
(Decimal128(p1, s1), Decimal256(p2, s2)) => {
cast_decimal_to_decimal::<Decimal128Type, Decimal256Type>(
array.as_primitive(),
@@ -878,6 +1012,26 @@ pub fn cast_with_options(
cast_options,
)
}
+ (Decimal256(p1, s1), Decimal32(p2, s2)) => {
+ cast_decimal_to_decimal::<Decimal256Type, Decimal32Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
+ (Decimal256(p1, s1), Decimal64(p2, s2)) => {
+ cast_decimal_to_decimal::<Decimal256Type, Decimal64Type>(
+ array.as_primitive(),
+ *p1,
+ *s1,
+ *p2,
+ *s2,
+ cast_options,
+ )
+ }
(Decimal256(p1, s1), Decimal128(p2, s2)) => {
cast_decimal_to_decimal::<Decimal256Type, Decimal128Type>(
array.as_primitive(),
@@ -889,6 +1043,28 @@ pub fn cast_with_options(
)
}
// Decimal to non-decimal
+ (Decimal32(_, scale), _) if !to_type.is_temporal() => {
+ cast_from_decimal::<Decimal32Type, _>(
+ array,
+ 10_i32,
+ scale,
+ from_type,
+ to_type,
+ |x: i32| x as f64,
+ cast_options,
+ )
+ }
+ (Decimal64(_, scale), _) if !to_type.is_temporal() => {
+ cast_from_decimal::<Decimal64Type, _>(
+ array,
+ 10_i64,
+ scale,
+ from_type,
+ to_type,
+ |x: i64| x as f64,
+ cast_options,
+ )
+ }
(Decimal128(_, scale), _) if !to_type.is_temporal() => {
cast_from_decimal::<Decimal128Type, _>(
array,
@@ -912,6 +1088,28 @@ pub fn cast_with_options(
)
}
// Non-decimal to decimal
+ (_, Decimal32(precision, scale)) if !from_type.is_temporal() => {
+ cast_to_decimal::<Decimal32Type, _>(
+ array,
+ 10_i32,
+ precision,
+ scale,
+ from_type,
+ to_type,
+ cast_options,
+ )
+ }
+ (_, Decimal64(precision, scale)) if !from_type.is_temporal() => {
+ cast_to_decimal::<Decimal64Type, _>(
+ array,
+ 10_i64,
+ precision,
+ scale,
+ from_type,
+ to_type,
+ cast_options,
+ )
+ }
(_, Decimal128(precision, scale)) if !from_type.is_temporal() => {
cast_to_decimal::<Decimal128Type, _>(
array,
@@ -2524,6 +2722,28 @@ mod tests {
}
}
+ fn create_decimal32_array(
+ array: Vec<Option<i32>>,
+ precision: u8,
+ scale: i8,
+ ) -> Result<Decimal32Array, ArrowError> {
+ array
+ .into_iter()
+ .collect::<Decimal32Array>()
+ .with_precision_and_scale(precision, scale)
+ }
+
+ fn create_decimal64_array(
+ array: Vec<Option<i64>>,
+ precision: u8,
+ scale: i8,
+ ) -> Result<Decimal64Array, ArrowError> {
+ array
+ .into_iter()
+ .collect::<Decimal64Array>()
+ .with_precision_and_scale(precision, scale)
+ }
+
fn create_decimal128_array(
array: Vec<Option<i128>>,
precision: u8,
@@ -2672,8 +2892,77 @@ mod tests {
);
}
+ #[test]
+ fn test_cast_decimal32_to_decimal32() {
+ // test changing scale (precision is unchanged)
+ let input_type = DataType::Decimal32(9, 3);
+ let output_type = DataType::Decimal32(9, 4);
+ assert!(can_cast_types(&input_type, &output_type));
+ let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
+ let array = create_decimal32_array(array, 9, 3).unwrap();
+ generate_cast_test_case!(
+ &array,
+ Decimal32Array,
+ &output_type,
+ vec![
+ Some(11234560_i32),
+ Some(21234560_i32),
+ Some(31234560_i32),
+ None
+ ]
+ );
+ // negative test
+ let array = vec![Some(123456), None];
+ let array = create_decimal32_array(array, 9, 0).unwrap();
+ let result_safe = cast(&array, &DataType::Decimal32(2, 2));
+ assert!(result_safe.is_ok());
+ let options = CastOptions {
+ safe: false,
+ ..Default::default()
+ };
+
+ let result_unsafe = cast_with_options(&array, &DataType::Decimal32(2,
2), &options);
+ assert_eq!("Invalid argument error: 12345600 is too large to store in
a Decimal32 of precision 2. Max is 99",
+ result_unsafe.unwrap_err().to_string());
+ }
+
+ #[test]
+ fn test_cast_decimal64_to_decimal64() {
+ // test changing scale (precision is unchanged)
+ let input_type = DataType::Decimal64(17, 3);
+ let output_type = DataType::Decimal64(17, 4);
+ assert!(can_cast_types(&input_type, &output_type));
+ let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
+ let array = create_decimal64_array(array, 17, 3).unwrap();
+ generate_cast_test_case!(
+ &array,
+ Decimal64Array,
+ &output_type,
+ vec![
+ Some(11234560_i64),
+ Some(21234560_i64),
+ Some(31234560_i64),
+ None
+ ]
+ );
+ // negative test
+ let array = vec![Some(123456), None];
+ let array = create_decimal64_array(array, 9, 0).unwrap();
+ let result_safe = cast(&array, &DataType::Decimal64(2, 2));
+ assert!(result_safe.is_ok());
+ let options = CastOptions {
+ safe: false,
+ ..Default::default()
+ };
+
+ let result_unsafe = cast_with_options(&array, &DataType::Decimal64(2,
2), &options);
+ assert_eq!("Invalid argument error: 12345600 is too large to store in
a Decimal64 of precision 2. Max is 99",
+ result_unsafe.unwrap_err().to_string());
+ }
+
#[test]
fn test_cast_decimal128_to_decimal128() {
+ // test changing scale (precision is unchanged)
let input_type = DataType::Decimal128(20, 3);
let output_type = DataType::Decimal128(20, 4);
assert!(can_cast_types(&input_type, &output_type));
@@ -2705,6 +2994,38 @@ mod tests {
result_unsafe.unwrap_err().to_string());
}
+ #[test]
+ fn test_cast_decimal32_to_decimal32_dict() {
+ let p = 9;
+ let s = 3;
+ let input_type = DataType::Decimal32(p, s);
+ let output_type = DataType::Dictionary(
+ Box::new(DataType::Int32),
+ Box::new(DataType::Decimal32(p, s)),
+ );
+ assert!(can_cast_types(&input_type, &output_type));
+ let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
+ let array = create_decimal32_array(array, p, s).unwrap();
+ let cast_array = cast_with_options(&array, &output_type,
&CastOptions::default()).unwrap();
+ assert_eq!(cast_array.data_type(), &output_type);
+ }
+
+ #[test]
+ fn test_cast_decimal64_to_decimal64_dict() {
+ let p = 15;
+ let s = 3;
+ let input_type = DataType::Decimal64(p, s);
+ let output_type = DataType::Dictionary(
+ Box::new(DataType::Int32),
+ Box::new(DataType::Decimal64(p, s)),
+ );
+ assert!(can_cast_types(&input_type, &output_type));
+ let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
+ let array = create_decimal64_array(array, p, s).unwrap();
+ let cast_array = cast_with_options(&array, &output_type,
&CastOptions::default()).unwrap();
+ assert_eq!(cast_array.data_type(), &output_type);
+ }
+
#[test]
fn test_cast_decimal128_to_decimal128_dict() {
let p = 20;
@@ -2737,6 +3058,50 @@ mod tests {
assert_eq!(cast_array.data_type(), &output_type);
}
+ #[test]
+ fn test_cast_decimal32_to_decimal32_overflow() {
+ let input_type = DataType::Decimal32(9, 3);
+ let output_type = DataType::Decimal32(9, 9);
+ assert!(can_cast_types(&input_type, &output_type));
+
+ let array = vec![Some(i32::MAX)];
+ let array = create_decimal32_array(array, 9, 3).unwrap();
+ let result = cast_with_options(
+ &array,
+ &output_type,
+ &CastOptions {
+ safe: false,
+ format_options: FormatOptions::default(),
+ },
+ );
+ assert_eq!(
+ "Cast error: Cannot cast to Decimal32(9, 9). Overflowing on
2147483647",
+ result.unwrap_err().to_string()
+ );
+ }
+
+ #[test]
+ fn test_cast_decimal64_to_decimal64_overflow() {
+ let input_type = DataType::Decimal64(18, 3);
+ let output_type = DataType::Decimal64(18, 18);
+ assert!(can_cast_types(&input_type, &output_type));
+
+ let array = vec![Some(i64::MAX)];
+ let array = create_decimal64_array(array, 18, 3).unwrap();
+ let result = cast_with_options(
+ &array,
+ &output_type,
+ &CastOptions {
+ safe: false,
+ format_options: FormatOptions::default(),
+ },
+ );
+ assert_eq!(
+ "Cast error: Cannot cast to Decimal64(18, 18). Overflowing on
9223372036854775807",
+ result.unwrap_err().to_string()
+ );
+ }
+
#[test]
fn test_cast_decimal128_to_decimal128_overflow() {
let input_type = DataType::Decimal128(38, 3);
@@ -2777,6 +3142,44 @@ mod tests {
result.unwrap_err().to_string());
}
+ #[test]
+ fn test_cast_decimal32_to_decimal256() {
+ let input_type = DataType::Decimal32(8, 3);
+ let output_type = DataType::Decimal256(20, 4);
+ assert!(can_cast_types(&input_type, &output_type));
+ let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
+ let array = create_decimal32_array(array, 8, 3).unwrap();
+ generate_cast_test_case!(
+ &array,
+ Decimal256Array,
+ &output_type,
+ vec![
+ Some(i256::from_i128(11234560_i128)),
+ Some(i256::from_i128(21234560_i128)),
+ Some(i256::from_i128(31234560_i128)),
+ None
+ ]
+ );
+ }
+ #[test]
+ fn test_cast_decimal64_to_decimal256() {
+ let input_type = DataType::Decimal64(12, 3);
+ let output_type = DataType::Decimal256(20, 4);
+ assert!(can_cast_types(&input_type, &output_type));
+ let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
+ let array = create_decimal64_array(array, 12, 3).unwrap();
+ generate_cast_test_case!(
+ &array,
+ Decimal256Array,
+ &output_type,
+ vec![
+ Some(i256::from_i128(11234560_i128)),
+ Some(i256::from_i128(21234560_i128)),
+ Some(i256::from_i128(31234560_i128)),
+ None
+ ]
+ );
+ }
#[test]
fn test_cast_decimal128_to_decimal256() {
let input_type = DataType::Decimal128(20, 3);
@@ -2973,6 +3376,22 @@ mod tests {
);
}
+ #[test]
+ fn test_cast_decimal32_to_numeric() {
+ let value_array: Vec<Option<i32>> = vec![Some(125), Some(225),
Some(325), None, Some(525)];
+ let array = create_decimal32_array(value_array, 8, 2).unwrap();
+
+ generate_decimal_to_numeric_cast_test_case(&array);
+ }
+
+ #[test]
+ fn test_cast_decimal64_to_numeric() {
+ let value_array: Vec<Option<i64>> = vec![Some(125), Some(225),
Some(325), None, Some(525)];
+ let array = create_decimal64_array(value_array, 8, 2).unwrap();
+
+ generate_decimal_to_numeric_cast_test_case(&array);
+ }
+
#[test]
fn test_cast_decimal128_to_numeric() {
let value_array: Vec<Option<i128>> = vec![Some(125), Some(225),
Some(325), None, Some(525)];
@@ -9559,6 +9978,14 @@ mod tests {
#[test]
fn test_cast_decimal_to_string() {
+ assert!(can_cast_types(
+ &DataType::Decimal32(9, 4),
+ &DataType::Utf8View
+ ));
+ assert!(can_cast_types(
+ &DataType::Decimal64(16, 4),
+ &DataType::Utf8View
+ ));
assert!(can_cast_types(
&DataType::Decimal128(10, 4),
&DataType::Utf8View
@@ -9603,7 +10030,7 @@ mod tests {
}
}
- let array128: Vec<Option<i128>> = vec![
+ let array32: Vec<Option<i32>> = vec![
Some(1123454),
Some(2123456),
Some(-3123453),
@@ -9614,11 +10041,40 @@ mod tests {
Some(-123456789),
None,
];
+ let array64: Vec<Option<i64>> = array32.iter().map(|num| num.map(|x| x
as i64)).collect();
+ let array128: Vec<Option<i128>> =
+ array64.iter().map(|num| num.map(|x| x as i128)).collect();
let array256: Vec<Option<i256>> = array128
.iter()
.map(|num| num.map(i256::from_i128))
.collect();
+ test_decimal_to_string::<Decimal32Type, i32>(
+ DataType::Utf8View,
+ create_decimal32_array(array32.clone(), 7, 3).unwrap(),
+ );
+ test_decimal_to_string::<Decimal32Type, i32>(
+ DataType::Utf8,
+ create_decimal32_array(array32.clone(), 7, 3).unwrap(),
+ );
+ test_decimal_to_string::<Decimal32Type, i64>(
+ DataType::LargeUtf8,
+ create_decimal32_array(array32, 7, 3).unwrap(),
+ );
+
+ test_decimal_to_string::<Decimal64Type, i32>(
+ DataType::Utf8View,
+ create_decimal64_array(array64.clone(), 7, 3).unwrap(),
+ );
+ test_decimal_to_string::<Decimal64Type, i32>(
+ DataType::Utf8,
+ create_decimal64_array(array64.clone(), 7, 3).unwrap(),
+ );
+ test_decimal_to_string::<Decimal64Type, i64>(
+ DataType::LargeUtf8,
+ create_decimal64_array(array64, 7, 3).unwrap(),
+ );
+
test_decimal_to_string::<Decimal128Type, i32>(
DataType::Utf8View,
create_decimal128_array(array128.clone(), 7, 3).unwrap(),
diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs
index bb82f54d49..f4daff8501 100644
--- a/arrow-ord/src/comparison.rs
+++ b/arrow-ord/src/comparison.rs
@@ -3059,6 +3059,120 @@ mod tests {
);
}
+ fn create_decimal_array<T: DecimalType>(data: Vec<Option<T::Native>>) ->
PrimitiveArray<T> {
+ data.into_iter().collect::<PrimitiveArray<T>>()
+ }
+
+ fn test_cmp_dict_decimal<T: DecimalType>(
+ values1: Vec<Option<T::Native>>,
+ values2: Vec<Option<T::Native>>,
+ ) {
+ let values = create_decimal_array::<T>(values1);
+ let keys = Int8Array::from_iter_values([1_i8, 2, 5, 4, 3, 0]);
+ let array1 = DictionaryArray::new(keys, Arc::new(values));
+
+ let values = create_decimal_array::<T>(values2);
+ let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+ let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+ let expected = BooleanArray::from(vec![false, false, false, true,
true, false]);
+ assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![true, true, false, false,
false, true]);
+ assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![true, true, false, true, true,
true]);
+ assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![false, false, true, false,
false, false]);
+ assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![false, false, true, true, true,
false]);
+ assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+ }
+
+ #[test]
+ fn test_cmp_dict_decimal32() {
+ test_cmp_dict_decimal::<Decimal32Type>(
+ vec![Some(0), Some(1), Some(2), Some(3), Some(4), Some(5)],
+ vec![Some(7), Some(-3), Some(4), Some(3), Some(5)],
+ );
+ }
+
+ #[test]
+ fn test_cmp_dict_non_dict_decimal32() {
+ let array1: Decimal32Array = Decimal32Array::from_iter_values([1, 2,
5, 4, 3, 0]);
+
+ let values = Decimal32Array::from_iter_values([7, -3, 4, 3, 5]);
+ let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+ let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+ let expected = BooleanArray::from(vec![false, false, false, true,
true, false]);
+ assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![true, true, false, false,
false, true]);
+ assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![true, true, false, true, true,
true]);
+ assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![false, false, true, false,
false, false]);
+ assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![false, false, true, true, true,
false]);
+ assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+ }
+
+ #[test]
+ fn test_cmp_dict_decimal64() {
+ let values = Decimal64Array::from_iter_values([0, 1, 2, 3, 4, 5]);
+ let keys = Int8Array::from_iter_values([1_i8, 2, 5, 4, 3, 0]);
+ let array1 = DictionaryArray::new(keys, Arc::new(values));
+
+ let values = Decimal64Array::from_iter_values([7, -3, 4, 3, 5]);
+ let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+ let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+ let expected = BooleanArray::from(vec![false, false, false, true,
true, false]);
+ assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![true, true, false, false,
false, true]);
+ assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![true, true, false, true, true,
true]);
+ assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![false, false, true, false,
false, false]);
+ assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![false, false, true, true, true,
false]);
+ assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+ }
+
+ #[test]
+ fn test_cmp_dict_non_dict_decimal64() {
+ let array1: Decimal64Array = Decimal64Array::from_iter_values([1, 2,
5, 4, 3, 0]);
+
+ let values = Decimal64Array::from_iter_values([7, -3, 4, 3, 5]);
+ let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+ let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+ let expected = BooleanArray::from(vec![false, false, false, true,
true, false]);
+ assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![true, true, false, false,
false, true]);
+ assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![true, true, false, true, true,
true]);
+ assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![false, false, true, false,
false, false]);
+ assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+ let expected = BooleanArray::from(vec![false, false, true, true, true,
false]);
+ assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+ }
+
#[test]
fn test_cmp_dict_decimal128() {
let values = Decimal128Array::from_iter_values([0, 1, 2, 3, 4, 5]);
@@ -3163,6 +3277,103 @@ mod tests {
assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
}
+ #[test]
+ fn test_decimal32() {
+ let a = Decimal32Array::from_iter_values([1, 2, 4, 5]);
+ let b = Decimal32Array::from_iter_values([7, -3, 4, 3]);
+ let e = BooleanArray::from(vec![false, false, true, false]);
+ let r = crate::cmp::eq(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ let e = BooleanArray::from(vec![true, false, false, false]);
+ let r = crate::cmp::lt(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ let e = BooleanArray::from(vec![true, false, true, false]);
+ let r = crate::cmp::lt_eq(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ let e = BooleanArray::from(vec![false, true, false, true]);
+ let r = crate::cmp::gt(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ let e = BooleanArray::from(vec![false, true, true, true]);
+ let r = crate::cmp::gt_eq(&a, &b).unwrap();
+ assert_eq!(e, r);
+ }
+
+ #[test]
+ fn test_decimal32_scalar() {
+ let a = Decimal32Array::from(vec![Some(1), Some(2), Some(3), None,
Some(4), Some(5)]);
+ let b = Decimal32Array::new_scalar(3_i32);
+ // array eq scalar
+ let e = BooleanArray::from(
+ vec![Some(false), Some(false), Some(true), None, Some(false),
Some(false)],
+ );
+ let r = crate::cmp::eq(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ // array neq scalar
+ let e = BooleanArray::from(
+ vec![Some(true), Some(true), Some(false), None, Some(true),
Some(true)],
+ );
+ let r = crate::cmp::neq(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ // array lt scalar
+ let e = BooleanArray::from(
+ vec![Some(true), Some(true), Some(false), None, Some(false),
Some(false)],
+ );
+ let r = crate::cmp::lt(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ // array lt_eq scalar
+ let e = BooleanArray::from(
+ vec![Some(true), Some(true), Some(true), None, Some(false),
Some(false)],
+ );
+ let r = crate::cmp::lt_eq(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ // array gt scalar
+ let e = BooleanArray::from(
+ vec![Some(false), Some(false), Some(false), None, Some(true),
Some(true)],
+ );
+ let r = crate::cmp::gt(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ // array gt_eq scalar
+ let e = BooleanArray::from(
+ vec![Some(false), Some(false), Some(true), None, Some(true),
Some(true)],
+ );
+ let r = crate::cmp::gt_eq(&a, &b).unwrap();
+ assert_eq!(e, r);
+ }
+
+ #[test]
+ fn test_decimal64() {
+ let a = Decimal64Array::from_iter_values([1, 2, 4, 5]);
+ let b = Decimal64Array::from_iter_values([7, -3, 4, 3]);
+ let e = BooleanArray::from(vec![false, false, true, false]);
+ let r = crate::cmp::eq(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ let e = BooleanArray::from(vec![true, false, false, false]);
+ let r = crate::cmp::lt(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ let e = BooleanArray::from(vec![true, false, true, false]);
+ let r = crate::cmp::lt_eq(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ let e = BooleanArray::from(vec![false, true, false, true]);
+ let r = crate::cmp::gt(&a, &b).unwrap();
+ assert_eq!(e, r);
+
+ let e = BooleanArray::from(vec![false, true, true, true]);
+ let r = crate::cmp::gt_eq(&a, &b).unwrap();
+ assert_eq!(e, r);
+ }
+
#[test]
fn test_decimal128() {
let a = Decimal128Array::from_iter_values([1, 2, 4, 5]);
diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs
index 7d1c9b0c13..6ff0766324 100644
--- a/arrow-ord/src/ord.rs
+++ b/arrow-ord/src/ord.rs
@@ -575,7 +575,33 @@ mod tests {
}
#[test]
- fn test_decimal() {
+ fn test_decimali32() { // comparator over a Decimal32Array holding the raw values [5, 2, 3]
+ let array = vec![Some(5_i32), Some(2_i32), Some(3_i32)]
+ .into_iter()
+ .collect::<Decimal32Array>()
+ .with_precision_and_scale(8, 6)
+ .unwrap();
+
+ let cmp = make_comparator(&array, &array,
SortOptions::default()).unwrap();
+ assert_eq!(Ordering::Less, cmp(1, 0)); // index 1 (value 2) orders before index 0 (value 5)
+ assert_eq!(Ordering::Greater, cmp(0, 2)); // index 0 (value 5) orders after index 2 (value 3)
+ }
+
+ #[test]
+ fn test_decimali64() { // comparator over a Decimal64Array holding the raw values [5, 2, 3]
+ let array = vec![Some(5_i64), Some(2_i64), Some(3_i64)]
+ .into_iter()
+ .collect::<Decimal64Array>()
+ .with_precision_and_scale(16, 6)
+ .unwrap();
+
+ let cmp = make_comparator(&array, &array,
SortOptions::default()).unwrap();
+ assert_eq!(Ordering::Less, cmp(1, 0)); // index 1 (value 2) orders before index 0 (value 5)
+ assert_eq!(Ordering::Greater, cmp(0, 2)); // index 0 (value 5) orders after index 2 (value 3)
+ }
+
+ #[test]
+ fn test_decimali128() {
let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)]
.into_iter()
.collect::<Decimal128Array>()
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index ba026af637..170fa027ea 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -2307,6 +2307,16 @@ mod tests {
);
}
+ #[test]
+ fn test_sort_indices_decimal32() { // instantiates the generic sort-indices helper for Decimal32Type
+ test_sort_indices_decimal::<Decimal32Type>(8, 3); // NOTE(review): (8, 3) presumably precision and scale; helper is defined outside this hunk
+ }
+
+ #[test]
+ fn test_sort_indices_decimal64() { // instantiates the generic sort-indices helper for Decimal64Type
+ test_sort_indices_decimal::<Decimal64Type>(17, 5); // NOTE(review): (17, 5) presumably precision and scale; helper is defined outside this hunk
+ }
+
#[test]
fn test_sort_indices_decimal128() {
test_sort_indices_decimal::<Decimal128Type>(23, 6);
@@ -2460,6 +2470,16 @@ mod tests {
);
}
+ #[test]
+ fn test_sort_decimal32() { // instantiates the generic sort helper for Decimal32Type
+ test_sort_decimal::<Decimal32Type>(8, 3); // NOTE(review): (8, 3) presumably precision and scale; helper is defined outside this hunk
+ }
+
+ #[test]
+ fn test_sort_decimal64() { // instantiates the generic sort helper for Decimal64Type
+ test_sort_decimal::<Decimal64Type>(17, 5); // NOTE(review): (17, 5) presumably precision and scale; helper is defined outside this hunk
+ }
+
#[test]
fn test_sort_decimal128() {
test_sort_decimal::<Decimal128Type>(23, 6);
diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index 9508249324..a3b9f58772 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -1800,6 +1800,66 @@ mod tests {
}
}
+ #[test]
+ fn test_decimal32() { // row-format ordering and round trip for a single Decimal32 column
+ let converter =
RowConverter::new(vec![SortField::new(DataType::Decimal32(
+ DECIMAL32_MAX_PRECISION,
+ 7,
+ ))])
+ .unwrap();
+ let col = Arc::new(
+ Decimal32Array::from_iter([ // values are listed in ascending order, with the null first
+ None,
+ Some(i32::MIN),
+ Some(-13),
+ Some(46_i32),
+ Some(5456_i32),
+ Some(i32::MAX),
+ ])
+ .with_precision_and_scale(9, 7) // NOTE(review): presumably 9 == DECIMAL32_MAX_PRECISION so the column type matches the converter — confirm
+ .unwrap(),
+ ) as ArrayRef;
+
+ let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
+ for i in 0..rows.num_rows() - 1 { // each encoded row must compare strictly below its successor
+ assert!(rows.row(i) < rows.row(i + 1));
+ }
+
+ let back = converter.convert_rows(&rows).unwrap(); // decode the rows back into columns
+ assert_eq!(back.len(), 1); // one input column, so one output column
+ assert_eq!(col.as_ref(), back[0].as_ref())
+ }
+
+ #[test]
+ fn test_decimal64() { // row-format ordering and round trip for a single Decimal64 column
+ let converter =
RowConverter::new(vec![SortField::new(DataType::Decimal64(
+ DECIMAL64_MAX_PRECISION,
+ 7,
+ ))])
+ .unwrap();
+ let col = Arc::new(
+ Decimal64Array::from_iter([ // values are listed in ascending order, with the null first
+ None,
+ Some(i64::MIN),
+ Some(-13),
+ Some(46_i64),
+ Some(5456_i64),
+ Some(i64::MAX),
+ ])
+ .with_precision_and_scale(18, 7) // NOTE(review): presumably 18 == DECIMAL64_MAX_PRECISION so the column type matches the converter — confirm
+ .unwrap(),
+ ) as ArrayRef;
+
+ let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
+ for i in 0..rows.num_rows() - 1 { // each encoded row must compare strictly below its successor
+ assert!(rows.row(i) < rows.row(i + 1));
+ }
+
+ let back = converter.convert_rows(&rows).unwrap(); // decode the rows back into columns
+ assert_eq!(back.len(), 1); // one input column, so one output column
+ assert_eq!(col.as_ref(), back[0].as_ref())
+ }
+
#[test]
fn test_decimal128() {
let converter =
RowConverter::new(vec![SortField::new(DataType::Decimal128(
diff --git a/arrow/benches/array_from_vec.rs b/arrow/benches/array_from_vec.rs
index 2850eae5d7..dc1b2d7b74 100644
--- a/arrow/benches/array_from_vec.rs
+++ b/arrow/benches/array_from_vec.rs
@@ -73,6 +73,28 @@ fn struct_array_from_vec(
hint::black_box(StructArray::try_from(vec![(field1, strings), (field2,
ints)]).unwrap());
}
+fn decimal32_array_from_vec(array: &[Option<i32>]) { // benchmark body: slice of Option<i32> -> Decimal32Array
+ hint::black_box( // black_box keeps the optimizer from discarding the constructed array
+ array
+ .iter()
+ .copied()
+ .collect::<Decimal32Array>()
+ .with_precision_and_scale(9, 2)
+ .unwrap(), // panics if with_precision_and_scale returns Err
+ );
+}
+
+fn decimal64_array_from_vec(array: &[Option<i64>]) { // benchmark body: slice of Option<i64> -> Decimal64Array
+ hint::black_box( // black_box keeps the optimizer from discarding the constructed array
+ array
+ .iter()
+ .copied()
+ .collect::<Decimal64Array>()
+ .with_precision_and_scale(17, 2)
+ .unwrap(), // panics if with_precision_and_scale returns Err
+ );
+}
+
fn decimal128_array_from_vec(array: &[Option<i128>]) {
hint::black_box(
array
@@ -96,6 +118,30 @@ fn decimal256_array_from_vec(array: &[Option<i256>]) {
}
fn decimal_benchmark(c: &mut Criterion) {
+ // bench decimal32 array
+ // create option<i32> array
+ let size: usize = 1 << 15;
+ let mut rng = rand::rng();
+ let mut array = vec![];
+ for _ in 0..size {
+ array.push(Some(rng.random_range::<i32, _>(0..99999999)));
+ }
+ c.bench_function("decimal32_array_from_vec 32768", |b| {
+ b.iter(|| decimal32_array_from_vec(array.as_slice()))
+ });
+
+ // bench decimal64 array
+ // create option<i64> array
+ let size: usize = 1 << 15;
+ let mut rng = rand::rng();
+ let mut array = vec![];
+ for _ in 0..size {
+ array.push(Some(rng.random_range::<i64, _>(0..9999999999)));
+ }
+ c.bench_function("decimal64_array_from_vec 32768", |b| {
+ b.iter(|| decimal64_array_from_vec(array.as_slice()))
+ });
+
// bench decimal128 array
// create option<i128> array
let size: usize = 1 << 15;
diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs
index 46dd18c0fa..2374797961 100644
--- a/arrow/benches/builder.rs
+++ b/arrow/benches/builder.rs
@@ -108,6 +108,42 @@ fn bench_string(c: &mut Criterion) {
group.finish();
}
+fn bench_decimal32(c: &mut Criterion) { // registers the "bench_decimal32_builder" benchmark
+ c.bench_function("bench_decimal32_builder", |b| {
+ b.iter(|| { // everything in this closure, including RNG creation, runs on each measured iteration
+ let mut rng = rand::rng();
+ let mut decimal_builder =
Decimal32Builder::with_capacity(BATCH_SIZE);
+ for _ in 0..BATCH_SIZE {
+ decimal_builder.append_value(rng.random_range::<i32,
_>(0..999999999));
+ }
+ hint::black_box( // black_box keeps the optimizer from discarding the finished array
+ decimal_builder
+ .finish()
+ .with_precision_and_scale(9, 0)
+ .unwrap(),
+ );
+ })
+ });
+}
+
+fn bench_decimal64(c: &mut Criterion) { // registers the "bench_decimal64_builder" benchmark
+ c.bench_function("bench_decimal64_builder", |b| {
+ b.iter(|| { // everything in this closure, including RNG creation, runs on each measured iteration
+ let mut rng = rand::rng();
+ let mut decimal_builder =
Decimal64Builder::with_capacity(BATCH_SIZE);
+ for _ in 0..BATCH_SIZE {
+ decimal_builder.append_value(rng.random_range::<i64,
_>(0..9999999999));
+ }
+ hint::black_box( // black_box keeps the optimizer from discarding the finished array
+ decimal_builder
+ .finish()
+ .with_precision_and_scale(18, 0)
+ .unwrap(),
+ );
+ })
+ });
+}
+
fn bench_decimal128(c: &mut Criterion) {
c.bench_function("bench_decimal128_builder", |b| {
b.iter(|| {
@@ -151,6 +187,8 @@ criterion_group!(
bench_primitive_nulls,
bench_bool,
bench_string,
+ bench_decimal32,
+ bench_decimal64,
bench_decimal128,
bench_decimal256,
);
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index d01031be5f..179fde0a70 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -83,6 +83,36 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool)
-> ArrayRef {
Arc::new(builder.finish())
}
+fn build_decimal32_array(size: usize, precision: u8, scale: i8) -> ArrayRef { // `size` random values wrapped as a Decimal32 ArrayRef
+ let mut rng = seedable_rng(); // helper defined outside this hunk; presumably a fixed-seed RNG — confirm
+ let mut builder = Decimal32Builder::with_capacity(size);
+
+ for _ in 0..size {
+ builder.append_value(rng.random_range::<i32, _>(0..1000000)); // raw values drawn from 0..1000000
+ }
+ Arc::new(
+ builder
+ .finish()
+ .with_precision_and_scale(precision, scale)
+ .unwrap(), // panics if with_precision_and_scale returns Err
+ )
+}
+
+fn build_decimal64_array(size: usize, precision: u8, scale: i8) -> ArrayRef { // `size` random values wrapped as a Decimal64 ArrayRef
+ let mut rng = seedable_rng(); // helper defined outside this hunk; presumably a fixed-seed RNG — confirm
+ let mut builder = Decimal64Builder::with_capacity(size);
+
+ for _ in 0..size {
+ builder.append_value(rng.random_range::<i64, _>(0..1000000000)); // raw values drawn from 0..1000000000
+ }
+ Arc::new(
+ builder
+ .finish()
+ .with_precision_and_scale(precision, scale)
+ .unwrap(), // panics if with_precision_and_scale returns Err
+ )
+}
+
fn build_decimal128_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
let mut rng = seedable_rng();
let mut builder = Decimal128Builder::with_capacity(size);
@@ -159,6 +189,8 @@ fn add_benchmark(c: &mut Criterion) {
let utf8_date_array = build_utf8_date_array(512, true);
let utf8_date_time_array = build_utf8_date_time_array(512, true);
+ let decimal32_array = build_decimal32_array(512, 9, 3);
+ let decimal64_array = build_decimal64_array(512, 10, 3);
let decimal128_array = build_decimal128_array(512, 10, 3);
let decimal256_array = build_decimal256_array(512, 50, 3);
let string_array = build_string_array(512);
@@ -248,6 +280,22 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| cast_array(&utf8_date_time_array, DataType::Date64))
});
+ c.bench_function("cast decimal32 to decimal32 512", |b| {
+ b.iter(|| cast_array(&decimal32_array, DataType::Decimal32(9, 4)))
+ });
+ c.bench_function("cast decimal32 to decimal32 512 lower precision", |b| {
+ b.iter(|| cast_array(&decimal32_array, DataType::Decimal32(6, 5)))
+ });
+ c.bench_function("cast decimal32 to decimal64 512", |b| {
+ b.iter(|| cast_array(&decimal32_array, DataType::Decimal64(11, 5)))
+ });
+ c.bench_function("cast decimal64 to decimal32 512", |b| {
+ b.iter(|| cast_array(&decimal64_array, DataType::Decimal32(9, 2)))
+ });
+ c.bench_function("cast decimal64 to decimal64 512", |b| {
+ b.iter(|| cast_array(&decimal64_array, DataType::Decimal64(12, 4)))
+ });
+
c.bench_function("cast decimal128 to decimal128 512", |b| {
b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(30, 5)))
});
diff --git a/arrow/benches/decimal_validate.rs
b/arrow/benches/decimal_validate.rs
index dfa4f59920..7867b10ba2 100644
--- a/arrow/benches/decimal_validate.rs
+++ b/arrow/benches/decimal_validate.rs
@@ -18,7 +18,10 @@
#[macro_use]
extern crate criterion;
-use arrow::array::{Array, Decimal128Array, Decimal128Builder, Decimal256Array,
Decimal256Builder};
+use arrow::array::{
+ Array, Decimal128Array, Decimal128Builder, Decimal256Array,
Decimal256Builder, Decimal32Array,
+ Decimal32Builder, Decimal64Array, Decimal64Builder,
+};
use criterion::Criterion;
use rand::Rng;
@@ -26,6 +29,14 @@ extern crate arrow;
use arrow_buffer::i256;
+fn validate_decimal32_array(array: Decimal32Array) { // measured step of the decimal32 validation benchmark
+ array.with_precision_and_scale(8, 0).unwrap(); // applies precision 8, scale 0; panics if the call returns Err
+}
+
+fn validate_decimal64_array(array: Decimal64Array) { // measured step of the decimal64 validation benchmark
+ array.with_precision_and_scale(16, 0).unwrap(); // applies precision 16, scale 0; panics if the call returns Err
+}
+
fn validate_decimal128_array(array: Decimal128Array) {
array.with_precision_and_scale(35, 0).unwrap();
}
@@ -34,6 +45,46 @@ fn validate_decimal256_array(array: Decimal256Array) {
array.with_precision_and_scale(35, 0).unwrap();
}
+fn validate_decimal32_benchmark(c: &mut Criterion) { // registers "validate_decimal32_array 20000"
+ let mut rng = rand::rng();
+ let size: i32 = 20000;
+ let mut decimal_builder = Decimal32Builder::with_capacity(size as usize);
+ for _ in 0..size {
+ decimal_builder.append_value(rng.random_range::<i32, _>(0..99999999)); // raw values drawn from 0..99999999
+ }
+ let decimal_array = decimal_builder
+ .finish()
+ .with_precision_and_scale(9, 0)
+ .unwrap();
+ let data = decimal_array.into_data(); // input is built once, outside the measured closure
+ c.bench_function("validate_decimal32_array 20000", |b| {
+ b.iter(|| { // measured: clone the data, rebuild the array, then validate it
+ let array = Decimal32Array::from(data.clone());
+ validate_decimal32_array(array);
+ })
+ });
+}
+
+fn validate_decimal64_benchmark(c: &mut Criterion) { // registers "validate_decimal64_array 20000"
+ let mut rng = rand::rng();
+ let size: i64 = 20000;
+ let mut decimal_builder = Decimal64Builder::with_capacity(size as usize);
+ for _ in 0..size {
+ decimal_builder.append_value(rng.random_range::<i64,
_>(0..999999999999));
+ }
+ let decimal_array = decimal_builder
+ .finish()
+ .with_precision_and_scale(18, 0)
+ .unwrap();
+ let data = decimal_array.into_data(); // input is built once, outside the measured closure
+ c.bench_function("validate_decimal64_array 20000", |b| {
+ b.iter(|| { // measured: clone the data, rebuild the array, then validate it
+ let array = Decimal64Array::from(data.clone());
+ validate_decimal64_array(array);
+ })
+ });
+}
+
fn validate_decimal128_benchmark(c: &mut Criterion) {
let mut rng = rand::rng();
let size: i128 = 20000;
@@ -78,6 +129,8 @@ fn validate_decimal256_benchmark(c: &mut Criterion) {
criterion_group!(
benches,
+ validate_decimal32_benchmark,
+ validate_decimal64_benchmark,
validate_decimal128_benchmark,
validate_decimal256_benchmark,
);
diff --git a/arrow/src/tensor.rs b/arrow/src/tensor.rs
index cd135a2f04..3b65ea7b52 100644
--- a/arrow/src/tensor.rs
+++ b/arrow/src/tensor.rs
@@ -86,6 +86,10 @@ pub type BooleanTensor<'a> = Tensor<'a, BooleanType>;
pub type Date32Tensor<'a> = Tensor<'a, Date32Type>;
 /// [Tensor] of type [Date64Type]
pub type Date64Tensor<'a> = Tensor<'a, Date64Type>;
+/// [Tensor] of type [Decimal32Type]
+pub type Decimal32Tensor<'a> = Tensor<'a, Decimal32Type>;
+/// [Tensor] of type [Decimal64Type]
+pub type Decimal64Tensor<'a> = Tensor<'a, Decimal64Type>;
/// [Tensor] of type [Decimal128Type]
pub type Decimal128Tensor<'a> = Tensor<'a, Decimal128Type>;
/// [Tensor] of type [Decimal256Type]
diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs
index da7d37fc48..522687c3e4 100644
--- a/arrow/tests/array_cast.rs
+++ b/arrow/tests/array_cast.rs
@@ -18,19 +18,21 @@
use arrow_array::builder::{PrimitiveDictionaryBuilder,
StringDictionaryBuilder, UnionBuilder};
use arrow_array::cast::AsArray;
use arrow_array::types::{
- ArrowDictionaryKeyType, Decimal128Type, Decimal256Type, Int16Type,
Int32Type, Int64Type,
- Int8Type, TimestampMicrosecondType, UInt16Type, UInt32Type, UInt64Type,
UInt8Type,
+ ArrowDictionaryKeyType, Decimal128Type, Decimal256Type, Decimal32Type,
Decimal64Type,
+ Int16Type, Int32Type, Int64Type, Int8Type, TimestampMicrosecondType,
UInt16Type, UInt32Type,
+ UInt64Type, UInt8Type,
};
use arrow_array::{
Array, ArrayRef, ArrowPrimitiveType, BinaryArray, BooleanArray,
Date32Array, Date64Array,
- Decimal128Array, DurationMicrosecondArray, DurationMillisecondArray,
DurationNanosecondArray,
- DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray,
Float16Array, Float32Array,
- Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
IntervalDayTimeArray,
- IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray,
LargeListArray,
- LargeStringArray, ListArray, NullArray, PrimitiveArray, StringArray,
StructArray,
- Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
Time64NanosecondArray,
- TimestampMicrosecondArray, TimestampMillisecondArray,
TimestampNanosecondArray,
- TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
UnionArray,
+ Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array,
DurationMicrosecondArray,
+ DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray,
FixedSizeBinaryArray,
+ FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int16Array,
Int32Array,
+ Int64Array, Int8Array, IntervalDayTimeArray, IntervalMonthDayNanoArray,
IntervalYearMonthArray,
+ LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, NullArray,
PrimitiveArray,
+ StringArray, StructArray, Time32MillisecondArray, Time32SecondArray,
Time64MicrosecondArray,
+ Time64NanosecondArray, TimestampMicrosecondArray,
TimestampMillisecondArray,
+ TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array,
UInt64Array,
+ UInt8Array, UnionArray,
};
use arrow_buffer::{i256, Buffer, IntervalDayTime, IntervalMonthDayNano};
use arrow_cast::pretty::pretty_format_columns;
@@ -261,7 +263,37 @@ fn get_arrays_of_all_types() -> Vec<ArrayRef> {
Arc::new(DurationMillisecondArray::from(vec![1000, 2000])),
Arc::new(DurationMicrosecondArray::from(vec![1000, 2000])),
Arc::new(DurationNanosecondArray::from(vec![1000, 2000])),
+ Arc::new(create_decimal32_array(vec![Some(1), Some(2), Some(3)], 9,
0).unwrap()),
+ Arc::new(create_decimal64_array(vec![Some(1), Some(2), Some(3)], 18,
0).unwrap()),
Arc::new(create_decimal128_array(vec![Some(1), Some(2), Some(3)], 38,
0).unwrap()),
+ Arc::new(
+ create_decimal256_array(
+ vec![
+ Some(i256::from_i128(1)),
+ Some(i256::from_i128(2)),
+ Some(i256::from_i128(3)),
+ ],
+ 40,
+ 0,
+ )
+ .unwrap(),
+ ),
+ make_dictionary_primitive::<Int8Type, Decimal32Type>(vec![1, 2]),
+ make_dictionary_primitive::<Int16Type, Decimal32Type>(vec![1, 2]),
+ make_dictionary_primitive::<Int32Type, Decimal32Type>(vec![1, 2]),
+ make_dictionary_primitive::<Int64Type, Decimal32Type>(vec![1, 2]),
+ make_dictionary_primitive::<UInt8Type, Decimal32Type>(vec![1, 2]),
+ make_dictionary_primitive::<UInt16Type, Decimal32Type>(vec![1, 2]),
+ make_dictionary_primitive::<UInt32Type, Decimal32Type>(vec![1, 2]),
+ make_dictionary_primitive::<UInt64Type, Decimal32Type>(vec![1, 2]),
+ make_dictionary_primitive::<Int8Type, Decimal64Type>(vec![1, 2]),
+ make_dictionary_primitive::<Int16Type, Decimal64Type>(vec![1, 2]),
+ make_dictionary_primitive::<Int32Type, Decimal64Type>(vec![1, 2]),
+ make_dictionary_primitive::<Int64Type, Decimal64Type>(vec![1, 2]),
+ make_dictionary_primitive::<UInt8Type, Decimal64Type>(vec![1, 2]),
+ make_dictionary_primitive::<UInt16Type, Decimal64Type>(vec![1, 2]),
+ make_dictionary_primitive::<UInt32Type, Decimal64Type>(vec![1, 2]),
+ make_dictionary_primitive::<UInt64Type, Decimal64Type>(vec![1, 2]),
make_dictionary_primitive::<Int8Type, Decimal128Type>(vec![1, 2]),
make_dictionary_primitive::<Int16Type, Decimal128Type>(vec![1, 2]),
make_dictionary_primitive::<Int32Type, Decimal128Type>(vec![1, 2]),
@@ -411,6 +443,28 @@ fn make_dictionary_utf8<K: ArrowDictionaryKeyType>() ->
ArrayRef {
Arc::new(b.finish())
}
+fn create_decimal32_array( // test helper: optional i32 values -> Decimal32Array with the given precision/scale
+ array: Vec<Option<i32>>,
+ precision: u8,
+ scale: i8,
+) -> Result<Decimal32Array, ArrowError> {
+ array
+ .into_iter()
+ .collect::<Decimal32Array>()
+ .with_precision_and_scale(precision, scale) // its Result is returned to the caller as-is
+}
+
+fn create_decimal64_array( // test helper: optional i64 values -> Decimal64Array with the given precision/scale
+ array: Vec<Option<i64>>,
+ precision: u8,
+ scale: i8,
+) -> Result<Decimal64Array, ArrowError> {
+ array
+ .into_iter()
+ .collect::<Decimal64Array>()
+ .with_precision_and_scale(precision, scale) // its Result is returned to the caller as-is
+}
+
fn create_decimal128_array(
array: Vec<Option<i128>>,
precision: u8,
@@ -422,6 +476,17 @@ fn create_decimal128_array(
.with_precision_and_scale(precision, scale)
}
+fn create_decimal256_array( // test helper: optional i256 values -> Decimal256Array with the given precision/scale
+ array: Vec<Option<i256>>,
+ precision: u8,
+ scale: i8,
+) -> Result<Decimal256Array, ArrowError> {
+ array
+ .into_iter()
+ .collect::<Decimal256Array>()
+ .with_precision_and_scale(precision, scale) // its Result is returned to the caller as-is
+}
+
// Get a selection of datatypes to try and cast to
fn get_all_types() -> Vec<DataType> {
use DataType::*;
@@ -501,6 +566,8 @@ fn get_all_types() -> Vec<DataType> {
Dictionary(Box::new(key_type.clone()), Box::new(LargeUtf8)),
Dictionary(Box::new(key_type.clone()), Box::new(Binary)),
Dictionary(Box::new(key_type.clone()), Box::new(LargeBinary)),
+ Dictionary(Box::new(key_type.clone()), Box::new(Decimal32(9,
0))),
+ Dictionary(Box::new(key_type.clone()), Box::new(Decimal64(18,
0))),
Dictionary(Box::new(key_type.clone()), Box::new(Decimal128(38,
0))),
Dictionary(Box::new(key_type), Box::new(Decimal256(76, 0))),
]