This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new e1e1fb8b30a Split arrow_cast::cast::decimal into its own submodule
(#5552)
e1e1fb8b30a is described below
commit e1e1fb8b30a39a55a5523a1463466a28c3a46707
Author: Clide S <[email protected]>
AuthorDate: Mon Mar 25 23:18:18 2024 -0400
Split arrow_cast::cast::decimal into its own submodule (#5552)
* Split arrow-cast::cast::decimal from arrow-cast::cast
* Minor formatting change
---------
Co-authored-by: Clide Stefani <[email protected]>
---
arrow-cast/src/cast/decimal.rs | 573 +++++++++++++++++++++++++++++++++++++++++
arrow-cast/src/cast/mod.rs | 557 +--------------------------------------
2 files changed, 575 insertions(+), 555 deletions(-)
diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs
new file mode 100644
index 00000000000..600f868a3e0
--- /dev/null
+++ b/arrow-cast/src/cast/decimal.rs
@@ -0,0 +1,573 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::cast::*;
+
+/// A utility trait that provides checked conversions between
+/// decimal types inspired by [`NumCast`]
+pub(crate) trait DecimalCast: Sized {
+ /// Converts `self` to an `i128`, returning `None` when the conversion fails.
+ fn to_i128(self) -> Option<i128>;
+
+ /// Converts `self` to an `i256`, returning `None` when the conversion fails.
+ fn to_i256(self) -> Option<i256>;
+
+ /// Builds `Self` from another [`DecimalCast`] value, returning `None` when
+ /// the conversion fails.
+ fn from_decimal<T: DecimalCast>(n: T) -> Option<Self>;
+}
+
+// `i128` converts to itself unchanged and widens to `i256` through
+// `i256::from_i128`; both of those conversions always yield `Some`.
+impl DecimalCast for i128 {
+ fn to_i128(self) -> Option<i128> {
+ Some(self)
+ }
+
+ fn to_i256(self) -> Option<i256> {
+ Some(i256::from_i128(self))
+ }
+
+ fn from_decimal<T: DecimalCast>(n: T) -> Option<Self> {
+ // Delegate to the source type's own conversion to `i128`.
+ n.to_i128()
+ }
+}
+
+// `i256` converts to itself unchanged; the conversion to `i128` is delegated
+// to `self.to_i128()` and its `Option` result is returned as-is.
+impl DecimalCast for i256 {
+ fn to_i128(self) -> Option<i128> {
+ // NOTE(review): presumably this resolves to an inherent `i256::to_i128`
+ // rather than to this trait method (which would recurse) — confirm.
+ self.to_i128()
+ }
+
+ fn to_i256(self) -> Option<i256> {
+ Some(self)
+ }
+
+ fn from_decimal<T: DecimalCast>(n: T) -> Option<Self> {
+ // Delegate to the source type's own conversion to `i256`.
+ n.to_i256()
+ }
+}
+
+pub(crate) fn cast_decimal_to_decimal_error<I, O>(
+ output_precision: u8,
+ output_scale: i8,
+) -> impl Fn(<I as ArrowPrimitiveType>::Native) -> ArrowError
+where
+ I: DecimalType,
+ O: DecimalType,
+ I::Native: DecimalCast + ArrowNativeTypeOp,
+ O::Native: DecimalCast + ArrowNativeTypeOp,
+{
+ move |x: I::Native| {
+ ArrowError::CastError(format!(
+ "Cannot cast to {}({}, {}). Overflowing on {:?}",
+ O::PREFIX,
+ output_precision,
+ output_scale,
+ x
+ ))
+ }
+}
+
+/// Rescales `array` from `input_scale` down to a smaller `output_scale`,
+/// converting the native values from `I::Native` to `O::Native`.
+///
+/// Each value is divided by `10^(input_scale - output_scale)`. The quotient is
+/// bumped by one away from zero when the remainder is at least half of the
+/// divisor in magnitude.
+///
+/// With `cast_options.safe` set, values that cannot be represented in
+/// `O::Native` are passed to `unary_opt` as `None`; otherwise the first such
+/// value produces the error built by [`cast_decimal_to_decimal_error`].
+///
+/// # Errors
+///
+/// Returns an error if `10^(input_scale - output_scale)` overflows
+/// `I::Native`, or (when not `safe`) if a value does not fit the output type.
+pub(crate) fn convert_to_smaller_scale_decimal<I, O>(
+    array: &PrimitiveArray<I>,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+    cast_options: &CastOptions,
+) -> Result<PrimitiveArray<O>, ArrowError>
+where
+    I: DecimalType,
+    O: DecimalType,
+    I::Native: DecimalCast + ArrowNativeTypeOp,
+    O::Native: DecimalCast + ArrowNativeTypeOp,
+{
+    let error = cast_decimal_to_decimal_error::<I, O>(output_precision, output_scale);
+    // Widen before subtracting: the difference of two `i8` scales can exceed
+    // `i8::MAX`, which would overflow if computed in `i8`.
+    let delta_scale = (i32::from(input_scale) - i32::from(output_scale)) as u32;
+    let div = I::Native::from_decimal(10_i128)
+        .unwrap()
+        .pow_checked(delta_scale)?;
+
+    let half = div.div_wrapping(I::Native::from_usize(2).unwrap());
+    let half_neg = half.neg_wrapping();
+
+    let f = |x: I::Native| {
+        // div is >= 10 and so this cannot overflow
+        let d = x.div_wrapping(div);
+        let r = x.mod_wrapping(div);
+
+        // Round result
+        let adjusted = match x >= I::Native::ZERO {
+            true if r >= half => d.add_wrapping(I::Native::ONE),
+            false if r <= half_neg => d.sub_wrapping(I::Native::ONE),
+            _ => d,
+        };
+        O::Native::from_decimal(adjusted)
+    };
+
+    Ok(match cast_options.safe {
+        true => array.unary_opt(f),
+        false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?,
+    })
+}
+
+/// Rescales `array` from `input_scale` up to a larger (or equal)
+/// `output_scale`, converting the native values from `I::Native` to
+/// `O::Native`.
+///
+/// Each value is first converted to `O::Native` and then multiplied by
+/// `10^(output_scale - input_scale)` using checked multiplication.
+///
+/// With `cast_options.safe` set, values whose conversion or multiplication
+/// fails are passed to `unary_opt` as `None`; otherwise the first such value
+/// produces the error built by [`cast_decimal_to_decimal_error`].
+///
+/// # Errors
+///
+/// Returns an error if `10^(output_scale - input_scale)` overflows
+/// `O::Native`, or (when not `safe`) if a value overflows the output type.
+pub(crate) fn convert_to_bigger_or_equal_scale_decimal<I, O>(
+    array: &PrimitiveArray<I>,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+    cast_options: &CastOptions,
+) -> Result<PrimitiveArray<O>, ArrowError>
+where
+    I: DecimalType,
+    O: DecimalType,
+    I::Native: DecimalCast + ArrowNativeTypeOp,
+    O::Native: DecimalCast + ArrowNativeTypeOp,
+{
+    let error = cast_decimal_to_decimal_error::<I, O>(output_precision, output_scale);
+    // Widen before subtracting: the difference of two `i8` scales can exceed
+    // `i8::MAX`, which would overflow if computed in `i8`.
+    let delta_scale = (i32::from(output_scale) - i32::from(input_scale)) as u32;
+    let mul = O::Native::from_decimal(10_i128)
+        .unwrap()
+        .pow_checked(delta_scale)?;
+
+    let f = |x| O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok());
+
+    Ok(match cast_options.safe {
+        true => array.unary_opt(f),
+        false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?,
+    })
+}
+
+/// Casts a decimal array to the same decimal type `T` with a new precision
+/// and scale.
+///
+/// When the scales are equal the native values are reused unchanged (the
+/// array is cloned); otherwise the values are rescaled with
+/// [`convert_to_smaller_scale_decimal`] or
+/// [`convert_to_bigger_or_equal_scale_decimal`]. The result is then tagged
+/// with `output_precision` and `output_scale`.
+///
+/// # Errors
+///
+/// Propagates any error from the rescaling helpers or from
+/// `with_precision_and_scale`.
+pub(crate) fn cast_decimal_to_decimal_same_type<T>(
+    array: &PrimitiveArray<T>,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError>
+where
+    T: DecimalType,
+    T::Native: DecimalCast + ArrowNativeTypeOp,
+{
+    let array: PrimitiveArray<T> = match input_scale.cmp(&output_scale) {
+        Ordering::Equal => {
+            // The scale doesn't change, so the native values don't need to be changed
+            array.clone()
+        }
+        Ordering::Greater => convert_to_smaller_scale_decimal::<T, T>(
+            array,
+            input_scale,
+            output_precision,
+            output_scale,
+            cast_options,
+        )?,
+        Ordering::Less => {
+            // input_scale < output_scale
+            convert_to_bigger_or_equal_scale_decimal::<T, T>(
+                array,
+                input_scale,
+                output_precision,
+                output_scale,
+                cast_options,
+            )?
+        }
+    };
+
+    Ok(Arc::new(array.with_precision_and_scale(
+        output_precision,
+        output_scale,
+    )?))
+}
+
+/// Casts a decimal array of type `I` to a decimal array of a different type
+/// `O`, applying the requested output precision and scale.
+///
+/// A strictly smaller output scale goes through
+/// [`convert_to_smaller_scale_decimal`]; an equal or larger one goes through
+/// [`convert_to_bigger_or_equal_scale_decimal`].
+pub(crate) fn cast_decimal_to_decimal<I, O>(
+    array: &PrimitiveArray<I>,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError>
+where
+    I: DecimalType,
+    O: DecimalType,
+    I::Native: DecimalCast + ArrowNativeTypeOp,
+    O::Native: DecimalCast + ArrowNativeTypeOp,
+{
+    let rescaled: PrimitiveArray<O> = match input_scale > output_scale {
+        true => convert_to_smaller_scale_decimal::<I, O>(
+            array,
+            input_scale,
+            output_precision,
+            output_scale,
+            cast_options,
+        )?,
+        false => convert_to_bigger_or_equal_scale_decimal::<I, O>(
+            array,
+            input_scale,
+            output_precision,
+            output_scale,
+            cast_options,
+        )?,
+    };
+
+    let tagged = rescaled.with_precision_and_scale(output_precision, output_scale)?;
+    Ok(Arc::new(tagged))
+}
+
+/// Parses given string to specified decimal native (i128/i256) based on given
+/// scale. Returns an `Err` if it cannot parse given string.
+///
+/// The input is trimmed and split on `.` into an integer part and a
+/// fractional part; an optional leading `+` or `-` is consumed from the
+/// integer part. When there are more fractional digits than `scale`, the
+/// extra digits are rounded away (half rounds up in magnitude); when there
+/// are fewer, the fraction is right-padded with zeros up to `scale` digits.
+///
+/// # Errors
+///
+/// Returns [`ArrowError::InvalidArgumentError`] when the text has more than
+/// one `.`, does not start each part with an ASCII digit, cannot be parsed as
+/// an `i256`, or does not fit `T::Native`. Arithmetic on the integer part is
+/// checked, so an overflow while scaling is reported as an error instead of
+/// silently wrapping.
+pub(crate) fn parse_string_to_decimal_native<T: DecimalType>(
+    value_str: &str,
+    scale: usize,
+) -> Result<T::Native, ArrowError>
+where
+    T::Native: DecimalCast + ArrowNativeTypeOp,
+{
+    let value_str = value_str.trim();
+    let parts: Vec<&str> = value_str.split('.').collect();
+    if parts.len() > 2 {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Invalid decimal format: {value_str:?}"
+        )));
+    }
+
+    // Consume an optional leading sign from the integer part
+    let (negative, first_part) = if parts[0].is_empty() {
+        (false, parts[0])
+    } else {
+        match parts[0].as_bytes()[0] {
+            b'-' => (true, &parts[0][1..]),
+            b'+' => (false, &parts[0][1..]),
+            _ => (false, parts[0]),
+        }
+    };
+
+    let integers = first_part.trim_start_matches('0');
+    let decimals = if parts.len() == 2 { parts[1] } else { "" };
+
+    if !integers.is_empty() && !integers.as_bytes()[0].is_ascii_digit() {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Invalid decimal format: {value_str:?}"
+        )));
+    }
+
+    if !decimals.is_empty() && !decimals.as_bytes()[0].is_ascii_digit() {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Invalid decimal format: {value_str:?}"
+        )));
+    }
+
+    // Adjust decimal based on scale
+    let mut number_decimals = if decimals.len() > scale {
+        let decimal_number = i256::from_string(decimals).ok_or_else(|| {
+            ArrowError::InvalidArgumentError(format!("Cannot parse decimal format: {value_str}"))
+        })?;
+
+        let div = i256::from_i128(10_i128).pow_checked((decimals.len() - scale) as u32)?;
+
+        let half = div.div_wrapping(i256::from_i128(2));
+        let half_neg = half.neg_wrapping();
+
+        let d = decimal_number.div_wrapping(div);
+        let r = decimal_number.mod_wrapping(div);
+
+        // Round result. The sign was stripped above and the fraction starts
+        // with a digit, so the negative arm is expected to be purely defensive.
+        let adjusted = match decimal_number >= i256::ZERO {
+            true if r >= half => d.add_wrapping(i256::ONE),
+            false if r <= half_neg => d.sub_wrapping(i256::ONE),
+            _ => d,
+        };
+
+        let integers = if !integers.is_empty() {
+            let whole = i256::from_string(integers).ok_or_else(|| {
+                ArrowError::InvalidArgumentError(format!(
+                    "Cannot parse decimal format: {value_str}"
+                ))
+            })?;
+            // Checked arithmetic: a long integer part times 10^scale can
+            // exceed i256 and must be reported rather than wrapped.
+            let scale_factor = i256::from_i128(10_i128).pow_checked(scale as u32)?;
+            whole.mul_checked(scale_factor)?
+        } else {
+            i256::ZERO
+        };
+
+        format!("{}", integers.add_checked(adjusted)?)
+    } else {
+        let padding = if scale > decimals.len() { scale } else { 0 };
+
+        // Right-pad the fraction with zeros so it has exactly `scale` digits
+        let decimals = format!("{decimals:0<padding$}");
+        format!("{integers}{decimals}")
+    };
+
+    if negative {
+        number_decimals.insert(0, '-');
+    }
+
+    let value = i256::from_string(number_decimals.as_str()).ok_or_else(|| {
+        ArrowError::InvalidArgumentError(format!(
+            "Cannot convert {} to {}: Overflow",
+            value_str,
+            T::PREFIX
+        ))
+    })?;
+
+    T::Native::from_decimal(value).ok_or_else(|| {
+        ArrowError::InvalidArgumentError(format!("Cannot convert {} to {}", value_str, T::PREFIX))
+    })
+}
+
+pub(crate) fn string_to_decimal_cast<T, Offset: OffsetSizeTrait>(
+ from: &GenericStringArray<Offset>,
+ precision: u8,
+ scale: i8,
+ cast_options: &CastOptions,
+) -> Result<PrimitiveArray<T>, ArrowError>
+where
+ T: DecimalType,
+ T::Native: DecimalCast + ArrowNativeTypeOp,
+{
+ if cast_options.safe {
+ let iter = from.iter().map(|v| {
+ v.and_then(|v| parse_string_to_decimal_native::<T>(v, scale as
usize).ok())
+ .and_then(|v| {
+ T::validate_decimal_precision(v, precision)
+ .is_ok()
+ .then_some(v)
+ })
+ });
+ // Benefit:
+ // 20% performance improvement
+ // Soundness:
+ // The iterator is trustedLen because it comes from an
`StringArray`.
+ Ok(unsafe {
+ PrimitiveArray::<T>::from_trusted_len_iter(iter)
+ .with_precision_and_scale(precision, scale)?
+ })
+ } else {
+ let vec = from
+ .iter()
+ .map(|v| {
+ v.map(|v| {
+ parse_string_to_decimal_native::<T>(v, scale as usize)
+ .map_err(|_| {
+ ArrowError::CastError(format!(
+ "Cannot cast string '{}' to value of {:?}
type",
+ v,
+ T::DATA_TYPE,
+ ))
+ })
+ .and_then(|v| T::validate_decimal_precision(v,
precision).map(|_| v))
+ })
+ .transpose()
+ })
+ .collect::<Result<Vec<_>, _>>()?;
+ // Benefit:
+ // 20% performance improvement
+ // Soundness:
+ // The iterator is trustedLen because it comes from an
`StringArray`.
+ Ok(unsafe {
+ PrimitiveArray::<T>::from_trusted_len_iter(vec.iter())
+ .with_precision_and_scale(precision, scale)?
+ })
+ }
+}
+
+/// Cast Utf8 to decimal
+///
+/// Rejects a negative `scale` and a `scale` above `T::MAX_SCALE` with an
+/// [`ArrowError::InvalidArgumentError`], then downcasts `from` to a
+/// `GenericStringArray<Offset>` and delegates to [`string_to_decimal_cast`].
+///
+/// # Panics
+///
+/// Panics if `from` is not a `GenericStringArray<Offset>`.
+pub(crate) fn cast_string_to_decimal<T, Offset: OffsetSizeTrait>(
+    from: &dyn Array,
+    precision: u8,
+    scale: i8,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError>
+where
+    T: DecimalType,
+    T::Native: DecimalCast + ArrowNativeTypeOp,
+{
+    if scale < 0 {
+        let message = format!("Cannot cast string to decimal with negative scale {scale}");
+        return Err(ArrowError::InvalidArgumentError(message));
+    }
+
+    if scale > T::MAX_SCALE {
+        let message = format!(
+            "Cannot cast string to decimal greater than maximum scale {}",
+            T::MAX_SCALE
+        );
+        return Err(ArrowError::InvalidArgumentError(message));
+    }
+
+    // Callers are expected to dispatch on the data type before calling, so a
+    // failed downcast is treated as a bug.
+    let strings = from
+        .as_any()
+        .downcast_ref::<GenericStringArray<Offset>>()
+        .unwrap();
+
+    let decimals = string_to_decimal_cast::<T, Offset>(strings, precision, scale, cast_options)?;
+    Ok(Arc::new(decimals))
+}
+
+/// Casts a floating-point array to a `Decimal128` array with the given
+/// precision and scale.
+///
+/// Each value is converted to `f64`, multiplied by `10^scale`, rounded, and
+/// converted to `i128`; the result is then checked against `precision`.
+///
+/// * With `cast_options.safe` set, values that fail either step are handed
+///   to `unary_opt` as `None`.
+/// * Otherwise a failed `i128` conversion yields an
+///   [`ArrowError::CastError`], and a precision failure is propagated as-is.
+pub(crate) fn cast_floating_point_to_decimal128<T: ArrowPrimitiveType>(
+    array: &PrimitiveArray<T>,
+    precision: u8,
+    scale: i8,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError>
+where
+    <T as ArrowPrimitiveType>::Native: AsPrimitive<f64>,
+{
+    let mul = 10_f64.powi(scale as i32);
+
+    let converted = if cast_options.safe {
+        array.unary_opt::<_, Decimal128Type>(|v| {
+            let scaled = (mul * v.as_()).round().to_i128();
+            scaled.filter(|candidate| {
+                Decimal128Type::validate_decimal_precision(*candidate, precision).is_ok()
+            })
+        })
+    } else {
+        array.try_unary::<_, Decimal128Type, _>(|v| {
+            match (mul * v.as_()).round().to_i128() {
+                Some(native) => {
+                    Decimal128Type::validate_decimal_precision(native, precision).map(|_| native)
+                }
+                None => Err(ArrowError::CastError(format!(
+                    "Cannot cast to {}({}, {}). Overflowing on {:?}",
+                    Decimal128Type::PREFIX,
+                    precision,
+                    scale,
+                    v
+                ))),
+            }
+        })?
+    };
+
+    converted
+        .with_precision_and_scale(precision, scale)
+        .map(|a| Arc::new(a) as ArrayRef)
+}
+
+/// Casts a floating-point array to a `Decimal256` array with the given
+/// precision and scale.
+///
+/// Each value is converted to `f64`, multiplied by `10^scale`, rounded, and
+/// converted with `i256::from_f64`; the result is then checked against
+/// `precision`.
+///
+/// * With `cast_options.safe` set, values that fail either step are handed
+///   to `unary_opt` as `None`.
+/// * Otherwise a failed `i256` conversion yields an
+///   [`ArrowError::CastError`], and a precision failure is propagated as-is.
+pub(crate) fn cast_floating_point_to_decimal256<T: ArrowPrimitiveType>(
+    array: &PrimitiveArray<T>,
+    precision: u8,
+    scale: i8,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError>
+where
+    <T as ArrowPrimitiveType>::Native: AsPrimitive<f64>,
+{
+    let mul = 10_f64.powi(scale as i32);
+
+    let converted = if cast_options.safe {
+        array.unary_opt::<_, Decimal256Type>(|v| {
+            let scaled = i256::from_f64((v.as_() * mul).round());
+            scaled.filter(|candidate| {
+                Decimal256Type::validate_decimal_precision(*candidate, precision).is_ok()
+            })
+        })
+    } else {
+        array.try_unary::<_, Decimal256Type, _>(|v| {
+            match i256::from_f64((v.as_() * mul).round()) {
+                Some(native) => {
+                    Decimal256Type::validate_decimal_precision(native, precision).map(|_| native)
+                }
+                None => Err(ArrowError::CastError(format!(
+                    "Cannot cast to {}({}, {}). Overflowing on {:?}",
+                    Decimal256Type::PREFIX,
+                    precision,
+                    scale,
+                    v
+                ))),
+            }
+        })?
+    };
+
+    converted
+        .with_precision_and_scale(precision, scale)
+        .map(|a| Arc::new(a) as ArrayRef)
+}
+
+/// Casts a decimal array of type `D` to an integer array of type `T`.
+///
+/// Every non-null value is divided by `base^scale` (checked division) and the
+/// quotient is converted to `T::Native` through [`NumCast`]. Null inputs stay
+/// null.
+///
+/// * With `cast_options.safe` set, a failed division or conversion appends a
+///   null.
+/// * Otherwise a failed division propagates its error and an out-of-range
+///   quotient yields an [`ArrowError::CastError`].
+///
+/// # Errors
+///
+/// Also returns an [`ArrowError::CastError`] when `base^scale` overflows
+/// `D::Native`.
+pub(crate) fn cast_decimal_to_integer<D, T>(
+    array: &dyn Array,
+    base: D::Native,
+    scale: i8,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError>
+where
+    T: ArrowPrimitiveType,
+    <T as ArrowPrimitiveType>::Native: NumCast,
+    D: DecimalType + ArrowPrimitiveType,
+    <D as ArrowPrimitiveType>::Native: ArrowNativeTypeOp + ToPrimitive,
+{
+    let array = array.as_primitive::<D>();
+
+    let div: D::Native = base.pow_checked(scale as u32).map_err(|_| {
+        ArrowError::CastError(format!(
+            "Cannot cast to {:?}. The scale {} causes overflow.",
+            D::PREFIX,
+            scale,
+        ))
+    })?;
+
+    let mut value_builder = PrimitiveBuilder::<T>::with_capacity(array.len());
+
+    for i in 0..array.len() {
+        if array.is_null(i) {
+            value_builder.append_null();
+            continue;
+        }
+
+        let raw = array.value(i);
+        if cast_options.safe {
+            // Any failure in safe mode degrades to a null entry
+            let converted = raw
+                .div_checked(div)
+                .ok()
+                .and_then(<T::Native as NumCast>::from::<D::Native>);
+            value_builder.append_option(converted);
+        } else {
+            let quotient = raw.div_checked(div)?;
+            let converted =
+                <T::Native as NumCast>::from::<D::Native>(quotient).ok_or_else(|| {
+                    ArrowError::CastError(format!(
+                        "value of {:?} is out of range {}",
+                        quotient,
+                        T::DATA_TYPE
+                    ))
+                })?;
+            value_builder.append_value(converted);
+        }
+    }
+
+    Ok(Arc::new(value_builder.finish()))
+}
+
+/// Casts a decimal array of type `D` to a floating-point array of type `T`
+/// by applying `op` to every native value through `unary`.
+pub(crate) fn cast_decimal_to_float<D: DecimalType, T: ArrowPrimitiveType, F>(
+    array: &dyn Array,
+    op: F,
+) -> Result<ArrayRef, ArrowError>
+where
+    F: Fn(D::Native) -> T::Native,
+{
+    let floats = array.as_primitive::<D>().unary::<_, T>(op);
+    Ok(Arc::new(floats))
+}
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 2b9892aa3fb..61bbf128003 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -37,7 +37,9 @@
//! assert_eq!(7.0, c.value(2));
//! ```
+mod decimal;
mod list;
+use crate::cast::decimal::*;
use crate::cast::list::*;
use chrono::{NaiveTime, Offset, TimeZone, Utc};
@@ -337,92 +339,6 @@ where
Ok(Arc::new(array.with_precision_and_scale(precision, scale)?))
}
-fn cast_floating_point_to_decimal128<T: ArrowPrimitiveType>(
- array: &PrimitiveArray<T>,
- precision: u8,
- scale: i8,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
- <T as ArrowPrimitiveType>::Native: AsPrimitive<f64>,
-{
- let mul = 10_f64.powi(scale as i32);
-
- if cast_options.safe {
- array
- .unary_opt::<_, Decimal128Type>(|v| {
- (mul * v.as_())
- .round()
- .to_i128()
- .filter(|v| Decimal128Type::validate_decimal_precision(*v,
precision).is_ok())
- })
- .with_precision_and_scale(precision, scale)
- .map(|a| Arc::new(a) as ArrayRef)
- } else {
- array
- .try_unary::<_, Decimal128Type, _>(|v| {
- (mul * v.as_())
- .round()
- .to_i128()
- .ok_or_else(|| {
- ArrowError::CastError(format!(
- "Cannot cast to {}({}, {}). Overflowing on {:?}",
- Decimal128Type::PREFIX,
- precision,
- scale,
- v
- ))
- })
- .and_then(|v| {
- Decimal128Type::validate_decimal_precision(v,
precision).map(|_| v)
- })
- })?
- .with_precision_and_scale(precision, scale)
- .map(|a| Arc::new(a) as ArrayRef)
- }
-}
-
-fn cast_floating_point_to_decimal256<T: ArrowPrimitiveType>(
- array: &PrimitiveArray<T>,
- precision: u8,
- scale: i8,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
- <T as ArrowPrimitiveType>::Native: AsPrimitive<f64>,
-{
- let mul = 10_f64.powi(scale as i32);
-
- if cast_options.safe {
- array
- .unary_opt::<_, Decimal256Type>(|v| {
- i256::from_f64((v.as_() * mul).round())
- .filter(|v| Decimal256Type::validate_decimal_precision(*v,
precision).is_ok())
- })
- .with_precision_and_scale(precision, scale)
- .map(|a| Arc::new(a) as ArrayRef)
- } else {
- array
- .try_unary::<_, Decimal256Type, _>(|v| {
- i256::from_f64((v.as_() * mul).round())
- .ok_or_else(|| {
- ArrowError::CastError(format!(
- "Cannot cast to {}({}, {}). Overflowing on {:?}",
- Decimal256Type::PREFIX,
- precision,
- scale,
- v
- ))
- })
- .and_then(|v| {
- Decimal256Type::validate_decimal_precision(v,
precision).map(|_| v)
- })
- })?
- .with_precision_and_scale(precision, scale)
- .map(|a| Arc::new(a) as ArrayRef)
- }
-}
-
/// Cast the array from interval year month to month day nano
fn cast_interval_year_month_to_interval_month_day_nano(
array: &dyn Array,
@@ -552,79 +468,6 @@ fn cast_reinterpret_arrays<I: ArrowPrimitiveType, O:
ArrowPrimitiveType<Native =
Ok(Arc::new(array.as_primitive::<I>().reinterpret_cast::<O>()))
}
-fn cast_decimal_to_integer<D, T>(
- array: &dyn Array,
- base: D::Native,
- scale: i8,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
- T: ArrowPrimitiveType,
- <T as ArrowPrimitiveType>::Native: NumCast,
- D: DecimalType + ArrowPrimitiveType,
- <D as ArrowPrimitiveType>::Native: ArrowNativeTypeOp + ToPrimitive,
-{
- let array = array.as_primitive::<D>();
-
- let div: D::Native = base.pow_checked(scale as u32).map_err(|_| {
- ArrowError::CastError(format!(
- "Cannot cast to {:?}. The scale {} causes overflow.",
- D::PREFIX,
- scale,
- ))
- })?;
-
- let mut value_builder = PrimitiveBuilder::<T>::with_capacity(array.len());
-
- if cast_options.safe {
- for i in 0..array.len() {
- if array.is_null(i) {
- value_builder.append_null();
- } else {
- let v = array
- .value(i)
- .div_checked(div)
- .ok()
- .and_then(<T::Native as NumCast>::from::<D::Native>);
-
- value_builder.append_option(v);
- }
- }
- } else {
- for i in 0..array.len() {
- if array.is_null(i) {
- value_builder.append_null();
- } else {
- let v = array.value(i).div_checked(div)?;
-
- let value = <T::Native as
NumCast>::from::<D::Native>(v).ok_or_else(|| {
- ArrowError::CastError(format!(
- "value of {:?} is out of range {}",
- v,
- T::DATA_TYPE
- ))
- })?;
-
- value_builder.append_value(value);
- }
- }
- }
- Ok(Arc::new(value_builder.finish()))
-}
-
-// cast the decimal array to floating-point array
-fn cast_decimal_to_float<D: DecimalType, T: ArrowPrimitiveType, F>(
- array: &dyn Array,
- op: F,
-) -> Result<ArrayRef, ArrowError>
-where
- F: Fn(D::Native) -> T::Native,
-{
- let array = array.as_primitive::<D>();
- let array = array.unary::<_, T>(op);
- Ok(Arc::new(array))
-}
-
fn make_timestamp_array(
array: &PrimitiveArray<Int64Type>,
unit: TimeUnit,
@@ -2100,212 +1943,6 @@ const fn time_unit_multiple(unit: &TimeUnit) -> i64 {
}
}
-/// A utility trait that provides checked conversions between
-/// decimal types inspired by [`NumCast`]
-trait DecimalCast: Sized {
- fn to_i128(self) -> Option<i128>;
-
- fn to_i256(self) -> Option<i256>;
-
- fn from_decimal<T: DecimalCast>(n: T) -> Option<Self>;
-}
-
-impl DecimalCast for i128 {
- fn to_i128(self) -> Option<i128> {
- Some(self)
- }
-
- fn to_i256(self) -> Option<i256> {
- Some(i256::from_i128(self))
- }
-
- fn from_decimal<T: DecimalCast>(n: T) -> Option<Self> {
- n.to_i128()
- }
-}
-
-impl DecimalCast for i256 {
- fn to_i128(self) -> Option<i128> {
- self.to_i128()
- }
-
- fn to_i256(self) -> Option<i256> {
- Some(self)
- }
-
- fn from_decimal<T: DecimalCast>(n: T) -> Option<Self> {
- n.to_i256()
- }
-}
-
-fn cast_decimal_to_decimal_error<I, O>(
- output_precision: u8,
- output_scale: i8,
-) -> impl Fn(<I as ArrowPrimitiveType>::Native) -> ArrowError
-where
- I: DecimalType,
- O: DecimalType,
- I::Native: DecimalCast + ArrowNativeTypeOp,
- O::Native: DecimalCast + ArrowNativeTypeOp,
-{
- move |x: I::Native| {
- ArrowError::CastError(format!(
- "Cannot cast to {}({}, {}). Overflowing on {:?}",
- O::PREFIX,
- output_precision,
- output_scale,
- x
- ))
- }
-}
-
-fn convert_to_smaller_scale_decimal<I, O>(
- array: &PrimitiveArray<I>,
- input_scale: i8,
- output_precision: u8,
- output_scale: i8,
- cast_options: &CastOptions,
-) -> Result<PrimitiveArray<O>, ArrowError>
-where
- I: DecimalType,
- O: DecimalType,
- I::Native: DecimalCast + ArrowNativeTypeOp,
- O::Native: DecimalCast + ArrowNativeTypeOp,
-{
- let error = cast_decimal_to_decimal_error::<I, O>(output_precision,
output_scale);
- let div = I::Native::from_decimal(10_i128)
- .unwrap()
- .pow_checked((input_scale - output_scale) as u32)?;
-
- let half = div.div_wrapping(I::Native::from_usize(2).unwrap());
- let half_neg = half.neg_wrapping();
-
- let f = |x: I::Native| {
- // div is >= 10 and so this cannot overflow
- let d = x.div_wrapping(div);
- let r = x.mod_wrapping(div);
-
- // Round result
- let adjusted = match x >= I::Native::ZERO {
- true if r >= half => d.add_wrapping(I::Native::ONE),
- false if r <= half_neg => d.sub_wrapping(I::Native::ONE),
- _ => d,
- };
- O::Native::from_decimal(adjusted)
- };
-
- Ok(match cast_options.safe {
- true => array.unary_opt(f),
- false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?,
- })
-}
-
-fn convert_to_bigger_or_equal_scale_decimal<I, O>(
- array: &PrimitiveArray<I>,
- input_scale: i8,
- output_precision: u8,
- output_scale: i8,
- cast_options: &CastOptions,
-) -> Result<PrimitiveArray<O>, ArrowError>
-where
- I: DecimalType,
- O: DecimalType,
- I::Native: DecimalCast + ArrowNativeTypeOp,
- O::Native: DecimalCast + ArrowNativeTypeOp,
-{
- let error = cast_decimal_to_decimal_error::<I, O>(output_precision,
output_scale);
- let mul = O::Native::from_decimal(10_i128)
- .unwrap()
- .pow_checked((output_scale - input_scale) as u32)?;
-
- let f = |x| O::Native::from_decimal(x).and_then(|x|
x.mul_checked(mul).ok());
-
- Ok(match cast_options.safe {
- true => array.unary_opt(f),
- false => array.try_unary(|x| f(x).ok_or_else(|| error(x)))?,
- })
-}
-
-// Only support one type of decimal cast operations
-fn cast_decimal_to_decimal_same_type<T>(
- array: &PrimitiveArray<T>,
- input_scale: i8,
- output_precision: u8,
- output_scale: i8,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
- T: DecimalType,
- T::Native: DecimalCast + ArrowNativeTypeOp,
-{
- let array: PrimitiveArray<T> = match input_scale.cmp(&output_scale) {
- Ordering::Equal => {
- // the scale doesn't change, the native value don't need to be
changed
- array.clone()
- }
- Ordering::Greater => convert_to_smaller_scale_decimal::<T, T>(
- array,
- input_scale,
- output_precision,
- output_scale,
- cast_options,
- )?,
- Ordering::Less => {
- // input_scale < output_scale
- convert_to_bigger_or_equal_scale_decimal::<T, T>(
- array,
- input_scale,
- output_precision,
- output_scale,
- cast_options,
- )?
- }
- };
-
- Ok(Arc::new(array.with_precision_and_scale(
- output_precision,
- output_scale,
- )?))
-}
-
-// Support two different types of decimal cast operations
-fn cast_decimal_to_decimal<I, O>(
- array: &PrimitiveArray<I>,
- input_scale: i8,
- output_precision: u8,
- output_scale: i8,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
- I: DecimalType,
- O: DecimalType,
- I::Native: DecimalCast + ArrowNativeTypeOp,
- O::Native: DecimalCast + ArrowNativeTypeOp,
-{
- let array: PrimitiveArray<O> = if input_scale > output_scale {
- convert_to_smaller_scale_decimal::<I, O>(
- array,
- input_scale,
- output_precision,
- output_scale,
- cast_options,
- )?
- } else {
- convert_to_bigger_or_equal_scale_decimal::<I, O>(
- array,
- input_scale,
- output_precision,
- output_scale,
- cast_options,
- )?
- };
-
- Ok(Arc::new(array.with_precision_and_scale(
- output_precision,
- output_scale,
- )?))
-}
-
/// Convert Array into a PrimitiveArray of type, and apply numeric cast
fn cast_numeric_arrays<FROM, TO>(
from: &dyn Array,
@@ -2618,196 +2255,6 @@ where
Ok(Arc::new(output_array))
}
-/// Parses given string to specified decimal native (i128/i256) based on given
-/// scale. Returns an `Err` if it cannot parse given string.
-fn parse_string_to_decimal_native<T: DecimalType>(
- value_str: &str,
- scale: usize,
-) -> Result<T::Native, ArrowError>
-where
- T::Native: DecimalCast + ArrowNativeTypeOp,
-{
- let value_str = value_str.trim();
- let parts: Vec<&str> = value_str.split('.').collect();
- if parts.len() > 2 {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Invalid decimal format: {value_str:?}"
- )));
- }
-
- let (negative, first_part) = if parts[0].is_empty() {
- (false, parts[0])
- } else {
- match parts[0].as_bytes()[0] {
- b'-' => (true, &parts[0][1..]),
- b'+' => (false, &parts[0][1..]),
- _ => (false, parts[0]),
- }
- };
-
- let integers = first_part.trim_start_matches('0');
- let decimals = if parts.len() == 2 { parts[1] } else { "" };
-
- if !integers.is_empty() && !integers.as_bytes()[0].is_ascii_digit() {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Invalid decimal format: {value_str:?}"
- )));
- }
-
- if !decimals.is_empty() && !decimals.as_bytes()[0].is_ascii_digit() {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Invalid decimal format: {value_str:?}"
- )));
- }
-
- // Adjust decimal based on scale
- let mut number_decimals = if decimals.len() > scale {
- let decimal_number = i256::from_string(decimals).ok_or_else(|| {
- ArrowError::InvalidArgumentError(format!("Cannot parse decimal
format: {value_str}"))
- })?;
-
- let div = i256::from_i128(10_i128).pow_checked((decimals.len() -
scale) as u32)?;
-
- let half = div.div_wrapping(i256::from_i128(2));
- let half_neg = half.neg_wrapping();
-
- let d = decimal_number.div_wrapping(div);
- let r = decimal_number.mod_wrapping(div);
-
- // Round result
- let adjusted = match decimal_number >= i256::ZERO {
- true if r >= half => d.add_wrapping(i256::ONE),
- false if r <= half_neg => d.sub_wrapping(i256::ONE),
- _ => d,
- };
-
- let integers = if !integers.is_empty() {
- i256::from_string(integers)
- .ok_or_else(|| {
- ArrowError::InvalidArgumentError(format!(
- "Cannot parse decimal format: {value_str}"
- ))
- })
- .map(|v|
v.mul_wrapping(i256::from_i128(10_i128).pow_wrapping(scale as u32)))?
- } else {
- i256::ZERO
- };
-
- format!("{}", integers.add_wrapping(adjusted))
- } else {
- let padding = if scale > decimals.len() { scale } else { 0 };
-
- let decimals = format!("{decimals:0<padding$}");
- format!("{integers}{decimals}")
- };
-
- if negative {
- number_decimals.insert(0, '-');
- }
-
- let value = i256::from_string(number_decimals.as_str()).ok_or_else(|| {
- ArrowError::InvalidArgumentError(format!(
- "Cannot convert {} to {}: Overflow",
- value_str,
- T::PREFIX
- ))
- })?;
-
- T::Native::from_decimal(value).ok_or_else(|| {
- ArrowError::InvalidArgumentError(format!("Cannot convert {} to {}",
value_str, T::PREFIX))
- })
-}
-
-fn string_to_decimal_cast<T, Offset: OffsetSizeTrait>(
- from: &GenericStringArray<Offset>,
- precision: u8,
- scale: i8,
- cast_options: &CastOptions,
-) -> Result<PrimitiveArray<T>, ArrowError>
-where
- T: DecimalType,
- T::Native: DecimalCast + ArrowNativeTypeOp,
-{
- if cast_options.safe {
- let iter = from.iter().map(|v| {
- v.and_then(|v| parse_string_to_decimal_native::<T>(v, scale as
usize).ok())
- .and_then(|v| {
- T::validate_decimal_precision(v, precision)
- .is_ok()
- .then_some(v)
- })
- });
- // Benefit:
- // 20% performance improvement
- // Soundness:
- // The iterator is trustedLen because it comes from an
`StringArray`.
- Ok(unsafe {
- PrimitiveArray::<T>::from_trusted_len_iter(iter)
- .with_precision_and_scale(precision, scale)?
- })
- } else {
- let vec = from
- .iter()
- .map(|v| {
- v.map(|v| {
- parse_string_to_decimal_native::<T>(v, scale as usize)
- .map_err(|_| {
- ArrowError::CastError(format!(
- "Cannot cast string '{}' to value of {:?}
type",
- v,
- T::DATA_TYPE,
- ))
- })
- .and_then(|v| T::validate_decimal_precision(v,
precision).map(|_| v))
- })
- .transpose()
- })
- .collect::<Result<Vec<_>, _>>()?;
- // Benefit:
- // 20% performance improvement
- // Soundness:
- // The iterator is trustedLen because it comes from an
`StringArray`.
- Ok(unsafe {
- PrimitiveArray::<T>::from_trusted_len_iter(vec.iter())
- .with_precision_and_scale(precision, scale)?
- })
- }
-}
-
-/// Cast Utf8 to decimal
-fn cast_string_to_decimal<T, Offset: OffsetSizeTrait>(
- from: &dyn Array,
- precision: u8,
- scale: i8,
- cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
- T: DecimalType,
- T::Native: DecimalCast + ArrowNativeTypeOp,
-{
- if scale < 0 {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Cannot cast string to decimal with negative scale {scale}"
- )));
- }
-
- if scale > T::MAX_SCALE {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Cannot cast string to decimal greater than maximum scale {}",
- T::MAX_SCALE
- )));
- }
-
- Ok(Arc::new(string_to_decimal_cast::<T, Offset>(
- from.as_any()
- .downcast_ref::<GenericStringArray<Offset>>()
- .unwrap(),
- precision,
- scale,
- cast_options,
- )?))
-}
-
/// Cast numeric types to Boolean
///
/// Any zero value returns `false` while non-zero returns `true`