This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f0df5e04d Add Decimal256 API (#1914)
f0df5e04d is described below
commit f0df5e04d3e55fff20f166f0cb8c7ec22cf3ffb9
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Thu Jun 23 12:29:48 2022 -0700
Add Decimal256 API (#1914)
* Add Decimal256
* Dedup
* Truncate string representation by precision
* Update arrow/src/util/decimal.rs
Co-authored-by: Remzi Yang <[email protected]>
* Update arrow/src/util/decimal.rs
Co-authored-by: Remzi Yang <[email protected]>
* Update arrow/src/util/decimal.rs
Co-authored-by: Andrew Lamb <[email protected]>
* For review
* Fix clippy
* For review
* Move another one
Co-authored-by: Remzi Yang <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow/src/array/array_binary.rs | 4 +-
arrow/src/util/decimal.rs | 308 ++++++++++++++++++++++++++++++----------
2 files changed, 233 insertions(+), 79 deletions(-)
diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index 8846b0c97..6d7856475 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -33,7 +33,7 @@ use crate::datatypes::{
};
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
-use crate::util::decimal::Decimal128;
+use crate::util::decimal::{BasicDecimal, Decimal128};
use crate::{buffer::MutableBuffer, datatypes::DataType};
/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing
@@ -823,7 +823,7 @@ impl DecimalArray {
#[inline]
pub fn value_as_string(&self, row: usize) -> String {
- self.value(row).as_string()
+ self.value(row).to_string()
}
pub fn from_fixed_size_list_array(
diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs
index b78af3acc..7649b6b41 100644
--- a/arrow/src/util/decimal.rs
+++ b/arrow/src/util/decimal.rs
@@ -17,124 +17,278 @@
//! Decimal related utils
-use std::cmp::Ordering;
+use crate::error::{ArrowError, Result};
+use num::bigint::BigInt;
+use std::cmp::{min, Ordering};
+
+pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq {
+ /// The bit-width of the internal representation.
+ const BIT_WIDTH: usize;
+
+ /// Tries to create a decimal value from precision, scale and bytes.
+ /// If the length of bytes isn't same as the bit width of this decimal,
+ /// returning an error. The bytes should be stored in little-endian order.
+ ///
+ /// Safety:
+ /// This method doesn't validate if the decimal value represented by the
bytes
+ /// can be fitted into the specified precision.
+ fn try_new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) ->
Result<Self>
+ where
+ Self: Sized,
+ {
+ if precision < scale {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Precision {} is less than scale {}",
+ precision, scale
+ )));
+ }
+
+ if bytes.len() == Self::BIT_WIDTH / 8 {
+ Ok(Self::new(precision, scale, bytes))
+ } else {
+ Err(ArrowError::InvalidArgumentError(format!(
+ "Input to Decimal{} must be {} bytes",
+ Self::BIT_WIDTH,
+ Self::BIT_WIDTH / 8
+ )))
+ }
+ }
+
+ /// Creates a decimal value from precision, scale, and bytes.
+ ///
+ /// Safety:
+ /// This method doesn't check if the length of bytes is compatible with
this decimal.
+ /// Use `try_new_from_bytes` for safe constructor.
+ fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self;
+
+ /// Returns the raw bytes of the integer representation of the decimal.
+ fn raw_value(&self) -> &[u8];
+
+ /// Returns the precision of the decimal.
+ fn precision(&self) -> usize;
+
+ /// Returns the scale of the decimal.
+ fn scale(&self) -> usize;
+
+ /// Returns the string representation of the decimal.
+ /// If the string representation cannot be fitted with the precision of
the decimal,
+ /// the string will be truncated.
+ fn to_string(&self) -> String {
+ let raw_bytes = self.raw_value();
+ let integer = BigInt::from_signed_bytes_le(raw_bytes);
+ let value_str = integer.to_string();
+ let (sign, rest) =
+ value_str.split_at(if integer >= BigInt::from(0) { 0 } else { 1 });
+ let bound = min(self.precision(), rest.len()) + sign.len();
+ let value_str = &value_str[0..bound];
+
+ if self.scale() == 0 {
+ value_str.to_string()
+ } else if rest.len() > self.scale() {
+ // Decimal separator is in the middle of the string
+ let (whole, decimal) = value_str.split_at(value_str.len() -
self.scale());
+ format!("{}.{}", whole, decimal)
+ } else {
+ // String has to be padded
+ format!("{}0.{:0>width$}", sign, rest, width = self.scale())
+ }
+ }
+}
/// Represents a decimal value with precision and scale.
-/// The decimal value is represented by a signed 128-bit integer.
+/// The decimal value could represented by a signed 128-bit integer.
#[derive(Debug)]
pub struct Decimal128 {
#[allow(dead_code)]
precision: usize,
scale: usize,
- value: i128,
+ value: [u8; 16],
}
-impl PartialOrd for Decimal128 {
- fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
- assert_eq!(
- self.scale, other.scale,
- "Cannot compare two Decimal128 with different scale: {}, {}",
- self.scale, other.scale
- );
- self.value.partial_cmp(&other.value)
+impl Decimal128 {
+ /// Creates `Decimal128` from an `i128` value.
+ pub(crate) fn new_from_i128(precision: usize, scale: usize, value: i128)
-> Self {
+ Decimal128 {
+ precision,
+ scale,
+ value: value.to_le_bytes(),
+ }
}
-}
-impl Ord for Decimal128 {
- fn cmp(&self, other: &Self) -> Ordering {
- assert_eq!(
- self.scale, other.scale,
- "Cannot compare two Decimal128 with different scale: {}, {}",
- self.scale, other.scale
- );
- self.value.cmp(&other.value)
+ /// Returns `i128` representation of the decimal.
+ pub fn as_i128(&self) -> i128 {
+ i128::from_le_bytes(self.value)
}
}
-impl PartialEq<Self> for Decimal128 {
- fn eq(&self, other: &Self) -> bool {
- assert_eq!(
- self.scale, other.scale,
- "Cannot compare two Decimal128 with different scale: {}, {}",
- self.scale, other.scale
- );
- self.value.eq(&other.value)
+impl From<Decimal128> for i128 {
+ fn from(decimal: Decimal128) -> Self {
+ decimal.as_i128()
}
}
-impl Eq for Decimal128 {}
+/// Represents a decimal value with precision and scale.
+/// The decimal value could be represented by a signed 256-bit integer.
+#[derive(Debug)]
+pub struct Decimal256 {
+ #[allow(dead_code)]
+ precision: usize,
+ scale: usize,
+ value: [u8; 32],
+}
-impl Decimal128 {
- pub fn new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) ->
Self {
- let as_array = bytes.try_into();
- let value = match as_array {
- Ok(v) if bytes.len() == 16 => i128::from_le_bytes(v),
- _ => panic!("Input to Decimal128 is not 128bit integer."),
- };
+macro_rules! def_decimal {
+ ($ty:ident, $bit:expr) => {
+ impl BasicDecimal for $ty {
+ const BIT_WIDTH: usize = $bit;
- Decimal128 {
- precision,
- scale,
- value,
- }
- }
+ fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self {
+ $ty {
+ precision,
+ scale,
+ value: bytes.try_into().unwrap(),
+ }
+ }
- pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self {
- Decimal128 {
- precision,
- scale,
- value,
+ fn raw_value(&self) -> &[u8] {
+ &self.value
+ }
+
+ fn precision(&self) -> usize {
+ self.precision
+ }
+
+ fn scale(&self) -> usize {
+ self.scale
+ }
}
- }
- pub fn as_i128(&self) -> i128 {
- self.value
- }
+ impl PartialOrd for $ty {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ assert_eq!(
+ self.scale, other.scale,
+ "Cannot compare two Decimals with different scale: {}, {}",
+ self.scale, other.scale
+ );
+ self.value.partial_cmp(&other.value)
+ }
+ }
- pub fn as_string(&self) -> String {
- let value_str = self.value.to_string();
+ impl Ord for $ty {
+ fn cmp(&self, other: &Self) -> Ordering {
+ assert_eq!(
+ self.scale, other.scale,
+ "Cannot compare two Decimals with different scale: {}, {}",
+ self.scale, other.scale
+ );
+ self.value.cmp(&other.value)
+ }
+ }
- if self.scale == 0 {
- value_str
- } else {
- let (sign, rest) = value_str.split_at(if self.value >= 0 { 0 }
else { 1 });
-
- if rest.len() > self.scale {
- // Decimal separator is in the middle of the string
- let (whole, decimal) = value_str.split_at(value_str.len() -
self.scale);
- format!("{}.{}", whole, decimal)
- } else {
- // String has to be padded
- format!("{}0.{:0>width$}", sign, rest, width = self.scale)
+ impl PartialEq<Self> for $ty {
+ fn eq(&self, other: &Self) -> bool {
+ assert_eq!(
+ self.scale, other.scale,
+ "Cannot compare two Decimals with different scale: {}, {}",
+ self.scale, other.scale
+ );
+ self.value.eq(&other.value)
}
}
- }
-}
-impl From<Decimal128> for i128 {
- fn from(decimal: Decimal128) -> Self {
- decimal.as_i128()
- }
+ impl Eq for $ty {}
+ };
}
+def_decimal!(Decimal128, 128);
+def_decimal!(Decimal256, 256);
+
#[cfg(test)]
mod tests {
- use crate::util::decimal::Decimal128;
+ use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};
#[test]
fn decimal_128_to_string() {
let mut value = Decimal128::new_from_i128(5, 2, 100);
- assert_eq!(value.as_string(), "1.00");
+ assert_eq!(value.to_string(), "1.00");
value = Decimal128::new_from_i128(5, 3, 100);
- assert_eq!(value.as_string(), "0.100");
+ assert_eq!(value.to_string(), "0.100");
}
#[test]
- fn decimal_128_from_bytes() {
+ fn decimal_invalid_precision_scale() {
let bytes = 100_i128.to_le_bytes();
- let value = Decimal128::new_from_bytes(5, 2, &bytes);
- assert_eq!(value.as_string(), "1.00");
+ let err = Decimal128::try_new_from_bytes(5, 6, &bytes);
+ assert!(err.is_err());
+ }
+
+ #[test]
+ fn decimal_128_from_bytes() {
+ let mut bytes = 100_i128.to_le_bytes();
+ let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap();
+ assert_eq!(value.to_string(), "1.00");
+
+ bytes = (-1_i128).to_le_bytes();
+ let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap();
+ assert_eq!(value.to_string(), "-0.01");
+
+ bytes = i128::MAX.to_le_bytes();
+ let value = Decimal128::try_new_from_bytes(38, 2, &bytes).unwrap();
+ assert_eq!(value.to_string(),
"170141183460469231731687303715884105.72");
+
+ bytes = i128::MIN.to_le_bytes();
+ let value = Decimal128::try_new_from_bytes(38, 2, &bytes).unwrap();
+ assert_eq!(
+ value.to_string(),
+ "-170141183460469231731687303715884105.72"
+ );
+
+ // Truncated
+ bytes = 12345_i128.to_le_bytes();
+ let value = Decimal128::try_new_from_bytes(3, 2, &bytes).unwrap();
+ assert_eq!(value.to_string(), "1.23");
+
+ bytes = (-12345_i128).to_le_bytes();
+ let value = Decimal128::try_new_from_bytes(3, 2, &bytes).unwrap();
+ assert_eq!(value.to_string(), "-1.23");
+ }
+
+ #[test]
+ fn decimal_256_from_bytes() {
+ let mut bytes = vec![0; 32];
+ bytes[0..16].clone_from_slice(&100_i128.to_le_bytes());
+ let value = Decimal256::try_new_from_bytes(5, 2,
bytes.as_slice()).unwrap();
+ assert_eq!(value.to_string(), "1.00");
+
+ bytes[0..16].clone_from_slice(&i128::MAX.to_le_bytes());
+ let value = Decimal256::try_new_from_bytes(40, 4, &bytes).unwrap();
+ assert_eq!(
+ value.to_string(),
+ "17014118346046923173168730371588410.5727"
+ );
+
+ // i128 maximum + 1
+ bytes[0..16].clone_from_slice(&0_i128.to_le_bytes());
+ bytes[15] = 128;
+ let value = Decimal256::try_new_from_bytes(40, 4, &bytes).unwrap();
+ assert_eq!(
+ value.to_string(),
+ "17014118346046923173168730371588410.5728"
+ );
+
+ // smaller than i128 minimum
+ bytes = vec![255; 32];
+ bytes[31] = 128;
+ let value = Decimal256::try_new_from_bytes(79, 4, &bytes).unwrap();
+ assert_eq!(
+ value.to_string(),
+
"-5744373177007483132341216834415376678658315645522012356644966081642565415.7313"
+ );
+
+ bytes = vec![255; 32];
+ let value = Decimal256::try_new_from_bytes(5, 2, &bytes).unwrap();
+ assert_eq!(value.to_string(), "-0.01");
}
fn i128_func(value: impl Into<i128>) -> i128 {