[arrow-rs] branch master updated: Add Decimal256 API (#1914)

alamb Thu, 23 Jun 2022 12:30:01 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git



The following commit(s) were added to refs/heads/master by this push:
     new f0df5e04d Add Decimal256 API (#1914)
f0df5e04d is described below

commit f0df5e04d3e55fff20f166f0cb8c7ec22cf3ffb9
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Thu Jun 23 12:29:48 2022 -0700

    Add Decimal256 API (#1914)
    
    * Add Decimal256
    
    * Dedup
    
    * Truncate string representation by precision
    
    * Update arrow/src/util/decimal.rs
    
    Co-authored-by: Remzi Yang <[email protected]>
    
    * Update arrow/src/util/decimal.rs
    
    Co-authored-by: Remzi Yang <[email protected]>
    
    * Update arrow/src/util/decimal.rs
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    * For review
    
    * Fix clippy
    
    * For review
    
    * Move another one
    
    Co-authored-by: Remzi Yang <[email protected]>
    Co-authored-by: Andrew Lamb <[email protected]>
---
 arrow/src/array/array_binary.rs |   4 +-
 arrow/src/util/decimal.rs       | 308 ++++++++++++++++++++++++++++++----------
 2 files changed, 233 insertions(+), 79 deletions(-)

diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index 8846b0c97..6d7856475 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -33,7 +33,7 @@ use crate::datatypes::{
 };
 use crate::error::{ArrowError, Result};
 use crate::util::bit_util;
-use crate::util::decimal::Decimal128;
+use crate::util::decimal::{BasicDecimal, Decimal128};
 use crate::{buffer::MutableBuffer, datatypes::DataType};
 
 /// See [`BinaryArray`] and [`LargeBinaryArray`] for storing
@@ -823,7 +823,7 @@ impl DecimalArray {
 
     #[inline]
     pub fn value_as_string(&self, row: usize) -> String {
-        self.value(row).as_string()
+        self.value(row).to_string()
     }
 
     pub fn from_fixed_size_list_array(
diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs
index b78af3acc..7649b6b41 100644
--- a/arrow/src/util/decimal.rs
+++ b/arrow/src/util/decimal.rs
@@ -17,124 +17,278 @@
 
 //! Decimal related utils
 
-use std::cmp::Ordering;
+use crate::error::{ArrowError, Result};
+use num::bigint::BigInt;
+use std::cmp::{min, Ordering};
+
+pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq {
+    /// The bit-width of the internal representation.
+    const BIT_WIDTH: usize;
+
+    /// Tries to create a decimal value from precision, scale and bytes.
+    /// If the length of bytes isn't the same as the byte width of this decimal,
+    /// an error is returned. The bytes should be stored in little-endian order.
+    ///
+    /// Safety:
+    /// This method doesn't validate if the decimal value represented by the bytes
+    /// can be fitted into the specified precision.
+    fn try_new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> Result<Self>
+    where
+        Self: Sized,
+    {
+        if precision < scale {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Precision {} is less than scale {}",
+                precision, scale
+            )));
+        }
+
+        if bytes.len() == Self::BIT_WIDTH / 8 {
+            Ok(Self::new(precision, scale, bytes))
+        } else {
+            Err(ArrowError::InvalidArgumentError(format!(
+                "Input to Decimal{} must be {} bytes",
+                Self::BIT_WIDTH,
+                Self::BIT_WIDTH / 8
+            )))
+        }
+    }
+
+    /// Creates a decimal value from precision, scale, and bytes.
+    ///
+    /// Safety:
+    /// This method doesn't check if the length of bytes is compatible with this decimal.
+    /// Use `try_new_from_bytes` for a safe constructor.
+    fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self;
+
+    /// Returns the raw bytes of the integer representation of the decimal.
+    fn raw_value(&self) -> &[u8];
+
+    /// Returns the precision of the decimal.
+    fn precision(&self) -> usize;
+
+    /// Returns the scale of the decimal.
+    fn scale(&self) -> usize;
+
+    /// Returns the string representation of the decimal.
+    /// If the string representation cannot be fitted with the precision of the decimal,
+    /// the string will be truncated.
+    fn to_string(&self) -> String {
+        let raw_bytes = self.raw_value();
+        let integer = BigInt::from_signed_bytes_le(raw_bytes);
+        let value_str = integer.to_string();
+        let (sign, rest) =
+            value_str.split_at(if integer >= BigInt::from(0) { 0 } else { 1 });
+        let bound = min(self.precision(), rest.len()) + sign.len();
+        let value_str = &value_str[0..bound];
+
+        if self.scale() == 0 {
+            value_str.to_string()
+        } else if rest.len() > self.scale() {
+            // Decimal separator is in the middle of the string
+            let (whole, decimal) = value_str.split_at(value_str.len() - self.scale());
+            format!("{}.{}", whole, decimal)
+        } else {
+            // String has to be padded
+            format!("{}0.{:0>width$}", sign, rest, width = self.scale())
+        }
+    }
+}
 
 /// Represents a decimal value with precision and scale.
-/// The decimal value is represented by a signed 128-bit integer.
+/// The decimal value could be represented by a signed 128-bit integer.
 #[derive(Debug)]
 pub struct Decimal128 {
     #[allow(dead_code)]
     precision: usize,
     scale: usize,
-    value: i128,
+    value: [u8; 16],
 }
 
-impl PartialOrd for Decimal128 {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        assert_eq!(
-            self.scale, other.scale,
-            "Cannot compare two Decimal128 with different scale: {}, {}",
-            self.scale, other.scale
-        );
-        self.value.partial_cmp(&other.value)
+impl Decimal128 {
+    /// Creates `Decimal128` from an `i128` value.
+    pub(crate) fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self {
+        Decimal128 {
+            precision,
+            scale,
+            value: value.to_le_bytes(),
+        }
     }
-}
 
-impl Ord for Decimal128 {
-    fn cmp(&self, other: &Self) -> Ordering {
-        assert_eq!(
-            self.scale, other.scale,
-            "Cannot compare two Decimal128 with different scale: {}, {}",
-            self.scale, other.scale
-        );
-        self.value.cmp(&other.value)
+    /// Returns `i128` representation of the decimal.
+    pub fn as_i128(&self) -> i128 {
+        i128::from_le_bytes(self.value)
     }
 }
 
-impl PartialEq<Self> for Decimal128 {
-    fn eq(&self, other: &Self) -> bool {
-        assert_eq!(
-            self.scale, other.scale,
-            "Cannot compare two Decimal128 with different scale: {}, {}",
-            self.scale, other.scale
-        );
-        self.value.eq(&other.value)
+impl From<Decimal128> for i128 {
+    fn from(decimal: Decimal128) -> Self {
+        decimal.as_i128()
     }
 }
 
-impl Eq for Decimal128 {}
+/// Represents a decimal value with precision and scale.
+/// The decimal value could be represented by a signed 256-bit integer.
+#[derive(Debug)]
+pub struct Decimal256 {
+    #[allow(dead_code)]
+    precision: usize,
+    scale: usize,
+    value: [u8; 32],
+}
 
-impl Decimal128 {
-    pub fn new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> Self {
-        let as_array = bytes.try_into();
-        let value = match as_array {
-            Ok(v) if bytes.len() == 16 => i128::from_le_bytes(v),
-            _ => panic!("Input to Decimal128 is not 128bit integer."),
-        };
+macro_rules! def_decimal {
+    ($ty:ident, $bit:expr) => {
+        impl BasicDecimal for $ty {
+            const BIT_WIDTH: usize = $bit;
 
-        Decimal128 {
-            precision,
-            scale,
-            value,
-        }
-    }
+            fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self {
+                $ty {
+                    precision,
+                    scale,
+                    value: bytes.try_into().unwrap(),
+                }
+            }
 
-    pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self {
-        Decimal128 {
-            precision,
-            scale,
-            value,
+            fn raw_value(&self) -> &[u8] {
+                &self.value
+            }
+
+            fn precision(&self) -> usize {
+                self.precision
+            }
+
+            fn scale(&self) -> usize {
+                self.scale
+            }
         }
-    }
 
-    pub fn as_i128(&self) -> i128 {
-        self.value
-    }
+        impl PartialOrd for $ty {
+            fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+                assert_eq!(
+                    self.scale, other.scale,
+                    "Cannot compare two Decimals with different scale: {}, {}",
+                    self.scale, other.scale
+                );
+                self.value.partial_cmp(&other.value)
+            }
+        }
 
-    pub fn as_string(&self) -> String {
-        let value_str = self.value.to_string();
+        impl Ord for $ty {
+            fn cmp(&self, other: &Self) -> Ordering {
+                assert_eq!(
+                    self.scale, other.scale,
+                    "Cannot compare two Decimals with different scale: {}, {}",
+                    self.scale, other.scale
+                );
+                self.value.cmp(&other.value)
+            }
+        }
 
-        if self.scale == 0 {
-            value_str
-        } else {
-            let (sign, rest) = value_str.split_at(if self.value >= 0 { 0 } else { 1 });
-
-            if rest.len() > self.scale {
-                // Decimal separator is in the middle of the string
-                let (whole, decimal) = value_str.split_at(value_str.len() - self.scale);
-                format!("{}.{}", whole, decimal)
-            } else {
-                // String has to be padded
-                format!("{}0.{:0>width$}", sign, rest, width = self.scale)
+        impl PartialEq<Self> for $ty {
+            fn eq(&self, other: &Self) -> bool {
+                assert_eq!(
+                    self.scale, other.scale,
+                    "Cannot compare two Decimals with different scale: {}, {}",
+                    self.scale, other.scale
+                );
+                self.value.eq(&other.value)
             }
         }
-    }
-}
 
-impl From<Decimal128> for i128 {
-    fn from(decimal: Decimal128) -> Self {
-        decimal.as_i128()
-    }
+        impl Eq for $ty {}
+    };
 }
 
+def_decimal!(Decimal128, 128);
+def_decimal!(Decimal256, 256);
+
 #[cfg(test)]
 mod tests {
-    use crate::util::decimal::Decimal128;
+    use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};
 
     #[test]
     fn decimal_128_to_string() {
         let mut value = Decimal128::new_from_i128(5, 2, 100);
-        assert_eq!(value.as_string(), "1.00");
+        assert_eq!(value.to_string(), "1.00");
 
         value = Decimal128::new_from_i128(5, 3, 100);
-        assert_eq!(value.as_string(), "0.100");
+        assert_eq!(value.to_string(), "0.100");
     }
 
     #[test]
-    fn decimal_128_from_bytes() {
+    fn decimal_invalid_precision_scale() {
         let bytes = 100_i128.to_le_bytes();
-        let value = Decimal128::new_from_bytes(5, 2, &bytes);
-        assert_eq!(value.as_string(), "1.00");
+        let err = Decimal128::try_new_from_bytes(5, 6, &bytes);
+        assert!(err.is_err());
+    }
+
+    #[test]
+    fn decimal_128_from_bytes() {
+        let mut bytes = 100_i128.to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap();
+        assert_eq!(value.to_string(), "1.00");
+
+        bytes = (-1_i128).to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap();
+        assert_eq!(value.to_string(), "-0.01");
+
+        bytes = i128::MAX.to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(38, 2, &bytes).unwrap();
+        assert_eq!(value.to_string(), "170141183460469231731687303715884105.72");
+
+        bytes = i128::MIN.to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(38, 2, &bytes).unwrap();
+        assert_eq!(
+            value.to_string(),
+            "-170141183460469231731687303715884105.72"
+        );
+
+        // Truncated
+        bytes = 12345_i128.to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(3, 2, &bytes).unwrap();
+        assert_eq!(value.to_string(), "1.23");
+
+        bytes = (-12345_i128).to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(3, 2, &bytes).unwrap();
+        assert_eq!(value.to_string(), "-1.23");
+    }
+
+    #[test]
+    fn decimal_256_from_bytes() {
+        let mut bytes = vec![0; 32];
+        bytes[0..16].clone_from_slice(&100_i128.to_le_bytes());
+        let value = Decimal256::try_new_from_bytes(5, 2, bytes.as_slice()).unwrap();
+        assert_eq!(value.to_string(), "1.00");
+
+        bytes[0..16].clone_from_slice(&i128::MAX.to_le_bytes());
+        let value = Decimal256::try_new_from_bytes(40, 4, &bytes).unwrap();
+        assert_eq!(
+            value.to_string(),
+            "17014118346046923173168730371588410.5727"
+        );
+
+        // i128 maximum + 1
+        bytes[0..16].clone_from_slice(&0_i128.to_le_bytes());
+        bytes[15] = 128;
+        let value = Decimal256::try_new_from_bytes(40, 4, &bytes).unwrap();
+        assert_eq!(
+            value.to_string(),
+            "17014118346046923173168730371588410.5728"
+        );
+
+        // smaller than i128 minimum
+        bytes = vec![255; 32];
+        bytes[31] = 128;
+        let value = Decimal256::try_new_from_bytes(79, 4, &bytes).unwrap();
+        assert_eq!(
+            value.to_string(),
+            "-5744373177007483132341216834415376678658315645522012356644966081642565415.7313"
+        );
+
+        bytes = vec![255; 32];
+        let value = Decimal256::try_new_from_bytes(5, 2, &bytes).unwrap();
+        assert_eq!(value.to_string(), "-0.01");
     }
 
     fn i128_func(value: impl Into<i128>) -> i128 {

[arrow-rs] branch master updated: Add Decimal256 API (#1914)

Reply via email to