[GitHub] [arrow-rs] viirya commented on a diff in pull request #1914: Add Decimal256 API

GitBox Thu, 23 Jun 2022 09:33:23 -0700


viirya commented on code in PR #1914:
URL: https://github.com/apache/arrow-rs/pull/1914#discussion_r905240412



##########
arrow/src/util/decimal.rs:
##########
@@ -17,124 +17,278 @@
 
 //! Decimal related utils
 
-use std::cmp::Ordering;
+use crate::error::{ArrowError, Result};
+use num::bigint::BigInt;
+use std::cmp::{min, Ordering};
+
+pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq {
+    /// The bit-width of the internal representation.
+    const BIT_WIDTH: usize;
+
+    /// Tries to create a decimal value from precision, scale and bytes.
+    /// If the length of bytes isn't same as the bit width of this decimal,
+    /// returning an error. The bytes should be stored in little-endian order.
+    ///
+    /// Safety:
+    /// This method doesn't validate if the decimal value represented by the 
bytes
+    /// can be fitted into the specified precision.
+    fn try_new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> 
Result<Self>
+    where
+        Self: Sized,
+    {
+        if precision < scale {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Precision {} is less than scale {}",
+                precision, scale
+            )));
+        }
+
+        if bytes.len() == Self::BIT_WIDTH / 8 {
+            Ok(Self::new(precision, scale, bytes))
+        } else {
+            Err(ArrowError::InvalidArgumentError(format!(
+                "Input to Decimal{} must be {} bytes",
+                Self::BIT_WIDTH,
+                Self::BIT_WIDTH / 8
+            )))
+        }
+    }
+
+    /// Creates a decimal value from precision, scale, and bytes.
+    ///
+    /// Safety:
+    /// This method doesn't check if the length of bytes is compatible with 
this decimal.
+    /// Use `try_new_from_bytes` for safe constructor.
+    fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self;
+
+    /// Returns the raw bytes of the integer representation of the decimal.
+    fn raw_value(&self) -> &[u8];
+
+    /// Returns the precision of the decimal.
+    fn precision(&self) -> usize;
+
+    /// Returns the scale of the decimal.
+    fn scale(&self) -> usize;
+
+    /// Returns the string representation of the decimal.
+    /// If the string representation cannot be fitted with the precision of 
the decimal,
+    /// the string will be truncated.
+    fn to_string(&self) -> String {
+        let raw_bytes = self.raw_value();
+        let integer = BigInt::from_signed_bytes_le(raw_bytes);
+        let value_str = integer.to_string();
+        let (sign, rest) =
+            value_str.split_at(if integer >= BigInt::from(0) { 0 } else { 1 });
+        let bound = min(self.precision(), rest.len()) + sign.len();
+        let value_str = &value_str[0..bound];
+
+        if self.scale() == 0 {
+            value_str.to_string()
+        } else if rest.len() > self.scale() {
+            // Decimal separator is in the middle of the string
+            let (whole, decimal) = value_str.split_at(value_str.len() - 
self.scale());
+            format!("{}.{}", whole, decimal)
+        } else {
+            // String has to be padded
+            format!("{}0.{:0>width$}", sign, rest, width = self.scale())
+        }
+    }
+}
 
 /// Represents a decimal value with precision and scale.
-/// The decimal value is represented by a signed 128-bit integer.
+/// The decimal value could be represented by a signed 128-bit integer.
 #[derive(Debug)]
 pub struct Decimal128 {
     #[allow(dead_code)]
     precision: usize,
     scale: usize,
-    value: i128,
+    value: [u8; 16],
 }
 
-impl PartialOrd for Decimal128 {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        assert_eq!(
-            self.scale, other.scale,
-            "Cannot compare two Decimal128 with different scale: {}, {}",
-            self.scale, other.scale
-        );
-        self.value.partial_cmp(&other.value)
+impl Decimal128 {
+    /// Creates `Decimal128` from an `i128` value.
+    pub(crate) fn new_from_i128(precision: usize, scale: usize, value: i128) 
-> Self {
+        Decimal128 {
+            precision,
+            scale,
+            value: value.to_le_bytes(),
+        }
     }
-}
 
-impl Ord for Decimal128 {
-    fn cmp(&self, other: &Self) -> Ordering {
-        assert_eq!(
-            self.scale, other.scale,
-            "Cannot compare two Decimal128 with different scale: {}, {}",
-            self.scale, other.scale
-        );
-        self.value.cmp(&other.value)
+    /// Returns `i128` representation of the decimal.
+    pub fn as_i128(&self) -> i128 {
+        i128::from_le_bytes(self.value)
     }
 }
 
-impl PartialEq<Self> for Decimal128 {
-    fn eq(&self, other: &Self) -> bool {
-        assert_eq!(
-            self.scale, other.scale,
-            "Cannot compare two Decimal128 with different scale: {}, {}",
-            self.scale, other.scale
-        );
-        self.value.eq(&other.value)
+impl From<Decimal128> for i128 {
+    fn from(decimal: Decimal128) -> Self {
+        decimal.as_i128()
     }
 }
 
-impl Eq for Decimal128 {}
+/// Represents a decimal value with precision and scale.
+/// The decimal value could be represented by a signed 256-bit integer.
+#[derive(Debug)]
+pub struct Decimal256 {
+    #[allow(dead_code)]
+    precision: usize,
+    scale: usize,
+    value: [u8; 32],
+}
 
-impl Decimal128 {
-    pub fn new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> 
Self {
-        let as_array = bytes.try_into();
-        let value = match as_array {
-            Ok(v) if bytes.len() == 16 => i128::from_le_bytes(v),
-            _ => panic!("Input to Decimal128 is not 128bit integer."),
-        };
+macro_rules! def_decimal {
+    ($ty:ident, $bit:expr) => {
+        impl BasicDecimal for $ty {
+            const BIT_WIDTH: usize = $bit;
 
-        Decimal128 {
-            precision,
-            scale,
-            value,
-        }
-    }
+            fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self {
+                $ty {
+                    precision,
+                    scale,
+                    value: bytes.try_into().unwrap(),
+                }
+            }
 
-    pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self {
-        Decimal128 {
-            precision,
-            scale,
-            value,
+            fn raw_value(&self) -> &[u8] {
+                &self.value
+            }
+
+            fn precision(&self) -> usize {
+                self.precision
+            }
+
+            fn scale(&self) -> usize {
+                self.scale
+            }
         }
-    }
 
-    pub fn as_i128(&self) -> i128 {
-        self.value
-    }
+        impl PartialOrd for $ty {
+            fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+                assert_eq!(
+                    self.scale, other.scale,
+                    "Cannot compare two Decimals with different scale: {}, {}",
+                    self.scale, other.scale
+                );
+                self.value.partial_cmp(&other.value)
+            }
+        }
 
-    pub fn as_string(&self) -> String {
-        let value_str = self.value.to_string();
+        impl Ord for $ty {
+            fn cmp(&self, other: &Self) -> Ordering {
+                assert_eq!(
+                    self.scale, other.scale,
+                    "Cannot compare two Decimals with different scale: {}, {}",
+                    self.scale, other.scale
+                );
+                self.value.cmp(&other.value)
+            }
+        }
 
-        if self.scale == 0 {
-            value_str
-        } else {
-            let (sign, rest) = value_str.split_at(if self.value >= 0 { 0 } 
else { 1 });
-
-            if rest.len() > self.scale {
-                // Decimal separator is in the middle of the string
-                let (whole, decimal) = value_str.split_at(value_str.len() - 
self.scale);
-                format!("{}.{}", whole, decimal)
-            } else {
-                // String has to be padded
-                format!("{}0.{:0>width$}", sign, rest, width = self.scale)
+        impl PartialEq<Self> for $ty {
+            fn eq(&self, other: &Self) -> bool {
+                assert_eq!(
+                    self.scale, other.scale,
+                    "Cannot compare two Decimals with different scale: {}, {}",
+                    self.scale, other.scale
+                );
+                self.value.eq(&other.value)
             }
         }
-    }
-}
 
-impl From<Decimal128> for i128 {
-    fn from(decimal: Decimal128) -> Self {
-        decimal.as_i128()
-    }
+        impl Eq for $ty {}
+    };
 }
 
+def_decimal!(Decimal128, 128);
+def_decimal!(Decimal256, 256);
+
 #[cfg(test)]
 mod tests {
-    use crate::util::decimal::Decimal128;
+    use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};
 
     #[test]
     fn decimal_128_to_string() {
         let mut value = Decimal128::new_from_i128(5, 2, 100);
-        assert_eq!(value.as_string(), "1.00");
+        assert_eq!(value.to_string(), "1.00");
 
         value = Decimal128::new_from_i128(5, 3, 100);
-        assert_eq!(value.as_string(), "0.100");
+        assert_eq!(value.to_string(), "0.100");
     }
 
     #[test]
-    fn decimal_128_from_bytes() {
+    fn decimal_invalid_precision_scale() {
         let bytes = 100_i128.to_le_bytes();
-        let value = Decimal128::new_from_bytes(5, 2, &bytes);
-        assert_eq!(value.as_string(), "1.00");
+        let err = Decimal128::try_new_from_bytes(5, 6, &bytes);
+        assert!(err.is_err());
+    }
+
+    #[test]
+    fn decimal_128_from_bytes() {
+        let mut bytes = 100_i128.to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap();
+        assert_eq!(value.to_string(), "1.00");
+
+        bytes = (-1_i128).to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap();
+        assert_eq!(value.to_string(), "-0.01");
+
+        bytes = i128::MAX.to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(38, 2, &bytes).unwrap();

Review Comment:
   Alternatively, `to_string` could return an error if the string length is 
larger than the precision. But since `DecimalBuilder` can optionally skip value 
validation, we can probably end up with Decimal structs whose value does not 
fit the precision, so returning an error here does not seem like a good fit.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow-rs] viirya commented on a diff in pull request #1914: Add Decimal256 API

Reply via email to