[GitHub] [arrow] jorgecarleitao commented on a change in pull request #9232: ARROW-10818: [Rust] Initial implementation for Decimal (Decimal128Type only)

GitBox Tue, 09 Mar 2021 21:58:13 -0800


jorgecarleitao commented on a change in pull request #9232:
URL: https://github.com/apache/arrow/pull/9232#discussion_r590660386




##########
File path: rust/arrow/src/buffer/mutable.rs
##########
@@ -247,7 +247,7 @@ impl MutableBuffer {
     /// # Safety
     /// This function must only be used when this buffer was extended with 
items of type `T`.
     /// Failure to do so results in undefined behavior.
-    pub fn typed_data_mut<T: ArrowNativeType>(&mut self) -> &mut [T] {

Review comment:
       If we do this, we must mark the function as unsafe.
   
   The underlying design here is that every type representable in a buffer must 
be `ArrowNativeType`. This opens up the opportunity to transmute the buffer to 
any type, which is undefined behavior.
   
   I do not think we should do this because it violates the design of this 
crate whereby every type that can be written into a Buffer implements 
`ArrowNativeType`.

##########
File path: rust/arrow/src/datatypes/decimal.rs
##########
@@ -0,0 +1,926 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::datatypes::JsonSerializable;
+use num::{NumCast, ToPrimitive, Zero};
+use serde_json::Value;
+use std::hash::{Hash, Hasher};
+use std::{
+    cmp::Ordering,
+    convert::TryInto,
+    fmt,
+    mem::size_of,
+    num::ParseIntError,
+    ops::{Add, AddAssign, Sub, SubAssign},
+    str::FromStr,
+};
+
+#[derive(Debug, PartialEq)]
+pub enum ParseDecimalError {
+    ParseIntError(ParseIntError),
+    Other(String),
+}
+
+impl From<ParseIntError> for ParseDecimalError {
+    fn from(err: ParseIntError) -> ParseDecimalError {
+        ParseDecimalError::ParseIntError(err)
+    }
+}
+
+// Decimal (precision, scale) = Decimal(1, 2) = 1.00
+pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq {
+    const MAX_DIGITS: usize;
+
+    // fn into_json_value(self) -> Option<Value>;
+
+    fn get_byte_width_for_precision_scale(precision: usize, scale: usize) -> 
usize;
+
+    // Rescale scale part
+    fn rescale(&mut self, scale: usize);
+
+    fn rescale_to_new(self, scale: usize) -> Self;
+
+    // Try to parse string with precision, scale
+    fn from_i128(
+        n: i128,
+        precision: usize,
+        scale: usize,
+    ) -> std::result::Result<Self, ParseDecimalError>;
+
+    fn from_i64(
+        n: i64,
+        precision: usize,
+        scale: usize,
+    ) -> std::result::Result<Self, ParseDecimalError>;
+
+    fn from_f64(
+        n: f64,
+        precision: usize,
+        scale: usize,
+    ) -> std::result::Result<Self, ParseDecimalError>;
+
+    // Try to parse string with precision, scale
+    fn parse(
+        string: &str,
+        precision: usize,
+        scale: usize,
+    ) -> std::result::Result<Self, ParseDecimalError>;
+
+    fn to_le_bytes(&self) -> [u8; 16];
+
+    fn to_be_bytes(&self) -> [u8; 16];
+
+    fn to_byte_slice(&self) -> Vec<u8>;
+
+    fn get_signed_lead_part(&self) -> i128;
+
+    fn from_bytes_with_precision_scale(
+        bytes: &[u8],
+        precision: usize,
+        scale: usize,
+    ) -> Self;
+}
+
+#[inline]
+pub fn numeric_to_decimal<T: ToString, U: ArrowDecimalType>(
+    n: T,
+    p: usize,
+    s: usize,
+) -> Option<U> {
+    Some(U::parse(n.to_string().as_str(), p, s).unwrap_or_else(|_e| {
+        panic!("unable to represent");
+    }))
+}
+
+#[inline]
+pub fn decimal_to_numeric<U: DecimalCast, T: NumCast>(n: U) -> Option<T> {
+    T::from(n)
+}
+
+pub trait DecimalCast: Sized + ArrowDecimalType + ToPrimitive {
+    fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self>;
+}
+
+impl DecimalCast for Decimal128Type {
+    fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self> {
+        Some(Decimal128Type::from_f64(n.to_f64().unwrap(), p, s).unwrap())
+    }
+}
+
+#[inline(always)]
+fn pow_ten(pow: usize) -> u64 {
+    10u64.pow(pow as u32)
+}
+
+macro_rules! make_type {
+    ($name:ident, $native_ty:ty, $max_digits:expr) => {
+        #[derive(Copy, Clone, Eq)]
+        pub struct $name {
+            pub digits: $native_ty,
+            pub precision: usize,
+            pub scale: usize,
+        }
+
+        impl $name {
+            pub fn new(digits: $native_ty, precision: usize, scale: usize) -> 
$name {
+                assert!(
+                    (precision + scale) <= $max_digits,
+                    "Unable to use {} to represent Decimal({}, {}), max digits 
reached ({}).",
+                    stringify!($name),
+                    precision,
+                    scale,
+                    stringify!($max_digits),
+                );
+
+                $name {
+                    digits,
+                    precision,
+                    scale,
+                }
+            }
+        }
+
+        impl ArrowDecimalType for $name {
+            const MAX_DIGITS: usize = $max_digits;
+
+            /// Returns the byte width of this primitive type.
+            fn get_byte_width_for_precision_scale(
+                _precision: usize,
+                _scale: usize,
+            ) -> usize {
+                size_of::<$native_ty>()
+            }
+
+            #[inline(always)]
+            fn rescale(&mut self, scale: usize) {
+                if self.digits.is_zero() {
+                    self.scale = scale;
+                } else {
+                    match self.scale.cmp(&scale) {
+                        Ordering::Greater => {
+                            self.digits /= pow_ten(self.scale - scale) as 
$native_ty;
+                            self.scale = scale;
+                        }
+                        Ordering::Less => {
+                            self.digits *= pow_ten(scale - self.scale) as 
$native_ty;
+                            self.scale = scale;
+                        }
+                        Ordering::Equal => {}
+                    };
+                }
+            }
+
+            #[inline(always)]
+            fn get_signed_lead_part(&self) -> i128 {
+                self.rescale_to_new(0).digits
+            }
+
+            #[inline(always)]
+            fn rescale_to_new(self, scale: usize) -> $name {
+                if self.digits.is_zero() {
+                    return $name::new(0, 0, scale);
+                }
+
+                let digits = match self.scale.cmp(&scale) {
+                    Ordering::Greater => {
+                        self.digits / pow_ten(self.scale - scale) as $native_ty
+                    }
+                    Ordering::Less => {
+                        self.digits * pow_ten(scale - self.scale) as $native_ty
+                    }
+                    Ordering::Equal => self.digits,
+                };
+
+                $name::new(digits, self.precision, scale)
+            }
+
+            fn from_i128(
+                n: i128,
+                precision: usize,
+                scale: usize,
+            ) -> Result<Self, ParseDecimalError> {
+                let mut as_decimal = $name::new(n as $native_ty, precision, 0);
+
+                if as_decimal.scale != scale {
+                    as_decimal.rescale(scale)
+                }
+
+                as_decimal.precision = precision;
+
+                Ok(as_decimal)
+            }
+
+            fn from_i64(
+                n: i64,
+                precision: usize,
+                scale: usize,
+            ) -> Result<Self, ParseDecimalError> {
+                let mut as_decimal = $name::new(n as $native_ty, precision, 0);
+
+                if as_decimal.scale != scale {
+                    as_decimal.rescale(scale)
+                }
+
+                as_decimal.precision = precision;
+
+                Ok(as_decimal)
+            }
+
+            fn from_f64(
+                n: f64,
+                precision: usize,
+                scale: usize,
+            ) -> Result<Self, ParseDecimalError> {
+                $name::parse(n.to_string().as_str(), precision, scale)
+            }
+
+            fn parse(
+                string: &str,
+                precision: usize,
+                scale: usize,
+            ) -> Result<Self, ParseDecimalError> {
+                let mut as_decimal = $name::from_str(string)?;
+
+                if as_decimal.scale != scale {
+                    as_decimal.rescale(scale)
+                }
+
+                as_decimal.precision = precision;
+
+                Ok(as_decimal)
+            }
+
+            fn to_le_bytes(&self) -> [u8; 16] {
+                self.digits.to_le_bytes()
+            }
+
+            fn to_be_bytes(&self) -> [u8; 16] {
+                self.digits.to_be_bytes()
+            }
+
+            fn to_byte_slice(&self) -> Vec<u8> {
+                self.digits.to_le_bytes().to_vec()
+            }
+
+            fn from_bytes_with_precision_scale(
+                bytes: &[u8],
+                precision: usize,
+                scale: usize,
+            ) -> $name {
+                let as_array = bytes.try_into();
+                match as_array {
+                    Ok(v) if bytes.len() == 16 => $name {
+                        digits: <$native_ty>::from_le_bytes(v),
+                        precision,
+                        scale,
+                    },
+                    Err(e) => panic!(
+                        "Unable to load Decimal from bytes slice ({}): {}",
+                        bytes.len(),
+                        e
+                    ),
+                    _ => panic!(
+                        "Unable to load Decimal from bytes slice with length 
{}",
+                        bytes.len()
+                    ),
+                }
+            }
+        }
+
+        impl ToPrimitive for $name {
+            fn to_isize(&self) -> Option<isize> {
+                unimplemented!("Unimplemented to_isize for {}", 
stringify!($name))
+            }
+
+            fn to_usize(&self) -> Option<usize> {
+                unimplemented!("Unimplemented to_usize for {}", 
stringify!($name))
+            }
+
+            fn to_i8(&self) -> Option<i8> {
+                Some(self.get_signed_lead_part() as i8)
+            }
+
+            fn to_i16(&self) -> Option<i16> {
+                Some(self.get_signed_lead_part() as i16)
+            }
+
+            fn to_i32(&self) -> Option<i32> {
+                Some(self.get_signed_lead_part() as i32)
+            }
+
+            fn to_i64(&self) -> Option<i64> {
+                Some(self.get_signed_lead_part() as i64)
+            }
+
+            fn to_i128(&self) -> Option<i128> {
+                Some(self.get_signed_lead_part())
+            }
+
+            fn to_u8(&self) -> Option<u8> {
+                Some(self.get_signed_lead_part() as u8)
+            }
+
+            fn to_u16(&self) -> Option<u16> {
+                Some(self.get_signed_lead_part() as u16)
+            }
+
+            fn to_u32(&self) -> Option<u32> {
+                Some(self.get_signed_lead_part() as u32)
+            }
+
+            fn to_u64(&self) -> Option<u64> {
+                Some(self.get_signed_lead_part() as u64)
+            }
+
+            fn to_u128(&self) -> Option<u128> {
+                Some(self.get_signed_lead_part() as u128)
+            }
+
+            fn to_f32(&self) -> Option<f32> {
+                // @todo Optimize this
+                Some(self.to_string().parse::<f32>().unwrap())
+            }
+
+            fn to_f64(&self) -> Option<f64> {
+                // @todo Optimize this
+                Some(self.to_string().parse::<f64>().unwrap())
+            }
+        }
+
+        impl ToString for $name {
+            fn to_string(&self) -> String {
+                println!("<{},{}>({})", self.digits, self.precision, 
self.scale);

Review comment:
       print.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

[GitHub] [arrow] jorgecarleitao commented on a change in pull request #9232: ARROW-10818: [Rust] Initial implementation for Decimal (Decimal128Type only)

Reply via email to