[arrow-rs] branch master updated: Add Decimal128 API and use it in DecimalArray and DecimalBuilder (#1871)

viirya Wed, 15 Jun 2022 23:50:54 -0700

This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git



The following commit(s) were added to refs/heads/master by this push:
     new f0bf7f932  Add Decimal128 API and use it in DecimalArray and 
DecimalBuilder (#1871)
f0bf7f932 is described below

commit f0bf7f9324e6820fb9f4b4f836a15559b963f7d1
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Wed Jun 15 23:50:39 2022 -0700

     Add Decimal128 API and use it in DecimalArray and DecimalBuilder (#1871)
    
    * Add Decimal128
    
    * Fix clippy
    
    * Add code comment
---
 arrow/src/array/array_binary.rs       |  44 ++++------
 arrow/src/array/builder.rs            |  31 +++++--
 arrow/src/array/equal_json.rs         |   2 +-
 arrow/src/array/iterator.rs           |   2 +-
 arrow/src/compute/kernels/cast.rs     |  41 +++++-----
 arrow/src/compute/kernels/sort.rs     |   2 +-
 arrow/src/compute/kernels/take.rs     |   2 +-
 arrow/src/util/decimal.rs             | 150 ++++++++++++++++++++++++++++++++++
 arrow/src/util/mod.rs                 |   1 +
 parquet/src/arrow/arrow_reader.rs     |   2 +-
 parquet/src/arrow/arrow_writer/mod.rs |   2 +-
 11 files changed, 219 insertions(+), 60 deletions(-)

diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index b1fc06d36..481ea92d6 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -33,6 +33,7 @@ use crate::datatypes::{
 };
 use crate::error::{ArrowError, Result};
 use crate::util::bit_util;
+use crate::util::decimal::Decimal128;
 use crate::{buffer::MutableBuffer, datatypes::DataType};
 
 /// See [`BinaryArray`] and [`LargeBinaryArray`] for storing
@@ -756,7 +757,7 @@ impl Array for FixedSizeBinaryArray {
 ///     .unwrap();
 ///
 ///    assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type());
-///    assert_eq!(8_887_000_000, decimal_array.value(0));
+///    assert_eq!(8_887_000_000_i128, decimal_array.value(0).as_i128());
 ///    assert_eq!("8887.000000", decimal_array.value_as_string(0));
 ///    assert_eq!(3, decimal_array.len());
 ///    assert_eq!(1, decimal_array.null_count());
@@ -775,8 +776,8 @@ pub struct DecimalArray {
 }
 
 impl DecimalArray {
-    /// Returns the element at index `i` as i128.
-    pub fn value(&self, i: usize) -> i128 {
+    /// Returns the element at index `i`.
+    pub fn value(&self, i: usize) -> Decimal128 {
         assert!(i < self.data.len(), "DecimalArray out of bounds access");
         let offset = i.checked_add(self.data.offset()).unwrap();
         let raw_val = unsafe {
@@ -787,10 +788,11 @@ impl DecimalArray {
             )
         };
         let as_array = raw_val.try_into();
-        match as_array {
+        let integer = match as_array {
             Ok(v) if raw_val.len() == 16 => i128::from_le_bytes(v),
             _ => panic!("DecimalArray elements are not 128bit integers."),
-        }
+        };
+        Decimal128::new_from_i128(self.precision, self.scale, integer)
     }
 
     /// Returns the offset for the element at index `i`.
@@ -821,23 +823,7 @@ impl DecimalArray {
 
     #[inline]
     pub fn value_as_string(&self, row: usize) -> String {
-        let value = self.value(row);
-        let value_str = value.to_string();
-
-        if self.scale == 0 {
-            value_str
-        } else {
-            let (sign, rest) = value_str.split_at(if value >= 0 { 0 } else { 1 
});
-
-            if rest.len() > self.scale {
-                // Decimal separator is in the middle of the string
-                let (whole, decimal) = value_str.split_at(value_str.len() - 
self.scale);
-                format!("{}.{}", whole, decimal)
-            } else {
-                // String has to be padded
-                format!("{}0.{:0>width$}", sign, rest, width = self.scale)
-            }
-        }
+        self.value(row).as_string()
     }
 
     pub fn from_fixed_size_list_array(
@@ -1498,8 +1484,8 @@ mod tests {
             .build()
             .unwrap();
         let decimal_array = DecimalArray::from(array_data);
-        assert_eq!(8_887_000_000, decimal_array.value(0));
-        assert_eq!(-8_887_000_000, decimal_array.value(1));
+        assert_eq!(8_887_000_000_i128, decimal_array.value(0).into());
+        assert_eq!(-8_887_000_000_i128, decimal_array.value(1).into());
         assert_eq!(16, decimal_array.value_length());
     }
 
@@ -1550,11 +1536,11 @@ mod tests {
         let array = DecimalArray::from_iter_values(vec![-100, 0, 
101].into_iter());
         assert_eq!(array.len(), 3);
         assert_eq!(array.data_type(), &DataType::Decimal(38, 10));
-        assert_eq!(-100, array.value(0));
+        assert_eq!(-100_i128, array.value(0).into());
         assert!(!array.is_null(0));
-        assert_eq!(0, array.value(1));
+        assert_eq!(0_i128, array.value(1).into());
         assert!(!array.is_null(1));
-        assert_eq!(101, array.value(2));
+        assert_eq!(101_i128, array.value(2).into());
         assert!(!array.is_null(2));
     }
 
@@ -1563,10 +1549,10 @@ mod tests {
         let array: DecimalArray = vec![Some(-100), None, 
Some(101)].into_iter().collect();
         assert_eq!(array.len(), 3);
         assert_eq!(array.data_type(), &DataType::Decimal(38, 10));
-        assert_eq!(-100, array.value(0));
+        assert_eq!(-100_i128, array.value(0).into());
         assert!(!array.is_null(0));
         assert!(array.is_null(1));
-        assert_eq!(101, array.value(2));
+        assert_eq!(101_i128, array.value(2).into());
         assert!(!array.is_null(2));
     }
 
diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs
index 2df4aecf6..2338736af 100644
--- a/arrow/src/array/builder.rs
+++ b/arrow/src/array/builder.rs
@@ -1567,11 +1567,11 @@ impl DecimalBuilder {
     /// Automatically calls the `append` method to delimit the slice appended 
in as a
     /// distinct array element.
     #[inline]
-    pub fn append_value(&mut self, value: i128) -> Result<()> {
+    pub fn append_value(&mut self, value: impl Into<i128>) -> Result<()> {
         let value = if self.value_validation {
-            validate_decimal_precision(value, self.precision)?
+            validate_decimal_precision(value.into(), self.precision)?
         } else {
-            value
+            value.into()
         };
 
         let value_as_bytes = Self::from_i128_to_fixed_size_bytes(
@@ -2619,6 +2619,7 @@ mod tests {
 
     use crate::array::Array;
     use crate::bitmap::Bitmap;
+    use crate::util::decimal::Decimal128;
 
     #[test]
     fn test_builder_i32_empty() {
@@ -3531,9 +3532,29 @@ mod tests {
     fn test_decimal_builder() {
         let mut builder = DecimalBuilder::new(30, 38, 6);
 
-        builder.append_value(8_887_000_000).unwrap();
+        builder.append_value(8_887_000_000_i128).unwrap();
         builder.append_null().unwrap();
-        builder.append_value(-8_887_000_000).unwrap();
+        builder.append_value(-8_887_000_000_i128).unwrap();
+        let decimal_array: DecimalArray = builder.finish();
+
+        assert_eq!(&DataType::Decimal(38, 6), decimal_array.data_type());
+        assert_eq!(3, decimal_array.len());
+        assert_eq!(1, decimal_array.null_count());
+        assert_eq!(32, decimal_array.value_offset(2));
+        assert_eq!(16, decimal_array.value_length());
+    }
+
+    #[test]
+    fn test_decimal_builder_with_decimal128() {
+        let mut builder = DecimalBuilder::new(30, 38, 6);
+
+        builder
+            .append_value(Decimal128::new_from_i128(30, 38, 
8_887_000_000_i128))
+            .unwrap();
+        builder.append_null().unwrap();
+        builder
+            .append_value(Decimal128::new_from_i128(30, 38, 
-8_887_000_000_i128))
+            .unwrap();
         let decimal_array: DecimalArray = builder.finish();
 
         assert_eq!(&DataType::Decimal(38, 6), decimal_array.data_type());
diff --git a/arrow/src/array/equal_json.rs b/arrow/src/array/equal_json.rs
index 64f109df5..9db1a4397 100644
--- a/arrow/src/array/equal_json.rs
+++ b/arrow/src/array/equal_json.rs
@@ -370,7 +370,7 @@ impl JsonEqual for DecimalArray {
                 self.is_valid(i)
                     && (s
                         .parse::<i128>()
-                        .map_or_else(|_| false, |v| v == self.value(i)))
+                        .map_or_else(|_| false, |v| v == 
self.value(i).as_i128()))
             }
             JNull => self.is_null(i),
             _ => false,
diff --git a/arrow/src/array/iterator.rs b/arrow/src/array/iterator.rs
index 18bdca621..bc70d1a2a 100644
--- a/arrow/src/array/iterator.rs
+++ b/arrow/src/array/iterator.rs
@@ -425,7 +425,7 @@ impl<'a> std::iter::Iterator for DecimalIter<'a> {
             if self.array.is_null(old) {
                 Some(None)
             } else {
-                Some(Some(self.array.value(old)))
+                Some(Some(self.array.value(old).as_i128()))
             }
         }
     }
diff --git a/arrow/src/compute/kernels/cast.rs 
b/arrow/src/compute/kernels/cast.rs
index 9a4638d97..fa92179b7 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -353,7 +353,7 @@ macro_rules! cast_decimal_to_integer {
             if array.is_null(i) {
                 value_builder.append_null()?;
             } else {
-                let v = array.value(i) / div;
+                let v = array.value(i).as_i128() / div;
                 // check the overflow
                 // For example: Decimal(128,10,0) as i8
                 // 128 is out of range i8
@@ -383,7 +383,7 @@ macro_rules! cast_decimal_to_float {
             } else {
                 // The range of f32 or f64 is larger than i128, we don't need 
to check overflow.
                 // cast the i128 to f64 will lose precision, for example the 
`112345678901234568` will be as `112345678901234560`.
-                let v = (array.value(i) as f64 / div) as $NATIVE_TYPE;
+                let v = (array.value(i).as_i128() as f64 / div) as 
$NATIVE_TYPE;
                 value_builder.append_value(v)?;
             }
         }
@@ -2196,6 +2196,7 @@ where
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::util::decimal::Decimal128;
     use crate::{buffer::Buffer, util::display::array_value_to_string};
 
     macro_rules! generate_cast_test_case {
@@ -2247,9 +2248,9 @@ mod tests {
             DecimalArray,
             &output_type,
             vec![
-                Some(11234560_i128),
-                Some(21234560_i128),
-                Some(31234560_i128),
+                Some(Decimal128::new_from_i128(20, 4, 11234560_i128)),
+                Some(Decimal128::new_from_i128(20, 4, 21234560_i128)),
+                Some(Decimal128::new_from_i128(20, 4, 31234560_i128)),
                 None
             ]
         );
@@ -2426,11 +2427,11 @@ mod tests {
                 DecimalArray,
                 &decimal_type,
                 vec![
-                    Some(1000000_i128),
-                    Some(2000000_i128),
-                    Some(3000000_i128),
+                    Some(Decimal128::new_from_i128(38, 6, 1000000_i128)),
+                    Some(Decimal128::new_from_i128(38, 6, 2000000_i128)),
+                    Some(Decimal128::new_from_i128(38, 6, 3000000_i128)),
                     None,
-                    Some(5000000_i128)
+                    Some(Decimal128::new_from_i128(38, 6, 5000000_i128))
                 ]
             );
         }
@@ -2458,12 +2459,12 @@ mod tests {
             DecimalArray,
             &decimal_type,
             vec![
-                Some(1100000_i128),
-                Some(2200000_i128),
-                Some(4400000_i128),
+                Some(Decimal128::new_from_i128(38, 6, 1100000_i128)),
+                Some(Decimal128::new_from_i128(38, 6, 2200000_i128)),
+                Some(Decimal128::new_from_i128(38, 6, 4400000_i128)),
                 None,
-                Some(1123456_i128),
-                Some(1123456_i128),
+                Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
+                Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
             ]
         );
 
@@ -2483,13 +2484,13 @@ mod tests {
             DecimalArray,
             &decimal_type,
             vec![
-                Some(1100000_i128),
-                Some(2200000_i128),
-                Some(4400000_i128),
+                Some(Decimal128::new_from_i128(38, 6, 1100000_i128)),
+                Some(Decimal128::new_from_i128(38, 6, 2200000_i128)),
+                Some(Decimal128::new_from_i128(38, 6, 4400000_i128)),
                 None,
-                Some(1123456_i128),
-                Some(1123456_i128),
-                Some(1123456_i128),
+                Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
+                Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
+                Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
             ]
         );
     }
diff --git a/arrow/src/compute/kernels/sort.rs 
b/arrow/src/compute/kernels/sort.rs
index 72ee8b68d..e399cf9f0 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -503,7 +503,7 @@ where
         .expect("Unable to downcast to decimal array");
     let valids = value_indices
         .into_iter()
-        .map(|index| (index, decimal_array.value(index as usize)))
+        .map(|index| (index, decimal_array.value(index as usize).as_i128()))
         .collect::<Vec<(u32, i128)>>();
     sort_primitive_inner(decimal_values, null_indices, cmp, options, limit, 
valids)
 }
diff --git a/arrow/src/compute/kernels/take.rs 
b/arrow/src/compute/kernels/take.rs
index 03637ec81..624e9ddcd 100644
--- a/arrow/src/compute/kernels/take.rs
+++ b/arrow/src/compute/kernels/take.rs
@@ -524,7 +524,7 @@ where
                 if decimal_values.is_null(index) {
                     Ok(None)
                 } else {
-                    Ok(Some(decimal_values.value(index)))
+                    Ok(Some(decimal_values.value(index).as_i128()))
                 }
             });
             let t: Result<Option<Option<_>>> = t.transpose();
diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs
new file mode 100644
index 000000000..b78af3acc
--- /dev/null
+++ b/arrow/src/util/decimal.rs
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Decimal related utils
+
+use std::cmp::Ordering;
+
+/// Represents a decimal value with precision and scale.
+/// The decimal value is represented by a signed 128-bit integer.
+#[derive(Debug)]
+pub struct Decimal128 {
+    #[allow(dead_code)]
+    precision: usize,
+    scale: usize,
+    value: i128,
+}
+
+impl PartialOrd for Decimal128 {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        assert_eq!(
+            self.scale, other.scale,
+            "Cannot compare two Decimal128 with different scale: {}, {}",
+            self.scale, other.scale
+        );
+        self.value.partial_cmp(&other.value)
+    }
+}
+
+impl Ord for Decimal128 {
+    fn cmp(&self, other: &Self) -> Ordering {
+        assert_eq!(
+            self.scale, other.scale,
+            "Cannot compare two Decimal128 with different scale: {}, {}",
+            self.scale, other.scale
+        );
+        self.value.cmp(&other.value)
+    }
+}
+
+impl PartialEq<Self> for Decimal128 {
+    fn eq(&self, other: &Self) -> bool {
+        assert_eq!(
+            self.scale, other.scale,
+            "Cannot compare two Decimal128 with different scale: {}, {}",
+            self.scale, other.scale
+        );
+        self.value.eq(&other.value)
+    }
+}
+
+impl Eq for Decimal128 {}
+
+impl Decimal128 {
+    pub fn new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> 
Self {
+        let as_array = bytes.try_into();
+        let value = match as_array {
+            Ok(v) if bytes.len() == 16 => i128::from_le_bytes(v),
+            _ => panic!("Input to Decimal128 is not 128bit integer."),
+        };
+
+        Decimal128 {
+            precision,
+            scale,
+            value,
+        }
+    }
+
+    pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self {
+        Decimal128 {
+            precision,
+            scale,
+            value,
+        }
+    }
+
+    pub fn as_i128(&self) -> i128 {
+        self.value
+    }
+
+    pub fn as_string(&self) -> String {
+        let value_str = self.value.to_string();
+
+        if self.scale == 0 {
+            value_str
+        } else {
+            let (sign, rest) = value_str.split_at(if self.value >= 0 { 0 } 
else { 1 });
+
+            if rest.len() > self.scale {
+                // Decimal separator is in the middle of the string
+                let (whole, decimal) = value_str.split_at(value_str.len() - 
self.scale);
+                format!("{}.{}", whole, decimal)
+            } else {
+                // String has to be padded
+                format!("{}0.{:0>width$}", sign, rest, width = self.scale)
+            }
+        }
+    }
+}
+
+impl From<Decimal128> for i128 {
+    fn from(decimal: Decimal128) -> Self {
+        decimal.as_i128()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::util::decimal::Decimal128;
+
+    #[test]
+    fn decimal_128_to_string() {
+        let mut value = Decimal128::new_from_i128(5, 2, 100);
+        assert_eq!(value.as_string(), "1.00");
+
+        value = Decimal128::new_from_i128(5, 3, 100);
+        assert_eq!(value.as_string(), "0.100");
+    }
+
+    #[test]
+    fn decimal_128_from_bytes() {
+        let bytes = 100_i128.to_le_bytes();
+        let value = Decimal128::new_from_bytes(5, 2, &bytes);
+        assert_eq!(value.as_string(), "1.00");
+    }
+
+    fn i128_func(value: impl Into<i128>) -> i128 {
+        value.into()
+    }
+
+    #[test]
+    fn decimal_128_to_i128() {
+        let value = Decimal128::new_from_i128(5, 2, 100);
+        let integer = i128_func(value);
+        assert_eq!(integer, 100);
+    }
+}
diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs
index 3b6de8a4b..dcb9cd52a 100644
--- a/arrow/src/util/mod.rs
+++ b/arrow/src/util/mod.rs
@@ -35,4 +35,5 @@ pub mod test_util;
 mod trusted_len;
 pub(crate) use trusted_len::trusted_len_unzip;
 
+pub mod decimal;
 pub(crate) mod reader_parser;
diff --git a/parquet/src/arrow/arrow_reader.rs 
b/parquet/src/arrow/arrow_reader.rs
index 92d4ff264..89406cd61 100644
--- a/parquet/src/arrow/arrow_reader.rs
+++ b/parquet/src/arrow/arrow_reader.rs
@@ -644,7 +644,7 @@ mod tests {
             assert_eq!(col.scale(), 2);
 
             for (i, v) in expected.enumerate() {
-                assert_eq!(col.value(i), v * 100_i128);
+                assert_eq!(col.value(i).as_i128(), v * 100_i128);
             }
         }
     }
diff --git a/parquet/src/arrow/arrow_writer/mod.rs 
b/parquet/src/arrow/arrow_writer/mod.rs
index b64517ad1..83f1bc70b 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -673,7 +673,7 @@ fn get_decimal_array_slice(
     let mut values = Vec::with_capacity(indices.len());
     let size = decimal_length_from_precision(array.precision());
     for i in indices {
-        let as_be_bytes = array.value(*i).to_be_bytes();
+        let as_be_bytes = array.value(*i).as_i128().to_be_bytes();
         let resized_value = as_be_bytes[(16 - size)..].to_vec();
         values.push(FixedLenByteArray::from(ByteArray::from(resized_value)));
     }

[arrow-rs] branch master updated: Add Decimal128 API and use it in DecimalArray and DecimalBuilder (#1871)

Reply via email to