This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f0bf7f932 Add Decimal128 API and use it in DecimalArray and
DecimalBuilder (#1871)
f0bf7f932 is described below
commit f0bf7f9324e6820fb9f4b4f836a15559b963f7d1
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Wed Jun 15 23:50:39 2022 -0700
Add Decimal128 API and use it in DecimalArray and DecimalBuilder (#1871)
* Add Decimal128
* Fix clippy
* Add code comment
---
arrow/src/array/array_binary.rs | 44 ++++------
arrow/src/array/builder.rs | 31 +++++--
arrow/src/array/equal_json.rs | 2 +-
arrow/src/array/iterator.rs | 2 +-
arrow/src/compute/kernels/cast.rs | 41 +++++-----
arrow/src/compute/kernels/sort.rs | 2 +-
arrow/src/compute/kernels/take.rs | 2 +-
arrow/src/util/decimal.rs | 150 ++++++++++++++++++++++++++++++++++
arrow/src/util/mod.rs | 1 +
parquet/src/arrow/arrow_reader.rs | 2 +-
parquet/src/arrow/arrow_writer/mod.rs | 2 +-
11 files changed, 219 insertions(+), 60 deletions(-)
diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index b1fc06d36..481ea92d6 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -33,6 +33,7 @@ use crate::datatypes::{
};
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
+use crate::util::decimal::Decimal128;
use crate::{buffer::MutableBuffer, datatypes::DataType};
/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing
@@ -756,7 +757,7 @@ impl Array for FixedSizeBinaryArray {
/// .unwrap();
///
/// assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type());
-/// assert_eq!(8_887_000_000, decimal_array.value(0));
+/// assert_eq!(8_887_000_000_i128, decimal_array.value(0).as_i128());
/// assert_eq!("8887.000000", decimal_array.value_as_string(0));
/// assert_eq!(3, decimal_array.len());
/// assert_eq!(1, decimal_array.null_count());
@@ -775,8 +776,8 @@ pub struct DecimalArray {
}
impl DecimalArray {
- /// Returns the element at index `i` as i128.
- pub fn value(&self, i: usize) -> i128 {
+ /// Returns the element at index `i`.
+ pub fn value(&self, i: usize) -> Decimal128 {
assert!(i < self.data.len(), "DecimalArray out of bounds access");
let offset = i.checked_add(self.data.offset()).unwrap();
let raw_val = unsafe {
@@ -787,10 +788,11 @@ impl DecimalArray {
)
};
let as_array = raw_val.try_into();
- match as_array {
+ let integer = match as_array {
Ok(v) if raw_val.len() == 16 => i128::from_le_bytes(v),
_ => panic!("DecimalArray elements are not 128bit integers."),
- }
+ };
+ Decimal128::new_from_i128(self.precision, self.scale, integer)
}
/// Returns the offset for the element at index `i`.
@@ -821,23 +823,7 @@ impl DecimalArray {
#[inline]
pub fn value_as_string(&self, row: usize) -> String {
- let value = self.value(row);
- let value_str = value.to_string();
-
- if self.scale == 0 {
- value_str
- } else {
- let (sign, rest) = value_str.split_at(if value >= 0 { 0 } else { 1
});
-
- if rest.len() > self.scale {
- // Decimal separator is in the middle of the string
- let (whole, decimal) = value_str.split_at(value_str.len() -
self.scale);
- format!("{}.{}", whole, decimal)
- } else {
- // String has to be padded
- format!("{}0.{:0>width$}", sign, rest, width = self.scale)
- }
- }
+ self.value(row).as_string()
}
pub fn from_fixed_size_list_array(
@@ -1498,8 +1484,8 @@ mod tests {
.build()
.unwrap();
let decimal_array = DecimalArray::from(array_data);
- assert_eq!(8_887_000_000, decimal_array.value(0));
- assert_eq!(-8_887_000_000, decimal_array.value(1));
+ assert_eq!(8_887_000_000_i128, decimal_array.value(0).into());
+ assert_eq!(-8_887_000_000_i128, decimal_array.value(1).into());
assert_eq!(16, decimal_array.value_length());
}
@@ -1550,11 +1536,11 @@ mod tests {
let array = DecimalArray::from_iter_values(vec![-100, 0,
101].into_iter());
assert_eq!(array.len(), 3);
assert_eq!(array.data_type(), &DataType::Decimal(38, 10));
- assert_eq!(-100, array.value(0));
+ assert_eq!(-100_i128, array.value(0).into());
assert!(!array.is_null(0));
- assert_eq!(0, array.value(1));
+ assert_eq!(0_i128, array.value(1).into());
assert!(!array.is_null(1));
- assert_eq!(101, array.value(2));
+ assert_eq!(101_i128, array.value(2).into());
assert!(!array.is_null(2));
}
@@ -1563,10 +1549,10 @@ mod tests {
let array: DecimalArray = vec![Some(-100), None,
Some(101)].into_iter().collect();
assert_eq!(array.len(), 3);
assert_eq!(array.data_type(), &DataType::Decimal(38, 10));
- assert_eq!(-100, array.value(0));
+ assert_eq!(-100_i128, array.value(0).into());
assert!(!array.is_null(0));
assert!(array.is_null(1));
- assert_eq!(101, array.value(2));
+ assert_eq!(101_i128, array.value(2).into());
assert!(!array.is_null(2));
}
diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs
index 2df4aecf6..2338736af 100644
--- a/arrow/src/array/builder.rs
+++ b/arrow/src/array/builder.rs
@@ -1567,11 +1567,11 @@ impl DecimalBuilder {
/// Automatically calls the `append` method to delimit the slice appended
in as a
/// distinct array element.
#[inline]
- pub fn append_value(&mut self, value: i128) -> Result<()> {
+ pub fn append_value(&mut self, value: impl Into<i128>) -> Result<()> {
let value = if self.value_validation {
- validate_decimal_precision(value, self.precision)?
+ validate_decimal_precision(value.into(), self.precision)?
} else {
- value
+ value.into()
};
let value_as_bytes = Self::from_i128_to_fixed_size_bytes(
@@ -2619,6 +2619,7 @@ mod tests {
use crate::array::Array;
use crate::bitmap::Bitmap;
+ use crate::util::decimal::Decimal128;
#[test]
fn test_builder_i32_empty() {
@@ -3531,9 +3532,29 @@ mod tests {
fn test_decimal_builder() {
let mut builder = DecimalBuilder::new(30, 38, 6);
- builder.append_value(8_887_000_000).unwrap();
+ builder.append_value(8_887_000_000_i128).unwrap();
builder.append_null().unwrap();
- builder.append_value(-8_887_000_000).unwrap();
+ builder.append_value(-8_887_000_000_i128).unwrap();
+ let decimal_array: DecimalArray = builder.finish();
+
+ assert_eq!(&DataType::Decimal(38, 6), decimal_array.data_type());
+ assert_eq!(3, decimal_array.len());
+ assert_eq!(1, decimal_array.null_count());
+ assert_eq!(32, decimal_array.value_offset(2));
+ assert_eq!(16, decimal_array.value_length());
+ }
+
+ #[test]
+ fn test_decimal_builder_with_decimal128() {
+ let mut builder = DecimalBuilder::new(30, 38, 6);
+
+ builder
+ .append_value(Decimal128::new_from_i128(30, 38,
8_887_000_000_i128))
+ .unwrap();
+ builder.append_null().unwrap();
+ builder
+ .append_value(Decimal128::new_from_i128(30, 38,
-8_887_000_000_i128))
+ .unwrap();
let decimal_array: DecimalArray = builder.finish();
assert_eq!(&DataType::Decimal(38, 6), decimal_array.data_type());
diff --git a/arrow/src/array/equal_json.rs b/arrow/src/array/equal_json.rs
index 64f109df5..9db1a4397 100644
--- a/arrow/src/array/equal_json.rs
+++ b/arrow/src/array/equal_json.rs
@@ -370,7 +370,7 @@ impl JsonEqual for DecimalArray {
self.is_valid(i)
&& (s
.parse::<i128>()
- .map_or_else(|_| false, |v| v == self.value(i)))
+ .map_or_else(|_| false, |v| v ==
self.value(i).as_i128()))
}
JNull => self.is_null(i),
_ => false,
diff --git a/arrow/src/array/iterator.rs b/arrow/src/array/iterator.rs
index 18bdca621..bc70d1a2a 100644
--- a/arrow/src/array/iterator.rs
+++ b/arrow/src/array/iterator.rs
@@ -425,7 +425,7 @@ impl<'a> std::iter::Iterator for DecimalIter<'a> {
if self.array.is_null(old) {
Some(None)
} else {
- Some(Some(self.array.value(old)))
+ Some(Some(self.array.value(old).as_i128()))
}
}
}
diff --git a/arrow/src/compute/kernels/cast.rs
b/arrow/src/compute/kernels/cast.rs
index 9a4638d97..fa92179b7 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -353,7 +353,7 @@ macro_rules! cast_decimal_to_integer {
if array.is_null(i) {
value_builder.append_null()?;
} else {
- let v = array.value(i) / div;
+ let v = array.value(i).as_i128() / div;
// check the overflow
// For example: Decimal(128,10,0) as i8
// 128 is out of range i8
@@ -383,7 +383,7 @@ macro_rules! cast_decimal_to_float {
} else {
// The range of f32 or f64 is larger than i128, we don't need
to check overflow.
// cast the i128 to f64 will lose precision, for example the
`112345678901234568` will be as `112345678901234560`.
- let v = (array.value(i) as f64 / div) as $NATIVE_TYPE;
+ let v = (array.value(i).as_i128() as f64 / div) as
$NATIVE_TYPE;
value_builder.append_value(v)?;
}
}
@@ -2196,6 +2196,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
+ use crate::util::decimal::Decimal128;
use crate::{buffer::Buffer, util::display::array_value_to_string};
macro_rules! generate_cast_test_case {
@@ -2247,9 +2248,9 @@ mod tests {
DecimalArray,
&output_type,
vec![
- Some(11234560_i128),
- Some(21234560_i128),
- Some(31234560_i128),
+ Some(Decimal128::new_from_i128(20, 4, 11234560_i128)),
+ Some(Decimal128::new_from_i128(20, 4, 21234560_i128)),
+ Some(Decimal128::new_from_i128(20, 4, 31234560_i128)),
None
]
);
@@ -2426,11 +2427,11 @@ mod tests {
DecimalArray,
&decimal_type,
vec![
- Some(1000000_i128),
- Some(2000000_i128),
- Some(3000000_i128),
+ Some(Decimal128::new_from_i128(38, 6, 1000000_i128)),
+ Some(Decimal128::new_from_i128(38, 6, 2000000_i128)),
+ Some(Decimal128::new_from_i128(38, 6, 3000000_i128)),
None,
- Some(5000000_i128)
+ Some(Decimal128::new_from_i128(38, 6, 5000000_i128))
]
);
}
@@ -2458,12 +2459,12 @@ mod tests {
DecimalArray,
&decimal_type,
vec![
- Some(1100000_i128),
- Some(2200000_i128),
- Some(4400000_i128),
+ Some(Decimal128::new_from_i128(38, 6, 1100000_i128)),
+ Some(Decimal128::new_from_i128(38, 6, 2200000_i128)),
+ Some(Decimal128::new_from_i128(38, 6, 4400000_i128)),
None,
- Some(1123456_i128),
- Some(1123456_i128),
+ Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
+ Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
]
);
@@ -2483,13 +2484,13 @@ mod tests {
DecimalArray,
&decimal_type,
vec![
- Some(1100000_i128),
- Some(2200000_i128),
- Some(4400000_i128),
+ Some(Decimal128::new_from_i128(38, 6, 1100000_i128)),
+ Some(Decimal128::new_from_i128(38, 6, 2200000_i128)),
+ Some(Decimal128::new_from_i128(38, 6, 4400000_i128)),
None,
- Some(1123456_i128),
- Some(1123456_i128),
- Some(1123456_i128),
+ Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
+ Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
+ Some(Decimal128::new_from_i128(38, 6, 1123456_i128)),
]
);
}
diff --git a/arrow/src/compute/kernels/sort.rs
b/arrow/src/compute/kernels/sort.rs
index 72ee8b68d..e399cf9f0 100644
--- a/arrow/src/compute/kernels/sort.rs
+++ b/arrow/src/compute/kernels/sort.rs
@@ -503,7 +503,7 @@ where
.expect("Unable to downcast to decimal array");
let valids = value_indices
.into_iter()
- .map(|index| (index, decimal_array.value(index as usize)))
+ .map(|index| (index, decimal_array.value(index as usize).as_i128()))
.collect::<Vec<(u32, i128)>>();
sort_primitive_inner(decimal_values, null_indices, cmp, options, limit,
valids)
}
diff --git a/arrow/src/compute/kernels/take.rs
b/arrow/src/compute/kernels/take.rs
index 03637ec81..624e9ddcd 100644
--- a/arrow/src/compute/kernels/take.rs
+++ b/arrow/src/compute/kernels/take.rs
@@ -524,7 +524,7 @@ where
if decimal_values.is_null(index) {
Ok(None)
} else {
- Ok(Some(decimal_values.value(index)))
+ Ok(Some(decimal_values.value(index).as_i128()))
}
});
let t: Result<Option<Option<_>>> = t.transpose();
diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs
new file mode 100644
index 000000000..b78af3acc
--- /dev/null
+++ b/arrow/src/util/decimal.rs
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Decimal related utils
+
+use std::cmp::Ordering;
+
+/// Represents a decimal value with precision and scale.
+/// The decimal value is represented by a signed 128-bit integer.
+#[derive(Debug)]
+pub struct Decimal128 {
+ #[allow(dead_code)]
+ precision: usize,
+ scale: usize,
+ value: i128,
+}
+
+impl PartialOrd for Decimal128 {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ assert_eq!(
+ self.scale, other.scale,
+ "Cannot compare two Decimal128 with different scale: {}, {}",
+ self.scale, other.scale
+ );
+ self.value.partial_cmp(&other.value)
+ }
+}
+
+impl Ord for Decimal128 {
+ fn cmp(&self, other: &Self) -> Ordering {
+ assert_eq!(
+ self.scale, other.scale,
+ "Cannot compare two Decimal128 with different scale: {}, {}",
+ self.scale, other.scale
+ );
+ self.value.cmp(&other.value)
+ }
+}
+
+impl PartialEq<Self> for Decimal128 {
+ fn eq(&self, other: &Self) -> bool {
+ assert_eq!(
+ self.scale, other.scale,
+ "Cannot compare two Decimal128 with different scale: {}, {}",
+ self.scale, other.scale
+ );
+ self.value.eq(&other.value)
+ }
+}
+
+impl Eq for Decimal128 {}
+
+impl Decimal128 {
+ pub fn new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) ->
Self {
+ let as_array = bytes.try_into();
+ let value = match as_array {
+ Ok(v) if bytes.len() == 16 => i128::from_le_bytes(v),
+ _ => panic!("Input to Decimal128 is not 128bit integer."),
+ };
+
+ Decimal128 {
+ precision,
+ scale,
+ value,
+ }
+ }
+
+ pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self {
+ Decimal128 {
+ precision,
+ scale,
+ value,
+ }
+ }
+
+ pub fn as_i128(&self) -> i128 {
+ self.value
+ }
+
+ pub fn as_string(&self) -> String {
+ let value_str = self.value.to_string();
+
+ if self.scale == 0 {
+ value_str
+ } else {
+ let (sign, rest) = value_str.split_at(if self.value >= 0 { 0 }
else { 1 });
+
+ if rest.len() > self.scale {
+ // Decimal separator is in the middle of the string
+ let (whole, decimal) = value_str.split_at(value_str.len() -
self.scale);
+ format!("{}.{}", whole, decimal)
+ } else {
+ // String has to be padded
+ format!("{}0.{:0>width$}", sign, rest, width = self.scale)
+ }
+ }
+ }
+}
+
+impl From<Decimal128> for i128 {
+ fn from(decimal: Decimal128) -> Self {
+ decimal.as_i128()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::util::decimal::Decimal128;
+
+ #[test]
+ fn decimal_128_to_string() {
+ let mut value = Decimal128::new_from_i128(5, 2, 100);
+ assert_eq!(value.as_string(), "1.00");
+
+ value = Decimal128::new_from_i128(5, 3, 100);
+ assert_eq!(value.as_string(), "0.100");
+ }
+
+ #[test]
+ fn decimal_128_from_bytes() {
+ let bytes = 100_i128.to_le_bytes();
+ let value = Decimal128::new_from_bytes(5, 2, &bytes);
+ assert_eq!(value.as_string(), "1.00");
+ }
+
+ fn i128_func(value: impl Into<i128>) -> i128 {
+ value.into()
+ }
+
+ #[test]
+ fn decimal_128_to_i128() {
+ let value = Decimal128::new_from_i128(5, 2, 100);
+ let integer = i128_func(value);
+ assert_eq!(integer, 100);
+ }
+}
diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs
index 3b6de8a4b..dcb9cd52a 100644
--- a/arrow/src/util/mod.rs
+++ b/arrow/src/util/mod.rs
@@ -35,4 +35,5 @@ pub mod test_util;
mod trusted_len;
pub(crate) use trusted_len::trusted_len_unzip;
+pub mod decimal;
pub(crate) mod reader_parser;
diff --git a/parquet/src/arrow/arrow_reader.rs
b/parquet/src/arrow/arrow_reader.rs
index 92d4ff264..89406cd61 100644
--- a/parquet/src/arrow/arrow_reader.rs
+++ b/parquet/src/arrow/arrow_reader.rs
@@ -644,7 +644,7 @@ mod tests {
assert_eq!(col.scale(), 2);
for (i, v) in expected.enumerate() {
- assert_eq!(col.value(i), v * 100_i128);
+ assert_eq!(col.value(i).as_i128(), v * 100_i128);
}
}
}
diff --git a/parquet/src/arrow/arrow_writer/mod.rs
b/parquet/src/arrow/arrow_writer/mod.rs
index b64517ad1..83f1bc70b 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -673,7 +673,7 @@ fn get_decimal_array_slice(
let mut values = Vec::with_capacity(indices.len());
let size = decimal_length_from_precision(array.precision());
for i in indices {
- let as_be_bytes = array.value(*i).to_be_bytes();
+ let as_be_bytes = array.value(*i).as_i128().to_be_bytes();
let resized_value = as_be_bytes[(16 - size)..].to_vec();
values.push(FixedLenByteArray::from(ByteArray::from(resized_value)));
}