alamb commented on a change in pull request #9232: URL: https://github.com/apache/arrow/pull/9232#discussion_r590748878
########## File path: rust/arrow/src/datatypes/decimal.rs ########## @@ -0,0 +1,926 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::datatypes::JsonSerializable; +use num::{NumCast, ToPrimitive, Zero}; +use serde_json::Value; +use std::hash::{Hash, Hasher}; +use std::{ + cmp::Ordering, + convert::TryInto, + fmt, + mem::size_of, + num::ParseIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + str::FromStr, +}; + +#[derive(Debug, PartialEq)] +pub enum ParseDecimalError { + ParseIntError(ParseIntError), + Other(String), +} + +impl From<ParseIntError> for ParseDecimalError { + fn from(err: ParseIntError) -> ParseDecimalError { + ParseDecimalError::ParseIntError(err) + } +} + +// Decimal (precision, scale) = Decimal(1, 2) = 1.00 +pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq { + const MAX_DIGITS: usize; + + // fn into_json_value(self) -> Option<Value>; Review comment: perhaps this could be cleaned up? 
########## File path: rust/arrow/src/array/iterator.rs ########## @@ -240,6 +242,48 @@ impl<'a, T: BinaryOffsetSizeTrait> std::iter::Iterator for GenericBinaryIter<'a, } } +// In the feature, DecimalArray will become Generic and this iterator will use type of Decimal as T here Review comment: It seems to me as if the code already is generic over `T` so I am not quite sure what this comment is trying to say ########## File path: rust/arrow/src/array/builder.rs ########## @@ -1214,11 +1214,12 @@ impl DecimalBuilder { } } - /// Appends a byte slice into the builder. + /// Append i128 to into the builder. For DecimalNType please use append_value. + /// This method is useful when you are reading a data and doesnt require to cast value to DecimalNType /// /// Automatically calls the `append` method to delimit the slice appended in as a /// distinct array element. - pub fn append_value(&mut self, value: i128) -> Result<()> { + pub fn append_value_i128(&mut self, value: i128) -> Result<()> { Review comment: What about something like ```suggestion pub fn append_value(&mut self, value: impl Into<ArrowDecimalType>) -> Result<()> { let value: ArrowDecimalType = value.into(); ``` And then as long as you implement `impl From<i128> for Decimal128Type` it should be possible to call `append_value` with either a `Decimal128Type` or an `i128` ########## File path: rust/arrow/src/datatypes/decimal.rs ########## @@ -0,0 +1,926 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::datatypes::JsonSerializable; +use num::{NumCast, ToPrimitive, Zero}; +use serde_json::Value; +use std::hash::{Hash, Hasher}; +use std::{ + cmp::Ordering, + convert::TryInto, + fmt, + mem::size_of, + num::ParseIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + str::FromStr, +}; + +#[derive(Debug, PartialEq)] +pub enum ParseDecimalError { + ParseIntError(ParseIntError), + Other(String), +} + +impl From<ParseIntError> for ParseDecimalError { + fn from(err: ParseIntError) -> ParseDecimalError { + ParseDecimalError::ParseIntError(err) + } +} + +// Decimal (precision, scale) = Decimal(1, 2) = 1.00 +pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq { + const MAX_DIGITS: usize; + + // fn into_json_value(self) -> Option<Value>; + + fn get_byte_width_for_precision_scale(precision: usize, scale: usize) -> usize; + + // Rescale scale part + fn rescale(&mut self, scale: usize); + + fn rescale_to_new(self, scale: usize) -> Self; + + // Try to parse string with precision, scale + fn from_i128( + n: i128, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_i64( + n: i64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_f64( + n: f64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + // Try to parse string with precision, scale + fn parse( + string: &str, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn to_le_bytes(&self) -> [u8; 16]; + 
+ fn to_be_bytes(&self) -> [u8; 16]; + + fn to_byte_slice(&self) -> Vec<u8>; + + fn get_signed_lead_part(&self) -> i128; + + fn from_bytes_with_precision_scale( + bytes: &[u8], + precision: usize, + scale: usize, + ) -> Self; +} + +#[inline] +pub fn numeric_to_decimal<T: ToString, U: ArrowDecimalType>( + n: T, + p: usize, + s: usize, +) -> Option<U> { + Some(U::parse(n.to_string().as_str(), p, s).unwrap_or_else(|_e| { Review comment: This seems like it will likely be quite slow (as it will convert the number to an allocated string, and then parse that string into a decimal, and then throw the string away) Given that this is a new set of features, I think it would be ok potentially to merge this in (as it isn't a performance regression!) and optimize later, but I predict this will be needed as soon as anyone tries out the decimal type. I bet you could do something in terms of `ArrowNumericType` instead of `T: ToString`, especially as `ArrowDecimalType::from_i32` etc exists ########## File path: rust/arrow/src/datatypes/decimal.rs ########## @@ -0,0 +1,926 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::datatypes::JsonSerializable; +use num::{NumCast, ToPrimitive, Zero}; +use serde_json::Value; +use std::hash::{Hash, Hasher}; +use std::{ + cmp::Ordering, + convert::TryInto, + fmt, + mem::size_of, + num::ParseIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + str::FromStr, +}; + +#[derive(Debug, PartialEq)] +pub enum ParseDecimalError { + ParseIntError(ParseIntError), + Other(String), +} + +impl From<ParseIntError> for ParseDecimalError { + fn from(err: ParseIntError) -> ParseDecimalError { + ParseDecimalError::ParseIntError(err) + } +} + +// Decimal (precision, scale) = Decimal(1, 2) = 1.00 +pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq { + const MAX_DIGITS: usize; + + // fn into_json_value(self) -> Option<Value>; + + fn get_byte_width_for_precision_scale(precision: usize, scale: usize) -> usize; + + // Rescale scale part + fn rescale(&mut self, scale: usize); + + fn rescale_to_new(self, scale: usize) -> Self; + + // Try to parse string with precision, scale + fn from_i128( + n: i128, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_i64( + n: i64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_f64( + n: f64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + // Try to parse string with precision, scale + fn parse( + string: &str, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn to_le_bytes(&self) -> [u8; 16]; + + fn to_be_bytes(&self) -> [u8; 16]; + + fn to_byte_slice(&self) -> Vec<u8>; + + fn get_signed_lead_part(&self) -> i128; + + fn from_bytes_with_precision_scale( + bytes: &[u8], + precision: usize, + scale: usize, + ) -> Self; +} + +#[inline] +pub fn numeric_to_decimal<T: ToString, U: ArrowDecimalType>( + n: T, + p: usize, + s: usize, +) -> Option<U> { + Some(U::parse(n.to_string().as_str(), p, s).unwrap_or_else(|_e| { + 
panic!("unable to represent"); + })) +} + +#[inline] +pub fn decimal_to_numeric<U: DecimalCast, T: NumCast>(n: U) -> Option<T> { + T::from(n) +} + +pub trait DecimalCast: Sized + ArrowDecimalType + ToPrimitive { + fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self>; +} + +impl DecimalCast for Decimal128Type { + fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self> { + Some(Decimal128Type::from_f64(n.to_f64().unwrap(), p, s).unwrap()) + } +} + +#[inline(always)] +fn pow_ten(pow: usize) -> u64 { + 10u64.pow(pow as u32) +} + +macro_rules! make_type { + ($name:ident, $native_ty:ty, $max_digits:expr) => { + #[derive(Copy, Clone, Eq)] + pub struct $name { + pub digits: $native_ty, + pub precision: usize, + pub scale: usize, + } + + impl $name { + pub fn new(digits: $native_ty, precision: usize, scale: usize) -> $name { + assert!( + (precision + scale) <= $max_digits, + "Unable to use {} to represent Decimal({}, {}), max digits reached ({}).", + stringify!($name), + precision, + scale, + stringify!($max_digits), + ); + + $name { + digits, + precision, + scale, + } + } + } + + impl ArrowDecimalType for $name { + const MAX_DIGITS: usize = $max_digits; + + /// Returns the byte width of this primitive type. 
+ fn get_byte_width_for_precision_scale( + _precision: usize, + _scale: usize, + ) -> usize { + size_of::<$native_ty>() + } + + #[inline(always)] + fn rescale(&mut self, scale: usize) { + if self.digits.is_zero() { + self.scale = scale; + } else { + match self.scale.cmp(&scale) { + Ordering::Greater => { + self.digits /= pow_ten(self.scale - scale) as $native_ty; + self.scale = scale; + } + Ordering::Less => { + self.digits *= pow_ten(scale - self.scale) as $native_ty; + self.scale = scale; + } + Ordering::Equal => {} + }; + } + } + + #[inline(always)] + fn get_signed_lead_part(&self) -> i128 { + self.rescale_to_new(0).digits + } + + #[inline(always)] + fn rescale_to_new(self, scale: usize) -> $name { + if self.digits.is_zero() { + return $name::new(0, 0, scale); + } + + let digits = match self.scale.cmp(&scale) { + Ordering::Greater => { + self.digits / pow_ten(self.scale - scale) as $native_ty + } + Ordering::Less => { + self.digits * pow_ten(scale - self.scale) as $native_ty + } + Ordering::Equal => self.digits, + }; + + $name::new(digits, self.precision, scale) + } + + fn from_i128( + n: i128, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = $name::new(n as $native_ty, precision, 0); + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn from_i64( + n: i64, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = $name::new(n as $native_ty, precision, 0); + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn from_f64( + n: f64, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + $name::parse(n.to_string().as_str(), precision, scale) + } + + fn parse( + string: &str, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = 
$name::from_str(string)?; + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn to_le_bytes(&self) -> [u8; 16] { + self.digits.to_le_bytes() + } + + fn to_be_bytes(&self) -> [u8; 16] { + self.digits.to_be_bytes() + } + + fn to_byte_slice(&self) -> Vec<u8> { + self.digits.to_le_bytes().to_vec() + } + + fn from_bytes_with_precision_scale( + bytes: &[u8], + precision: usize, + scale: usize, + ) -> $name { + let as_array = bytes.try_into(); + match as_array { + Ok(v) if bytes.len() == 16 => $name { + digits: <$native_ty>::from_le_bytes(v), + precision, + scale, + }, + Err(e) => panic!( + "Unable to load Decimal from bytes slice ({}): {}", + bytes.len(), + e + ), + _ => panic!( + "Unable to load Decimal from bytes slice with length {}", + bytes.len() + ), + } + } + } + + impl ToPrimitive for $name { + fn to_isize(&self) -> Option<isize> { + unimplemented!("Unimplemented to_isize for {}", stringify!($name)) + } + + fn to_usize(&self) -> Option<usize> { + unimplemented!("Unimplemented to_usize for {}", stringify!($name)) + } + + fn to_i8(&self) -> Option<i8> { + Some(self.get_signed_lead_part() as i8) Review comment: I feel like this is likely to `panic!` (at least in debug builds) if the lead part is greater than 128 / less than -128. Would it perhaps be better to check for overflow and return `None`? ########## File path: rust/arrow/src/csv/reader.rs ########## @@ -633,6 +636,45 @@ fn build_primitive_array<T: ArrowPrimitiveType + Parser>( .map(|e| Arc::new(e) as ArrayRef) } +// parses a specific column (col_idx) into an Arrow Array. +fn build_decimal_array( + line_number: usize, + rows: &[StringRecord], + col_idx: usize, + precision: usize, + scale: usize, +) -> Result<ArrayRef> { + let mut builder = DecimalBuilder::new(rows.len(), precision, scale); + + for (row_index, row) in rows.iter().enumerate() { + match row.get(col_idx) { + Some(s) => { + if s.is_empty() { + builder.append_null()? 
+ } + + let parsed = match Decimal128Type::parse(s, precision, scale) { + Ok(number) => number, + Err(_) => { + return Err(ArrowError::ParseError(format!( + // TODO: we should surface the underlying error here. + "Error while parsing value {} for column {} at line {}", + s, + col_idx, + line_number + row_index Review comment: ```suggestion "Error while parsing value {} for column {} at line {}: {}", s, col_idx, line_number + row_index, e ``` might be one way of surfacing the underlying error ########## File path: rust/datafusion/tests/sql.rs ########## @@ -366,6 +366,27 @@ async fn csv_query_group_by_float32() -> Result<()> { Ok(()) } +#[tokio::test] +async fn csv_query_group_by_decimal() -> Result<()> { + let mut ctx = ExecutionContext::new(); + register_aggregate_simple_csv(&mut ctx)?; + + let sql = + "SELECT COUNT(*) as cnt, c4 FROM aggregate_simple GROUP BY c4 ORDER BY cnt DESC"; + let actual = execute(&mut ctx, sql).await; + + let expected = vec![ Review comment: 😮 👍 ########## File path: rust/arrow/src/datatypes/decimal.rs ########## @@ -0,0 +1,926 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::datatypes::JsonSerializable; +use num::{NumCast, ToPrimitive, Zero}; +use serde_json::Value; +use std::hash::{Hash, Hasher}; +use std::{ + cmp::Ordering, + convert::TryInto, + fmt, + mem::size_of, + num::ParseIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + str::FromStr, +}; + +#[derive(Debug, PartialEq)] +pub enum ParseDecimalError { + ParseIntError(ParseIntError), + Other(String), +} + +impl From<ParseIntError> for ParseDecimalError { + fn from(err: ParseIntError) -> ParseDecimalError { + ParseDecimalError::ParseIntError(err) + } +} + +// Decimal (precision, scale) = Decimal(1, 2) = 1.00 +pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq { + const MAX_DIGITS: usize; + + // fn into_json_value(self) -> Option<Value>; + + fn get_byte_width_for_precision_scale(precision: usize, scale: usize) -> usize; + + // Rescale scale part + fn rescale(&mut self, scale: usize); + + fn rescale_to_new(self, scale: usize) -> Self; Review comment: It looks like the comments could use some cleanup ########## File path: rust/arrow/src/datatypes/decimal.rs ########## @@ -0,0 +1,926 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::datatypes::JsonSerializable; +use num::{NumCast, ToPrimitive, Zero}; +use serde_json::Value; +use std::hash::{Hash, Hasher}; +use std::{ + cmp::Ordering, + convert::TryInto, + fmt, + mem::size_of, + num::ParseIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + str::FromStr, +}; + +#[derive(Debug, PartialEq)] +pub enum ParseDecimalError { + ParseIntError(ParseIntError), + Other(String), +} + +impl From<ParseIntError> for ParseDecimalError { + fn from(err: ParseIntError) -> ParseDecimalError { + ParseDecimalError::ParseIntError(err) + } +} + +// Decimal (precision, scale) = Decimal(1, 2) = 1.00 +pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq { + const MAX_DIGITS: usize; + + // fn into_json_value(self) -> Option<Value>; + + fn get_byte_width_for_precision_scale(precision: usize, scale: usize) -> usize; + + // Rescale scale part + fn rescale(&mut self, scale: usize); + + fn rescale_to_new(self, scale: usize) -> Self; + + // Try to parse string with precision, scale Review comment: I don't think this is parsing a string ########## File path: rust/arrow/src/datatypes/decimal.rs ########## @@ -0,0 +1,926 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::datatypes::JsonSerializable; +use num::{NumCast, ToPrimitive, Zero}; +use serde_json::Value; +use std::hash::{Hash, Hasher}; +use std::{ + cmp::Ordering, + convert::TryInto, + fmt, + mem::size_of, + num::ParseIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + str::FromStr, +}; + +#[derive(Debug, PartialEq)] +pub enum ParseDecimalError { + ParseIntError(ParseIntError), + Other(String), +} + +impl From<ParseIntError> for ParseDecimalError { + fn from(err: ParseIntError) -> ParseDecimalError { + ParseDecimalError::ParseIntError(err) + } +} + +// Decimal (precision, scale) = Decimal(1, 2) = 1.00 +pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq { + const MAX_DIGITS: usize; + + // fn into_json_value(self) -> Option<Value>; + + fn get_byte_width_for_precision_scale(precision: usize, scale: usize) -> usize; + + // Rescale scale part + fn rescale(&mut self, scale: usize); + + fn rescale_to_new(self, scale: usize) -> Self; + + // Try to parse string with precision, scale + fn from_i128( + n: i128, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_i64( + n: i64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_f64( + n: f64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + // Try to parse string with precision, scale + fn parse( + string: &str, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn to_le_bytes(&self) -> [u8; 16]; + + fn to_be_bytes(&self) -> [u8; 16]; + + fn to_byte_slice(&self) -> Vec<u8>; + + fn get_signed_lead_part(&self) -> i128; + + fn from_bytes_with_precision_scale( + bytes: &[u8], + precision: usize, + scale: usize, + ) -> Self; +} + +#[inline] +pub fn numeric_to_decimal<T: ToString, U: ArrowDecimalType>( + n: T, + p: usize, + s: usize, +) -> Option<U> { + Some(U::parse(n.to_string().as_str(), p, s).unwrap_or_else(|_e| { + 
panic!("unable to represent"); + })) +} + +#[inline] +pub fn decimal_to_numeric<U: DecimalCast, T: NumCast>(n: U) -> Option<T> { + T::from(n) +} + +pub trait DecimalCast: Sized + ArrowDecimalType + ToPrimitive { + fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self>; +} + +impl DecimalCast for Decimal128Type { + fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self> { + Some(Decimal128Type::from_f64(n.to_f64().unwrap(), p, s).unwrap()) + } +} + +#[inline(always)] +fn pow_ten(pow: usize) -> u64 { + 10u64.pow(pow as u32) +} + +macro_rules! make_type { + ($name:ident, $native_ty:ty, $max_digits:expr) => { + #[derive(Copy, Clone, Eq)] + pub struct $name { + pub digits: $native_ty, + pub precision: usize, + pub scale: usize, + } + + impl $name { + pub fn new(digits: $native_ty, precision: usize, scale: usize) -> $name { + assert!( + (precision + scale) <= $max_digits, + "Unable to use {} to represent Decimal({}, {}), max digits reached ({}).", + stringify!($name), + precision, + scale, + stringify!($max_digits), + ); + + $name { + digits, + precision, + scale, + } + } + } + + impl ArrowDecimalType for $name { + const MAX_DIGITS: usize = $max_digits; + + /// Returns the byte width of this primitive type. 
+ fn get_byte_width_for_precision_scale( + _precision: usize, + _scale: usize, + ) -> usize { + size_of::<$native_ty>() + } + + #[inline(always)] + fn rescale(&mut self, scale: usize) { + if self.digits.is_zero() { + self.scale = scale; + } else { + match self.scale.cmp(&scale) { + Ordering::Greater => { + self.digits /= pow_ten(self.scale - scale) as $native_ty; + self.scale = scale; + } + Ordering::Less => { + self.digits *= pow_ten(scale - self.scale) as $native_ty; + self.scale = scale; + } + Ordering::Equal => {} + }; + } + } + + #[inline(always)] + fn get_signed_lead_part(&self) -> i128 { + self.rescale_to_new(0).digits + } + + #[inline(always)] + fn rescale_to_new(self, scale: usize) -> $name { + if self.digits.is_zero() { + return $name::new(0, 0, scale); + } + + let digits = match self.scale.cmp(&scale) { + Ordering::Greater => { + self.digits / pow_ten(self.scale - scale) as $native_ty + } + Ordering::Less => { + self.digits * pow_ten(scale - self.scale) as $native_ty + } + Ordering::Equal => self.digits, + }; + + $name::new(digits, self.precision, scale) + } + + fn from_i128( + n: i128, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = $name::new(n as $native_ty, precision, 0); + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn from_i64( + n: i64, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = $name::new(n as $native_ty, precision, 0); + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn from_f64( + n: f64, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + $name::parse(n.to_string().as_str(), precision, scale) + } + + fn parse( + string: &str, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = 
$name::from_str(string)?; + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn to_le_bytes(&self) -> [u8; 16] { + self.digits.to_le_bytes() + } + + fn to_be_bytes(&self) -> [u8; 16] { + self.digits.to_be_bytes() + } + + fn to_byte_slice(&self) -> Vec<u8> { + self.digits.to_le_bytes().to_vec() + } + + fn from_bytes_with_precision_scale( + bytes: &[u8], + precision: usize, + scale: usize, + ) -> $name { + let as_array = bytes.try_into(); + match as_array { + Ok(v) if bytes.len() == 16 => $name { + digits: <$native_ty>::from_le_bytes(v), + precision, + scale, + }, + Err(e) => panic!( + "Unable to load Decimal from bytes slice ({}): {}", + bytes.len(), + e + ), + _ => panic!( + "Unable to load Decimal from bytes slice with length {}", + bytes.len() + ), + } + } + } + + impl ToPrimitive for $name { + fn to_isize(&self) -> Option<isize> { + unimplemented!("Unimplemented to_isize for {}", stringify!($name)) + } + + fn to_usize(&self) -> Option<usize> { + unimplemented!("Unimplemented to_usize for {}", stringify!($name)) + } + + fn to_i8(&self) -> Option<i8> { + Some(self.get_signed_lead_part() as i8) + } + + fn to_i16(&self) -> Option<i16> { + Some(self.get_signed_lead_part() as i16) + } + + fn to_i32(&self) -> Option<i32> { + Some(self.get_signed_lead_part() as i32) + } + + fn to_i64(&self) -> Option<i64> { + Some(self.get_signed_lead_part() as i64) + } + + fn to_i128(&self) -> Option<i128> { + Some(self.get_signed_lead_part()) + } + + fn to_u8(&self) -> Option<u8> { + Some(self.get_signed_lead_part() as u8) + } + + fn to_u16(&self) -> Option<u16> { + Some(self.get_signed_lead_part() as u16) + } + + fn to_u32(&self) -> Option<u32> { + Some(self.get_signed_lead_part() as u32) + } + + fn to_u64(&self) -> Option<u64> { + Some(self.get_signed_lead_part() as u64) + } + + fn to_u128(&self) -> Option<u128> { + Some(self.get_signed_lead_part() as u128) + } + + fn to_f32(&self) -> Option<f32> { + // 
@todo Optimize this + Some(self.to_string().parse::<f32>().unwrap()) + } + + fn to_f64(&self) -> Option<f64> { + // @todo Optimize this + Some(self.to_string().parse::<f64>().unwrap()) + } + } + + impl ToString for $name { + fn to_string(&self) -> String { + println!("<{},{}>({})", self.digits, self.precision, self.scale); + + // Skip sign, because we split string to lead, trail + let as_str = self.digits.abs().to_string(); + let len = as_str.len(); + + let (lead, trail) = + (&as_str[..(len - self.scale)], &as_str[(len - self.scale)..]); + + let mut result = String::new(); + + if self.digits < 0 { + result.push_str("-") + } + + if lead == "" { + result.push_str(&"0"); + } else { + result.push_str(&lead); + } + + if !trail.is_empty() { + result.push_str(&"."); + result.push_str(&trail); + } + + result + } + } + + impl Default for $name { + #[inline] + fn default() -> $name { + Zero::zero() + } + } + + impl Zero for $name { + #[inline] + fn zero() -> $name { + $name::new(0, 1, 0) + } + + #[inline] + fn is_zero(&self) -> bool { + self.digits == 0 + } + } + + impl Add<$name> for $name { + type Output = $name; + + #[inline] + fn add(self, rhs: $name) -> $name { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + $name::new(self.digits + rhs.digits, self.precision, self.scale) + } + Ordering::Less => self.rescale_to_new(rhs.scale) + rhs, + Ordering::Greater => rhs.rescale_to_new(self.scale) + self, + } + } + } + + impl AddAssign<$name> for $name { + #[inline] + fn add_assign(&mut self, rhs: $name) { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + self.digits += rhs.digits; + } + Ordering::Less => { + self.rescale(rhs.scale); + self.digits += rhs.digits; + } + Ordering::Greater => { + self.rescale(self.scale); + self.digits += rhs.digits; + } + } + } + } + + impl Sub<$name> for $name { + type Output = $name; + + #[inline] + fn sub(self, rhs: $name) -> $name { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + $name::new(self.digits - 
rhs.digits, self.precision, self.scale) + } + Ordering::Less => self.rescale_to_new(rhs.scale) - rhs, + Ordering::Greater => rhs.rescale_to_new(self.scale) - self, + } + } + } + + impl SubAssign<$name> for $name { + #[inline] + fn sub_assign(&mut self, rhs: $name) { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + self.digits -= rhs.digits; + } + Ordering::Less => { + self.rescale(rhs.scale); + self.digits -= rhs.digits; + } + Ordering::Greater => { + self.rescale(self.scale); + self.digits -= rhs.digits; + } + } + } + } + + impl Ord for $name { + fn cmp(&self, rhs: &Self) -> Ordering { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits.cmp(&rhs.digits), + Ordering::Less => { + self.rescale_to_new(rhs.scale).digits.cmp(&rhs.digits) + } + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits.cmp(&self.digits) + } + } + } + } + + impl PartialOrd for $name { + fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { + Some(self.cmp(rhs)) + } + + fn lt(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits < rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits < rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits < self.digits + } + } + } + + fn le(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits <= rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits <= rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits <= self.digits + } + } + } + + fn gt(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits > rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits > rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits > self.digits + } + } + } + + fn ge(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits >= rhs.digits, + Ordering::Less => 
self.rescale_to_new(rhs.scale).digits >= rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits >= self.digits + } + } + } + } + + impl JsonSerializable for $name { + fn into_json_value(self) -> Option<Value> { + unimplemented!("Unimplemented JsonSerializable::into_json_value for {}", stringify!($name)) + } + } + + impl fmt::Debug for $name { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "Decimal<{}, {}>(\"{}\")", + self.precision, + self.scale, + self.to_string() + ) + } + } + + // To fix clippy you are deriving `Hash` but have implemented `PartialEq` explicitly + impl Hash for $name { + fn hash<H: Hasher>(&self, state: &mut H) { + self.digits.hash(state); + self.precision.hash(state); + self.scale.hash(state); + } + + fn hash_slice<H: Hasher>(_data: &[Self], _state: &mut H) where + Self: Sized, { + unimplemented!("Unimplemented hash_slice for {}", stringify!($name)) + } + } + + impl PartialEq<$name> for $name { + #[inline] + fn eq(&self, rhs: &$name) -> bool { + // @todo What is a correct behaviour for it? Rescaling? Review comment: this seems like an important TODO... ########## File path: rust/datafusion/src/physical_plan/group_scalar.rs ########## @@ -120,12 +131,9 @@ mod tests { use crate::error::DataFusionError; macro_rules! scalar_eq_test { - ($TYPE:expr, $VALUE:expr) => {{ - let scalar_value = $TYPE($VALUE); - let a = GroupByScalar::try_from(&scalar_value).unwrap(); - - let scalar_value = $TYPE($VALUE); - let b = GroupByScalar::try_from(&scalar_value).unwrap(); + ($EXPR:expr) => {{ Review comment: I don't understand this change -- it seems to now be testing that `try_from` returns the same value for the same input... 
########## File path: rust/arrow/src/record_batch.rs ########## @@ -132,10 +132,12 @@ impl RecordBatch { if options.match_field_names { for (i, column) in columns.iter().enumerate() { if column.len() != len { - return Err(ArrowError::InvalidArgumentError( - "all columns in a record batch must have the same length" - .to_string(), - )); + return Err(ArrowError::InvalidArgumentError(format!( + "all columns in a record batch must have the same length, expected {:?} but found {:?} at column {}", + len, + column.len(), Review comment: I think the order of these arguments is not correct (data type should be second, right?) ########## File path: rust/datafusion/src/scalar.rs ########## @@ -210,6 +213,14 @@ impl ScalarValue { /// Converts a scalar value into an array of `size` rows. pub fn to_array_of_size(&self, size: usize) -> ArrayRef { match self { + ScalarValue::Decimal128(e, p, s) => match e { Review comment: I don't understand this limitation ########## File path: rust/arrow/src/datatypes/decimal.rs ########## @@ -0,0 +1,926 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::datatypes::JsonSerializable; +use num::{NumCast, ToPrimitive, Zero}; +use serde_json::Value; +use std::hash::{Hash, Hasher}; +use std::{ + cmp::Ordering, + convert::TryInto, + fmt, + mem::size_of, + num::ParseIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + str::FromStr, +}; + +#[derive(Debug, PartialEq)] +pub enum ParseDecimalError { + ParseIntError(ParseIntError), + Other(String), +} + +impl From<ParseIntError> for ParseDecimalError { + fn from(err: ParseIntError) -> ParseDecimalError { + ParseDecimalError::ParseIntError(err) + } +} + +// Decimal (precision, scale) = Decimal(1, 2) = 1.00 +pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq { Review comment: ```suggestion /// Decimal (precision, scale) = Decimal(1, 2) = 1.00 /// Note that this is a trait to support different types of Decimal implementations /// in the future, not just those that are 128 bits. pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq { ``` I think adding some comments about the rationale for this trait would also help. 
I took an educated guess, but I am not sure that it is correct ########## File path: rust/arrow/src/compute/kernels/take.rs ########## @@ -524,6 +528,75 @@ where Ok(BooleanArray::from(Arc::new(data))) } +fn take_decimal<I>( + values: &DecimalArray, + indices: &PrimitiveArray<I>, +) -> Result<DecimalArray> +where + I: ArrowNumericType, + I::Native: ToPrimitive, +{ + let data_len = indices.len(); + + // @todo This should be rewritten to try_from_trusted_len_iter when DecimalArray will be generic + let mut buffer = + MutableBuffer::from_len_zeroed(data_len * std::mem::size_of::<[u8; 16]>()); + let data = buffer.typed_data_mut::<[u8; 16]>(); + + let nulls; + + if values.null_count() == 0 { + // Take indices without null checking + for (i, elem) in data.iter_mut().enumerate() { + let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| { + ArrowError::ComputeError("Cast to usize failed".to_string()) + })?; + + *elem = values.value(index).to_le_bytes(); + } + nulls = indices.data_ref().null_buffer().cloned(); + } else { + let num_bytes = bit_util::ceil(data_len, 8); + let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true); + + let null_slice = null_buf.as_slice_mut(); + + for (i, elem) in data.iter_mut().enumerate() { + let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| { + ArrowError::ComputeError("Cast to usize failed".to_string()) + })?; + + if values.is_null(index) { + bit_util::unset_bit(null_slice, i); + } + + *elem = values.value(index).to_le_bytes(); + } + nulls = match indices.data_ref().null_buffer() { + Some(buffer) => Some(buffer_bin_and( + buffer, + 0, + &null_buf.into(), + 0, + indices.len(), + )), + None => Some(null_buf.into()), + }; + } + + let data = ArrayData::new( + values.data_type().clone(), + indices.len(), + None, + nulls, + 0, + vec![buffer.into()], + vec![], + ); + + Ok(DecimalArray::from(Arc::new(data) as ArrayDataRef)) +} + /// `take` implementation for string arrays Review comment: I suggest 
adding some coverage for the `take` kernels with `DecimalArrays` ########## File path: rust/arrow/src/datatypes/decimal.rs ########## @@ -0,0 +1,926 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::datatypes::JsonSerializable; +use num::{NumCast, ToPrimitive, Zero}; +use serde_json::Value; +use std::hash::{Hash, Hasher}; +use std::{ + cmp::Ordering, + convert::TryInto, + fmt, + mem::size_of, + num::ParseIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + str::FromStr, +}; + +#[derive(Debug, PartialEq)] +pub enum ParseDecimalError { + ParseIntError(ParseIntError), + Other(String), +} + +impl From<ParseIntError> for ParseDecimalError { + fn from(err: ParseIntError) -> ParseDecimalError { + ParseDecimalError::ParseIntError(err) + } +} + +// Decimal (precision, scale) = Decimal(1, 2) = 1.00 +pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq { + const MAX_DIGITS: usize; + + // fn into_json_value(self) -> Option<Value>; + + fn get_byte_width_for_precision_scale(precision: usize, scale: usize) -> usize; + + // Rescale scale part + fn rescale(&mut self, scale: usize); + + fn rescale_to_new(self, scale: usize) -> Self; + + // Try to parse string with precision, scale + fn from_i128( + 
n: i128, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_i64( + n: i64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_f64( + n: f64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + // Try to parse string with precision, scale + fn parse( + string: &str, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn to_le_bytes(&self) -> [u8; 16]; + + fn to_be_bytes(&self) -> [u8; 16]; + + fn to_byte_slice(&self) -> Vec<u8>; + + fn get_signed_lead_part(&self) -> i128; + + fn from_bytes_with_precision_scale( + bytes: &[u8], + precision: usize, + scale: usize, + ) -> Self; +} + +#[inline] +pub fn numeric_to_decimal<T: ToString, U: ArrowDecimalType>( + n: T, + p: usize, + s: usize, +) -> Option<U> { + Some(U::parse(n.to_string().as_str(), p, s).unwrap_or_else(|_e| { + panic!("unable to represent"); + })) +} + +#[inline] +pub fn decimal_to_numeric<U: DecimalCast, T: NumCast>(n: U) -> Option<T> { + T::from(n) +} + +pub trait DecimalCast: Sized + ArrowDecimalType + ToPrimitive { + fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self>; +} + +impl DecimalCast for Decimal128Type { + fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self> { + Some(Decimal128Type::from_f64(n.to_f64().unwrap(), p, s).unwrap()) + } +} + +#[inline(always)] +fn pow_ten(pow: usize) -> u64 { + 10u64.pow(pow as u32) +} + +macro_rules! 
make_type { + ($name:ident, $native_ty:ty, $max_digits:expr) => { + #[derive(Copy, Clone, Eq)] + pub struct $name { + pub digits: $native_ty, + pub precision: usize, + pub scale: usize, + } + + impl $name { + pub fn new(digits: $native_ty, precision: usize, scale: usize) -> $name { + assert!( + (precision + scale) <= $max_digits, + "Unable to use {} to represent Decimal({}, {}), max digits reached ({}).", + stringify!($name), + precision, + scale, + stringify!($max_digits), + ); + + $name { + digits, + precision, + scale, + } + } + } + + impl ArrowDecimalType for $name { + const MAX_DIGITS: usize = $max_digits; + + /// Returns the byte width of this primitive type. + fn get_byte_width_for_precision_scale( + _precision: usize, + _scale: usize, + ) -> usize { + size_of::<$native_ty>() + } + + #[inline(always)] + fn rescale(&mut self, scale: usize) { + if self.digits.is_zero() { + self.scale = scale; + } else { + match self.scale.cmp(&scale) { + Ordering::Greater => { + self.digits /= pow_ten(self.scale - scale) as $native_ty; + self.scale = scale; + } + Ordering::Less => { + self.digits *= pow_ten(scale - self.scale) as $native_ty; + self.scale = scale; + } + Ordering::Equal => {} + }; + } + } + + #[inline(always)] + fn get_signed_lead_part(&self) -> i128 { + self.rescale_to_new(0).digits + } + + #[inline(always)] + fn rescale_to_new(self, scale: usize) -> $name { + if self.digits.is_zero() { + return $name::new(0, 0, scale); + } + + let digits = match self.scale.cmp(&scale) { + Ordering::Greater => { + self.digits / pow_ten(self.scale - scale) as $native_ty + } + Ordering::Less => { + self.digits * pow_ten(scale - self.scale) as $native_ty + } + Ordering::Equal => self.digits, + }; + + $name::new(digits, self.precision, scale) + } + + fn from_i128( + n: i128, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = $name::new(n as $native_ty, precision, 0); + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } 
+ + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn from_i64( + n: i64, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = $name::new(n as $native_ty, precision, 0); + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn from_f64( + n: f64, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + $name::parse(n.to_string().as_str(), precision, scale) + } + + fn parse( + string: &str, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = $name::from_str(string)?; + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn to_le_bytes(&self) -> [u8; 16] { + self.digits.to_le_bytes() + } + + fn to_be_bytes(&self) -> [u8; 16] { + self.digits.to_be_bytes() + } + + fn to_byte_slice(&self) -> Vec<u8> { + self.digits.to_le_bytes().to_vec() + } + + fn from_bytes_with_precision_scale( + bytes: &[u8], + precision: usize, + scale: usize, + ) -> $name { + let as_array = bytes.try_into(); + match as_array { + Ok(v) if bytes.len() == 16 => $name { + digits: <$native_ty>::from_le_bytes(v), + precision, + scale, + }, + Err(e) => panic!( + "Unable to load Decimal from bytes slice ({}): {}", + bytes.len(), + e + ), + _ => panic!( + "Unable to load Decimal from bytes slice with length {}", + bytes.len() + ), + } + } + } + + impl ToPrimitive for $name { + fn to_isize(&self) -> Option<isize> { + unimplemented!("Unimplemented to_isize for {}", stringify!($name)) + } + + fn to_usize(&self) -> Option<usize> { + unimplemented!("Unimplemented to_usize for {}", stringify!($name)) + } + + fn to_i8(&self) -> Option<i8> { + Some(self.get_signed_lead_part() as i8) + } + + fn to_i16(&self) -> Option<i16> { + Some(self.get_signed_lead_part() as i16) + } + + fn to_i32(&self) -> Option<i32> { + 
Some(self.get_signed_lead_part() as i32) + } + + fn to_i64(&self) -> Option<i64> { + Some(self.get_signed_lead_part() as i64) + } + + fn to_i128(&self) -> Option<i128> { + Some(self.get_signed_lead_part()) + } + + fn to_u8(&self) -> Option<u8> { + Some(self.get_signed_lead_part() as u8) + } + + fn to_u16(&self) -> Option<u16> { + Some(self.get_signed_lead_part() as u16) + } + + fn to_u32(&self) -> Option<u32> { + Some(self.get_signed_lead_part() as u32) + } + + fn to_u64(&self) -> Option<u64> { + Some(self.get_signed_lead_part() as u64) + } + + fn to_u128(&self) -> Option<u128> { + Some(self.get_signed_lead_part() as u128) + } + + fn to_f32(&self) -> Option<f32> { + // @todo Optimize this + Some(self.to_string().parse::<f32>().unwrap()) + } + + fn to_f64(&self) -> Option<f64> { + // @todo Optimize this + Some(self.to_string().parse::<f64>().unwrap()) + } + } + + impl ToString for $name { + fn to_string(&self) -> String { + println!("<{},{}>({})", self.digits, self.precision, self.scale); + + // Skip sign, because we split string to lead, trail + let as_str = self.digits.abs().to_string(); + let len = as_str.len(); + + let (lead, trail) = + (&as_str[..(len - self.scale)], &as_str[(len - self.scale)..]); + + let mut result = String::new(); + + if self.digits < 0 { + result.push_str("-") + } + + if lead == "" { + result.push_str(&"0"); + } else { + result.push_str(&lead); + } + + if !trail.is_empty() { + result.push_str(&"."); + result.push_str(&trail); + } + + result + } + } + + impl Default for $name { + #[inline] + fn default() -> $name { + Zero::zero() + } + } + + impl Zero for $name { + #[inline] + fn zero() -> $name { + $name::new(0, 1, 0) + } + + #[inline] + fn is_zero(&self) -> bool { + self.digits == 0 + } + } + + impl Add<$name> for $name { + type Output = $name; + + #[inline] + fn add(self, rhs: $name) -> $name { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + $name::new(self.digits + rhs.digits, self.precision, self.scale) + } + 
Ordering::Less => self.rescale_to_new(rhs.scale) + rhs, + Ordering::Greater => rhs.rescale_to_new(self.scale) + self, + } + } + } + + impl AddAssign<$name> for $name { + #[inline] + fn add_assign(&mut self, rhs: $name) { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + self.digits += rhs.digits; + } + Ordering::Less => { + self.rescale(rhs.scale); + self.digits += rhs.digits; + } + Ordering::Greater => { + self.rescale(self.scale); + self.digits += rhs.digits; + } + } + } + } + + impl Sub<$name> for $name { + type Output = $name; + + #[inline] + fn sub(self, rhs: $name) -> $name { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + $name::new(self.digits - rhs.digits, self.precision, self.scale) + } + Ordering::Less => self.rescale_to_new(rhs.scale) - rhs, + Ordering::Greater => rhs.rescale_to_new(self.scale) - self, + } + } + } + + impl SubAssign<$name> for $name { + #[inline] + fn sub_assign(&mut self, rhs: $name) { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + self.digits -= rhs.digits; + } + Ordering::Less => { + self.rescale(rhs.scale); + self.digits -= rhs.digits; + } + Ordering::Greater => { + self.rescale(self.scale); + self.digits -= rhs.digits; + } + } + } + } + + impl Ord for $name { + fn cmp(&self, rhs: &Self) -> Ordering { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits.cmp(&rhs.digits), + Ordering::Less => { + self.rescale_to_new(rhs.scale).digits.cmp(&rhs.digits) + } + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits.cmp(&self.digits) + } + } + } + } + + impl PartialOrd for $name { + fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { + Some(self.cmp(rhs)) + } + + fn lt(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits < rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits < rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits < self.digits + } + } + } + + fn le(&self, rhs: &Self) -> 
bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits <= rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits <= rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits <= self.digits + } + } + } + + fn gt(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits > rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits > rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits > self.digits + } + } + } + + fn ge(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits >= rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits >= rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits >= self.digits + } + } + } + } + + impl JsonSerializable for $name { + fn into_json_value(self) -> Option<Value> { + unimplemented!("Unimplemented JsonSerializable::into_json_value for {}", stringify!($name)) + } + } + + impl fmt::Debug for $name { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "Decimal<{}, {}>(\"{}\")", + self.precision, + self.scale, + self.to_string() + ) + } + } + + // To fix clippy you are deriving `Hash` but have implemented `PartialEq` explicitly + impl Hash for $name { + fn hash<H: Hasher>(&self, state: &mut H) { + self.digits.hash(state); + self.precision.hash(state); + self.scale.hash(state); + } + + fn hash_slice<H: Hasher>(_data: &[Self], _state: &mut H) where + Self: Sized, { + unimplemented!("Unimplemented hash_slice for {}", stringify!($name)) + } + } + + impl PartialEq<$name> for $name { + #[inline] + fn eq(&self, rhs: &$name) -> bool { + // @todo What is a correct behaviour for it? Rescaling? 
+ self.digits == rhs.digits + } + } + + impl FromStr for $name { + type Err = ParseDecimalError; + + fn from_str(s: &str) -> Result<Self, ParseDecimalError> { + let (digits, precision, scale) = match s.find('.') { + // Decimal with empty scale + None => { + let digits = s.parse::<$native_ty>()?; + + if digits < 0 { + (s.parse::<$native_ty>()?, s.len() - 1, 0) + } else { + (s.parse::<$native_ty>()?, s.len(), 0) + } + } + Some(loc) => { + let (lead, trail) = (&s[..loc], &s[loc + 1..]); + + // Concat both parts to make bigint from int + let mut parts = String::from(lead); + parts.push_str(trail); + + let digits = parts.parse::<$native_ty>()?; + + if digits < 0 { + (digits, lead.len() - 1, trail.len()) + } else { + (digits, lead.len(), trail.len()) + } + } + }; + + Ok($name::new(digits, precision, scale)) + } + } + }; +} + +// This types are disabled, because Arrow doesnt declare Decimals for 32 / 64 +// i32 max - 2_147_483_647i32 +// make_type!(Decimal32Type, i32, 9); +// i64 max - 9_223_372_036_854_775_807i64 +//make_type!(Decimal64Type, i64, 18); + +// i128 max - 170_141_183_460_469_231_731_687_303_715_884_105_727i128 +make_type!(Decimal128Type, i128, 38); + +impl From<Decimal128Type> for i128 { + fn from(d: Decimal128Type) -> Self { + d.digits + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn decimal_test_from_and_to_str() { + let values = vec![ + // None + ("0", Decimal128Type::new(0_i128, 1, 0)), + // Positive + ("1.0", Decimal128Type::new(10_i128, 1, 1)), + ("1.001", Decimal128Type::new(1001_i128, 1, 3)), + ("1", Decimal128Type::new(1_i128, 1, 0)), + ("0.98", Decimal128Type::new(98_i128, 1, 2)), + // Negative + ("-1.0", Decimal128Type::new(-10_i128, 1, 1)), + ("-1.001", Decimal128Type::new(-1001_i128, 1, 3)), + ("-1", Decimal128Type::new(-1_i128, 1, 0)), + // + ( + "1.000000000001", + Decimal128Type::new(1_000_000_000_001_i128, 1, 12), + ), + ( + "5.000000000005", + Decimal128Type::new(5_000_000_000_005_i128, 1, 12), + ), + ]; + + for (source, 
expected) in values { + let actual = Decimal128Type::from_str(source).unwrap(); + assert_eq!(actual, expected); + assert_eq!(actual.to_string(), source); + } + } + + #[test] + fn decimal_test_partial_eq() { + let values = vec![ + // Eq scale + ( + Decimal128Type::new(0_i128, 1, 0), + Decimal128Type::new(0_i128, 1, 0), + true, + ), + ( + Decimal128Type::new(0_i128, 1, 0), + Decimal128Type::new(1_i128, 1, 0), + false, + ), + // Scaling is disabled in PartialEq, probably we will use it, but I dont know for now + // What is a correct behaviour + // > scale + // ( + // Decimal128Type::new(10_i128, 1, 1), + // Decimal128Type::new(1_i128, 1, 0), + // true, + // ), + // ( + // Decimal128Type::new(20_i128, 1, 1), + // Decimal128Type::new(1_i128, 1, 0), + // false, + // ), + // // < scale + // ( + // Decimal128Type::new(1_i128, 1, 0), + // Decimal128Type::new(10_i128, 1, 1), + // true, + // ), + // ( + // Decimal128Type::new(1_i128, 1, 0), + // Decimal128Type::new(20_i128, 1, 1), + // false, + // ), + ]; + + for (left, right, expected) in values { + assert_eq!( + left == right, + expected, + "{} == {}, expected {}", + left.to_string(), + right.to_string(), + expected + ); + } + } + + #[test] + fn decimal_test_add() { + let values = vec![ + // without rescaling + ( + Decimal128Type::new(0, 2, 0), + Decimal128Type::new(5, 2, 0), + Decimal128Type::new(5, 2, 0), + ), + ( + Decimal128Type::new(5, 2, 0), + Decimal128Type::new(5, 2, 0), + Decimal128Type::new(10, 2, 0), + ), + // with rescaling left + ( + Decimal128Type::new(1, 1, 0), + Decimal128Type::new(101, 1, 2), + Decimal128Type::new(201, 1, 2), + ), + // with rescaling right + ( + Decimal128Type::new(101, 1, 2), + Decimal128Type::new(1, 1, 0), + Decimal128Type::new(201, 1, 2), + ), + ]; + + for (left, right, result) in values { + assert_eq!( + left + right, + result, + "{} + {} = {}", + left.to_string(), + right.to_string(), + result.to_string() + ); + } + } + + #[test] + fn decimal_test_sub() { + let values = vec![ + // 
without rescaling + ( + Decimal128Type::new(10, 2, 0), + Decimal128Type::new(5, 2, 0), + Decimal128Type::new(5, 2, 0), + ), + ( + Decimal128Type::new(5, 2, 0), + Decimal128Type::new(5, 2, 0), + Decimal128Type::new(0, 2, 0), + ), + // with rescaling left + ( + Decimal128Type::new(2, 1, 0), + Decimal128Type::new(101, 1, 2), + Decimal128Type::new(99, 1, 2), + ), + ]; + + for (left, right, result) in values { + assert_eq!( + left - right, + result, + "{} - {} = {}", + left.to_string(), + right.to_string(), + result.to_string() + ); + } + } + + #[test] + fn decimal_test_cmp() { + let values = vec![ + ( + Decimal128Type::new(0_i128, 1, 0), + Decimal128Type::new(0_i128, 1, 0), + Ordering::Equal, + ), + ( + Decimal128Type::new(1_i128, 1, 0), + Decimal128Type::new(0_i128, 1, 0), + Ordering::Greater, + ), + ( + Decimal128Type::new(0_i128, 1, 0), + Decimal128Type::new(1_i128, 1, 0), + Ordering::Less, + ), + ]; + + for (left, right, expected) in values { + assert_eq!(left.cmp(&right), expected); + } + } + + #[test] + fn decimal_test_cmp_lt() { + let values = vec![ + ( + Decimal128Type::new(0_i128, 1, 0), + Decimal128Type::new(1_i128, 1, 0), + true, + ), + ( + Decimal128Type::new(1_i128, 1, 0), + Decimal128Type::new(1_i128, 1, 0), + false, + ), + ]; + + for (left, right, expected) in values { + assert_eq!(left < right, expected); + } + } + + #[test] Review comment: tests for `==` (aka `PartialEq`) are probably important ########## File path: rust/arrow/src/datatypes/decimal.rs ########## @@ -0,0 +1,926 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::datatypes::JsonSerializable; +use num::{NumCast, ToPrimitive, Zero}; +use serde_json::Value; +use std::hash::{Hash, Hasher}; +use std::{ + cmp::Ordering, + convert::TryInto, + fmt, + mem::size_of, + num::ParseIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + str::FromStr, +}; + +#[derive(Debug, PartialEq)] +pub enum ParseDecimalError { + ParseIntError(ParseIntError), + Other(String), +} + +impl From<ParseIntError> for ParseDecimalError { + fn from(err: ParseIntError) -> ParseDecimalError { + ParseDecimalError::ParseIntError(err) + } +} + +// Decimal (precision, scale) = Decimal(1, 2) = 1.00 +pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq { + const MAX_DIGITS: usize; + + // fn into_json_value(self) -> Option<Value>; + + fn get_byte_width_for_precision_scale(precision: usize, scale: usize) -> usize; + + // Rescale scale part + fn rescale(&mut self, scale: usize); + + fn rescale_to_new(self, scale: usize) -> Self; + + // Try to parse string with precision, scale + fn from_i128( + n: i128, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_i64( + n: i64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_f64( + n: f64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + // Try to parse string with precision, scale + fn parse( + string: &str, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn to_le_bytes(&self) -> [u8; 16]; + 
+ fn to_be_bytes(&self) -> [u8; 16]; + + fn to_byte_slice(&self) -> Vec<u8>; + + fn get_signed_lead_part(&self) -> i128; + + fn from_bytes_with_precision_scale( + bytes: &[u8], + precision: usize, + scale: usize, + ) -> Self; +} + +#[inline] +pub fn numeric_to_decimal<T: ToString, U: ArrowDecimalType>( + n: T, + p: usize, + s: usize, +) -> Option<U> { + Some(U::parse(n.to_string().as_str(), p, s).unwrap_or_else(|_e| { + panic!("unable to represent"); + })) +} + +#[inline] +pub fn decimal_to_numeric<U: DecimalCast, T: NumCast>(n: U) -> Option<T> { + T::from(n) +} + +pub trait DecimalCast: Sized + ArrowDecimalType + ToPrimitive { + fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self>; +} + +impl DecimalCast for Decimal128Type { + fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self> { + Some(Decimal128Type::from_f64(n.to_f64().unwrap(), p, s).unwrap()) + } +} + +#[inline(always)] +fn pow_ten(pow: usize) -> u64 { + 10u64.pow(pow as u32) +} + +macro_rules! make_type { + ($name:ident, $native_ty:ty, $max_digits:expr) => { + #[derive(Copy, Clone, Eq)] + pub struct $name { + pub digits: $native_ty, + pub precision: usize, + pub scale: usize, + } + + impl $name { + pub fn new(digits: $native_ty, precision: usize, scale: usize) -> $name { + assert!( + (precision + scale) <= $max_digits, + "Unable to use {} to represent Decimal({}, {}), max digits reached ({}).", + stringify!($name), + precision, + scale, + stringify!($max_digits), + ); + + $name { + digits, + precision, + scale, + } + } + } + + impl ArrowDecimalType for $name { + const MAX_DIGITS: usize = $max_digits; + + /// Returns the byte width of this primitive type. 
+ fn get_byte_width_for_precision_scale( + _precision: usize, + _scale: usize, + ) -> usize { + size_of::<$native_ty>() + } + + #[inline(always)] + fn rescale(&mut self, scale: usize) { + if self.digits.is_zero() { + self.scale = scale; + } else { + match self.scale.cmp(&scale) { + Ordering::Greater => { + self.digits /= pow_ten(self.scale - scale) as $native_ty; + self.scale = scale; + } + Ordering::Less => { + self.digits *= pow_ten(scale - self.scale) as $native_ty; + self.scale = scale; + } + Ordering::Equal => {} + }; + } + } + + #[inline(always)] + fn get_signed_lead_part(&self) -> i128 { + self.rescale_to_new(0).digits + } + + #[inline(always)] + fn rescale_to_new(self, scale: usize) -> $name { + if self.digits.is_zero() { + return $name::new(0, 0, scale); + } + + let digits = match self.scale.cmp(&scale) { + Ordering::Greater => { + self.digits / pow_ten(self.scale - scale) as $native_ty + } + Ordering::Less => { + self.digits * pow_ten(scale - self.scale) as $native_ty + } + Ordering::Equal => self.digits, + }; + + $name::new(digits, self.precision, scale) + } + + fn from_i128( + n: i128, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = $name::new(n as $native_ty, precision, 0); + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn from_i64( + n: i64, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = $name::new(n as $native_ty, precision, 0); + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn from_f64( + n: f64, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + $name::parse(n.to_string().as_str(), precision, scale) + } + + fn parse( + string: &str, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = 
$name::from_str(string)?; + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn to_le_bytes(&self) -> [u8; 16] { + self.digits.to_le_bytes() + } + + fn to_be_bytes(&self) -> [u8; 16] { + self.digits.to_be_bytes() + } + + fn to_byte_slice(&self) -> Vec<u8> { + self.digits.to_le_bytes().to_vec() + } + + fn from_bytes_with_precision_scale( + bytes: &[u8], + precision: usize, + scale: usize, + ) -> $name { + let as_array = bytes.try_into(); + match as_array { + Ok(v) if bytes.len() == 16 => $name { + digits: <$native_ty>::from_le_bytes(v), + precision, + scale, + }, + Err(e) => panic!( + "Unable to load Decimal from bytes slice ({}): {}", + bytes.len(), + e + ), + _ => panic!( + "Unable to load Decimal from bytes slice with length {}", + bytes.len() + ), + } + } + } + + impl ToPrimitive for $name { + fn to_isize(&self) -> Option<isize> { + unimplemented!("Unimplemented to_isize for {}", stringify!($name)) + } + + fn to_usize(&self) -> Option<usize> { + unimplemented!("Unimplemented to_usize for {}", stringify!($name)) + } + + fn to_i8(&self) -> Option<i8> { + Some(self.get_signed_lead_part() as i8) + } + + fn to_i16(&self) -> Option<i16> { + Some(self.get_signed_lead_part() as i16) + } + + fn to_i32(&self) -> Option<i32> { + Some(self.get_signed_lead_part() as i32) + } + + fn to_i64(&self) -> Option<i64> { + Some(self.get_signed_lead_part() as i64) + } + + fn to_i128(&self) -> Option<i128> { + Some(self.get_signed_lead_part()) + } + + fn to_u8(&self) -> Option<u8> { + Some(self.get_signed_lead_part() as u8) + } + + fn to_u16(&self) -> Option<u16> { + Some(self.get_signed_lead_part() as u16) + } + + fn to_u32(&self) -> Option<u32> { + Some(self.get_signed_lead_part() as u32) + } + + fn to_u64(&self) -> Option<u64> { + Some(self.get_signed_lead_part() as u64) + } + + fn to_u128(&self) -> Option<u128> { + Some(self.get_signed_lead_part() as u128) + } + + fn to_f32(&self) -> Option<f32> { + // 
@todo Optimize this + Some(self.to_string().parse::<f32>().unwrap()) + } + + fn to_f64(&self) -> Option<f64> { + // @todo Optimize this + Some(self.to_string().parse::<f64>().unwrap()) + } + } + + impl ToString for $name { + fn to_string(&self) -> String { + println!("<{},{}>({})", self.digits, self.precision, self.scale); + + // Skip sign, because we split string to lead, trail + let as_str = self.digits.abs().to_string(); + let len = as_str.len(); + + let (lead, trail) = + (&as_str[..(len - self.scale)], &as_str[(len - self.scale)..]); + + let mut result = String::new(); + + if self.digits < 0 { + result.push_str("-") + } + + if lead == "" { + result.push_str(&"0"); + } else { + result.push_str(&lead); + } + + if !trail.is_empty() { + result.push_str(&"."); + result.push_str(&trail); + } + + result + } + } + + impl Default for $name { + #[inline] + fn default() -> $name { + Zero::zero() + } + } + + impl Zero for $name { + #[inline] + fn zero() -> $name { + $name::new(0, 1, 0) + } + + #[inline] + fn is_zero(&self) -> bool { + self.digits == 0 + } + } + + impl Add<$name> for $name { + type Output = $name; + + #[inline] + fn add(self, rhs: $name) -> $name { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + $name::new(self.digits + rhs.digits, self.precision, self.scale) + } + Ordering::Less => self.rescale_to_new(rhs.scale) + rhs, + Ordering::Greater => rhs.rescale_to_new(self.scale) + self, + } + } + } + + impl AddAssign<$name> for $name { + #[inline] + fn add_assign(&mut self, rhs: $name) { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + self.digits += rhs.digits; + } + Ordering::Less => { + self.rescale(rhs.scale); + self.digits += rhs.digits; + } + Ordering::Greater => { + self.rescale(self.scale); + self.digits += rhs.digits; + } + } + } + } + + impl Sub<$name> for $name { + type Output = $name; + + #[inline] + fn sub(self, rhs: $name) -> $name { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + $name::new(self.digits - 
rhs.digits, self.precision, self.scale) + } + Ordering::Less => self.rescale_to_new(rhs.scale) - rhs, + Ordering::Greater => rhs.rescale_to_new(self.scale) - self, + } + } + } + + impl SubAssign<$name> for $name { + #[inline] + fn sub_assign(&mut self, rhs: $name) { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + self.digits -= rhs.digits; + } + Ordering::Less => { + self.rescale(rhs.scale); + self.digits -= rhs.digits; + } + Ordering::Greater => { + self.rescale(self.scale); + self.digits -= rhs.digits; + } + } + } + } + + impl Ord for $name { + fn cmp(&self, rhs: &Self) -> Ordering { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits.cmp(&rhs.digits), + Ordering::Less => { + self.rescale_to_new(rhs.scale).digits.cmp(&rhs.digits) + } + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits.cmp(&self.digits) + } + } + } + } + + impl PartialOrd for $name { + fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { + Some(self.cmp(rhs)) + } + + fn lt(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits < rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits < rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits < self.digits + } + } + } + + fn le(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits <= rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits <= rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits <= self.digits + } + } + } + + fn gt(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits > rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits > rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits > self.digits + } + } + } + + fn ge(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits >= rhs.digits, + Ordering::Less => 
self.rescale_to_new(rhs.scale).digits >= rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits >= self.digits + } + } + } + } + + impl JsonSerializable for $name { + fn into_json_value(self) -> Option<Value> { + unimplemented!("Unimplemented JsonSerializable::into_json_value for {}", stringify!($name)) + } + } + + impl fmt::Debug for $name { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "Decimal<{}, {}>(\"{}\")", + self.precision, + self.scale, + self.to_string() + ) + } + } + + // To fix clippy you are deriving `Hash` but have implemented `PartialEq` explicitly + impl Hash for $name { + fn hash<H: Hasher>(&self, state: &mut H) { + self.digits.hash(state); + self.precision.hash(state); + self.scale.hash(state); + } + + fn hash_slice<H: Hasher>(_data: &[Self], _state: &mut H) where + Self: Sized, { + unimplemented!("Unimplemented hash_slice for {}", stringify!($name)) + } + } + + impl PartialEq<$name> for $name { + #[inline] + fn eq(&self, rhs: &$name) -> bool { + // @todo What is a correct behaviour for it? Rescaling? 
+ self.digits == rhs.digits + } + } + + impl FromStr for $name { + type Err = ParseDecimalError; + + fn from_str(s: &str) -> Result<Self, ParseDecimalError> { + let (digits, precision, scale) = match s.find('.') { + // Decimal with empty scale + None => { + let digits = s.parse::<$native_ty>()?; + + if digits < 0 { + (s.parse::<$native_ty>()?, s.len() - 1, 0) + } else { + (s.parse::<$native_ty>()?, s.len(), 0) + } + } + Some(loc) => { + let (lead, trail) = (&s[..loc], &s[loc + 1..]); + + // Concat both parts to make bigint from int + let mut parts = String::from(lead); + parts.push_str(trail); + + let digits = parts.parse::<$native_ty>()?; + + if digits < 0 { + (digits, lead.len() - 1, trail.len()) + } else { + (digits, lead.len(), trail.len()) + } + } + }; + + Ok($name::new(digits, precision, scale)) + } + } + }; +} + +// This types are disabled, because Arrow doesnt declare Decimals for 32 / 64 +// i32 max - 2_147_483_647i32 +// make_type!(Decimal32Type, i32, 9); +// i64 max - 9_223_372_036_854_775_807i64 +//make_type!(Decimal64Type, i64, 18); + +// i128 max - 170_141_183_460_469_231_731_687_303_715_884_105_727i128 +make_type!(Decimal128Type, i128, 38); Review comment: ```suggestion /// Type which stores a decimal number using 128 bits make_type!(Decimal128Type, i128, 38); ``` ########## File path: rust/arrow/src/datatypes/decimal.rs ########## @@ -0,0 +1,926 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::datatypes::JsonSerializable; +use num::{NumCast, ToPrimitive, Zero}; +use serde_json::Value; +use std::hash::{Hash, Hasher}; +use std::{ + cmp::Ordering, + convert::TryInto, + fmt, + mem::size_of, + num::ParseIntError, + ops::{Add, AddAssign, Sub, SubAssign}, + str::FromStr, +}; + +#[derive(Debug, PartialEq)] +pub enum ParseDecimalError { + ParseIntError(ParseIntError), + Other(String), +} + +impl From<ParseIntError> for ParseDecimalError { + fn from(err: ParseIntError) -> ParseDecimalError { + ParseDecimalError::ParseIntError(err) + } +} + +// Decimal (precision, scale) = Decimal(1, 2) = 1.00 +pub trait ArrowDecimalType: fmt::Debug + Send + Sync + FromStr + PartialEq { + const MAX_DIGITS: usize; + + // fn into_json_value(self) -> Option<Value>; + + fn get_byte_width_for_precision_scale(precision: usize, scale: usize) -> usize; + + // Rescale scale part + fn rescale(&mut self, scale: usize); + + fn rescale_to_new(self, scale: usize) -> Self; + + // Try to parse string with precision, scale + fn from_i128( + n: i128, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_i64( + n: i64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn from_f64( + n: f64, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + // Try to parse string with precision, scale + fn parse( + string: &str, + precision: usize, + scale: usize, + ) -> std::result::Result<Self, ParseDecimalError>; + + fn to_le_bytes(&self) -> [u8; 16]; + 
+ fn to_be_bytes(&self) -> [u8; 16]; + + fn to_byte_slice(&self) -> Vec<u8>; + + fn get_signed_lead_part(&self) -> i128; + + fn from_bytes_with_precision_scale( + bytes: &[u8], + precision: usize, + scale: usize, + ) -> Self; +} + +#[inline] +pub fn numeric_to_decimal<T: ToString, U: ArrowDecimalType>( + n: T, + p: usize, + s: usize, +) -> Option<U> { + Some(U::parse(n.to_string().as_str(), p, s).unwrap_or_else(|_e| { + panic!("unable to represent"); + })) +} + +#[inline] +pub fn decimal_to_numeric<U: DecimalCast, T: NumCast>(n: U) -> Option<T> { + T::from(n) +} + +pub trait DecimalCast: Sized + ArrowDecimalType + ToPrimitive { + fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self>; +} + +impl DecimalCast for Decimal128Type { + fn from<T: ToPrimitive>(n: T, p: usize, s: usize) -> Option<Self> { + Some(Decimal128Type::from_f64(n.to_f64().unwrap(), p, s).unwrap()) + } +} + +#[inline(always)] +fn pow_ten(pow: usize) -> u64 { + 10u64.pow(pow as u32) +} + +macro_rules! make_type { + ($name:ident, $native_ty:ty, $max_digits:expr) => { + #[derive(Copy, Clone, Eq)] + pub struct $name { + pub digits: $native_ty, + pub precision: usize, + pub scale: usize, + } + + impl $name { + pub fn new(digits: $native_ty, precision: usize, scale: usize) -> $name { + assert!( + (precision + scale) <= $max_digits, + "Unable to use {} to represent Decimal({}, {}), max digits reached ({}).", + stringify!($name), + precision, + scale, + stringify!($max_digits), + ); + + $name { + digits, + precision, + scale, + } + } + } + + impl ArrowDecimalType for $name { + const MAX_DIGITS: usize = $max_digits; + + /// Returns the byte width of this primitive type. 
+ fn get_byte_width_for_precision_scale( + _precision: usize, + _scale: usize, + ) -> usize { + size_of::<$native_ty>() + } + + #[inline(always)] + fn rescale(&mut self, scale: usize) { + if self.digits.is_zero() { + self.scale = scale; + } else { + match self.scale.cmp(&scale) { + Ordering::Greater => { + self.digits /= pow_ten(self.scale - scale) as $native_ty; + self.scale = scale; + } + Ordering::Less => { + self.digits *= pow_ten(scale - self.scale) as $native_ty; + self.scale = scale; + } + Ordering::Equal => {} + }; + } + } + + #[inline(always)] + fn get_signed_lead_part(&self) -> i128 { + self.rescale_to_new(0).digits + } + + #[inline(always)] + fn rescale_to_new(self, scale: usize) -> $name { + if self.digits.is_zero() { + return $name::new(0, 0, scale); + } + + let digits = match self.scale.cmp(&scale) { + Ordering::Greater => { + self.digits / pow_ten(self.scale - scale) as $native_ty + } + Ordering::Less => { + self.digits * pow_ten(scale - self.scale) as $native_ty + } + Ordering::Equal => self.digits, + }; + + $name::new(digits, self.precision, scale) + } + + fn from_i128( + n: i128, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = $name::new(n as $native_ty, precision, 0); + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn from_i64( + n: i64, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = $name::new(n as $native_ty, precision, 0); + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn from_f64( + n: f64, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + $name::parse(n.to_string().as_str(), precision, scale) + } + + fn parse( + string: &str, + precision: usize, + scale: usize, + ) -> Result<Self, ParseDecimalError> { + let mut as_decimal = 
$name::from_str(string)?; + + if as_decimal.scale != scale { + as_decimal.rescale(scale) + } + + as_decimal.precision = precision; + + Ok(as_decimal) + } + + fn to_le_bytes(&self) -> [u8; 16] { + self.digits.to_le_bytes() + } + + fn to_be_bytes(&self) -> [u8; 16] { + self.digits.to_be_bytes() + } + + fn to_byte_slice(&self) -> Vec<u8> { + self.digits.to_le_bytes().to_vec() + } + + fn from_bytes_with_precision_scale( + bytes: &[u8], + precision: usize, + scale: usize, + ) -> $name { + let as_array = bytes.try_into(); + match as_array { + Ok(v) if bytes.len() == 16 => $name { + digits: <$native_ty>::from_le_bytes(v), + precision, + scale, + }, + Err(e) => panic!( + "Unable to load Decimal from bytes slice ({}): {}", + bytes.len(), + e + ), + _ => panic!( + "Unable to load Decimal from bytes slice with length {}", + bytes.len() + ), + } + } + } + + impl ToPrimitive for $name { + fn to_isize(&self) -> Option<isize> { + unimplemented!("Unimplemented to_isize for {}", stringify!($name)) + } + + fn to_usize(&self) -> Option<usize> { + unimplemented!("Unimplemented to_usize for {}", stringify!($name)) + } + + fn to_i8(&self) -> Option<i8> { + Some(self.get_signed_lead_part() as i8) + } + + fn to_i16(&self) -> Option<i16> { + Some(self.get_signed_lead_part() as i16) + } + + fn to_i32(&self) -> Option<i32> { + Some(self.get_signed_lead_part() as i32) + } + + fn to_i64(&self) -> Option<i64> { + Some(self.get_signed_lead_part() as i64) + } + + fn to_i128(&self) -> Option<i128> { + Some(self.get_signed_lead_part()) + } + + fn to_u8(&self) -> Option<u8> { + Some(self.get_signed_lead_part() as u8) + } + + fn to_u16(&self) -> Option<u16> { + Some(self.get_signed_lead_part() as u16) + } + + fn to_u32(&self) -> Option<u32> { + Some(self.get_signed_lead_part() as u32) + } + + fn to_u64(&self) -> Option<u64> { + Some(self.get_signed_lead_part() as u64) + } + + fn to_u128(&self) -> Option<u128> { + Some(self.get_signed_lead_part() as u128) + } + + fn to_f32(&self) -> Option<f32> { + // 
@todo Optimize this + Some(self.to_string().parse::<f32>().unwrap()) + } + + fn to_f64(&self) -> Option<f64> { + // @todo Optimize this + Some(self.to_string().parse::<f64>().unwrap()) + } + } + + impl ToString for $name { + fn to_string(&self) -> String { + println!("<{},{}>({})", self.digits, self.precision, self.scale); + + // Skip sign, because we split string to lead, trail + let as_str = self.digits.abs().to_string(); + let len = as_str.len(); + + let (lead, trail) = + (&as_str[..(len - self.scale)], &as_str[(len - self.scale)..]); + + let mut result = String::new(); + + if self.digits < 0 { + result.push_str("-") + } + + if lead == "" { + result.push_str(&"0"); + } else { + result.push_str(&lead); + } + + if !trail.is_empty() { + result.push_str(&"."); + result.push_str(&trail); + } + + result + } + } + + impl Default for $name { + #[inline] + fn default() -> $name { + Zero::zero() + } + } + + impl Zero for $name { + #[inline] + fn zero() -> $name { + $name::new(0, 1, 0) + } + + #[inline] + fn is_zero(&self) -> bool { + self.digits == 0 + } + } + + impl Add<$name> for $name { + type Output = $name; + + #[inline] + fn add(self, rhs: $name) -> $name { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + $name::new(self.digits + rhs.digits, self.precision, self.scale) + } + Ordering::Less => self.rescale_to_new(rhs.scale) + rhs, + Ordering::Greater => rhs.rescale_to_new(self.scale) + self, + } + } + } + + impl AddAssign<$name> for $name { + #[inline] + fn add_assign(&mut self, rhs: $name) { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + self.digits += rhs.digits; + } + Ordering::Less => { + self.rescale(rhs.scale); + self.digits += rhs.digits; + } + Ordering::Greater => { + self.rescale(self.scale); + self.digits += rhs.digits; + } + } + } + } + + impl Sub<$name> for $name { + type Output = $name; + + #[inline] + fn sub(self, rhs: $name) -> $name { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + $name::new(self.digits - 
rhs.digits, self.precision, self.scale) + } + Ordering::Less => self.rescale_to_new(rhs.scale) - rhs, + Ordering::Greater => rhs.rescale_to_new(self.scale) - self, + } + } + } + + impl SubAssign<$name> for $name { + #[inline] + fn sub_assign(&mut self, rhs: $name) { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => { + self.digits -= rhs.digits; + } + Ordering::Less => { + self.rescale(rhs.scale); + self.digits -= rhs.digits; + } + Ordering::Greater => { + self.rescale(self.scale); + self.digits -= rhs.digits; + } + } + } + } + + impl Ord for $name { + fn cmp(&self, rhs: &Self) -> Ordering { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits.cmp(&rhs.digits), + Ordering::Less => { + self.rescale_to_new(rhs.scale).digits.cmp(&rhs.digits) + } + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits.cmp(&self.digits) + } + } + } + } + + impl PartialOrd for $name { + fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { + Some(self.cmp(rhs)) + } + + fn lt(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits < rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits < rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits < self.digits + } + } + } + + fn le(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits <= rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits <= rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits <= self.digits + } + } + } + + fn gt(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits > rhs.digits, + Ordering::Less => self.rescale_to_new(rhs.scale).digits > rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits > self.digits + } + } + } + + fn ge(&self, rhs: &Self) -> bool { + match self.scale.cmp(&rhs.scale) { + Ordering::Equal => self.digits >= rhs.digits, + Ordering::Less => 
self.rescale_to_new(rhs.scale).digits >= rhs.digits, + Ordering::Greater => { + rhs.rescale_to_new(self.scale).digits >= self.digits + } + } + } + } + + impl JsonSerializable for $name { + fn into_json_value(self) -> Option<Value> { + unimplemented!("Unimplemented JsonSerializable::into_json_value for {}", stringify!($name)) + } + } + + impl fmt::Debug for $name { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "Decimal<{}, {}>(\"{}\")", + self.precision, + self.scale, + self.to_string() + ) + } + } + + // To fix clippy you are deriving `Hash` but have implemented `PartialEq` explicitly + impl Hash for $name { + fn hash<H: Hasher>(&self, state: &mut H) { + self.digits.hash(state); + self.precision.hash(state); + self.scale.hash(state); + } + + fn hash_slice<H: Hasher>(_data: &[Self], _state: &mut H) where + Self: Sized, { + unimplemented!("Unimplemented hash_slice for {}", stringify!($name)) + } + } + + impl PartialEq<$name> for $name { + #[inline] + fn eq(&self, rhs: &$name) -> bool { + // @todo What is a correct behaviour for it? Rescaling? 
+ self.digits == rhs.digits + } + } + + impl FromStr for $name { + type Err = ParseDecimalError; + + fn from_str(s: &str) -> Result<Self, ParseDecimalError> { + let (digits, precision, scale) = match s.find('.') { + // Decimal with empty scale + None => { + let digits = s.parse::<$native_ty>()?; + + if digits < 0 { + (s.parse::<$native_ty>()?, s.len() - 1, 0) + } else { + (s.parse::<$native_ty>()?, s.len(), 0) + } + } + Some(loc) => { + let (lead, trail) = (&s[..loc], &s[loc + 1..]); + + // Concat both parts to make bigint from int + let mut parts = String::from(lead); + parts.push_str(trail); + + let digits = parts.parse::<$native_ty>()?; + + if digits < 0 { + (digits, lead.len() - 1, trail.len()) + } else { + (digits, lead.len(), trail.len()) + } + } + }; + + Ok($name::new(digits, precision, scale)) + } + } + }; +} + +// This types are disabled, because Arrow doesnt declare Decimals for 32 / 64 +// i32 max - 2_147_483_647i32 +// make_type!(Decimal32Type, i32, 9); +// i64 max - 9_223_372_036_854_775_807i64 +//make_type!(Decimal64Type, i64, 18); + +// i128 max - 170_141_183_460_469_231_731_687_303_715_884_105_727i128 +make_type!(Decimal128Type, i128, 38); Review comment: 👍 for comments explaining why this macro is as it is ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
