This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 34f58f96d Add validation for Decimal256 (#2113)
34f58f96d is described below
commit 34f58f96d421302aa2c67b689ff1a63ac77fa1a1
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Fri Jul 22 10:02:24 2022 -0700
Add validation for Decimal256 (#2113)
* Add validation for decimal256
* Add validation in array data
---
arrow/src/array/array_decimal.rs | 2 +-
arrow/src/array/builder/decimal_builder.rs | 67 +++++++++++++-
arrow/src/array/data.rs | 17 +++-
arrow/src/datatypes/datatype.rs | 144 ++++++++++++++++++++++++++++-
4 files changed, 225 insertions(+), 5 deletions(-)
diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs
index f42192658..494b4e9d1 100644
--- a/arrow/src/array/array_decimal.rs
+++ b/arrow/src/array/array_decimal.rs
@@ -352,7 +352,7 @@ impl From<ArrayData> for Decimal256Array {
assert_eq!(
data.buffers().len(),
1,
- "Decimal128Array data should contain 1 buffer only (values)"
+ "Decimal256Array data should contain 1 buffer only (values)"
);
let values = data.buffers()[0].as_ptr();
let (precision, scale) = match data.data_type() {
diff --git a/arrow/src/array/builder/decimal_builder.rs
b/arrow/src/array/builder/decimal_builder.rs
index 42623df95..d015d3dce 100644
--- a/arrow/src/array/builder/decimal_builder.rs
+++ b/arrow/src/array/builder/decimal_builder.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+use num::BigInt;
use std::any::Any;
use std::sync::Arc;
@@ -25,7 +26,7 @@ use crate::array::{ArrayBuilder, FixedSizeBinaryBuilder};
use crate::error::{ArrowError, Result};
-use crate::datatypes::validate_decimal_precision;
+use crate::datatypes::{validate_decimal256_precision,
validate_decimal_precision};
use crate::util::decimal::{BasicDecimal, Decimal256};
/// Array Builder for [`Decimal128Array`]
@@ -51,6 +52,10 @@ pub struct Decimal256Builder {
builder: FixedSizeBinaryBuilder,
precision: usize,
scale: usize,
+
+ /// Should decimal values be validated for compatibility with scale and
precision?
+ /// defaults to true
+ value_validation: bool,
}
impl Decimal128Builder {
@@ -163,14 +168,35 @@ impl Decimal256Builder {
builder: FixedSizeBinaryBuilder::new(capacity, Self::BYTE_LENGTH),
precision,
scale,
+ value_validation: true,
}
}
+ /// Disable validation
+ ///
+ /// # Safety
+ ///
+ /// After disabling validation, caller must ensure that appended values
are compatible
+ /// for the specified precision and scale.
+ pub unsafe fn disable_value_validation(&mut self) {
+ self.value_validation = false;
+ }
+
/// Appends a [`Decimal256`] number into the builder.
///
/// Returns an error if `value` has different precision, scale or length
in bytes than this builder
#[inline]
pub fn append_value(&mut self, value: &Decimal256) -> Result<()> {
+ let value = if self.value_validation {
+ let raw_bytes = value.raw_value();
+ let integer = BigInt::from_signed_bytes_le(raw_bytes);
+ let value_str = integer.to_string();
+ validate_decimal256_precision(&value_str, self.precision)?;
+ value
+ } else {
+ value
+ };
+
if self.precision != value.precision() || self.scale != value.scale() {
return Err(ArrowError::InvalidArgumentError(
"Decimal value does not have the same precision or scale as
Decimal256Builder".to_string()
@@ -206,9 +232,10 @@ impl Decimal256Builder {
#[cfg(test)]
mod tests {
use super::*;
+ use num::Num;
use crate::array::array_decimal::{BasicDecimalArray, Decimal128Array};
- use crate::array::Array;
+ use crate::array::{array_decimal, Array};
use crate::datatypes::DataType;
use crate::util::decimal::Decimal128;
@@ -298,4 +325,40 @@ mod tests {
let value = Decimal256::try_new_from_bytes(40, 6,
bytes.as_slice()).unwrap();
builder.append_value(&value).unwrap();
}
+
+ #[test]
+ #[should_panic(
+ expected =
"9999999999999999999999999999999999999999999999999999999999999999999999999999
is too large to store in a Decimal256 of precision 76. Max is
999999999999999999999999999999999999999999999999999999999999999999999999999"
+ )]
+ fn test_decimal256_builder_out_of_range_precision_scale() {
+ let mut builder = Decimal256Builder::new(30, 76, 6);
+
+ let big_value =
BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999",
10).unwrap();
+ let bytes = big_value.to_signed_bytes_le();
+ let value = Decimal256::try_new_from_bytes(76, 6, &bytes).unwrap();
+ builder.append_value(&value).unwrap();
+ }
+
+ #[test]
+ #[should_panic(
+ expected =
"9999999999999999999999999999999999999999999999999999999999999999999999999999
is too large to store in a Decimal256 of precision 76. Max is
999999999999999999999999999999999999999999999999999999999999999999999999999"
+ )]
+ fn test_decimal256_data_validation() {
+ let mut builder = Decimal256Builder::new(30, 76, 6);
+ // Disable validation at builder
+ unsafe {
+ builder.disable_value_validation();
+ }
+
+ let big_value =
BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999",
10).unwrap();
+ let bytes = big_value.to_signed_bytes_le();
+ let value = Decimal256::try_new_from_bytes(76, 6, &bytes).unwrap();
+ builder
+ .append_value(&value)
+ .expect("should not validate invalid value at builder");
+
+ let array = builder.finish();
+ let array_data = array_decimal::BasicDecimalArray::data(&array);
+ array_data.validate_values().unwrap();
+ }
}
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 3cd42e366..4ae7f069e 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -18,7 +18,10 @@
//! Contains `ArrayData`, a generic representation of Arrow array data which
encapsulates
//! common attributes and operations for Arrow array.
-use crate::datatypes::{validate_decimal_precision, DataType, IntervalUnit,
UnionMode};
+use crate::datatypes::{
+ validate_decimal256_precision, validate_decimal_precision, DataType,
IntervalUnit,
+ UnionMode,
+};
use crate::error::{ArrowError, Result};
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
use crate::{
@@ -26,6 +29,7 @@ use crate::{
util::bit_util,
};
use half::f16;
+use num::BigInt;
use std::convert::TryInto;
use std::mem;
use std::ops::Range;
@@ -1018,6 +1022,17 @@ impl ArrayData {
}
Ok(())
}
+ DataType::Decimal256(p, _) => {
+ let values = self.buffers()[0].as_slice();
+ for pos in 0..self.len() {
+ let offset = pos * 32;
+ let raw_bytes = &values[offset..offset + 32];
+ let integer = BigInt::from_signed_bytes_le(raw_bytes);
+ let value_str = integer.to_string();
+ validate_decimal256_precision(&value_str, *p)?;
+ }
+ Ok(())
+ }
DataType::Utf8 => self.validate_utf8::<i32>(),
DataType::LargeUtf8 => self.validate_utf8::<i64>(),
DataType::Binary =>
self.validate_offsets_full::<i32>(self.buffers[1].len()),
diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
index 8f787b97a..74a2ab450 100644
--- a/arrow/src/datatypes/datatype.rs
+++ b/arrow/src/datatypes/datatype.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+use num::{BigInt, Num, ToPrimitive};
use std::fmt;
use serde_derive::{Deserialize, Serialize};
@@ -300,6 +301,49 @@ pub const MAX_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
99999999999999999999999999999999999999,
];
+/// `MAX_DECIMAL_FOR_LARGER_PRECISION[p]` holds the maximum integer value
+/// that can be stored in [DataType::Decimal256] value of precision `p` > 38
+pub const MAX_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
+ "99999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999",
+ "9999999999999999999999999999999999999999",
+ "99999999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999999",
+ "9999999999999999999999999999999999999999999",
+ "99999999999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999999999",
+ "9999999999999999999999999999999999999999999999",
+ "99999999999999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999999999999",
+ "9999999999999999999999999999999999999999999999999",
+ "99999999999999999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999999999999999",
+ "9999999999999999999999999999999999999999999999999999",
+ "99999999999999999999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999999999999999999",
+ "9999999999999999999999999999999999999999999999999999999",
+ "99999999999999999999999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999999999999999999999",
+ "9999999999999999999999999999999999999999999999999999999999",
+ "99999999999999999999999999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999999999999999999999999",
+ "9999999999999999999999999999999999999999999999999999999999999",
+ "99999999999999999999999999999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999999999999999999999999999",
+ "9999999999999999999999999999999999999999999999999999999999999999",
+ "99999999999999999999999999999999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999999999999999999999999999999",
+ "9999999999999999999999999999999999999999999999999999999999999999999",
+ "99999999999999999999999999999999999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999999999999999999999999999999999",
+ "9999999999999999999999999999999999999999999999999999999999999999999999",
+ "99999999999999999999999999999999999999999999999999999999999999999999999",
+ "999999999999999999999999999999999999999999999999999999999999999999999999",
+
"9999999999999999999999999999999999999999999999999999999999999999999999999",
+
"99999999999999999999999999999999999999999999999999999999999999999999999999",
+
"999999999999999999999999999999999999999999999999999999999999999999999999999",
+];
+
/// `MIN_DECIMAL_FOR_EACH_PRECISION[p]` holds the minimum `i128` value
/// that can be stored in a [DataType::Decimal] value of precision `p`
pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
@@ -343,13 +387,62 @@ pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [
-99999999999999999999999999999999999999,
];
+/// `MIN_DECIMAL_FOR_LARGER_PRECISION[p]` holds the minimum integer value
+/// that can be stored in a [DataType::Decimal256] value of precision `p` > 38
+pub const MIN_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
+ "-99999999999999999999999999999999999999",
+ "-999999999999999999999999999999999999999",
+ "-9999999999999999999999999999999999999999",
+ "-99999999999999999999999999999999999999999",
+ "-999999999999999999999999999999999999999999",
+ "-9999999999999999999999999999999999999999999",
+ "-99999999999999999999999999999999999999999999",
+ "-999999999999999999999999999999999999999999999",
+ "-9999999999999999999999999999999999999999999999",
+ "-99999999999999999999999999999999999999999999999",
+ "-999999999999999999999999999999999999999999999999",
+ "-9999999999999999999999999999999999999999999999999",
+ "-99999999999999999999999999999999999999999999999999",
+ "-999999999999999999999999999999999999999999999999999",
+ "-9999999999999999999999999999999999999999999999999999",
+ "-99999999999999999999999999999999999999999999999999999",
+ "-999999999999999999999999999999999999999999999999999999",
+ "-9999999999999999999999999999999999999999999999999999999",
+ "-99999999999999999999999999999999999999999999999999999999",
+ "-999999999999999999999999999999999999999999999999999999999",
+ "-9999999999999999999999999999999999999999999999999999999999",
+ "-99999999999999999999999999999999999999999999999999999999999",
+ "-999999999999999999999999999999999999999999999999999999999999",
+ "-9999999999999999999999999999999999999999999999999999999999999",
+ "-99999999999999999999999999999999999999999999999999999999999999",
+ "-999999999999999999999999999999999999999999999999999999999999999",
+ "-9999999999999999999999999999999999999999999999999999999999999999",
+ "-99999999999999999999999999999999999999999999999999999999999999999",
+ "-999999999999999999999999999999999999999999999999999999999999999999",
+ "-9999999999999999999999999999999999999999999999999999999999999999999",
+ "-99999999999999999999999999999999999999999999999999999999999999999999",
+ "-999999999999999999999999999999999999999999999999999999999999999999999",
+ "-9999999999999999999999999999999999999999999999999999999999999999999999",
+ "-99999999999999999999999999999999999999999999999999999999999999999999999",
+
"-999999999999999999999999999999999999999999999999999999999999999999999999",
+
"-9999999999999999999999999999999999999999999999999999999999999999999999999",
+
"-99999999999999999999999999999999999999999999999999999999999999999999999999",
+
"-999999999999999999999999999999999999999999999999999999999999999999999999999",
+];
+
/// The maximum precision for [DataType::Decimal] values
pub const DECIMAL_MAX_PRECISION: usize = 38;
/// The maximum scale for [DataType::Decimal] values
pub const DECIMAL_MAX_SCALE: usize = 38;
-/// The default scale for [DataType::Decimal] values
+/// The maximum precision for [DataType::Decimal256] values
+pub const DECIMAL256_MAX_PRECISION: usize = 76;
+
+/// The maximum scale for [DataType::Decimal256] values
+pub const DECIMAL256_MAX_SCALE: usize = 76;
+
+/// The default scale for [DataType::Decimal] and [DataType::Decimal256] values
pub const DECIMAL_DEFAULT_SCALE: usize = 10;
/// Validates that the specified `i128` value can be properly
@@ -379,6 +472,55 @@ pub(crate) fn validate_decimal_precision(value: i128,
precision: usize) -> Resul
}
}
+/// Validates that the specified string value can be properly
+/// interpreted as a Decimal256 number with precision `precision`
+#[inline]
+pub(crate) fn validate_decimal256_precision(
+ value: &str,
+ precision: usize,
+) -> Result<BigInt> {
+ if precision > 38 {
+ let max_str = MAX_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1];
+ let min_str = MIN_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1];
+
+ let max = BigInt::from_str_radix(max_str, 10).unwrap();
+ let min = BigInt::from_str_radix(min_str, 10).unwrap();
+
+ let value = BigInt::from_str_radix(value, 10).unwrap();
+ if value > max {
+ Err(ArrowError::InvalidArgumentError(format!(
+ "{} is too large to store in a Decimal256 of precision {}. Max
is {}",
+ value, precision, max
+ )))
+ } else if value < min {
+ Err(ArrowError::InvalidArgumentError(format!(
+ "{} is too small to store in a Decimal256 of precision {}. Min
is {}",
+ value, precision, min
+ )))
+ } else {
+ Ok(value)
+ }
+ } else {
+ let max = MAX_DECIMAL_FOR_EACH_PRECISION[precision - 1];
+ let min = MIN_DECIMAL_FOR_EACH_PRECISION[precision - 1];
+ let value = BigInt::from_str_radix(value, 10).unwrap();
+
+ if value.to_i128().unwrap() > max {
+ Err(ArrowError::InvalidArgumentError(format!(
+ "{} is too large to store in a Decimal256 of precision {}. Max
is {}",
+ value, precision, max
+ )))
+ } else if value.to_i128().unwrap() < min {
+ Err(ArrowError::InvalidArgumentError(format!(
+ "{} is too small to store in a Decimal256 of precision {}. Min
is {}",
+ value, precision, min
+ )))
+ } else {
+ Ok(value)
+ }
+ }
+}
+
impl DataType {
/// Parse a data type from a JSON representation.
pub(crate) fn from(json: &Value) -> Result<DataType> {