This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new e470187b93 fix: Reject empty strings when casting strings to decimal
(#10010)
e470187b93 is described below
commit e470187b93b13bf9821e67d5b2348a1d89612a39
Author: Neil Conway <[email protected]>
AuthorDate: Wed May 27 16:16:41 2026 -0400
fix: Reject empty strings when casting strings to decimal (#10010)
# Which issue does this PR close?
- Closes #10009.
# Rationale for this change
When casting string values to decimal, `parse_string_to_decimal_native`
treated empty strings and whitespace-only strings as valid input,
resulting in a decimal with a value of 0. This is inconsistent with
`parse_decimal` and with how parsing and string -> numeric casts work
for floating-point types: in all of those cases, empty strings and
whitespace-only strings are rejected.
# What changes are included in this PR?
* Change `parse_string_to_decimal_native` to reject empty strings and
whitespace-only strings
* Add test coverage
# Are these changes tested?
Yes, new tests added.
# Are there any user-facing changes?
Yes, this changes the behavior of string -> decimal casts: empty strings
and whitespace-only strings are now rejected instead of being parsed as
0. The previous behavior is (IMO) clearly incorrect, but it is possible
that some user code relies upon it.
---
arrow-cast/src/cast/decimal.rs | 17 +++++++++++++++++
arrow-cast/src/cast/mod.rs | 21 +++++++++++++++------
2 files changed, 32 insertions(+), 6 deletions(-)
diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs
index 789dcea89e..9d1465567e 100644
--- a/arrow-cast/src/cast/decimal.rs
+++ b/arrow-cast/src/cast/decimal.rs
@@ -559,6 +559,12 @@ where
let integers = first_part;
let decimals = if parts.len() == 2 { parts[1] } else { "" };
+ if integers.is_empty() && decimals.is_empty() {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Invalid decimal format: {value_str:?}"
+ )));
+ }
+
if !integers.is_empty() && !integers.as_bytes()[0].is_ascii_digit() {
return Err(ArrowError::InvalidArgumentError(format!(
"Invalid decimal format: {value_str:?}"
@@ -973,6 +979,17 @@ mod tests {
parse_string_to_decimal_native::<Decimal128Type>("123.4567891",
5)?,
12345679_i128
);
+
+ for value in ["", " ", ".", "+", "-", "+.", "-."] {
+ assert!(
+ parse_string_to_decimal_native::<Decimal128Type>(value,
2).is_err(),
+ "expected {value:?} to fail parsing as Decimal128"
+ );
+ assert!(
+ parse_string_to_decimal_native::<Decimal256Type>(value,
2).is_err(),
+ "expected {value:?} to fail parsing as Decimal256"
+ );
+ }
Ok(())
}
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 12442bd9fd..67da85b8c1 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -10775,8 +10775,8 @@ mod tests {
assert_eq!("0.12", decimal_arr.value_as_string(7));
assert_eq!("12.23", decimal_arr.value_as_string(8));
assert!(decimal_arr.is_null(9));
- assert_eq!("0.00", decimal_arr.value_as_string(10));
- assert_eq!("0.00", decimal_arr.value_as_string(11));
+ assert!(decimal_arr.is_null(10));
+ assert!(decimal_arr.is_null(11));
assert!(decimal_arr.is_null(12));
assert_eq!("-1.23", decimal_arr.value_as_string(13));
assert_eq!("-1.24", decimal_arr.value_as_string(14));
@@ -10815,8 +10815,8 @@ mod tests {
assert_eq!("0.123", decimal_arr.value_as_string(7));
assert_eq!("12.234", decimal_arr.value_as_string(8));
assert!(decimal_arr.is_null(9));
- assert_eq!("0.000", decimal_arr.value_as_string(10));
- assert_eq!("0.000", decimal_arr.value_as_string(11));
+ assert!(decimal_arr.is_null(10));
+ assert!(decimal_arr.is_null(11));
assert!(decimal_arr.is_null(12));
assert_eq!("-1.235", decimal_arr.value_as_string(13));
assert_eq!("-1.236", decimal_arr.value_as_string(14));
@@ -10880,8 +10880,8 @@ mod tests {
let test_cases = [
(None, None),
- // (Some(""), None),
- // (Some(" "), None),
+ (Some(""), None),
+ (Some(" "), None),
(Some("0"), Some("0")),
(Some("000.000"), Some("0")),
(Some("12345"), Some("12345")),
@@ -11029,6 +11029,15 @@ mod tests {
.to_string()
.contains("Cannot cast string '. 0.123' to value of
Decimal128(38, 10) type")
);
+
+ let str_array = StringArray::from(vec![""]);
+ let array = Arc::new(str_array) as ArrayRef;
+ let casted_err = cast_with_options(&array, &output_type,
&option).unwrap_err();
+ assert!(
+ casted_err
+ .to_string()
+ .contains("Cannot cast string '' to value of Decimal128(38,
10) type")
+ );
}
fn test_cast_string_to_decimal128_overflow(overflow_array: ArrayRef) {