This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new eb4be68a823 Return null for overflow when casting string to integer
under safe option enabled (#5398)
eb4be68a823 is described below
commit eb4be68a823c02533e9b58e0534ac99437b07718
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Thu Feb 15 00:47:59 2024 -0800
Return null for overflow when casting string to integer under safe option
enabled (#5398)
* Return null for overflow when casting string to integer
* Use atoi_simd
* Use atoi
* Return to str.parse.
* Revert "Return to str.parse."
This reverts commit 53dd0479a5221e6bc7b6447389abdf712a1819ad.
* Check trailing string
---
arrow-cast/Cargo.toml | 1 +
arrow-cast/src/cast.rs | 19 +++++++++++++++++++
arrow-cast/src/parse.rs | 7 ++++++-
3 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml
index 19b857297d1..81dd0ebd415 100644
--- a/arrow-cast/Cargo.toml
+++ b/arrow-cast/Cargo.toml
@@ -49,6 +49,7 @@ chrono = { workspace = true }
half = { version = "2.1", default-features = false }
num = { version = "0.4", default-features = false, features = ["std"] }
lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] }
+atoi = "2.0.0"
comfy-table = { version = "7.0", optional = true, default-features = false }
base64 = "0.21"
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index a813c5f6c87..7868946532c 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -4911,6 +4911,25 @@ mod tests {
assert!(c.is_null(2));
}
+ #[test]
+ fn test_cast_string_to_integral_overflow() {
+ let str = Arc::new(StringArray::from(vec![
+ Some("123"),
+ Some("-123"),
+ Some("86374"),
+ None,
+ ])) as ArrayRef;
+
+ let options = CastOptions {
+ safe: true,
+ format_options: FormatOptions::default(),
+ };
+ let res = cast_with_options(&str, &DataType::Int16, &options).expect("should cast to i16");
+ let expected =
+ Arc::new(Int16Array::from(vec![Some(123), Some(-123), None, None])) as ArrayRef;
+ assert_eq!(&res, &expected);
+ }
+
#[test]
fn test_cast_string_to_timestamp() {
let a1 = Arc::new(StringArray::from(vec![
diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
index 50e9fda672f..72942af8394 100644
--- a/arrow-cast/src/parse.rs
+++ b/arrow-cast/src/parse.rs
@@ -438,7 +438,12 @@ macro_rules! parser_primitive {
($t:ty) => {
impl Parser for $t {
fn parse(string: &str) -> Option<Self::Native> {
- lexical_core::parse::<Self::Native>(string.as_bytes()).ok()
+ match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
+ string.as_bytes(),
+ ) {
+ (Some(n), x) if x == string.len() => Some(n),
+ _ => None,
+ }
}
}
};