This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new eb4be68a823 Return null for overflow when casting string to integer under safe option enabled (#5398)
eb4be68a823 is described below

commit eb4be68a823c02533e9b58e0534ac99437b07718
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Thu Feb 15 00:47:59 2024 -0800

    Return null for overflow when casting string to integer under safe option enabled (#5398)
    
    * Return null for overflow when casting string to integer
    
    * Use atoi_simd
    
    * Use atoi
    
    * Return to str.parse.
    
    * Revert "Return to str.parse."
    
    This reverts commit 53dd0479a5221e6bc7b6447389abdf712a1819ad.
    
    * Check trailing string
---
 arrow-cast/Cargo.toml   |  1 +
 arrow-cast/src/cast.rs  | 19 +++++++++++++++++++
 arrow-cast/src/parse.rs |  7 ++++++-
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml
index 19b857297d1..81dd0ebd415 100644
--- a/arrow-cast/Cargo.toml
+++ b/arrow-cast/Cargo.toml
@@ -49,6 +49,7 @@ chrono = { workspace = true }
 half = { version = "2.1", default-features = false }
 num = { version = "0.4", default-features = false, features = ["std"] }
 lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] }
+atoi = "2.0.0"
 comfy-table = { version = "7.0", optional = true, default-features = false }
 base64 = "0.21"
 
diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index a813c5f6c87..7868946532c 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -4911,6 +4911,25 @@ mod tests {
         assert!(c.is_null(2));
     }
 
+    #[test]
+    fn test_cast_string_to_integral_overflow() {
+        let str = Arc::new(StringArray::from(vec![
+            Some("123"),
+            Some("-123"),
+            Some("86374"),
+            None,
+        ])) as ArrayRef;
+
+        let options = CastOptions {
+            safe: true,
+            format_options: FormatOptions::default(),
+        };
+        let res = cast_with_options(&str, &DataType::Int16, &options).expect("should cast to i16");
+        let expected =
+            Arc::new(Int16Array::from(vec![Some(123), Some(-123), None, None])) as ArrayRef;
+        assert_eq!(&res, &expected);
+    }
+
     #[test]
     fn test_cast_string_to_timestamp() {
         let a1 = Arc::new(StringArray::from(vec![
diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
index 50e9fda672f..72942af8394 100644
--- a/arrow-cast/src/parse.rs
+++ b/arrow-cast/src/parse.rs
@@ -438,7 +438,12 @@ macro_rules! parser_primitive {
     ($t:ty) => {
         impl Parser for $t {
             fn parse(string: &str) -> Option<Self::Native> {
-                lexical_core::parse::<Self::Native>(string.as_bytes()).ok()
+                match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
+                    string.as_bytes(),
+                ) {
+                    (Some(n), x) if x == string.len() => Some(n),
+                    _ => None,
+                }
             }
         }
     };

Reply via email to