tustvold commented on code in PR #3805:
URL: https://github.com/apache/arrow-rs/pull/3805#discussion_r1126848940


##########
arrow-cast/src/parse.rs:
##########
@@ -459,10 +460,114 @@ impl Parser for Date64Type {
     }
 }
 
+/// Parse the string format decimal value to i128/i256 format and checking the precision and scale.
+/// The result value can't be out of bounds.
+pub fn parse_decimal<T: DecimalType>(
+    s: &str,
+    precision: u8,
+    scale: i8,
+) -> Result<T::Native, ArrowError> {
+    if !is_valid_decimal(s) {
+        return Err(ArrowError::ParseError(format!(
+            "can't parse the string value {s} to decimal"
+        )));
+    }
+    let mut offset = s.len();
+    let len = s.len();
+    let mut base = T::Native::usize_as(1);
+    let scale_usize = usize::from(scale as u8);
+
+    // handle the value after the '.' and meet the scale
+    let delimiter_position = s.find('.');
+    match delimiter_position {
+        None => {
+            // there is no '.'
+            base = T::Native::usize_as(10).pow_checked(scale as u32)?;
+        }
+        Some(mid) => {
+            // there is the '.'
+            if len - mid >= scale_usize + 1 {
+                // If the string value is "123.12345" and the scale is 2, we should just remain '.12' and drop the '345' value.
+                offset -= len - mid - 1 - scale_usize;
+            } else {
+                // If the string value is "123.12" and the scale is 4, we should append '00' to the tail.
+                base = T::Native::usize_as(10)
+                    .pow_checked((scale_usize + 1 + mid - len) as u32)?;
+            }
+        }
+    };
+
+    // each byte is digit、'-' or '.'
+    let bytes = s.as_bytes();
+    let mut negative = false;
+    let mut result = T::Native::usize_as(0);
+
+    bytes[0..offset]
+        .iter()
+        .rev()
+        .try_for_each::<_, Result<(), ArrowError>>(|&byte| {
+            match byte {
+                b'-' => {
+                    negative = true;
+                }
+                b'0'..=b'9' => {
+                    let add =
+                        T::Native::usize_as((byte - b'0') as usize).mul_checked(base)?;
+                    result = result.add_checked(add)?;
+                    base = base.mul_checked(T::Native::usize_as(10))?;
+                }
+                // because we have checked the string value
+                _ => (),
+            }
+            Ok(())
+        })?;
+
+    if negative {
+        result = result.neg_checked()?;
+    }
+
+    match T::validate_decimal_precision(result, precision) {
+        Ok(_) => Ok(result),
+        Err(e) => Err(ArrowError::ParseError(format!(
+            "parse decimal overflow: {e}"
+        ))),
+    }
+}
+
+pub fn is_valid_decimal(s: &str) -> bool {
+    let mut seen_dot = false;
+    let mut seen_digit = false;
+    let mut seen_sign = false;
+
+    for c in s.chars() {
+        match c {
+            '-' | '+' => {
+                if seen_digit || seen_dot || seen_sign {
+                    return false;
+                }
+                seen_sign = true;
+            }
+            '.' => {
+                if seen_dot {
+                    return false;
+                }
+                seen_dot = true;
+            }
+            '0'..='9' => {
+                seen_digit = true;
+            }
+            _ => return false,

Review Comment:
   ```suggestion
       for c in s.as_bytes() {
           match c {
               b'-' | b'+' => {
                   if seen_digit || seen_dot || seen_sign {
                       return false;
                   }
                   seen_sign = true;
               }
               b'.' => {
                   if seen_dot {
                       return false;
                   }
                   seen_dot = true;
               }
               b'0'..=b'9' => {
                   seen_digit = true;
               }
               _ => return false,
   ```



##########
arrow-cast/src/parse.rs:
##########
@@ -459,10 +460,114 @@ impl Parser for Date64Type {
     }
 }
 
+/// Parse the string format decimal value to i128/i256 format and checking the precision and scale.
+/// The result value can't be out of bounds.
+pub fn parse_decimal<T: DecimalType>(
+    s: &str,
+    precision: u8,
+    scale: i8,
+) -> Result<T::Native, ArrowError> {
+    if !is_valid_decimal(s) {
+        return Err(ArrowError::ParseError(format!(
+            "can't parse the string value {s} to decimal"
+        )));
+    }
+    let mut offset = s.len();
+    let len = s.len();
+    let mut base = T::Native::usize_as(1);
+    let scale_usize = usize::from(scale as u8);
+
+    // handle the value after the '.' and meet the scale
+    let delimiter_position = s.find('.');
+    match delimiter_position {
+        None => {
+            // there is no '.'
+            base = T::Native::usize_as(10).pow_checked(scale as u32)?;
+        }
+        Some(mid) => {
+            // there is the '.'
+            if len - mid >= scale_usize + 1 {
+                // If the string value is "123.12345" and the scale is 2, we should just remain '.12' and drop the '345' value.
+                offset -= len - mid - 1 - scale_usize;
+            } else {
+                // If the string value is "123.12" and the scale is 4, we should append '00' to the tail.
+                base = T::Native::usize_as(10)
+                    .pow_checked((scale_usize + 1 + mid - len) as u32)?;
+            }
+        }
+    };
+
+    // each byte is digit、'-' or '.'
+    let bytes = s.as_bytes();
+    let mut negative = false;
+    let mut result = T::Native::usize_as(0);
+
+    bytes[0..offset]
+        .iter()
+        .rev()
+        .try_for_each::<_, Result<(), ArrowError>>(|&byte| {
+            match byte {
+                b'-' => {
+                    negative = true;
+                }
+                b'0'..=b'9' => {
+                    let add =
+                        T::Native::usize_as((byte - b'0') as usize).mul_checked(base)?;
+                    result = result.add_checked(add)?;
+                    base = base.mul_checked(T::Native::usize_as(10))?;
+                }
+                // because we have checked the string value
+                _ => (),
+            }
+            Ok(())
+        })?;
+
+    if negative {
+        result = result.neg_checked()?;
+    }
+
+    match T::validate_decimal_precision(result, precision) {
+        Ok(_) => Ok(result),
+        Err(e) => Err(ArrowError::ParseError(format!(
+            "parse decimal overflow: {e}"
+        ))),
+    }
+}
+
+pub fn is_valid_decimal(s: &str) -> bool {

Review Comment:
   ```suggestion
   fn is_valid_decimal(s: &str) -> bool {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to