coderfender commented on code in PR #3017:
URL: https://github.com/apache/datafusion-comet/pull/3017#discussion_r2658673305


##########
native/spark-expr/src/conversion_funcs/cast.rs:
##########
@@ -1965,33 +1975,41 @@ fn do_cast_string_to_int<
     type_name: &str,
     min_value: T,
 ) -> SparkResult<Option<T>> {
-    let trimmed_str = str.trim();
-    if trimmed_str.is_empty() {
+    let bytes = str.as_bytes();
+    let mut start = 0;
+    let mut end = bytes.len();
+
+    while start < end && bytes[start].is_ascii_whitespace() {
+        start += 1;
+    }
+    while end > start && bytes[end - 1].is_ascii_whitespace() {
+        end -= 1;
+    }
+
+    if start == end {
         return none_or_err(eval_mode, type_name, str);
     }
+    let trimmed_str = &str[start..end];
     let len = trimmed_str.len();
+    let trimmed_bytes = trimmed_str.as_bytes();
     let mut result: T = T::zero();
-    let mut negative = false;
+    let mut idx = 0;
+    let first_char = trimmed_bytes[0];
+    let negative = first_char == b'-';
+    if negative || first_char == b'+' {
+        idx = 1;
+        if len == 1 {
+            return none_or_err(eval_mode, type_name, str);
+        }
+    }

Review Comment:
   Same here: removed the unnecessary `if` branching.



##########
native/spark-expr/src/conversion_funcs/cast.rs:
##########
@@ -389,13 +389,23 @@ macro_rules! cast_utf8_to_int {
     ($array:expr, $eval_mode:expr, $array_type:ty, $cast_method:ident) => {{
         let len = $array.len();
         let mut cast_array = PrimitiveArray::<$array_type>::builder(len);
-        for i in 0..len {
-            if $array.is_null(i) {
-                cast_array.append_null()
-            } else if let Some(cast_value) = $cast_method($array.value(i), 
$eval_mode)? {
-                cast_array.append_value(cast_value);
-            } else {
-                cast_array.append_null()
+        if $array.null_count() == 0 {
+            for i in 0..len {
+                if let Some(cast_value) = $cast_method($array.value(i), 
$eval_mode)? {
+                    cast_array.append_value(cast_value);
+                } else {
+                    cast_array.append_null()
+                }
+            }
+        } else {
+            for i in 0..len {
+                if $array.is_null(i) {
+                    cast_array.append_null()
+                } else if let Some(cast_value) = $cast_method($array.value(i), 
$eval_mode)? {
+                    cast_array.append_value(cast_value);
+                } else {
+                    cast_array.append_null()
+                }

Review Comment:
   Made the null check conditional on `null_count()`: when the array has no nulls, the loop skips the per-element `is_null` branch.



##########
native/spark-expr/src/conversion_funcs/cast.rs:
##########
@@ -1965,33 +1975,41 @@ fn do_cast_string_to_int<
     type_name: &str,
     min_value: T,
 ) -> SparkResult<Option<T>> {
-    let trimmed_str = str.trim();
-    if trimmed_str.is_empty() {
+    let bytes = str.as_bytes();
+    let mut start = 0;
+    let mut end = bytes.len();
+
+    while start < end && bytes[start].is_ascii_whitespace() {
+        start += 1;
+    }
+    while end > start && bytes[end - 1].is_ascii_whitespace() {
+        end -= 1;
+    }
+
+    if start == end {
         return none_or_err(eval_mode, type_name, str);
     }
+    let trimmed_str = &str[start..end];
     let len = trimmed_str.len();
+    let trimmed_bytes = trimmed_str.as_bytes();
     let mut result: T = T::zero();
-    let mut negative = false;
+    let mut idx = 0;
+    let first_char = trimmed_bytes[0];
+    let negative = first_char == b'-';
+    if negative || first_char == b'+' {
+        idx = 1;
+        if len == 1 {
+            return none_or_err(eval_mode, type_name, str);
+        }
+    }
+
     let radix = T::from(10);
     let stop_value = min_value / radix;
     let mut parse_sign_and_digits = true;
 
-    for (i, ch) in trimmed_str.char_indices() {
+    for &ch in &trimmed_bytes[idx..] {

Review Comment:
   Cleaner and faster approach: iterate over the bytes directly instead of going through `char_indices()`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to