theirix commented on code in PR #19980:
URL: https://github.com/apache/datafusion/pull/19980#discussion_r2725488603


##########
datafusion/functions/src/unicode/left.rs:
##########
@@ -121,61 +125,175 @@ impl ScalarUDFImpl for LeftFunc {
 
 /// Returns first n characters in the string, or when n is negative, returns 
all but last |n| characters.
 /// left('abcde', 2) = 'ab'
+/// left('abcde', -2) = 'ab'
 /// The implementation uses UTF-8 code points as characters
 fn left<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
     let n_array = as_int64_array(&args[1])?;
 
     if args[0].data_type() == &DataType::Utf8View {
-        let string_array = as_string_view_array(&args[0])?;
-        left_impl::<T, _>(string_array, n_array)
+        let string_view_array = as_string_view_array(&args[0])?;
+        left_impl_view(string_view_array, n_array)
     } else {
         let string_array = as_generic_string_array::<T>(&args[0])?;
         left_impl::<T, _>(string_array, n_array)
     }
 }
 
+/// `left` implementation for strings
 fn left_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
     string_array: V,
     n_array: &Int64Array,
 ) -> Result<ArrayRef> {
     let iter = ArrayIter::new(string_array);
-    let mut chars_buf = Vec::new();
     let result = iter
         .zip(n_array.iter())
         .map(|(string, n)| match (string, n) {
-            (Some(string), Some(n)) => match n.cmp(&0) {
-                Ordering::Less => {
-                    // Collect chars once and reuse for both count and take
-                    chars_buf.clear();
-                    chars_buf.extend(string.chars());
-                    let len = chars_buf.len() as i64;
-
-                    // For negative n, take (len + n) chars if n > -len 
(avoiding abs() which panics on i64::MIN)
-                    Some(if n > -len {
-                        chars_buf
-                            .iter()
-                            .take((len + n) as usize)
-                            .collect::<String>()
-                    } else {
-                        "".to_string()
-                    })
-                }
-                Ordering::Equal => Some("".to_string()),
-                Ordering::Greater => {
-                    Some(string.chars().take(n as usize).collect::<String>())
-                }
-            },
+            (Some(string), Some(n)) => {
+                let byte_length = left_byte_length(string, n);
+                // Extract first `byte_length` bytes from a byte-indexed slice
+                Some(&string[0..byte_length])
+            }
             _ => None,
         })
         .collect::<GenericStringArray<T>>();
 
     Ok(Arc::new(result) as ArrayRef)
 }
 
+/// `left` implementation for StringViewArray
+fn left_impl_view(
+    string_view_array: &StringViewArray,
+    n_array: &Int64Array,
+) -> Result<ArrayRef> {
+    let len = n_array.len();
+
+    let (views, buffers, _nulls) = string_view_array.clone().into_parts();
+
+    if string_view_array.len() != n_array.len() {
+        return exec_err!(
+            "Expected same shape of arrays, given {} and {}",
+            string_view_array.len(),
+            n_array.len()
+        );
+    }
+
+    // Every string in StringViewArray has one corresponding view in `views`
+    if views.len() != string_view_array.len() {
+        return exec_err!(
+            "StringViewArray views length {} does not match array length {}",
+            views.len(),
+            string_view_array.len()
+        );
+    }

Review Comment:
   The first check should be omitted, I agree.
   
   For the second one - if this doesn't stay true (it is quite low-level code), 
things could break - I switched to `debug_assert` instead.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to