martin-g commented on code in PR #19570:
URL: https://github.com/apache/datafusion/pull/19570#discussion_r2661528785


##########
datafusion/functions/src/string/split_part.rs:
##########
@@ -219,22 +219,32 @@ where
         .try_for_each(|((string, delimiter), n)| -> Result<(), DataFusionError> {
             match (string, delimiter, n) {
                 (Some(string), Some(delimiter), Some(n)) => {
-                    let split_string: Vec<&str> = string.split(delimiter).collect();
-                    let len = split_string.len();
-
-                    let index = match n.cmp(&0) {
-                        std::cmp::Ordering::Less => len as i64 + n,
+                    let result = match n.cmp(&0) {
+                        std::cmp::Ordering::Greater => {
+                            // Positive index: use nth() to avoid collecting all parts
+                            // This stops iteration as soon as we find the nth element
+                            let idx: usize = (n - 1).try_into().map_err(|_| {
+                                exec_datafusion_err!(
+                                    "split_part index {n} exceeds maximum supported value"
+                                )
+                            })?;
+                            string.split(delimiter).nth(idx)
+                        }
+                        std::cmp::Ordering::Less => {
+                            // Negative index: use rsplit().nth() to efficiently get from the end
+                            // rsplit iterates in reverse, so -1 means first from rsplit (index 0)
+                            let idx: usize = (-n - 1).try_into().map_err(|_| {
+                                exec_datafusion_err!(
+                                    "split_part index {n} exceeds maximum supported value"

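Review Comment:
   This branch handles negative indices (`Ordering::Less`), so a failed conversion means `n` is too far below zero, not too large. The message should say "minimum" rather than "maximum":

   ```suggestion
                                       "split_part index {n} exceeds minimum supported value"
   ```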



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to