martin-g commented on code in PR #19570:
URL: https://github.com/apache/datafusion/pull/19570#discussion_r2661528785
##########
datafusion/functions/src/string/split_part.rs:
##########
@@ -219,22 +219,32 @@ where
.try_for_each(|((string, delimiter), n)| -> Result<(),
DataFusionError> {
match (string, delimiter, n) {
(Some(string), Some(delimiter), Some(n)) => {
- let split_string: Vec<&str> =
string.split(delimiter).collect();
- let len = split_string.len();
-
- let index = match n.cmp(&0) {
- std::cmp::Ordering::Less => len as i64 + n,
+ let result = match n.cmp(&0) {
+ std::cmp::Ordering::Greater => {
+ // Positive index: use nth() to avoid collecting all parts
+ // This stops iteration as soon as we find the nth element
+ let idx: usize = (n - 1).try_into().map_err(|_| {
+ exec_datafusion_err!(
+ "split_part index {n} exceeds maximum supported value"
+ )
+ })?;
+ string.split(delimiter).nth(idx)
+ }
+ std::cmp::Ordering::Less => {
+ // Negative index: use rsplit().nth() to efficiently get from the end
+ // rsplit iterates in reverse, so -1 means first from rsplit (index 0)
+ let idx: usize = (-n - 1).try_into().map_err(|_| {
+ exec_datafusion_err!(
+ "split_part index {n} exceeds maximum supported value"
Review Comment:
```suggestion
"split_part index {n} exceeds minimum supported value"
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]