This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 180f3e8af1 Support negatives in split part (#10780)
180f3e8af1 is described below
commit 180f3e8af12d6da8e814a0ee0e5718b48d7d8aee
Author: Trent Hauck <[email protected]>
AuthorDate: Mon Jun 3 11:50:36 2024 -0700
Support negatives in split part (#10780)
* impv: support negative indexes for split_part
* tests: update unittests in func
* tests: add out of bounds negative test
* style: fix clippy
---
datafusion/functions/src/string/split_part.rs | 37 +++++++++++++++++++++------
datafusion/sqllogictest/test_files/expr.slt | 13 ++++++++++
2 files changed, 42 insertions(+), 8 deletions(-)
diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs
index 517fa93e52..d6f7bb4a4d 100644
--- a/datafusion/functions/src/string/split_part.rs
+++ b/datafusion/functions/src/string/split_part.rs
@@ -97,14 +97,21 @@ fn split_part<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
.zip(n_array.iter())
.map(|((string, delimiter), n)| match (string, delimiter, n) {
(Some(string), Some(delimiter), Some(n)) => {
- if n <= 0 {
- exec_err!("field position must be greater than zero")
- } else {
- let split_string: Vec<&str> = string.split(delimiter).collect();
- match split_string.get(n as usize - 1) {
- Some(s) => Ok(Some(*s)),
- None => Ok(Some("")),
+ let split_string: Vec<&str> = string.split(delimiter).collect();
+ let len = split_string.len();
+
+ let index = match n.cmp(&0) {
+ std::cmp::Ordering::Less => len as i64 + n,
+ std::cmp::Ordering::Equal => {
+ return exec_err!("field position must not be zero");
}
+ std::cmp::Ordering::Greater => n - 1,
+ } as usize;
+
+ if index < len {
+ Ok(Some(split_string[index]))
+ } else {
+ Ok(Some(""))
}
}
_ => Ok(None),
@@ -165,7 +172,21 @@ mod tests {
ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("~@~")))),
ColumnarValue::Scalar(ScalarValue::Int64(Some(-1))),
],
- exec_err!("field position must be greater than zero"),
+ Ok(Some("ghi")),
+ &str,
+ Utf8,
+ StringArray
+ );
+ test_function!(
+ SplitPartFunc::new(),
+ &[
+ ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from(
+ "abc~@~def~@~ghi"
+ )))),
+ ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("~@~")))),
+ ColumnarValue::Scalar(ScalarValue::Int64(Some(0))),
+ ],
+ exec_err!("field position must not be zero"),
&str,
Utf8,
StringArray
diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt
index b6477f0b57..cb2bb9fad1 100644
--- a/datafusion/sqllogictest/test_files/expr.slt
+++ b/datafusion/sqllogictest/test_files/expr.slt
@@ -626,6 +626,19 @@ SELECT split_part('abc~@~def~@~ghi', '~@~', CAST(NULL AS INT))
----
NULL
+query T
+SELECT split_part('abc~@~def~@~ghi', '~@~', -1)
+----
+ghi
+
+query T
+SELECT split_part('abc~@~def~@~ghi', '~@~', -100)
+----
+(empty)
+
+statement error DataFusion error: Execution error: field position must not be zero
+SELECT split_part('abc~@~def~@~ghi', '~@~', 0)
+
query B
SELECT starts_with('alphabet', 'alph')
----
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]