alamb commented on code in PR #9475:
URL: https://github.com/apache/arrow-datafusion/pull/9475#discussion_r1518223454


##########
datafusion/sqllogictest/test_files/functions.slt:
##########
@@ -933,80 +933,130 @@ SELECT levenshtein(NULL, NULL)
 ----
 NULL
 
-query T
-SELECT substr_index('www.apache.org', '.', 1)
-----
-www
-
-query T
-SELECT substr_index('www.apache.org', '.', 2)
-----
-www.apache
-
-query T
-SELECT substr_index('www.apache.org', '.', -1)
-----
-org
-
-query T
-SELECT substr_index('www.apache.org', '.', -2)
-----
-apache.org
-
-query T
-SELECT substr_index('www.apache.org', 'ac', 1)
-----
-www.ap
-
-query T
-SELECT substr_index('www.apache.org', 'ac', -1)
-----
-he.org
-
-query T
-SELECT substr_index('www.apache.org', 'ac', 2)
-----
-www.apache.org
-
-query T
-SELECT substr_index('www.apache.org', 'ac', -2)
-----
-www.apache.org
-
-query ?
-SELECT substr_index(NULL, 'ac', 1)
-----
-NULL
-
-query T
-SELECT substr_index('www.apache.org', NULL, 1)
-----
-NULL
-
-query T
-SELECT substr_index('www.apache.org', 'ac', NULL)
-----
-NULL
-
-query T
-SELECT substr_index('', 'ac', 1)
+# Test substring_index using '.' as delimiter
+# This query is compatible with MySQL(8.0.19 or later), convenient for 
comparing results
+query TIT
+SELECT str, n, substring_index(str, '.', n) AS c FROM
+  (VALUES
+    ROW('arrow.apache.org'),
+    ROW('.'),
+    ROW('...')
+  ) AS strings(str),
+  (VALUES
+    ROW(1),
+    ROW(2),
+    ROW(3),
+    ROW(100),
+    ROW(-1),
+    ROW(-2),
+    ROW(-3),
+    ROW(-100)
+  ) AS occurrences(n)
+ORDER BY str DESC, n;
+----
+arrow.apache.org -100 arrow.apache.org
+arrow.apache.org -3 arrow.apache.org
+arrow.apache.org -2 apache.org
+arrow.apache.org -1 org
+arrow.apache.org 1 arrow
+arrow.apache.org 2 arrow.apache
+arrow.apache.org 3 arrow.apache.org
+arrow.apache.org 100 arrow.apache.org
+... -100 ...
+... -3 ..
+... -2 .
+... -1 (empty)
+... 1 (empty)
+... 2 .
+... 3 ..
+... 100 ...
+. -100 .
+. -3 .
+. -2 .
+. -1 (empty)
+. 1 (empty)
+. 2 .
+. 3 .
+. 100 .
+
+# Test substring_index using 'ac' as delimiter
+query TIT
+SELECT str, n, substring_index(str, 'ac', n) AS c FROM

Review Comment:
   the CTE style of these tests is very nice 👌 



##########
datafusion/physical-expr/src/unicode_expressions.rs:
##########
@@ -481,40 +481,28 @@ pub fn substr_index<T: OffsetSizeTrait>(args: 
&[ArrayRef]) -> Result<ArrayRef> {
         .zip(count_array.iter())
         .map(|((string, delimiter), n)| match (string, delimiter, n) {
             (Some(string), Some(delimiter), Some(n)) => {
-                let mut res = String::new();
-                match n {
-                    0 => {
-                        "".to_string();
-                    }
-                    _other => {
-                        if n > 0 {
-                            let idx = string
-                                .split(delimiter)
-                                .take(n as usize)
-                                .fold(0, |len, x| len + x.len() + 
delimiter.len())
-                                - delimiter.len();
-                            res.push_str(if idx >= string.len() {
-                                string
-                            } else {
-                                &string[..idx]
-                            });
-                        } else {
-                            let idx = (string.split(delimiter).take((-n) as 
usize).fold(
-                                string.len() as isize,
-                                |len, x| {
-                                    len - x.len() as isize - delimiter.len() 
as isize
-                                },
-                            ) + delimiter.len() as isize)
-                                as usize;
-                            res.push_str(if idx >= string.len() {
-                                string
-                            } else {
-                                &string[idx..]
-                            });
-                        }
-                    }
+                // In MySQL, these cases will return an empty string.

Review Comment:
   I tried it out here too 
https://www.w3schools.com/mysql/trymysql.asp?filename=trysql_func_mysql_substring_index2
   
   And this behavior is consistent with what I observed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to