This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 7518b3dcab Fix string split which should be on whole separator (#9650)
7518b3dcab is described below

commit 7518b3dcabb37b899f4ad8f3efd355bfe0e0fa26
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Tue Oct 25 12:18:23 2022 -0700

    Fix string split which should be on whole separator (#9650)
---
 .../common/function/scalar/StringFunctions.java    |  7 ++++---
 .../ScalarTransformFunctionWrapperTest.java        | 24 +++++++++++-----------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
index 59862ccd86..a00f01315d 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
@@ -473,7 +473,7 @@ public class StringFunctions {
    */
   @ScalarFunction
   public static String[] split(String input, String delimiter) {
-    return StringUtils.split(input, delimiter);
+    return StringUtils.splitByWholeSeparator(input, delimiter);
   }
 
   /**
@@ -484,10 +484,11 @@ public class StringFunctions {
    */
   @ScalarFunction
   public static String[] split(String input, String delimiter, int limit) {
-    return StringUtils.split(input, delimiter, limit);
+    return StringUtils.splitByWholeSeparator(input, delimiter, limit);
   }
 
   /**
+   * TODO: Revisit if index should be one-based (both Presto and Postgres use one-based index, which starts with 1)
    * @param input
    * @param delimiter
    * @param index
@@ -495,7 +496,7 @@ public class StringFunctions {
    */
   @ScalarFunction
   public static String splitPart(String input, String delimiter, int index) {
-    String[] splitString = StringUtils.split(input, delimiter);
+    String[] splitString = StringUtils.splitByWholeSeparator(input, delimiter);
     if (index < splitString.length) {
       return splitString[index];
     } else {
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java
index 7053d9a901..afd1dedacc 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/ScalarTransformFunctionWrapperTest.java
@@ -280,24 +280,24 @@ public class ScalarTransformFunctionWrapperTest extends BaseTransformFunctionTes
   @Test
   public void testStringSplitTransformFunction() {
     ExpressionContext expression =
-        RequestContextUtils.getExpression(String.format("split(%s, ',')", STRING_ALPHANUM_SV_COLUMN));
+        RequestContextUtils.getExpression(String.format("split(%s, 'ab')", STRING_ALPHANUM_SV_COLUMN));
     TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);
     assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
     assertEquals(transformFunction.getName(), "split");
     String[][] expectedValues = new String[NUM_ROWS][];
     for (int i = 0; i < NUM_ROWS; i++) {
-      expectedValues[i] = StringUtils.split(_stringAlphaNumericSVValues[i], ",");
+      expectedValues[i] = StringUtils.splitByWholeSeparator(_stringAlphaNumericSVValues[i], "ab");
     }
     testTransformFunctionMV(transformFunction, expectedValues);
 
     expression = RequestContextUtils.getExpression(
-        String.format("split(%s, ',', %s)", STRING_ALPHANUM_SV_COLUMN, INT_SV_COLUMN));
+        String.format("split(%s, 'ab', %s)", STRING_ALPHANUM_SV_COLUMN, INT_SV_COLUMN));
     transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);
     assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
     assertEquals(transformFunction.getName(), "split");
     expectedValues = new String[NUM_ROWS][];
     for (int i = 0; i < NUM_ROWS; i++) {
-      expectedValues[i] = StringUtils.split(_stringAlphaNumericSVValues[i], ",", _intSVValues[i]);
+      expectedValues[i] = StringUtils.splitByWholeSeparator(_stringAlphaNumericSVValues[i], "ab", _intSVValues[i]);
     }
     testTransformFunctionMV(transformFunction, expectedValues);
   }
@@ -306,13 +306,13 @@ public class ScalarTransformFunctionWrapperTest extends BaseTransformFunctionTes
   public void testStringSplitPartTransformFunction() {
     int index = 2;
     ExpressionContext expression =
-        RequestContextUtils.getExpression(String.format("split_part(%s, ',', %d)", STRING_ALPHANUM_SV_COLUMN, index));
+        RequestContextUtils.getExpression(String.format("split_part(%s, 'ab', %d)", STRING_ALPHANUM_SV_COLUMN, index));
     TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);
     assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
     assertEquals(transformFunction.getName(), "splitPart");
     String[] expectedValues = new String[NUM_ROWS];
     for (int i = 0; i < NUM_ROWS; i++) {
-      String[] splitString = StringUtils.split(_stringAlphaNumericSVValues[i], ",");
+      String[] splitString = StringUtils.splitByWholeSeparator(_stringAlphaNumericSVValues[i], "ab");
       if (splitString.length > index) {
         expectedValues[i] = splitString[i];
       } else {
@@ -871,8 +871,8 @@ public class ScalarTransformFunctionWrapperTest extends BaseTransformFunctionTes
   public void testArrayElementAtInt() {
     Random rand = new Random();
     int index = rand.nextInt(MAX_NUM_MULTI_VALUES);
-    ExpressionContext expression = RequestContextUtils.getExpression(
-        String.format("array_element_at_int(%s, %d)", INT_MV_COLUMN, index + 1));
+    ExpressionContext expression =
+        RequestContextUtils.getExpression(String.format("array_element_at_int(%s, %d)", INT_MV_COLUMN, index + 1));
     TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);
     assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
     assertEquals(transformFunction.getResultMetadata().getDataType(), DataType.INT);
@@ -889,8 +889,8 @@ public class ScalarTransformFunctionWrapperTest extends BaseTransformFunctionTes
   public void testArrayElementAtLong() {
     Random rand = new Random();
     int index = rand.nextInt(MAX_NUM_MULTI_VALUES);
-    ExpressionContext expression = RequestContextUtils.getExpression(
-        String.format("array_element_at_long(%s, %d)", LONG_MV_COLUMN, index + 1));
+    ExpressionContext expression =
+        RequestContextUtils.getExpression(String.format("array_element_at_long(%s, %d)", LONG_MV_COLUMN, index + 1));
     TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);
     assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
     assertEquals(transformFunction.getResultMetadata().getDataType(), DataType.LONG);
@@ -907,8 +907,8 @@ public class ScalarTransformFunctionWrapperTest extends BaseTransformFunctionTes
   public void testArrayElementAtFloat() {
     Random rand = new Random();
     int index = rand.nextInt(MAX_NUM_MULTI_VALUES);
-    ExpressionContext expression = RequestContextUtils.getExpression(
-        String.format("array_element_at_float(%s, %d)", FLOAT_MV_COLUMN, index + 1));
+    ExpressionContext expression =
+        RequestContextUtils.getExpression(String.format("array_element_at_float(%s, %d)", FLOAT_MV_COLUMN, index + 1));
     TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);
     assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
     assertEquals(transformFunction.getResultMetadata().getDataType(), DataType.FLOAT);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to