This is an automated email from the ASF dual-hosted git repository.

jhyde pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git

commit d2ca97e79917a3ced509421509189e0358ba774e
Author: Tanner Clary <[email protected]>
AuthorDate: Mon Mar 13 18:38:03 2023 -0700

    [CALCITE-5580] Add SPLIT function (enabled in BigQuery library)
    
    Close apache/calcite#3109
---
 babel/src/test/resources/sql/big-query.iq          | 82 ++++++++++++++++++++++
 .../calcite/adapter/enumerable/RexImpTable.java    |  2 +
 .../apache/calcite/runtime/CalciteResource.java    |  4 ++
 .../org/apache/calcite/runtime/SqlFunctions.java   | 46 ++++++++++++
 .../org/apache/calcite/sql/SqlBasicFunction.java   | 46 +++++++++---
 .../apache/calcite/sql/SqlWithinGroupOperator.java |  2 +-
 .../calcite/sql/fun/SqlLibraryOperators.java       | 26 +++++++
 .../org/apache/calcite/sql/type/OperandTypes.java  |  9 +++
 .../calcite/runtime/CalciteResource.properties     |  1 +
 .../org/apache/calcite/test/SqlFunctionsTest.java  | 68 ++++++++++++++++++
 site/_docs/reference.md                            |  1 +
 .../org/apache/calcite/test/SqlOperatorTest.java   | 34 +++++++++
 12 files changed, 309 insertions(+), 12 deletions(-)

diff --git a/babel/src/test/resources/sql/big-query.iq 
b/babel/src/test/resources/sql/big-query.iq
index 9b6a0fc51b..e3723221a0 100755
--- a/babel/src/test/resources/sql/big-query.iq
+++ b/babel/src/test/resources/sql/big-query.iq
@@ -648,6 +648,88 @@ SELECT (19 % 19) as result;
 
 !ok
 
+#####################################################################
+# SPLIT
+#
+# SPLIT(string [, delimiter])
+#
+# Splits string using the delimiter argument. For STRING, the default
+# delimiter is the comma.
+#
+# Returns a STRING array as result.
+WITH letters AS
+  (SELECT '' as letter_group
+  UNION ALL
+  SELECT 'a' as letter_group
+  UNION ALL
+  SELECT 'b c d' as letter_group)
+SELECT SPLIT(letter_group, ' ') as example
+FROM letters;
++-----------+
+| example   |
++-----------+
+| []        |
+| [a]       |
+| [b, c, d] |
++-----------+
+(3 rows)
+
+!ok
+
+SELECT SPLIT("h,e,l,l,o") as result;
++-----------------+
+| result          |
++-----------------+
+| [h, e, l, l, o] |
++-----------------+
+(1 row)
+
+!ok
+
+SELECT SPLIT("") as result;
++--------+
+| result |
++--------+
+| []     |
++--------+
+(1 row)
+
+!ok
+
+# Careful to treat the delimiter as a string, not a regular expression
+SELECT SPLIT("abc.de.", ".") as result;
++-------------+
+| result      |
++-------------+
+| [abc, de, ] |
++-------------+
+(1 row)
+
+!ok
+
+WITH letters AS
+  (SELECT x'' as letter_group
+  UNION ALL
+  SELECT x'41' as letter_group
+  UNION ALL
+  SELECT x'42ff43ff44' as letter_group)
+SELECT SPLIT(letter_group, x'ff') as example
+FROM letters;
++-----------+
+| example   |
++-----------+
+| []        |
+| [A]       |
+| [B, C, D] |
++-----------+
+(3 rows)
+
+!ok
+
+SELECT SPLIT(x'abc2') as result;
+Call to function 'SPLIT' with argument of type 'BINARY(2)' requires extra delimiter argument
+!error
+
 #####################################################################
 # STRING
 #
diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 503bee6e19..dabf91393d 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -169,6 +169,7 @@ import static 
org.apache.calcite.sql.fun.SqlLibraryOperators.SHA1;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.SINH;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.SOUNDEX;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.SPACE;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.SPLIT;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.STARTS_WITH;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.STRCMP;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.TANH;
@@ -443,6 +444,7 @@ public class RexImpTable {
       defineMethod(SOUNDEX, BuiltInMethod.SOUNDEX.method, NullPolicy.STRICT);
       defineMethod(DIFFERENCE, BuiltInMethod.DIFFERENCE.method, 
NullPolicy.STRICT);
       defineMethod(REVERSE, BuiltInMethod.REVERSE.method, NullPolicy.STRICT);
+      defineMethod(SPLIT, "split", NullPolicy.STRICT);
 
       map.put(TRIM, new TrimImplementor());
 
diff --git a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java 
b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
index 98111b314e..02a7b4aa49 100644
--- a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
+++ b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
@@ -598,6 +598,10 @@ public interface CalciteResource {
   ExInst<SqlValidatorException> argumentMustBeValidPrecision(String a0, int a1,
       int a2);
 
+  @BaseMessage("Call to function ''{0}'' with argument of type ''{1}'' 
requires extra delimiter argument")
+  ExInst<SqlValidatorException> delimiterIsRequired(String functionName,
+      String argumentTypeName);
+
   @BaseMessage("Wrong arguments for table function ''{0}'' call. Expected 
''{1}'', actual ''{2}''")
   ExInst<CalciteException> illegalArgumentForTableFunctionCall(String a0,
       String a1, String a2);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java 
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index abbec30a08..50e4cd043c 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -49,6 +49,7 @@ import org.apache.commons.codec.language.Soundex;
 
 import com.google.common.base.Splitter;
 import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableList;
 
 import org.checkerframework.checker.nullness.qual.Nullable;
 import org.checkerframework.checker.nullness.qual.PolyNull;
@@ -463,6 +464,51 @@ public class SqlFunctions {
     return s0.startsWith(s1);
   }
 
+  /** SQL {@code SPLIT(string, string)} function.
+   *
+   * <p>Cuts {@code s} at every occurrence of {@code delimiter}; the delimiter
+   * is matched literally, not as a regular expression. An empty {@code s}
+   * gives an empty list, and an empty {@code delimiter} gives a list whose
+   * only element is {@code s}. */
+  public static List<String> split(String s, String delimiter) {
+    if (s.isEmpty()) {
+      return ImmutableList.of();
+    }
+    final int step = delimiter.length();
+    if (step == 0) {
+      // With an empty delimiter the scan below would never advance, so the
+      // input is returned whole.
+      return ImmutableList.of(s);
+    }
+    final ImmutableList.Builder<String> pieces = ImmutableList.builder();
+    int start = 0;
+    int hit = s.indexOf(delimiter, start);
+    while (hit >= 0) {
+      pieces.add(s.substring(start, hit));
+      start = hit + step;
+      hit = s.indexOf(delimiter, start);
+    }
+    // Whatever follows the last delimiter (possibly nothing) is the last piece.
+    pieces.add(s.substring(start));
+    return pieces.build();
+  }
+
+  /** SQL {@code SPLIT(string)} function.
+   *
+   * <p>Same as the two-argument form with a comma as the delimiter. */
+  public static List<String> split(String s) {
+    final String comma = ",";
+    return split(s, comma);
+  }
+
+  /** SQL {@code SPLIT(binary, binary)} function.
+   *
+   * <p>Cuts {@code s} at every occurrence of the byte sequence
+   * {@code delimiter}. An empty {@code s} gives an empty list, and an empty
+   * {@code delimiter} gives a list whose only element is {@code s}. */
+  public static List<ByteString> split(ByteString s, ByteString delimiter) {
+    if (s.length() == 0) {
+      return ImmutableList.of();
+    }
+    final int step = delimiter.length();
+    if (step == 0) {
+      // An empty delimiter cannot advance the scan; return the input whole.
+      return ImmutableList.of(s);
+    }
+    final ImmutableList.Builder<ByteString> pieces = ImmutableList.builder();
+    int start = 0;
+    int hit = s.indexOf(delimiter, start);
+    while (hit >= 0) {
+      pieces.add(s.substring(start, hit));
+      start = hit + step;
+      hit = s.indexOf(delimiter, start);
+    }
+    // Whatever follows the last delimiter (possibly nothing) is the last piece.
+    pieces.add(s.substring(start));
+    return pieces.build();
+  }
+
   /** SQL SUBSTRING(string FROM ...) function. */
   public static String substring(String c, int s) {
     final int s0 = s - 1;
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java 
b/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java
index 5787938a44..a811228bea 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java
@@ -23,6 +23,7 @@ import org.apache.calcite.sql.type.SqlOperandTypeInference;
 import org.apache.calcite.sql.type.SqlReturnTypeInference;
 import org.apache.calcite.sql.validate.SqlMonotonicity;
 import org.apache.calcite.sql.validate.SqlValidator;
+import org.apache.calcite.sql.validate.SqlValidatorScope;
 
 import org.checkerframework.checker.nullness.qual.Nullable;
 
@@ -45,6 +46,7 @@ public class SqlBasicFunction extends SqlFunction {
   private final SqlSyntax syntax;
   private final boolean deterministic;
   private final SqlOperandHandler operandHandler;
+  private final int callValidator;
   private final Function<SqlOperatorBinding, SqlMonotonicity> 
monotonicityInference;
 
   //~ Constructors -----------------------------------------------------------
@@ -60,6 +62,7 @@ public class SqlBasicFunction extends SqlFunction {
    * @param operandTypeInference Strategy to use for parameter type inference
    * @param operandHandler Strategy to use for handling operands
    * @param operandTypeChecker Strategy to use for parameter type checking
+   * @param callValidator Strategy to validate calls
    * @param category Categorization for function
    * @param monotonicityInference Strategy to infer monotonicity of a call
    */
@@ -68,6 +71,7 @@ public class SqlBasicFunction extends SqlFunction {
       @Nullable SqlOperandTypeInference operandTypeInference,
       SqlOperandHandler operandHandler,
       SqlOperandTypeChecker operandTypeChecker,
+      Integer callValidator,
       SqlFunctionCategory category,
       Function<SqlOperatorBinding, SqlMonotonicity> monotonicityInference) {
     super(name, kind,
@@ -77,6 +81,7 @@ public class SqlBasicFunction extends SqlFunction {
     this.syntax = requireNonNull(syntax, "syntax");
     this.deterministic = deterministic;
     this.operandHandler = requireNonNull(operandHandler, "operandHandler");
+    this.callValidator = requireNonNull(callValidator, "callValidator");
     this.monotonicityInference =
         requireNonNull(monotonicityInference, "monotonicityInference");
   }
@@ -88,7 +93,7 @@ public class SqlBasicFunction extends SqlFunction {
       SqlOperandTypeChecker operandTypeChecker) {
     return new SqlBasicFunction(kind.name(), kind,
         SqlSyntax.FUNCTION, true, returnTypeInference, null,
-        OperandHandlers.DEFAULT, operandTypeChecker,
+        OperandHandlers.DEFAULT, operandTypeChecker, 0,
         SqlFunctionCategory.SYSTEM, call -> SqlMonotonicity.NOT_MONOTONIC);
   }
 
@@ -100,7 +105,7 @@ public class SqlBasicFunction extends SqlFunction {
       SqlOperandTypeChecker operandTypeChecker) {
     return new SqlBasicFunction(name, SqlKind.OTHER_FUNCTION,
         SqlSyntax.FUNCTION, true, returnTypeInference, null,
-        OperandHandlers.DEFAULT, operandTypeChecker,
+        OperandHandlers.DEFAULT, operandTypeChecker, 0,
         SqlFunctionCategory.NUMERIC, call -> SqlMonotonicity.NOT_MONOTONIC);
   }
 
@@ -111,7 +116,7 @@ public class SqlBasicFunction extends SqlFunction {
       SqlOperandTypeChecker operandTypeChecker, SqlFunctionCategory category) {
     return new SqlBasicFunction(name, SqlKind.OTHER_FUNCTION,
         SqlSyntax.FUNCTION, true, returnTypeInference, null,
-        OperandHandlers.DEFAULT, operandTypeChecker,
+        OperandHandlers.DEFAULT, operandTypeChecker, 0,
         category, call -> SqlMonotonicity.NOT_MONOTONIC);
   }
 
@@ -141,32 +146,40 @@ public class SqlBasicFunction extends SqlFunction {
     return operandHandler.rewriteCall(validator, call);
   }
 
+  /** Validates a call to this function by delegating to the superclass.
+   *
+   * <p>NOTE(review): the {@code callValidator} field is not consulted in this
+   * method; as written, the override adds no behavior of its own. */
+  @Override public void validateCall(SqlCall call, SqlValidator validator,
+      SqlValidatorScope scope, SqlValidatorScope operandScope) {
+    super.validateCall(call, validator, scope, operandScope);
+  }
+
   /** Returns a copy of this function with a given name. */
   public SqlBasicFunction withName(String name) {
     return new SqlBasicFunction(name, kind, syntax, deterministic,
         getReturnTypeInference(), getOperandTypeInference(), operandHandler,
-        getOperandTypeChecker(), getFunctionType(), monotonicityInference);
+        getOperandTypeChecker(), callValidator,
+        getFunctionType(), monotonicityInference);
   }
 
   /** Returns a copy of this function with a given kind. */
   public SqlBasicFunction withKind(SqlKind kind) {
     return new SqlBasicFunction(getName(), kind, syntax, deterministic,
         getReturnTypeInference(), getOperandTypeInference(), operandHandler,
-        getOperandTypeChecker(), getFunctionType(), monotonicityInference);
+        getOperandTypeChecker(), callValidator,
+        getFunctionType(), monotonicityInference);
   }
 
   /** Returns a copy of this function with a given category. */
   public SqlBasicFunction withFunctionType(SqlFunctionCategory category) {
     return new SqlBasicFunction(getName(), kind, syntax, deterministic,
         getReturnTypeInference(), getOperandTypeInference(), operandHandler,
-        getOperandTypeChecker(), category, monotonicityInference);
+        getOperandTypeChecker(), callValidator, category, 
monotonicityInference);
   }
 
   /** Returns a copy of this function with a given syntax. */
   public SqlBasicFunction withSyntax(SqlSyntax syntax) {
     return new SqlBasicFunction(getName(), kind, syntax, deterministic,
         getReturnTypeInference(), getOperandTypeInference(), operandHandler,
-        getOperandTypeChecker(), getFunctionType(), monotonicityInference);
+        getOperandTypeChecker(), callValidator,
+        getFunctionType(), monotonicityInference);
   }
 
   /** Returns a copy of this function with a given strategy for inferring
@@ -175,7 +188,8 @@ public class SqlBasicFunction extends SqlFunction {
       SqlOperandTypeInference operandTypeInference) {
     return new SqlBasicFunction(getName(), kind, syntax, deterministic,
         getReturnTypeInference(), operandTypeInference, operandHandler,
-        getOperandTypeChecker(), getFunctionType(), monotonicityInference);
+        getOperandTypeChecker(), callValidator,
+        getFunctionType(), monotonicityInference);
   }
 
   /** Returns a copy of this function with a given strategy for handling
@@ -183,13 +197,15 @@ public class SqlBasicFunction extends SqlFunction {
   public SqlBasicFunction withOperandHandler(SqlOperandHandler operandHandler) 
{
     return new SqlBasicFunction(getName(), kind, syntax, deterministic,
         getReturnTypeInference(), getOperandTypeInference(), operandHandler,
-        getOperandTypeChecker(), getFunctionType(), monotonicityInference);
+        getOperandTypeChecker(), callValidator,
+        getFunctionType(), monotonicityInference);
   }
   /** Returns a copy of this function with a given determinism. */
   public SqlBasicFunction withDeterministic(boolean deterministic) {
     return new SqlBasicFunction(getName(), kind, syntax, deterministic,
         getReturnTypeInference(), getOperandTypeInference(), operandHandler,
-        getOperandTypeChecker(), getFunctionType(), monotonicityInference);
+        getOperandTypeChecker(), callValidator,
+        getFunctionType(), monotonicityInference);
   }
 
   /** Returns a copy of this function with a given strategy for inferring
@@ -198,6 +214,14 @@ public class SqlBasicFunction extends SqlFunction {
       Function<SqlOperatorBinding, SqlMonotonicity> monotonicityInference) {
     return new SqlBasicFunction(getName(), kind, syntax, deterministic,
         getReturnTypeInference(), getOperandTypeInference(), operandHandler,
-        getOperandTypeChecker(), getFunctionType(), monotonicityInference);
+        getOperandTypeChecker(), callValidator,
+        getFunctionType(), monotonicityInference);
+  }
+
+  /** Returns a copy of this function with a given call-validator value.
+   *
+   * <p>NOTE(review): the declared return type is {@code SqlFunction} rather
+   * than {@code SqlBasicFunction}, so the result cannot be chained into
+   * further {@code with...} calls without a cast; confirm this is intended. */
+  public SqlFunction withValidation(int callValidator) {
+    return new SqlBasicFunction(getName(), kind, syntax, deterministic,
+        getReturnTypeInference(), getOperandTypeInference(), operandHandler,
+        getOperandTypeChecker(), callValidator,
+        getFunctionType(), monotonicityInference);
+  }
 }
diff --git 
a/core/src/main/java/org/apache/calcite/sql/SqlWithinGroupOperator.java 
b/core/src/main/java/org/apache/calcite/sql/SqlWithinGroupOperator.java
index 87f6fb7b8f..d335948322 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlWithinGroupOperator.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlWithinGroupOperator.java
@@ -106,7 +106,7 @@ public class SqlWithinGroupOperator extends 
SqlBinaryOperator {
     }
   }
 
-  private SqlNode getCollationColumn(SqlCall call) {
+  private static SqlNode getCollationColumn(SqlCall call) {
     return ((SqlNodeList) call.operand(1)).get(0);
   }
 
diff --git 
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java 
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index cde127a424..2cd4def708 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -41,9 +41,11 @@ import org.apache.calcite.sql.type.SqlReturnTypeInference;
 import org.apache.calcite.sql.type.SqlTypeFamily;
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.sql.type.SqlTypeTransforms;
+import org.apache.calcite.sql.type.SqlTypeUtil;
 import org.apache.calcite.sql.validate.SqlValidator;
 import org.apache.calcite.util.Litmus;
 import org.apache.calcite.util.Optionality;
+import org.apache.calcite.util.Static;
 
 import org.checkerframework.checker.nullness.qual.Nullable;
 
@@ -301,6 +303,30 @@ public abstract class SqlLibraryOperators {
           OperandTypes.STRING)
           .withFunctionType(SqlFunctionCategory.STRING);
 
+  /** The "SPLIT(string [, delimiter])" function.
+   *
+   * <p>Accepts one or two character operands, or one or two binary operands.
+   * The return type is an array of the first operand's type. A call with a
+   * single binary operand is rejected by
+   * {@link #deriveTypeSplit(SqlOperatorBinding, RelDataType)}. */
+  @LibraryOperator(libraries = {BIG_QUERY})
+  public static final SqlFunction SPLIT =
+      SqlBasicFunction.create("SPLIT",
+          ReturnTypes.ARG0
+              .andThen(SqlLibraryOperators::deriveTypeSplit)
+              .andThen(SqlTypeTransforms.TO_ARRAY),
+          OperandTypes.or(OperandTypes.CHARACTER_CHARACTER,
+              OperandTypes.CHARACTER,
+              OperandTypes.BINARY_BINARY,
+              OperandTypes.BINARY),
+          SqlFunctionCategory.STRING)
+          // NOTE(review): the meaning of the literal 3 is not evident from
+          // this declaration; confirm against SqlBasicFunction.
+          .withValidation(3);
+
+  static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding,
+      RelDataType type) {
+    if (SqlTypeUtil.isBinary(type) && operatorBinding.getOperandCount() == 1) {
+      throw operatorBinding.newError(
+          Static.RESOURCE.delimiterIsRequired(
+              operatorBinding.getOperator().getName(), type.toString()));
+    }
+    return type;
+  }
+
   /** Generic "SUBSTR(string, position [, substringLength ])" function. */
   private static final SqlBasicFunction SUBSTR =
       SqlBasicFunction.create("SUBSTR", ReturnTypes.ARG0_NULLABLE_VARYING,
diff --git a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java 
b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
index 7c191977aa..c336f87b84 100644
--- a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
+++ b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
@@ -374,12 +374,21 @@ public abstract class OperandTypes {
   public static final SqlSingleOperandTypeChecker BINARY =
       family(SqlTypeFamily.BINARY);
 
+  /** Checker built from the family pair (BINARY, BINARY), i.e. two operands
+   * that both belong to the BINARY type family. */
+  public static final SqlSingleOperandTypeChecker BINARY_BINARY =
+      family(SqlTypeFamily.BINARY, SqlTypeFamily.BINARY);
+
   public static final SqlSingleOperandTypeChecker STRING =
       family(SqlTypeFamily.STRING);
 
   public static final FamilyOperandTypeChecker STRING_STRING =
       family(SqlTypeFamily.STRING, SqlTypeFamily.STRING);
 
+  /** Checker for a STRING operand followed by a second STRING operand that
+   * may be omitted. */
+  public static final FamilyOperandTypeChecker STRING_OPTIONAL_STRING =
+      family(
+          ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING),
+          // Second operand optional (operand index 0, 1)
+          number -> number == 1);
+
   public static final FamilyOperandTypeChecker STRING_STRING_STRING =
       family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING);
 
diff --git 
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties 
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
index 874e96043a..ad0acc822b 100644
--- 
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
+++ 
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
@@ -199,6 +199,7 @@ NullIllegal=Illegal use of ''NULL''
 DynamicParamIllegal=Illegal use of dynamic parameter
 InvalidBoolean=''{0}'' is not a valid boolean value
 ArgumentMustBeValidPrecision=Argument to function ''{0}'' must be a valid 
precision between ''{1,number,#}'' and ''{2,number,#}''
+DelimiterIsRequired=Call to function ''{0}'' with argument of type ''{1}'' requires extra delimiter argument
 IllegalArgumentForTableFunctionCall=Wrong arguments for table function ''{0}'' 
call. Expected ''{1}'', actual ''{2}''
 CannotCallTableFunctionHere=Cannot call table function here: ''{0}''
 InvalidTimeFrame=''{0}'' is not a valid time frame
diff --git a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java 
b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
index 35d0375b4e..704b020871 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
@@ -22,6 +22,8 @@ import org.apache.calcite.runtime.CalciteException;
 import org.apache.calcite.runtime.SqlFunctions;
 import org.apache.calcite.runtime.Utilities;
 
+import com.google.common.collect.ImmutableList;
+
 import org.junit.jupiter.api.Test;
 
 import java.math.BigDecimal;
@@ -81,6 +83,14 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  * rather than {@code assertEquals}.
  */
 class SqlFunctionsTest {
+  /** Returns a fixed-size list backed by the given elements.
+   *
+   * <p>Marked {@code @SafeVarargs} because the varargs array is only handed
+   * to {@link Arrays#asList}; without the annotation the generic varargs
+   * parameter triggers an unchecked "heap pollution" warning. */
+  @SafeVarargs
+  static <E> List<E> list(E... es) {
+    return Arrays.asList(es);
+  }
+
+  /** Returns an empty immutable list; the zero-argument counterpart of
+   * {@code list(E...)}. */
+  static <E> List<E> list() {
+    return ImmutableList.of();
+  }
+
   @Test void testCharLength() {
     assertThat(charLength("xyz"), is(3));
   }
@@ -507,6 +517,64 @@ class SqlFunctionsTest {
     assertThat(SqlFunctions.sround(-12000, -5), within(0d, 0.001));
   }
 
+  /** Tests {@code SqlFunctions.split} for both character strings and
+   * {@code ByteString} values, covering delimiters at the start, middle and
+   * end, multi-character delimiters, and empty inputs. */
+  @Test void testSplit() {
+    assertThat("no occurrence of delimiter",
+        SqlFunctions.split("abc", ","), is(list("abc")));
+    assertThat("delimiter in middle",
+        SqlFunctions.split("abc", "b"), is(list("a", "c")));
+    assertThat("delimiter at end",
+        SqlFunctions.split("abc", "c"), is(list("ab", "")));
+    assertThat("delimiter at start",
+        SqlFunctions.split("abc", "a"), is(list("", "bc")));
+    assertThat("empty delimiter",
+        SqlFunctions.split("abc", ""), is(list("abc")));
+    assertThat("empty delimiter and string",
+        SqlFunctions.split("", ""), is(list()));
+    assertThat("empty string",
+        SqlFunctions.split("", ","), is(list()));
+    assertThat("long delimiter (occurs at start)",
+        SqlFunctions.split("abracadabra", "ab"), is(list("", "racad", "ra")));
+    assertThat("long delimiter (occurs at end)",
+        SqlFunctions.split("sabracadabrab", "ab"),
+        is(list("s", "racad", "r", "")));
+
+    // Same as above but for ByteString
+    // Each letter of the text fixtures maps to one byte:
+    // a = aa, b = bb, c = cc, d = dd, r = 44, s = 55; f = ff never occurs.
+    final ByteString a = ByteString.of("aa", 16);
+    final ByteString ab = ByteString.of("aabb", 16);
+    final ByteString abc = ByteString.of("aabbcc", 16);
+    final ByteString abracadabra = ByteString.of("aabb44aaccaaddaabb44aa", 16);
+    final ByteString b = ByteString.of("bb", 16);
+    final ByteString bc = ByteString.of("bbcc", 16);
+    final ByteString c = ByteString.of("cc", 16);
+    final ByteString f = ByteString.of("ff", 16);
+    final ByteString r = ByteString.of("44", 16);
+    final ByteString ra = ByteString.of("44aa", 16);
+    final ByteString racad = ByteString.of("44aaccaadd", 16);
+    final ByteString empty = ByteString.of("", 16);
+    final ByteString s = ByteString.of("55", 16);
+    final ByteString sabracadabrab =
+        ByteString.of("55", 16).concat(abracadabra).concat(b);
+    assertThat("no occurrence of delimiter",
+        SqlFunctions.split(abc, f), is(list(abc)));
+    assertThat("delimiter in middle",
+        SqlFunctions.split(abc, b), is(list(a, c)));
+    assertThat("delimiter at end",
+        SqlFunctions.split(abc, c), is(list(ab, empty)));
+    assertThat("delimiter at start",
+        SqlFunctions.split(abc, a), is(list(empty, bc)));
+    assertThat("empty delimiter",
+        SqlFunctions.split(abc, empty), is(list(abc)));
+    assertThat("empty delimiter and string",
+        SqlFunctions.split(empty, empty), is(list()));
+    assertThat("empty string",
+        SqlFunctions.split(empty, f), is(list()));
+    assertThat("long delimiter (occurs at start)",
+        SqlFunctions.split(abracadabra, ab), is(list(empty, racad, ra)));
+    assertThat("long delimiter (occurs at end)",
+        SqlFunctions.split(sabracadabrab, ab),
+        is(list(s, racad, r, empty)));
+  }
+
   @Test void testByteString() {
     final byte[] bytes = {(byte) 0xAB, (byte) 0xFF};
     final ByteString byteString = new ByteString(bytes);
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index bd37dba74d..ddd8dd7e3a 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2713,6 +2713,7 @@ BigQuery's type system uses confusingly different names 
for types and functions:
 | b o | SINH(numeric)                                | Returns the hyperbolic 
sine of *numeric*
 | b m o p | SOUNDEX(string)                          | Returns the phonetic 
representation of *string*; throws if *string* is encoded with multi-byte 
encoding such as UTF-8
 | m | SPACE(integer)                                 | Returns a string of 
*integer* spaces; returns an empty string if *integer* is less than 1
+| b | SPLIT(string [, delimiter ])                   | Returns the string array of *string* split at *delimiter* (if omitted, default is comma)
 | b | STARTS_WITH(string1, string2)                  | Returns whether 
*string2* is a prefix of *string1*
 | m | STRCMP(string, string)                         | Returns 0 if both of 
the strings are same and returns -1 when the first argument is smaller than the 
second and 1 when the second one is smaller than the first one
 | b m o p | SUBSTR(string, position [, substringLength ]) | Returns a portion 
of *string*, beginning at character *position*, *substringLength* characters 
long. SUBSTR calculates lengths using characters as defined by the input 
character set
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java 
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index 82fb160d9b..523cbe0609 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -6069,6 +6069,40 @@ public class SqlOperatorTest {
     f.checkBoolean("ends_with(x'', x'')", true);
   }
 
+  /** Tests the {@code SPLIT} operator: availability only under the BigQuery
+   * library, character and binary operands, NULL operands, and the error
+   * raised for a binary operand without a delimiter. */
+  @Test void testSplitFunction() {
+    final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.SPLIT);
+    // Without the BigQuery library, SPLIT is not resolved.
+    f0.checkFails("^split('hello')^",
+        "No match found for function signature SPLIT\\(<CHARACTER>\\)",
+        false);
+    final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.BIG_QUERY);
+    f.checkScalar("SPLIT('h,e,l,l,o')", "[h, e, l, l, o]",
+        "CHAR(9) NOT NULL ARRAY NOT NULL");
+    f.checkScalar("SPLIT('h-e-l-l-o', '-')", "[h, e, l, l, o]",
+        "CHAR(9) NOT NULL ARRAY NOT NULL");
+    f.checkScalar("SPLIT('hello', '-')", "[hello]",
+        "CHAR(5) NOT NULL ARRAY NOT NULL");
+    f.checkScalar("SPLIT('')", "[]",
+        "CHAR(0) NOT NULL ARRAY NOT NULL");
+    f.checkScalar("SPLIT('', '-')", "[]",
+        "CHAR(0) NOT NULL ARRAY NOT NULL");
+    // A NULL operand in either position yields NULL.
+    f.checkNull("SPLIT(null)");
+    f.checkNull("SPLIT('hello', null)");
+
+    // In ASCII, x'41' = 'A', x'42' = 'B', x'43' = 'C'
+    f.checkScalar("SPLIT(x'414243', x'ff')", "[ABC]",
+        "BINARY(3) NOT NULL ARRAY NOT NULL");
+    f.checkScalar("SPLIT(x'414243', x'41')", "[, BC]",
+        "BINARY(3) NOT NULL ARRAY NOT NULL");
+    f.checkScalar("SPLIT(x'414243', x'42')", "[A, C]",
+        "BINARY(3) NOT NULL ARRAY NOT NULL");
+    f.checkScalar("SPLIT(x'414243', x'43')", "[AB, ]",
+        "BINARY(3) NOT NULL ARRAY NOT NULL");
+    // Binary input has no default delimiter, so the one-argument form fails.
+    f.checkFails("^SPLIT(x'aabbcc')^",
+        "Call to function 'SPLIT' with argument of type 'BINARY\\(3\\)' "
+            + "requires extra delimiter argument", false);
+  }
+
   /** Tests the {@code SUBSTRING} operator. Many test cases that used to be
    * have been moved to {@link SubFunChecker#assertSubFunReturns}, and are
    * called for both {@code SUBSTRING} and {@code SUBSTR}. */

Reply via email to