This is an automated email from the ASF dual-hosted git repository. jhyde pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/calcite.git
commit d2ca97e79917a3ced509421509189e0358ba774e Author: Tanner Clary <[email protected]> AuthorDate: Mon Mar 13 18:38:03 2023 -0700 [CALCITE-5580] Add SPLIT function (enabled in BigQuery library) Close apache/calcite#3109 --- babel/src/test/resources/sql/big-query.iq | 82 ++++++++++++++++++++++ .../calcite/adapter/enumerable/RexImpTable.java | 2 + .../apache/calcite/runtime/CalciteResource.java | 4 ++ .../org/apache/calcite/runtime/SqlFunctions.java | 46 ++++++++++++ .../org/apache/calcite/sql/SqlBasicFunction.java | 46 +++++++++--- .../apache/calcite/sql/SqlWithinGroupOperator.java | 2 +- .../calcite/sql/fun/SqlLibraryOperators.java | 26 +++++++ .../org/apache/calcite/sql/type/OperandTypes.java | 9 +++ .../calcite/runtime/CalciteResource.properties | 1 + .../org/apache/calcite/test/SqlFunctionsTest.java | 68 ++++++++++++++++++ site/_docs/reference.md | 1 + .../org/apache/calcite/test/SqlOperatorTest.java | 34 +++++++++ 12 files changed, 309 insertions(+), 12 deletions(-) diff --git a/babel/src/test/resources/sql/big-query.iq b/babel/src/test/resources/sql/big-query.iq index 9b6a0fc51b..e3723221a0 100755 --- a/babel/src/test/resources/sql/big-query.iq +++ b/babel/src/test/resources/sql/big-query.iq @@ -648,6 +648,88 @@ SELECT (19 % 19) as result; !ok +##################################################################### +# SPLIT +# +# SPLIT(string [, delimiter]) +# +# Splits string using the delimiter argument. For STRING, the default +# delimiter is the comma. +# +# Returns a STRING array as result. +WITH letters AS + (SELECT '' as letter_group + UNION ALL + SELECT 'a' as letter_group + UNION ALL + SELECT 'b c d' as letter_group) +SELECT SPLIT(letter_group, ' ') as example +FROM letters; ++-----------+ +| example | ++-----------+ +| [] | +| [a] | +| [b, c, d] | ++-----------+ +(3 rows) + +!ok + +SELECT SPLIT("h,e,l,l,o") as result; ++-----------------+ +| result | ++-----------------+ +| [h, e, l, l, o] | ++-----------------+ +(1 row) + +!ok + +SELECT SPLIT("") as result; ++--------+ +| result | ++--------+ +| [] | ++--------+ +(1 row) + +!ok + +# Careful to treat the delimiter as a string, not a regular expression +SELECT SPLIT("abc.de.", ".") as result; ++-------------+ +| result | ++-------------+ +| [abc, de, ] | ++-------------+ +(1 row) + +!ok + +WITH letters AS + (SELECT x'' as letter_group + UNION ALL + SELECT x'41' as letter_group + UNION ALL + SELECT x'42ff43ff44' as letter_group) +SELECT SPLIT(letter_group, x'ff') as example +FROM letters; ++-----------+ +| example | ++-----------+ +| [] | +| [A] | +| [B, C, D] | ++-----------+ +(3 rows) + +!ok + +SELECT SPLIT(x'abc2') as result; +Call to function 'SPLIT' with argument of type 'BINARY(2)' requires extra delimiter argument +!error + ##################################################################### # STRING # diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java index 503bee6e19..dabf91393d 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java @@ -169,6 +169,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.SHA1; import static org.apache.calcite.sql.fun.SqlLibraryOperators.SINH; import static org.apache.calcite.sql.fun.SqlLibraryOperators.SOUNDEX; import static org.apache.calcite.sql.fun.SqlLibraryOperators.SPACE; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.SPLIT; import static org.apache.calcite.sql.fun.SqlLibraryOperators.STARTS_WITH; import static org.apache.calcite.sql.fun.SqlLibraryOperators.STRCMP; import static org.apache.calcite.sql.fun.SqlLibraryOperators.TANH; @@ -443,6 +444,7 @@ public class RexImpTable { defineMethod(SOUNDEX, BuiltInMethod.SOUNDEX.method, NullPolicy.STRICT); defineMethod(DIFFERENCE, BuiltInMethod.DIFFERENCE.method, NullPolicy.STRICT); defineMethod(REVERSE, BuiltInMethod.REVERSE.method, NullPolicy.STRICT); + defineMethod(SPLIT, "split", NullPolicy.STRICT); map.put(TRIM, new TrimImplementor()); diff --git a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java index 98111b314e..02a7b4aa49 100644 --- a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java +++ b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java @@ -598,6 +598,10 @@ public interface CalciteResource { ExInst<SqlValidatorException> argumentMustBeValidPrecision(String a0, int a1, int a2); + @BaseMessage("Call to function ''{0}'' with argument of type ''{1}'' requires extra delimiter argument") + ExInst<SqlValidatorException> delimiterIsRequired(String functionName, + String argumentTypeName); + @BaseMessage("Wrong arguments for table function ''{0}'' call. Expected ''{1}'', actual ''{2}''") ExInst<CalciteException> illegalArgumentForTableFunctionCall(String a0, String a1, String a2); diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java index abbec30a08..50e4cd043c 100644 --- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java +++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java @@ -49,6 +49,7 @@ import org.apache.commons.codec.language.Soundex; import com.google.common.base.Splitter; import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; import org.checkerframework.checker.nullness.qual.Nullable; import org.checkerframework.checker.nullness.qual.PolyNull; @@ -463,6 +464,51 @@ public class SqlFunctions { return s0.startsWith(s1); } + /** SQL {@code SPLIT(string, string)} function. */ + public static List<String> split(String s, String delimiter) { + if (s.isEmpty()) { + return ImmutableList.of(); + } + if (delimiter.isEmpty()) { + return ImmutableList.of(s); // prevent mischief + } + final ImmutableList.Builder<String> list = ImmutableList.builder(); + for (int i = 0;;) { + int j = s.indexOf(delimiter, i); + if (j < 0) { + list.add(s.substring(i)); + return list.build(); + } + list.add(s.substring(i, j)); + i = j + delimiter.length(); + } + } + + /** SQL {@code SPLIT(string)} function. */ + public static List<String> split(String s) { + return split(s, ","); + } + + /** SQL {@code SPLIT(binary, binary)} function. */ + public static List<ByteString> split(ByteString s, ByteString delimiter) { + if (s.length() == 0) { + return ImmutableList.of(); + } + if (delimiter.length() == 0) { + return ImmutableList.of(s); // prevent mischief + } + final ImmutableList.Builder<ByteString> list = ImmutableList.builder(); + for (int i = 0;;) { + int j = s.indexOf(delimiter, i); + if (j < 0) { + list.add(s.substring(i)); + return list.build(); + } + list.add(s.substring(i, j)); + i = j + delimiter.length(); + } + } + /** SQL SUBSTRING(string FROM ...) function. */ public static String substring(String c, int s) { final int s0 = s - 1; diff --git a/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java b/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java index 5787938a44..a811228bea 100644 --- a/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java +++ b/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java @@ -23,6 +23,7 @@ import org.apache.calcite.sql.type.SqlOperandTypeInference; import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.apache.calcite.sql.validate.SqlMonotonicity; import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.sql.validate.SqlValidatorScope; import org.checkerframework.checker.nullness.qual.Nullable; @@ -45,6 +46,7 @@ public class SqlBasicFunction extends SqlFunction { private final SqlSyntax syntax; private final boolean deterministic; private final SqlOperandHandler operandHandler; + private final int callValidator; private final Function<SqlOperatorBinding, SqlMonotonicity> monotonicityInference; //~ Constructors ----------------------------------------------------------- @@ -60,6 +62,7 @@ public class SqlBasicFunction extends SqlFunction { * @param operandTypeInference Strategy to use for parameter type inference * @param operandHandler Strategy to use for handling operands * @param operandTypeChecker Strategy to use for parameter type checking + * @param callValidator Strategy to validate calls * @param category Categorization for function * @param monotonicityInference Strategy to infer monotonicity of a call */ @@ -68,6 +71,7 @@ public class SqlBasicFunction extends SqlFunction { @Nullable SqlOperandTypeInference operandTypeInference, SqlOperandHandler operandHandler, SqlOperandTypeChecker operandTypeChecker, + Integer callValidator, SqlFunctionCategory category, Function<SqlOperatorBinding, SqlMonotonicity> monotonicityInference) { super(name, kind, @@ -77,6 +81,7 @@ public class SqlBasicFunction extends SqlFunction { this.syntax = requireNonNull(syntax, "syntax"); this.deterministic = deterministic; this.operandHandler = requireNonNull(operandHandler, "operandHandler"); + this.callValidator = requireNonNull(callValidator, "callValidator"); this.monotonicityInference = requireNonNull(monotonicityInference, "monotonicityInference"); } @@ -88,7 +93,7 @@ public class SqlBasicFunction extends SqlFunction { SqlOperandTypeChecker operandTypeChecker) { return new SqlBasicFunction(kind.name(), kind, SqlSyntax.FUNCTION, true, returnTypeInference, null, - OperandHandlers.DEFAULT, operandTypeChecker, + OperandHandlers.DEFAULT, operandTypeChecker, 0, SqlFunctionCategory.SYSTEM, call -> SqlMonotonicity.NOT_MONOTONIC); } @@ -100,7 +105,7 @@ public class SqlBasicFunction extends SqlFunction { SqlOperandTypeChecker operandTypeChecker) { return new SqlBasicFunction(name, SqlKind.OTHER_FUNCTION, SqlSyntax.FUNCTION, true, returnTypeInference, null, - OperandHandlers.DEFAULT, operandTypeChecker, + OperandHandlers.DEFAULT, operandTypeChecker, 0, SqlFunctionCategory.NUMERIC, call -> SqlMonotonicity.NOT_MONOTONIC); } @@ -111,7 +116,7 @@ public class SqlBasicFunction extends SqlFunction { SqlOperandTypeChecker operandTypeChecker, SqlFunctionCategory category) { return new SqlBasicFunction(name, SqlKind.OTHER_FUNCTION, SqlSyntax.FUNCTION, true, returnTypeInference, null, - OperandHandlers.DEFAULT, operandTypeChecker, + OperandHandlers.DEFAULT, operandTypeChecker, 0, category, call -> SqlMonotonicity.NOT_MONOTONIC); } @@ -141,32 +146,40 @@ public class SqlBasicFunction extends SqlFunction { return operandHandler.rewriteCall(validator, call); } + @Override public void validateCall(SqlCall call, SqlValidator validator, + SqlValidatorScope scope, SqlValidatorScope operandScope) { + super.validateCall(call, validator, scope, operandScope); + } + /** Returns a copy of this function with a given name. */ public SqlBasicFunction withName(String name) { return new SqlBasicFunction(name, kind, syntax, deterministic, getReturnTypeInference(), getOperandTypeInference(), operandHandler, - getOperandTypeChecker(), getFunctionType(), monotonicityInference); + getOperandTypeChecker(), callValidator, + getFunctionType(), monotonicityInference); } /** Returns a copy of this function with a given kind. */ public SqlBasicFunction withKind(SqlKind kind) { return new SqlBasicFunction(getName(), kind, syntax, deterministic, getReturnTypeInference(), getOperandTypeInference(), operandHandler, - getOperandTypeChecker(), getFunctionType(), monotonicityInference); + getOperandTypeChecker(), callValidator, + getFunctionType(), monotonicityInference); } /** Returns a copy of this function with a given category. */ public SqlBasicFunction withFunctionType(SqlFunctionCategory category) { return new SqlBasicFunction(getName(), kind, syntax, deterministic, getReturnTypeInference(), getOperandTypeInference(), operandHandler, - getOperandTypeChecker(), category, monotonicityInference); + getOperandTypeChecker(), callValidator, category, monotonicityInference); } /** Returns a copy of this function with a given syntax. */ public SqlBasicFunction withSyntax(SqlSyntax syntax) { return new SqlBasicFunction(getName(), kind, syntax, deterministic, getReturnTypeInference(), getOperandTypeInference(), operandHandler, - getOperandTypeChecker(), getFunctionType(), monotonicityInference); + getOperandTypeChecker(), callValidator, + getFunctionType(), monotonicityInference); } /** Returns a copy of this function with a given strategy for inferring @@ -175,7 +188,8 @@ public class SqlBasicFunction extends SqlFunction { SqlOperandTypeInference operandTypeInference) { return new SqlBasicFunction(getName(), kind, syntax, deterministic, getReturnTypeInference(), operandTypeInference, operandHandler, - getOperandTypeChecker(), getFunctionType(), monotonicityInference); + getOperandTypeChecker(), callValidator, + getFunctionType(), monotonicityInference); } /** Returns a copy of this function with a given strategy for handling @@ -183,13 +197,15 @@ public class SqlBasicFunction extends SqlFunction { public SqlBasicFunction withOperandHandler(SqlOperandHandler operandHandler) { return new SqlBasicFunction(getName(), kind, syntax, deterministic, getReturnTypeInference(), getOperandTypeInference(), operandHandler, - getOperandTypeChecker(), getFunctionType(), monotonicityInference); + getOperandTypeChecker(), callValidator, + getFunctionType(), monotonicityInference); } /** Returns a copy of this function with a given determinism. */ public SqlBasicFunction withDeterministic(boolean deterministic) { return new SqlBasicFunction(getName(), kind, syntax, deterministic, getReturnTypeInference(), getOperandTypeInference(), operandHandler, - getOperandTypeChecker(), getFunctionType(), monotonicityInference); + getOperandTypeChecker(), callValidator, + getFunctionType(), monotonicityInference); } /** Returns a copy of this function with a given strategy for inferring @@ -198,6 +214,14 @@ public class SqlBasicFunction extends SqlFunction { Function<SqlOperatorBinding, SqlMonotonicity> monotonicityInference) { return new SqlBasicFunction(getName(), kind, syntax, deterministic, getReturnTypeInference(), getOperandTypeInference(), operandHandler, - getOperandTypeChecker(), getFunctionType(), monotonicityInference); + getOperandTypeChecker(), callValidator, + getFunctionType(), monotonicityInference); + } + + public SqlFunction withValidation(int callValidator) { + return new SqlBasicFunction(getName(), kind, syntax, deterministic, + getReturnTypeInference(), getOperandTypeInference(), operandHandler, + getOperandTypeChecker(), callValidator, + getFunctionType(), monotonicityInference); } } diff --git a/core/src/main/java/org/apache/calcite/sql/SqlWithinGroupOperator.java b/core/src/main/java/org/apache/calcite/sql/SqlWithinGroupOperator.java index 87f6fb7b8f..d335948322 100644 --- a/core/src/main/java/org/apache/calcite/sql/SqlWithinGroupOperator.java +++ b/core/src/main/java/org/apache/calcite/sql/SqlWithinGroupOperator.java @@ -106,7 +106,7 @@ public class SqlWithinGroupOperator extends SqlBinaryOperator { } } - private SqlNode getCollationColumn(SqlCall call) { + private static SqlNode getCollationColumn(SqlCall call) { return ((SqlNodeList) call.operand(1)).get(0); } diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java index cde127a424..2cd4def708 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java @@ -41,9 +41,11 @@ import org.apache.calcite.sql.type.SqlReturnTypeInference; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeTransforms; +import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.util.Litmus; import org.apache.calcite.util.Optionality; +import org.apache.calcite.util.Static; import org.checkerframework.checker.nullness.qual.Nullable; @@ -301,6 +303,30 @@ public abstract class SqlLibraryOperators { OperandTypes.STRING) .withFunctionType(SqlFunctionCategory.STRING); + /** The "SPLIT(string [, delimiter])" function. */ + @LibraryOperator(libraries = {BIG_QUERY}) + public static final SqlFunction SPLIT = + SqlBasicFunction.create("SPLIT", + ReturnTypes.ARG0 + .andThen(SqlLibraryOperators::deriveTypeSplit) + .andThen(SqlTypeTransforms.TO_ARRAY), + OperandTypes.or(OperandTypes.CHARACTER_CHARACTER, + OperandTypes.CHARACTER, + OperandTypes.BINARY_BINARY, + OperandTypes.BINARY), + SqlFunctionCategory.STRING) + .withValidation(3); + + static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, + RelDataType type) { + if (SqlTypeUtil.isBinary(type) && operatorBinding.getOperandCount() == 1) { + throw operatorBinding.newError( + Static.RESOURCE.delimiterIsRequired( + operatorBinding.getOperator().getName(), type.toString())); + } + return type; + } + /** Generic "SUBSTR(string, position [, substringLength ])" function. */ private static final SqlBasicFunction SUBSTR = SqlBasicFunction.create("SUBSTR", ReturnTypes.ARG0_NULLABLE_VARYING, diff --git a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java index 7c191977aa..c336f87b84 100644 --- a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java +++ b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java @@ -374,12 +374,21 @@ public abstract class OperandTypes { public static final SqlSingleOperandTypeChecker BINARY = family(SqlTypeFamily.BINARY); + public static final SqlSingleOperandTypeChecker BINARY_BINARY = + family(SqlTypeFamily.BINARY, SqlTypeFamily.BINARY); + public static final SqlSingleOperandTypeChecker STRING = family(SqlTypeFamily.STRING); public static final FamilyOperandTypeChecker STRING_STRING = family(SqlTypeFamily.STRING, SqlTypeFamily.STRING); + public static final FamilyOperandTypeChecker STRING_OPTIONAL_STRING = + family( + ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING), + // Second operand optional (operand index 0, 1) + number -> number == 1); + public static final FamilyOperandTypeChecker STRING_STRING_STRING = family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING); diff --git a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties index 874e96043a..ad0acc822b 100644 --- a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties +++ b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties @@ -199,6 +199,7 @@ NullIllegal=Illegal use of ''NULL'' DynamicParamIllegal=Illegal use of dynamic parameter InvalidBoolean=''{0}'' is not a valid boolean value ArgumentMustBeValidPrecision=Argument to function ''{0}'' must be a valid precision between ''{1,number,#}'' and ''{2,number,#}'' +DelimiterIsRequired=Call to function ''{0}'' with argument of type ''{1}'' requires extra delimiter argument IllegalArgumentForTableFunctionCall=Wrong arguments for table function ''{0}'' call. Expected ''{1}'', actual ''{2}'' CannotCallTableFunctionHere=Cannot call table function here: ''{0}'' InvalidTimeFrame=''{0}'' is not a valid time frame diff --git a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java index 35d0375b4e..704b020871 100644 --- a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java +++ b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java @@ -22,6 +22,8 @@ import org.apache.calcite.runtime.CalciteException; import org.apache.calcite.runtime.SqlFunctions; import org.apache.calcite.runtime.Utilities; +import com.google.common.collect.ImmutableList; + import org.junit.jupiter.api.Test; import java.math.BigDecimal; @@ -81,6 +83,14 @@ import static java.nio.charset.StandardCharsets.UTF_8; * rather than {@code assertEquals}. */ class SqlFunctionsTest { + static <E> List<E> list(E... es) { + return Arrays.asList(es); + } + + static <E> List<E> list() { + return ImmutableList.of(); + } + @Test void testCharLength() { assertThat(charLength("xyz"), is(3)); } @@ -507,6 +517,64 @@ class SqlFunctionsTest { assertThat(SqlFunctions.sround(-12000, -5), within(0d, 0.001)); } + @Test void testSplit() { + assertThat("no occurrence of delimiter", + SqlFunctions.split("abc", ","), is(list("abc"))); + assertThat("delimiter in middle", + SqlFunctions.split("abc", "b"), is(list("a", "c"))); + assertThat("delimiter at end", + SqlFunctions.split("abc", "c"), is(list("ab", ""))); + assertThat("delimiter at start", + SqlFunctions.split("abc", "a"), is(list("", "bc"))); + assertThat("empty delimiter", + SqlFunctions.split("abc", ""), is(list("abc"))); + assertThat("empty delimiter and string", + SqlFunctions.split("", ""), is(list())); + assertThat("empty string", + SqlFunctions.split("", ","), is(list())); + assertThat("long delimiter (occurs at start)", + SqlFunctions.split("abracadabra", "ab"), is(list("", "racad", "ra"))); + assertThat("long delimiter (occurs at end)", + SqlFunctions.split("sabracadabrab", "ab"), + is(list("s", "racad", "r", ""))); + + // Same as above but for ByteString + final ByteString a = ByteString.of("aa", 16); + final ByteString ab = ByteString.of("aabb", 16); + final ByteString abc = ByteString.of("aabbcc", 16); + final ByteString abracadabra = ByteString.of("aabb44aaccaaddaabb44aa", 16); + final ByteString b = ByteString.of("bb", 16); + final ByteString bc = ByteString.of("bbcc", 16); + final ByteString c = ByteString.of("cc", 16); + final ByteString f = ByteString.of("ff", 16); + final ByteString r = ByteString.of("44", 16); + final ByteString ra = ByteString.of("44aa", 16); + final ByteString racad = ByteString.of("44aaccaadd", 16); + final ByteString empty = ByteString.of("", 16); + final ByteString s = ByteString.of("55", 16); + final ByteString sabracadabrab = + ByteString.of("55", 16).concat(abracadabra).concat(b); + assertThat("no occurrence of delimiter", + SqlFunctions.split(abc, f), is(list(abc))); + assertThat("delimiter in middle", + SqlFunctions.split(abc, b), is(list(a, c))); + assertThat("delimiter at end", + SqlFunctions.split(abc, c), is(list(ab, empty))); + assertThat("delimiter at start", + SqlFunctions.split(abc, a), is(list(empty, bc))); + assertThat("empty delimiter", + SqlFunctions.split(abc, empty), is(list(abc))); + assertThat("empty delimiter and string", + SqlFunctions.split(empty, empty), is(list())); + assertThat("empty string", + SqlFunctions.split(empty, f), is(list())); + assertThat("long delimiter (occurs at start)", + SqlFunctions.split(abracadabra, ab), is(list(empty, racad, ra))); + assertThat("long delimiter (occurs at end)", + SqlFunctions.split(sabracadabrab, ab), + is(list(s, racad, r, empty))); + } + @Test void testByteString() { final byte[] bytes = {(byte) 0xAB, (byte) 0xFF}; final ByteString byteString = new ByteString(bytes); diff --git a/site/_docs/reference.md b/site/_docs/reference.md index bd37dba74d..ddd8dd7e3a 100644 --- a/site/_docs/reference.md +++ b/site/_docs/reference.md @@ -2713,6 +2713,7 @@ BigQuery's type system uses confusingly different names for types and functions: | b o | SINH(numeric) | Returns the hyperbolic sine of *numeric* | b m o p | SOUNDEX(string) | Returns the phonetic representation of *string*; throws if *string* is encoded with multi-byte encoding such as UTF-8 | m | SPACE(integer) | Returns a string of *integer* spaces; returns an empty string if *integer* is less than 1 +| b | SPLIT(string [, delimiter ]) | Returns the string array of *string* split at *delimiter* (if omitted, default is comma) | b | STARTS_WITH(string1, string2) | Returns whether *string2* is a prefix of *string1* | m | STRCMP(string, string) | Returns 0 if both of the strings are same and returns -1 when the first argument is smaller than the second and 1 when the second one is smaller than the first one | b m o p | SUBSTR(string, position [, substringLength ]) | Returns a portion of *string*, beginning at character *position*, *substringLength* characters long. SUBSTR calculates lengths using characters as defined by the input character set diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java index 82fb160d9b..523cbe0609 100644 --- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java +++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java @@ -6069,6 +6069,40 @@ public class SqlOperatorTest { f.checkBoolean("ends_with(x'', x'')", true); } + /** Tests the {@code SPLIT} operator. */ + @Test void testSplitFunction() { + final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.SPLIT); + f0.checkFails("^split('hello')^", + "No match found for function signature SPLIT\\(<CHARACTER>\\)", + false); + final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.BIG_QUERY); + f.checkScalar("SPLIT('h,e,l,l,o')", "[h, e, l, l, o]", + "CHAR(9) NOT NULL ARRAY NOT NULL"); + f.checkScalar("SPLIT('h-e-l-l-o', '-')", "[h, e, l, l, o]", + "CHAR(9) NOT NULL ARRAY NOT NULL"); + f.checkScalar("SPLIT('hello', '-')", "[hello]", + "CHAR(5) NOT NULL ARRAY NOT NULL"); + f.checkScalar("SPLIT('')", "[]", + "CHAR(0) NOT NULL ARRAY NOT NULL"); + f.checkScalar("SPLIT('', '-')", "[]", + "CHAR(0) NOT NULL ARRAY NOT NULL"); + f.checkNull("SPLIT(null)"); + f.checkNull("SPLIT('hello', null)"); + + // In ASCII, x'41' = 'A', x'42' = 'B', x'43' = 'C' + f.checkScalar("SPLIT(x'414243', x'ff')", "[ABC]", + "BINARY(3) NOT NULL ARRAY NOT NULL"); + f.checkScalar("SPLIT(x'414243', x'41')", "[, BC]", + "BINARY(3) NOT NULL ARRAY NOT NULL"); + f.checkScalar("SPLIT(x'414243', x'42')", "[A, C]", + "BINARY(3) NOT NULL ARRAY NOT NULL"); + f.checkScalar("SPLIT(x'414243', x'43')", "[AB, ]", + "BINARY(3) NOT NULL ARRAY NOT NULL"); + f.checkFails("^SPLIT(x'aabbcc')^", + "Call to function 'SPLIT' with argument of type 'BINARY\\(3\\)' " + + "requires extra delimiter argument", false); + } + /** Tests the {@code SUBSTRING} operator. Many test cases that used to be * have been moved to {@link SubFunChecker#assertSubFunReturns}, and are * called for both {@code SUBSTRING} and {@code SUBSTR}. */
