This is an automated email from the ASF dual-hosted git repository. rubenql pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push: new 18ba66da96 [CALCITE-5728] Add ARRAY_TO_STRING function (enabled in BigQuery library) 18ba66da96 is described below commit 18ba66da96aa5e68de542573e5509f1f937cf340 Author: zoudan <zou...@bytedance.com> AuthorDate: Fri Jun 2 15:25:31 2023 +0800 [CALCITE-5728] Add ARRAY_TO_STRING function (enabled in BigQuery library) --- .../calcite/adapter/enumerable/RexImpTable.java | 2 ++ .../org/apache/calcite/runtime/SqlFunctions.java | 36 ++++++++++++++++++++++ .../main/java/org/apache/calcite/sql/SqlKind.java | 3 ++ .../calcite/sql/fun/SqlLibraryOperators.java | 22 +++++++------ .../calcite/sql/fun/SqlRegexpReplaceFunction.java | 5 +-- .../org/apache/calcite/sql/type/OperandTypes.java | 27 ++++++++++++++++ .../org/apache/calcite/sql/type/ReturnTypes.java | 13 ++++++++ site/_docs/reference.md | 1 + .../org/apache/calcite/test/SqlOperatorTest.java | 25 +++++++++++++++ 9 files changed, 120 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java index e7b80c68d6..f9ca53ecbe 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java @@ -131,6 +131,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_MIN; import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_REPEAT; import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_REVERSE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_SIZE; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_TO_STRING; import static org.apache.calcite.sql.fun.SqlLibraryOperators.ARRAY_UNION; import static org.apache.calcite.sql.fun.SqlLibraryOperators.ASINH; import static org.apache.calcite.sql.fun.SqlLibraryOperators.ATANH; @@ -705,6 +706,7 @@ public class RexImpTable { defineMethod(ARRAY_REPEAT, BuiltInMethod.ARRAY_REPEAT.method, NullPolicy.NONE); defineMethod(ARRAY_REVERSE, BuiltInMethod.ARRAY_REVERSE.method, NullPolicy.STRICT); defineMethod(ARRAY_SIZE, BuiltInMethod.COLLECTION_SIZE.method, NullPolicy.STRICT); + defineMethod(ARRAY_TO_STRING, "arrayToString", NullPolicy.STRICT); defineMethod(ARRAY_UNION, BuiltInMethod.ARRAY_UNION.method, NullPolicy.ANY); defineMethod(MAP_ENTRIES, BuiltInMethod.MAP_ENTRIES.method, NullPolicy.STRICT); defineMethod(MAP_KEYS, BuiltInMethod.MAP_KEYS.method, NullPolicy.STRICT); diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java index 7547a5cbe1..401f98da5f 100644 --- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java +++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java @@ -4119,6 +4119,42 @@ public class SqlFunctions { return list; } + /** SQL {@code ARRAY_TO_STRING(array, delimiter)} function. */ + public static String arrayToString(List list, String delimiter) { + return arrayToString(list, delimiter, null); + } + + /** SQL {@code ARRAY_TO_STRING(array, delimiter, nullText)} function. */ + public static String arrayToString(List list, String delimiter, @Nullable String nullText) { + StringBuilder sb = new StringBuilder(); + boolean isFirst = true; + for (Object item : list) { + String str; + if (item == null) { + if (nullText == null) { + continue; + } else { + str = nullText; + } + } else if (item instanceof String) { + str = (String) item; + } else if (item instanceof ByteString) { + str = item.toString(); + } else { + throw new IllegalStateException( + "arrayToString supports only String or ByteString, but got " + + item.getClass().getName()); + } + + if (!isFirst) { + sb.append(delimiter); + } + sb.append(str); + isFirst = false; + } + return sb.toString(); + } + /** * Function that, given a certain List containing single-item structs (i.e. arrays / lists with * a single item), builds an Enumerable that returns those single items inside the structs. diff --git a/core/src/main/java/org/apache/calcite/sql/SqlKind.java b/core/src/main/java/org/apache/calcite/sql/SqlKind.java index 4861005730..9a3a4c806f 100644 --- a/core/src/main/java/org/apache/calcite/sql/SqlKind.java +++ b/core/src/main/java/org/apache/calcite/sql/SqlKind.java @@ -713,6 +713,9 @@ public enum SqlKind { /** {@code ARRAY_SIZE} function (Spark semantics). */ ARRAY_SIZE, + /** {@code ARRAY_TO_STRING} function (BigQuery semantics). */ + ARRAY_TO_STRING, + /** {@code ARRAY_UNION} function (Spark semantics). */ ARRAY_UNION, diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java index 12c48c9bf0..7a3e4f193e 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java @@ -977,6 +977,13 @@ public abstract class SqlLibraryOperators { OperandTypes.SAME_SAME, OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.ARRAY))); + /** The "ARRAY_TO_STRING(array, delimiter [, nullText ])" function. */ + @LibraryOperator(libraries = {BIG_QUERY}) + public static final SqlFunction ARRAY_TO_STRING = + SqlBasicFunction.create(SqlKind.ARRAY_TO_STRING, + ReturnTypes.VARCHAR_NULLABLE, + OperandTypes.STRING_ARRAY_CHARACTER_OPTIONAL_CHARACTER); + /** The "SORT_ARRAY(array)" function (Spark). */ @LibraryOperator(libraries = {SPARK}) public static final SqlFunction SORT_ARRAY = @@ -1022,8 +1029,7 @@ public abstract class SqlLibraryOperators { @LibraryOperator(libraries = {MYSQL}) public static final SqlFunction TO_BASE64 = SqlBasicFunction.create("TO_BASE64", - ReturnTypes.explicit(SqlTypeName.VARCHAR) - .andThen(SqlTypeTransforms.TO_NULLABLE), + ReturnTypes.VARCHAR_NULLABLE, OperandTypes.STRING.or(OperandTypes.BINARY), SqlFunctionCategory.STRING); @@ -1448,32 +1454,28 @@ public abstract class SqlLibraryOperators { @LibraryOperator(libraries = {BIG_QUERY, MYSQL, POSTGRESQL}) public static final SqlFunction MD5 = SqlBasicFunction.create("MD5", - ReturnTypes.explicit(SqlTypeName.VARCHAR) - .andThen(SqlTypeTransforms.TO_NULLABLE), + ReturnTypes.VARCHAR_NULLABLE, OperandTypes.STRING.or(OperandTypes.BINARY), SqlFunctionCategory.STRING); @LibraryOperator(libraries = {BIG_QUERY, MYSQL, POSTGRESQL}) public static final SqlFunction SHA1 = SqlBasicFunction.create("SHA1", - ReturnTypes.explicit(SqlTypeName.VARCHAR) - .andThen(SqlTypeTransforms.TO_NULLABLE), + ReturnTypes.VARCHAR_NULLABLE, OperandTypes.STRING.or(OperandTypes.BINARY), SqlFunctionCategory.STRING); @LibraryOperator(libraries = {BIG_QUERY, POSTGRESQL}) public static final SqlFunction SHA256 = SqlBasicFunction.create("SHA256", - ReturnTypes.explicit(SqlTypeName.VARCHAR) - .andThen(SqlTypeTransforms.TO_NULLABLE), + ReturnTypes.VARCHAR_NULLABLE, OperandTypes.STRING.or(OperandTypes.BINARY), SqlFunctionCategory.STRING); @LibraryOperator(libraries = {BIG_QUERY, POSTGRESQL}) public static final SqlFunction SHA512 = SqlBasicFunction.create("SHA512", - ReturnTypes.explicit(SqlTypeName.VARCHAR) - .andThen(SqlTypeTransforms.TO_NULLABLE), + ReturnTypes.VARCHAR_NULLABLE, OperandTypes.STRING.or(OperandTypes.BINARY), SqlFunctionCategory.STRING); diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlRegexpReplaceFunction.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlRegexpReplaceFunction.java index 777e184810..e56e74e2ec 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlRegexpReplaceFunction.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlRegexpReplaceFunction.java @@ -25,8 +25,6 @@ import org.apache.calcite.sql.type.OperandTypes; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlOperandCountRanges; import org.apache.calcite.sql.type.SqlTypeFamily; -import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.calcite.sql.type.SqlTypeTransforms; import java.util.ArrayList; import java.util.List; @@ -40,8 +38,7 @@ public class SqlRegexpReplaceFunction extends SqlFunction { public SqlRegexpReplaceFunction() { super("REGEXP_REPLACE", SqlKind.OTHER_FUNCTION, - ReturnTypes.explicit(SqlTypeName.VARCHAR) - .andThen(SqlTypeTransforms.TO_NULLABLE), + ReturnTypes.VARCHAR_NULLABLE, null, null, SqlFunctionCategory.STRING); } diff --git a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java index c663ea1024..1b792233d8 100644 --- a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java +++ b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java @@ -474,6 +474,33 @@ public abstract class OperandTypes { .or(OperandTypes.family(SqlTypeFamily.MAP)) .or(OperandTypes.family(SqlTypeFamily.ANY)); + public static final SqlOperandTypeChecker STRING_ARRAY_CHARACTER_OPTIONAL_CHARACTER = + new FamilyOperandTypeChecker( + ImmutableList.of(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER), + i -> i == 2) { + @SuppressWarnings("argument.type.incompatible") + @Override public boolean checkOperandTypes( + SqlCallBinding callBinding, + boolean throwOnFailure) { + if (!super.checkOperandTypes(callBinding, throwOnFailure)) { + return false; + } + RelDataType elementType = callBinding.getOperandType(0).getComponentType(); + if (elementType == null || !SqlTypeUtil.isString(elementType)) { + if (throwOnFailure) { + throw callBinding.newValidationSignatureError(); + } + return false; + } + return true; + } + + @Override public String getAllowedSignatures(SqlOperator op, String opName) { + return opName + "(<STRING ARRAY>, <CHARACTER>[, <CHARACTER>])"; + } + }; + + /** Checks that returns whether a value is a multiset or an array. * Cf Java, where list and set are collections but a map is not. */ public static final SqlSingleOperandTypeChecker COLLECTION = diff --git a/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java b/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java index 1705f13135..d82eeaef6f 100644 --- a/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java +++ b/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java @@ -460,6 +460,19 @@ public abstract class ReturnTypes { public static final SqlReturnTypeInference VARCHAR_2000_NULLABLE = VARCHAR_2000.andThen(SqlTypeTransforms.TO_NULLABLE); + /** + * Type-inference strategy that always returns "VARCHAR". + */ + public static final SqlReturnTypeInference VARCHAR = + ReturnTypes.explicit(SqlTypeName.VARCHAR); + + /** + * Type-inference strategy that always returns "VARCHAR" with nulls + * allowed if any of the operands allow nulls. + */ + public static final SqlReturnTypeInference VARCHAR_NULLABLE = + VARCHAR.andThen(SqlTypeTransforms.TO_NULLABLE); + /** * Type-inference strategy for Histogram agg support. */ diff --git a/site/_docs/reference.md b/site/_docs/reference.md index 22387144f8..c467c9caf7 100644 --- a/site/_docs/reference.md +++ b/site/_docs/reference.md @@ -2664,6 +2664,7 @@ BigQuery's type system uses confusingly different names for types and functions: | s | ARRAY_REPEAT(element, count) | Returns the array containing element count times. | b | ARRAY_REVERSE(array) | Reverses elements of *array* | s | ARRAY_SIZE(array) | Synonym for `CARDINALITY` +| b | ARRAY_TO_STRING(array, delimiter [, nullText ])| Returns a concatenation of the elements in *array* as a STRING and take *delimiter* as the delimiter. If the *nullText* parameter is used, the function replaces any `NULL` values in the array with the value of *nullText*. If the *nullText* parameter is not used, the function omits the `NULL` value and its preceding delimiter | s | ARRAY_UNION(array1, array2) | Returns an array of the elements in the union of *array1* and *array2*, without duplicates | s | SORT_ARRAY(array [, ascendingOrder]) | Sorts the *array* in ascending or descending order according to the natural ordering of the array elements. The default order is ascending if *ascendingOrder* is not specified. Null elements will be placed at the beginning of the returned array in ascending order or at the end of the returned array in descending order | * | ASINH(numeric) | Returns the inverse hyperbolic sine of *numeric* diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java index 63fc2deb7a..b9bd7d8f78 100644 --- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java +++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java @@ -5511,6 +5511,31 @@ public class SqlOperatorTest { f.checkNull("array_length(null)"); } + @Test void testArrayToStringFunc() { + final SqlOperatorFixture f0 = fixture(); + f0.setFor(SqlLibraryOperators.ARRAY_TO_STRING); + f0.checkFails("^array_to_string(array['aa', 'b', 'c'], '-')^", "No match found for function" + + " signature ARRAY_TO_STRING\\(<CHAR\\(2\\) ARRAY>, <CHARACTER>\\)", false); + + final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.BIG_QUERY); + f.checkScalar("array_to_string(array['aa', 'b', 'c'], '-')", "aa-b-c", + "VARCHAR NOT NULL"); + f.checkScalar("array_to_string(array[null, 'aa', null, 'b', null], '-', 'empty')", + "empty-aa-empty-b-empty", "VARCHAR NOT NULL"); + f.checkScalar("array_to_string(array[null, 'aa', null, 'b', null], '-')", "aa-b", + "VARCHAR NOT NULL"); + f.checkScalar("array_to_string(array[null, x'aa', null, x'bb', null], '-')", "aa-bb", + "VARCHAR NOT NULL"); + f.checkScalar("array_to_string(array['', 'b'], '-')", "-b", "VARCHAR NOT NULL"); + f.checkScalar("array_to_string(array['', ''], '-')", "-", "VARCHAR NOT NULL"); + f.checkNull("array_to_string(null, '-')"); + f.checkNull("array_to_string(array['a', 'b', null], null)"); + f.checkFails("^array_to_string(array[1, 2, 3], '-', ' ')^", + "Cannot apply 'ARRAY_TO_STRING' to arguments of type 'ARRAY_TO_STRING" + + "\\(<INTEGER ARRAY>, <CHAR\\(1\\)>, <CHAR\\(1\\)>\\)'\\. Supported form\\(s\\):" + + " ARRAY_TO_STRING\\(<STRING ARRAY>, <CHARACTER>\\[, <CHARACTER>\\]\\)", false); + } + /** Tests {@code ARRAY_EXCEPT} function from Spark. */ @Test void testArrayExceptFunc() { final SqlOperatorFixture f0 = fixture();