This is an automated email from the ASF dual-hosted git repository.
tanner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 3f2ae2f4dd [CALCITE-5771] Apply two different NULL semantics for
CONCAT function (enabled in MySQL, Postgres, BigQuery and MSSQL)
3f2ae2f4dd is described below
commit 3f2ae2f4dd2d6b1fab7c3a91e67a6a6d28523298
Author: ILuffZhe <[email protected]>
AuthorDate: Sun Jun 11 10:47:00 2023 +0800
[CALCITE-5771] Apply two different NULL semantics for CONCAT function
(enabled in MySQL, Postgres, BigQuery and MSSQL)
Close apache/calcite#3258
---
.../calcite/adapter/enumerable/RexImpTable.java | 3 +
.../org/apache/calcite/runtime/SqlFunctions.java | 10 ++
.../main/java/org/apache/calcite/sql/SqlKind.java | 3 +
.../calcite/sql/fun/SqlLibraryOperators.java | 35 +++-
.../org/apache/calcite/sql/type/ReturnTypes.java | 7 +
.../org/apache/calcite/util/BuiltInMethod.java | 1 +
.../org/apache/calcite/test/SqlFunctionsTest.java | 21 +++
.../org/apache/calcite/test/SqlValidatorTest.java | 4 +-
core/src/test/resources/sql/functions.iq | 177 +++++++++++++++++++--
site/_docs/reference.md | 3 +-
.../org/apache/calcite/test/SqlOperatorTest.java | 21 ++-
11 files changed, 266 insertions(+), 19 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index ab4eb88d5a..1144b140fd 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -142,6 +142,7 @@ import static
org.apache.calcite.sql.fun.SqlLibraryOperators.CHR;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.COMPRESS;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.CONCAT2;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.CONCAT_FUNCTION;
+import static
org.apache.calcite.sql.fun.SqlLibraryOperators.CONCAT_FUNCTION_WITH_NULL;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.COSH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.COTH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.CSC;
@@ -492,6 +493,8 @@ public class RexImpTable {
map.put(CONCAT, new ConcatImplementor());
defineMethod(CONCAT_FUNCTION, BuiltInMethod.MULTI_STRING_CONCAT.method,
NullPolicy.STRICT);
+ defineMethod(CONCAT_FUNCTION_WITH_NULL,
+ BuiltInMethod.MULTI_STRING_CONCAT_WITH_NULL.method, NullPolicy.NONE);
defineMethod(CONCAT2, BuiltInMethod.STRING_CONCAT_WITH_NULL.method,
NullPolicy.ALL);
defineMethod(OVERLAY, BuiltInMethod.OVERLAY.method, NullPolicy.STRICT);
defineMethod(POSITION, BuiltInMethod.POSITION.method, NullPolicy.STRICT);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 401f98da5f..6157af6061 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -792,6 +792,16 @@ public class SqlFunctions {
return String.join("", args);
}
+ /** SQL {@code CONCAT(arg0, ...)} function which can accept null
+ * but never return null. Always treats null as empty string. */
+ public static String concatMultiWithNull(String... args) {
+ StringBuilder sb = new StringBuilder();
+ for (String arg : args) {
+ sb.append(arg == null ? "" : arg);
+ }
+ return sb.toString();
+ }
+
/** SQL {@code CONVERT(s, src_charset, dest_charset)} function. */
public static String convertWithCharset(String s, String srcCharset,
String destCharset) {
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlKind.java
b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
index 6f3f3a747c..06d4271e4a 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlKind.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
@@ -424,6 +424,9 @@ public enum SqlKind {
/** The two-argument {@code CONCAT} function (Oracle). */
CONCAT2,
+ /** The {@code CONCAT} function (Postgresql and MSSQL) that ignores NULL. */
+ CONCAT_WITH_NULL,
+
/** The "IF" function (BigQuery, Hive, Spark). */
IF,
diff --git
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 18de7a7c1e..71f5f1541a 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -831,16 +831,45 @@ public abstract class SqlLibraryOperators {
new SqlLikeOperator("NOT RLIKE", SqlKind.RLIKE, true, true);
/** The "CONCAT(arg, ...)" function that concatenates strings.
- * For example, "CONCAT('a', 'bc', 'd')" returns "abcd". */
- @LibraryOperator(libraries = {MYSQL, POSTGRESQL, BIG_QUERY})
+ * For example, "CONCAT('a', 'bc', 'd')" returns "abcd".
+ *
+ * <p>It accepts at least 1 argument and returns null if any of
+ * the arguments is null. */
+ @LibraryOperator(libraries = {MYSQL, BIG_QUERY})
public static final SqlFunction CONCAT_FUNCTION =
SqlBasicFunction.create("CONCAT",
ReturnTypes.MULTIVALENT_STRING_SUM_PRECISION_NULLABLE,
- OperandTypes.repeat(SqlOperandCountRanges.from(2),
+ OperandTypes.repeat(SqlOperandCountRanges.from(1),
OperandTypes.STRING),
SqlFunctionCategory.STRING)
.withOperandTypeInference(InferTypes.RETURN_TYPE);
+ /** The "CONCAT(arg, ...)" function that concatenates strings,
+ * which never returns null.
+ * For example, "CONCAT('a', 'bc', 'd')" returns "abcd".
+ *
+ * <p>If one of the arguments is null, it will be treated as empty string.
+ * "CONCAT('a', null)" returns "a".
+ * "CONCAT('a', null, 'b')" returns "ab".
+ *
+ * <p>Returns empty string only when all arguments are null
+ * or the empty string.
+ * "CONCAT(null)" returns "".
+ * "CONCAT(null, '')" returns "".
+ * "CONCAT(null, null, null)" returns "".
+ *
+ * <p>It differs from {@link #CONCAT_FUNCTION} when processing
+ * null values. */
+ @LibraryOperator(libraries = {MSSQL, POSTGRESQL})
+ public static final SqlFunction CONCAT_FUNCTION_WITH_NULL =
+ SqlBasicFunction.create("CONCAT",
+ ReturnTypes.MULTIVALENT_STRING_SUM_PRECISION_NOT_NULLABLE,
+ OperandTypes.repeat(SqlOperandCountRanges.from(1),
+ OperandTypes.STRING),
+ SqlFunctionCategory.STRING)
+ .withOperandTypeInference(InferTypes.RETURN_TYPE)
+ .withKind(SqlKind.CONCAT_WITH_NULL);
+
/** The "CONCAT(arg0, arg1)" function that concatenates strings.
* For example, "CONCAT('a', 'bc')" returns "abc".
*
diff --git a/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java
b/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java
index d82eeaef6f..6699bd8fcf 100644
--- a/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java
+++ b/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java
@@ -960,6 +960,13 @@ public abstract class ReturnTypes {
public static final SqlReturnTypeInference
MULTIVALENT_STRING_SUM_PRECISION_NULLABLE =
MULTIVALENT_STRING_SUM_PRECISION.andThen(SqlTypeTransforms.TO_NULLABLE);
+ /**
+ * Same as {@link #MULTIVALENT_STRING_SUM_PRECISION} and using
+ * {@link org.apache.calcite.sql.type.SqlTypeTransforms#TO_NOT_NULLABLE}.
+ */
+ public static final SqlReturnTypeInference
MULTIVALENT_STRING_SUM_PRECISION_NOT_NULLABLE =
+
MULTIVALENT_STRING_SUM_PRECISION.andThen(SqlTypeTransforms.TO_NOT_NULLABLE);
+
/**
* Same as {@link #MULTIVALENT_STRING_SUM_PRECISION} and using
* {@link org.apache.calcite.sql.type.SqlTypeTransforms#TO_NULLABLE_ALL}.
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index 1b34c280fa..e74bc6d553 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -421,6 +421,7 @@ public enum BuiltInMethod {
STRING_CONCAT(SqlFunctions.class, "concat", String.class, String.class),
STRING_CONCAT_WITH_NULL(SqlFunctions.class, "concatWithNull", String.class,
String.class),
MULTI_STRING_CONCAT(SqlFunctions.class, "concatMulti", String[].class),
+ MULTI_STRING_CONCAT_WITH_NULL(SqlFunctions.class, "concatMultiWithNull",
String[].class),
FLOOR_DIV(Math.class, "floorDiv", long.class, long.class),
FLOOR_MOD(Math.class, "floorMod", long.class, long.class),
ADD_MONTHS(DateTimeUtils.class, "addMonths", long.class, int.class),
diff --git a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
index a9be7b7bb4..1b47ba7ec7 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
@@ -42,6 +42,8 @@ import static
org.apache.calcite.avatica.util.DateTimeUtils.timeStringToUnixDate
import static
org.apache.calcite.avatica.util.DateTimeUtils.timestampStringToUnixDate;
import static org.apache.calcite.runtime.SqlFunctions.charLength;
import static org.apache.calcite.runtime.SqlFunctions.concat;
+import static org.apache.calcite.runtime.SqlFunctions.concatMulti;
+import static org.apache.calcite.runtime.SqlFunctions.concatMultiWithNull;
import static org.apache.calcite.runtime.SqlFunctions.concatWithNull;
import static org.apache.calcite.runtime.SqlFunctions.fromBase64;
import static org.apache.calcite.runtime.SqlFunctions.greater;
@@ -149,6 +151,25 @@ class SqlFunctionsTest {
assertThat(concatWithNull(null, "b"), is("b"));
}
+ @Test void testConcatMulti() {
+ assertThat(concatMulti("a b", "cd", "e"), is("a bcde"));
+ // The code generator will ensure that nulls are never passed in. If we
+ // pass in null, it is treated like the string "null", as the following
+ // tests show. Not the desired behavior for SQL.
+ assertThat(concatMulti((String) null), is("null"));
+ assertThat(concatMulti((String) null, null), is("nullnull"));
+ assertThat(concatMulti("a", null, "b"), is("anullb"));
+ }
+
+ @Test void testConcatMultiWithNull() {
+ assertThat(concatMultiWithNull("a b", "cd", "e"), is("a bcde"));
+ // Null value could be passed in which is treated as empty string
+ assertThat(concatMultiWithNull((String) null), is(""));
+ assertThat(concatMultiWithNull((String) null, ""), is(""));
+ assertThat(concatMultiWithNull((String) null, null, null), is(""));
+ assertThat(concatMultiWithNull("a", null, "b"), is("ab"));
+ }
+
@Test void testPosixRegex() {
assertThat(posixRegex("abc", "abc", true), is(true));
assertThat(posixRegex("abc", "^a", true), is(true));
diff --git a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
index 1ae294a41f..3a742499ae 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
@@ -751,7 +751,7 @@ public class SqlValidatorTest extends SqlValidatorTestCase {
}
/** Tests the CONCAT function, which unlike the concat operator ('||') is not
- * standard but only in the ORACLE and POSTGRESQL libraries. */
+ * standard but enabled in the ORACLE, MySQL, BigQuery, POSTGRESQL and MSSQL
libraries. */
@Test void testConcatFunction() {
// CONCAT is not in the library operator table
final SqlValidatorFixture s = fixture()
@@ -762,7 +762,7 @@ public class SqlValidatorTest extends SqlValidatorTestCase {
s.withExpr("concat('aabbcc', 'ab', '+-')")
.columnType("VARCHAR(10) NOT NULL");
s.withExpr("concat('aabbcc', CAST(NULL AS VARCHAR(20)), '+-')")
- .columnType("VARCHAR(28)");
+ .columnType("VARCHAR(28) NOT NULL");
s.withExpr("concat('aabbcc', 2)")
.withWhole(true)
.withTypeCoercion(false)
diff --git a/core/src/test/resources/sql/functions.iq
b/core/src/test/resources/sql/functions.iq
index 64c24a4367..e6a4e9f86a 100644
--- a/core/src/test/resources/sql/functions.iq
+++ b/core/src/test/resources/sql/functions.iq
@@ -68,28 +68,41 @@ SELECT char(null), char(-1), char(65), char(233),
char(256+66);
!ok
# CONCAT
-SELECT CONCAT('c', 'h', 'a', 'r');
+# CONCAT function accepts at least 1 argument
+SELECT CONCAT('a');
+--------+
| EXPR$0 |
+--------+
-| char |
+| a |
+--------+
(1 row)
!ok
-# CONCAT in MySQL, BigQuery returns NULL if any argument is NULL.
-# (CONCAT in Oracle and Postgres ignores NULL arguments.)
-SELECT CONCAT('c', 'h', 'a', null, 'r');
+SELECT CONCAT('c', 'h', 'a', 'r');
+--------+
| EXPR$0 |
+--------+
-| |
+| char |
+--------+
(1 row)
!ok
+# CONCAT in MySQL, BigQuery returns NULL if any argument is NULL.
+# (CONCAT in Oracle, Postgres and MSSQL ignores NULL arguments.)
+with t as (select concat('c', 'h', 'a', null, 'r') as c)
+select c, c is null as c_is_null
+from t;
++---+-----------+
+| C | C_IS_NULL |
++---+-----------+
+| | true |
++---+-----------+
+(1 row)
+
+!ok
+
# Compression Functions
SELECT COMPRESS('sample');
@@ -320,12 +333,14 @@ select concat(cast(null as varchar), 'a');
!ok
-select concat(null, null);
-+--------+
-| EXPR$0 |
-+--------+
-| |
-+--------+
+with t as (select concat(null, null) as c)
+select c, c is null as c_is_null
+from t;
++---+-----------+
+| C | C_IS_NULL |
++---+-----------+
+| | true |
++---+-----------+
(1 row)
!ok
@@ -499,6 +514,144 @@ select CONVERT(DATE, '05/01/2000', 103);
!ok
!}
+# [CALCITE-5771] Apply two different NULL semantics for CONCAT
function(enabled in MySQL, Postgresql, BigQuery and MSSQL)
+with t as (select concat(null) as c)
+select c, c is null as c_is_null
+from t;
++---+-----------+
+| C | C_IS_NULL |
++---+-----------+
+| | false |
++---+-----------+
+(1 row)
+
+!ok
+
+with t as (select concat(null, '') as c)
+select c, c is null as c_is_null
+from t;
++---+-----------+
+| C | C_IS_NULL |
++---+-----------+
+| | false |
++---+-----------+
+(1 row)
+
+!ok
+
+with t as (select concat('', '') as c)
+select c, c is null as c_is_null
+from t;
++---+-----------+
+| C | C_IS_NULL |
++---+-----------+
+| | false |
++---+-----------+
+(1 row)
+
+!ok
+
+select concat('a', null, 'b');
++--------+
+| EXPR$0 |
++--------+
+| ab |
++--------+
+(1 row)
+
+!ok
+
+select concat('a', cast(null as varchar), 'b');
++--------+
+| EXPR$0 |
++--------+
+| ab |
++--------+
+(1 row)
+
+!ok
+
+select concat(cast(null as varchar), 'a', 'b');
++--------+
+| EXPR$0 |
++--------+
+| ab |
++--------+
+(1 row)
+
+!ok
+
+with t as (select concat(null, null, null) as c)
+select c, c is null as c_is_null
+from t;
++---+-----------+
+| C | C_IS_NULL |
++---+-----------+
+| | false |
++---+-----------+
+(1 row)
+
+!ok
+
+# concat in BigQuery
+!use post-big-query
+
+select concat('a');
++--------+
+| EXPR$0 |
++--------+
+| a |
++--------+
+(1 row)
+
+!ok
+
+select concat('a', 'b');
++--------+
+| EXPR$0 |
++--------+
+| ab |
++--------+
+(1 row)
+
+!ok
+
+with t as (select concat(null, '') as c)
+select c, c is null as c_is_null
+from t;
++---+-----------+
+| C | C_IS_NULL |
++---+-----------+
+| | true |
++---+-----------+
+(1 row)
+
+!ok
+
+with t as (select concat('a', cast(null as varchar), 'b') as c)
+select c, c is null as c_is_null
+from t;
++---+-----------+
+| C | C_IS_NULL |
++---+-----------+
+| | true |
++---+-----------+
+(1 row)
+
+!ok
+
+with t as (select concat(null, null, null) as c)
+select c, c is null as c_is_null
+from t;
++---+-----------+
+| C | C_IS_NULL |
++---+-----------+
+| | true |
++---+-----------+
+(1 row)
+
+!ok
+
# -----------------------------------------------------------------------------
# The standard TRANSLATE function, for changing character sets.
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index c467c9caf7..fb2e94cbf5 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2672,7 +2672,8 @@ BigQuery's type system uses confusingly different names
for types and functions:
| m s | CHAR(integer) | Returns the character
whose ASCII code is *integer* % 256, or null if *integer* < 0
| b o p | CHR(integer) | Returns the character
whose UTF-8 code is *integer*
| o | CONCAT(string, string) | Concatenates two
strings, returns null only when both string arguments are null, otherwise
treats null as empty string
-| b m p | CONCAT(string [, string ]*) | Concatenates two or
more strings
+| b m | CONCAT(string [, string ]*) | Concatenates one or
more strings, returns null if any of the arguments is null
+| p q | CONCAT(string [, string ]*) | Concatenates one or
more strings, null is treated as empty string
| m | COMPRESS(string) | Compresses a string
using zlib compression and returns the result as a binary string
| q | CONVERT(type, expression [ , style ]) | Equivalent to
`CAST(expression AS type)`; ignores the *style* operand
| p | CONVERT_TIMEZONE(tz1, tz2, datetime) | Converts the timezone
of *datetime* from *tz1* to *tz2*
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index 5fd3078052..49c7d30c03 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -1942,8 +1942,9 @@ public class SqlOperatorTest {
@Test void testConcatFunc() {
final SqlOperatorFixture f = fixture();
checkConcatFunc(f.withLibrary(SqlLibrary.MYSQL));
- checkConcatFunc(f.withLibrary(SqlLibrary.POSTGRESQL));
checkConcatFunc(f.withLibrary(SqlLibrary.BIG_QUERY));
+ checkConcatFuncWithNull(f.withLibrary(SqlLibrary.POSTGRESQL));
+ checkConcatFuncWithNull(f.withLibrary(SqlLibrary.MSSQL));
checkConcat2Func(f.withLibrary(SqlLibrary.ORACLE));
}
@@ -1959,6 +1960,24 @@ public class SqlOperatorTest {
f.checkFails("^concat()^", INVALID_ARGUMENTS_NUMBER, false);
}
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-5771">[CALCITE-5771]
+ * Apply two different NULL semantics for CONCAT function(enabled in MySQL,
+ * Postgresql, BigQuery and MSSQL)</a>. */
+ private static void checkConcatFuncWithNull(SqlOperatorFixture f) {
+ f.setFor(SqlLibraryOperators.CONCAT_FUNCTION_WITH_NULL);
+ f.checkString("concat('a', 'b', 'c')", "abc", "VARCHAR(3) NOT NULL");
+ f.checkString("concat(cast('a' as varchar), cast('b' as varchar), "
+ + "cast('c' as varchar))", "abc", "VARCHAR NOT NULL");
+ f.checkString("concat('a', 'b', cast(null as char(2)))", "ab", "VARCHAR(4)
NOT NULL");
+ f.checkString("concat(cast(null as ANY), 'b', cast(null as char(2)))",
"b", "VARCHAR NOT NULL");
+ f.checkString("concat('', '', 'a')", "a", "VARCHAR(1) NOT NULL");
+ f.checkString("concat('', '', '')", "", "VARCHAR(0) NOT NULL");
+ f.checkString("concat(null, null, null)", "", "VARCHAR NOT NULL");
+ f.checkString("concat('', null, '')", "", "VARCHAR NOT NULL");
+ f.checkFails("^concat()^", INVALID_ARGUMENTS_NUMBER, false);
+ }
+
private static void checkConcat2Func(SqlOperatorFixture f) {
f.setFor(SqlLibraryOperators.CONCAT2);
f.checkString("concat(cast('fe' as char(2)), cast('df' as
varchar(65535)))",