This is an automated email from the ASF dual-hosted git repository.
zhehu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 4948817818 [CALCITE-6146] Target charset should be used when comparing
two strings through CONVERT/TRANSLATE function during validation
4948817818 is described below
commit 49488178183cb8c0089b02afde07b6cc6eb8348d
Author: Zhe Hu <[email protected]>
AuthorDate: Sat Dec 2 15:40:57 2023 +0800
[CALCITE-6146] Target charset should be used when comparing two strings
through CONVERT/TRANSLATE function during validation
---
.../calcite/rel/type/RelDataTypeFactoryImpl.java | 7 ++
.../apache/calcite/runtime/CalciteResource.java | 2 +-
.../apache/calcite/sql/fun/SqlConvertFunction.java | 34 +++++++++-
.../calcite/sql/fun/SqlTranslateFunction.java | 41 ++++++++++++
.../calcite/runtime/CalciteResource.properties | 2 +-
.../java/org/apache/calcite/test/JdbcTest.java | 48 +++++++++++++-
.../org/apache/calcite/test/SqlValidatorTest.java | 14 ++--
.../org/apache/calcite/test/SqlOperatorTest.java | 76 ++++++++++++++++++++++
8 files changed, 212 insertions(+), 12 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFactoryImpl.java
b/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFactoryImpl.java
index 56c0a2c2e5..ed952fc5b0 100644
--- a/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFactoryImpl.java
+++ b/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFactoryImpl.java
@@ -661,6 +661,13 @@ public abstract class RelDataTypeFactoryImpl implements
RelDataTypeFactory {
@Override protected void generateTypeString(StringBuilder sb, boolean
withDetail) {
sb.append("JavaType(");
sb.append(clazz);
+ if (clazz == String.class
+ && charset != null
+ && !SqlCollation.IMPLICIT.getCharset().equals(charset)) {
+ sb.append(" CHARACTER SET \"");
+ sb.append(charset.name());
+ sb.append("\"");
+ }
sb.append(")");
}
diff --git a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
index 6efa71d734..0f0dbaacc5 100644
--- a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
+++ b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
@@ -164,7 +164,7 @@ public interface CalciteResource {
@BaseMessage("Values in expression list must have compatible types")
ExInst<SqlValidatorException> incompatibleTypesInList();
- @BaseMessage("Cannot apply {0} to the two different charsets {1} and {2}")
+ @BaseMessage("Cannot apply operation ''{0}'' to strings with different
charsets ''{1}'' and ''{2}''")
ExInst<SqlValidatorException> incompatibleCharset(String a0, String a1,
String a2);
diff --git
a/core/src/main/java/org/apache/calcite/sql/fun/SqlConvertFunction.java
b/core/src/main/java/org/apache/calcite/sql/fun/SqlConvertFunction.java
index c21d492860..a57e59aa90 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlConvertFunction.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlConvertFunction.java
@@ -17,6 +17,8 @@
package org.apache.calcite.sql.fun;
import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rex.RexCallBinding;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlCallBinding;
import org.apache.calcite.sql.SqlFunction;
@@ -25,6 +27,7 @@ import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperandCountRange;
+import org.apache.calcite.sql.SqlOperatorBinding;
import org.apache.calcite.sql.SqlUtil;
import org.apache.calcite.sql.SqlWriter;
import org.apache.calcite.sql.type.ReturnTypes;
@@ -40,8 +43,10 @@ import org.apache.calcite.sql.validate.SqlValidatorScope;
import org.checkerframework.checker.nullness.qual.Nullable;
+import java.nio.charset.Charset;
import java.util.List;
+import static org.apache.calcite.sql.type.NonNullableAccessors.getCollation;
import static org.apache.calcite.util.Static.RESOURCE;
import static java.util.Objects.requireNonNull;
@@ -107,13 +112,38 @@ public class SqlConvertFunction extends SqlFunction {
}
}
+ @Override public RelDataType inferReturnType(
+ SqlOperatorBinding opBinding) {
+ final RelDataTypeFactory typeFactory = opBinding.getTypeFactory();
+ final RelDataType ret = opBinding.getOperandType(0);
+ if (SqlTypeUtil.isNull(ret)) {
+ return ret;
+ }
+ final String descCharsetName;
+ if (opBinding instanceof SqlCallBinding) {
+ descCharsetName = ((SqlCallBinding)
opBinding).getCall().operand(2).toString();
+ } else {
+ descCharsetName = ((RexCallBinding)
opBinding).getStringLiteralOperand(2);
+ }
+ assert descCharsetName != null;
+ Charset descCharset = SqlUtil.getCharset(descCharsetName);
+ return typeFactory.createTypeWithCharsetAndCollation(ret, descCharset,
getCollation(ret));
+ }
+
@Override public RelDataType deriveType(SqlValidator validator,
SqlValidatorScope scope, SqlCall call) {
- // special case for CONVERT: don't need to derive type for Charsets
+ // don't need to derive type for Charsets
RelDataType nodeType =
validator.deriveType(scope, call.operand(0));
requireNonNull(nodeType, "nodeType");
- return validateOperands(validator, scope, call);
+ RelDataType ret = validateOperands(validator, scope, call);
+ if (SqlTypeUtil.isNull(ret)) {
+ return ret;
+ }
+ // descCharset should be the returning type Charset of CONVERT
+ Charset descCharset = SqlUtil.getCharset(call.operand(2).toString());
+ return validator.getTypeFactory()
+ .createTypeWithCharsetAndCollation(ret, descCharset,
getCollation(ret));
}
@Override public boolean checkOperandTypes(SqlCallBinding callBinding,
diff --git
a/core/src/main/java/org/apache/calcite/sql/fun/SqlTranslateFunction.java
b/core/src/main/java/org/apache/calcite/sql/fun/SqlTranslateFunction.java
index a61706ca14..1699391d50 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlTranslateFunction.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlTranslateFunction.java
@@ -17,6 +17,8 @@
package org.apache.calcite.sql.fun;
import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rex.RexCallBinding;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlCallBinding;
import org.apache.calcite.sql.SqlFunctionCategory;
@@ -24,6 +26,7 @@ import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperandCountRange;
+import org.apache.calcite.sql.SqlOperatorBinding;
import org.apache.calcite.sql.SqlUtil;
import org.apache.calcite.sql.SqlWriter;
import org.apache.calcite.sql.type.ReturnTypes;
@@ -32,10 +35,14 @@ import org.apache.calcite.sql.type.SqlTypeUtil;
import org.apache.calcite.sql.validate.SqlValidator;
import org.apache.calcite.sql.validate.SqlValidatorScope;
+import java.nio.charset.Charset;
import java.util.List;
+import static org.apache.calcite.sql.type.NonNullableAccessors.getCollation;
import static org.apache.calcite.util.Static.RESOURCE;
+import static java.util.Objects.requireNonNull;
+
/**
* Common base for the <code>TRANSLATE(USING)</code> and
* <code>CONVERT(USING)</code> function, which is different from
@@ -78,6 +85,40 @@ public class SqlTranslateFunction extends SqlConvertFunction
{
super.validateQuantifier(validator, call);
}
+ @Override public RelDataType inferReturnType(
+ SqlOperatorBinding opBinding) {
+ final RelDataTypeFactory typeFactory = opBinding.getTypeFactory();
+ final RelDataType ret = opBinding.getOperandType(0);
+ if (SqlTypeUtil.isNull(ret)) {
+ return ret;
+ }
+ final String descCharsetName;
+ if (opBinding instanceof SqlCallBinding) {
+ descCharsetName = ((SqlCallBinding)
opBinding).getCall().operand(1).toString();
+ } else {
+ descCharsetName = ((RexCallBinding)
opBinding).getStringLiteralOperand(1);
+ }
+ assert descCharsetName != null;
+ Charset descCharset = SqlUtil.getCharset(descCharsetName);
+ return typeFactory.createTypeWithCharsetAndCollation(ret, descCharset,
getCollation(ret));
+ }
+
+ @Override public RelDataType deriveType(SqlValidator validator,
+ SqlValidatorScope scope, SqlCall call) {
+ // don't need to derive type for Charsets
+ RelDataType nodeType =
+ validator.deriveType(scope, call.operand(0));
+ requireNonNull(nodeType, "nodeType");
+ RelDataType ret = validateOperands(validator, scope, call);
+ if (SqlTypeUtil.isNull(ret)) {
+ return ret;
+ }
+ // descCharset should be the returning type Charset of TRANSLATE
+ Charset descCharset = SqlUtil.getCharset(call.operand(1).toString());
+ return validator.getTypeFactory()
+ .createTypeWithCharsetAndCollation(ret, descCharset,
getCollation(ret));
+ }
+
@Override public boolean checkOperandTypes(SqlCallBinding callBinding,
boolean throwOnFailure) {
// type of operand[0] should be Character or NULL
diff --git
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
index f46d05a116..bfad94a60a 100644
---
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
+++
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
@@ -61,7 +61,7 @@ CannotCastValue=Cast function cannot convert value of type
{0} to type {1}
UnknownDatatypeName=Unknown datatype name ''{0}''
IncompatibleValueType=Values passed to {0} operator must have compatible types
IncompatibleTypesInList=Values in expression list must have compatible types
-IncompatibleCharset=Cannot apply {0} to the two different charsets {1} and {2}
+IncompatibleCharset=Cannot apply operation ''{0}'' to strings with different
charsets ''{1}'' and ''{2}''
InvalidOrderByPos=ORDER BY is only allowed on top-level SELECT
RecursiveWithMustHaveUnionSetOp=A recursive query only supports UNION [ALL]
operator
RecursiveWithMustHaveTwoChildUnionSetOp=A recursive query only supports binary
UNION [ALL] operator
diff --git a/core/src/test/java/org/apache/calcite/test/JdbcTest.java
b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
index 7c5f6b2a18..f08e99322e 100644
--- a/core/src/test/java/org/apache/calcite/test/JdbcTest.java
+++ b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
@@ -6998,7 +6998,53 @@ public class JdbcTest {
with.query(
"select * from \"employee\" where \"full_name\" =
_UTF16'\u82f1\u56fd'")
.throws_(
- "Cannot apply = to the two different charsets ISO-8859-1 and
UTF-16LE");
+ "Cannot apply operation '=' to strings with different charsets
'ISO-8859-1' and 'UTF-16LE'");
+ }
+
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-6146">[CALCITE-6146]
+ * Target charset should be used when comparing two strings through
+ * CONVERT/TRANSLATE function during validation</a>. */
+ @Test void testStringComparisonWithConvertFunc() {
+ CalciteAssert.AssertThat with = CalciteAssert.hr();
+ with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+ + "where convert(\"name\" using GBK)=_GBK'Eric'")
+ .returns("name=Eric; empid=200\n");
+ with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+ + "where _BIG5'Eric'=translate(\"name\" using BIG5)")
+ .returns("name=Eric; empid=200\n");
+ with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+ + "where translate(null using UTF8) is null")
+ .returns("name=Bill; empid=100\n"
+ + "name=Eric; empid=200\n"
+ + "name=Sebastian; empid=150\n"
+ + "name=Theodore; empid=110\n");
+ with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+ + "where _BIG5'Eric'=translate(\"name\" using LATIN1)")
+ .throws_("Cannot apply operation '=' to strings with "
+ + "different charsets 'Big5' and 'ISO-8859-1'");
+ with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+ + "where convert(convert(\"name\" using GBK) using
BIG5)=_BIG5'Eric'")
+ .returns("name=Eric; empid=200\n");
+
+ with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+ + "where convert(\"name\", UTF8, GBK)=_GBK'Sebastian'")
+ .returns("name=Sebastian; empid=150\n");
+ with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+ + "where _BIG5'Sebastian'=convert(\"name\", UTF8, BIG5)")
+ .returns("name=Sebastian; empid=150\n");
+
+ // check cast
+ with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+ + "where cast(convert(\"name\" using LATIN1) as char(5))='Eric'")
+ .returns("name=Eric; empid=200\n");
+ // the result of convert(\"name\" using GBK) has GBK charset
+ // while CHAR(5) has ISO-8859-1 charset, which is not allowed to cast
+ with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+ + "where cast(convert(\"name\" using GBK) as char(5))='Eric'")
+ .throws_(
+ "cannot convert value of type "
+ + "JavaType(class java.lang.String CHARACTER SET \"GBK\") to type
CHAR(5) NOT NULL");
}
/** Tests metadata for the MySQL lexical scheme. */
diff --git a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
index c0b85c69f4..2c3ca5436f 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
@@ -828,18 +828,18 @@ public class SqlValidatorTest extends
SqlValidatorTestCase {
@Test void testCharsetMismatch() {
wholeExpr("''=_UTF16''")
- .fails("Cannot apply .* to the two different charsets ISO-8859-1 and "
- + "UTF-16LE");
+ .fails("Cannot apply .* to strings with different charsets
'ISO-8859-1' and "
+ + "'UTF-16LE'");
wholeExpr("''<>_UTF16''")
- .fails("(?s).*Cannot apply .* to the two different charsets.*");
+ .fails("(?s).*Cannot apply .* to strings with different charsets.*");
wholeExpr("''>_UTF16''")
- .fails("(?s).*Cannot apply .* to the two different charsets.*");
+ .fails("(?s).*Cannot apply .* to strings with different charsets.*");
wholeExpr("''<_UTF16''")
- .fails("(?s).*Cannot apply .* to the two different charsets.*");
+ .fails("(?s).*Cannot apply .* to strings with different charsets.*");
wholeExpr("''<=_UTF16''")
- .fails("(?s).*Cannot apply .* to the two different charsets.*");
+ .fails("(?s).*Cannot apply .* to strings with different charsets.*");
wholeExpr("''>=_UTF16''")
- .fails("(?s).*Cannot apply .* to the two different charsets.*");
+ .fails("(?s).*Cannot apply .* to strings with different charsets.*");
wholeExpr("''||_UTF16''")
.fails(ANY);
wholeExpr("'a'||'b'||_UTF16'c'")
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index 7219888558..edaabb8de0 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -4491,6 +4491,82 @@ public class SqlOperatorTest {
f.checkType("convert(cast(1 as varchar(2)), utf8, latin1)", "VARCHAR(2)
NOT NULL");
}
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-6146">[CALCITE-6146]
+ * Target charset should be used when comparing two strings through
+ * CONVERT/TRANSLATE function during validation</a>. */
+ @Test void testStringComparisonWithConvertFunc() {
+ final SqlOperatorFixture f = fixture();
+ f.setFor(SqlStdOperatorTable.CONVERT, VM_JAVA);
+ f.check("select 'a' as alia\n"
+ + " from (values(true)) where convert('col', utf8, latin1)='col'",
+ SqlTests.ANY_TYPE_CHECKER, 'a');
+ f.check("select 'a' as alia\n"
+ + " from (values(true)) where convert('col', utf8, gbk)=_GBK'col'",
+ SqlTests.ANY_TYPE_CHECKER, 'a');
+ f.check("select 'a' as alia\n"
+ + " from (values(true)) where convert(null, utf8, gbk) is null",
+ SqlTests.ANY_TYPE_CHECKER, 'a');
+ f.checkFails("select 'a' as alia\n"
+ + " from (values(true)) where ^convert('col', utf8, gbk)='col'^",
+ "Cannot apply operation '=' to strings with "
+ + "different charsets 'GBK' and 'ISO-8859-1'",
+ false);
+
+ // cast check
+ f.check("select 'a' as alia\n"
+ + " from (values(true)) where cast(convert('col', utf8, latin1) as
char(3))='col'",
+ SqlTests.ANY_TYPE_CHECKER, 'a');
+ f.checkFails("select 'a' as alia\n"
+ + " from (values(true)) where ^cast(convert('col', utf8, latin1)
as char(3))=_GBK'col'^",
+ "Cannot apply operation '=' to strings with "
+ + "different charsets 'ISO-8859-1' and 'GBK'",
+ false);
+ // the result of convert('col', utf8, gbk) has GBK charset
+ // while CHAR(3) has ISO-8859-1 charset, which is not allowed to cast
+ f.checkFails("select 'a' as alia\n"
+ + " from (values(true)) where ^cast(convert('col', utf8, gbk) as
char(3))^=_GBK'col'",
+ "Cast function cannot convert value of type "
+ + "CHAR\\(3\\) CHARACTER SET \"GBK\" NOT NULL to type CHAR\\(3\\)
NOT NULL",
+ false);
+ }
+
+ @Test void testStringComparisonWithTranslateFunc() {
+ final SqlOperatorFixture f = fixture();
+ f.setFor(SqlStdOperatorTable.TRANSLATE, VM_JAVA);
+ f.check("select 'a' as alia\n"
+ + " from (values(true)) where translate('col' using latin1)='col'",
+ SqlTests.ANY_TYPE_CHECKER, 'a');
+ f.check("select 'a' as alia\n"
+ + " from (values(true)) where convert('col' using gbk)=_GBK'col'",
+ SqlTests.ANY_TYPE_CHECKER, 'a');
+ f.check("select 'a' as alia\n"
+ + " from (values(true)) where convert(null using gbk) is null",
+ SqlTests.ANY_TYPE_CHECKER, 'a');
+ f.checkFails("select 'a' as alia\n"
+ + " from (values(true)) where ^translate('col' using gbk)='col'^",
+ "Cannot apply operation '=' to strings with "
+ + "different charsets 'GBK' and 'ISO-8859-1'",
+ false);
+
+ // cast check
+ f.check("select 'a' as alia\n"
+ + " from (values(true)) where cast(convert('col' using latin1) as
char(3))='col'",
+ SqlTests.ANY_TYPE_CHECKER, 'a');
+ f.checkFails("select 'a' as alia\n"
+ + " from (values(true)) where ^cast(translate('col' using latin1)
as char(3))=_GBK'col'^",
+ "Cannot apply operation '=' to strings with "
+ + "different charsets 'ISO-8859-1' and 'GBK'",
+ false);
+ // the result of translate('col' using gbk) has GBK charset
+ // while CHAR(3) has ISO-8859-1 charset, which is not allowed to cast
+ f.checkFails("select 'a' as alia\n"
+ + " from (values(true)) where ^cast(translate('col' using gbk) as
char(3))^=_GBK'col'",
+ "Cast function cannot convert value of type "
+ + "CHAR\\(3\\) CHARACTER SET \"GBK\" NOT NULL to type CHAR\\(3\\)
NOT NULL",
+ false);
+ }
+
@Test void testTranslateFunc() {
final SqlOperatorFixture f = fixture();
f.setFor(SqlStdOperatorTable.TRANSLATE, VM_JAVA);