This is an automated email from the ASF dual-hosted git repository.

zhehu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 4948817818 [CALCITE-6146] Target charset should be used when comparing 
two strings through CONVERT/TRANSLATE function during validation
4948817818 is described below

commit 49488178183cb8c0089b02afde07b6cc6eb8348d
Author: Zhe Hu <[email protected]>
AuthorDate: Sat Dec 2 15:40:57 2023 +0800

    [CALCITE-6146] Target charset should be used when comparing two strings 
through CONVERT/TRANSLATE function during validation
---
 .../calcite/rel/type/RelDataTypeFactoryImpl.java   |  7 ++
 .../apache/calcite/runtime/CalciteResource.java    |  2 +-
 .../apache/calcite/sql/fun/SqlConvertFunction.java | 34 +++++++++-
 .../calcite/sql/fun/SqlTranslateFunction.java      | 41 ++++++++++++
 .../calcite/runtime/CalciteResource.properties     |  2 +-
 .../java/org/apache/calcite/test/JdbcTest.java     | 48 +++++++++++++-
 .../org/apache/calcite/test/SqlValidatorTest.java  | 14 ++--
 .../org/apache/calcite/test/SqlOperatorTest.java   | 76 ++++++++++++++++++++++
 8 files changed, 212 insertions(+), 12 deletions(-)

diff --git 
a/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFactoryImpl.java 
b/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFactoryImpl.java
index 56c0a2c2e5..ed952fc5b0 100644
--- a/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFactoryImpl.java
+++ b/core/src/main/java/org/apache/calcite/rel/type/RelDataTypeFactoryImpl.java
@@ -661,6 +661,13 @@ public abstract class RelDataTypeFactoryImpl implements 
RelDataTypeFactory {
     @Override protected void generateTypeString(StringBuilder sb, boolean 
withDetail) {
       sb.append("JavaType(");
       sb.append(clazz);
+      if (clazz == String.class
+          && charset != null
+          && !SqlCollation.IMPLICIT.getCharset().equals(charset)) {
+        sb.append(" CHARACTER SET \"");
+        sb.append(charset.name());
+        sb.append("\"");
+      }
       sb.append(")");
     }
 
diff --git a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java 
b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
index 6efa71d734..0f0dbaacc5 100644
--- a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
+++ b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
@@ -164,7 +164,7 @@ public interface CalciteResource {
   @BaseMessage("Values in expression list must have compatible types")
   ExInst<SqlValidatorException> incompatibleTypesInList();
 
-  @BaseMessage("Cannot apply {0} to the two different charsets {1} and {2}")
+  @BaseMessage("Cannot apply operation ''{0}'' to strings with different 
charsets ''{1}'' and ''{2}''")
   ExInst<SqlValidatorException> incompatibleCharset(String a0, String a1,
       String a2);
 
diff --git 
a/core/src/main/java/org/apache/calcite/sql/fun/SqlConvertFunction.java 
b/core/src/main/java/org/apache/calcite/sql/fun/SqlConvertFunction.java
index c21d492860..a57e59aa90 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlConvertFunction.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlConvertFunction.java
@@ -17,6 +17,8 @@
 package org.apache.calcite.sql.fun;
 
 import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rex.RexCallBinding;
 import org.apache.calcite.sql.SqlCall;
 import org.apache.calcite.sql.SqlCallBinding;
 import org.apache.calcite.sql.SqlFunction;
@@ -25,6 +27,7 @@ import org.apache.calcite.sql.SqlIdentifier;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlOperandCountRange;
+import org.apache.calcite.sql.SqlOperatorBinding;
 import org.apache.calcite.sql.SqlUtil;
 import org.apache.calcite.sql.SqlWriter;
 import org.apache.calcite.sql.type.ReturnTypes;
@@ -40,8 +43,10 @@ import org.apache.calcite.sql.validate.SqlValidatorScope;
 
 import org.checkerframework.checker.nullness.qual.Nullable;
 
+import java.nio.charset.Charset;
 import java.util.List;
 
+import static org.apache.calcite.sql.type.NonNullableAccessors.getCollation;
 import static org.apache.calcite.util.Static.RESOURCE;
 
 import static java.util.Objects.requireNonNull;
@@ -107,13 +112,38 @@ public class SqlConvertFunction extends SqlFunction {
     }
   }
 
+  @Override public RelDataType inferReturnType(
+      SqlOperatorBinding opBinding) {
+    final RelDataTypeFactory typeFactory = opBinding.getTypeFactory();
+    final RelDataType ret = opBinding.getOperandType(0);
+    if (SqlTypeUtil.isNull(ret)) {
+      return ret;
+    }
+    final String descCharsetName;
+    if (opBinding instanceof SqlCallBinding) {
+      descCharsetName = ((SqlCallBinding) 
opBinding).getCall().operand(2).toString();
+    } else {
+      descCharsetName = ((RexCallBinding) 
opBinding).getStringLiteralOperand(2);
+    }
+    assert descCharsetName != null;
+    Charset descCharset = SqlUtil.getCharset(descCharsetName);
+    return typeFactory.createTypeWithCharsetAndCollation(ret, descCharset, 
getCollation(ret));
+  }
+
   @Override public RelDataType deriveType(SqlValidator validator,
       SqlValidatorScope scope, SqlCall call) {
-    // special case for CONVERT: don't need to derive type for Charsets
+    // don't need to derive type for Charsets
     RelDataType nodeType =
         validator.deriveType(scope, call.operand(0));
     requireNonNull(nodeType, "nodeType");
-    return validateOperands(validator, scope, call);
+    RelDataType ret = validateOperands(validator, scope, call);
+    if (SqlTypeUtil.isNull(ret)) {
+      return ret;
+    }
+    // descCharset should be the returning type Charset of CONVERT
+    Charset descCharset = SqlUtil.getCharset(call.operand(2).toString());
+    return validator.getTypeFactory()
+        .createTypeWithCharsetAndCollation(ret, descCharset, 
getCollation(ret));
   }
 
   @Override public boolean checkOperandTypes(SqlCallBinding callBinding,
diff --git 
a/core/src/main/java/org/apache/calcite/sql/fun/SqlTranslateFunction.java 
b/core/src/main/java/org/apache/calcite/sql/fun/SqlTranslateFunction.java
index a61706ca14..1699391d50 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlTranslateFunction.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlTranslateFunction.java
@@ -17,6 +17,8 @@
 package org.apache.calcite.sql.fun;
 
 import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rex.RexCallBinding;
 import org.apache.calcite.sql.SqlCall;
 import org.apache.calcite.sql.SqlCallBinding;
 import org.apache.calcite.sql.SqlFunctionCategory;
@@ -24,6 +26,7 @@ import org.apache.calcite.sql.SqlIdentifier;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlOperandCountRange;
+import org.apache.calcite.sql.SqlOperatorBinding;
 import org.apache.calcite.sql.SqlUtil;
 import org.apache.calcite.sql.SqlWriter;
 import org.apache.calcite.sql.type.ReturnTypes;
@@ -32,10 +35,14 @@ import org.apache.calcite.sql.type.SqlTypeUtil;
 import org.apache.calcite.sql.validate.SqlValidator;
 import org.apache.calcite.sql.validate.SqlValidatorScope;
 
+import java.nio.charset.Charset;
 import java.util.List;
 
+import static org.apache.calcite.sql.type.NonNullableAccessors.getCollation;
 import static org.apache.calcite.util.Static.RESOURCE;
 
+import static java.util.Objects.requireNonNull;
+
 /**
  * Common base for the <code>TRANSLATE(USING)</code> and
  * <code>CONVERT(USING)</code> function, which is different from
@@ -78,6 +85,40 @@ public class SqlTranslateFunction extends SqlConvertFunction 
{
     super.validateQuantifier(validator, call);
   }
 
+  @Override public RelDataType inferReturnType(
+      SqlOperatorBinding opBinding) {
+    final RelDataTypeFactory typeFactory = opBinding.getTypeFactory();
+    final RelDataType ret = opBinding.getOperandType(0);
+    if (SqlTypeUtil.isNull(ret)) {
+      return ret;
+    }
+    final String descCharsetName;
+    if (opBinding instanceof SqlCallBinding) {
+      descCharsetName = ((SqlCallBinding) 
opBinding).getCall().operand(1).toString();
+    } else {
+      descCharsetName = ((RexCallBinding) 
opBinding).getStringLiteralOperand(1);
+    }
+    assert descCharsetName != null;
+    Charset descCharset = SqlUtil.getCharset(descCharsetName);
+    return typeFactory.createTypeWithCharsetAndCollation(ret, descCharset, 
getCollation(ret));
+  }
+
+  @Override public RelDataType deriveType(SqlValidator validator,
+      SqlValidatorScope scope, SqlCall call) {
+    // don't need to derive type for Charsets
+    RelDataType nodeType =
+        validator.deriveType(scope, call.operand(0));
+    requireNonNull(nodeType, "nodeType");
+    RelDataType ret = validateOperands(validator, scope, call);
+    if (SqlTypeUtil.isNull(ret)) {
+      return ret;
+    }
+    // descCharset should be the returning type Charset of TRANSLATE
+    Charset descCharset = SqlUtil.getCharset(call.operand(1).toString());
+    return validator.getTypeFactory()
+        .createTypeWithCharsetAndCollation(ret, descCharset, 
getCollation(ret));
+  }
+
   @Override public boolean checkOperandTypes(SqlCallBinding callBinding,
       boolean throwOnFailure) {
     // type of operand[0] should be Character or NULL
diff --git 
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties 
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
index f46d05a116..bfad94a60a 100644
--- 
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
+++ 
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
@@ -61,7 +61,7 @@ CannotCastValue=Cast function cannot convert value of type 
{0} to type {1}
 UnknownDatatypeName=Unknown datatype name ''{0}''
 IncompatibleValueType=Values passed to {0} operator must have compatible types
 IncompatibleTypesInList=Values in expression list must have compatible types
-IncompatibleCharset=Cannot apply {0} to the two different charsets {1} and {2}
+IncompatibleCharset=Cannot apply operation ''{0}'' to strings with different 
charsets ''{1}'' and ''{2}''
 InvalidOrderByPos=ORDER BY is only allowed on top-level SELECT
 RecursiveWithMustHaveUnionSetOp=A recursive query only supports UNION [ALL] 
operator
 RecursiveWithMustHaveTwoChildUnionSetOp=A recursive query only supports binary 
UNION [ALL] operator
diff --git a/core/src/test/java/org/apache/calcite/test/JdbcTest.java 
b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
index 7c5f6b2a18..f08e99322e 100644
--- a/core/src/test/java/org/apache/calcite/test/JdbcTest.java
+++ b/core/src/test/java/org/apache/calcite/test/JdbcTest.java
@@ -6998,7 +6998,53 @@ public class JdbcTest {
     with.query(
         "select * from \"employee\" where \"full_name\" = 
_UTF16'\u82f1\u56fd'")
         .throws_(
-            "Cannot apply = to the two different charsets ISO-8859-1 and 
UTF-16LE");
+            "Cannot apply operation '=' to strings with different charsets 
'ISO-8859-1' and 'UTF-16LE'");
+  }
+
+  /** Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-6146";>[CALCITE-6146]
+   * Target charset should be used when comparing two strings through
+   * CONVERT/TRANSLATE function during validation</a>. */
+  @Test void testStringComparisonWithConvertFunc() {
+    CalciteAssert.AssertThat with = CalciteAssert.hr();
+    with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+          + "where convert(\"name\" using GBK)=_GBK'Eric'")
+        .returns("name=Eric; empid=200\n");
+    with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+          + "where _BIG5'Eric'=translate(\"name\" using BIG5)")
+        .returns("name=Eric; empid=200\n");
+    with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+          + "where translate(null using UTF8) is null")
+        .returns("name=Bill; empid=100\n"
+                + "name=Eric; empid=200\n"
+                + "name=Sebastian; empid=150\n"
+                + "name=Theodore; empid=110\n");
+    with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+          + "where _BIG5'Eric'=translate(\"name\" using LATIN1)")
+        .throws_("Cannot apply operation '=' to strings with "
+                + "different charsets 'Big5' and 'ISO-8859-1'");
+    with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+          + "where convert(convert(\"name\" using GBK) using 
BIG5)=_BIG5'Eric'")
+        .returns("name=Eric; empid=200\n");
+
+    with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+          + "where convert(\"name\", UTF8, GBK)=_GBK'Sebastian'")
+        .returns("name=Sebastian; empid=150\n");
+    with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+          + "where _BIG5'Sebastian'=convert(\"name\", UTF8, BIG5)")
+        .returns("name=Sebastian; empid=150\n");
+
+    // check cast
+    with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+          + "where cast(convert(\"name\" using LATIN1) as char(5))='Eric'")
+        .returns("name=Eric; empid=200\n");
+    // the result of convert(\"name\" using GBK) has GBK charset
+    // while CHAR(5) has ISO-8859-1 charset, which is not allowed to cast
+    with.query("select \"name\", \"empid\" from \"hr\".\"emps\"\n"
+          + "where cast(convert(\"name\" using GBK) as char(5))='Eric'")
+        .throws_(
+            "cannot convert value of type "
+            + "JavaType(class java.lang.String CHARACTER SET \"GBK\") to type 
CHAR(5) NOT NULL");
   }
 
   /** Tests metadata for the MySQL lexical scheme. */
diff --git a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java 
b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
index c0b85c69f4..2c3ca5436f 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
@@ -828,18 +828,18 @@ public class SqlValidatorTest extends 
SqlValidatorTestCase {
 
   @Test void testCharsetMismatch() {
     wholeExpr("''=_UTF16''")
-        .fails("Cannot apply .* to the two different charsets ISO-8859-1 and "
-            + "UTF-16LE");
+        .fails("Cannot apply .* to strings with different charsets 
'ISO-8859-1' and "
+            + "'UTF-16LE'");
     wholeExpr("''<>_UTF16''")
-        .fails("(?s).*Cannot apply .* to the two different charsets.*");
+        .fails("(?s).*Cannot apply .* to strings with different charsets.*");
     wholeExpr("''>_UTF16''")
-        .fails("(?s).*Cannot apply .* to the two different charsets.*");
+        .fails("(?s).*Cannot apply .* to strings with different charsets.*");
     wholeExpr("''<_UTF16''")
-        .fails("(?s).*Cannot apply .* to the two different charsets.*");
+        .fails("(?s).*Cannot apply .* to strings with different charsets.*");
     wholeExpr("''<=_UTF16''")
-        .fails("(?s).*Cannot apply .* to the two different charsets.*");
+        .fails("(?s).*Cannot apply .* to strings with different charsets.*");
     wholeExpr("''>=_UTF16''")
-        .fails("(?s).*Cannot apply .* to the two different charsets.*");
+        .fails("(?s).*Cannot apply .* to strings with different charsets.*");
     wholeExpr("''||_UTF16''")
         .fails(ANY);
     wholeExpr("'a'||'b'||_UTF16'c'")
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java 
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index 7219888558..edaabb8de0 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -4491,6 +4491,82 @@ public class SqlOperatorTest {
     f.checkType("convert(cast(1 as varchar(2)), utf8, latin1)", "VARCHAR(2) 
NOT NULL");
   }
 
+  /** Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-6146";>[CALCITE-6146]
+   * Target charset should be used when comparing two strings through
+   * CONVERT/TRANSLATE function during validation</a>. */
+  @Test void testStringComparisonWithConvertFunc() {
+    final SqlOperatorFixture f = fixture();
+    f.setFor(SqlStdOperatorTable.CONVERT, VM_JAVA);
+    f.check("select 'a' as alia\n"
+            + " from (values(true)) where convert('col', utf8, latin1)='col'",
+        SqlTests.ANY_TYPE_CHECKER, 'a');
+    f.check("select 'a' as alia\n"
+            + " from (values(true)) where convert('col', utf8, gbk)=_GBK'col'",
+        SqlTests.ANY_TYPE_CHECKER, 'a');
+    f.check("select 'a' as alia\n"
+            + " from (values(true)) where convert(null, utf8, gbk) is null",
+        SqlTests.ANY_TYPE_CHECKER, 'a');
+    f.checkFails("select 'a' as alia\n"
+            + " from (values(true)) where ^convert('col', utf8, gbk)='col'^",
+        "Cannot apply operation '=' to strings with "
+            + "different charsets 'GBK' and 'ISO-8859-1'",
+        false);
+
+    // cast check
+    f.check("select 'a' as alia\n"
+            + " from (values(true)) where cast(convert('col', utf8, latin1) as 
char(3))='col'",
+        SqlTests.ANY_TYPE_CHECKER, 'a');
+    f.checkFails("select 'a' as alia\n"
+            + " from (values(true)) where ^cast(convert('col', utf8, latin1) 
as char(3))=_GBK'col'^",
+        "Cannot apply operation '=' to strings with "
+            + "different charsets 'ISO-8859-1' and 'GBK'",
+        false);
+    // the result of convert('col', utf8, gbk) has GBK charset
+    // while CHAR(3) has ISO-8859-1 charset, which is not allowed to cast
+    f.checkFails("select 'a' as alia\n"
+            + " from (values(true)) where ^cast(convert('col', utf8, gbk) as 
char(3))^=_GBK'col'",
+        "Cast function cannot convert value of type "
+            + "CHAR\\(3\\) CHARACTER SET \"GBK\" NOT NULL to type CHAR\\(3\\) 
NOT NULL",
+        false);
+  }
+
+  @Test void testStringComparisonWithTranslateFunc() {
+    final SqlOperatorFixture f = fixture();
+    f.setFor(SqlStdOperatorTable.TRANSLATE, VM_JAVA);
+    f.check("select 'a' as alia\n"
+            + " from (values(true)) where translate('col' using latin1)='col'",
+        SqlTests.ANY_TYPE_CHECKER, 'a');
+    f.check("select 'a' as alia\n"
+            + " from (values(true)) where convert('col' using gbk)=_GBK'col'",
+        SqlTests.ANY_TYPE_CHECKER, 'a');
+    f.check("select 'a' as alia\n"
+            + " from (values(true)) where convert(null using gbk) is null",
+        SqlTests.ANY_TYPE_CHECKER, 'a');
+    f.checkFails("select 'a' as alia\n"
+            + " from (values(true)) where ^translate('col' using gbk)='col'^",
+        "Cannot apply operation '=' to strings with "
+            + "different charsets 'GBK' and 'ISO-8859-1'",
+        false);
+
+    // cast check
+    f.check("select 'a' as alia\n"
+            + " from (values(true)) where cast(convert('col' using latin1) as 
char(3))='col'",
+        SqlTests.ANY_TYPE_CHECKER, 'a');
+    f.checkFails("select 'a' as alia\n"
+            + " from (values(true)) where ^cast(translate('col' using latin1) 
as char(3))=_GBK'col'^",
+        "Cannot apply operation '=' to strings with "
+            + "different charsets 'ISO-8859-1' and 'GBK'",
+        false);
+    // the result of translate('col' using gbk) has GBK charset
+    // while CHAR(3) has ISO-8859-1 charset, which is not allowed to cast
+    f.checkFails("select 'a' as alia\n"
+            + " from (values(true)) where ^cast(translate('col' using gbk) as 
char(3))^=_GBK'col'",
+        "Cast function cannot convert value of type "
+            + "CHAR\\(3\\) CHARACTER SET \"GBK\" NOT NULL to type CHAR\\(3\\) 
NOT NULL",
+        false);
+  }
+
   @Test void testTranslateFunc() {
     final SqlOperatorFixture f = fixture();
     f.setFor(SqlStdOperatorTable.TRANSLATE, VM_JAVA);

Reply via email to