This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 5d531fc  [SPARK-37047][SQL][FOLLOWUP] lpad/rpad should fail if 
parameter str and pad are different types
5d531fc is described below

commit 5d531fcb25cd3727d6adc458241c09d2ce177f16
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Mon Oct 25 09:55:24 2021 +0900

    [SPARK-37047][SQL][FOLLOWUP] lpad/rpad should fail if parameter str and pad 
are different types
    
    ### What changes were proposed in this pull request?
    
    This is a followup of https://github.com/apache/spark/pull/34154 . Currently, 
lpad/rpad throws a class cast exception at runtime if the parameters `str` and 
`pad` are of different types (one is STRING and the other is BINARY). This PR 
makes it fail during analysis instead.
    
    ### Why are the changes needed?
    
    Fail earlier (during analysis instead of at runtime) for invalid function calls.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, the new lpad/rpad change is not released yet.
    
    ### How was this patch tested?
    
    new tests
    
    Closes #34370 from cloud-fan/minor.
    
    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../catalyst/expressions/stringExpressions.scala   | 19 +++++++++++
 .../sql-tests/inputs/string-functions.sql          |  6 ++++
 .../results/ansi/string-functions.sql.out          | 38 +++++++++++++++++++++-
 .../sql-tests/results/string-functions.sql.out     | 38 +++++++++++++++++++++-
 4 files changed, 99 insertions(+), 2 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index ae13afb..276a366 100755
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -1384,6 +1384,16 @@ case class StringLPad(str: Expression, len: Expression, 
pad: Expression = Litera
   override def inputTypes: Seq[AbstractDataType] =
     Seq(TypeCollection(StringType, BinaryType), IntegerType, 
TypeCollection(StringType, BinaryType))
 
+  override def checkInputDataTypes(): TypeCheckResult = {
+    super.checkInputDataTypes() match {
+      case fail: TypeCheckResult.TypeCheckFailure => fail
+      case _ if str.dataType != pad.dataType =>
+        TypeCheckResult.TypeCheckFailure(
+          s"Arguments 'str' and 'pad' of function '$prettyName' must be the 
same type.")
+      case other => other
+    }
+  }
+
   override def nullSafeEval(string: Any, len: Any, pad: Any): Any = {
     str.dataType match {
       case StringType => string.asInstanceOf[UTF8String]
@@ -1448,6 +1458,15 @@ case class StringRPad(str: Expression, len: Expression, 
pad: Expression = Litera
   override def dataType: DataType = str.dataType
   override def inputTypes: Seq[AbstractDataType] =
     Seq(TypeCollection(StringType, BinaryType), IntegerType, 
TypeCollection(StringType, BinaryType))
+  override def checkInputDataTypes(): TypeCheckResult = {
+    super.checkInputDataTypes() match {
+      case fail: TypeCheckResult.TypeCheckFailure => fail
+      case _ if str.dataType != pad.dataType =>
+        TypeCheckResult.TypeCheckFailure(
+          s"Arguments 'str' and 'pad' of function '$prettyName' must be the 
same type.")
+      case other => other
+    }
+  }
 
   override def nullSafeEval(string: Any, len: Any, pad: Any): Any = {
     str.dataType match {
diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
index beacdbf..064c011 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
@@ -90,6 +90,12 @@ SELECT hex(rpad(unhex(''), 6, unhex('')));
 SELECT hex(rpad(unhex('aabbcc'), 6, unhex('')));
 SELECT hex(rpad(unhex('aabbcc'), 2, unhex('ff')));
 
+-- lpad/rpad with mixed STRING and BINARY input
+SELECT lpad('abc', 5, x'12');
+SELECT lpad(x'12', 5, 'abc');
+SELECT rpad('abc', 5, x'12');
+SELECT rpad(x'12', 5, 'abc');
+
 -- decode
 select decode();
 select decode(encode('abc', 'utf-8'));
diff --git 
a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
index 56717af..0e29ee7 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 74
+-- Number of queries: 78
 
 
 -- !query
@@ -543,6 +543,42 @@ AABB
 
 
 -- !query
+SELECT lpad('abc', 5, x'12')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'lpad('abc', 5, X'12')' due to data type mismatch: Arguments 
'str' and 'pad' of function 'lpad' must be the same type.; line 1 pos 7
+
+
+-- !query
+SELECT lpad(x'12', 5, 'abc')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'lpad(X'12', 5, 'abc')' due to data type mismatch: Arguments 
'str' and 'pad' of function 'lpad' must be the same type.; line 1 pos 7
+
+
+-- !query
+SELECT rpad('abc', 5, x'12')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'rpad('abc', 5, X'12')' due to data type mismatch: Arguments 
'str' and 'pad' of function 'rpad' must be the same type.; line 1 pos 7
+
+
+-- !query
+SELECT rpad(x'12', 5, 'abc')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'rpad(X'12', 5, 'abc')' due to data type mismatch: Arguments 
'str' and 'pad' of function 'rpad' must be the same type.; line 1 pos 7
+
+
+-- !query
 select decode()
 -- !query schema
 struct<>
diff --git 
a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
index e202521..d90274b 100644
--- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 74
+-- Number of queries: 78
 
 
 -- !query
@@ -533,6 +533,42 @@ AABB
 
 
 -- !query
+SELECT lpad('abc', 5, x'12')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'lpad('abc', 5, X'12')' due to data type mismatch: Arguments 
'str' and 'pad' of function 'lpad' must be the same type.; line 1 pos 7
+
+
+-- !query
+SELECT lpad(x'12', 5, 'abc')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'lpad(X'12', 5, 'abc')' due to data type mismatch: Arguments 
'str' and 'pad' of function 'lpad' must be the same type.; line 1 pos 7
+
+
+-- !query
+SELECT rpad('abc', 5, x'12')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'rpad('abc', 5, X'12')' due to data type mismatch: Arguments 
'str' and 'pad' of function 'rpad' must be the same type.; line 1 pos 7
+
+
+-- !query
+SELECT rpad(x'12', 5, 'abc')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'rpad(X'12', 5, 'abc')' due to data type mismatch: Arguments 
'str' and 'pad' of function 'rpad' must be the same type.; line 1 pos 7
+
+
+-- !query
 select decode()
 -- !query schema
 struct<>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to