This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 5d531fc [SPARK-37047][SQL][FOLLOWUP] lpad/rpad should fail if parameter str and pad are different types 5d531fc is described below commit 5d531fcb25cd3727d6adc458241c09d2ce177f16 Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Mon Oct 25 09:55:24 2021 +0900 [SPARK-37047][SQL][FOLLOWUP] lpad/rpad should fail if parameter str and pad are different types ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/34154 . Currently, lpad/rpad throws a ClassCastException at runtime if the parameters `str` and `pad` have different types (one is STRING and the other is BINARY). This PR makes it fail during analysis instead. ### Why are the changes needed? Fail earlier for invalid function calls. ### Does this PR introduce _any_ user-facing change? No, the new lpad/rpad change is not released yet. ### How was this patch tested? New tests. Closes #34370 from cloud-fan/minor. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../catalyst/expressions/stringExpressions.scala | 19 +++++++++++ .../sql-tests/inputs/string-functions.sql | 6 ++++ .../results/ansi/string-functions.sql.out | 38 +++++++++++++++++++++- .../sql-tests/results/string-functions.sql.out | 38 +++++++++++++++++++++- 4 files changed, 99 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index ae13afb..276a366 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1384,6 +1384,16 @@ case class StringLPad(str: Expression, len: Expression, pad: Expression = Litera override def inputTypes: Seq[AbstractDataType] = 
Seq(TypeCollection(StringType, BinaryType), IntegerType, TypeCollection(StringType, BinaryType)) + override def checkInputDataTypes(): TypeCheckResult = { + super.checkInputDataTypes() match { + case fail: TypeCheckResult.TypeCheckFailure => fail + case _ if str.dataType != pad.dataType => + TypeCheckResult.TypeCheckFailure( + s"Arguments 'str' and 'pad' of function '$prettyName' must be the same type.") + case other => other + } + } + override def nullSafeEval(string: Any, len: Any, pad: Any): Any = { str.dataType match { case StringType => string.asInstanceOf[UTF8String] @@ -1448,6 +1458,15 @@ case class StringRPad(str: Expression, len: Expression, pad: Expression = Litera override def dataType: DataType = str.dataType override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType), IntegerType, TypeCollection(StringType, BinaryType)) + override def checkInputDataTypes(): TypeCheckResult = { + super.checkInputDataTypes() match { + case fail: TypeCheckResult.TypeCheckFailure => fail + case _ if str.dataType != pad.dataType => + TypeCheckResult.TypeCheckFailure( + s"Arguments 'str' and 'pad' of function '$prettyName' must be the same type.") + case other => other + } + } override def nullSafeEval(string: Any, len: Any, pad: Any): Any = { str.dataType match { diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index beacdbf..064c011 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -90,6 +90,12 @@ SELECT hex(rpad(unhex(''), 6, unhex(''))); SELECT hex(rpad(unhex('aabbcc'), 6, unhex(''))); SELECT hex(rpad(unhex('aabbcc'), 2, unhex('ff'))); +-- lpad/rpad with mixed STRING and BINARY input +SELECT lpad('abc', 5, x'12'); +SELECT lpad(x'12', 5, 'abc'); +SELECT rpad('abc', 5, x'12'); +SELECT rpad(x'12', 5, 'abc'); + -- decode select decode(); select 
decode(encode('abc', 'utf-8')); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 56717af..0e29ee7 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 74 +-- Number of queries: 78 -- !query @@ -543,6 +543,42 @@ AABB -- !query +SELECT lpad('abc', 5, x'12') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'lpad('abc', 5, X'12')' due to data type mismatch: Arguments 'str' and 'pad' of function 'lpad' must be the same type.; line 1 pos 7 + + +-- !query +SELECT lpad(x'12', 5, 'abc') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'lpad(X'12', 5, 'abc')' due to data type mismatch: Arguments 'str' and 'pad' of function 'lpad' must be the same type.; line 1 pos 7 + + +-- !query +SELECT rpad('abc', 5, x'12') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'rpad('abc', 5, X'12')' due to data type mismatch: Arguments 'str' and 'pad' of function 'rpad' must be the same type.; line 1 pos 7 + + +-- !query +SELECT rpad(x'12', 5, 'abc') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'rpad(X'12', 5, 'abc')' due to data type mismatch: Arguments 'str' and 'pad' of function 'rpad' must be the same type.; line 1 pos 7 + + +-- !query select decode() -- !query schema struct<> diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index e202521..d90274b 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 74 +-- Number of queries: 78 -- !query @@ -533,6 +533,42 @@ AABB -- !query +SELECT lpad('abc', 5, x'12') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'lpad('abc', 5, X'12')' due to data type mismatch: Arguments 'str' and 'pad' of function 'lpad' must be the same type.; line 1 pos 7 + + +-- !query +SELECT lpad(x'12', 5, 'abc') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'lpad(X'12', 5, 'abc')' due to data type mismatch: Arguments 'str' and 'pad' of function 'lpad' must be the same type.; line 1 pos 7 + + +-- !query +SELECT rpad('abc', 5, x'12') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'rpad('abc', 5, X'12')' due to data type mismatch: Arguments 'str' and 'pad' of function 'rpad' must be the same type.; line 1 pos 7 + + +-- !query +SELECT rpad(x'12', 5, 'abc') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'rpad(X'12', 5, 'abc')' due to data type mismatch: Arguments 'str' and 'pad' of function 'rpad' must be the same type.; line 1 pos 7 + + +-- !query select decode() -- !query schema struct<> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org