This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3433f2a77d3 [SPARK-41452][SQL] `to_char` should return null when
format is null
3433f2a77d3 is described below
commit 3433f2a77d3dd665f42aa3d558152cf4c912c54c
Author: Bruce Robbins <[email protected]>
AuthorDate: Thu Dec 8 16:14:43 2022 -0800
[SPARK-41452][SQL] `to_char` should return null when format is null
### What changes were proposed in this pull request?
When a user specifies a null format in `to_char`, return null instead of
throwing a `NullPointerException`.
### Why are the changes needed?
`to_char` currently throws a `NullPointerException` when the format is null:
```
spark-sql> select to_char(454, null);
[INTERNAL_ERROR] The Spark SQL phase analysis failed with an internal
error. You hit a bug in Spark or the Spark plugins you use. Please, report this
bug to the corresponding communities or vendors, and provide the full stack
trace.
org.apache.spark.SparkException: [INTERNAL_ERROR] The Spark SQL phase
analysis failed with an internal error. You hit a bug in Spark or the Spark
plugins you use. Please, report this bug to the corresponding communities or
vendors, and provide the full stack trace.
...
Caused by: java.lang.NullPointerException
at
org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormat$lzycompute(numberFormatExpressions.scala:227)
at
org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormat(numberFormatExpressions.scala:227)
at
org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormatter$lzycompute(numberFormatExpressions.scala:228)
at
org.apache.spark.sql.catalyst.expressions.ToCharacter.numberFormatter(numberFormatExpressions.scala:228)
at
org.apache.spark.sql.catalyst.expressions.ToCharacter.checkInputDataTypes(numberFormatExpressions.scala:236)
```
Compare to `to_binary`:
```
spark-sql> SELECT to_binary('abc', null);
NULL
Time taken: 3.097 seconds, Fetched 1 row(s)
spark-sql>
```
Also compare to `to_char` in PostgreSQL 14.6:
```
select to_char(454, null) is null as to_char_is_null;
to_char_is_null
-----------------
t
(1 row)
```
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
New unit test.
Closes #38986 from bersprockets/to_char_issue.
Authored-by: Bruce Robbins <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../expressions/numberFormatExpressions.scala | 20 ++++++++++++++------
.../expressions/StringExpressionsSuite.scala | 7 +++++++
2 files changed, 21 insertions(+), 6 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
index f5f86bfac19..2d4f0438db7 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala
@@ -224,17 +224,21 @@ case class TryToNumber(left: Expression, right:
Expression)
group = "string_funcs")
case class ToCharacter(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant {
- private lazy val numberFormat =
right.eval().toString.toUpperCase(Locale.ROOT)
- private lazy val numberFormatter = new ToNumberParser(numberFormat, true)
+ private lazy val numberFormatter = {
+ val value = right.eval()
+ if (value != null) {
+ new ToNumberParser(value.toString.toUpperCase(Locale.ROOT), true)
+ } else {
+ null
+ }
+ }
override def dataType: DataType = StringType
override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType, StringType)
override def checkInputDataTypes(): TypeCheckResult = {
val inputTypeCheck = super.checkInputDataTypes()
if (inputTypeCheck.isSuccess) {
- if (right.foldable) {
- numberFormatter.checkInputDataTypes()
- } else {
+ if (!right.foldable) {
DataTypeMismatch(
errorSubClass = "NON_FOLDABLE_INPUT",
messageParameters = Map(
@@ -243,6 +247,10 @@ case class ToCharacter(left: Expression, right: Expression)
"inputExpr" -> toSQLExpr(right)
)
)
+ } else if (numberFormatter == null) {
+ TypeCheckResult.TypeCheckSuccess
+ } else {
+ numberFormatter.checkInputDataTypes()
}
} else {
inputTypeCheck
@@ -260,7 +268,7 @@ case class ToCharacter(left: Expression, right: Expression)
val result =
code"""
|${eval.code}
- |boolean ${ev.isNull} = ${eval.isNull};
+ |boolean ${ev.isNull} = ${eval.isNull} || ($builder == null);
|${CodeGenerator.javaType(dataType)} ${ev.value} =
${CodeGenerator.defaultValue(dataType)};
|if (!${ev.isNull}) {
| ${ev.value} = $builder.format(${eval.value});
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index f0b320db3a5..8be732a52ce 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -1256,6 +1256,13 @@ class StringExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
)
}
+ test("SPARK-41452: ToCharacter: null format string") {
+ // if null format, to_char should return null
+ val toCharacterExpr = ToCharacter(Literal(Decimal(454)), Literal(null,
StringType))
+ assert(toCharacterExpr.checkInputDataTypes() ==
TypeCheckResult.TypeCheckSuccess)
+ checkEvaluation(toCharacterExpr, null)
+ }
+
test("ToBinary: fails analysis if fmt is not foldable") {
val wrongFmt = AttributeReference("invalidFormat", StringType)()
val toBinaryExpr = ToBinary(Literal("abc"), Some(wrongFmt))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]