This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new e93bff6fc0bc [SPARK-46187][SQL] Align codegen and non-codegen 
implementation of `StringDecode`
e93bff6fc0bc is described below

commit e93bff6fc0bc3a549de02958ffc17b1bca3d50b8
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Fri Dec 1 10:42:31 2023 +0100

    [SPARK-46187][SQL] Align codegen and non-codegen implementation of 
`StringDecode`
    
    ### What changes were proposed in this pull request?
    In the PR, I propose to change the implementation of the interpreted mode of 
`StringDecode`, and consequently of the `decode` function, to make it consistent 
with codegen. Both implementations raise the same error with the error class 
`INVALID_PARAMETER_VALUE.CHARSET`.
    
    ### Why are the changes needed?
    To make the codegen and non-codegen implementations of the `StringDecode` 
expression consistent, so that users observe the same behaviour in both modes.
    
    ### Does this PR introduce _any_ user-facing change?
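    Yes, if user code depends on the error from `decode()`: an unsupported 
charset now raises `SparkIllegalArgumentException` with the error class 
`INVALID_PARAMETER_VALUE.CHARSET` instead of 
`java.io.UnsupportedEncodingException`.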
    
    ### How was this patch tested?
    By running the following test suites:
    ```
    $ PYSPARK_PYTHON=python3 build/sbt "sql/testOnly 
org.apache.spark.sql.SQLQueryTestSuite -- -z string-functions.sql"
    $ build/sbt "core/testOnly *SparkThrowableSuite"
    $ build/sbt "test:testOnly *.StringFunctionsSuite"
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #44094 from MaxGekk/align-codegen-stringdecode.
    
    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../catalyst/expressions/stringExpressions.scala   | 18 ++++++++----
 .../analyzer-results/ansi/string-functions.sql.out | 15 ++++++++++
 .../analyzer-results/string-functions.sql.out      | 15 ++++++++++
 .../sql-tests/inputs/string-functions.sql          |  2 ++
 .../results/ansi/string-functions.sql.out          | 34 ++++++++++++++++++++++
 .../sql-tests/results/string-functions.sql.out     | 34 ++++++++++++++++++++++
 6 files changed, 113 insertions(+), 5 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 412422f4da4e..84a5eebd70ec 100755
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -2648,18 +2648,26 @@ case class StringDecode(bin: Expression, charset: 
Expression)
 
   protected override def nullSafeEval(input1: Any, input2: Any): Any = {
     val fromCharset = input2.asInstanceOf[UTF8String].toString
-    UTF8String.fromString(new String(input1.asInstanceOf[Array[Byte]], 
fromCharset))
+    try {
+      UTF8String.fromString(new String(input1.asInstanceOf[Array[Byte]], 
fromCharset))
+    } catch {
+      case _: UnsupportedEncodingException =>
+        throw QueryExecutionErrors.invalidCharsetError(prettyName, fromCharset)
+    }
   }
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    nullSafeCodeGen(ctx, ev, (bytes, charset) =>
+    nullSafeCodeGen(ctx, ev, (bytes, charset) => {
+      val fromCharset = ctx.freshName("fromCharset")
       s"""
+        String $fromCharset = $charset.toString();
         try {
-          ${ev.value} = UTF8String.fromString(new String($bytes, 
$charset.toString()));
+          ${ev.value} = UTF8String.fromString(new String($bytes, 
$fromCharset));
         } catch (java.io.UnsupportedEncodingException e) {
-          org.apache.spark.unsafe.Platform.throwException(e);
+          throw QueryExecutionErrors.invalidCharsetError("$prettyName", 
$fromCharset);
         }
-      """)
+      """
+    })
   }
 
   override protected def withNewChildrenInternal(
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/string-functions.sql.out
 
b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/string-functions.sql.out
index 9d8705e3e862..7ace300001d5 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/string-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/string-functions.sql.out
@@ -799,6 +799,21 @@ Project [decode(null, 6, Spark, null, SQL, 4, rocks, null, 
.) AS decode(NULL, 6,
 +- OneRowRelation
 
 
+-- !query
+select decode(X'68656c6c6f', 'Windows-xxx')
+-- !query analysis
+Project [decode(0x68656C6C6F, Windows-xxx) AS decode(X'68656C6C6F', 
Windows-xxx)#x]
++- OneRowRelation
+
+
+-- !query
+select decode(scol, ecol) from values(X'68656c6c6f', 'Windows-xxx') as t(scol, 
ecol)
+-- !query analysis
+Project [decode(scol#x, ecol#x) AS decode(scol, ecol)#x]
++- SubqueryAlias t
+   +- LocalRelation [scol#x, ecol#x]
+
+
 -- !query
 SELECT CONTAINS(null, 'Spark')
 -- !query analysis
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out
 
b/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out
index 9d8705e3e862..7ace300001d5 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out
@@ -799,6 +799,21 @@ Project [decode(null, 6, Spark, null, SQL, 4, rocks, null, 
.) AS decode(NULL, 6,
 +- OneRowRelation
 
 
+-- !query
+select decode(X'68656c6c6f', 'Windows-xxx')
+-- !query analysis
+Project [decode(0x68656C6C6F, Windows-xxx) AS decode(X'68656C6C6F', 
Windows-xxx)#x]
++- OneRowRelation
+
+
+-- !query
+select decode(scol, ecol) from values(X'68656c6c6f', 'Windows-xxx') as t(scol, 
ecol)
+-- !query analysis
+Project [decode(scol#x, ecol#x) AS decode(scol, ecol)#x]
++- SubqueryAlias t
+   +- LocalRelation [scol#x, ecol#x]
+
+
 -- !query
 SELECT CONTAINS(null, 'Spark')
 -- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
index 645f6bcb8327..a90557eba7c7 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
@@ -138,6 +138,8 @@ select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 
'New Jersey', 4, 'Seattl
 select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 
'Seattle');
 select decode(null, 6, 'Spark', NULL, 'SQL', 4, 'rocks');
 select decode(null, 6, 'Spark', NULL, 'SQL', 4, 'rocks', NULL, '.');
+select decode(X'68656c6c6f', 'Windows-xxx');
+select decode(scol, ecol) from values(X'68656c6c6f', 'Windows-xxx') as t(scol, 
ecol);
 
 -- contains
 SELECT CONTAINS(null, 'Spark');
diff --git 
a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
index 89bb20fc1bff..813200b8bb0e 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
@@ -1017,6 +1017,40 @@ struct<decode(NULL, 6, Spark, NULL, SQL, 4, rocks, NULL, 
.):string>
 SQL
 
 
+-- !query
+select decode(X'68656c6c6f', 'Windows-xxx')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "INVALID_PARAMETER_VALUE.CHARSET",
+  "sqlState" : "22023",
+  "messageParameters" : {
+    "charset" : "Windows-xxx",
+    "functionName" : "`decode`",
+    "parameter" : "`charset`"
+  }
+}
+
+
+-- !query
+select decode(scol, ecol) from values(X'68656c6c6f', 'Windows-xxx') as t(scol, 
ecol)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "INVALID_PARAMETER_VALUE.CHARSET",
+  "sqlState" : "22023",
+  "messageParameters" : {
+    "charset" : "Windows-xxx",
+    "functionName" : "`decode`",
+    "parameter" : "`charset`"
+  }
+}
+
+
 -- !query
 SELECT CONTAINS(null, 'Spark')
 -- !query schema
diff --git 
a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
index 6d90a5091578..56782b9a7534 100644
--- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
@@ -949,6 +949,40 @@ struct<decode(NULL, 6, Spark, NULL, SQL, 4, rocks, NULL, 
.):string>
 SQL
 
 
+-- !query
+select decode(X'68656c6c6f', 'Windows-xxx')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "INVALID_PARAMETER_VALUE.CHARSET",
+  "sqlState" : "22023",
+  "messageParameters" : {
+    "charset" : "Windows-xxx",
+    "functionName" : "`decode`",
+    "parameter" : "`charset`"
+  }
+}
+
+
+-- !query
+select decode(scol, ecol) from values(X'68656c6c6f', 'Windows-xxx') as t(scol, 
ecol)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "INVALID_PARAMETER_VALUE.CHARSET",
+  "sqlState" : "22023",
+  "messageParameters" : {
+    "charset" : "Windows-xxx",
+    "functionName" : "`decode`",
+    "parameter" : "`charset`"
+  }
+}
+
+
 -- !query
 SELECT CONTAINS(null, 'Spark')
 -- !query schema


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to