This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 01cdfb4b785 [SPARK-38590][SQL] New SQL function: try_to_binary
01cdfb4b785 is described below

commit 01cdfb4b7858e85a89162435ee176dc64b63b700
Author: Gengliang Wang <gengli...@apache.org>
AuthorDate: Thu Apr 7 08:52:50 2022 +0300

    [SPARK-38590][SQL] New SQL function: try_to_binary
    
    ### What changes were proposed in this pull request?
    
    Add a new SQL function: `try_to_binary`. It is identical to the function
    `to_binary`, except that it returns NULL instead of throwing an exception
    on encoding errors.
    There is a similar function in Snowflake:
    https://docs.snowflake.com/en/sql-reference/functions/try_to_binary.html
    
    ### Why are the changes needed?
    
    Users can run queries to completion without being interrupted by encoding errors.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, it adds a new SQL function: `try_to_binary`. It is identical to the
    function `to_binary`, except that it returns NULL instead of throwing an
    exception on encoding errors.
    
    ### How was this patch tested?
    
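    Unit tests: new cases in `try-string-functions.sql` and an added
    invalid-input case in `string-functions.sql`.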
    
    Closes #35897 from gengliangwang/try_to_binary.
    
    Authored-by: Gengliang Wang <gengli...@apache.org>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
    (cherry picked from commit becda3339381b3975ed567c156260eda036d7a1b)
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../sql/catalyst/analysis/FunctionRegistry.scala   |   1 +
 .../spark/sql/catalyst/expressions/TryEval.scala   |  35 +++++++++++++++++++++
 .../catalyst/expressions/stringExpressions.scala   |  24 ++++++++------
 .../sql-functions/sql-expression-schema.md         |   3 +-
 .../sql-tests/inputs/string-functions.sql          |   4 ++-
 .../sql-tests/inputs/try-string-functions.sql      |  21 +++++++++++++
 .../results/ansi/string-functions.sql.out          |  11 ++++++-
 .../sql-tests/results/string-functions.sql.out     |  11 ++++++-
 .../sql-tests/results/try-string-functions.sql.out | Bin 0 -> 1925 bytes
 9 files changed, 97 insertions(+), 13 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index bb4aa701102..5befa779d16 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -454,6 +454,7 @@ object FunctionRegistry {
     expression[TryMultiply]("try_multiply"),
     expression[TryElementAt]("try_element_at"),
     expression[TrySum]("try_sum"),
+    expression[TryToBinary]("try_to_binary"),
 
     // aggregate functions
     expression[HyperLogLogPlusPlus]("approx_count_distinct"),
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala
index 7a8a689a1bd..589e5801424 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala
@@ -181,3 +181,38 @@ case class TryMultiply(left: Expression, right: 
Expression, replacement: Express
   override protected def withNewChildInternal(newChild: Expression): 
Expression =
     this.copy(replacement = newChild)
 }
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(str[, fmt]) - This is a special version of `to_binary` that 
performs the same operation, but returns a NULL value instead of raising an 
error if the conversion cannot be performed.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('abc', 'utf-8');
+       abc
+      > select _FUNC_('a!', 'base64');
+       NULL
+      > select _FUNC_('abc', 'invalidFormat');
+       NULL
+  """,
+  since = "3.3.0",
+  group = "string_funcs")
+// scalastyle:on line.size.limit
+case class TryToBinary(
+    expr: Expression,
+    format: Option[Expression],
+    replacement: Expression) extends RuntimeReplaceable
+  with InheritAnalysisRules {
+  def this(expr: Expression) =
+    this(expr, None, TryEval(ToBinary(expr, None, nullOnInvalidFormat = true)))
+
+  def this(expr: Expression, formatExpression: Expression) =
+    this(expr, Some(formatExpression),
+      TryEval(ToBinary(expr, Some(formatExpression), nullOnInvalidFormat = 
true)))
+
+  override def prettyName: String = "try_to_binary"
+
+  override def parameters: Seq[Expression] = expr +: format.toSeq
+
+  override protected def withNewChildInternal(newChild: Expression): 
Expression =
+    this.copy(replacement = newChild)
+}
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index a08ab84ac6f..88045f85bca 100755
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -2638,7 +2638,10 @@ case class Encode(value: Expression, charset: Expression)
   since = "3.3.0",
   group = "string_funcs")
 // scalastyle:on line.size.limit
-case class ToBinary(expr: Expression, format: Option[Expression]) extends 
RuntimeReplaceable
+case class ToBinary(
+    expr: Expression,
+    format: Option[Expression],
+    nullOnInvalidFormat: Boolean = false) extends RuntimeReplaceable
   with ImplicitCastInputTypes {
 
   override lazy val replacement: Expression = format.map { f =>
@@ -2651,6 +2654,7 @@ case class ToBinary(expr: Expression, format: 
Option[Expression]) extends Runtim
         case "hex" => Unhex(expr)
         case "utf-8" => Encode(expr, Literal("UTF-8"))
         case "base64" => UnBase64(expr)
+        case _ if nullOnInvalidFormat => Literal(null, BinaryType)
         case other => throw 
QueryCompilationErrors.invalidStringLiteralParameter(
           "to_binary", "format", other,
           Some("The value has to be a case-insensitive string literal of " +
@@ -2659,16 +2663,18 @@ case class ToBinary(expr: Expression, format: 
Option[Expression]) extends Runtim
     }
   }.getOrElse(Unhex(expr))
 
-  def this(expr: Expression) = this(expr, None)
+  def this(expr: Expression) = this(expr, None, false)
 
   def this(expr: Expression, format: Expression) = this(expr, Some({
-    // We perform this check in the constructor to make it eager and not go 
through type coercion.
-    if (format.foldable && (format.dataType == StringType || format.dataType 
== NullType)) {
-      format
-    } else {
-      throw QueryCompilationErrors.requireLiteralParameter("to_binary", 
"format", "string")
-    }
-  }))
+      // We perform this check in the constructor to make it eager and not go 
through type coercion.
+      if (format.foldable && (format.dataType == StringType || format.dataType 
== NullType)) {
+        format
+      } else {
+        throw QueryCompilationErrors.requireLiteralParameter("to_binary", 
"format", "string")
+      }
+    }),
+    false
+    )
 
   override def prettyName: String = "to_binary"
 
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md 
b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 644bfa926da..1dbf9678af9 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -1,6 +1,6 @@
 <!-- Automatically generated by ExpressionsSchemaSuite -->
 ## Summary
-  - Number of queries: 385
+  - Number of queries: 386
   - Number of expressions that missing example: 12
   - Expressions missing examples: 
bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint
 ## Schema of Built-in Functions
@@ -316,6 +316,7 @@
 | org.apache.spark.sql.catalyst.expressions.TryElementAt | try_element_at | 
SELECT try_element_at(array(1, 2, 3), 2) | struct<try_element_at(array(1, 2, 
3), 2):int> |
 | org.apache.spark.sql.catalyst.expressions.TryMultiply | try_multiply | 
SELECT try_multiply(2, 3) | struct<try_multiply(2, 3):int> |
 | org.apache.spark.sql.catalyst.expressions.TrySubtract | try_subtract | 
SELECT try_subtract(2, 1) | struct<try_subtract(2, 1):int> |
+| org.apache.spark.sql.catalyst.expressions.TryToBinary | try_to_binary | 
SELECT try_to_binary('abc', 'utf-8') | struct<try_to_binary(abc, utf-8):binary> 
|
 | org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) 
| struct<typeof(1):string> |
 | org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT 
unbase64('U3BhcmsgU1FM') | struct<unbase64(U3BhcmsgU1FM):binary> |
 | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT 
negative(1) | struct<negative(1):int> |
diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
index 7d22e791570..0db28ad9f3e 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
@@ -184,5 +184,7 @@ select to_binary(null, cast(null as string));
 -- 'format' parameter must be string type or void type.
 select to_binary(null, cast(null as int));
 select to_binary('abc', 1);
--- invalid inputs.
+-- invalid format
 select to_binary('abc', 'invalidFormat');
+-- invalid string input
+select to_binary('a!', 'base64');
diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/try-string-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/try-string-functions.sql
new file mode 100644
index 00000000000..20f02374e78
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/try-string-functions.sql
@@ -0,0 +1,21 @@
+-- try_to_binary
+select try_to_binary('abc');
+select try_to_binary('abc', 'utf-8');
+select try_to_binary('abc', 'base64');
+select try_to_binary('abc', 'hex');
+-- 'format' parameter can be any foldable string value, not just literal.
+select try_to_binary('abc', concat('utf', '-8'));
+-- 'format' parameter is case insensitive.
+select try_to_binary('abc', 'Hex');
+-- null inputs lead to null result.
+select try_to_binary('abc', null);
+select try_to_binary(null, 'utf-8');
+select try_to_binary(null, null);
+select try_to_binary(null, cast(null as string));
+-- 'format' parameter must be string type or void type.
+select try_to_binary(null, cast(null as int));
+select try_to_binary('abc', 1);
+-- invalid format
+select try_to_binary('abc', 'invalidFormat');
+-- invalid string input
+select try_to_binary('a!', 'base64');
diff --git 
a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
index 01213bd57ad..52d70e22a44 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 141
+-- Number of queries: 142
 
 
 -- !query
@@ -1140,3 +1140,12 @@ struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 Invalid value for the 'format' parameter of function 'to_binary': 
invalidformat. The value has to be a case-insensitive string literal of 'hex', 
'utf-8', or 'base64'.
+
+
+-- !query
+select to_binary('a!', 'base64')
+-- !query schema
+struct<>
+-- !query output
+java.lang.IllegalArgumentException
+Last unit does not have enough valid bits
diff --git 
a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
index 3a7f197e362..ff14da143da 100644
--- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 141
+-- Number of queries: 142
 
 
 -- !query
@@ -1136,3 +1136,12 @@ struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 Invalid value for the 'format' parameter of function 'to_binary': 
invalidformat. The value has to be a case-insensitive string literal of 'hex', 
'utf-8', or 'base64'.
+
+
+-- !query
+select to_binary('a!', 'base64')
+-- !query schema
+struct<>
+-- !query output
+java.lang.IllegalArgumentException
+Last unit does not have enough valid bits
diff --git 
a/sql/core/src/test/resources/sql-tests/results/try-string-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/try-string-functions.sql.out
new file mode 100644
index 00000000000..bda723fd19e
Binary files /dev/null and 
b/sql/core/src/test/resources/sql-tests/results/try-string-functions.sql.out 
differ


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to