vitaliili-db commented on code in PR #37483:
URL: https://github.com/apache/spark/pull/37483#discussion_r956457712
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -2487,59 +2538,117 @@ case class Encode(value: Expression, charset:
Expression)
""",
since = "3.3.0",
group = "string_funcs")
-// scalastyle:on line.size.limit
-case class ToBinary(
- expr: Expression,
- format: Option[Expression],
- nullOnInvalidFormat: Boolean = false) extends RuntimeReplaceable
- with ImplicitCastInputTypes {
-
- override lazy val replacement: Expression = format.map { f =>
- assert(f.foldable && (f.dataType == StringType || f.dataType == NullType))
- val value = f.eval()
- if (value == null) {
- Literal(null, BinaryType)
- } else {
- value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT) match {
- case "hex" => Unhex(expr)
- case "utf-8" => Encode(expr, Literal("UTF-8"))
- case "base64" => UnBase64(expr)
Review Comment:
Yes, I will make changes to reuse the logic from `Unhex`, `UnBase64` and
`Encode`. However, I am not sure we can keep it `RuntimeReplaceable` unless
the format is always a constant (which I believe it is not).
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -2487,59 +2538,117 @@ case class Encode(value: Expression, charset:
Expression)
""",
since = "3.3.0",
group = "string_funcs")
-// scalastyle:on line.size.limit
-case class ToBinary(
- expr: Expression,
- format: Option[Expression],
- nullOnInvalidFormat: Boolean = false) extends RuntimeReplaceable
- with ImplicitCastInputTypes {
-
- override lazy val replacement: Expression = format.map { f =>
- assert(f.foldable && (f.dataType == StringType || f.dataType == NullType))
- val value = f.eval()
- if (value == null) {
- Literal(null, BinaryType)
- } else {
- value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT) match {
- case "hex" => Unhex(expr)
- case "utf-8" => Encode(expr, Literal("UTF-8"))
- case "base64" => UnBase64(expr)
- case _ if nullOnInvalidFormat => Literal(null, BinaryType)
- case other => throw
QueryCompilationErrors.invalidStringLiteralParameter(
- "to_binary", "format", other,
- Some("The value has to be a case-insensitive string literal of " +
- "'hex', 'utf-8', or 'base64'."))
- }
- }
- }.getOrElse(Unhex(expr))
+case class ToBinary(left: Expression, right: Expression)
+ extends BinaryExpression
+ with ImplicitCastInputTypes with NullIntolerant with SupportQueryContext {
- def this(expr: Expression) = this(expr, None, false)
+ def this(left: Expression) = this(left, Literal("hex"))
- def this(expr: Expression, format: Expression) = this(expr, Some({
- // We perform this check in the constructor to make it eager and not go
through type coercion.
- if (format.foldable && (format.dataType == StringType || format.dataType
== NullType)) {
- format
- } else {
- throw QueryCompilationErrors.requireLiteralParameter("to_binary",
"format", "string")
- }
- }),
- false
- )
+ override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType)
- override def prettyName: String = "to_binary"
+ override def dataType: DataType = BinaryType
- override def children: Seq[Expression] = expr +: format.toSeq
+ override def nullable: Boolean = true
- override def inputTypes: Seq[AbstractDataType] = children.map(_ =>
StringType)
+ override def prettyName: String = "to_binary"
override protected def withNewChildrenInternal(
- newChildren: IndexedSeq[Expression]): Expression = {
- if (format.isDefined) {
- copy(expr = newChildren.head, format = Some(newChildren.last))
- } else {
- copy(expr = newChildren.head)
+ newLeft: Expression,
+ newRight: Expression): ToBinary = copy(left = newLeft, right = newRight)
+
+ override def initQueryContext(): Option[SQLQueryContext] =
Option(origin.context)
+
+ override protected def nullSafeEval(input: Any, format: Any): Any = {
+ val fmtString = format.asInstanceOf[UTF8String]
+ val srcString = input.asInstanceOf[UTF8String]
+ fmtString.toString.toLowerCase(Locale.ROOT) match {
+ case "hex" =>
Review Comment:
Why do we assume the format string is constant? It could be a conditional
expression or a column reference, couldn't it?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]