cloud-fan commented on code in PR #37483:
URL: https://github.com/apache/spark/pull/37483#discussion_r954455653
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala:
##########
@@ -2487,59 +2538,117 @@ case class Encode(value: Expression, charset:
Expression)
""",
since = "3.3.0",
group = "string_funcs")
-// scalastyle:on line.size.limit
-case class ToBinary(
- expr: Expression,
- format: Option[Expression],
- nullOnInvalidFormat: Boolean = false) extends RuntimeReplaceable
- with ImplicitCastInputTypes {
-
- override lazy val replacement: Expression = format.map { f =>
- assert(f.foldable && (f.dataType == StringType || f.dataType == NullType))
- val value = f.eval()
- if (value == null) {
- Literal(null, BinaryType)
- } else {
- value.asInstanceOf[UTF8String].toString.toLowerCase(Locale.ROOT) match {
- case "hex" => Unhex(expr)
- case "utf-8" => Encode(expr, Literal("UTF-8"))
- case "base64" => UnBase64(expr)
- case _ if nullOnInvalidFormat => Literal(null, BinaryType)
- case other => throw
QueryCompilationErrors.invalidStringLiteralParameter(
- "to_binary", "format", other,
- Some("The value has to be a case-insensitive string literal of " +
- "'hex', 'utf-8', or 'base64'."))
- }
- }
- }.getOrElse(Unhex(expr))
+case class ToBinary(left: Expression, right: Expression)
+ extends BinaryExpression
+ with ImplicitCastInputTypes with NullIntolerant with SupportQueryContext {
- def this(expr: Expression) = this(expr, None, false)
+ def this(left: Expression) = this(left, Literal("hex"))
- def this(expr: Expression, format: Expression) = this(expr, Some({
- // We perform this check in the constructor to make it eager and not go
through type coercion.
- if (format.foldable && (format.dataType == StringType || format.dataType
== NullType)) {
- format
- } else {
- throw QueryCompilationErrors.requireLiteralParameter("to_binary",
"format", "string")
- }
- }),
- false
- )
+ override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType)
- override def prettyName: String = "to_binary"
+ override def dataType: DataType = BinaryType
- override def children: Seq[Expression] = expr +: format.toSeq
+ override def nullable: Boolean = true
- override def inputTypes: Seq[AbstractDataType] = children.map(_ =>
StringType)
+ override def prettyName: String = "to_binary"
override protected def withNewChildrenInternal(
- newChildren: IndexedSeq[Expression]): Expression = {
- if (format.isDefined) {
- copy(expr = newChildren.head, format = Some(newChildren.last))
- } else {
- copy(expr = newChildren.head)
+ newLeft: Expression,
+ newRight: Expression): ToBinary = copy(left = newLeft, right = newRight)
+
+ override def initQueryContext(): Option[SQLQueryContext] =
Option(origin.context)
+
+ override protected def nullSafeEval(input: Any, format: Any): Any = {
+ val fmtString = format.asInstanceOf[UTF8String]
+ val srcString = input.asInstanceOf[UTF8String]
+ fmtString.toString.toLowerCase(Locale.ROOT) match {
+ case "hex" =>
Review Comment:
This is worse than before. We assume the format string is a constant, so we
decide the "to binary" behavior at compile time. Now we are checking the
format string for each input record at runtime.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]