AngersZhuuuu commented on a change in pull request #32365:
URL: https://github.com/apache/spark/pull/32365#discussion_r631625554



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
##########
@@ -2649,3 +2652,78 @@ case class Sentences(
     copy(str = newFirst, language = newSecond, country = newThird)
 
 }
+
+case class ToPrettyString(child: Expression, timeZoneId: Option[String] = None)
+  extends UnaryExpression with TimeZoneAwareExpression {
+  import ToPrettyString._
+
+  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
+    copy(timeZoneId = Option(timeZoneId))
+  override def dataType: DataType = StringType
+
+  private val timeFormatters: TimeFormatters =
+    TimeFormatters(DateFormatter(zoneId), 
TimestampFormatter.getFractionFormatter(zoneId))
+
+  override def nullSafeEval(input: Any): Any = {
+    UTF8String.fromString(toHiveString((input, child.dataType), false, 
timeFormatters))
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    nullSafeCodeGen(ctx, ev, eval => {
+      val toHiveString = ToPrettyString.getClass.getName.stripSuffix("$")
+      val tuple2 = Tuple2.getClass.getName.stripSuffix("$")
+      val dataType = JavaCode.global(
+        ctx.addReferenceObj("dataType", child.dataType),
+        child.dataType.getClass)
+      val formatter = JavaCode.global(
+        ctx.addReferenceObj("dateFormatter", timeFormatters),
+        timeFormatters.getClass)
+      s"""${ev.value} = UTF8String.fromString($toHiveString.toHiveString(
+         |$tuple2.apply($eval, ${dataType}), false, 
$formatter));""".stripMargin
+    })
+  }
+
+  override def prettyName: String = "to_hive_string"
+
+  override protected def withNewChildInternal(newChild: Expression): 
Expression =
+    ToPrettyString(newChild)
+}
+
+object ToPrettyString {
+  case class TimeFormatters(date: DateFormatter, timestamp: TimestampFormatter)
+
+  def toHiveString(
+      a: (Any, DataType),
+      nested: Boolean,
+      formatters: TimeFormatters): String = a match {

Review comment:
   > This is inefficient in codegen, as we know the data type ahead of time but we are still doing a runtime pattern match.
   >
   > Can we follow `Cast` to implement the expression?

   Done. Could you recheck this?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to