Github user maropu commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20024#discussion_r159630335
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 ---
    @@ -199,13 +199,60 @@ case class Cast(child: Expression, dataType: 
DataType, timeZoneId: Option[String
     
       // [[func]] assumes the input is no longer null because eval already 
does the null check.
       @inline private[this] def buildCast[T](a: Any, func: T => Any): Any = 
func(a.asInstanceOf[T])
    +  @inline private[this] def buildWriter[T](
    +      a: Any, buffer: UTF8StringBuilder, writer: (T, UTF8StringBuilder) => 
Unit): Unit = {
    +    writer(a.asInstanceOf[T], buffer)
    +  }
    +
    +  private[this] def buildElemWriter(
    +      from: DataType): (Any, UTF8StringBuilder) => Unit = from match {
    +    case BinaryType => buildWriter[Array[Byte]](_, _, (b, buf) => 
buf.append(b))
    +    case StringType => buildWriter[UTF8String](_, _, (b, buf) => 
buf.append(b))
    +    case DateType => buildWriter[Int](_, _,
    +      (d, buf) => buf.append(DateTimeUtils.dateToString(d)))
    +    case TimestampType => buildWriter[Long](_, _,
    +      (t, buf) => buf.append(DateTimeUtils.timestampToString(t)))
    +    case ar: ArrayType =>
    +      buildWriter[ArrayData](_, _, (array, buf) => {
    +         buf.append("[")
    +        if (array.numElements > 0) {
    +          val writeElemToBuffer = buildElemWriter(ar.elementType)
    +          writeElemToBuffer(array.get(0, ar.elementType), buf)
    +          var i = 1
    +          while (i < array.numElements) {
    +            buf.append(", ")
    +            writeElemToBuffer(array.get(i, ar.elementType), buf)
    +            i += 1
    +          }
    +        }
    +        buf.append("]")
    +      })
    +    case _ => buildWriter[Any](_, _, (o, buf) => 
buf.append(String.valueOf(o)))
    +  }
     
       // UDFToString
       private[this] def castToString(from: DataType): Any => Any = from match {
         case BinaryType => buildCast[Array[Byte]](_, UTF8String.fromBytes)
         case DateType => buildCast[Int](_, d => 
UTF8String.fromString(DateTimeUtils.dateToString(d)))
         case TimestampType => buildCast[Long](_,
           t => UTF8String.fromString(DateTimeUtils.timestampToString(t, 
timeZone)))
    +    case ar: ArrayType =>
    +      buildCast[ArrayData](_, array => {
    +        val res = new UTF8StringBuilder
    +        res.append("[")
    +        if (array.numElements > 0) {
    +          val writeElemToBuffer = buildElemWriter(ar.elementType)
    +          writeElemToBuffer(array.get(0, ar.elementType), res)
    --- End diff --
    
    That's cuz a single string builder is reused for the whole conversion (In 
the suggested code, IIUC multiple string builders are created for nested 
arrays). But, yea. since simpler is better, I'll fix in that way.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to