Github user chenghao-intel commented on a diff in the pull request:

    https://github.com/apache/spark/pull/7034#discussion_r33418346
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
 ---
    @@ -293,20 +294,118 @@ case class Substring(str: Expression, pos: 
Expression, len: Expression)
     }
     
     /**
    - * A function that return the length of the given string expression.
    + * A function that return the length of the given string or binary 
expression.
      */
    -case class StringLength(child: Expression) extends UnaryExpression with 
ExpectsInputTypes {
    +case class Length(child: Expression) extends UnaryExpression with 
ExpressionConstraint {
       override def dataType: DataType = IntegerType
    -  override def expectedChildTypes: Seq[DataType] = Seq(StringType)
    +  def constraint: Seq[DataTypeConstraint] =
    +    DataTypeConstraint(AcceptSpecifiedType(Set(StringType, BinaryType))) 
:: Nil
    +
    +  @transient
    +  private lazy val function: Any => Any = child.dataType match {
    +    case StringType => (s: Any) => s.asInstanceOf[UTF8String].length()
    +    case BinaryType => (s: Any) => s.asInstanceOf[Array[Byte]].length
    +    case NullType => (s: Any) => null
    +  }
     
       override def eval(input: InternalRow): Any = {
    -    val string = child.eval(input)
    -    if (string == null) null else string.asInstanceOf[UTF8String].length
    +    val value = child.eval(input)
    +    if (value == null) null else function(value)
       }
     
    -  override def toString: String = s"length($child)"
    -
       override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): 
String = {
    -    defineCodeGen(ctx, ev, c => s"($c).length()")
    +    child.dataType match {
    +      case StringType => defineCodeGen(ctx, ev, c => s"($c).length()")
    +      case BinaryType => defineCodeGen(ctx, ev, c => s"($c).length")
    +      case NullType => defineCodeGen(ctx, ev, c => s"-1")
    +    }
    +  }
    +}
    +
    +/**
    + * Formats the number X to a format like '#,###,###.##', rounded to D 
decimal places,
    + * and returns the result as a string. If D is 0, the result has no 
decimal point or
    + * fractional part.
    + * (As of Hive 0.10.0; bug with float types fixed in Hive 0.14.0,
    + * decimal type support added in Hive 0.14.0)
    + */
    +case class FormatNumber(x: Expression, d: Expression) extends Expression 
with ExpressionConstraint {
    +
    +  override def children: Seq[Expression] = x :: d :: Nil
    +  override def dataType: DataType = StringType
    +  override def foldable: Boolean = x.foldable && d.foldable
    +  override def nullable: Boolean = x.nullable || d.nullable
    +
    +  def constraint: Seq[DataTypeConstraint] =
    +    DataTypeConstraint(AcceptNumbericType, (expr: Expression) => 
expr.dataType match {
    +      case _ @ (_: DoubleType | _: DecimalType) => expr
    +      case _ @ (_: NullType | _: FractionalType) => Cast(expr, DoubleType)
    +      case LongType => expr
    +      case _: IntegerType => expr
    +      case _: IntegralType => Cast(expr, IntegerType)
    +    }) ::
    +    DataTypeConstraint(AcceptIntegralType, (expr: Expression) => 
expr.dataType match {
    +      case IntegerType => expr
    +      case _: IntegralType => Cast(expr, IntegerType)
    +    }) :: Nil
    +
    +  @transient
    +  private var lastDValue: Int = -100
    +
    +  @transient
    +  private val pattern: StringBuffer = new StringBuffer()
    +
    +  @transient
    +  private val numberFormat: DecimalFormat = new DecimalFormat("")
    +
    +  @transient
    +  private lazy val function: Any => Any = x.dataType match {
    +    case DoubleType => (s: Any) =>
    +      UTF8String.fromString(numberFormat.format(s.asInstanceOf[Double]))
    +    case LongType => (s: Any) =>
    +      UTF8String.fromString(numberFormat.format(s.asInstanceOf[Long]))
    +    case IntegerType => (s: Any) =>
    +      UTF8String.fromString(numberFormat.format(s.asInstanceOf[Int]))
    +    case _: DecimalType => (s: Any) =>
    +      
UTF8String.fromString(numberFormat.format(s.asInstanceOf[Decimal].toJavaBigDecimal))
    +    case NullType => (s: Any) => null
    --- End diff --
    
    Yes, I will update it.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to