Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/7034#discussion_r33339741
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
---
@@ -293,20 +294,118 @@ case class Substring(str: Expression, pos:
Expression, len: Expression)
}
/**
- * A function that return the length of the given string expression.
+ * A function that return the length of the given string or binary
expression.
*/
-case class StringLength(child: Expression) extends UnaryExpression with
ExpectsInputTypes {
+case class Length(child: Expression) extends UnaryExpression with
ExpressionConstraint {
override def dataType: DataType = IntegerType
- override def expectedChildTypes: Seq[DataType] = Seq(StringType)
+ def constraint: Seq[DataTypeConstraint] =
+ DataTypeConstraint(AcceptSpecifiedType(Set(StringType, BinaryType)))
:: Nil
+
+ @transient
+ private lazy val function: Any => Any = child.dataType match {
+ case StringType => (s: Any) => s.asInstanceOf[UTF8String].length()
+ case BinaryType => (s: Any) => s.asInstanceOf[Array[Byte]].length
+ case NullType => (s: Any) => null
+ }
override def eval(input: InternalRow): Any = {
- val string = child.eval(input)
- if (string == null) null else string.asInstanceOf[UTF8String].length
+ val value = child.eval(input)
+ if (value == null) null else function(value)
}
- override def toString: String = s"length($child)"
-
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode):
String = {
- defineCodeGen(ctx, ev, c => s"($c).length()")
+ child.dataType match {
+ case StringType => defineCodeGen(ctx, ev, c => s"($c).length()")
+ case BinaryType => defineCodeGen(ctx, ev, c => s"($c).length")
+ case NullType => defineCodeGen(ctx, ev, c => s"-1")
+ }
+ }
+}
+
+/**
+ * Formats the number X to a format like '#,###,###.##', rounded to D
decimal places,
+ * and returns the result as a string. If D is 0, the result has no
decimal point or
+ * fractional part.
+ * (As of Hive 0.10.0; bug with float types fixed in Hive 0.14.0,
+ * decimal type support added in Hive 0.14.0)
+ */
+case class FormatNumber(x: Expression, d: Expression) extends Expression
with ExpressionConstraint {
+
+ override def children: Seq[Expression] = x :: d :: Nil
+ override def dataType: DataType = StringType
+ override def foldable: Boolean = x.foldable && d.foldable
+ override def nullable: Boolean = x.nullable || d.nullable
+
+ def constraint: Seq[DataTypeConstraint] =
+ DataTypeConstraint(AcceptNumbericType, (expr: Expression) =>
expr.dataType match {
+ case _ @ (_: DoubleType | _: DecimalType) => expr
+ case _ @ (_: NullType | _: FractionalType) => Cast(expr, DoubleType)
+ case LongType => expr
+ case _: IntegerType => expr
+ case _: IntegralType => Cast(expr, IntegerType)
+ }) ::
+ DataTypeConstraint(AcceptIntegralType, (expr: Expression) =>
expr.dataType match {
--- End diff --
Is this duplicated with the rules above?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]