Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/12646#discussion_r137085104
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
---
@@ -503,69 +504,319 @@ case class FindInSet(left: Expression, right:
Expression) extends BinaryExpressi
override def prettyName: String = "find_in_set"
}
+trait String2TrimExpression extends Expression with ImplicitCastInputTypes
{
+
+ override def dataType: DataType = StringType
+ override def inputTypes: Seq[AbstractDataType] =
Seq.fill(children.size)(StringType)
+
+ override def nullable: Boolean = children.exists(_.nullable)
+ override def foldable: Boolean = children.forall(_.foldable)
+
+ override def sql: String = {
+ if (children.size == 1) {
+ val childrenSQL = children.map(_.sql).mkString(", ")
+ s"$prettyName($childrenSQL)"
+ } else {
+ val trimSQL = children(0).map(_.sql).mkString(", ")
+ val tarSQL = children(1).map(_.sql).mkString(", ")
+ s"$prettyName($trimSQL, $tarSQL)"
+ }
+ }
+}
+
+object StringTrim {
+ def apply(str: Expression, trimStr: Expression) : StringTrim =
StringTrim(str, Some(trimStr))
+ def apply(str: Expression) : StringTrim = StringTrim(str, None)
+}
+
/**
- * A function that trim the spaces from both ends for the specified string.
- */
+ * A function that takes a character string, removes the leading and
trailing characters that match the characters
+ * in the trim string, and returns the new string.
+ * If the BOTH and trimStr keywords are not specified, it defaults to removing
space characters from both ends. The trim
+ * function will have one argument, which contains the source string.
+ * If BOTH and trimStr keywords are specified, it trims the characters
from both ends, and the trim function will have
+ * two arguments, the first argument contains trimStr, the second argument
contains the source string.
+ * trimStr: A character string to be trimmed from the source string. If it
has multiple characters, the function
+ * searches for each character in the source string and removes the
characters from the source string until it
+ * encounters the first non-matching character.
+ * BOTH: removes any characters from both ends of the source string that
match characters in the trim string.
+ */
@ExpressionDescription(
- usage = "_FUNC_(str) - Removes the leading and trailing space characters
from `str`.",
+ usage = """
+ _FUNC_(str) - Removes the leading and trailing space characters from
`str`.
+ _FUNC_(BOTH trimStr FROM str) - Remove the leading and trailing
trimString from `str`
+ """,
+ arguments = """
+ Arguments:
+ * str - a string expression
+ * trimString - the trim string
+ * BOTH, FROM - these are keyword to specify for trim string from
both ends of the string
+ """,
examples = """
Examples:
> SELECT _FUNC_(' SparkSQL ');
SparkSQL
+ > SELECT _FUNC_(BOTH 'SL' FROM 'SSparkSQLS');
+ parkSQ
""")
-case class StringTrim(child: Expression)
- extends UnaryExpression with String2StringExpression {
+case class StringTrim(
+ srcStr: Expression,
+ trimStr: Option[Expression] = None)
+ extends String2TrimExpression {
- def convert(v: UTF8String): UTF8String = v.trim()
+ def this (trimStr: Expression, srcStr: Expression) = this(srcStr,
Option(trimStr))
+
+ def this(srcStr: Expression) = this(srcStr, None)
override def prettyName: String = "trim"
+ override def children: Seq[Expression] = if (trimStr.isDefined) {
+ srcStr :: trimStr.get :: Nil
+ } else {
+ srcStr :: Nil
+ }
+ override def eval(input: InternalRow): Any = {
+ val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
+ if (srcString != null) {
+ if (trimStr.isDefined) {
+ return
srcString.trim(trimStr.get.eval(input).asInstanceOf[UTF8String])
+ } else {
+ return srcString.trim()
+ }
+ }
+ null
+ }
+
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- defineCodeGen(ctx, ev, c => s"($c).trim()")
+ val evals = children.map(_.genCode(ctx))
+ val srcString = evals(0)
+
+ if (evals.length == 1) {
+ ev.copy(evals.map(_.code).mkString("\n") + s"""
+ boolean ${ev.isNull} = false;
+ UTF8String ${ev.value} = null;
+ if (${srcString.isNull}) {
+ ${ev.isNull} = true;
+ } else {
+ ${ev.value} = ${srcString.value}.trim();
+ }
+ """.stripMargin)
--- End diff --
> }""".stripMargin)
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]