Github user kevinyu98 commented on a diff in the pull request:
https://github.com/apache/spark/pull/12646#discussion_r139278875
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
---
@@ -503,69 +504,307 @@ case class FindInSet(left: Expression, right:
Expression) extends BinaryExpressi
override def prettyName: String = "find_in_set"
}
+trait String2TrimExpression extends Expression with ImplicitCastInputTypes
{
+
+ override def dataType: DataType = StringType
+ override def inputTypes: Seq[AbstractDataType] =
Seq.fill(children.size)(StringType)
+
+ override def nullable: Boolean = children.exists(_.nullable)
+ override def foldable: Boolean = children.forall(_.foldable)
+}
+
+object StringTrim {
+ def apply(str: Expression, trimStr: Expression) : StringTrim =
StringTrim(str, Some(trimStr))
+ def apply(str: Expression) : StringTrim = StringTrim(str, None)
+}
+
/**
- * A function that trim the spaces from both ends for the specified string.
+ * A function that takes a character string, removes the leading and
trailing characters matching with the characters
+ * in the trim string, returns the new string.
+ * If BOTH and trimStr keywords are not specified, it defaults to remove
space character from both ends. The trim
+ * function will have one argument, which contains the source string.
+ * If BOTH and trimStr keywords are specified, it trims the characters
from both ends, and the trim function will have
+ * two arguments, the first argument contains trimStr, the second argument
contains the source string.
+ * trimStr: A character string to be trimmed from the source string, if it
has multiple characters, the function
+ * searches for each character in the source string, removes the
characters from the source string until it
+ * encounters the first non-match character.
+ * BOTH: removes any character from both ends of the source string that
matches characters in the trim string.
*/
@ExpressionDescription(
- usage = "_FUNC_(str) - Removes the leading and trailing space characters
from `str`.",
+ usage = """
+ _FUNC_(str) - Removes the leading and trailing space characters from
`str`.
+ _FUNC_(BOTH trimStr FROM str) - Remove the leading and trailing
trimString from `str`
+ """,
+ arguments = """
+ Arguments:
+ * str - a string expression
+ * trimString - the trim string
+ * BOTH, FROM - these are keyword to specify for trim string from
both ends of the string
+ """,
examples = """
Examples:
> SELECT _FUNC_(' SparkSQL ');
SparkSQL
+ > SELECT _FUNC_(BOTH 'SL' FROM 'SSparkSQLS');
+ parkSQ
""")
-case class StringTrim(child: Expression)
- extends UnaryExpression with String2StringExpression {
+case class StringTrim(
+ srcStr: Expression,
+ trimStr: Option[Expression] = None)
+ extends String2TrimExpression {
- def convert(v: UTF8String): UTF8String = v.trim()
+ def this (trimStr: Expression, srcStr: Expression) = this(srcStr,
Option(trimStr))
+
+ def this(srcStr: Expression) = this(srcStr, None)
override def prettyName: String = "trim"
+ override def children: Seq[Expression] = if (trimStr.isDefined) {
+ srcStr :: trimStr.get :: Nil
+ } else {
+ srcStr :: Nil
+ }
+ override def eval(input: InternalRow): Any = {
+ val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
+ if (srcString == null) {
+ null
+ } else {
+ if (trimStr.isDefined) {
+ return
srcString.trim(trimStr.get.eval(input).asInstanceOf[UTF8String])
+ } else {
+ return srcString.trim()
+ }
+ }
+ }
+
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- defineCodeGen(ctx, ev, c => s"($c).trim()")
+ val evals = children.map(_.genCode(ctx))
+ val srcString = evals(0)
+
+ if (evals.length == 1) {
+ ev.copy(evals.map(_.code).mkString("\n") + s"""
--- End diff --
I changed the generated code to use `ev.copy(evals.map(_.code).mkString + s"""`, i.e. `mkString` is now called without the "\n" separator.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]