Github user kevinyu98 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/12646#discussion_r118804681
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala ---
    @@ -461,68 +462,269 @@ case class FindInSet(left: Expression, right: Expression) extends BinaryExpressi
     }
     
     /**
    - * A function that trim the spaces from both ends for the specified string.
    - */
    + * A function that takes a character string, removes the leading and/or trailing characters matching with the characters
    + * in the trim string, returns the new string. If LEADING/TRAILING/BOTH and trimStr keywords are not specified, it
    + * defaults to remove space character from both ends.
    + * trimStr: A character string to be trimmed from the source string, if it has multiple characters, the function
    + * searches for each character in the source string, removes the characters from the source string until it
    + * encounters the first non-match character.
    + * LEADING: removes any characters from the left end of the source string that matches characters in the trim string.
    + * TRAILING: removes any characters from the right end of the source string that matches characters in the trim string.
    + * BOTH: removes any characters from both ends of the source string that matches characters in the trim string.
    +  */
     @ExpressionDescription(
    -  usage = "_FUNC_(str) - Removes the leading and trailing space characters 
from `str`.",
    +  usage = """
    +    _FUNC_(str) - Removes the leading and trailing space characters from 
`str`.
    +    _FUNC_(BOTH trimStr FROM str) - Remove the leading and trailing 
trimString from `str`
    +    _FUNC_(LEADING trimStr FROM str) - Remove the leading trimString from 
`str`
    +    _FUNC_(TRAILING trimStr FROM str) - Remove the trailing trimString 
from `str`
    +  """,
       extended = """
    +    Arguments:
    +      str - a string expression
    +      trimString - the trim string
    +      BOTH, FROM - these are keyword to specify for trim string from both 
ends of the string
    +      LEADING, FROM - these are keyword to specify for trim string from 
left end of the string
    +      TRAILING, FROM - these are keyword to specify for trim string from 
right end of the string
         Examples:
           > SELECT _FUNC_('    SparkSQL   ');
            SparkSQL
    +      > SELECT _FUNC_(BOTH 'SL' FROM 'SSparkSQLS');
    +       parkSQ
    +      > SELECT _FUNC_(LEADING 'paS' FROM 'SSparkSQLS');
    +       rkSQLS
    +      > SELECT _FUNC_(TRAILING 'SLQ' FROM 'SSparkSQLS');
    +       SSparkS
       """)
    -case class StringTrim(child: Expression)
    -  extends UnaryExpression with String2StringExpression {
    +case class StringTrim(children: Seq[Expression])
    +  extends Expression with ImplicitCastInputTypes {
    +
    +  require(children.size <= 2 && children.nonEmpty,
    +    s"$prettyName requires at least one argument and no more than two.")
    +
    +  override def dataType: DataType = StringType
    +  override def inputTypes: Seq[AbstractDataType] = 
Seq.fill(children.size)(StringType)
     
    -  def convert(v: UTF8String): UTF8String = v.trim()
    +  override def nullable: Boolean = children.exists(_.nullable)
    +  override def foldable: Boolean = children.forall(_.foldable)
     
       override def prettyName: String = "trim"
     
    +  // trim function can take one or two arguments.
    +  // For one argument(children size is 1), it is the trim space function.
    +  // For two arguments(children size is 2), it is the trim function with one of these options: BOTH/LEADING/TRAILING.
    +  override def eval(input: InternalRow): Any = {
    +    val inputs = children.map(_.eval(input).asInstanceOf[UTF8String])
    +    if (inputs(0) != null) {
    +      if (children.size == 1) {
    +        return inputs(0).trim()
    +      } else if (inputs(1) != null) {
    +        return inputs(1).trim(inputs(0))
    +      }
    +    }
    +    null
    +  }
    +
       override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    -    defineCodeGen(ctx, ev, c => s"($c).trim()")
    +    if (children.size == 2 && !children(0).isInstanceOf[Literal]) {
    +      throw new AnalysisException(s"The trimming parameter should be 
Literal.")}
    +
    +    val evals = children.map(_.genCode(ctx))
    +    val inputs = evals.map { eval =>
    +      s"${eval.isNull} ? null : ${eval.value}"
    +    }
    +    val getTrimFunction = if (children.size == 1) {
    +      s"UTF8String ${ev.value} = ${inputs(0)}.trim();"
    +    } else {
    +      s"UTF8String ${ev.value} = 
${inputs(1)}.trim(${inputs(0)});".stripMargin
    +    }
    +    ev.copy(evals.map(_.code).mkString("\n") + s"""
    +      boolean ${ev.isNull} = false;
    +      ${getTrimFunction};
    +      if (${ev.value} == null) {
    +        ${ev.isNull} = true;
    +      }
    +    """)
    +    }
    +
    +  override def sql: String = {
    +    if (children.size == 1) {
    +      val childrenSQL = children.map(_.sql).mkString(", ")
    +      s"$prettyName($childrenSQL)"
    +    } else {
    +      val trimSQL = children(0).map(_.sql).mkString(", ")
    +      val tarSQL = children(1).map(_.sql).mkString(", ")
    +      s"$prettyName($trimSQL, $tarSQL)"
    +    }
       }
     }
     
     /**
    - * A function that trim the spaces from left end for given string.
    + * A function that trims the characters from left end for a given string, if the trimStr is not specified, it defaults
    + * to trim the spaces from the left end of the source string.
    + * trimStr: the function removes any characters from the left end of the source string which matches with the characters
    + * from trimStr, it stops at the first non-match character.
      */
     @ExpressionDescription(
    -  usage = "_FUNC_(str) - Removes the leading and trailing space characters 
from `str`.",
    +  usage = """
    +    _FUNC_(str) - Removes the leading space characters from `str`.
    +    _FUNC_(trimStr, str) - Removes the leading string contains the 
characters from the trim string from the `str`
    +  """,
       extended = """
    +    Arguments:
    +      str - a string expression
    +      trimStr - the trim string
         Examples:
    -      > SELECT _FUNC_('    SparkSQL');
    +      > SELECT _FUNC_('    SparkSQL   ');
            SparkSQL
    +      > SELECT _FUNC_('Sp', 'SSparkSQLS');
    +       arkSQLS
       """)
    -case class StringTrimLeft(child: Expression)
    -  extends UnaryExpression with String2StringExpression {
    +case class StringTrimLeft(children: Seq[Expression])
    +  extends Expression with ImplicitCastInputTypes {
     
    -  def convert(v: UTF8String): UTF8String = v.trimLeft()
    +  require (children.size <= 2 && children.nonEmpty,
    +    "$prettyName requires at least one argument and no more than two.")
    +
    +  override def inputTypes: Seq[AbstractDataType] = 
Seq.fill(children.size)(StringType)
    +  override def dataType: DataType = StringType
    +
    +  override def nullable: Boolean = children.exists(_.nullable)
    +  override def foldable: Boolean = children.forall(_.foldable)
     
       override def prettyName: String = "ltrim"
     
    -  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    -    defineCodeGen(ctx, ev, c => s"($c).trimLeft()")
    +  // ltrim function can take one or two arguments.
    +  // For one argument(children size is 1), it is the ltrim space function.
    +  // For two arguments(children size is 2), it is the trim function with option LEADING.
    +  override def eval(input: InternalRow): Any = {
    +    val inputs = children.map(_.eval(input).asInstanceOf[UTF8String])
    +    if (inputs(0) != null) {
    +      if (children.size == 1) {
    +        return inputs(0).trimLeft()
    +      } else if (inputs(1) != null) {
    +        return inputs(1).trimLeft(inputs(0))
    +      }
    +    }
    +    null
    +  }
    +
    +  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
    +    if (children.size == 2 && !children(0).isInstanceOf[Literal]) {
    +      throw new AnalysisException(s"The trimming parameter should be 
Literal.")}
    +
    +    val evals = children.map(_.genCode(ctx))
    +    val inputs = evals.map { eval =>
    +      s"${eval.isNull} ? null : ${eval.value}"
    +    }
    +    val getTrimLeftFunction = if (children.size == 1) {
    +      s"UTF8String ${ev.value} = ${inputs(0)}.trimLeft();"
    +    } else {
    +      s"UTF8String ${ev.value} = ${inputs(1)}.trimLeft(${inputs(0)});"
    +    }
    +
    +    ev.copy(evals.map(_.code).mkString("\n") + s"""
    +      boolean ${ev.isNull} = false;
    +      ${getTrimLeftFunction};
    +      if (${ev.value} == null) {
    +        ${ev.isNull} = true;
    +      }
    +    """)
    +  }
    +
    +  override def sql: String = {
    --- End diff --
    
    ok



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to