Github user kiszk commented on a diff in the pull request:
https://github.com/apache/spark/pull/19777#discussion_r151953946
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
---
@@ -125,19 +125,43 @@ case class ConcatWs(children: Seq[Expression])
if (children.forall(_.dataType == StringType)) {
// All children are strings. In that case we can construct a fixed size array.
val evals = children.map(_.genCode(ctx))
-
- val inputs = evals.map { eval =>
- s"${eval.isNull} ? (UTF8String) null : ${eval.value}"
- }.mkString(", ")
-
- ev.copy(evals.map(_.code).mkString("\n") + s"""
- UTF8String ${ev.value} = UTF8String.concatWs($inputs);
+ val separator = evals.head
+ val strings = evals.tail
+ val numArgs = strings.length
+ val args = ctx.freshName("args")
+
+ val inputs = strings.zipWithIndex.map { case (eval, index) =>
+ if (eval.isNull != "true") {
+ s"""
+ ${eval.code}
+ if (!${eval.isNull}) {
+ $args[$index] = ${eval.value};
+ }
+ """
+ } else {
+ ""
+ }
+ }
+ val codes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) {
+ ctx.splitExpressions(inputs, "valueConcatWs",
+ ("InternalRow", ctx.INPUT_ROW) :: ("UTF8String[]", args) :: Nil)
+ } else {
+ inputs.mkString("\n")
+ }
+ ev.copy(s"""
+ UTF8String[] $args = new UTF8String[$numArgs];
+ ${separator.code}
+ $codes
+ UTF8String ${ev.value} = UTF8String.concatWs(${separator.value}, $args);
boolean ${ev.isNull} = ${ev.value} == null;
""")
} else {
val array = ctx.freshName("array")
+ ctx.addMutableState("UTF8String[]", array, "")
val varargNum = ctx.freshName("varargNum")
+ ctx.addMutableState("int", varargNum, "")
val idxInVararg = ctx.freshName("idxInVararg")
+ ctx.addMutableState("int", idxInVararg, "")
--- End diff --
Sure, let me do it later.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]