beliefer commented on code in PR #38874:
URL: https://github.com/apache/spark/pull/38874#discussion_r1043982018
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##########
@@ -4600,3 +4600,57 @@ case class ArrayExcept(left: Expression, right:
Expression) extends ArrayBinaryL
override protected def withNewChildrenInternal(
newLeft: Expression, newRight: Expression): ArrayExcept = copy(left =
newLeft, right = newRight)
}
+
+@ExpressionDescription(
+ usage = "_FUNC_(array) - Removes null values from the array.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(array(1, 2, 3, null));
+ [1,2,3]
+ """,
+ group = "array_funcs",
+ since = "3.4.0")
+case class ArrayCompact(child: Expression)
+ extends UnaryExpression with ExpectsInputTypes with NullIntolerant {
+ override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType)
+ override def dataType: DataType = child.dataType
+
+ @transient private lazy val elementType: DataType =
dataType.asInstanceOf[ArrayType].elementType
+
+ override def nullSafeEval(array: Any): Any = {
+ val newArray = new Array[Any](array.asInstanceOf[ArrayData].numElements())
+ var pos = 0
+ var hasNull = false
+ array.asInstanceOf[ArrayData].foreach(elementType, (index, v) =>
+ // add elements only if the source has null
+ if (v != null && hasNull) {
+ newArray(pos) = v
+ pos += 1
+ } else if (v == null && !hasNull) {
+ hasNull = true
+ // source has null elements, so copy the elements to newArray
+ for(i <- 0 until index) {
+ newArray(pos) = array.asInstanceOf[ArrayData].get(i, elementType)
+ pos += 1
+ }
+ }
+ )
+ if (hasNull) {
+ new GenericArrayData(newArray.slice(0, pos))
+ } else {
+ array
+ }
+ }
+ override def prettyName: String = "array_compact"
+
+ override protected def doGenCode(ctx: CodegenContext, ev: ExprCode):
ExprCode = {
+
+ nullSafeCodeGen(ctx, ev, array => {
+ val expr = ctx.addReferenceObj("arrayCompactExpr", this)
+ s"${ev.value} = (ArrayData)$expr.nullSafeEval($array);"
Review Comment:
This implementation relies on some hacks. Please see the discussion at
https://github.com/apache/spark/pull/38865#discussion_r1043977011
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]