Github user wajda commented on a diff in the pull request: https://github.com/apache/spark/pull/20938#discussion_r181379483 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -287,3 +289,160 @@ case class ArrayContains(left: Expression, right: Expression) override def prettyName: String = "array_contains" } + +/** + * Transforms an array of arrays into a single array. + */ +@ExpressionDescription( + usage = "_FUNC_(arrayOfArrays) - Transforms an array of arrays into a single array.", + examples = """ + Examples: + > SELECT _FUNC_(array(array(1, 2), array(3, 4))); + [1,2,3,4] + """, + since = "2.4.0") +case class Flatten(child: Expression) extends UnaryExpression { + + override def nullable: Boolean = child.nullable || dataType.containsNull + + override def dataType: ArrayType = { + child + .dataType.asInstanceOf[ArrayType] + .elementType.asInstanceOf[ArrayType] + } + + override def checkInputDataTypes(): TypeCheckResult = child.dataType match { + case ArrayType(_: ArrayType, _) => + TypeCheckResult.TypeCheckSuccess + case _ => + TypeCheckResult.TypeCheckFailure( + s"The argument should be an array of arrays, " + + s"but '${child.sql}' is of ${child.dataType.simpleString} type." + ) + } + + override def nullSafeEval(array: Any): Any = { + val elements = array.asInstanceOf[ArrayData].toObjectArray(dataType) + + if (elements.contains(null)) { + null + } else { + val flattened = elements.flatMap( --- End diff -- I agree, especially in combination with [this comment](https://github.com/apache/spark/pull/20858#discussion_r181359247), provided that the resulting array length is known in advance. ```flatMap``` can then be replaced with a simple loop copying chunks of data into a preallocated array.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org