zhengruifeng commented on code in PR #38947:
URL: https://github.com/apache/spark/pull/38947#discussion_r1119610173
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##########
@@ -1399,6 +1399,145 @@ case class ArrayContains(left: Expression, right:
Expression)
copy(left = newLeft, right = newRight)
}
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage =
Review Comment:
please also document the null handling like `ArrayAppend`:
```
Type of element should be similar to the type of the elements of the array.
A null element is also appended into the array. But if the array passed
is NULL, the output is NULL.
```
examples
```
> SELECT _FUNC_(array(1, 2, 3, null), null);
[null,1,2,3,null]
> SELECT _FUNC_(CAST(null as Array<Int>), 2);
NULL
```
##########
python/pyspark/sql/functions.py:
##########
@@ -7619,6 +7619,36 @@ def get(col: "ColumnOrName", index:
Union["ColumnOrName", int]) -> Column:
return _invoke_function_over_columns("get", col, index)
+@try_remote_functions
+def array_prepend(col: "ColumnOrName", element: Any) -> Column:
+ """
+ Collection function: Returns an array containing element as
+ well as all elements from array. The new element is positioned
+ at the beginning of the array.
+
+ .. versionadded:: 3.4.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ name of column containing array
+ element :
+ element to be prepended to the array
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ an array excluding given value.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([([2, 3, 4],), ([],)], ['data'])
+ >>> df.select(array_prepend(df.data, 1)).collect()
+ [Row(array_prepend(data, 1)=[1, 2, 3, 4]), Row(array_prepend(data, 1)=[1])]
+ """
+ return _invoke_function("array_prepend", _to_java_column(col), element)
Review Comment:
```suggestion
return _invoke_function_over_columns("array_prepend", col, lit(value))
```
##########
python/pyspark/sql/functions.py:
##########
@@ -7619,6 +7619,36 @@ def get(col: "ColumnOrName", index:
Union["ColumnOrName", int]) -> Column:
return _invoke_function_over_columns("get", col, index)
+@try_remote_functions
+def array_prepend(col: "ColumnOrName", element: Any) -> Column:
+ """
+ Collection function: Returns an array containing element as
+ well as all elements from array. The new element is positioned
+ at the beginning of the array.
+
+ .. versionadded:: 3.4.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ name of column containing array
+ element :
+ element to be prepended to the array
Review Comment:
```suggestion
value :
a literal value, or a :class:`~pyspark.sql.Column` expression.
```
##########
python/pyspark/sql/functions.py:
##########
@@ -7619,6 +7619,36 @@ def get(col: "ColumnOrName", index:
Union["ColumnOrName", int]) -> Column:
return _invoke_function_over_columns("get", col, index)
+@try_remote_functions
+def array_prepend(col: "ColumnOrName", element: Any) -> Column:
Review Comment:
```suggestion
def array_prepend(col: "ColumnOrName", value: Any) -> Column:
```
to be consistent with `array_append`
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##########
@@ -1399,6 +1399,145 @@ case class ArrayContains(left: Expression, right:
Expression)
copy(left = newLeft, right = newRight)
}
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage =
+ "_FUNC_(array, value) - Returns an array containing value as well as all
elements from array. The new element is positioned at the beginning of the
array.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+ ["d","b","d","c","a"]
+ """,
+ group = "array_funcs",
+ since = "3.4.0")
+case class ArrayPrepend(left: Expression, right: Expression)
+ extends BinaryExpression
+ with ImplicitCastInputTypes
+ with ComplexTypeMergingExpression
+ with QueryErrorsBase {
+
+ override def nullable: Boolean = left.nullable
+
+ @transient protected lazy val elementType: DataType =
+ inputTypes.head.asInstanceOf[ArrayType].elementType
+
+ override def eval(input: InternalRow): Any = {
+ val value1 = left.eval(input)
+ if (value1 == null) {
+ null
+ } else {
+ val value2 = right.eval(input)
+ nullSafeEval(value1, value2)
+ }
+ }
+ override def nullSafeEval(arr: Any, elementData: Any): Any = {
+ val arrayData = arr.asInstanceOf[ArrayData]
+ val numberOfElements = arrayData.numElements() + 1
+ if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+ throw
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+ }
+ val finalData = new Array[Any](numberOfElements)
+ finalData.update(0, elementData)
+ arrayData.foreach(elementType, (i: Int, v: Any) => finalData.update(i + 1,
v))
+ new GenericArrayData(finalData)
+ }
+ override protected def doGenCode(ctx: CodegenContext, ev: ExprCode):
ExprCode = {
+ val leftGen = left.genCode(ctx)
+ val rightGen = right.genCode(ctx)
+ val f = (arr: String, value: String) => {
+ val newArraySize = ctx.freshName("newArraySize")
+ val newArray = ctx.freshName("newArray")
+ val i = ctx.freshName("i")
+ val pos = ctx.freshName("pos")
Review Comment:
I guess we only need one of var `i` and `pos`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]