[GitHub] [spark] cloud-fan commented on a diff in pull request #38947: [SPARK-41233][SQL][PYTHON] Add `array_prepend` function

2023-03-12 Thread via GitHub


cloud-fan commented on code in PR #38947:
URL: https://github.com/apache/spark/pull/38947#discussion_r1133454186


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -1399,6 +1399,151 @@ case class ArrayContains(left: Expression, right: 
Expression)
 copy(left = newLeft, right = newRight)
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+  _FUNC_(array, element) - Add the element at the beginning of the array 
passed as first
+  argument. Type of element should be similar to type of the elements of 
the array.
+  Null element is also prepended to the array. But if the array passed is 
NULL
+  output is NULL
+""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["d","b","d","c","a"]
+  > SELECT _FUNC_(array(1, 2, 3, null), null);
+   [null,1,2,3,null]
+  > SELECT _FUNC_(CAST(null as Array), 2);
+   NULL
+  """,
+  group = "array_funcs",
+  since = "3.5.0")
+case class ArrayPrepend(left: Expression, right: Expression)
+  extends BinaryExpression
+with ImplicitCastInputTypes
+with ComplexTypeMergingExpression
+with QueryErrorsBase {
+
+  override def nullable: Boolean = left.nullable
+
+  @transient protected lazy val elementType: DataType =
+inputTypes.head.asInstanceOf[ArrayType].elementType
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+if (value1 == null) {
+  null
+} else {
+  val value2 = right.eval(input)
+  nullSafeEval(value1, value2)
+}
+  }
+  override def nullSafeEval(arr: Any, elementData: Any): Any = {
+val arrayData = arr.asInstanceOf[ArrayData]
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+finalData.update(0, elementData)
+arrayData.foreach(elementType, (i: Int, v: Any) => finalData.update(i + 1, 
v))
+new GenericArrayData(finalData)
+  }
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)
+val f = (arr: String, value: String) => {
+  val newArraySize = ctx.freshName("newArraySize")
+  val newArray = ctx.freshName("newArray")
+  val i = ctx.freshName("i")
+  val iPlus1 = s"$i+1"
+  val zero = "0"
+  val allocation = CodeGenerator.createArrayData(
+newArray,
+elementType,
+newArraySize,
+s" $prettyName failed.")
+  val assignment =
+CodeGenerator.createArrayAssignment(newArray, elementType, arr, 
iPlus1, i, false)
+  val newElemAssignment =
+CodeGenerator.setArrayElement(newArray, elementType, zero, value, 
Some(rightGen.isNull))
+  s"""
+ |int $newArraySize = $arr.numElements() + 1;
+ |$allocation
+ |$newElemAssignment
+ |for (int $i = 0; $i < $arr.numElements(); $i ++) {
+ |  $assignment
+ |}
+ |${ev.value} = $newArray;
+ |""".stripMargin
+}
+val resultCode = f(leftGen.value, rightGen.value)
+if(nullable) {
+  val nullSafeEval = leftGen.code + rightGen.code + 
ctx.nullSafeExec(nullable, leftGen.isNull) {
+s"""
+   |${ev.isNull} = false;
+   |${resultCode}
+   |""".stripMargin
+  }
+  ev.copy(code =
+code"""
+boolean ${ev.isNull} = true;
+${CodeGenerator.javaType(dataType)} ${ev.value} = 
${CodeGenerator.defaultValue(dataType)};
+$nullSafeEval
+  """)
+} else {
+  ev.copy(code =
+code"""

Review Comment:
   ditto



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] cloud-fan commented on a diff in pull request #38947: [SPARK-41233][SQL][PYTHON] Add `array_prepend` function

2023-03-12 Thread via GitHub


cloud-fan commented on code in PR #38947:
URL: https://github.com/apache/spark/pull/38947#discussion_r1133454113


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -1399,6 +1399,151 @@ case class ArrayContains(left: Expression, right: 
Expression)
 copy(left = newLeft, right = newRight)
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+  _FUNC_(array, element) - Add the element at the beginning of the array 
passed as first
+  argument. Type of element should be similar to type of the elements of 
the array.
+  Null element is also prepended to the array. But if the array passed is 
NULL
+  output is NULL
+""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["d","b","d","c","a"]
+  > SELECT _FUNC_(array(1, 2, 3, null), null);
+   [null,1,2,3,null]
+  > SELECT _FUNC_(CAST(null as Array), 2);
+   NULL
+  """,
+  group = "array_funcs",
+  since = "3.5.0")
+case class ArrayPrepend(left: Expression, right: Expression)
+  extends BinaryExpression
+with ImplicitCastInputTypes
+with ComplexTypeMergingExpression
+with QueryErrorsBase {
+
+  override def nullable: Boolean = left.nullable
+
+  @transient protected lazy val elementType: DataType =
+inputTypes.head.asInstanceOf[ArrayType].elementType
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+if (value1 == null) {
+  null
+} else {
+  val value2 = right.eval(input)
+  nullSafeEval(value1, value2)
+}
+  }
+  override def nullSafeEval(arr: Any, elementData: Any): Any = {
+val arrayData = arr.asInstanceOf[ArrayData]
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+finalData.update(0, elementData)
+arrayData.foreach(elementType, (i: Int, v: Any) => finalData.update(i + 1, 
v))
+new GenericArrayData(finalData)
+  }
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)
+val f = (arr: String, value: String) => {
+  val newArraySize = ctx.freshName("newArraySize")
+  val newArray = ctx.freshName("newArray")
+  val i = ctx.freshName("i")
+  val iPlus1 = s"$i+1"
+  val zero = "0"
+  val allocation = CodeGenerator.createArrayData(
+newArray,
+elementType,
+newArraySize,
+s" $prettyName failed.")
+  val assignment =
+CodeGenerator.createArrayAssignment(newArray, elementType, arr, 
iPlus1, i, false)
+  val newElemAssignment =
+CodeGenerator.setArrayElement(newArray, elementType, zero, value, 
Some(rightGen.isNull))
+  s"""
+ |int $newArraySize = $arr.numElements() + 1;
+ |$allocation
+ |$newElemAssignment
+ |for (int $i = 0; $i < $arr.numElements(); $i ++) {
+ |  $assignment
+ |}
+ |${ev.value} = $newArray;
+ |""".stripMargin
+}
+val resultCode = f(leftGen.value, rightGen.value)
+if(nullable) {
+  val nullSafeEval = leftGen.code + rightGen.code + 
ctx.nullSafeExec(nullable, leftGen.isNull) {
+s"""
+   |${ev.isNull} = false;
+   |${resultCode}
+   |""".stripMargin
+  }
+  ev.copy(code =
+code"""
+boolean ${ev.isNull} = true;
+${CodeGenerator.javaType(dataType)} ${ev.value} = 
${CodeGenerator.defaultValue(dataType)};
+$nullSafeEval
+  """)

Review Comment:
   please use the same code style as
   ```
   s"""
  |...
  |...
  |""". stripMargin
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] cloud-fan commented on a diff in pull request #38947: [SPARK-41233][SQL][PYTHON] Add `array_prepend` function

2023-03-12 Thread via GitHub


cloud-fan commented on code in PR #38947:
URL: https://github.com/apache/spark/pull/38947#discussion_r1133450986


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -1399,6 +1399,151 @@ case class ArrayContains(left: Expression, right: 
Expression)
 copy(left = newLeft, right = newRight)
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+  _FUNC_(array, element) - Add the element at the beginning of the array 
passed as first
+  argument. Type of element should be similar to type of the elements of 
the array.
+  Null element is also prepended to the array. But if the array passed is 
NULL
+  output is NULL
+""",
+  examples = """
+Examples:
+  > SELECT _FUNC_(array('b', 'd', 'c', 'a'), 'd');
+   ["d","b","d","c","a"]
+  > SELECT _FUNC_(array(1, 2, 3, null), null);
+   [null,1,2,3,null]
+  > SELECT _FUNC_(CAST(null as Array), 2);
+   NULL
+  """,
+  group = "array_funcs",
+  since = "3.5.0")
+case class ArrayPrepend(left: Expression, right: Expression)
+  extends BinaryExpression
+with ImplicitCastInputTypes
+with ComplexTypeMergingExpression
+with QueryErrorsBase {
+
+  override def nullable: Boolean = left.nullable
+
+  @transient protected lazy val elementType: DataType =
+inputTypes.head.asInstanceOf[ArrayType].elementType
+
+  override def eval(input: InternalRow): Any = {
+val value1 = left.eval(input)
+if (value1 == null) {
+  null
+} else {
+  val value2 = right.eval(input)
+  nullSafeEval(value1, value2)
+}
+  }
+  override def nullSafeEval(arr: Any, elementData: Any): Any = {
+val arrayData = arr.asInstanceOf[ArrayData]
+val numberOfElements = arrayData.numElements() + 1
+if (numberOfElements > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
+  throw 
QueryExecutionErrors.concatArraysWithElementsExceedLimitError(numberOfElements)
+}
+val finalData = new Array[Any](numberOfElements)
+finalData.update(0, elementData)
+arrayData.foreach(elementType, (i: Int, v: Any) => finalData.update(i + 1, 
v))
+new GenericArrayData(finalData)
+  }
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
+val leftGen = left.genCode(ctx)
+val rightGen = right.genCode(ctx)
+val f = (arr: String, value: String) => {
+  val newArraySize = ctx.freshName("newArraySize")
+  val newArray = ctx.freshName("newArray")
+  val i = ctx.freshName("i")
+  val iPlus1 = s"$i+1"
+  val zero = "0"
+  val allocation = CodeGenerator.createArrayData(
+newArray,
+elementType,
+newArraySize,

Review Comment:
   do we really need this variable? can we just pass `s"$arr.numElements() + 
1"`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] cloud-fan commented on a diff in pull request #38947: [SPARK-41233][SQL][PYTHON] Add `array_prepend` function

2023-03-12 Thread via GitHub


cloud-fan commented on code in PR #38947:
URL: https://github.com/apache/spark/pull/38947#discussion_r1133450131


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -1399,6 +1399,151 @@ case class ArrayContains(left: Expression, right: 
Expression)
 copy(left = newLeft, right = newRight)
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+  _FUNC_(array, element) - Add the element at the beginning of the array 
passed as first
+  argument. Type of element should be similar to type of the elements of 
the array.

Review Comment:
   We don't need to document the type coercion behavior in each function. We 
have a dedicated doc to explain what "similar" type is: 
https://spark.apache.org/docs/latest/sql-ref-ansi-compliance.html#type-promotion-and-precedence



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] cloud-fan commented on a diff in pull request #38947: [SPARK-41233][SQL][PYTHON] Add `array_prepend` function

2023-03-12 Thread via GitHub


cloud-fan commented on code in PR #38947:
URL: https://github.com/apache/spark/pull/38947#discussion_r1133448219


##
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##
@@ -1399,6 +1399,151 @@ case class ArrayContains(left: Expression, right: 
Expression)
 copy(left = newLeft, right = newRight)
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+  _FUNC_(array, element) - Add the element at the beginning of the array 
passed as first
+  argument. Type of element should be similar to type of the elements of 
the array.

Review Comment:
   ```suggestion
 argument. Type of element should be the same to the type of the 
elements of the array.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org