[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/21069 --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user huaxingao commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r192256591 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala --- @@ -552,4 +552,60 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(ArrayRepeat(strArray, Literal(2)), Seq(Seq("hi", "hola"), Seq("hi", "hola"))) checkEvaluation(ArrayRepeat(Literal("hi"), Literal(null, IntegerType)), null) } + + test("Array remove") { +val a0 = Literal.create(Seq(1, 2, 3, 2, 2, 5), ArrayType(IntegerType)) +val a1 = Literal.create(Seq("b", "a", "a", "c", "b"), ArrayType(StringType)) +val a2 = Literal.create(Seq[String](null, "", null, ""), ArrayType(StringType)) +val a3 = Literal.create(Seq.empty[Integer], ArrayType(IntegerType)) +val a4 = Literal.create(null, ArrayType(StringType)) +val a5 = Literal.create(Seq(1, null, 8, 9, null), ArrayType(IntegerType)) +val a6 = Literal.create(Seq(true, false, false, true), ArrayType(BooleanType)) + +checkEvaluation(ArrayRemove(a0, Literal(0)), Seq(1, 2, 3, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(1)), Seq(2, 3, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(2)), Seq(1, 3, 5)) +checkEvaluation(ArrayRemove(a0, Literal(3)), Seq(1, 2, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(5)), Seq(1, 2, 3, 2, 2)) +checkEvaluation(ArrayRemove(a0, Literal(null, IntegerType)), null) + +checkEvaluation(ArrayRemove(a1, Literal("")), Seq("b", "a", "a", "c", "b")) +checkEvaluation(ArrayRemove(a1, Literal("a")), Seq("b", "c", "b")) +checkEvaluation(ArrayRemove(a1, Literal("b")), Seq("a", "a", "c")) +checkEvaluation(ArrayRemove(a1, Literal("c")), Seq("b", "a", "a", "b")) + +checkEvaluation(ArrayRemove(a2, Literal("")), Seq(null, null)) +checkEvaluation(ArrayRemove(a2, Literal(null, StringType)), null) + +checkEvaluation(ArrayRemove(a3, Literal(1)), Seq.empty[Integer]) + +checkEvaluation(ArrayRemove(a4, Literal("a")), null) 
+ +checkEvaluation(ArrayRemove(a5, Literal(9)), Seq(1, null, 8, null)) +checkEvaluation(ArrayRemove(a6, Literal(false)), Seq(true, true)) + +// complex data types +val b0 = Literal.create(Seq[Array[Byte]](Array[Byte](5, 6), Array[Byte](1, 2), + Array[Byte](1, 2), Array[Byte](5, 6)), ArrayType(BinaryType)) +val b1 = Literal.create(Seq[Array[Byte]](Array[Byte](2, 1), null), + ArrayType(BinaryType)) +val b2 = Literal.create(Seq[Array[Byte]](null, Array[Byte](1, 2)), + ArrayType(BinaryType)) +val nullBinary = Literal.create(null, BinaryType) + +val dataToRemoved1 = Literal.create(Array[Byte](5, 6), BinaryType) +checkEvaluation(ArrayRemove(b0, dataToRemoved1), + Seq[Array[Byte]](Array[Byte](1, 2), Array[Byte](1, 2))) +checkEvaluation(ArrayRemove(b0, nullBinary), null) +checkEvaluation(ArrayRemove(b1, dataToRemoved1), Seq[Array[Byte]](Array[Byte](2, 1), null)) +checkEvaluation(ArrayRemove(b2, dataToRemoved1), Seq[Array[Byte]](null, Array[Byte](1, 2))) + +val c0 = Literal.create(Seq[Seq[Int]](Seq[Int](1, 2), Seq[Int](3, 4)), + ArrayType(ArrayType(IntegerType))) +val c1 = Literal.create(Seq[Seq[Int]](Seq[Int](5, 6), Seq[Int](2, 1)), + ArrayType(ArrayType(IntegerType))) --- End diff -- @ueshin Thanks for your comments. I added c2 in the test and also fixed the other three issues. Could you please review one more time? Thanks! --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r191956713 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -1882,3 +1882,117 @@ case class ArrayRepeat(left: Expression, right: Expression) } } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + lazy val elementType: DataType = left.dataType.asInstanceOf[ArrayType].elementType + + @transient private lazy val ordering: Ordering[Any] = +TypeUtils.getInterpretedOrdering(right.dataType) + + override def checkInputDataTypes(): TypeCheckResult = { +if (!left.dataType.isInstanceOf[ArrayType] + || left.dataType.asInstanceOf[ArrayType].elementType != right.dataType) { + TypeCheckResult.TypeCheckFailure( +"Arguments must be an array followed by a value of same type as the array members") +} else { + TypeUtils.checkForOrderingExpr(right.dataType, s"function $prettyName") +} + } + + override def nullSafeEval(arr: Any, value: Any): Any = { +val newArray = new Array[Any](arr.asInstanceOf[ArrayData].numElements()) +var pos = 0 +arr.asInstanceOf[ArrayData].foreach(right.dataType, (i, v) => + if (v == null || !ordering.equiv(v, value)) { +newArray(pos) = v +pos += 1 + } +) +new GenericArrayData(newArray.slice(0, pos)) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +nullSafeCodeGen(ctx, ev, (arr, value) => { + val numsToRemove = 
ctx.freshName("numsToRemove") + val newArraySize = ctx.freshName("newArraySize") + val i = ctx.freshName("i") + val getValue = CodeGenerator.getValue(arr, elementType, i) + val isEqual = ctx.genEqual(elementType, value, getValue) + s""" + |int $numsToRemove = 0; + |for (int $i = 0; $i < $arr.numElements(); $i ++) { + | if (!$arr.isNullAt($i) && $isEqual) { + |$numsToRemove = $numsToRemove + 1; + | } + |} + |int $newArraySize = $arr.numElements() - $numsToRemove; + |${genCodeForResult(ctx, ev, arr, value, newArraySize)} + """.stripMargin +}) + } + + def genCodeForResult( + ctx: CodegenContext, + ev: ExprCode, + inputArray: String, + value: String, + newArraySize: String): String = { +val values = ctx.freshName("values") +val i = ctx.freshName("i") +val pos = ctx.freshName("pos") +val getValue = CodeGenerator.getValue(inputArray, elementType, i) +val isEqual = ctx.genEqual(elementType, value, getValue) +if (!CodeGenerator.isPrimitiveType(elementType)) { + val arrayClass = classOf[GenericArrayData].getName + s""" + |int $pos = 0; + |Object[] $values = new Object[$newArraySize]; + |for (int $i = 0; $i < $inputArray.numElements(); $i ++) { + | if (!($isEqual)) { --- End diff -- Don't we need to check null? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r191955752 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -1882,3 +1882,117 @@ case class ArrayRepeat(left: Expression, right: Expression) } } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + lazy val elementType: DataType = left.dataType.asInstanceOf[ArrayType].elementType + + @transient private lazy val ordering: Ordering[Any] = +TypeUtils.getInterpretedOrdering(right.dataType) + + override def checkInputDataTypes(): TypeCheckResult = { +if (!left.dataType.isInstanceOf[ArrayType] + || left.dataType.asInstanceOf[ArrayType].elementType != right.dataType) { --- End diff -- Maybe we need to change here as well. We can follow the implementation of `ArrayPosition`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r191955459 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -1882,3 +1882,117 @@ case class ArrayRepeat(left: Expression, right: Expression) } } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) --- End diff -- This will cause `ClassCastException`. See #21401. Also could you add tests similar to tests added in #21401? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r191958470 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala --- @@ -552,4 +552,60 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(ArrayRepeat(strArray, Literal(2)), Seq(Seq("hi", "hola"), Seq("hi", "hola"))) checkEvaluation(ArrayRepeat(Literal("hi"), Literal(null, IntegerType)), null) } + + test("Array remove") { +val a0 = Literal.create(Seq(1, 2, 3, 2, 2, 5), ArrayType(IntegerType)) +val a1 = Literal.create(Seq("b", "a", "a", "c", "b"), ArrayType(StringType)) +val a2 = Literal.create(Seq[String](null, "", null, ""), ArrayType(StringType)) +val a3 = Literal.create(Seq.empty[Integer], ArrayType(IntegerType)) +val a4 = Literal.create(null, ArrayType(StringType)) +val a5 = Literal.create(Seq(1, null, 8, 9, null), ArrayType(IntegerType)) +val a6 = Literal.create(Seq(true, false, false, true), ArrayType(BooleanType)) + +checkEvaluation(ArrayRemove(a0, Literal(0)), Seq(1, 2, 3, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(1)), Seq(2, 3, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(2)), Seq(1, 3, 5)) +checkEvaluation(ArrayRemove(a0, Literal(3)), Seq(1, 2, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(5)), Seq(1, 2, 3, 2, 2)) +checkEvaluation(ArrayRemove(a0, Literal(null, IntegerType)), null) + +checkEvaluation(ArrayRemove(a1, Literal("")), Seq("b", "a", "a", "c", "b")) +checkEvaluation(ArrayRemove(a1, Literal("a")), Seq("b", "c", "b")) +checkEvaluation(ArrayRemove(a1, Literal("b")), Seq("a", "a", "c")) +checkEvaluation(ArrayRemove(a1, Literal("c")), Seq("b", "a", "a", "b")) + +checkEvaluation(ArrayRemove(a2, Literal("")), Seq(null, null)) +checkEvaluation(ArrayRemove(a2, Literal(null, StringType)), null) + +checkEvaluation(ArrayRemove(a3, Literal(1)), Seq.empty[Integer]) + +checkEvaluation(ArrayRemove(a4, Literal("a")), null) + 
+checkEvaluation(ArrayRemove(a5, Literal(9)), Seq(1, null, 8, null)) +checkEvaluation(ArrayRemove(a6, Literal(false)), Seq(true, true)) + +// complex data types +val b0 = Literal.create(Seq[Array[Byte]](Array[Byte](5, 6), Array[Byte](1, 2), + Array[Byte](1, 2), Array[Byte](5, 6)), ArrayType(BinaryType)) +val b1 = Literal.create(Seq[Array[Byte]](Array[Byte](2, 1), null), + ArrayType(BinaryType)) +val b2 = Literal.create(Seq[Array[Byte]](null, Array[Byte](1, 2)), + ArrayType(BinaryType)) +val nullBinary = Literal.create(null, BinaryType) + +val dataToRemoved1 = Literal.create(Array[Byte](5, 6), BinaryType) +checkEvaluation(ArrayRemove(b0, dataToRemoved1), + Seq[Array[Byte]](Array[Byte](1, 2), Array[Byte](1, 2))) +checkEvaluation(ArrayRemove(b0, nullBinary), null) +checkEvaluation(ArrayRemove(b1, dataToRemoved1), Seq[Array[Byte]](Array[Byte](2, 1), null)) +checkEvaluation(ArrayRemove(b2, dataToRemoved1), Seq[Array[Byte]](null, Array[Byte](1, 2))) + +val c0 = Literal.create(Seq[Seq[Int]](Seq[Int](1, 2), Seq[Int](3, 4)), + ArrayType(ArrayType(IntegerType))) +val c1 = Literal.create(Seq[Seq[Int]](Seq[Int](5, 6), Seq[Int](2, 1)), + ArrayType(ArrayType(IntegerType))) --- End diff -- What if for `val c2 = Literal.create(Seq[Seq[Int]](null, Seq[Int](2, 1)), ArrayType(ArrayType(IntegerType)))`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user huaxingao commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r190652607 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -1882,3 +1882,123 @@ case class ArrayRepeat(left: Expression, right: Expression) } } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + lazy val elementType: DataType = left.dataType.asInstanceOf[ArrayType].elementType + + @transient private lazy val ordering: Ordering[Any] = +TypeUtils.getInterpretedOrdering(right.dataType) + + override def checkInputDataTypes(): TypeCheckResult = { +if (!left.dataType.isInstanceOf[ArrayType] + || left.dataType.asInstanceOf[ArrayType].elementType != right.dataType) { + TypeCheckResult.TypeCheckFailure( +"Arguments must be an array followed by a value of same type as the array members") +} else { + TypeUtils.checkForOrderingExpr(right.dataType, s"function $prettyName") +} + } + + override def nullSafeEval(arr: Any, value: Any): Any = { +val newArray = new Array[Any](arr.asInstanceOf[ArrayData].numElements()) +var pos = 0 +arr.asInstanceOf[ArrayData].foreach(right.dataType, (i, v) => + if (v == null) { +if (value != null) { --- End diff -- You are right. The null check is redundant. Will remove. Thanks! --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r190485891 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -1882,3 +1882,123 @@ case class ArrayRepeat(left: Expression, right: Expression) } } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + lazy val elementType: DataType = left.dataType.asInstanceOf[ArrayType].elementType + + @transient private lazy val ordering: Ordering[Any] = +TypeUtils.getInterpretedOrdering(right.dataType) + + override def checkInputDataTypes(): TypeCheckResult = { +if (!left.dataType.isInstanceOf[ArrayType] + || left.dataType.asInstanceOf[ArrayType].elementType != right.dataType) { + TypeCheckResult.TypeCheckFailure( +"Arguments must be an array followed by a value of same type as the array members") +} else { + TypeUtils.checkForOrderingExpr(right.dataType, s"function $prettyName") +} + } + + override def nullSafeEval(arr: Any, value: Any): Any = { +val newArray = new Array[Any](arr.asInstanceOf[ArrayData].numElements()) +var pos = 0 +arr.asInstanceOf[ArrayData].foreach(right.dataType, (i, v) => + if (v == null) { +if (value != null) { --- End diff -- nit: Do we need this check since we are in `nullSafeEval`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user huaxingao commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r189978900 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -1882,3 +1882,98 @@ case class ArrayRepeat(left: Expression, right: Expression) } } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + lazy val elementType: DataType = left.dataType.asInstanceOf[ArrayType].elementType + + override def nullSafeEval(arr: Any, value: Any): Any = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +val data = arr.asInstanceOf[ArrayData].toArray[AnyRef](elementType).filter(_ != value) --- End diff -- @ueshin Fixed. Could you please review again? Thanks a lot! --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r189219299 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -1882,3 +1882,98 @@ case class ArrayRepeat(left: Expression, right: Expression) } } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + lazy val elementType: DataType = left.dataType.asInstanceOf[ArrayType].elementType + + override def nullSafeEval(arr: Any, value: Any): Any = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +val data = arr.asInstanceOf[ArrayData].toArray[AnyRef](elementType).filter(_ != value) --- End diff -- We shouldn't use `!=` directly, use `TypeUtils.getInterpretedOrdering()` instead. See #21361. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r188704602 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -1468,3 +1468,104 @@ case class Flatten(child: Expression) extends UnaryExpression { override def prettyName: String = "flatten" } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + lazy val elementType: DataType = left.dataType.asInstanceOf[ArrayType].elementType + + override def nullSafeEval(arr: Any, value: Any): Any = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +val data = arr.asInstanceOf[ArrayData].toArray[AnyRef](elementType).filter(_ != value) +new GenericArrayData(data.asInstanceOf[Array[Any]]) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +nullSafeCodeGen(ctx, ev, (arr, value) => { + val numsToRemove = ctx.freshName("numsToRemove") + val newArraySize = ctx.freshName("newArraySize") + val i = ctx.freshName("i") + val getValue = CodeGenerator.getValue(arr, elementType, i) + val isEqual = ctx.genEqual(elementType, value, getValue) + s""" + |int $numsToRemove = 0; + |for (int $i = 0; $i < $arr.numElements(); $i ++) { + | if (!$arr.isNullAt($i) && $isEqual) { + |$numsToRemove = $numsToRemove + 1; + | } + |} + |int $newArraySize = $arr.numElements() - $numsToRemove; + |${genCodeForResult(ctx, ev, arr, value, newArraySize)} + 
""".stripMargin +}) + } + + def genCodeForResult( + ctx: CodegenContext, + ev: ExprCode, + inputArray: String, + value: String, + newArraySize: String): String = { +val values = ctx.freshName("values") +val i = ctx.freshName("i") +val pos = ctx.freshName("pos") +val getValue = CodeGenerator.getValue(inputArray, elementType, i) +val isEqual = ctx.genEqual(elementType, value, getValue) +if (!CodeGenerator.isPrimitiveType(elementType)) { + val arrayClass = classOf[GenericArrayData].getName + s""" + |int $pos = 0; + |Object[] $values = new Object[$newArraySize]; + |for (int $i = 0; $i < $inputArray.numElements(); $i ++) { + | if ($isEqual) { + |; + | } + | else { + |$values[$pos] = $getValue; + |$pos = $pos + 1; + | } + |} + |${ev.value} = new $arrayClass($values); + """.stripMargin +} else { + val primitiveValueTypeName = CodeGenerator.primitiveTypeName(elementType) + s""" + |${ctx.createUnsafeArray(values, newArraySize, elementType, s" $prettyName failed.")} + |int $pos = 0; + |for (int $i = 0; $i < $inputArray.numElements(); $i ++) { + | if ($inputArray.isNullAt($i)) { + | $values.setNullAt($pos); + | $pos = $pos + 1; + | } + | else { + |if ($isEqual) { --- End diff -- ditto --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r188704502 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -1468,3 +1468,104 @@ case class Flatten(child: Expression) extends UnaryExpression { override def prettyName: String = "flatten" } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + lazy val elementType: DataType = left.dataType.asInstanceOf[ArrayType].elementType + + override def nullSafeEval(arr: Any, value: Any): Any = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +val data = arr.asInstanceOf[ArrayData].toArray[AnyRef](elementType).filter(_ != value) +new GenericArrayData(data.asInstanceOf[Array[Any]]) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +nullSafeCodeGen(ctx, ev, (arr, value) => { + val numsToRemove = ctx.freshName("numsToRemove") + val newArraySize = ctx.freshName("newArraySize") + val i = ctx.freshName("i") + val getValue = CodeGenerator.getValue(arr, elementType, i) + val isEqual = ctx.genEqual(elementType, value, getValue) + s""" + |int $numsToRemove = 0; + |for (int $i = 0; $i < $arr.numElements(); $i ++) { + | if (!$arr.isNullAt($i) && $isEqual) { + |$numsToRemove = $numsToRemove + 1; + | } + |} + |int $newArraySize = $arr.numElements() - $numsToRemove; + |${genCodeForResult(ctx, ev, arr, value, newArraySize)} + 
""".stripMargin +}) + } + + def genCodeForResult( + ctx: CodegenContext, + ev: ExprCode, + inputArray: String, + value: String, + newArraySize: String): String = { +val values = ctx.freshName("values") +val i = ctx.freshName("i") +val pos = ctx.freshName("pos") +val getValue = CodeGenerator.getValue(inputArray, elementType, i) +val isEqual = ctx.genEqual(elementType, value, getValue) +if (!CodeGenerator.isPrimitiveType(elementType)) { + val arrayClass = classOf[GenericArrayData].getName + s""" + |int $pos = 0; + |Object[] $values = new Object[$newArraySize]; + |for (int $i = 0; $i < $inputArray.numElements(); $i ++) { + | if ($isEqual) { --- End diff -- How about ``` if (!($isEqual)) { $values[$pos++] = $getValue; } ``` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user huaxingao commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r188494901 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala --- @@ -280,4 +280,35 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Concat(Seq(aa0, aa1)), Seq(Seq("a", "b"), Seq("c"), Seq("d"), Seq("e", "f"))) } + + test("Array remove") { +val a0 = Literal.create(Seq(1, 2, 3, 2, 2, 5), ArrayType(IntegerType)) +val a1 = Literal.create(Seq("b", "a", "a", "c", "b"), ArrayType(StringType)) +val a2 = Literal.create(Seq[String](null, "", null, ""), ArrayType(StringType)) +val a3 = Literal.create(Seq.empty[Integer], ArrayType(IntegerType)) +val a4 = Literal.create(null, ArrayType(StringType)) +val a5 = Literal.create(Seq(1, null, 8, 9, null), ArrayType(IntegerType)) +val a6 = Literal.create(Seq(true, false, false, true), ArrayType(BooleanType)) + +checkEvaluation(ArrayRemove(a0, Literal(0)), Seq(1, 2, 3, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(1)), Seq(2, 3, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(2)), Seq(1, 3, 5)) +checkEvaluation(ArrayRemove(a0, Literal(3)), Seq(1, 2, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(5)), Seq(1, 2, 3, 2, 2)) --- End diff -- @ueshin Thank you very much for your comments. I am very sorry for the late reply. I corrected everything except this one. I have ```checkEvaluation(ArrayRemove(a0, Literal(0)), Seq(1, 2, 3, 2, 2, 5))``` to check that no value is removed when the given value is not contained in the array. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r185726735 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala --- @@ -691,6 +691,30 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { } } + test("array remove") { +val df = Seq( + (Array[Int](2, 1, 2, 3), Array("a", "b", "c", "a"), Array("", "")), + (Array.empty[Int], Array.empty[String], Array.empty[String]), + (null, null, null) +).toDF("a", "b", "c") +checkAnswer( + df.select(array_remove(df("a"), 2), array_remove(df("b"), "a"), array_remove(df("c"), "")), --- End diff -- maybe `$"a"` form is preferred to `df("a")`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r185723637 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -883,3 +883,70 @@ case class Concat(children: Seq[Expression]) extends Expression { override def sql: String = s"concat(${children.map(_.sql).mkString(", ")})" } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes with CodegenFallback { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + override def nullSafeEval(arr: Any, value: Any): Any = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +val data = arr.asInstanceOf[ArrayData].toArray[AnyRef](elementType).filter(_ != value) +new GenericArrayData(data.asInstanceOf[Array[Any]]) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +nullSafeCodeGen(ctx, ev, (arr, value) => { + val arrayClass = classOf[GenericArrayData].getName + val values = ctx.freshName("values") + val i = ctx.freshName("i") + val pos = ctx.freshName("arrayPosition") + val numsToRemove = ctx.freshName("newArrLen") + val getValue = CodeGenerator.getValue(arr, right.dataType, i) + s""" + |int $pos = 0; + |int $numsToRemove = 0; + |Object[] $values; + | + |for (int $i = 0; $i < $arr.numElements(); $i ++) { + | if (!$arr.isNullAt($i) && ${ctx.genEqual(right.dataType, value, getValue)}) { + |$numsToRemove = 
$numsToRemove + 1; + | } + |} + |$values = new Object[$arr.numElements() - $numsToRemove]; + |for (int $i = 0; $i < $arr.numElements(); $i ++) { + | if ($arr.isNullAt($i)) { + | $values[$pos] = null; + | $pos = $pos + 1; + | } + | else { + | if (${ctx.genEqual(right.dataType, value, getValue)}) { + | ; + | } + | else { + | $values[$pos] = ${CodeGenerator.getValue(arr, elementType, s"$i")}; --- End diff -- `$values[$pos] = $getValue;`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r185721700 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala --- @@ -280,4 +280,35 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Concat(Seq(aa0, aa1)), Seq(Seq("a", "b"), Seq("c"), Seq("d"), Seq("e", "f"))) } + + test("Array remove") { +val a0 = Literal.create(Seq(1, 2, 3, 2, 2, 5), ArrayType(IntegerType)) +val a1 = Literal.create(Seq("b", "a", "a", "c", "b"), ArrayType(StringType)) +val a2 = Literal.create(Seq[String](null, "", null, ""), ArrayType(StringType)) +val a3 = Literal.create(Seq.empty[Integer], ArrayType(IntegerType)) +val a4 = Literal.create(null, ArrayType(StringType)) +val a5 = Literal.create(Seq(1, null, 8, 9, null), ArrayType(IntegerType)) +val a6 = Literal.create(Seq(true, false, false, true), ArrayType(BooleanType)) + +checkEvaluation(ArrayRemove(a0, Literal(0)), Seq(1, 2, 3, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(1)), Seq(2, 3, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(2)), Seq(1, 3, 5)) +checkEvaluation(ArrayRemove(a0, Literal(3)), Seq(1, 2, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(5)), Seq(1, 2, 3, 2, 2)) --- End diff -- Can you add a case for something like `ArrayRemove(a0, Literal(10))` to check no value is removed with not contained value? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r185720852 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -883,3 +883,70 @@ case class Concat(children: Seq[Expression]) extends Expression { override def sql: String = s"concat(${children.map(_.sql).mkString(", ")})" } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes with CodegenFallback { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + override def nullSafeEval(arr: Any, value: Any): Any = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +val data = arr.asInstanceOf[ArrayData].toArray[AnyRef](elementType).filter(_ != value) +new GenericArrayData(data.asInstanceOf[Array[Any]]) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +nullSafeCodeGen(ctx, ev, (arr, value) => { + val arrayClass = classOf[GenericArrayData].getName + val values = ctx.freshName("values") + val i = ctx.freshName("i") + val pos = ctx.freshName("arrayPosition") + val numsToRemove = ctx.freshName("newArrLen") + val getValue = CodeGenerator.getValue(arr, right.dataType, i) + s""" + |int $pos = 0; + |int $numsToRemove = 0; + |Object[] $values; + | + |for (int $i = 0; $i < $arr.numElements(); $i ++) { + | if (!$arr.isNullAt($i) && ${ctx.genEqual(right.dataType, value, getValue)}) { + |$numsToRemove = 
$numsToRemove + 1; + | } + |} + |$values = new Object[$arr.numElements() - $numsToRemove]; --- End diff -- Can you specialize creating `ArrayData` for primitive types to avoid boxing? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r185715072 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -287,3 +287,44 @@ case class ArrayContains(left: Expression, right: Expression) override def prettyName: String = "array_contains" } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes with CodegenFallback { --- End diff -- Now that there is a codegen, we can remove `CodegenFallback`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r185722053 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala --- @@ -280,4 +280,35 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Concat(Seq(aa0, aa1)), Seq(Seq("a", "b"), Seq("c"), Seq("d"), Seq("e", "f"))) } + + test("Array remove") { +val a0 = Literal.create(Seq(1, 2, 3, 2, 2, 5), ArrayType(IntegerType)) +val a1 = Literal.create(Seq("b", "a", "a", "c", "b"), ArrayType(StringType)) +val a2 = Literal.create(Seq[String](null, "", null, ""), ArrayType(StringType)) +val a3 = Literal.create(Seq.empty[Integer], ArrayType(IntegerType)) +val a4 = Literal.create(null, ArrayType(StringType)) +val a5 = Literal.create(Seq(1, null, 8, 9, null), ArrayType(IntegerType)) +val a6 = Literal.create(Seq(true, false, false, true), ArrayType(BooleanType)) + +checkEvaluation(ArrayRemove(a0, Literal(0)), Seq(1, 2, 3, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(1)), Seq(2, 3, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(2)), Seq(1, 3, 5)) +checkEvaluation(ArrayRemove(a0, Literal(3)), Seq(1, 2, 2, 2, 5)) +checkEvaluation(ArrayRemove(a0, Literal(5)), Seq(1, 2, 3, 2, 2)) + +checkEvaluation(ArrayRemove(a1, Literal("")), Seq("b", "a", "a", "c", "b")) +checkEvaluation(ArrayRemove(a1, Literal("a")), Seq("b", "c", "b")) +checkEvaluation(ArrayRemove(a1, Literal("b")), Seq("a", "a", "c")) +checkEvaluation(ArrayRemove(a1, Literal("c")), Seq("b", "a", "a", "b")) + +checkEvaluation(ArrayRemove(a2, Literal("")), Seq(null, null)) +checkEvaluation(ArrayRemove(a2, Literal.create(null, StringType)), null) + +checkEvaluation(ArrayRemove(a3, Literal(1)), Seq.empty[Integer]) + +checkEvaluation(ArrayRemove(a4, Literal("a")), null) + +checkEvaluation(ArrayRemove(a5, Literal(9)), Seq(1, null, 8, null)) +checkEvaluation(ArrayRemove(a6, Literal(false)), Seq(true, true)) --- 
End diff -- Can you add a case for something like `ArrayRemove(a0, Literal(null, IntegerType))`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r185716705 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -883,3 +883,70 @@ case class Concat(children: Seq[Expression]) extends Expression { override def sql: String = s"concat(${children.map(_.sql).mkString(", ")})" } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes with CodegenFallback { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + override def nullSafeEval(arr: Any, value: Any): Any = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +val data = arr.asInstanceOf[ArrayData].toArray[AnyRef](elementType).filter(_ != value) +new GenericArrayData(data.asInstanceOf[Array[Any]]) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +nullSafeCodeGen(ctx, ev, (arr, value) => { + val arrayClass = classOf[GenericArrayData].getName + val values = ctx.freshName("values") + val i = ctx.freshName("i") + val pos = ctx.freshName("arrayPosition") + val numsToRemove = ctx.freshName("newArrLen") + val getValue = CodeGenerator.getValue(arr, right.dataType, i) --- End diff -- `CodeGenerator.getValue(arr, elementType, i)`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r185716774 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -883,3 +883,70 @@ case class Concat(children: Seq[Expression]) extends Expression { override def sql: String = s"concat(${children.map(_.sql).mkString(", ")})" } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes with CodegenFallback { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + override def nullSafeEval(arr: Any, value: Any): Any = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +val data = arr.asInstanceOf[ArrayData].toArray[AnyRef](elementType).filter(_ != value) +new GenericArrayData(data.asInstanceOf[Array[Any]]) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +nullSafeCodeGen(ctx, ev, (arr, value) => { + val arrayClass = classOf[GenericArrayData].getName + val values = ctx.freshName("values") + val i = ctx.freshName("i") + val pos = ctx.freshName("arrayPosition") + val numsToRemove = ctx.freshName("newArrLen") + val getValue = CodeGenerator.getValue(arr, right.dataType, i) + s""" + |int $pos = 0; + |int $numsToRemove = 0; + |Object[] $values; + | + |for (int $i = 0; $i < $arr.numElements(); $i ++) { + | if (!$arr.isNullAt($i) && ${ctx.genEqual(right.dataType, value, getValue)}) { --- End diff -- 
`ctx.genEqual(elementType, value, getValue)`? Btw, this expression is used twice, so we should extract it. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r185716615 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -883,3 +883,70 @@ case class Concat(children: Seq[Expression]) extends Expression { override def sql: String = s"concat(${children.map(_.sql).mkString(", ")})" } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes with CodegenFallback { + + override def dataType: DataType = left.dataType + + override def inputTypes: Seq[AbstractDataType] = +Seq(ArrayType, left.dataType.asInstanceOf[ArrayType].elementType) + + override def nullSafeEval(arr: Any, value: Any): Any = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +val data = arr.asInstanceOf[ArrayData].toArray[AnyRef](elementType).filter(_ != value) +new GenericArrayData(data.asInstanceOf[Array[Any]]) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +val elementType = left.dataType.asInstanceOf[ArrayType].elementType +nullSafeCodeGen(ctx, ev, (arr, value) => { + val arrayClass = classOf[GenericArrayData].getName + val values = ctx.freshName("values") + val i = ctx.freshName("i") + val pos = ctx.freshName("arrayPosition") + val numsToRemove = ctx.freshName("newArrLen") --- End diff -- `ctx.freshName("numsToRemove")`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r182605869 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -287,3 +287,44 @@ case class ArrayContains(left: Expression, right: Expression) override def prettyName: String = "array_contains" } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes with CodegenFallback { --- End diff -- I'm not sure about it. But I think it is better to make it implement codegen instead of CodegenFallback. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user huaxingao commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r182605111 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -287,3 +287,44 @@ case class ArrayContains(left: Expression, right: Expression) override def prettyName: String = "array_contains" } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes with CodegenFallback { --- End diff -- @viirya Thanks for your comment. I will remove the CodegenFallback and submit a change soon. Do you know why the current SortArray implements CodegenFallback? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21069#discussion_r181525550 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -287,3 +287,44 @@ case class ArrayContains(left: Expression, right: Expression) override def prettyName: String = "array_contains" } + +/** + * Remove all elements that equal to element from the given array + */ +@ExpressionDescription( + usage = "_FUNC_(array, element) - Remove all elements that equal to element from array.", + examples = """ +Examples: + > SELECT _FUNC_(array(1, 2, 3, null, 3), 3); + [1,2,null] + """, since = "2.4.0") +case class ArrayRemove(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes with CodegenFallback { --- End diff -- As the same reason at https://github.com/apache/spark/pull/21061#discussion_r181399858, I think we should avoid using `CodegenFallback` if possible. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21069: [SPARK-23920][SQL]add array_remove to remove all ...
GitHub user huaxingao opened a pull request: https://github.com/apache/spark/pull/21069 [SPARK-23920][SQL]add array_remove to remove all elements that equal element from array ## What changes were proposed in this pull request? add array_remove to remove all elements that equal element from array ## How was this patch tested? add unit tests You can merge this pull request into a Git repository by running: $ git pull https://github.com/huaxingao/spark spark-23920 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/spark/pull/21069.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #21069 commit cd9969442f780e5a0dad74aa61e49151dd2b2250 Author: Huaxin Gao — Date: 2018-04-13T18:25:19Z [SPARK-23920][SQL]add array_remove to remove all elements that equal element from array --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org