Github user kiszk commented on a diff in the pull request:
https://github.com/apache/spark/pull/21061#discussion_r182200622
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
---
@@ -417,3 +418,156 @@ case class ArrayMax(child: Expression) extends
UnaryExpression with ImplicitCast
override def prettyName: String = "array_max"
}
+
+abstract class ArraySetUtils extends BinaryExpression with
ExpectsInputTypes {
+ val kindUnion = 1
+ def typeId: Int
+
+ def array1: Expression
+ def array2: Expression
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType,
ArrayType)
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ val r = super.checkInputDataTypes()
+ if ((r == TypeCheckResult.TypeCheckSuccess) &&
+ (array1.dataType.asInstanceOf[ArrayType].elementType !=
+ array2.dataType.asInstanceOf[ArrayType].elementType)) {
+ TypeCheckResult.TypeCheckFailure("Element type in both arrays must
be the same")
+ } else {
+ r
+ }
+ }
+
+ override def dataType: DataType = array1.dataType
+
+ private def elementType = dataType.asInstanceOf[ArrayType].elementType
+ private def cn1 = array1.dataType.asInstanceOf[ArrayType].containsNull
+ private def cn2 = array2.dataType.asInstanceOf[ArrayType].containsNull
+
+ override def nullSafeEval(input1: Any, input2: Any): Any = {
+ val ary1 = input1.asInstanceOf[ArrayData]
+ val ary2 = input2.asInstanceOf[ArrayData]
+
+ if (!cn1 && !cn2) {
+ elementType match {
+ case IntegerType =>
+ // avoid boxing of primitive int array elements
+ val hs = new OpenHashSet[Int]
+ var i = 0
+ while (i < ary1.numElements()) {
+ hs.add(ary1.getInt(i))
+ i += 1
+ }
+ i = 0
+ while (i < ary2.numElements()) {
--- End diff --
We can also support `array_union` and `array_except` by modifying this second
loop, along with a few other small changes. This is why we introduced
`ArraySetUtils` in this PR.
Other PRs will update `ArraySetUtils` as needed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]