Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/21061#discussion_r192249752
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala ---
@@ -1882,3 +1882,311 @@ case class ArrayRepeat(left: Expression, right: Expression)
}
}
+
+object ArraySetLike {
+  val kindUnion = 1
+
+  private val MAX_ARRAY_LENGTH: Int = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
+
+  def toArrayDataInt(hs: OpenHashSet[Int]): ArrayData = {
+    val array = new Array[Int](hs.size)
+    var pos = hs.nextPos(0)
+    var i = 0
+    while (pos != OpenHashSet.INVALID_POS) {
+      array(i) = hs.getValue(pos)
+      pos = hs.nextPos(pos + 1)
+      i += 1
+    }
+
+    val numBytes = 4L * array.length
--- End diff --
We should use `IntegerType.defaultSize` instead of `4L` here?
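
For illustration, a minimal sketch of what the suggested change might look like (just the reviewer's suggestion, not the final patch; assumes `IntegerType` from `org.apache.spark.sql.types` is in scope, and the `array` here is a placeholder standing in for the one built above):

```scala
import org.apache.spark.sql.types.IntegerType

// Sketch of the suggestion: derive the element byte width from the Catalyst
// type instead of hard-coding 4L. IntegerType.defaultSize is 4 for Int, so
// the computed size is unchanged, but the intent is explicit.
val array = new Array[Int](16)  // placeholder array for illustration
val numBytes = IntegerType.defaultSize.toLong * array.length
```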
---