Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/21061#discussion_r192520463
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
---
@@ -2189,3 +2189,302 @@ case class ArrayRemove(left: Expression, right:
Expression)
override def prettyName: String = "array_remove"
}
+
+object ArraySetLike {
+ private val MAX_ARRAY_LENGTH: Int =
ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
+
+ def toArrayDataInt(hs: OpenHashSet[Int]): ArrayData = {
+ val array = new Array[Int](hs.size)
+ var pos = hs.nextPos(0)
+ var i = 0
+ while (pos != OpenHashSet.INVALID_POS) {
+ array(i) = hs.getValue(pos)
+ pos = hs.nextPos(pos + 1)
+ i += 1
+ }
+
+ if (useGenericArrayData(LongType.defaultSize, array.length)) {
+ new GenericArrayData(array)
+ } else {
+ UnsafeArrayData.fromPrimitiveArray(array)
+ }
+ }
+
+ def toArrayDataLong(hs: OpenHashSet[Long]): ArrayData = {
+ val array = new Array[Long](hs.size)
+ var pos = hs.nextPos(0)
+ var i = 0
+ while (pos != OpenHashSet.INVALID_POS) {
+ array(i) = hs.getValue(pos)
+ pos = hs.nextPos(pos + 1)
+ i += 1
+ }
+
+ if (useGenericArrayData(LongType.defaultSize, array.length)) {
+ new GenericArrayData(array)
+ } else {
+ UnsafeArrayData.fromPrimitiveArray(array)
+ }
+ }
+
+ def useGenericArrayData(elementSize: Int, length: Int): Boolean = {
--- End diff --
Shall we move `useGenericArrayData` to `UnsafeArrayData` and reuse it? The
name might need to be changed to fit that use.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]