Github user kiszk commented on a diff in the pull request:
https://github.com/apache/spark/pull/21061#discussion_r192546226
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
---
@@ -2189,3 +2189,302 @@ case class ArrayRemove(left: Expression, right:
Expression)
override def prettyName: String = "array_remove"
}
+
+object ArraySetLike {
+ private val MAX_ARRAY_LENGTH: Int =
ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
+
+ def toArrayDataInt(hs: OpenHashSet[Int]): ArrayData = {
+ val array = new Array[Int](hs.size)
+ var pos = hs.nextPos(0)
+ var i = 0
+ while (pos != OpenHashSet.INVALID_POS) {
+ array(i) = hs.getValue(pos)
+ pos = hs.nextPos(pos + 1)
+ i += 1
+ }
+
+ if (useGenericArrayData(LongType.defaultSize, array.length)) {
+ new GenericArrayData(array)
+ } else {
+ UnsafeArrayData.fromPrimitiveArray(array)
+ }
+ }
+
+ def toArrayDataLong(hs: OpenHashSet[Long]): ArrayData = {
+ val array = new Array[Long](hs.size)
+ var pos = hs.nextPos(0)
+ var i = 0
+ while (pos != OpenHashSet.INVALID_POS) {
+ array(i) = hs.getValue(pos)
+ pos = hs.nextPos(pos + 1)
+ i += 1
+ }
+
+ if (useGenericArrayData(LongType.defaultSize, array.length)) {
+ new GenericArrayData(array)
+ } else {
+ UnsafeArrayData.fromPrimitiveArray(array)
+ }
+ }
+
+ def useGenericArrayData(elementSize: Int, length: Int): Boolean = {
--- End diff --
I tried to reuse it, but gave up. The reason is that
`UnsafeArrayData.fromPrimitiveArray()` also uses the intermediate values
(e.g. `headerInBytes` and `valueRegionInBytes`) that are calculated in this
method, and I think there is no idiomatic way to return multiple values
from a function.
So, although we could move this to `UnsafeArrayData`, it would not be easy
to reuse it there. What do you think?
```
private static UnsafeArrayData fromPrimitiveArray(
Object arr, int offset, int length, int elementSize) {
final long headerInBytes = calculateHeaderPortionInBytes(length);
final long valueRegionInBytes = elementSize * length;
final long totalSizeInLongs = (headerInBytes + valueRegionInBytes + 7)
/ 8;
if (totalSizeInLongs > Integer.MAX_VALUE / 8) {
throw new UnsupportedOperationException("Cannot convert this array to
unsafe format as " +
"it's too big.");
}
final long[] data = new long[(int)totalSizeInLongs];
Platform.putLong(data, Platform.LONG_ARRAY_OFFSET, length);
Platform.copyMemory(arr, offset, data,
Platform.LONG_ARRAY_OFFSET + headerInBytes, valueRegionInBytes);
UnsafeArrayData result = new UnsafeArrayData();
result.pointTo(data, Platform.LONG_ARRAY_OFFSET, (int)totalSizeInLongs
* 8);
return result;
}
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]