Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21061#discussion_r192251150
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala ---
    @@ -1882,3 +1882,311 @@ case class ArrayRepeat(left: Expression, right: Expression)
       }
     
     }
    +
    +object ArraySetLike {
    +  val kindUnion = 1
    +
    +  private val MAX_ARRAY_LENGTH: Int = 
ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
    +
    +  def toArrayDataInt(hs: OpenHashSet[Int]): ArrayData = {
    +    val array = new Array[Int](hs.size)
    +    var pos = hs.nextPos(0)
    +    var i = 0
    +    while (pos != OpenHashSet.INVALID_POS) {
    +      array(i) = hs.getValue(pos)
    +      pos = hs.nextPos(pos + 1)
    +      i += 1
    +    }
    +
    +    val numBytes = 4L * array.length
    +    val unsafeArraySizeInBytes = 
UnsafeArrayData.calculateHeaderPortionInBytes(array.length) +
    +      
org.apache.spark.unsafe.array.ByteArrayMethods.roundNumberOfBytesToNearestWord(numBytes)
    +    // Since UnsafeArrayData.fromPrimitiveArray() uses long[], max 
elements * 8 bytes can be used
    +    if (unsafeArraySizeInBytes <= Integer.MAX_VALUE * 8) {
    +      UnsafeArrayData.fromPrimitiveArray(array)
    +    } else {
    +      new GenericArrayData(array)
    +    }
    +  }
    +
    +  def toArrayDataLong(hs: OpenHashSet[Long]): ArrayData = {
    +    val array = new Array[Long](hs.size)
    +    var pos = hs.nextPos(0)
    +    var i = 0
    +    while (pos != OpenHashSet.INVALID_POS) {
    +      array(i) = hs.getValue(pos)
    +      pos = hs.nextPos(pos + 1)
    +      i += 1
    +    }
    +
    +    val numBytes = 8L * array.length
    --- End diff --
    
    Should we use `LongType.defaultSize` instead of the hard-coded `8L` here?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to