s1ck commented on a change in pull request #24107: [SPARK-27174][SQL] Add
support for casting integer types to binary
URL: https://github.com/apache/spark/pull/24107#discussion_r266046555
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
##########
@@ -1382,3 +1392,44 @@ case class UpCast(child: Expression, dataType:
DataType, walkedTypePath: Seq[Str
extends UnaryExpression with Unevaluable {
override lazy val resolved = false
}
+
+object ToBinary {
+
+ val objectName: String = ToBinary.getClass.getName.stripSuffix("$")
+
+ def cast(l: Long): Array[Byte] = {
Review comment:
Java also provides the following way to convert `Long` to `byte[]`:
```
ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES);
buffer.putLong(x);
return buffer.array();
```
If we don't share the `ByteBuffer`, this is much slower than the custom
conversion. We did a JMH benchmark which compares our custom conversion and the
`ByteBuffer` approach:
```
import java.nio.ByteBuffer
import org.openjdk.jmh.annotations._
import scala.util.Random
object Impls {
@inline final def customCast(l: Long): Array[Byte] = {
val result = new Array[Byte](8)
result(0) = (l >>> 56 & 0xFF).toByte
result(1) = (l >>> 48 & 0xFF).toByte
result(2) = (l >>> 40 & 0xFF).toByte
result(3) = (l >>> 32 & 0xFF).toByte
result(4) = (l >>> 24 & 0xFF).toByte
result(5) = (l >>> 16 & 0xFF).toByte
result(6) = (l >>> 8 & 0xFF).toByte
result(7) = (l & 0xFF).toByte
result
}
@inline final def byteBufferCast(l: Long): Array[Byte] = {
val buffer = ByteBuffer.allocate(8)
buffer.putLong(l)
buffer.array
}
}
@State(Scope.Benchmark)
@Fork(value = 3)
class LongSerializationBenchmark {
var longsToConvert: Array[Long] = _
@Setup
def setUp(): Unit = {
val numConversions = 1000
longsToConvert = Array.fill(numConversions)(Random.nextLong)
}
@Benchmark
def customCast(): Int = {
val l = longsToConvert.length
var check = 0
var i = 0
while (i < l) {
check += Impls.customCast(longsToConvert(i)).length
i += 1
}
check
}
@Benchmark
def byteBufferCast(): Int = {
val l = longsToConvert.length
var check = 0
var i = 0
while (i < l) {
check += Impls.byteBufferCast(longsToConvert(i)).length
i += 1
}
check
}
}
```
The results suggests that we should go for the custom implementation:
```
Benchmark Mode Cnt Score
Error Units
LongSerializationBenchmark.byteBufferCast thrpt 15 165426.898 ±
3412.197 ops/s
LongSerializationBenchmark.customCast thrpt 15 310527608.212 ±
6216611.765 ops/s
```
Is there a way to reuse the `ByteBuffer` across multiple conversions?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]