s1ck commented on a change in pull request #24107: [SPARK-27174][SQL] Add 
support for casting integer types to binary
URL: https://github.com/apache/spark/pull/24107#discussion_r266046555
 
 

 ##########
 File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 ##########
 @@ -1382,3 +1392,44 @@ case class UpCast(child: Expression, dataType: 
DataType, walkedTypePath: Seq[Str
   extends UnaryExpression with Unevaluable {
   override lazy val resolved = false
 }
+
+object ToBinary {
+
+  val objectName: String = ToBinary.getClass.getName.stripSuffix("$")
+
+  def cast(l: Long): Array[Byte] = {
 
 Review comment:
   Java also provides the following way to convert `Long` to `byte[]`:
   
   ```
   ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES);
   buffer.putLong(x);
   return buffer.array();
   ```
   
   If we don't share the `ByteBuffer`, this is much slower than the custom 
conversion. We did a JMH benchmark which compares our custom conversion and the 
`ByteBuffer` approach:
   
   ```
   import java.nio.ByteBuffer
   
   import org.openjdk.jmh.annotations._
   
   import scala.util.Random
   
   object Impls {
   
     @inline final def customCast(l: Long): Array[Byte] = {
       val result = new Array[Byte](8)
       result(0) = (l >>> 56 & 0xFF).toByte
       result(1) = (l >>> 48 & 0xFF).toByte
       result(2) = (l >>> 40 & 0xFF).toByte
       result(3) = (l >>> 32 & 0xFF).toByte
       result(4) = (l >>> 24 & 0xFF).toByte
       result(5) = (l >>> 16 & 0xFF).toByte
       result(6) = (l >>> 8 & 0xFF).toByte
       result(7) = (l & 0xFF).toByte
       result
     }
   
     @inline final def byteBufferCast(l: Long): Array[Byte] = {
       val buffer = ByteBuffer.allocate(8)
       buffer.putLong(l)
       buffer.array
     }
   
   }
   
   @State(Scope.Benchmark)
   @Fork(value = 3)
   class LongSerializationBenchmark {
   
     var longsToConvert: Array[Long] = _
   
     @Setup
     def setUp(): Unit = {
       val numConversions = 1000
       longsToConvert = Array.fill(numConversions)(Random.nextLong)
     }
   
     @Benchmark
     def customCast(): Int = {
       val l = longsToConvert.length
       var check = 0
       var i = 0
       while (i < l) {
         check += Impls.customCast(longsToConvert(i)).length
         i += 1
       }
       check
     }
   
     @Benchmark
     def byteBufferCast(): Int = {
       val l = longsToConvert.length
       var check = 0
       var i = 0
       while (i < l) {
         check += Impls.byteBufferCast(longsToConvert(i)).length
         i += 1
       }
       check
     }
   
   }
   ```
   
   The results suggest that we should go for the custom implementation:
   
   ```
   Benchmark                                   Mode  Cnt          Score        Error  Units
   LongSerializationBenchmark.byteBufferCast  thrpt   15     165426.898 ±   3412.197  ops/s
   LongSerializationBenchmark.customCast      thrpt   15  310527608.212 ± 6216611.765  ops/s
   ```
   
   Is there a way to reuse the `ByteBuffer` across multiple conversions?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to