This is an automated email from the ASF dual-hosted git repository.

ulyssesyou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new 8813c4c  [KYUUBI #1333] Improve zorder interleave perf
8813c4c is described below

commit 8813c4cf9240c4de3c84d2318375d78e718907e3
Author: ulysses-you <[email protected]>
AuthorDate: Fri Nov 5 18:04:47 2021 +0800

    [KYUUBI #1333] Improve zorder interleave perf
    
    <!--
    Thanks for sending a pull request!
    
    Here are some tips for you:
      1. If this is your first time, please read our contributor guidelines: 
https://kyuubi.readthedocs.io/en/latest/community/contributions.html
      2. If the PR is related to an issue in 
https://github.com/apache/incubator-kyuubi/issues, add '[KYUUBI #XXXX]' in your 
PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'.
      3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., 
'[WIP][KYUUBI #XXXX] Your PR title ...'.
    -->
    
    ### _Why are the changes needed?_
    <!--
    Please clarify why the changes are needed. For instance,
      1. If you add a feature, you can talk about the use case of it.
      2. If you fix a bug, you can clarify why it is a bug.
    -->
    Make interleave function more faster.
    
    The basic idea is from 
http://graphics.stanford.edu/~seander/bithacks.html#InterleaveTableObvious
    
    ### _How was this patch tested?_
    - [x] Add some test cases that check the changes thoroughly including 
negative and positive cases if possible
    
    - [ ] Add screenshots for manual tests if appropriate
    
    - [x] [Run 
test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests)
 locally before make a pull request
    
    Closes #1333 from ulysses-you/zorder-perf.
    
    Closes #1333
    
    a9411877 [ulysses-you] address comment
    bce82fc7 [ulysses-you] simplify
    05f0b6e1 [ulysses-you] benchmark result
    f470a043 [ulysses-you] while
    a63cfae2 [ulysses-you] improve
    
    Authored-by: ulysses-you <[email protected]>
    Signed-off-by: ulysses-you <[email protected]>
---
 .../benchmarks/ZorderCoreBenchmark-results.txt     |  16 +-
 .../org/apache/kyuubi/sql/zorder/ZorderBase.scala  |  18 +-
 .../kyuubi/sql/zorder/ZorderBytesUtils.scala       | 322 ++++++++++++++++++++-
 .../org/apache/spark/sql/ZorderCoreBenchmark.scala |  30 +-
 .../scala/org/apache/spark/sql/ZorderSuite.scala   |  14 +-
 5 files changed, 357 insertions(+), 43 deletions(-)

diff --git 
a/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt 
b/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt
index 642ff20..0ed1cbf 100644
--- 
a/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt
+++ 
b/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt
@@ -2,17 +2,17 @@ Java HotSpot(TM) 64-Bit Server VM 1.8.0_271-b09 on Mac OS X 
10.16
 Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
 1000000 rows zorder core benchmark:       Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
------------------------------------------------------------------------------------------------------------------------
-2 int columns benchmark                            1620           1693         
 67          0.6        1619.9       1.0X
-3 int columns benchmark                            1610           1684         
 66          0.6        1609.5       1.0X
-4 int columns benchmark                            1904           1969         
 58          0.5        1903.7       0.9X
-2 long columns benchmark                           2387           2420         
 50          0.4        2386.6       0.7X
-3 long columns benchmark                           3029           3133         
111          0.3        3028.7       0.5X
-4 long columns benchmark                           3789           3848         
 89          0.3        3789.0       0.4X
+2 int columns benchmark                             191            201         
 12          5.2         191.1       1.0X
+3 int columns benchmark                             262            304         
 72          3.8         261.5       0.7X
+4 int columns benchmark                             302            310         
  9          3.3         302.1       0.6X
+2 long columns benchmark                            185            188         
  3          5.4         185.4       1.0X
+3 long columns benchmark                            241            243         
  3          4.2         240.6       0.8X
+4 long columns benchmark                            291            335         
 69          3.4         290.6       0.7X
 
 Java HotSpot(TM) 64-Bit Server VM 1.8.0_271-b09 on Mac OS X 10.16
 Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
 10000000 iterations paddingTo8Byte benchmark:  Best Time(ms)   Avg Time(ms)   
Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 
----------------------------------------------------------------------------------------------------------------------------
-2 length benchmark                                      167            170     
      3         59.9          16.7       1.0X
-16 length benchmark                                     162            164     
      3         61.7          16.2       1.0X
+2 length benchmark                                      163            168     
      5         61.2          16.3       1.0X
+16 length benchmark                                     154            155     
      2         65.1          15.4       1.1X
 
diff --git 
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBase.scala
 
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBase.scala
index 29e7d5b..8eb1def 100644
--- 
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBase.scala
+++ 
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBase.scala
@@ -43,46 +43,46 @@ abstract class ZorderBase extends Expression {
   }
 
   @transient
-  private lazy val defaultNullValues: Array[Array[Byte]] =
+  private[this] lazy val defaultNullValues: Array[Any] =
     children.map(_.dataType)
       .map(ZorderBytesUtils.defaultValue)
       .toArray
 
   override def eval(input: InternalRow): Any = {
-    val binaryArr = children.zipWithIndex.map {
+    val childrenValues = children.zipWithIndex.map {
       case (child: Expression, index) =>
         val v = child.eval(input)
         if (v == null) {
           defaultNullValues(index)
         } else {
-          ZorderBytesUtils.toByte(v)
+          v
         }
     }
-    ZorderBytesUtils.interleaveMultiByteArray(binaryArr.toArray)
+    ZorderBytesUtils.interleaveBits(childrenValues.toArray)
   }
 
   override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
     val evals = children.map(_.genCode(ctx))
     val defaultValues = ctx.addReferenceObj("defaultValues", defaultNullValues)
-    val binaryArray = ctx.freshName("binaryArray")
+    val values = ctx.freshName("values")
     val util = ZorderBytesUtils.getClass.getName.stripSuffix("$")
     val inputs = evals.zipWithIndex.map {
       case (eval, index) =>
         s"""
            |${eval.code}
            |if (${eval.isNull}) {
-           |  $binaryArray[$index] = (byte[]) $defaultValues[$index];
+           |  $values[$index] = $defaultValues[$index];
            |} else {
-           |  $binaryArray[$index] = $util.toByte(${eval.value});
+           |  $values[$index] = ${eval.value};
            |}
            |""".stripMargin
     }
     ev.copy(code =
       code"""
          |byte[] ${ev.value} = null;
-         |byte[][] $binaryArray = new byte[${evals.length}][];
+         |Object[] $values = new Object[${evals.length}];
          |${inputs.mkString("\n")}
-         |${ev.value} = $util.interleaveMultiByteArray($binaryArray);
+         |${ev.value} = $util.interleaveBits($values);
          |""".stripMargin,
       isNull = FalseLiteral)
   }
diff --git 
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala
 
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala
index 6b849e8..d5de5cd 100644
--- 
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala
+++ 
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala
@@ -30,27 +30,311 @@ object ZorderBytesUtils {
   private final val BIT_32_MASK = 1 << 31
   private final val BIT_64_MASK = 1L << 63
 
-  def interleaveMultiByteArray(arrays: Array[Array[Byte]]): Array[Byte] = {
+  def interleaveBits(inputs: Array[Any]): Array[Byte] = {
+    inputs.length match {
+      // it's a more fast approach, use O(8 * 8)
+      // can see 
http://graphics.stanford.edu/~seander/bithacks.html#InterleaveTableObvious
+      case 1 => longToByte(toLong(inputs(0)))
+      case 2 => interleave2Longs(toLong(inputs(0)), toLong(inputs(1)))
+      case 3 => interleave3Longs(toLong(inputs(0)), toLong(inputs(1)), 
toLong(inputs(2)))
+      case 4 => interleave4Longs(toLong(inputs(0)), toLong(inputs(1)), 
toLong(inputs(2)),
+        toLong(inputs(3)))
+      case 5 => interleave5Longs(toLong(inputs(0)), toLong(inputs(1)), 
toLong(inputs(2)),
+        toLong(inputs(3)), toLong(inputs(4)))
+      case 6 => interleave6Longs(toLong(inputs(0)), toLong(inputs(1)), 
toLong(inputs(2)),
+        toLong(inputs(3)), toLong(inputs(4)), toLong(inputs(5)))
+      case 7 => interleave7Longs(toLong(inputs(0)), toLong(inputs(1)), 
toLong(inputs(2)),
+        toLong(inputs(3)), toLong(inputs(4)), toLong(inputs(5)), 
toLong(inputs(6)))
+      case 8 => interleave8Longs(toLong(inputs(0)), toLong(inputs(1)), 
toLong(inputs(2)),
+        toLong(inputs(3)), toLong(inputs(4)), toLong(inputs(5)), 
toLong(inputs(6)),
+        toLong(inputs(7)))
+
+      case _ =>
+        // it's the default approach, use O(64 * n), n is the length of inputs
+        interleaveBitsDefault(inputs.map(toByteArray))
+    }
+  }
+
+  private def interleave2Longs(l1: Long, l2: Long): Array[Byte] = {
+    // output 8 * 16 bits
+    val result = new Array[Byte](16)
+    var i = 0
+    while(i < 8) {
+      val tmp1 = ((l1 >> (i * 8)) & 0xff).toShort
+      val tmp2 = ((l2 >> (i * 8)) & 0xff).toShort
+
+      var z = 0
+      var j = 0
+      while (j < 8) {
+        val x_masked = tmp1 & (1 << j)
+        val y_masked = tmp2 & (1 << j)
+        z |= (x_masked << j)
+        z |= (y_masked << (j + 1))
+        j = j + 1
+      }
+      result((7 - i) * 2 + 1) = (z & 0xff).toByte
+      result((7 - i) * 2) = ((z >> 8) & 0xff).toByte
+      i = i + 1
+    }
+    result
+  }
+
+  private def interleave3Longs(l1: Long, l2: Long, l3: Long): Array[Byte] = {
+    // output 8 * 24 bits
+    val result = new Array[Byte](24)
+    var i = 0
+    while(i < 8) {
+      val tmp1 = ((l1 >> (i * 8)) & 0xff).toInt
+      val tmp2 = ((l2 >> (i * 8)) & 0xff).toInt
+      val tmp3 = ((l3 >> (i * 8)) & 0xff).toInt
+
+      var z = 0
+      var j = 0
+      while (j < 8) {
+        val r1_mask = tmp1 & (1 << j)
+        val r2_mask = tmp2 & (1 << j)
+        val r3_mask = tmp3 & (1 << j)
+        z |= (r1_mask << (2 * j)) | (r2_mask << (2 * j + 1)) | (r3_mask << (2 
* j + 2))
+        j = j + 1
+      }
+      result((7 - i) * 3 + 2) = (z & 0xff).toByte
+      result((7 - i) * 3 + 1) = ((z >> 8) & 0xff).toByte
+      result((7 - i) * 3) = ((z >> 16) & 0xff).toByte
+      i = i + 1
+    }
+    result
+  }
+
+  private def interleave4Longs(l1: Long, l2: Long, l3: Long, l4: Long): 
Array[Byte] = {
+    // output 8 * 32 bits
+    val result = new Array[Byte](32)
+    var i = 0
+    while(i < 8) {
+      val tmp1 = ((l1 >> (i * 8)) & 0xff).toInt
+      val tmp2 = ((l2 >> (i * 8)) & 0xff).toInt
+      val tmp3 = ((l3 >> (i * 8)) & 0xff).toInt
+      val tmp4 = ((l4 >> (i * 8)) & 0xff).toInt
+
+      var z = 0
+      var j = 0
+      while (j < 8) {
+        val r1_mask = tmp1 & (1 << j)
+        val r2_mask = tmp2 & (1 << j)
+        val r3_mask = tmp3 & (1 << j)
+        val r4_mask = tmp4 & (1 << j)
+        z |= (r1_mask << (3 * j)) | (r2_mask << (3 * j + 1)) | (r3_mask << (3 
* j + 2)) |
+          (r4_mask << (3 * j + 3))
+        j = j + 1
+      }
+      result((7 - i) * 4 + 3) = (z & 0xff).toByte
+      result((7 - i) * 4 + 2) = ((z >> 8) & 0xff).toByte
+      result((7 - i) * 4 + 1) = ((z >> 16) & 0xff).toByte
+      result((7 - i) * 4) = ((z >> 24) & 0xff).toByte
+      i = i + 1
+    }
+    result
+  }
+
+  private def interleave5Longs(
+      l1: Long,
+      l2: Long,
+      l3: Long,
+      l4: Long,
+      l5: Long): Array[Byte] = {
+    // output 8 * 40 bits
+    val result = new Array[Byte](40)
+    var i = 0
+    while(i < 8) {
+      val tmp1 = ((l1 >> (i * 8)) & 0xff).toLong
+      val tmp2 = ((l2 >> (i * 8)) & 0xff).toLong
+      val tmp3 = ((l3 >> (i * 8)) & 0xff).toLong
+      val tmp4 = ((l4 >> (i * 8)) & 0xff).toLong
+      val tmp5 = ((l5 >> (i * 8)) & 0xff).toLong
+
+      var z = 0L
+      var j = 0
+      while (j < 8) {
+        val r1_mask = tmp1 & (1 << j)
+        val r2_mask = tmp2 & (1 << j)
+        val r3_mask = tmp3 & (1 << j)
+        val r4_mask = tmp4 & (1 << j)
+        val r5_mask = tmp5 & (1 << j)
+        z |= (r1_mask << (4 * j)) | (r2_mask << (4 * j + 1)) | (r3_mask << (4 
* j + 2)) |
+          (r4_mask << (4 * j + 3)) | (r5_mask << (4 * j + 4))
+        j = j + 1
+      }
+      result((7 - i) * 5 + 4) = (z & 0xff).toByte
+      result((7 - i) * 5 + 3) = ((z >> 8) & 0xff).toByte
+      result((7 - i) * 5 + 2) = ((z >> 16) & 0xff).toByte
+      result((7 - i) * 5 + 1) = ((z >> 24) & 0xff).toByte
+      result((7 - i) * 5) = ((z >> 32) & 0xff).toByte
+      i = i + 1
+    }
+    result
+  }
+
+  private def interleave6Longs(
+      l1: Long,
+      l2: Long,
+      l3: Long,
+      l4: Long,
+      l5: Long,
+      l6: Long): Array[Byte] = {
+    // output 8 * 48 bits
+    val result = new Array[Byte](48)
+    var i = 0
+    while(i < 8) {
+      val tmp1 = ((l1 >> (i * 8)) & 0xff).toLong
+      val tmp2 = ((l2 >> (i * 8)) & 0xff).toLong
+      val tmp3 = ((l3 >> (i * 8)) & 0xff).toLong
+      val tmp4 = ((l4 >> (i * 8)) & 0xff).toLong
+      val tmp5 = ((l5 >> (i * 8)) & 0xff).toLong
+      val tmp6 = ((l6 >> (i * 8)) & 0xff).toLong
+
+      var z = 0L
+      var j = 0
+      while (j < 8) {
+        val r1_mask = tmp1 & (1 << j)
+        val r2_mask = tmp2 & (1 << j)
+        val r3_mask = tmp3 & (1 << j)
+        val r4_mask = tmp4 & (1 << j)
+        val r5_mask = tmp5 & (1 << j)
+        val r6_mask = tmp6 & (1 << j)
+        z |= (r1_mask << (5 * j)) | (r2_mask << (5 * j + 1)) | (r3_mask << (5 
* j + 2)) |
+          (r4_mask << (5 * j + 3)) | (r5_mask << (5 * j + 4)) | (r6_mask << (5 
* j + 5))
+        j = j + 1
+      }
+      result((7 - i) * 6 + 5) = (z & 0xff).toByte
+      result((7 - i) * 6 + 4) = ((z >> 8) & 0xff).toByte
+      result((7 - i) * 6 + 3) = ((z >> 16) & 0xff).toByte
+      result((7 - i) * 6 + 2) = ((z >> 24) & 0xff).toByte
+      result((7 - i) * 6 + 1) = ((z >> 32) & 0xff).toByte
+      result((7 - i) * 6) = ((z >> 40) & 0xff).toByte
+      i = i + 1
+    }
+    result
+  }
+
+  private def interleave7Longs(
+      l1: Long,
+      l2: Long,
+      l3: Long,
+      l4: Long,
+      l5: Long,
+      l6: Long,
+      l7: Long): Array[Byte] = {
+    // output 8 * 56 bits
+    val result = new Array[Byte](56)
+    var i = 0
+    while(i < 8) {
+      val tmp1 = ((l1 >> (i * 8)) & 0xff).toLong
+      val tmp2 = ((l2 >> (i * 8)) & 0xff).toLong
+      val tmp3 = ((l3 >> (i * 8)) & 0xff).toLong
+      val tmp4 = ((l4 >> (i * 8)) & 0xff).toLong
+      val tmp5 = ((l5 >> (i * 8)) & 0xff).toLong
+      val tmp6 = ((l6 >> (i * 8)) & 0xff).toLong
+      val tmp7 = ((l7 >> (i * 8)) & 0xff).toLong
+
+      var z = 0L
+      var j = 0
+      while (j < 8) {
+        val r1_mask = tmp1 & (1 << j)
+        val r2_mask = tmp2 & (1 << j)
+        val r3_mask = tmp3 & (1 << j)
+        val r4_mask = tmp4 & (1 << j)
+        val r5_mask = tmp5 & (1 << j)
+        val r6_mask = tmp6 & (1 << j)
+        val r7_mask = tmp7 & (1 << j)
+        z |= (r1_mask << (6 * j)) | (r2_mask << (6 * j + 1)) | (r3_mask << (6 
* j + 2)) |
+          (r4_mask << (6 * j + 3)) | (r5_mask << (6 * j + 4)) | (r6_mask << (6 
* j + 5)) |
+          (r7_mask << (6 * j + 6))
+        j = j + 1
+      }
+      result((7 - i) * 7 + 6) = (z & 0xff).toByte
+      result((7 - i) * 7 + 5) = ((z >> 8) & 0xff).toByte
+      result((7 - i) * 7 + 4) = ((z >> 16) & 0xff).toByte
+      result((7 - i) * 7 + 3) = ((z >> 24) & 0xff).toByte
+      result((7 - i) * 7 + 2) = ((z >> 32) & 0xff).toByte
+      result((7 - i) * 7 + 1) = ((z >> 40) & 0xff).toByte
+      result((7 - i) * 7) = ((z >> 48) & 0xff).toByte
+      i = i + 1
+    }
+    result
+  }
+
+  private def interleave8Longs(
+      l1: Long,
+      l2: Long,
+      l3: Long,
+      l4: Long,
+      l5: Long,
+      l6: Long,
+      l7: Long,
+      l8: Long): Array[Byte] = {
+    // output 8 * 64 bits
+    val result = new Array[Byte](64)
+    var i = 0
+    while(i < 8) {
+      val tmp1 = ((l1 >> (i * 8)) & 0xff).toLong
+      val tmp2 = ((l2 >> (i * 8)) & 0xff).toLong
+      val tmp3 = ((l3 >> (i * 8)) & 0xff).toLong
+      val tmp4 = ((l4 >> (i * 8)) & 0xff).toLong
+      val tmp5 = ((l5 >> (i * 8)) & 0xff).toLong
+      val tmp6 = ((l6 >> (i * 8)) & 0xff).toLong
+      val tmp7 = ((l7 >> (i * 8)) & 0xff).toLong
+      val tmp8 = ((l8 >> (i * 8)) & 0xff).toLong
+
+      var z = 0L
+      var j = 0
+      while (j < 8) {
+        val r1_mask = tmp1 & (1 << j)
+        val r2_mask = tmp2 & (1 << j)
+        val r3_mask = tmp3 & (1 << j)
+        val r4_mask = tmp4 & (1 << j)
+        val r5_mask = tmp5 & (1 << j)
+        val r6_mask = tmp6 & (1 << j)
+        val r7_mask = tmp7 & (1 << j)
+        val r8_mask = tmp8 & (1 << j)
+        z |= (r1_mask << (7 * j)) | (r2_mask << (7 * j + 1)) | (r3_mask << (7 
* j + 2)) |
+          (r4_mask << (7 * j + 3)) | (r5_mask << (7 * j + 4)) | (r6_mask << (7 
* j + 5)) |
+          (r7_mask << (7 * j + 6)) | (r8_mask << (7 * j + 7))
+        j = j + 1
+      }
+      result((7 - i) * 8 + 7) = (z & 0xff).toByte
+      result((7 - i) * 8 + 6) = ((z >> 8) & 0xff).toByte
+      result((7 - i) * 8 + 5) = ((z >> 16) & 0xff).toByte
+      result((7 - i) * 8 + 4) = ((z >> 24) & 0xff).toByte
+      result((7 - i) * 8 + 3) = ((z >> 32) & 0xff).toByte
+      result((7 - i) * 8 + 2) = ((z >> 40) & 0xff).toByte
+      result((7 - i) * 8 + 1) = ((z >> 48) & 0xff).toByte
+      result((7 - i) * 8) = ((z >> 56) & 0xff).toByte
+      i = i + 1
+    }
+    result
+  }
+
+  def interleaveBitsDefault(arrays: Array[Array[Byte]]): Array[Byte] = {
     var totalLength = 0
     var maxLength = 0
-    arrays.foreach(array => {
+    arrays.foreach { array =>
       totalLength += array.length
       maxLength = maxLength.max(array.length * 8)
-    })
+    }
     val result = new Array[Byte](totalLength)
     var resultBit = 0
 
     var bit = 0
     while (bit < maxLength) {
-      val bytePos = Math.floor(bit / 8).toInt
+      val bytePos = bit / 8
       val bitPos = bit % 8
 
       for (arr <- arrays) {
-        if (bytePos < arr.length) {
-          val resultBytePos = totalLength - 1 - Math.floor(resultBit / 8).toInt
+        val len = arr.length
+        if (bytePos < len) {
+          val resultBytePos = totalLength - 1 - resultBit / 8
           val resultBitPos = resultBit % 8
           result(resultBytePos) = updatePos(result(resultBytePos), 
resultBitPos,
-            arr(arr.length - 1 - bytePos), bitPos)
+            arr(len - 1 - bytePos), bitPos)
           resultBit += 1
         }
       }
@@ -73,7 +357,23 @@ object ZorderBytesUtils {
     (a ^ (1 << apos)).toByte
   }
 
-  def toByte(a: Any): Array[Byte] = {
+  def toLong(a: Any): Long = {
+    a match {
+      case b: Boolean => (if (b) 1 else 0).toLong ^ BIT_64_MASK
+      case b: Byte => b.toLong ^ BIT_64_MASK
+      case s: Short => s.toLong ^ BIT_64_MASK
+      case i: Int => i.toLong ^ BIT_64_MASK
+      case l: Long => l ^ BIT_64_MASK
+      case f: Float => java.lang.Float.floatToRawIntBits(f).toLong ^ 
BIT_64_MASK
+      case d: Double => java.lang.Double.doubleToRawLongBits(d) ^ BIT_64_MASK
+      case str: UTF8String => str.getPrefix
+      case dec: Decimal => dec.toLong ^ BIT_64_MASK
+      case other: Any =>
+        throw new KyuubiSQLExtensionException("Unsupported z-order type: " + 
other.getClass)
+    }
+  }
+
+  def toByteArray(a: Any): Array[Byte] = {
     a match {
       case bo: Boolean =>
         booleanToByte(bo)
@@ -166,7 +466,11 @@ object ZorderBytesUtils {
     }
   }
 
-  def defaultValue(dataType: DataType): Array[Byte] = toByte {
+  def defaultByteArrayValue(dataType: DataType): Array[Byte] = toByteArray {
+    defaultValue(dataType)
+  }
+
+  def defaultValue(dataType: DataType): Any = {
     dataType match {
       case BooleanType =>
         true
diff --git 
a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala
 
b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala
index 78c9c91..3774fc3 100644
--- 
a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala
+++ 
b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala
@@ -38,19 +38,18 @@ class ZorderCoreBenchmark extends 
KyuubiSparkSQLExtensionTest with KyuubiBenchma
   private val runBenchmark = sys.env.contains("RUN_BENCHMARK")
   private val numRows = 1 * 1000 * 1000
 
-  private def randomIntByteArray(numColumns: Int): Seq[Array[Array[Byte]]] = {
-    (1 to numRows).map { i =>
-      val arr = new Array[Array[Byte]](numColumns)
-      (0 until numColumns).foreach(col => arr(col) = 
ZorderBytesUtils.toByte(i))
+  private def randomInt(numColumns: Int): Seq[Array[Any]] = {
+    (1 to numRows).map { l =>
+      val arr = new Array[Any](numColumns)
+      (0 until numColumns).foreach(col => arr(col) = l)
       arr
     }
   }
 
-  private def randomLongByteArray(numColumns: Int): Seq[Array[Array[Byte]]] = {
-    (1 to numRows).map { i =>
-      val l = i.toLong
-      val arr = new Array[Array[Byte]](numColumns)
-      (0 until numColumns).foreach(col => arr(col) = 
ZorderBytesUtils.toByte(l))
+  private def randomLong(numColumns: Int): Seq[Array[Any]] = {
+    (1 to numRows).map { l =>
+      val arr = new Array[Any](numColumns)
+      (0 until numColumns).foreach(col => arr(col) = l.toLong)
       arr
     }
   }
@@ -59,28 +58,27 @@ class ZorderCoreBenchmark extends 
KyuubiSparkSQLExtensionTest with KyuubiBenchma
     val benchmark = new Benchmark(
       s"$numRows rows zorder core benchmark", numRows, output = output)
     benchmark.addCase("2 int columns benchmark", 3) { _ =>
-      randomIntByteArray(2).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+      randomInt(2).foreach(ZorderBytesUtils.interleaveBits)
     }
 
     benchmark.addCase("3 int columns benchmark", 3) { _ =>
-      randomIntByteArray(3).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+      randomInt(3).foreach(ZorderBytesUtils.interleaveBits)
     }
 
     benchmark.addCase("4 int columns benchmark", 3) { _ =>
-      randomIntByteArray(4).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+      randomInt(4).foreach(ZorderBytesUtils.interleaveBits)
     }
 
-
     benchmark.addCase("2 long columns benchmark", 3) { _ =>
-      randomLongByteArray(2).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+      randomLong(2).foreach(ZorderBytesUtils.interleaveBits)
     }
 
     benchmark.addCase("3 long columns benchmark", 3) { _ =>
-      randomLongByteArray(3).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+      randomLong(3).foreach(ZorderBytesUtils.interleaveBits)
     }
 
     benchmark.addCase("4 long columns benchmark", 3) { _ =>
-      randomLongByteArray(4).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+      randomLong(4).foreach(ZorderBytesUtils.interleaveBits)
     }
 
     benchmark.run()
diff --git 
a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
 
b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
index f3e6085..7fc46ae 100644
--- 
a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
+++ 
b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 import org.apache.spark.sql.types._
 
 import org.apache.kyuubi.sql.{KyuubiSQLConf, KyuubiSQLExtensionException}
-import org.apache.kyuubi.sql.zorder.{OptimizeZorderCommandBase, Zorder}
+import org.apache.kyuubi.sql.zorder.{OptimizeZorderCommandBase, Zorder, 
ZorderBytesUtils}
 
 trait ZorderSuite extends KyuubiSparkSQLExtensionTest with 
ExpressionEvalHelper {
   override def sparkConf(): SparkConf = {
@@ -559,6 +559,18 @@ trait ZorderSuite extends KyuubiSparkSQLExtensionTest with 
ExpressionEvalHelper
       }
     }
   }
+
+  test("fast approach test") {
+    Seq[Seq[Any]](Seq(1L, 2L), Seq(1L, 2L, 3L), Seq(1L, 2L, 3L, 4L), Seq(1L, 
2L, 3L, 4L, 5L),
+      Seq(1L, 2L, 3L, 4L, 5L, 6L), Seq(1L, 2L, 3L, 4L, 5L, 6L, 7L),
+      Seq(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L))
+      .foreach { inputs =>
+        assert(java.util.Arrays.equals(
+          ZorderBytesUtils.interleaveBits(inputs.toArray),
+          
ZorderBytesUtils.interleaveBitsDefault(inputs.map(ZorderBytesUtils.toByteArray).toArray))
+        )
+      }
+  }
 }
 
 class ZorderWithCodegenEnabledSuite extends ZorderSuite {

Reply via email to