This is an automated email from the ASF dual-hosted git repository.
ulyssesyou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 8813c4c [KYUUBI #1333] Improve zorder interleave perf
8813c4c is described below
commit 8813c4cf9240c4de3c84d2318375d78e718907e3
Author: ulysses-you <[email protected]>
AuthorDate: Fri Nov 5 18:04:47 2021 +0800
[KYUUBI #1333] Improve zorder interleave perf
<!--
Thanks for sending a pull request!
Here are some tips for you:
1. If this is your first time, please read our contributor guidelines:
https://kyuubi.readthedocs.io/en/latest/community/contributions.html
2. If the PR is related to an issue in
https://github.com/apache/incubator-kyuubi/issues, add '[KYUUBI #XXXX]' in your
PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'.
3. If the PR is unfinished, add '[WIP]' in your PR title, e.g.,
'[WIP][KYUUBI #XXXX] Your PR title ...'.
-->
### _Why are the changes needed?_
<!--
Please clarify why the changes are needed. For instance,
1. If you add a feature, you can talk about the use case of it.
2. If you fix a bug, you can clarify why it is a bug.
-->
Make interleave function more faster.
The basic idea is from
http://graphics.stanford.edu/~seander/bithacks.html#InterleaveTableObvious
### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including
negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [x] [Run
test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests)
locally before make a pull request
Closes #1333 from ulysses-you/zorder-perf.
Closes #1333
a9411877 [ulysses-you] address comment
bce82fc7 [ulysses-you] simplify
05f0b6e1 [ulysses-you] benchmark result
f470a043 [ulysses-you] while
a63cfae2 [ulysses-you] improve
Authored-by: ulysses-you <[email protected]>
Signed-off-by: ulysses-you <[email protected]>
---
.../benchmarks/ZorderCoreBenchmark-results.txt | 16 +-
.../org/apache/kyuubi/sql/zorder/ZorderBase.scala | 18 +-
.../kyuubi/sql/zorder/ZorderBytesUtils.scala | 322 ++++++++++++++++++++-
.../org/apache/spark/sql/ZorderCoreBenchmark.scala | 30 +-
.../scala/org/apache/spark/sql/ZorderSuite.scala | 14 +-
5 files changed, 357 insertions(+), 43 deletions(-)
diff --git
a/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt
b/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt
index 642ff20..0ed1cbf 100644
---
a/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt
+++
b/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt
@@ -2,17 +2,17 @@ Java HotSpot(TM) 64-Bit Server VM 1.8.0_271-b09 on Mac OS X
10.16
Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
1000000 rows zorder core benchmark: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-2 int columns benchmark 1620 1693
67 0.6 1619.9 1.0X
-3 int columns benchmark 1610 1684
66 0.6 1609.5 1.0X
-4 int columns benchmark 1904 1969
58 0.5 1903.7 0.9X
-2 long columns benchmark 2387 2420
50 0.4 2386.6 0.7X
-3 long columns benchmark 3029 3133
111 0.3 3028.7 0.5X
-4 long columns benchmark 3789 3848
89 0.3 3789.0 0.4X
+2 int columns benchmark 191 201
12 5.2 191.1 1.0X
+3 int columns benchmark 262 304
72 3.8 261.5 0.7X
+4 int columns benchmark 302 310
9 3.3 302.1 0.6X
+2 long columns benchmark 185 188
3 5.4 185.4 1.0X
+3 long columns benchmark 241 243
3 4.2 240.6 0.8X
+4 long columns benchmark 291 335
69 3.4 290.6 0.7X
Java HotSpot(TM) 64-Bit Server VM 1.8.0_271-b09 on Mac OS X 10.16
Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
10000000 iterations paddingTo8Byte benchmark: Best Time(ms) Avg Time(ms)
Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------
-2 length benchmark 167 170
3 59.9 16.7 1.0X
-16 length benchmark 162 164
3 61.7 16.2 1.0X
+2 length benchmark 163 168
5 61.2 16.3 1.0X
+16 length benchmark 154 155
2 65.1 15.4 1.1X
diff --git
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBase.scala
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBase.scala
index 29e7d5b..8eb1def 100644
---
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBase.scala
+++
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBase.scala
@@ -43,46 +43,46 @@ abstract class ZorderBase extends Expression {
}
@transient
- private lazy val defaultNullValues: Array[Array[Byte]] =
+ private[this] lazy val defaultNullValues: Array[Any] =
children.map(_.dataType)
.map(ZorderBytesUtils.defaultValue)
.toArray
override def eval(input: InternalRow): Any = {
- val binaryArr = children.zipWithIndex.map {
+ val childrenValues = children.zipWithIndex.map {
case (child: Expression, index) =>
val v = child.eval(input)
if (v == null) {
defaultNullValues(index)
} else {
- ZorderBytesUtils.toByte(v)
+ v
}
}
- ZorderBytesUtils.interleaveMultiByteArray(binaryArr.toArray)
+ ZorderBytesUtils.interleaveBits(childrenValues.toArray)
}
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode):
ExprCode = {
val evals = children.map(_.genCode(ctx))
val defaultValues = ctx.addReferenceObj("defaultValues", defaultNullValues)
- val binaryArray = ctx.freshName("binaryArray")
+ val values = ctx.freshName("values")
val util = ZorderBytesUtils.getClass.getName.stripSuffix("$")
val inputs = evals.zipWithIndex.map {
case (eval, index) =>
s"""
|${eval.code}
|if (${eval.isNull}) {
- | $binaryArray[$index] = (byte[]) $defaultValues[$index];
+ | $values[$index] = $defaultValues[$index];
|} else {
- | $binaryArray[$index] = $util.toByte(${eval.value});
+ | $values[$index] = ${eval.value};
|}
|""".stripMargin
}
ev.copy(code =
code"""
|byte[] ${ev.value} = null;
- |byte[][] $binaryArray = new byte[${evals.length}][];
+ |Object[] $values = new Object[${evals.length}];
|${inputs.mkString("\n")}
- |${ev.value} = $util.interleaveMultiByteArray($binaryArray);
+ |${ev.value} = $util.interleaveBits($values);
|""".stripMargin,
isNull = FalseLiteral)
}
diff --git
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala
index 6b849e8..d5de5cd 100644
---
a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala
+++
b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala
@@ -30,27 +30,311 @@ object ZorderBytesUtils {
private final val BIT_32_MASK = 1 << 31
private final val BIT_64_MASK = 1L << 63
- def interleaveMultiByteArray(arrays: Array[Array[Byte]]): Array[Byte] = {
+ def interleaveBits(inputs: Array[Any]): Array[Byte] = {
+ inputs.length match {
+ // it's a more fast approach, use O(8 * 8)
+ // can see
http://graphics.stanford.edu/~seander/bithacks.html#InterleaveTableObvious
+ case 1 => longToByte(toLong(inputs(0)))
+ case 2 => interleave2Longs(toLong(inputs(0)), toLong(inputs(1)))
+ case 3 => interleave3Longs(toLong(inputs(0)), toLong(inputs(1)),
toLong(inputs(2)))
+ case 4 => interleave4Longs(toLong(inputs(0)), toLong(inputs(1)),
toLong(inputs(2)),
+ toLong(inputs(3)))
+ case 5 => interleave5Longs(toLong(inputs(0)), toLong(inputs(1)),
toLong(inputs(2)),
+ toLong(inputs(3)), toLong(inputs(4)))
+ case 6 => interleave6Longs(toLong(inputs(0)), toLong(inputs(1)),
toLong(inputs(2)),
+ toLong(inputs(3)), toLong(inputs(4)), toLong(inputs(5)))
+ case 7 => interleave7Longs(toLong(inputs(0)), toLong(inputs(1)),
toLong(inputs(2)),
+ toLong(inputs(3)), toLong(inputs(4)), toLong(inputs(5)),
toLong(inputs(6)))
+ case 8 => interleave8Longs(toLong(inputs(0)), toLong(inputs(1)),
toLong(inputs(2)),
+ toLong(inputs(3)), toLong(inputs(4)), toLong(inputs(5)),
toLong(inputs(6)),
+ toLong(inputs(7)))
+
+ case _ =>
+ // it's the default approach, use O(64 * n), n is the length of inputs
+ interleaveBitsDefault(inputs.map(toByteArray))
+ }
+ }
+
+ private def interleave2Longs(l1: Long, l2: Long): Array[Byte] = {
+ // output 8 * 16 bits
+ val result = new Array[Byte](16)
+ var i = 0
+ while(i < 8) {
+ val tmp1 = ((l1 >> (i * 8)) & 0xff).toShort
+ val tmp2 = ((l2 >> (i * 8)) & 0xff).toShort
+
+ var z = 0
+ var j = 0
+ while (j < 8) {
+ val x_masked = tmp1 & (1 << j)
+ val y_masked = tmp2 & (1 << j)
+ z |= (x_masked << j)
+ z |= (y_masked << (j + 1))
+ j = j + 1
+ }
+ result((7 - i) * 2 + 1) = (z & 0xff).toByte
+ result((7 - i) * 2) = ((z >> 8) & 0xff).toByte
+ i = i + 1
+ }
+ result
+ }
+
+ private def interleave3Longs(l1: Long, l2: Long, l3: Long): Array[Byte] = {
+ // output 8 * 24 bits
+ val result = new Array[Byte](24)
+ var i = 0
+ while(i < 8) {
+ val tmp1 = ((l1 >> (i * 8)) & 0xff).toInt
+ val tmp2 = ((l2 >> (i * 8)) & 0xff).toInt
+ val tmp3 = ((l3 >> (i * 8)) & 0xff).toInt
+
+ var z = 0
+ var j = 0
+ while (j < 8) {
+ val r1_mask = tmp1 & (1 << j)
+ val r2_mask = tmp2 & (1 << j)
+ val r3_mask = tmp3 & (1 << j)
+ z |= (r1_mask << (2 * j)) | (r2_mask << (2 * j + 1)) | (r3_mask << (2
* j + 2))
+ j = j + 1
+ }
+ result((7 - i) * 3 + 2) = (z & 0xff).toByte
+ result((7 - i) * 3 + 1) = ((z >> 8) & 0xff).toByte
+ result((7 - i) * 3) = ((z >> 16) & 0xff).toByte
+ i = i + 1
+ }
+ result
+ }
+
+ private def interleave4Longs(l1: Long, l2: Long, l3: Long, l4: Long):
Array[Byte] = {
+ // output 8 * 32 bits
+ val result = new Array[Byte](32)
+ var i = 0
+ while(i < 8) {
+ val tmp1 = ((l1 >> (i * 8)) & 0xff).toInt
+ val tmp2 = ((l2 >> (i * 8)) & 0xff).toInt
+ val tmp3 = ((l3 >> (i * 8)) & 0xff).toInt
+ val tmp4 = ((l4 >> (i * 8)) & 0xff).toInt
+
+ var z = 0
+ var j = 0
+ while (j < 8) {
+ val r1_mask = tmp1 & (1 << j)
+ val r2_mask = tmp2 & (1 << j)
+ val r3_mask = tmp3 & (1 << j)
+ val r4_mask = tmp4 & (1 << j)
+ z |= (r1_mask << (3 * j)) | (r2_mask << (3 * j + 1)) | (r3_mask << (3
* j + 2)) |
+ (r4_mask << (3 * j + 3))
+ j = j + 1
+ }
+ result((7 - i) * 4 + 3) = (z & 0xff).toByte
+ result((7 - i) * 4 + 2) = ((z >> 8) & 0xff).toByte
+ result((7 - i) * 4 + 1) = ((z >> 16) & 0xff).toByte
+ result((7 - i) * 4) = ((z >> 24) & 0xff).toByte
+ i = i + 1
+ }
+ result
+ }
+
+ private def interleave5Longs(
+ l1: Long,
+ l2: Long,
+ l3: Long,
+ l4: Long,
+ l5: Long): Array[Byte] = {
+ // output 8 * 40 bits
+ val result = new Array[Byte](40)
+ var i = 0
+ while(i < 8) {
+ val tmp1 = ((l1 >> (i * 8)) & 0xff).toLong
+ val tmp2 = ((l2 >> (i * 8)) & 0xff).toLong
+ val tmp3 = ((l3 >> (i * 8)) & 0xff).toLong
+ val tmp4 = ((l4 >> (i * 8)) & 0xff).toLong
+ val tmp5 = ((l5 >> (i * 8)) & 0xff).toLong
+
+ var z = 0L
+ var j = 0
+ while (j < 8) {
+ val r1_mask = tmp1 & (1 << j)
+ val r2_mask = tmp2 & (1 << j)
+ val r3_mask = tmp3 & (1 << j)
+ val r4_mask = tmp4 & (1 << j)
+ val r5_mask = tmp5 & (1 << j)
+ z |= (r1_mask << (4 * j)) | (r2_mask << (4 * j + 1)) | (r3_mask << (4
* j + 2)) |
+ (r4_mask << (4 * j + 3)) | (r5_mask << (4 * j + 4))
+ j = j + 1
+ }
+ result((7 - i) * 5 + 4) = (z & 0xff).toByte
+ result((7 - i) * 5 + 3) = ((z >> 8) & 0xff).toByte
+ result((7 - i) * 5 + 2) = ((z >> 16) & 0xff).toByte
+ result((7 - i) * 5 + 1) = ((z >> 24) & 0xff).toByte
+ result((7 - i) * 5) = ((z >> 32) & 0xff).toByte
+ i = i + 1
+ }
+ result
+ }
+
+ private def interleave6Longs(
+ l1: Long,
+ l2: Long,
+ l3: Long,
+ l4: Long,
+ l5: Long,
+ l6: Long): Array[Byte] = {
+ // output 8 * 48 bits
+ val result = new Array[Byte](48)
+ var i = 0
+ while(i < 8) {
+ val tmp1 = ((l1 >> (i * 8)) & 0xff).toLong
+ val tmp2 = ((l2 >> (i * 8)) & 0xff).toLong
+ val tmp3 = ((l3 >> (i * 8)) & 0xff).toLong
+ val tmp4 = ((l4 >> (i * 8)) & 0xff).toLong
+ val tmp5 = ((l5 >> (i * 8)) & 0xff).toLong
+ val tmp6 = ((l6 >> (i * 8)) & 0xff).toLong
+
+ var z = 0L
+ var j = 0
+ while (j < 8) {
+ val r1_mask = tmp1 & (1 << j)
+ val r2_mask = tmp2 & (1 << j)
+ val r3_mask = tmp3 & (1 << j)
+ val r4_mask = tmp4 & (1 << j)
+ val r5_mask = tmp5 & (1 << j)
+ val r6_mask = tmp6 & (1 << j)
+ z |= (r1_mask << (5 * j)) | (r2_mask << (5 * j + 1)) | (r3_mask << (5
* j + 2)) |
+ (r4_mask << (5 * j + 3)) | (r5_mask << (5 * j + 4)) | (r6_mask << (5
* j + 5))
+ j = j + 1
+ }
+ result((7 - i) * 6 + 5) = (z & 0xff).toByte
+ result((7 - i) * 6 + 4) = ((z >> 8) & 0xff).toByte
+ result((7 - i) * 6 + 3) = ((z >> 16) & 0xff).toByte
+ result((7 - i) * 6 + 2) = ((z >> 24) & 0xff).toByte
+ result((7 - i) * 6 + 1) = ((z >> 32) & 0xff).toByte
+ result((7 - i) * 6) = ((z >> 40) & 0xff).toByte
+ i = i + 1
+ }
+ result
+ }
+
+ private def interleave7Longs(
+ l1: Long,
+ l2: Long,
+ l3: Long,
+ l4: Long,
+ l5: Long,
+ l6: Long,
+ l7: Long): Array[Byte] = {
+ // output 8 * 56 bits
+ val result = new Array[Byte](56)
+ var i = 0
+ while(i < 8) {
+ val tmp1 = ((l1 >> (i * 8)) & 0xff).toLong
+ val tmp2 = ((l2 >> (i * 8)) & 0xff).toLong
+ val tmp3 = ((l3 >> (i * 8)) & 0xff).toLong
+ val tmp4 = ((l4 >> (i * 8)) & 0xff).toLong
+ val tmp5 = ((l5 >> (i * 8)) & 0xff).toLong
+ val tmp6 = ((l6 >> (i * 8)) & 0xff).toLong
+ val tmp7 = ((l7 >> (i * 8)) & 0xff).toLong
+
+ var z = 0L
+ var j = 0
+ while (j < 8) {
+ val r1_mask = tmp1 & (1 << j)
+ val r2_mask = tmp2 & (1 << j)
+ val r3_mask = tmp3 & (1 << j)
+ val r4_mask = tmp4 & (1 << j)
+ val r5_mask = tmp5 & (1 << j)
+ val r6_mask = tmp6 & (1 << j)
+ val r7_mask = tmp7 & (1 << j)
+ z |= (r1_mask << (6 * j)) | (r2_mask << (6 * j + 1)) | (r3_mask << (6
* j + 2)) |
+ (r4_mask << (6 * j + 3)) | (r5_mask << (6 * j + 4)) | (r6_mask << (6
* j + 5)) |
+ (r7_mask << (6 * j + 6))
+ j = j + 1
+ }
+ result((7 - i) * 7 + 6) = (z & 0xff).toByte
+ result((7 - i) * 7 + 5) = ((z >> 8) & 0xff).toByte
+ result((7 - i) * 7 + 4) = ((z >> 16) & 0xff).toByte
+ result((7 - i) * 7 + 3) = ((z >> 24) & 0xff).toByte
+ result((7 - i) * 7 + 2) = ((z >> 32) & 0xff).toByte
+ result((7 - i) * 7 + 1) = ((z >> 40) & 0xff).toByte
+ result((7 - i) * 7) = ((z >> 48) & 0xff).toByte
+ i = i + 1
+ }
+ result
+ }
+
+ private def interleave8Longs(
+ l1: Long,
+ l2: Long,
+ l3: Long,
+ l4: Long,
+ l5: Long,
+ l6: Long,
+ l7: Long,
+ l8: Long): Array[Byte] = {
+ // output 8 * 64 bits
+ val result = new Array[Byte](64)
+ var i = 0
+ while(i < 8) {
+ val tmp1 = ((l1 >> (i * 8)) & 0xff).toLong
+ val tmp2 = ((l2 >> (i * 8)) & 0xff).toLong
+ val tmp3 = ((l3 >> (i * 8)) & 0xff).toLong
+ val tmp4 = ((l4 >> (i * 8)) & 0xff).toLong
+ val tmp5 = ((l5 >> (i * 8)) & 0xff).toLong
+ val tmp6 = ((l6 >> (i * 8)) & 0xff).toLong
+ val tmp7 = ((l7 >> (i * 8)) & 0xff).toLong
+ val tmp8 = ((l8 >> (i * 8)) & 0xff).toLong
+
+ var z = 0L
+ var j = 0
+ while (j < 8) {
+ val r1_mask = tmp1 & (1 << j)
+ val r2_mask = tmp2 & (1 << j)
+ val r3_mask = tmp3 & (1 << j)
+ val r4_mask = tmp4 & (1 << j)
+ val r5_mask = tmp5 & (1 << j)
+ val r6_mask = tmp6 & (1 << j)
+ val r7_mask = tmp7 & (1 << j)
+ val r8_mask = tmp8 & (1 << j)
+ z |= (r1_mask << (7 * j)) | (r2_mask << (7 * j + 1)) | (r3_mask << (7
* j + 2)) |
+ (r4_mask << (7 * j + 3)) | (r5_mask << (7 * j + 4)) | (r6_mask << (7
* j + 5)) |
+ (r7_mask << (7 * j + 6)) | (r8_mask << (7 * j + 7))
+ j = j + 1
+ }
+ result((7 - i) * 8 + 7) = (z & 0xff).toByte
+ result((7 - i) * 8 + 6) = ((z >> 8) & 0xff).toByte
+ result((7 - i) * 8 + 5) = ((z >> 16) & 0xff).toByte
+ result((7 - i) * 8 + 4) = ((z >> 24) & 0xff).toByte
+ result((7 - i) * 8 + 3) = ((z >> 32) & 0xff).toByte
+ result((7 - i) * 8 + 2) = ((z >> 40) & 0xff).toByte
+ result((7 - i) * 8 + 1) = ((z >> 48) & 0xff).toByte
+ result((7 - i) * 8) = ((z >> 56) & 0xff).toByte
+ i = i + 1
+ }
+ result
+ }
+
+ def interleaveBitsDefault(arrays: Array[Array[Byte]]): Array[Byte] = {
var totalLength = 0
var maxLength = 0
- arrays.foreach(array => {
+ arrays.foreach { array =>
totalLength += array.length
maxLength = maxLength.max(array.length * 8)
- })
+ }
val result = new Array[Byte](totalLength)
var resultBit = 0
var bit = 0
while (bit < maxLength) {
- val bytePos = Math.floor(bit / 8).toInt
+ val bytePos = bit / 8
val bitPos = bit % 8
for (arr <- arrays) {
- if (bytePos < arr.length) {
- val resultBytePos = totalLength - 1 - Math.floor(resultBit / 8).toInt
+ val len = arr.length
+ if (bytePos < len) {
+ val resultBytePos = totalLength - 1 - resultBit / 8
val resultBitPos = resultBit % 8
result(resultBytePos) = updatePos(result(resultBytePos),
resultBitPos,
- arr(arr.length - 1 - bytePos), bitPos)
+ arr(len - 1 - bytePos), bitPos)
resultBit += 1
}
}
@@ -73,7 +357,23 @@ object ZorderBytesUtils {
(a ^ (1 << apos)).toByte
}
- def toByte(a: Any): Array[Byte] = {
+ def toLong(a: Any): Long = {
+ a match {
+ case b: Boolean => (if (b) 1 else 0).toLong ^ BIT_64_MASK
+ case b: Byte => b.toLong ^ BIT_64_MASK
+ case s: Short => s.toLong ^ BIT_64_MASK
+ case i: Int => i.toLong ^ BIT_64_MASK
+ case l: Long => l ^ BIT_64_MASK
+ case f: Float => java.lang.Float.floatToRawIntBits(f).toLong ^
BIT_64_MASK
+ case d: Double => java.lang.Double.doubleToRawLongBits(d) ^ BIT_64_MASK
+ case str: UTF8String => str.getPrefix
+ case dec: Decimal => dec.toLong ^ BIT_64_MASK
+ case other: Any =>
+ throw new KyuubiSQLExtensionException("Unsupported z-order type: " +
other.getClass)
+ }
+ }
+
+ def toByteArray(a: Any): Array[Byte] = {
a match {
case bo: Boolean =>
booleanToByte(bo)
@@ -166,7 +466,11 @@ object ZorderBytesUtils {
}
}
- def defaultValue(dataType: DataType): Array[Byte] = toByte {
+ def defaultByteArrayValue(dataType: DataType): Array[Byte] = toByteArray {
+ defaultValue(dataType)
+ }
+
+ def defaultValue(dataType: DataType): Any = {
dataType match {
case BooleanType =>
true
diff --git
a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala
b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala
index 78c9c91..3774fc3 100644
---
a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala
+++
b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala
@@ -38,19 +38,18 @@ class ZorderCoreBenchmark extends
KyuubiSparkSQLExtensionTest with KyuubiBenchma
private val runBenchmark = sys.env.contains("RUN_BENCHMARK")
private val numRows = 1 * 1000 * 1000
- private def randomIntByteArray(numColumns: Int): Seq[Array[Array[Byte]]] = {
- (1 to numRows).map { i =>
- val arr = new Array[Array[Byte]](numColumns)
- (0 until numColumns).foreach(col => arr(col) =
ZorderBytesUtils.toByte(i))
+ private def randomInt(numColumns: Int): Seq[Array[Any]] = {
+ (1 to numRows).map { l =>
+ val arr = new Array[Any](numColumns)
+ (0 until numColumns).foreach(col => arr(col) = l)
arr
}
}
- private def randomLongByteArray(numColumns: Int): Seq[Array[Array[Byte]]] = {
- (1 to numRows).map { i =>
- val l = i.toLong
- val arr = new Array[Array[Byte]](numColumns)
- (0 until numColumns).foreach(col => arr(col) =
ZorderBytesUtils.toByte(l))
+ private def randomLong(numColumns: Int): Seq[Array[Any]] = {
+ (1 to numRows).map { l =>
+ val arr = new Array[Any](numColumns)
+ (0 until numColumns).foreach(col => arr(col) = l.toLong)
arr
}
}
@@ -59,28 +58,27 @@ class ZorderCoreBenchmark extends
KyuubiSparkSQLExtensionTest with KyuubiBenchma
val benchmark = new Benchmark(
s"$numRows rows zorder core benchmark", numRows, output = output)
benchmark.addCase("2 int columns benchmark", 3) { _ =>
- randomIntByteArray(2).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+ randomInt(2).foreach(ZorderBytesUtils.interleaveBits)
}
benchmark.addCase("3 int columns benchmark", 3) { _ =>
- randomIntByteArray(3).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+ randomInt(3).foreach(ZorderBytesUtils.interleaveBits)
}
benchmark.addCase("4 int columns benchmark", 3) { _ =>
- randomIntByteArray(4).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+ randomInt(4).foreach(ZorderBytesUtils.interleaveBits)
}
-
benchmark.addCase("2 long columns benchmark", 3) { _ =>
- randomLongByteArray(2).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+ randomLong(2).foreach(ZorderBytesUtils.interleaveBits)
}
benchmark.addCase("3 long columns benchmark", 3) { _ =>
- randomLongByteArray(3).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+ randomLong(3).foreach(ZorderBytesUtils.interleaveBits)
}
benchmark.addCase("4 long columns benchmark", 3) { _ =>
- randomLongByteArray(4).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+ randomLong(4).foreach(ZorderBytesUtils.interleaveBits)
}
benchmark.run()
diff --git
a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
index f3e6085..7fc46ae 100644
---
a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
+++
b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderSuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
import org.apache.spark.sql.types._
import org.apache.kyuubi.sql.{KyuubiSQLConf, KyuubiSQLExtensionException}
-import org.apache.kyuubi.sql.zorder.{OptimizeZorderCommandBase, Zorder}
+import org.apache.kyuubi.sql.zorder.{OptimizeZorderCommandBase, Zorder,
ZorderBytesUtils}
trait ZorderSuite extends KyuubiSparkSQLExtensionTest with
ExpressionEvalHelper {
override def sparkConf(): SparkConf = {
@@ -559,6 +559,18 @@ trait ZorderSuite extends KyuubiSparkSQLExtensionTest with
ExpressionEvalHelper
}
}
}
+
+ test("fast approach test") {
+ Seq[Seq[Any]](Seq(1L, 2L), Seq(1L, 2L, 3L), Seq(1L, 2L, 3L, 4L), Seq(1L,
2L, 3L, 4L, 5L),
+ Seq(1L, 2L, 3L, 4L, 5L, 6L), Seq(1L, 2L, 3L, 4L, 5L, 6L, 7L),
+ Seq(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L))
+ .foreach { inputs =>
+ assert(java.util.Arrays.equals(
+ ZorderBytesUtils.interleaveBits(inputs.toArray),
+
ZorderBytesUtils.interleaveBitsDefault(inputs.map(ZorderBytesUtils.toByteArray).toArray))
+ )
+ }
+ }
}
class ZorderWithCodegenEnabledSuite extends ZorderSuite {