This is an automated email from the ASF dual-hosted git repository.
junhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 0050167b2 [core] Fix bug: zindexer should deal well with column `null` value. (#2736)
0050167b2 is described below
commit 0050167b287633030db280746a195f6723aca1e8
Author: YeJunHao <[email protected]>
AuthorDate: Thu Jan 18 20:07:08 2024 +0800
[core] Fix bug: zindexer should deal well with column `null` value. (#2736)
---
.../org/apache/paimon/sort/zorder/ZIndexer.java | 74 +++++++++++++---------
.../apache/paimon/sort/zorder/ZIndexerTest.java | 33 ++++++++++
2 files changed, 76 insertions(+), 31 deletions(-)
diff --git a/paimon-core/src/main/java/org/apache/paimon/sort/zorder/ZIndexer.java b/paimon-core/src/main/java/org/apache/paimon/sort/zorder/ZIndexer.java
index 7f31f8d29..4a4071476 100644
--- a/paimon-core/src/main/java/org/apache/paimon/sort/zorder/ZIndexer.java
+++ b/paimon-core/src/main/java/org/apache/paimon/sort/zorder/ZIndexer.java
@@ -50,6 +50,7 @@ import org.apache.paimon.utils.ZOrderByteUtils;
import java.io.Serializable;
import java.nio.ByteBuffer;
+import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
@@ -145,48 +146,57 @@ public class ZIndexer implements Serializable {
private final int fieldIndex;
private final int varTypeSize;
+ private final byte[] nullVarBytes;
+
public TypeVisitor(int index, int varTypeSize) {
this.fieldIndex = index;
this.varTypeSize = varTypeSize;
+
+ if (varTypeSize == PRIMITIVE_BUFFER_SIZE) {
+ nullVarBytes = NULL_BYTES;
+ } else {
+ nullVarBytes = new byte[varTypeSize];
+ Arrays.fill(nullVarBytes, (byte) 0x00);
+ }
}
@Override
public ZProcessFunction visit(CharType charType) {
return (row, reuse) -> {
- BinaryString binaryString = row.getString(fieldIndex);
-
- return row.isNullAt(fieldIndex)
- ? NULL_BYTES
- : ZOrderByteUtils.byteTruncateOrFill(
- MemorySegmentUtils.getBytes(
- binaryString.getSegments(),
- binaryString.getOffset(),
- Math.min(
- varTypeSize,
- binaryString.getSizeInBytes())),
- varTypeSize,
- reuse)
- .array();
+ if (row.isNullAt(fieldIndex)) {
+ return nullVarBytes;
+ } else {
+ BinaryString binaryString = row.getString(fieldIndex);
+
+ return ZOrderByteUtils.byteTruncateOrFill(
+ MemorySegmentUtils.getBytes(
+ binaryString.getSegments(),
+ binaryString.getOffset(),
+ Math.min(varTypeSize, binaryString.getSizeInBytes())),
+ varTypeSize,
+ reuse)
+ .array();
+ }
};
}
@Override
public ZProcessFunction visit(VarCharType varCharType) {
return (row, reuse) -> {
- BinaryString binaryString = row.getString(fieldIndex);
-
- return row.isNullAt(fieldIndex)
- ? NULL_BYTES
- : ZOrderByteUtils.byteTruncateOrFill(
- MemorySegmentUtils.getBytes(
- binaryString.getSegments(),
- binaryString.getOffset(),
- Math.min(
- varTypeSize,
- binaryString.getSizeInBytes())),
- varTypeSize,
- reuse)
- .array();
+ if (row.isNullAt(fieldIndex)) {
+ return nullVarBytes;
+ } else {
+ BinaryString binaryString = row.getString(fieldIndex);
+
+ return ZOrderByteUtils.byteTruncateOrFill(
+ MemorySegmentUtils.getBytes(
+ binaryString.getSegments(),
+ binaryString.getOffset(),
+ Math.min(varTypeSize, binaryString.getSizeInBytes())),
+ varTypeSize,
+ reuse)
+ .array();
+ }
};
}
@@ -206,7 +216,7 @@ public class ZIndexer implements Serializable {
public ZProcessFunction visit(BinaryType binaryType) {
return (row, reuse) ->
row.isNullAt(fieldIndex)
- ? NULL_BYTES
+ ? nullVarBytes
: ZOrderByteUtils.byteTruncateOrFill(
row.getBinary(fieldIndex), varTypeSize, reuse)
.array();
@@ -216,7 +226,7 @@ public class ZIndexer implements Serializable {
public ZProcessFunction visit(VarBinaryType varBinaryType) {
return (row, reuse) ->
row.isNullAt(fieldIndex)
- ? NULL_BYTES
+ ? nullVarBytes
: ZOrderByteUtils.byteTruncateOrFill(
row.getBinary(fieldIndex), varTypeSize, reuse)
.array();
@@ -380,5 +390,7 @@ public class ZIndexer implements Serializable {
}
}
- interface ZProcessFunction extends BiFunction<InternalRow, ByteBuffer, byte[]>, Serializable {}
+ /** Process function interface. */
+ public interface ZProcessFunction
+ extends BiFunction<InternalRow, ByteBuffer, byte[]>, Serializable {}
}
diff --git a/paimon-core/src/test/java/org/apache/paimon/sort/zorder/ZIndexerTest.java b/paimon-core/src/test/java/org/apache/paimon/sort/zorder/ZIndexerTest.java
index 4beb682e7..801a1927d 100644
--- a/paimon-core/src/test/java/org/apache/paimon/sort/zorder/ZIndexerTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/sort/zorder/ZIndexerTest.java
@@ -103,6 +103,39 @@ public class ZIndexerTest {
}
}
+ @Test
+ public void testZIndexerForVarcharWithNull() {
+ RowType rowType = RowType.of(new VarCharType(), new VarCharType());
+
+ int varTypeSize = 10;
+ ZIndexer zIndexer = new ZIndexer(rowType, Arrays.asList("f0", "f1"), varTypeSize);
+ zIndexer.open();
+
+ byte[] nullBytes = new byte[varTypeSize];
+ Arrays.fill(nullBytes, (byte) 0x00);
+ for (int i = 0; i < 1000; i++) {
+ BinaryString a = BinaryString.fromString(randomString(varTypeSize + 1));
+
+ InternalRow internalRow = GenericRow.of(a, null);
+
+ byte[] zOrder = zIndexer.index(internalRow);
+
+ byte[][] zCache = new byte[2][];
+ ByteBuffer byteBuffer = ByteBuffer.allocate(varTypeSize);
+ ZOrderByteUtils.stringToOrderedBytes(a.toString(), varTypeSize, byteBuffer);
+ zCache[0] = Arrays.copyOf(byteBuffer.array(), varTypeSize);
+
+ zCache[1] = nullBytes;
+
+ byte[] expectedZOrder =
+ ZOrderByteUtils.interleaveBits(zCache, zCache.length * varTypeSize);
+
+ for (int j = 0; j < zCache.length * varTypeSize; j++) {
+ Assertions.assertThat(zOrder[j]).isEqualTo(expectedZOrder[j]);
+ }
+ }
+ }
+
public static String randomString(int length) {
byte[] buffer = new byte[length];