This is an automated email from the ASF dual-hosted git repository.

junhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 0050167b2 [core] Fix bug: zindexer should deal well with column `null` value. (#2736)
0050167b2 is described below

commit 0050167b287633030db280746a195f6723aca1e8
Author: YeJunHao <[email protected]>
AuthorDate: Thu Jan 18 20:07:08 2024 +0800

    [core] Fix bug: zindexer should deal well with column `null` value. (#2736)
---
 .../org/apache/paimon/sort/zorder/ZIndexer.java    | 74 +++++++++++++---------
 .../apache/paimon/sort/zorder/ZIndexerTest.java    | 33 ++++++++++
 2 files changed, 76 insertions(+), 31 deletions(-)

diff --git a/paimon-core/src/main/java/org/apache/paimon/sort/zorder/ZIndexer.java b/paimon-core/src/main/java/org/apache/paimon/sort/zorder/ZIndexer.java
index 7f31f8d29..4a4071476 100644
--- a/paimon-core/src/main/java/org/apache/paimon/sort/zorder/ZIndexer.java
+++ b/paimon-core/src/main/java/org/apache/paimon/sort/zorder/ZIndexer.java
@@ -50,6 +50,7 @@ import org.apache.paimon.utils.ZOrderByteUtils;
 
 import java.io.Serializable;
 import java.nio.ByteBuffer;
+import java.util.Arrays;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Set;
@@ -145,48 +146,57 @@ public class ZIndexer implements Serializable {
         private final int fieldIndex;
         private final int varTypeSize;
 
+        private final byte[] nullVarBytes;
+
         public TypeVisitor(int index, int varTypeSize) {
             this.fieldIndex = index;
             this.varTypeSize = varTypeSize;
+
+            if (varTypeSize == PRIMITIVE_BUFFER_SIZE) {
+                nullVarBytes = NULL_BYTES;
+            } else {
+                nullVarBytes = new byte[varTypeSize];
+                Arrays.fill(nullVarBytes, (byte) 0x00);
+            }
         }
 
         @Override
         public ZProcessFunction visit(CharType charType) {
             return (row, reuse) -> {
-                BinaryString binaryString = row.getString(fieldIndex);
-
-                return row.isNullAt(fieldIndex)
-                        ? NULL_BYTES
-                        : ZOrderByteUtils.byteTruncateOrFill(
-                                        MemorySegmentUtils.getBytes(
-                                                binaryString.getSegments(),
-                                                binaryString.getOffset(),
-                                                Math.min(
-                                                        varTypeSize,
-                                                        binaryString.getSizeInBytes())),
-                                        varTypeSize,
-                                        reuse)
-                                .array();
+                if (row.isNullAt(fieldIndex)) {
+                    return nullVarBytes;
+                } else {
+                    BinaryString binaryString = row.getString(fieldIndex);
+
+                    return ZOrderByteUtils.byteTruncateOrFill(
+                                    MemorySegmentUtils.getBytes(
+                                            binaryString.getSegments(),
+                                            binaryString.getOffset(),
+                                            Math.min(varTypeSize, binaryString.getSizeInBytes())),
+                                    varTypeSize,
+                                    reuse)
+                            .array();
+                }
             };
         }
 
         @Override
         public ZProcessFunction visit(VarCharType varCharType) {
             return (row, reuse) -> {
-                BinaryString binaryString = row.getString(fieldIndex);
-
-                return row.isNullAt(fieldIndex)
-                        ? NULL_BYTES
-                        : ZOrderByteUtils.byteTruncateOrFill(
-                                        MemorySegmentUtils.getBytes(
-                                                binaryString.getSegments(),
-                                                binaryString.getOffset(),
-                                                Math.min(
-                                                        varTypeSize,
-                                                        binaryString.getSizeInBytes())),
-                                        varTypeSize,
-                                        reuse)
-                                .array();
+                if (row.isNullAt(fieldIndex)) {
+                    return nullVarBytes;
+                } else {
+                    BinaryString binaryString = row.getString(fieldIndex);
+
+                    return ZOrderByteUtils.byteTruncateOrFill(
+                                    MemorySegmentUtils.getBytes(
+                                            binaryString.getSegments(),
+                                            binaryString.getOffset(),
+                                            Math.min(varTypeSize, binaryString.getSizeInBytes())),
+                                    varTypeSize,
+                                    reuse)
+                            .array();
+                }
             };
         }
 
@@ -206,7 +216,7 @@ public class ZIndexer implements Serializable {
         public ZProcessFunction visit(BinaryType binaryType) {
             return (row, reuse) ->
                     row.isNullAt(fieldIndex)
-                            ? NULL_BYTES
+                            ? nullVarBytes
                             : ZOrderByteUtils.byteTruncateOrFill(
                                             row.getBinary(fieldIndex), varTypeSize, reuse)
                                     .array();
@@ -216,7 +226,7 @@ public class ZIndexer implements Serializable {
         public ZProcessFunction visit(VarBinaryType varBinaryType) {
             return (row, reuse) ->
                     row.isNullAt(fieldIndex)
-                            ? NULL_BYTES
+                            ? nullVarBytes
                             : ZOrderByteUtils.byteTruncateOrFill(
                                             row.getBinary(fieldIndex), varTypeSize, reuse)
                                     .array();
@@ -380,5 +390,7 @@ public class ZIndexer implements Serializable {
         }
     }
 
-    interface ZProcessFunction extends BiFunction<InternalRow, ByteBuffer, byte[]>, Serializable {}
+    /** Process function interface. */
+    public interface ZProcessFunction
+            extends BiFunction<InternalRow, ByteBuffer, byte[]>, Serializable {}
 }
diff --git a/paimon-core/src/test/java/org/apache/paimon/sort/zorder/ZIndexerTest.java b/paimon-core/src/test/java/org/apache/paimon/sort/zorder/ZIndexerTest.java
index 4beb682e7..801a1927d 100644
--- a/paimon-core/src/test/java/org/apache/paimon/sort/zorder/ZIndexerTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/sort/zorder/ZIndexerTest.java
@@ -103,6 +103,39 @@ public class ZIndexerTest {
         }
     }
 
+    @Test
+    public void testZIndexerForVarcharWithNull() {
+        RowType rowType = RowType.of(new VarCharType(), new VarCharType());
+
+        int varTypeSize = 10;
+        ZIndexer zIndexer = new ZIndexer(rowType, Arrays.asList("f0", "f1"), varTypeSize);
+        zIndexer.open();
+
+        byte[] nullBytes = new byte[varTypeSize];
+        Arrays.fill(nullBytes, (byte) 0x00);
+        for (int i = 0; i < 1000; i++) {
+            BinaryString a = BinaryString.fromString(randomString(varTypeSize + 1));
+
+            InternalRow internalRow = GenericRow.of(a, null);
+
+            byte[] zOrder = zIndexer.index(internalRow);
+
+            byte[][] zCache = new byte[2][];
+            ByteBuffer byteBuffer = ByteBuffer.allocate(varTypeSize);
+            ZOrderByteUtils.stringToOrderedBytes(a.toString(), varTypeSize, byteBuffer);
+            zCache[0] = Arrays.copyOf(byteBuffer.array(), varTypeSize);
+
+            zCache[1] = nullBytes;
+
+            byte[] expectedZOrder =
+                    ZOrderByteUtils.interleaveBits(zCache, zCache.length * varTypeSize);
+
+            for (int j = 0; j < zCache.length * varTypeSize; j++) {
+                Assertions.assertThat(zOrder[j]).isEqualTo(expectedZOrder[j]);
+            }
+        }
+    }
+
     public static String randomString(int length) {
         byte[] buffer = new byte[length];
 

Reply via email to