This is an automated email from the ASF dual-hosted git repository.

Fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/master by this push:
     new 2346fdbed GH-3464: Improve `DeltaByteArrayWriter.writeBytes` (#3465)
2346fdbed is described below

commit 2346fdbed480a362d9aa5242955ae85fac656e4e
Author: André Rouél <[email protected]>
AuthorDate: Wed May 6 21:30:32 2026 +0200

    GH-3464: Improve `DeltaByteArrayWriter.writeBytes` (#3465)
    
    * GH-3464 Improve `DeltaByteArrayWriter.writeBytes` to avoid unnecessary allocation and scalar prefix comparison
    
    * GH-3464 Add regression test
    
    * Update DeltaByteArrayWriter.java
---
 .../values/deltastrings/DeltaByteArrayWriter.java   | 15 +++++++++------
 .../values/deltastrings/TestDeltaByteArray.java     | 21 +++++++++++++++++++++
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/deltastrings/DeltaByteArrayWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/deltastrings/DeltaByteArrayWriter.java
index c23410861..5496ed194 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/deltastrings/DeltaByteArrayWriter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/deltastrings/DeltaByteArrayWriter.java
@@ -18,6 +18,7 @@
  */
 package org.apache.parquet.column.values.deltastrings;
 
+import java.util.Arrays;
 import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.apache.parquet.bytes.BytesInput;
 import org.apache.parquet.column.Encoding;
@@ -88,12 +89,14 @@ public class DeltaByteArrayWriter extends ValuesWriter {
 
   @Override
   public void writeBytes(Binary v) {
-    int i = 0;
-    byte[] vb = v.getBytes();
-    int length = previous.length < vb.length ? previous.length : vb.length;
-    // find the number of matching prefix bytes between this value and the previous one
-    for (i = 0; (i < length) && (previous[i] == vb[i]); i++)
-      ;
+    byte[] vb = v.isBackingBytesReused() ? v.getBytes() : v.getBytesUnsafe();
+    int length = Math.min(previous.length, vb.length);
+    // Find the number of matching prefix bytes between this value and the previous one.
+    // Arrays.mismatch is intrinsified by the JVM to use SIMD instructions.
+    int i = Arrays.mismatch(previous, 0, length, vb, 0, length);
+    if (i < 0) {
+      i = length; // all bytes in the common range matched
+    }
     prefixLengthWriter.writeInteger(i);
     suffixWriter.writeBytes(v.slice(i, vb.length - i));
     previous = vb;
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/deltastrings/TestDeltaByteArray.java b/parquet-column/src/test/java/org/apache/parquet/column/values/deltastrings/TestDeltaByteArray.java
index 5ce6adbdf..b73e5562d 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/deltastrings/TestDeltaByteArray.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/deltastrings/TestDeltaByteArray.java
@@ -19,6 +19,7 @@
 package org.apache.parquet.column.values.deltastrings;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import org.apache.parquet.bytes.ByteBufferInputStream;
 import org.apache.parquet.bytes.DirectByteBufferAllocator;
 import org.apache.parquet.column.values.Utils;
@@ -128,4 +129,24 @@ public class TestDeltaByteArray {
 
     assertReadWrite(writer, new DeltaByteArrayReader(), values);
   }
+
+  @Test
+  public void testReusedBackingArrayRegression() throws Exception {
+    DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
+    DeltaByteArrayReader reader = new DeltaByteArrayReader();
+
+    byte[] buffer = "parquet-000".getBytes(StandardCharsets.UTF_8);
+    writer.writeBytes(Binary.fromReusedByteArray(buffer));
+
+    System.arraycopy("parquet-111".getBytes(StandardCharsets.UTF_8), 0, buffer, 0, buffer.length);
+    writer.writeBytes(Binary.fromReusedByteArray(buffer));
+
+    System.arraycopy("parquet-222".getBytes(StandardCharsets.UTF_8), 0, buffer, 0, buffer.length);
+    writer.writeBytes(Binary.fromReusedByteArray(buffer));
+
+    Binary[] decoded = Utils.readData(reader, writer.getBytes().toInputStream(), 3);
+    Assert.assertEquals(Binary.fromString("parquet-000"), decoded[0]);
+    Assert.assertEquals(Binary.fromString("parquet-111"), decoded[1]);
+    Assert.assertEquals(Binary.fromString("parquet-222"), decoded[2]);
+  }
 }

Reply via email to