This is an automated email from the ASF dual-hosted git repository.

karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 6a4352f466f When removeNullBytes is set, length calculations did not 
take into account null bytes. (#17232)
6a4352f466f is described below

commit 6a4352f466fd8b79e1c3c321044e540ff19b88fe
Author: Karan Kumar <[email protected]>
AuthorDate: Mon Oct 7 18:02:52 2024 +0530

    When removeNullBytes is set, length calculations did not take into account 
null bytes. (#17232)
    
    * When replaceNullBytes is set, length calculations did not take into 
account null bytes.
---
 .../druid/frame/field/StringFieldWriter.java       |  4 +-
 .../apache/druid/frame/write/FrameWriterUtils.java | 22 +++--
 .../druid/frame/field/StringFieldWriterTest.java   | 98 ++++++++++++++++++----
 3 files changed, 102 insertions(+), 22 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java 
b/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java
index 2ffb79a12da..5b304f8a053 100644
--- 
a/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java
+++ 
b/processing/src/main/java/org/apache/druid/frame/field/StringFieldWriter.java
@@ -128,14 +128,14 @@ public class StringFieldWriter implements FieldWriter
         written++;
 
         if (len > 0) {
-          FrameWriterUtils.copyByteBufferToMemoryDisallowingNullBytes(
+          int lenWritten = 
FrameWriterUtils.copyByteBufferToMemoryDisallowingNullBytes(
               utf8Datum,
               memory,
               position + written,
               len,
               removeNullBytes
           );
-          written += len;
+          written += lenWritten;
         }
       }
 
diff --git 
a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java 
b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java
index 9ba2b29fe51..a480767f111 100644
--- 
a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java
+++ 
b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java
@@ -212,9 +212,11 @@ public class FrameWriterUtils
 
   /**
    * Copies {@code src} to {@code dst}, disallowing null bytes to be written 
to the destination. If {@code removeNullBytes}
-   * is true, the method will drop the null bytes, and if it is false, the 
method will throw an exception.
+   * is true, the method will drop the null bytes, and if it is false, the 
method will throw an exception. The written bytes
+   * can be less than "len" if the null bytes are dropped, and the callers 
must evaluate the return value to see the actual
+   * length of the buffer that is copied
    */
-  public static void copyByteBufferToMemoryDisallowingNullBytes(
+  public static int copyByteBufferToMemoryDisallowingNullBytes(
       final ByteBuffer src,
       final WritableMemory dst,
       final long dstPosition,
@@ -222,11 +224,16 @@ public class FrameWriterUtils
       final boolean removeNullBytes
   )
   {
-    copyByteBufferToMemory(src, dst, dstPosition, len, false, removeNullBytes);
+    return copyByteBufferToMemory(src, dst, dstPosition, len, false, 
removeNullBytes);
   }
 
   /**
-   * Copies "len" bytes from {@code src.position()} to "dstPosition" in 
"memory". Does not update the position of src.
+   * Tries to copy "len" bytes from {@code src.position()} to "dstPosition" in 
"memory". If removeNullBytes is set to true,
+   * it will remove the U+0000 bytes from the src buffer, and the written 
bytes will be less than "len". It is imperative that the
+   * callers check the number of written bytes when "removeNullBytes" can be 
set to true, i.e. this method is invoked via
+   * {@link #copyByteBufferToMemoryDisallowingNullBytes}
+   * <p>
+   * Does not update the position of src.
    * <p>
    * Whenever "allowNullBytes" is true, "removeNullBytes" must be false. Use 
the methods {@link #copyByteBufferToMemoryAllowingNullBytes}
    * and {@link #copyByteBufferToMemoryDisallowingNullBytes} to copy between 
the memory
@@ -234,7 +241,7 @@ public class FrameWriterUtils
    *
    * @throws InvalidNullByteException if "allowNullBytes" and 
"removeNullBytes" is false and a null byte is encountered
    */
-  private static void copyByteBufferToMemory(
+  private static int copyByteBufferToMemory(
       final ByteBuffer src,
       final WritableMemory dst,
       final long dstPosition,
@@ -251,6 +258,7 @@ public class FrameWriterUtils
     }
 
     final int srcEnd = src.position() + len;
+    int writtenLength = 0;
 
     if (allowNullBytes) {
       if (src.hasArray()) {
@@ -264,6 +272,8 @@ public class FrameWriterUtils
           dst.putByte(q, b);
         }
       }
+      // The method does not alter the length of the memory copied if null 
bytes are allowed
+      writtenLength = len;
     } else {
       long q = dstPosition;
       for (int p = src.position(); p < srcEnd; p++) {
@@ -282,9 +292,11 @@ public class FrameWriterUtils
         } else {
           dst.putByte(q, b);
           q++;
+          writtenLength++;
         }
       }
     }
+    return writtenLength;
   }
 
   /**
diff --git 
a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java
 
b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java
index 0108e772d94..adde9f89cbd 100644
--- 
a/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java
+++ 
b/processing/src/test/java/org/apache/druid/frame/field/StringFieldWriterTest.java
@@ -21,6 +21,7 @@ package org.apache.druid.frame.field;
 
 import org.apache.datasketches.memory.WritableMemory;
 import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.frame.write.InvalidNullByteException;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.segment.ColumnValueSelector;
@@ -40,9 +41,11 @@ import org.mockito.junit.MockitoRule;
 import org.mockito.quality.Strictness;
 
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.stream.Collectors;
 
 public class StringFieldWriterTest extends InitializedNullHandlingTest
 {
@@ -57,9 +60,12 @@ public class StringFieldWriterTest extends 
InitializedNullHandlingTest
   @Mock
   public DimensionSelector selectorUtf8;
 
+
   private WritableMemory memory;
   private FieldWriter fieldWriter;
   private FieldWriter fieldWriterUtf8;
+  private FieldWriter fieldWriterRemoveNull;
+  private FieldWriter fieldWriterUtf8RemoveNull;
 
   @Before
   public void setUp()
@@ -67,13 +73,32 @@ public class StringFieldWriterTest extends 
InitializedNullHandlingTest
     memory = WritableMemory.allocate(1000);
     fieldWriter = new StringFieldWriter(selector, false);
     fieldWriterUtf8 = new StringFieldWriter(selectorUtf8, false);
+    fieldWriterRemoveNull = new StringFieldWriter(selector, true);
+    fieldWriterUtf8RemoveNull = new StringFieldWriter(selectorUtf8, true);
   }
 
   @After
   public void tearDown()
   {
-    fieldWriter.close();
-    fieldWriterUtf8.close();
+    for (FieldWriter fw : getFieldWriter(FieldWritersType.ALL)) {
+      try {
+        fw.close();
+      }
+      catch (Exception ignore) {
+      }
+    }
+  }
+
+
+  private List<FieldWriter> getFieldWriter(FieldWritersType fieldWritersType)
+  {
+    if (fieldWritersType == FieldWritersType.NULL_REPLACING) {
+      return Arrays.asList(fieldWriterRemoveNull, fieldWriterUtf8RemoveNull);
+    } else if (fieldWritersType == FieldWritersType.ALL) {
+      return Arrays.asList(fieldWriter, fieldWriterUtf8, 
fieldWriterRemoveNull, fieldWriterUtf8RemoveNull);
+    } else {
+      throw new ISE("Handler missing for type:[%s]", fieldWritersType);
+    }
   }
 
   @Test
@@ -100,31 +125,63 @@ public class StringFieldWriterTest extends 
InitializedNullHandlingTest
     doTest(Arrays.asList("foo", "bar"));
   }
 
+
   @Test
   public void testMultiValueStringContainingNulls()
   {
     doTest(Arrays.asList("foo", NullHandling.emptyToNullIfNeeded(""), "bar", 
null));
   }
 
+  @Test
+  public void testNullByteReplacement()
+  {
+    doTest(
+        Arrays.asList("abc\u0000", "foo" + 
NullHandling.emptyToNullIfNeeded("") + "bar", "def"),
+        FieldWritersType.NULL_REPLACING
+    );
+  }
+
+  @Test
+  public void testNullByteNotReplaced()
+  {
+    mockSelectors(Arrays.asList("abc\u0000", "foo" + 
NullHandling.emptyToNullIfNeeded("") + "bar", "def"));
+    Assert.assertThrows(InvalidNullByteException.class, () -> {
+      doTestWithSpecificFieldWriter(fieldWriter);
+    });
+    Assert.assertThrows(InvalidNullByteException.class, () -> {
+      doTestWithSpecificFieldWriter(fieldWriterUtf8);
+    });
+  }
+
   private void doTest(final List<String> values)
+  {
+    doTest(values, FieldWritersType.ALL);
+  }
+
+  private void doTest(final List<String> values, FieldWritersType 
fieldWritersType)
   {
     mockSelectors(values);
 
-    // Non-UTF8 test
-    {
-      final long written = writeToMemory(fieldWriter);
-      final Object[] valuesRead = readFromMemory(written);
-      Assert.assertEquals("values read (non-UTF8)", values, 
Arrays.asList(valuesRead));
+    List<FieldWriter> fieldWriters = getFieldWriter(fieldWritersType);
+    for (FieldWriter fw : fieldWriters) {
+      final Object[] valuesRead = doTestWithSpecificFieldWriter(fw);
+      List<String> expectedResults = new ArrayList<>(values);
+      if (fieldWritersType == FieldWritersType.NULL_REPLACING) {
+        expectedResults = expectedResults.stream()
+                                         .map(val -> StringUtils.replace(val, 
"\u0000", ""))
+                                         .collect(Collectors.toList());
+      }
+      Assert.assertEquals("values read", expectedResults, 
Arrays.asList(valuesRead));
     }
+  }
 
-    // UTF8 test
-    {
-      final long writtenUtf8 = writeToMemory(fieldWriterUtf8);
-      final Object[] valuesReadUtf8 = readFromMemory(writtenUtf8);
-      Assert.assertEquals("values read (UTF8)", values, 
Arrays.asList(valuesReadUtf8));
-    }
+  private Object[] doTestWithSpecificFieldWriter(FieldWriter fieldWriter)
+  {
+    final long written = writeToMemory(fieldWriter);
+    return readFromMemory(written);
   }
 
+
   private void mockSelectors(final List<String> values)
   {
     final RangeIndexedInts row = new RangeIndexedInts();
@@ -183,9 +240,20 @@ public class StringFieldWriterTest extends 
InitializedNullHandlingTest
     memory.getByteArray(MEMORY_POSITION, bytes, 0, (int) written);
 
     final FieldReader fieldReader = 
FieldReaders.create("columnNameDoesntMatterHere", ColumnType.STRING_ARRAY);
-    final ColumnValueSelector<?> selector =
-        fieldReader.makeColumnValueSelector(memory, new 
ConstantFieldPointer(MEMORY_POSITION, -1));
+    final ColumnValueSelector<?> selector = 
fieldReader.makeColumnValueSelector(
+        memory,
+        new ConstantFieldPointer(
+            MEMORY_POSITION,
+            -1
+        )
+    );
 
     return (Object[]) selector.getObject();
   }
+
+  private enum FieldWritersType
+  {
+    NULL_REPLACING, // include null replacing writers only
+    ALL // include all writers
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to