This is an automated email from the ASF dual-hosted git repository.

marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new dee18e7af [VL] Fix Shuffle split get wrong binary length (#5168)
dee18e7af is described below

commit dee18e7afdffa1301954ded7b71b527232b7be43
Author: Rong Ma <[email protected]>
AuthorDate: Fri Mar 29 08:52:32 2024 +0800

    [VL] Fix Shuffle split get wrong binary length (#5168)
---
 cpp/velox/shuffle/VeloxShuffleWriter.cc | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/cpp/velox/shuffle/VeloxShuffleWriter.cc 
b/cpp/velox/shuffle/VeloxShuffleWriter.cc
index a6b07b6c0..870ca0496 100644
--- a/cpp/velox/shuffle/VeloxShuffleWriter.cc
+++ b/cpp/velox/shuffle/VeloxShuffleWriter.cc
@@ -25,6 +25,7 @@
 #include "utils/VeloxArrowUtils.h"
 #include "utils/macros.h"
 #include "velox/buffer/Buffer.h"
+#include "velox/common/base/Nulls.h"
 #include "velox/type/HugeInt.h"
 #include "velox/type/Timestamp.h"
 #include "velox/type/Type.h"
@@ -649,13 +650,14 @@ arrow::Status VeloxShuffleWriter::splitBinaryType(
     uint32_t binaryIdx,
     const facebook::velox::FlatVector<facebook::velox::StringView>& src,
     std::vector<BinaryBuf>& dst) {
-  auto srcRawValues = src.rawValues();
+  const auto* srcRawValues = src.rawValues();
+  const auto* srcRawNulls = src.rawNulls();
 
   for (auto& pid : partitionUsed_) {
     auto& binaryBuf = dst[pid];
 
     // use 32bit offset
-    auto dstOffsetBase = (BinaryArrayLengthBufferType*)(binaryBuf.lengthPtr) + 
partitionBufferBase_[pid];
+    auto dstLengthBase = (BinaryArrayLengthBufferType*)(binaryBuf.lengthPtr) + 
partitionBufferBase_[pid];
 
     auto valueOffset = binaryBuf.valueOffset;
     auto dstValuePtr = binaryBuf.valuePtr + valueOffset;
@@ -668,10 +670,11 @@ arrow::Status VeloxShuffleWriter::splitBinaryType(
     for (auto i = 0; i < numRows; i++) {
       auto rowId = rowOffset2RowId_[rowOffsetBase + i];
       auto& stringView = srcRawValues[rowId];
-      auto stringLen = stringView.size();
+      size_t isNull = srcRawNulls && 
facebook::velox::bits::isBitNull(srcRawNulls, rowId);
+      auto stringLen = (isNull - 1) & stringView.size();
 
       // 1. copy length, update offset.
-      dstOffsetBase[i] = stringLen;
+      dstLengthBase[i] = stringLen;
       valueOffset += stringLen;
 
       // Resize if necessary.
@@ -691,8 +694,8 @@ arrow::Status VeloxShuffleWriter::splitBinaryType(
         binaryBuf.valuePtr = valueBuffer->mutable_data();
         binaryBuf.valueCapacity = capacity;
         dstValuePtr = binaryBuf.valuePtr + valueOffset - stringLen;
-        // Need to update dstOffsetBase because lengthPtr can be updated if 
Reserve triggers spill.
-        dstOffsetBase = (BinaryArrayLengthBufferType*)(binaryBuf.lengthPtr) + 
partitionBufferBase_[pid];
+        // Need to update dstLengthBase because lengthPtr can be updated if 
Reserve triggers spill.
+        dstLengthBase = (BinaryArrayLengthBufferType*)(binaryBuf.lengthPtr) + 
partitionBufferBase_[pid];
       }
 
       // 2. copy value


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to