This is an automated email from the ASF dual-hosted git repository.
marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new dee18e7af [VL] Fix Shuffle split get wrong binary length (#5168)
dee18e7af is described below
commit dee18e7afdffa1301954ded7b71b527232b7be43
Author: Rong Ma <[email protected]>
AuthorDate: Fri Mar 29 08:52:32 2024 +0800
[VL] Fix Shuffle split get wrong binary length (#5168)
---
cpp/velox/shuffle/VeloxShuffleWriter.cc | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/cpp/velox/shuffle/VeloxShuffleWriter.cc
b/cpp/velox/shuffle/VeloxShuffleWriter.cc
index a6b07b6c0..870ca0496 100644
--- a/cpp/velox/shuffle/VeloxShuffleWriter.cc
+++ b/cpp/velox/shuffle/VeloxShuffleWriter.cc
@@ -25,6 +25,7 @@
#include "utils/VeloxArrowUtils.h"
#include "utils/macros.h"
#include "velox/buffer/Buffer.h"
+#include "velox/common/base/Nulls.h"
#include "velox/type/HugeInt.h"
#include "velox/type/Timestamp.h"
#include "velox/type/Type.h"
@@ -649,13 +650,14 @@ arrow::Status VeloxShuffleWriter::splitBinaryType(
uint32_t binaryIdx,
const facebook::velox::FlatVector<facebook::velox::StringView>& src,
std::vector<BinaryBuf>& dst) {
- auto srcRawValues = src.rawValues();
+ const auto* srcRawValues = src.rawValues();
+ const auto* srcRawNulls = src.rawNulls();
for (auto& pid : partitionUsed_) {
auto& binaryBuf = dst[pid];
// use 32bit offset
- auto dstOffsetBase = (BinaryArrayLengthBufferType*)(binaryBuf.lengthPtr) +
partitionBufferBase_[pid];
+ auto dstLengthBase = (BinaryArrayLengthBufferType*)(binaryBuf.lengthPtr) +
partitionBufferBase_[pid];
auto valueOffset = binaryBuf.valueOffset;
auto dstValuePtr = binaryBuf.valuePtr + valueOffset;
@@ -668,10 +670,11 @@ arrow::Status VeloxShuffleWriter::splitBinaryType(
for (auto i = 0; i < numRows; i++) {
auto rowId = rowOffset2RowId_[rowOffsetBase + i];
auto& stringView = srcRawValues[rowId];
- auto stringLen = stringView.size();
+ size_t isNull = srcRawNulls &&
facebook::velox::bits::isBitNull(srcRawNulls, rowId);
+ auto stringLen = (isNull - 1) & stringView.size();
// 1. copy length, update offset.
- dstOffsetBase[i] = stringLen;
+ dstLengthBase[i] = stringLen;
valueOffset += stringLen;
// Resize if necessary.
@@ -691,8 +694,8 @@ arrow::Status VeloxShuffleWriter::splitBinaryType(
binaryBuf.valuePtr = valueBuffer->mutable_data();
binaryBuf.valueCapacity = capacity;
dstValuePtr = binaryBuf.valuePtr + valueOffset - stringLen;
- // Need to update dstOffsetBase because lengthPtr can be updated if
Reserve triggers spill.
- dstOffsetBase = (BinaryArrayLengthBufferType*)(binaryBuf.lengthPtr) +
partitionBufferBase_[pid];
+ // Need to update dstLengthBase because lengthPtr can be updated if
Reserve triggers spill.
+ dstLengthBase = (BinaryArrayLengthBufferType*)(binaryBuf.lengthPtr) +
partitionBufferBase_[pid];
}
// 2. copy value
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]