This is an automated email from the ASF dual-hosted git repository.
marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 3ba07c9cd [VL] fix bug of string buffer size calculation in shuffle
(#5395)
3ba07c9cd is described below
commit 3ba07c9cdea6a9c0a05c74593b670d5b067cf62e
Author: BInwei Yang <[email protected]>
AuthorDate: Sun Apr 14 17:58:03 2024 -0700
[VL] fix bug of string buffer size calculation in shuffle (#5395)
Flattening a buffer doesn't update the vector, so we can't use the vector's
string buffers to calculate the string buffer size. Instead, sum the length of
each row's StringView, counting rows whose null bit is set as length 0.
---
cpp/velox/shuffle/VeloxShuffleWriter.cc | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/cpp/velox/shuffle/VeloxShuffleWriter.cc
b/cpp/velox/shuffle/VeloxShuffleWriter.cc
index 870ca0496..909d4ebc3 100644
--- a/cpp/velox/shuffle/VeloxShuffleWriter.cc
+++ b/cpp/velox/shuffle/VeloxShuffleWriter.cc
@@ -858,8 +858,15 @@ uint16_t
VeloxShuffleWriter::calculatePartitionBufferSize(const facebook::velox:
for (size_t i = 0; i < binaryColumnIndices_.size(); ++i) {
uint64_t binarySizeBytes = 0;
auto column =
rv.childAt(binaryColumnIndices_[i])->asFlatVector<facebook::velox::StringView>();
- for (auto& buffer : column->stringBuffers()) {
- binarySizeBytes += buffer->size();
+
+ const auto* srcRawValues = column->rawValues();
+ const auto* srcRawNulls = column->rawNulls();
+
+ for (auto idx = 0; idx < numRows; idx++) {
+ auto& stringView = srcRawValues[idx];
+ size_t isNull = srcRawNulls &&
facebook::velox::bits::isBitNull(srcRawNulls, idx);
+ auto stringLen = (isNull - 1) & stringView.size();
+ binarySizeBytes += stringLen;
}
binaryArrayTotalSizeBytes_[i] += binarySizeBytes;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]