This is an automated email from the ASF dual-hosted git repository.
marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new cc6dd509b1 [VL] Enhance VeloxHashShuffleWriter partition buffer size
estimation by incorporating complex type columns (#8089)
cc6dd509b1 is described below
commit cc6dd509b1570c5d0e463828879005ded9b50662
Author: zhaokuo <[email protected]>
AuthorDate: Wed Dec 4 15:24:36 2024 +0800
[VL] Enhance VeloxHashShuffleWriter partition buffer size estimation by
incorporating complex type columns (#8089)
---
cpp/velox/shuffle/VeloxHashShuffleWriter.cc | 6 ++++++
cpp/velox/shuffle/VeloxHashShuffleWriter.h | 1 +
2 files changed, 7 insertions(+)
diff --git a/cpp/velox/shuffle/VeloxHashShuffleWriter.cc b/cpp/velox/shuffle/VeloxHashShuffleWriter.cc
index f044736142..4cd6630fc3 100644
--- a/cpp/velox/shuffle/VeloxHashShuffleWriter.cc
+++ b/cpp/velox/shuffle/VeloxHashShuffleWriter.cc
@@ -725,7 +725,9 @@ arrow::Status VeloxHashShuffleWriter::splitComplexType(const facebook::velox::Ro
for (auto& pid : partitionUsed_) {
if (rowIndexs[pid].size() != 0) {
+ auto old = arenas_[pid]->size();
complexTypeData_[pid]->append(rowVector,
folly::Range(rowIndexs[pid].data(), rowIndexs[pid].size()));
+ complexTotalSizeBytes_ += arenas_[pid]->size() - old;
}
}
@@ -853,6 +855,10 @@ uint32_t VeloxHashShuffleWriter::calculatePartitionBufferSize(const facebook::ve
VS_PRINT_VECTOR_MAPPING(binaryArrayAvgBytesPerRow);
+ if (totalInputNumRows_ > 0) {
+ bytesPerRow += complexTotalSizeBytes_ / totalInputNumRows_;
+ }
+
VS_PRINTLF(bytesPerRow);
memLimit += cachedPayloadSize();
diff --git a/cpp/velox/shuffle/VeloxHashShuffleWriter.h b/cpp/velox/shuffle/VeloxHashShuffleWriter.h
index 121eaf116c..4ee12a1550 100644
--- a/cpp/velox/shuffle/VeloxHashShuffleWriter.h
+++ b/cpp/velox/shuffle/VeloxHashShuffleWriter.h
@@ -355,6 +355,7 @@ class VeloxHashShuffleWriter : public VeloxShuffleWriter {
// Updated for each input RowVector.
uint64_t totalInputNumRows_ = 0;
std::vector<uint64_t> binaryArrayTotalSizeBytes_;
+ size_t complexTotalSizeBytes_ = 0;
// True if input column has null in any processed input RowVector.
// In the order of fixed-width columns + binary columns.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]