This is an automated email from the ASF dual-hosted git repository.

marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new cc6dd509b1 [VL] Enhance VeloxHashShuffleWriter partition buffer size estimation by incorporating complex type columns (#8089)
cc6dd509b1 is described below

commit cc6dd509b1570c5d0e463828879005ded9b50662
Author: zhaokuo <[email protected]>
AuthorDate: Wed Dec 4 15:24:36 2024 +0800

    [VL] Enhance VeloxHashShuffleWriter partition buffer size estimation by incorporating complex type columns (#8089)
---
 cpp/velox/shuffle/VeloxHashShuffleWriter.cc | 6 ++++++
 cpp/velox/shuffle/VeloxHashShuffleWriter.h  | 1 +
 2 files changed, 7 insertions(+)

diff --git a/cpp/velox/shuffle/VeloxHashShuffleWriter.cc b/cpp/velox/shuffle/VeloxHashShuffleWriter.cc
index f044736142..4cd6630fc3 100644
--- a/cpp/velox/shuffle/VeloxHashShuffleWriter.cc
+++ b/cpp/velox/shuffle/VeloxHashShuffleWriter.cc
@@ -725,7 +725,9 @@ arrow::Status VeloxHashShuffleWriter::splitComplexType(const facebook::velox::Ro
 
   for (auto& pid : partitionUsed_) {
     if (rowIndexs[pid].size() != 0) {
+      auto old = arenas_[pid]->size();
       complexTypeData_[pid]->append(rowVector, 
folly::Range(rowIndexs[pid].data(), rowIndexs[pid].size()));
+      complexTotalSizeBytes_ += arenas_[pid]->size() - old;
     }
   }
 
@@ -853,6 +855,10 @@ uint32_t VeloxHashShuffleWriter::calculatePartitionBufferSize(const facebook::ve
 
   VS_PRINT_VECTOR_MAPPING(binaryArrayAvgBytesPerRow);
 
+  if (totalInputNumRows_ > 0) {
+    bytesPerRow += complexTotalSizeBytes_ / totalInputNumRows_;
+  }
+
   VS_PRINTLF(bytesPerRow);
 
   memLimit += cachedPayloadSize();
diff --git a/cpp/velox/shuffle/VeloxHashShuffleWriter.h b/cpp/velox/shuffle/VeloxHashShuffleWriter.h
index 121eaf116c..4ee12a1550 100644
--- a/cpp/velox/shuffle/VeloxHashShuffleWriter.h
+++ b/cpp/velox/shuffle/VeloxHashShuffleWriter.h
@@ -355,6 +355,7 @@ class VeloxHashShuffleWriter : public VeloxShuffleWriter {
   // Updated for each input RowVector.
   uint64_t totalInputNumRows_ = 0;
   std::vector<uint64_t> binaryArrayTotalSizeBytes_;
+  size_t complexTotalSizeBytes_ = 0;
 
   // True if input column has null in any processed input RowVector.
   // In the order of fixed-width columns + binary columns.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to