(incubator-gluten) branch main updated: [VL] Remove one legacy Velox config used for Spark collect_list function (#7826)

philo Wed, 06 Nov 2024 01:13:40 -0800

This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new 4fecd1dda8 [VL] Remove one legacy Velox config used for Spark collect_list function (#7826)
4fecd1dda8 is described below

commit 4fecd1dda81c4ac2f4a33e2f4a6f5e6351898873
Author: PHILO-HE <[email protected]>
AuthorDate: Wed Nov 6 17:05:10 2024 +0800

    [VL] Remove one legacy Velox config used for Spark collect_list function (#7826)
---
 cpp/velox/compute/WholeStageResultIterator.cc | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
index adc9e9bbe9..b6ecbd959f 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -430,8 +430,6 @@ std::unordered_map<std::string, std::string> 
WholeStageResultIterator::getQueryC
       std::to_string(veloxCfg_->get<uint32_t>(kSparkBatchSize, 4096));
   configs[velox::core::QueryConfig::kMaxOutputBatchRows] =
       std::to_string(veloxCfg_->get<uint32_t>(kSparkBatchSize, 4096));
-  // Find offheap size from Spark confs. If found, set the max memory usage of 
partial aggregation.
-  // FIXME this uses process-wise off-heap memory which is not for task
   try {
     if (veloxCfg_->valueExists(kDefaultSessionTimezone)) {
       configs[velox::core::QueryConfig::kSessionTimezone] = 
veloxCfg_->get<std::string>(kDefaultSessionTimezone, "");
@@ -443,6 +441,8 @@ std::unordered_map<std::string, std::string> 
WholeStageResultIterator::getQueryC
     configs[velox::core::QueryConfig::kAdjustTimestampToTimezone] = "true";
 
     {
+      // Find offheap size from Spark confs. If found, set the max memory 
usage of partial aggregation.
+      // FIXME this uses process-wise off-heap memory which is not for task
       // partial aggregation memory config
       auto offHeapMemory = veloxCfg_->get<int64_t>(kSparkTaskOffHeapMemory, 
facebook::velox::memory::kMaxMemory);
       auto maxPartialAggregationMemory =
@@ -456,8 +456,6 @@ std::unordered_map<std::string, std::string> 
WholeStageResultIterator::getQueryC
           
std::to_string(veloxCfg_->get<int32_t>(kAbandonPartialAggregationMinPct, 90));
       configs[velox::core::QueryConfig::kAbandonPartialAggregationMinRows] =
           
std::to_string(veloxCfg_->get<int32_t>(kAbandonPartialAggregationMinRows, 
100000));
-      // Spark's collect_set ignore nulls.
-      configs[velox::core::QueryConfig::kPrestoArrayAggIgnoreNulls] = "true";
     }
     // Spill configs
     if (spillStrategy_ == "none") {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [VL] Remove one legacy Velox config used for Spark collect_list function (#7826)

Reply via email to