This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 4fecd1dda8 [VL] Remove one legacy Velox config used for Spark
collect_list function (#7826)
4fecd1dda8 is described below
commit 4fecd1dda81c4ac2f4a33e2f4a6f5e6351898873
Author: PHILO-HE <[email protected]>
AuthorDate: Wed Nov 6 17:05:10 2024 +0800
[VL] Remove one legacy Velox config used for Spark collect_list function
(#7826)
---
cpp/velox/compute/WholeStageResultIterator.cc | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
index adc9e9bbe9..b6ecbd959f 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -430,8 +430,6 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
std::to_string(veloxCfg_->get<uint32_t>(kSparkBatchSize, 4096));
configs[velox::core::QueryConfig::kMaxOutputBatchRows] =
std::to_string(veloxCfg_->get<uint32_t>(kSparkBatchSize, 4096));
- // Find offheap size from Spark confs. If found, set the max memory usage of partial aggregation.
- // FIXME this uses process-wise off-heap memory which is not for task
try {
if (veloxCfg_->valueExists(kDefaultSessionTimezone)) {
configs[velox::core::QueryConfig::kSessionTimezone] =
veloxCfg_->get<std::string>(kDefaultSessionTimezone, "");
@@ -443,6 +441,8 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
configs[velox::core::QueryConfig::kAdjustTimestampToTimezone] = "true";
{
+ // Find offheap size from Spark confs. If found, set the max memory usage of partial aggregation.
+ // FIXME this uses process-wise off-heap memory which is not for task
// partial aggregation memory config
auto offHeapMemory = veloxCfg_->get<int64_t>(kSparkTaskOffHeapMemory,
facebook::velox::memory::kMaxMemory);
auto maxPartialAggregationMemory =
@@ -456,8 +456,6 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
std::to_string(veloxCfg_->get<int32_t>(kAbandonPartialAggregationMinPct, 90));
configs[velox::core::QueryConfig::kAbandonPartialAggregationMinRows] =
std::to_string(veloxCfg_->get<int32_t>(kAbandonPartialAggregationMinRows, 100000));
- // Spark's collect_set ignore nulls.
- configs[velox::core::QueryConfig::kPrestoArrayAggIgnoreNulls] = "true";
}
// Spill configs
if (spillStrategy_ == "none") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]