This is an automated email from the ASF dual-hosted git repository.
chengchengjin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new ea9ba244f5 [VL] Add config spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemory (#11354)
ea9ba244f5 is described below
commit ea9ba244f5511d0435b540f566df036371d75453
Author: Zouxxyy <[email protected]>
AuthorDate: Wed Jan 7 18:07:19 2026 +0800
[VL] Add config spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemory (#11354)
---
.../main/scala/org/apache/gluten/config/VeloxConfig.scala | 12 ++++++++++++
cpp/velox/compute/WholeStageResultIterator.cc | 5 ++++-
cpp/velox/config/VeloxConfig.h | 2 ++
docs/velox-configuration.md | 1 +
4 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
index 6644f4edc2..03781d2fb5 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
@@ -380,6 +380,18 @@ object VeloxConfig extends ConfigRegistry {
.doubleConf
.createWithDefault(0.1)
+ val MAX_EXTENDED_PARTIAL_AGGREGATION_MEMORY =
+
buildConf("spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemory")
+ .doc(
+ "Set the max extended memory of partial aggregation in bytes. When
this option is set " +
+ "to a value greater than 0, it will override
spark.gluten.sql.columnar.backend.velox." +
+ "maxExtendedPartialAggregationMemoryRatio. Note: this option only
works when " +
+ "flushable partial aggregation is enabled. Ignored when " +
+
"spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false."
+ )
+ .bytesConf(ByteUnit.BYTE)
+ .createOptional
+
val MAX_EXTENDED_PARTIAL_AGGREGATION_MEMORY_RATIO =
buildConf("spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio")
.doc(
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
index 6b9761a154..e91e2ad69d 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -555,7 +555,10 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
:
static_cast<int64_t>(veloxCfg_->get<double>(kMaxPartialAggregationMemoryRatio,
0.1) * offHeapMemory));
auto maxExtendedPartialAggregationMemory = std::max<int64_t>(
1 << 26,
-
static_cast<long>(veloxCfg_->get<double>(kMaxExtendedPartialAggregationMemoryRatio,
0.15) * offHeapMemory));
+
veloxCfg_->get<int64_t>(kMaxExtendedPartialAggregationMemory).has_value()
+ ?
veloxCfg_->get<int64_t>(kMaxExtendedPartialAggregationMemory).value()
+ : static_cast<int64_t>(
+
veloxCfg_->get<double>(kMaxExtendedPartialAggregationMemoryRatio, 0.15) *
offHeapMemory));
configs[velox::core::QueryConfig::kMaxPartialAggregationMemory] =
std::to_string(maxPartialAggregationMemory);
configs[velox::core::QueryConfig::kMaxExtendedPartialAggregationMemory] =
std::to_string(maxExtendedPartialAggregationMemory);
diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h
index 6db6fb0994..2cacee5369 100644
--- a/cpp/velox/config/VeloxConfig.h
+++ b/cpp/velox/config/VeloxConfig.h
@@ -55,6 +55,8 @@ const std::string kMaxPartialAggregationMemoryRatio =
const std::string kMaxPartialAggregationMemory =
"spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemory";
const std::string kMaxExtendedPartialAggregationMemoryRatio =
"spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio";
+const std::string kMaxExtendedPartialAggregationMemory =
+
"spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemory";
const std::string kAbandonPartialAggregationMinPct =
"spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct";
const std::string kAbandonPartialAggregationMinRows =
diff --git a/docs/velox-configuration.md b/docs/velox-configuration.md
index 9738f283ee..bd838f357c 100644
--- a/docs/velox-configuration.md
+++ b/docs/velox-configuration.md
@@ -37,6 +37,7 @@ nav_order: 16
| spark.gluten.sql.columnar.backend.velox.maxCoalescedBytes
| 64MB | Set the max coalesced bytes for velox file scan
[...]
| spark.gluten.sql.columnar.backend.velox.maxCoalescedDistance
| 512KB | Set the max coalesced distance bytes for velox file
scan
[...]
| spark.gluten.sql.columnar.backend.velox.maxCompiledRegexes
| 100 | Controls maximum number of compiled regular
expression patterns per function instance per thread of execution.
[...]
+| spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemory
| <undefined> | Set the max extended memory of partial aggregation in
bytes. When this option is set to a value greater than 0, it will override
spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio.
Note: this option only works when flushable partial aggregation is enabled.
Ignored when
spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false.
[...]
|
spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio
| 0.15 | Set the max extended memory of partial aggregation as
maxExtendedPartialAggregationMemoryRatio of offheap size. Note: this option
only works when flushable partial aggregation is enabled. Ignored when
spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false.
[...]
| spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemory
| <undefined> | Set the max memory of partial aggregation in bytes.
When this option is set to a value greater than 0, it will override
spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio. Note:
this option only works when flushable partial aggregation is enabled. Ignored
when spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false.
[...]
| spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio
| 0.1 | Set the max memory of partial aggregation as
maxPartialAggregationMemoryRatio of offheap size. Note: this option only works
when flushable partial aggregation is enabled. Ignored when
spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false.
[...]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]