This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new f9609f33f4 [VL] Update dedup hash build configuration name (#11289)
f9609f33f4 is described below
commit f9609f33f4683c5774555c0918434de3ac1f451c
Author: Rong Ma <[email protected]>
AuthorDate: Fri Dec 12 22:12:11 2025 +0800
[VL] Update dedup hash build configuration name (#11289)
Update the configuration names according to the changes in Velox
---
.../src/main/scala/org/apache/gluten/config/VeloxConfig.scala | 9 +++++----
cpp/velox/compute/WholeStageResultIterator.cc | 4 ++--
cpp/velox/config/VeloxConfig.h | 4 ++--
docs/velox-configuration.md | 4 ++--
4 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
index 5444059a0a..6644f4edc2 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
@@ -538,17 +538,18 @@ object VeloxConfig extends ConfigRegistry {
.createWithDefault(false)
val VELOX_HASHMAP_ABANDON_BUILD_DUPHASH_MIN_ROWS =
- buildConf("spark.gluten.velox.abandonbuild.noduphashminrows")
+ buildConf("spark.gluten.velox.abandonDedupHashMap.minRows")
.experimental()
.doc("Experimental: abandon hashmap build if duplicated rows more than this number.")
.intConf
.createWithDefault(100000)
val VELOX_HASHMAP_ABANDON_BUILD_DUPHASH_MIN_PCT =
- buildConf("spark.gluten.velox.abandonbuild.noduphashminpct")
+ buildConf("spark.gluten.velox.abandonDedupHashMap.minPct")
.experimental()
- .doc("Experimental: abandon hashmap build if duplicated rows are more than this percentile." +
- "Value is integer based and range is [0, 100].")
+ .doc(
+ "Experimental: abandon hashmap build if duplicated rows are more than this percentile. " +
+ "Value is integer based and range is [0, 100].")
.intConf
.createWithDefault(0)
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
index bd1016d02b..6b9761a154 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -616,9 +616,9 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
// hashtable build optimizations
configs[velox::core::QueryConfig::kAbandonDedupHashMapMinRows] =
- std::to_string(veloxCfg_->get<int32_t>(kAbandonBuildNoDupHashMinRows, 100000));
+ std::to_string(veloxCfg_->get<int32_t>(kAbandonDedupHashMapMinRows, 100000));
configs[velox::core::QueryConfig::kAbandonDedupHashMapMinPct] =
- std::to_string(veloxCfg_->get<int32_t>(kAbandonBuildNoDupHashMinPct, 0));
+ std::to_string(veloxCfg_->get<int32_t>(kAbandonDedupHashMapMinPct, 0));
// Disable driver cpu time slicing.
configs[velox::core::QueryConfig::kDriverCpuTimeSliceLimitMs] = "0";
diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h
index 419f6e79ca..6db6fb0994 100644
--- a/cpp/velox/config/VeloxConfig.h
+++ b/cpp/velox/config/VeloxConfig.h
@@ -61,8 +61,8 @@ const std::string kAbandonPartialAggregationMinRows =
"spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows";
// hashmap build
-const std::string kAbandonBuildNoDupHashMinRows = "spark.gluten.velox.abandonbuild.noduphashminrows";
-const std::string kAbandonBuildNoDupHashMinPct = "spark.gluten.velox.abandonbuild.noduphashminpct";
+const std::string kAbandonDedupHashMapMinRows = "spark.gluten.velox.abandonDedupHashMap.minRows";
+const std::string kAbandonDedupHashMapMinPct = "spark.gluten.velox.abandonDedupHashMap.minPct";
// execution
const std::string kBloomFilterExpectedNumItems =
"spark.gluten.sql.columnar.backend.velox.bloomFilter.expectedNumItems";
diff --git a/docs/velox-configuration.md b/docs/velox-configuration.md
index cacb8cf480..9738f283ee 100644
--- a/docs/velox-configuration.md
+++ b/docs/velox-configuration.md
@@ -80,7 +80,7 @@ nav_order: 16
| Key | Default | Description |
|----------------------------------------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------|
-| spark.gluten.velox.abandonbuild.noduphashminpct | 0.0 | Experimental: abandon hashmap build if duplicated rows are more than this percentile. |
-| spark.gluten.velox.abandonbuild.noduphashminrows | 100000 | Experimental: abandon hashmap build if duplicated rows more than this number. |
+| spark.gluten.velox.abandonDedupHashMap.minPct | 0 | Experimental: abandon hashmap build if duplicated rows are more than this percentile. Value is integer based and range is [0, 100]. |
+| spark.gluten.velox.abandonDedupHashMap.minRows | 100000 | Experimental: abandon hashmap build if duplicated rows more than this number. |
| spark.gluten.velox.offHeapBroadcastBuildRelation.enabled | false | Experimental: If enabled, broadcast build relation will use offheap memory. Otherwise, broadcast build relation will use onheap memory. |
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]