This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 47237cbbc4 [GLUTEN-10388] Reduce off-heap memory request for partial stage fallback (#10389)
47237cbbc4 is described below

commit 47237cbbc43d93221c53b81ed40d51e7cb5264f8
Author: xinghuayu007 <[email protected]>
AuthorDate: Sat Aug 16 17:50:50 2025 +0800

    [GLUTEN-10388] Reduce off-heap memory request for partial stage fallback (#10389)
---
 .../execution/AutoAdjustStageResourceProfileSuite.scala       |  4 +++-
 docs/Configuration.md                                         |  1 +
 .../main/scala/org/apache/gluten/config/GlutenConfig.scala    | 11 +++++++++++
 .../sql/execution/GlutenAutoAdjustStageResourceProfile.scala  |  8 ++++++++
 4 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/AutoAdjustStageResourceProfileSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/AutoAdjustStageResourceProfileSuite.scala
index f094ce1408..e545892861 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/AutoAdjustStageResourceProfileSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/AutoAdjustStageResourceProfileSuite.scala
@@ -92,7 +92,9 @@ class AutoAdjustStageResourceProfileSuite
   test("stage contains fallback nodes and apply new resource profile") {
     withSQLConf(
       GlutenConfig.COLUMNAR_SHUFFLE_ENABLED.key -> "false",
-      GlutenConfig.AUTO_ADJUST_STAGE_RESOURCES_FALLEN_NODE_RATIO_THRESHOLD.key 
-> "0.1") {
+      GlutenConfig.AUTO_ADJUST_STAGE_RESOURCES_OFFHEAP_RATIO.key -> "0.6",
+      GlutenConfig.AUTO_ADJUST_STAGE_RESOURCES_FALLEN_NODE_RATIO_THRESHOLD.key 
-> "0.1"
+    ) {
       runQueryAndCompare("select c1, count(*) from tmp1 group by c1") {
         df =>
           val plan = df.queryExecution.executedPlan
diff --git a/docs/Configuration.md b/docs/Configuration.md
index 5313386cad..995bdf3886 100644
--- a/docs/Configuration.md
+++ b/docs/Configuration.md
@@ -25,6 +25,7 @@ nav_order: 15
 | spark.gluten.auto.adjustStageResource.enabled                      | false   
          | Experimental: If enabled, gluten will try to set the stage resource 
according to stage execution plan. Only worked when aqe is enabled at the same 
time!!                                                                          
                                                                                
                                                                                
               [...]
 | spark.gluten.auto.adjustStageResources.fallenNode.ratio.threshold  | 0.5     
          | Experimental: Increase executor heap memory when stage contains 
fallen node count exceeds the total node count ratio.                           
                                                                                
                                                                                
                                                                                
                  [...]
 | spark.gluten.auto.adjustStageResources.heap.ratio                  | 2.0     
          | Experimental: Increase executor heap memory when match adjust stage 
resource rule.                                                                  
                                                                                
                                                                                
                                                                                
              [...]
+| spark.gluten.auto.adjustStageResources.offheap.ratio               | 0.5     
          | Experimental: Decrease executor offheap memory when match adjust 
stage resource rule.                                                            
                                                                                
                                                                                
                                                                                
                 [...]
 | spark.gluten.enabled                                               | true    
          | Whether to enable gluten. Default value is true. Just an 
experimental property. Recommend to enable/disable Gluten through the setting 
for spark.plugins.                                                              
                                                                                
                                                                                
                           [...]
 | spark.gluten.execution.resource.expired.time                       | 86400   
          | Expired time of execution with resource relation has cached.        
                                                                                
                                                                                
                                                                                
                                                                                
              [...]
 | spark.gluten.expression.blacklist                                  | 
&lt;undefined&gt; | A black list of expression to skip transform, multiple 
values separated by commas.                                                     
                                                                                
                                                                                
                                                                                
                           [...]
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala b/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
index e81d6b0664..4dbdf11a52 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
@@ -365,6 +365,10 @@ class GlutenConfig(conf: SQLConf) extends 
GlutenCoreConfig(conf) {
 
   def autoAdjustStageRPHeapRatio: Double = 
getConf(AUTO_ADJUST_STAGE_RESOURCES_HEAP_RATIO)
 
+  def autoAdjustStageRPOffHeapRatio: Double = getConf(
+    AUTO_ADJUST_STAGE_RESOURCES_OFFHEAP_RATIO
+  )
+
   def autoAdjustStageFallenNodeThreshold: Double =
     getConf(AUTO_ADJUST_STAGE_RESOURCES_FALLEN_NODE_RATIO_THRESHOLD)
   def parquetEncryptionValidationFileLimit: Int = 
getConf(ENCRYPTED_PARQUET_FALLBACK_FILE_LIMIT)
@@ -1585,6 +1589,13 @@ object GlutenConfig {
       .doubleConf
       .createWithDefault(2.0d)
 
+  val AUTO_ADJUST_STAGE_RESOURCES_OFFHEAP_RATIO =
+    buildConf("spark.gluten.auto.adjustStageResources.offheap.ratio")
+      .internal()
+      .doc("Experimental: Decrease executor offheap memory when match adjust 
stage resource rule.")
+      .doubleConf
+      .createWithDefault(0.5d)
+
   val AUTO_ADJUST_STAGE_RESOURCES_FALLEN_NODE_RATIO_THRESHOLD =
     
buildConf("spark.gluten.auto.adjustStageResources.fallenNode.ratio.threshold")
       .internal()
diff --git a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenAutoAdjustStageResourceProfile.scala b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenAutoAdjustStageResourceProfile.scala
index 7a96d87cf9..fe76910cc9 100644
--- a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenAutoAdjustStageResourceProfile.scala
+++ b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/GlutenAutoAdjustStageResourceProfile.scala
@@ -112,6 +112,7 @@ case class GlutenAutoAdjustStageResourceProfile(glutenConf: 
GlutenConfig, spark:
     }
 
     // case 2: check whether fallback exists and decide whether increase heap 
memory
+    // and decrease offheap memory.
     val fallenNodeCnt = planNodes.count(p => !p.isInstanceOf[GlutenPlan])
     val totalCount = planNodes.size
 
@@ -120,6 +121,13 @@ case class 
GlutenAutoAdjustStageResourceProfile(glutenConf: GlutenConfig, spark:
       val newExecutorMemory =
         new ExecutorResourceRequest(ResourceProfile.MEMORY, 
newMemoryAmount.toLong)
       executorResource.put(ResourceProfile.MEMORY, newExecutorMemory)
+
+      val newOffHeapMemoryAmount =
+        offheapRequest.get.amount * glutenConf.autoAdjustStageRPOffHeapRatio;
+      val newExecutorOffheap =
+        new ExecutorResourceRequest(ResourceProfile.OFFHEAP_MEM, 
newOffHeapMemoryAmount.toLong)
+      executorResource.put(ResourceProfile.OFFHEAP_MEM, newExecutorOffheap)
+
       val newRP = new ResourceProfile(executorResource.toMap, 
taskResource.toMap)
       return GlutenResourceProfile.applyNewResourceProfileIfPossible(
         plan,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to