This is an automated email from the ASF dual-hosted git repository.

wangzhen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new f691dfb453 [GLUTEN-9756][VL] Support jemalloc memory profile dump on 
exit (#9759)
f691dfb453 is described below

commit f691dfb453986860926e05d04a82f55cdcd5ba4a
Author: Zhen Wang <[email protected]>
AuthorDate: Wed Jun 4 09:23:30 2025 +0800

    [GLUTEN-9756][VL] Support jemalloc memory profile dump on exit (#9759)
    
    * [GLUTEN-9756][VL] Support jemalloc memory profile dump on exit
    
    * address comments
    
    * fix style
    
    * address comment
    
    * address comments
---
 .../apache/gluten/monitor/VeloxMemoryProfiler.java | 42 ++++++++++++++++++++++
 .../backendsapi/velox/VeloxListenerApi.scala       | 17 ++++++++-
 .../org/apache/gluten/config/VeloxConfig.scala     |  9 +++++
 cpp/velox/jni/VeloxJniWrapper.cc                   | 32 +++++++++++++++++
 docs/developers/ProfileMemoryOfGlutenWithVelox.md  | 15 ++++++++
 5 files changed, 114 insertions(+), 1 deletion(-)

diff --git 
a/backends-velox/src/main/java/org/apache/gluten/monitor/VeloxMemoryProfiler.java
 
b/backends-velox/src/main/java/org/apache/gluten/monitor/VeloxMemoryProfiler.java
new file mode 100644
index 0000000000..988e291013
--- /dev/null
+++ 
b/backends-velox/src/main/java/org/apache/gluten/monitor/VeloxMemoryProfiler.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.monitor;
+
+/**
+ * VeloxMemoryProfiler is a JNI wrapper for controlling the native memory profiler. Currently, it
+ * uses jemalloc for memory profiling, so to enable it you need to build Gluten with
+ * `--enable_jemalloc_stats=ON`.
+ *
+ * <p>Please set the following configurations by using the same lib jemalloc 
linked to Gluten native
+ * lib.
+ *
+ * <ul>
+ *   <li>spark.executorEnv.LD_PRELOAD=/path/to/libjemalloc.so
+ *   
<li>spark.executorEnv.MALLOC_CONF=prof:true,prof_prefix:/tmp/gluten_heap_perf
+ * </ul>
+ */
+public class VeloxMemoryProfiler {
+
+  /** Starts the Velox memory profiler. (jemalloc: prof.active=true) */
+  public static native void start();
+
+  /** Dumps the current memory profile. (jemalloc: prof.dump) */
+  public static native void dump();
+
+  /** Stops the Velox memory profiler. (jemalloc: prof.active=false) */
+  public static native void stop();
+}
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
index d5e7191b74..778d79ab8b 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
@@ -28,6 +28,7 @@ import org.apache.gluten.init.NativeBackendInitializer
 import org.apache.gluten.jni.{JniLibLoader, JniWorkspace}
 import org.apache.gluten.memory.{MemoryUsageRecorder, 
SimpleMemoryUsageRecorder}
 import org.apache.gluten.memory.listener.ReservationListener
+import org.apache.gluten.monitor.VeloxMemoryProfiler
 import org.apache.gluten.udf.UdfJniWrapper
 import org.apache.gluten.utils._
 
@@ -43,7 +44,7 @@ import 
org.apache.spark.sql.execution.datasources.GlutenWriterColumnarRules
 import 
org.apache.spark.sql.execution.datasources.velox.{VeloxParquetWriterInjects, 
VeloxRowSplitter}
 import org.apache.spark.sql.expression.UDFResolver
 import org.apache.spark.sql.internal.{GlutenConfigUtil, StaticSQLConf}
-import org.apache.spark.util.{SparkDirectoryUtil, SparkResourceUtil}
+import org.apache.spark.util.{SparkDirectoryUtil, SparkResourceUtil, 
SparkShutdownManagerUtil}
 
 import org.apache.commons.lang3.StringUtils
 
@@ -146,6 +147,7 @@ class VeloxListenerApi extends ListenerApi with Logging {
 
     SparkDirectoryUtil.init(conf)
     initialize(conf, isDriver = false)
+    addIfNeedMemoryDumpShutdownHook(conf)
   }
 
   override def onExecutorShutdown(): Unit = shutdown()
@@ -222,6 +224,19 @@ class VeloxListenerApi extends ListenerApi with Logging {
     GlutenFormatFactory.register(new VeloxRowSplitter())
   }
 
+  private def addIfNeedMemoryDumpShutdownHook(conf: SparkConf): Unit = {
+    val memoryDumpOnExit =
+      conf.get(MEMORY_DUMP_ON_EXIT.key, 
MEMORY_DUMP_ON_EXIT.defaultValueString).toBoolean
+    if (memoryDumpOnExit) {
+      SparkShutdownManagerUtil.addHook(
+        () => {
+          logInfo("MemoryDumpOnExit triggered, dumping memory profile.")
+          VeloxMemoryProfiler.dump()
+          logInfo("MemoryDumpOnExit completed.")
+        })
+    }
+  }
+
   private def shutdown(): Unit = {
     // TODO shutdown implementation in velox to release resources
   }
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala 
b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
index 1271d707c6..de8eeb2f62 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
@@ -627,4 +627,13 @@ object VeloxConfig {
       .doc("Enable check memory usage leak.")
       .booleanConf
       .createWithDefault(true)
+
+  val MEMORY_DUMP_ON_EXIT =
+    buildConf("spark.gluten.monitor.memoryDumpOnExit")
+      .doc(
+        "Whether to trigger native memory dump when executor exits. Currently 
it uses jemalloc" +
+          " for memory profiling, so if you want to enable it, also need to  
build gluten" +
+          " with `--enable_jemalloc_stats=ON`.")
+      .booleanConf
+      .createWithDefault(false)
 }
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc
index fe72430dd9..c60b36c423 100644
--- a/cpp/velox/jni/VeloxJniWrapper.cc
+++ b/cpp/velox/jni/VeloxJniWrapper.cc
@@ -463,6 +463,38 @@ JNIEXPORT jlong JNICALL 
Java_org_apache_gluten_columnarbatch_VeloxColumnarBatchJ
   JNI_METHOD_END(kInvalidObjectHandle)
 }
 
+JNIEXPORT void JNICALL 
Java_org_apache_gluten_monitor_VeloxMemoryProfiler_start( // NOLINT
+    JNIEnv* env,
+    jclass) {
+  JNI_METHOD_START
+#ifdef ENABLE_JEMALLOC_STATS
+  bool active = true;
+  mallctl("prof.active", NULL, NULL, &active, sizeof(bool));
+#endif
+  JNI_METHOD_END()
+}
+
+JNIEXPORT void JNICALL 
Java_org_apache_gluten_monitor_VeloxMemoryProfiler_dump( // NOLINT
+    JNIEnv* env,
+    jclass) {
+  JNI_METHOD_START
+#ifdef ENABLE_JEMALLOC_STATS
+  mallctl("prof.dump", NULL, NULL, NULL, 0);
+#endif
+  JNI_METHOD_END()
+}
+
+JNIEXPORT void JNICALL 
Java_org_apache_gluten_monitor_VeloxMemoryProfiler_stop( // NOLINT
+    JNIEnv* env,
+    jclass) {
+  JNI_METHOD_START
+#ifdef ENABLE_JEMALLOC_STATS
+  bool active = false;
+  mallctl("prof.active", NULL, NULL, &active, sizeof(bool));
+#endif
+  JNI_METHOD_END()
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/docs/developers/ProfileMemoryOfGlutenWithVelox.md 
b/docs/developers/ProfileMemoryOfGlutenWithVelox.md
index 1f57f80921..4705e4f592 100644
--- a/docs/developers/ProfileMemoryOfGlutenWithVelox.md
+++ b/docs/developers/ProfileMemoryOfGlutenWithVelox.md
@@ -119,6 +119,21 @@ spark.executorEnv.MALLOC_CONF 
prof:true,lg_prof_interval:30,prof_prefix:/tmp/glu
 
 Finally, profiling files prefixed with `/tmp/gluten_heap_perf.${PID}` will be 
generated for each spark executor.
 
+## Memory dump on spark executor exit
+
+Sometimes, when native memory is not managed by Gluten or a memory leak causes the Spark executor to be killed for exceeding its memory limit,
+it is enough to trigger a memory dump on executor exit.
+
+To enable this feature, follow these steps:
+
+1. Build Gluten with `--enable_jemalloc_stats=ON` to enable jemalloc stats.
+2. Enable memory dump on exit, and add Spark executor environment variables to load the jemalloc lib and activate memory profiling.
+    ```
+   spark.gluten.monitor.memoryDumpOnExit=true
+   spark.executorEnv.LD_PRELOAD=/path/to/libjemalloc.so
+   spark.executorEnv.MALLOC_CONF=prof:true,prof_prefix:/tmp/gluten_heap_perf
+   ```
+
 ## Analyze profiling output
 
 Prepare the required native libraries. Assume static build is used for Gluten, 
so there is no other shared dependency libs.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to