This is an automated email from the ASF dual-hosted git repository.
wangzhen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new f691dfb453 [GLUTEN-9756][VL] Support jemalloc memory profile dump on
exit (#9759)
f691dfb453 is described below
commit f691dfb453986860926e05d04a82f55cdcd5ba4a
Author: Zhen Wang <[email protected]>
AuthorDate: Wed Jun 4 09:23:30 2025 +0800
[GLUTEN-9756][VL] Support jemalloc memory profile dump on exit (#9759)
* [GLUTEN-9756][VL] Support jemalloc memory profile dump on exit
* address comments
* fix style
* address comment
* address comments
---
.../apache/gluten/monitor/VeloxMemoryProfiler.java | 42 ++++++++++++++++++++++
.../backendsapi/velox/VeloxListenerApi.scala | 17 ++++++++-
.../org/apache/gluten/config/VeloxConfig.scala | 9 +++++
cpp/velox/jni/VeloxJniWrapper.cc | 32 +++++++++++++++++
docs/developers/ProfileMemoryOfGlutenWithVelox.md | 15 ++++++++
5 files changed, 114 insertions(+), 1 deletion(-)
diff --git
a/backends-velox/src/main/java/org/apache/gluten/monitor/VeloxMemoryProfiler.java
b/backends-velox/src/main/java/org/apache/gluten/monitor/VeloxMemoryProfiler.java
new file mode 100644
index 0000000000..988e291013
--- /dev/null
+++
b/backends-velox/src/main/java/org/apache/gluten/monitor/VeloxMemoryProfiler.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.monitor;
+
+/**
+ * VeloxMemoryProfiler is a JNI wrapper for controlling the native memory profiler.
Currently, it uses jemalloc
+ * for memory profiling, so to enable it you need to build Gluten with
+ * `--enable_jemalloc_stats=ON`.
+ *
+ * <p>Please set the following configurations, using the same jemalloc library that is
linked to the Gluten native
+ * lib.
+ *
+ * <ul>
+ * <li>spark.executorEnv.LD_PRELOAD=/path/to/libjemalloc.so
+ *
<li>spark.executorEnv.MALLOC_CONF=prof:true,prof_prefix:/tmp/gluten_heap_perf
+ * </ul>
+ */
+public class VeloxMemoryProfiler {
+
+ /** Starts the Velox memory profiler. (jemalloc: prof.active=true) */
+ public static native void start();
+
+ /** Dumps the current memory profile. (jemalloc: prof.dump) */
+ public static native void dump();
+
+ /** Stops the Velox memory profiler. (jemalloc: prof.active=false) */
+ public static native void stop();
+}
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
index d5e7191b74..778d79ab8b 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
@@ -28,6 +28,7 @@ import org.apache.gluten.init.NativeBackendInitializer
import org.apache.gluten.jni.{JniLibLoader, JniWorkspace}
import org.apache.gluten.memory.{MemoryUsageRecorder,
SimpleMemoryUsageRecorder}
import org.apache.gluten.memory.listener.ReservationListener
+import org.apache.gluten.monitor.VeloxMemoryProfiler
import org.apache.gluten.udf.UdfJniWrapper
import org.apache.gluten.utils._
@@ -43,7 +44,7 @@ import
org.apache.spark.sql.execution.datasources.GlutenWriterColumnarRules
import
org.apache.spark.sql.execution.datasources.velox.{VeloxParquetWriterInjects,
VeloxRowSplitter}
import org.apache.spark.sql.expression.UDFResolver
import org.apache.spark.sql.internal.{GlutenConfigUtil, StaticSQLConf}
-import org.apache.spark.util.{SparkDirectoryUtil, SparkResourceUtil}
+import org.apache.spark.util.{SparkDirectoryUtil, SparkResourceUtil,
SparkShutdownManagerUtil}
import org.apache.commons.lang3.StringUtils
@@ -146,6 +147,7 @@ class VeloxListenerApi extends ListenerApi with Logging {
SparkDirectoryUtil.init(conf)
initialize(conf, isDriver = false)
+ addIfNeedMemoryDumpShutdownHook(conf)
}
override def onExecutorShutdown(): Unit = shutdown()
@@ -222,6 +224,19 @@ class VeloxListenerApi extends ListenerApi with Logging {
GlutenFormatFactory.register(new VeloxRowSplitter())
}
+ private def addIfNeedMemoryDumpShutdownHook(conf: SparkConf): Unit = {
+ val memoryDumpOnExit =
+ conf.get(MEMORY_DUMP_ON_EXIT.key,
MEMORY_DUMP_ON_EXIT.defaultValueString).toBoolean
+ if (memoryDumpOnExit) {
+ SparkShutdownManagerUtil.addHook(
+ () => {
+ logInfo("MemoryDumpOnExit triggered, dumping memory profile.")
+ VeloxMemoryProfiler.dump()
+ logInfo("MemoryDumpOnExit completed.")
+ })
+ }
+ }
+
private def shutdown(): Unit = {
// TODO shutdown implementation in velox to release resources
}
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
index 1271d707c6..de8eeb2f62 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
@@ -627,4 +627,13 @@ object VeloxConfig {
.doc("Enable check memory usage leak.")
.booleanConf
.createWithDefault(true)
+
+ val MEMORY_DUMP_ON_EXIT =
+ buildConf("spark.gluten.monitor.memoryDumpOnExit")
+ .doc(
+ "Whether to trigger native memory dump when executor exits. Currently
it uses jemalloc" +
+ " for memory profiling, so if you want to enable it, also need to
build gluten" +
+ " with `--enable_jemalloc_stats=ON`.")
+ .booleanConf
+ .createWithDefault(false)
}
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc
index fe72430dd9..c60b36c423 100644
--- a/cpp/velox/jni/VeloxJniWrapper.cc
+++ b/cpp/velox/jni/VeloxJniWrapper.cc
@@ -463,6 +463,38 @@ JNIEXPORT jlong JNICALL
Java_org_apache_gluten_columnarbatch_VeloxColumnarBatchJ
JNI_METHOD_END(kInvalidObjectHandle)
}
+JNIEXPORT void JNICALL
Java_org_apache_gluten_monitor_VeloxMemoryProfiler_start( // NOLINT
+ JNIEnv* env,
+ jclass) {
+ JNI_METHOD_START
+#ifdef ENABLE_JEMALLOC_STATS
+ bool active = true;
+ mallctl("prof.active", NULL, NULL, &active, sizeof(bool));
+#endif
+ JNI_METHOD_END()
+}
+
+JNIEXPORT void JNICALL
Java_org_apache_gluten_monitor_VeloxMemoryProfiler_dump( // NOLINT
+ JNIEnv* env,
+ jclass) {
+ JNI_METHOD_START
+#ifdef ENABLE_JEMALLOC_STATS
+ mallctl("prof.dump", NULL, NULL, NULL, 0);
+#endif
+ JNI_METHOD_END()
+}
+
+JNIEXPORT void JNICALL
Java_org_apache_gluten_monitor_VeloxMemoryProfiler_stop( // NOLINT
+ JNIEnv* env,
+ jclass) {
+ JNI_METHOD_START
+#ifdef ENABLE_JEMALLOC_STATS
+ bool active = false;
+ mallctl("prof.active", NULL, NULL, &active, sizeof(bool));
+#endif
+ JNI_METHOD_END()
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/docs/developers/ProfileMemoryOfGlutenWithVelox.md
b/docs/developers/ProfileMemoryOfGlutenWithVelox.md
index 1f57f80921..4705e4f592 100644
--- a/docs/developers/ProfileMemoryOfGlutenWithVelox.md
+++ b/docs/developers/ProfileMemoryOfGlutenWithVelox.md
@@ -119,6 +119,21 @@ spark.executorEnv.MALLOC_CONF
prof:true,lg_prof_interval:30,prof_prefix:/tmp/glu
Finally, profiling files prefixed with `/tmp/gluten_heap_perf.${PID}` will be
generated for each spark executor.
+## Memory dump on spark executor exit
+
+Sometimes native memory is not managed by Gluten, or memory leaks cause the
Spark executor to be killed for exceeding its memory limit. In such cases,
+we only need to trigger a memory dump on executor exit.
+
+To enable this feature, follow these steps:
+
+1. Build Gluten with `--enable_jemalloc_stats=ON` to enable jemalloc stats.
+2. Enable memory dump on exit, and add Spark executor environment variables to load the
jemalloc lib and make memory profiling active.
+ ```
+ spark.gluten.monitor.memoryDumpOnExit=true
+ spark.executorEnv.LD_PRELOAD=/path/to/libjemalloc.so
+ spark.executorEnv.MALLOC_CONF=prof:true,prof_prefix:/tmp/gluten_heap_perf
+ ```
+
## Analyze profiling output
Prepare the required native libraries. Assume static build is used for Gluten,
so there is no other shared dependency libs.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]