This is an automated email from the ASF dual-hosted git repository.
zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new c36d8c8d0 [#1446][FOLLOWUP] improvement(common)(server): Add JVM pause
monitor metrics (#2114)
c36d8c8d0 is described below
commit c36d8c8d01576ca4ef22436c38fb84e51f9ddc26
Author: maobaolong <[email protected]>
AuthorDate: Thu Sep 12 16:22:40 2024 +0800
[#1446][FOLLOWUP] improvement(common)(server): Add JVM pause monitor
metrics (#2114)
### What changes were proposed in this pull request?
Add JVM pause monitor metrics.
### Why are the changes needed?
Fix: #1446
### Does this PR introduce _any_ user-facing change?
Added new metrics:
- JvmPauseMonitorTotalExtraTime
- JvmPauseMonitorInfoTimeExceeded
- JvmPauseMonitorWarnTimeExceeded
### How was this patch tested?
Tested through the dashboard's server metrics popup window.
---
.../uniffle/common/metrics/CommonMetrics.java | 24 ++++++++++++++++++++++
.../org/apache/uniffle/server/ShuffleServer.java | 11 ++++++++++
2 files changed, 35 insertions(+)
diff --git
a/common/src/main/java/org/apache/uniffle/common/metrics/CommonMetrics.java
b/common/src/main/java/org/apache/uniffle/common/metrics/CommonMetrics.java
new file mode 100644
index 000000000..ad5b22710
--- /dev/null
+++ b/common/src/main/java/org/apache/uniffle/common/metrics/CommonMetrics.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.uniffle.common.metrics;
+
+public class CommonMetrics {
+ public static final String JVM_PAUSE_TOTAL_EXTRA_TIME =
"JvmPauseMonitorTotalExtraTime";
+ public static final String JVM_PAUSE_INFO_TIME_EXCEEDED =
"JvmPauseMonitorInfoTimeExceeded";
+ public static final String JVM_PAUSE_WARN_TIME_EXCEEDED =
"JvmPauseMonitorWarnTimeExceeded";
+}
diff --git a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
index 4b59cb5ce..88ec8ea64 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
@@ -72,6 +72,9 @@ import static
org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_K
import static
org.apache.uniffle.common.config.RssBaseConf.RSS_SECURITY_HADOOP_KRB5_CONF_FILE;
import static org.apache.uniffle.common.config.RssBaseConf.RSS_STORAGE_TYPE;
import static
org.apache.uniffle.common.config.RssBaseConf.RSS_TEST_MODE_ENABLE;
+import static
org.apache.uniffle.common.metrics.CommonMetrics.JVM_PAUSE_INFO_TIME_EXCEEDED;
+import static
org.apache.uniffle.common.metrics.CommonMetrics.JVM_PAUSE_TOTAL_EXTRA_TIME;
+import static
org.apache.uniffle.common.metrics.CommonMetrics.JVM_PAUSE_WARN_TIME_EXCEEDED;
import static
org.apache.uniffle.server.ShuffleServerConf.SERVER_DECOMMISSION_CHECK_INTERVAL;
import static
org.apache.uniffle.server.ShuffleServerConf.SERVER_DECOMMISSION_SHUTDOWN;
import static
org.apache.uniffle.server.ShuffleServerMetrics.USED_DIRECT_MEMORY_SIZE;
@@ -331,6 +334,14 @@ public class ShuffleServer {
(PlatformDependent.usedDirectMemory()
+
io.grpc.netty.shaded.io.netty.util.internal.PlatformDependent
.usedDirectMemory()));
+ ShuffleServerMetrics.addLabeledGauge(
+ JVM_PAUSE_TOTAL_EXTRA_TIME, () -> (double)
jvmPauseMonitor.getTotalGcExtraSleepTime());
+ ShuffleServerMetrics.addLabeledGauge(
+ JVM_PAUSE_INFO_TIME_EXCEEDED,
+ () -> (double) jvmPauseMonitor.getNumGcInfoThresholdExceeded());
+ ShuffleServerMetrics.addLabeledGauge(
+ JVM_PAUSE_WARN_TIME_EXCEEDED,
+ () -> (double) jvmPauseMonitor.getNumGcWarnThresholdExceeded());
setServer();
}