This is an automated email from the ASF dual-hosted git repository.
zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new 7d5912632 [#1356] improvement: add metric of total expired
pre-allocated buffers (#1412)
7d5912632 is described below
commit 7d59126323fbba76a9a2fb0b9482ebce6e195ef1
Author: Qing <[email protected]>
AuthorDate: Fri Jan 5 09:57:11 2024 +0800
[#1356] improvement: add metric of total expired pre-allocated buffers
(#1412)
### What changes were proposed in this pull request?
Add counter metrics for the total size and the total number of expired pre-allocated buffers.
### Why are the changes needed?
For: https://github.com/apache/incubator-uniffle/issues/1356
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Tested on PRD.
---
.../java/org/apache/uniffle/server/ShuffleServerMetrics.java | 10 ++++++++++
.../java/org/apache/uniffle/server/ShuffleTaskManager.java | 4 ++++
2 files changed, 14 insertions(+)
diff --git
a/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
b/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
index 7ab54e9c7..295be4f01 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
@@ -69,6 +69,10 @@ public class ShuffleServerMetrics {
private static final String IS_HEALTHY = "is_healthy";
private static final String ALLOCATED_BUFFER_SIZE = "allocated_buffer_size";
+ private static final String EXPIRED_PRE_ALLOCATED_BUFFER_SIZE_TOTAL =
+ "expired_pre_allocated_buffer_size_total";
+ private static final String EXPIRED_PRE_ALLOCATED_BUFFER_ID_TOTAL =
+ "expired_pre_allocated_buffer_id_total";
private static final String IN_FLUSH_BUFFER_SIZE = "in_flush_buffer_size";
private static final String USED_BUFFER_SIZE = "used_buffer_size";
private static final String READ_USED_BUFFER_SIZE = "read_used_buffer_size";
@@ -176,6 +180,8 @@ public class ShuffleServerMetrics {
public static Counter counterRemoteStorageSuccessWrite;
public static Counter counterTotalHadoopWriteDataSize;
public static Counter counterTotalLocalFileWriteDataSize;
+ public static Counter counterExpiredPreAllocatedBufferSizeTotal;
+ public static Counter counterExpiredPreAllocatedBufferIdTotal;
private static String tags;
public static Counter counterLocalFileEventFlush;
@@ -327,6 +333,10 @@ public class ShuffleServerMetrics {
gaugeIsHealthy = metricsManager.addLabeledGauge(IS_HEALTHY);
gaugeAllocatedBufferSize =
metricsManager.addLabeledGauge(ALLOCATED_BUFFER_SIZE);
+ counterExpiredPreAllocatedBufferSizeTotal =
+ metricsManager.addCounter(EXPIRED_PRE_ALLOCATED_BUFFER_SIZE_TOTAL);
+ counterExpiredPreAllocatedBufferIdTotal =
+ metricsManager.addCounter(EXPIRED_PRE_ALLOCATED_BUFFER_ID_TOTAL);
gaugeInFlushBufferSize =
metricsManager.addLabeledGauge(IN_FLUSH_BUFFER_SIZE);
gaugeUsedBufferSize = metricsManager.addLabeledGauge(USED_BUFFER_SIZE);
gaugeReadBufferUsedSize =
metricsManager.addLabeledGauge(READ_USED_BUFFER_SIZE);
diff --git
a/server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java
b/server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java
index 3aaba8c18..1e3f00f38 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java
@@ -766,17 +766,21 @@ public class ShuffleTaskManager {
removeIds.add(info.getRequireId());
}
}
+ int expiredBufferIdsCnt = 0;
for (Long requireId : removeIds) {
PreAllocatedBufferInfo info = requireBufferIds.remove(requireId);
if (info != null) {
// move release memory code down to here as the requiredBuffer could be consumed during
// removing processing.
shuffleBufferManager.releaseMemory(info.getRequireSize(), false,
true);
+
ShuffleServerMetrics.counterExpiredPreAllocatedBufferSizeTotal.inc(info.getRequireSize());
+ expiredBufferIdsCnt++;
LOG.info("Remove expired preAllocatedBuffer " + requireId);
} else {
LOG.info("PreAllocatedBuffer[id={}] has already been removed",
requireId);
}
}
+
ShuffleServerMetrics.counterExpiredPreAllocatedBufferIdTotal.inc(expiredBufferIdsCnt);
} catch (Exception e) {
LOG.warn("Error happened in preAllocatedBufferCheck", e);
}