This is an automated email from the ASF dual-hosted git repository.

zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new fa527461 [#1114] feat: introduce hdfs host as the 
total_hadoop_write_data metric label (#1107)
fa527461 is described below

commit fa527461f93fff3dbfaa9fc39a66d1df365bc1a6
Author: Junfan Zhang <[email protected]>
AuthorDate: Thu Aug 10 20:27:37 2023 +0800

    [#1114] feat: introduce hdfs host as the total_hadoop_write_data metric 
label (#1107)
    
    ### What changes were proposed in this pull request?
    
    Introduce the HDFS host as a label of the total_hadoop_write_data metric.
    An additional "ALL" label value accumulates the writes across all hosts.
    
    ### Why are the changes needed?
    
    We write the data of apps from different businesses into different HDFS
    clusters. For further iterative optimization, we need to expose the amount
    of data written to each HDFS cluster so that we can evaluate the total
    write volume of the different business lines.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. More metrics are exposed.
    
    ### How was this patch tested?
    
    1. UTs
---
 .../uniffle/server/ShuffleServerMetrics.java       | 17 ++++++++--
 .../server/storage/HadoopStorageManager.java       |  7 ++++-
 .../uniffle/server/ShuffleServerMetricsTest.java   | 36 ++++++++++++++++++++++
 3 files changed, 57 insertions(+), 3 deletions(-)

diff --git 
a/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java 
b/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
index f7334b75..ea7b445c 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
@@ -85,6 +85,7 @@ public class ShuffleServerMetrics {
   private static final String STORAGE_FAILED_WRITE_LOCAL = 
"storage_failed_write_local";
   private static final String STORAGE_SUCCESS_WRITE_LOCAL = 
"storage_success_write_local";
   private static final String STORAGE_HOST_LABEL = "storage_host";
+  public static final String STORAGE_HOST_LABEL_ALL = "ALL";
   public static final String STORAGE_TOTAL_WRITE_REMOTE = 
"storage_total_write_remote";
   public static final String STORAGE_RETRY_WRITE_REMOTE = 
"storage_retry_write_remote";
   public static final String STORAGE_FAILED_WRITE_REMOTE = 
"storage_failed_write_remote";
@@ -125,7 +126,6 @@ public class ShuffleServerMetrics {
   public static Counter.Child counterTotalReadTime;
   public static Counter.Child counterTotalFailedWrittenEventNum;
   public static Counter.Child counterTotalDroppedEventNum;
-  public static Counter.Child counterTotalHadoopWriteDataSize;
   public static Counter.Child counterTotalLocalFileWriteDataSize;
   public static Counter.Child counterTotalRequireBufferFailed;
   public static Counter.Child counterTotalRequireBufferFailedForHugePartition;
@@ -158,10 +158,13 @@ public class ShuffleServerMetrics {
   public static Gauge.Child gaugeEventQueueSize;
   public static Gauge.Child gaugeAppNum;
   public static Gauge.Child gaugeTotalPartitionNum;
+
   public static Counter counterRemoteStorageTotalWrite;
   public static Counter counterRemoteStorageRetryWrite;
   public static Counter counterRemoteStorageFailedWrite;
   public static Counter counterRemoteStorageSuccessWrite;
+  public static Counter counterTotalHadoopWriteDataSize;
+
   private static String tags;
   public static Counter counterLocalFileEventFlush;
   public static Counter counterHadoopEventFlush;
@@ -231,6 +234,14 @@ public class ShuffleServerMetrics {
     }
   }
 
+  public static void incHadoopStorageWriteDataSize(String storageHost, long 
size) {
+    if (StringUtils.isEmpty(storageHost)) {
+      return;
+    }
+    counterTotalHadoopWriteDataSize.labels(tags, storageHost).inc(size);
+    counterTotalHadoopWriteDataSize.labels(tags, 
STORAGE_HOST_LABEL_ALL).inc(size);
+  }
+
   private static void setUpMetrics() {
     counterTotalReceivedDataSize = 
metricsManager.addLabeledCounter(TOTAL_RECEIVED_DATA);
     counterTotalWriteDataSize = 
metricsManager.addLabeledCounter(TOTAL_WRITE_DATA);
@@ -253,7 +264,9 @@ public class ShuffleServerMetrics {
     counterTotalDroppedEventNum = 
metricsManager.addLabeledCounter(TOTAL_DROPPED_EVENT_NUM);
     counterTotalFailedWrittenEventNum =
         metricsManager.addLabeledCounter(TOTAL_FAILED_WRITTEN_EVENT_NUM);
-    counterTotalHadoopWriteDataSize = 
metricsManager.addLabeledCounter(TOTAL_HADOOP_WRITE_DATA);
+    counterTotalHadoopWriteDataSize =
+        metricsManager.addCounter(
+            TOTAL_HADOOP_WRITE_DATA, Constants.METRICS_TAG_LABEL_NAME, 
STORAGE_HOST_LABEL);
     counterTotalLocalFileWriteDataSize =
         metricsManager.addLabeledCounter(TOTAL_LOCALFILE_WRITE_DATA);
     counterTotalRequireBufferFailed = 
metricsManager.addLabeledCounter(TOTAL_REQUIRE_BUFFER_FAILED);
diff --git 
a/server/src/main/java/org/apache/uniffle/server/storage/HadoopStorageManager.java
 
b/server/src/main/java/org/apache/uniffle/server/storage/HadoopStorageManager.java
index 09039dfb..443646dc 100644
--- 
a/server/src/main/java/org/apache/uniffle/server/storage/HadoopStorageManager.java
+++ 
b/server/src/main/java/org/apache/uniffle/server/storage/HadoopStorageManager.java
@@ -68,7 +68,12 @@ public class HadoopStorageManager extends 
SingleStorageManager {
   @Override
   public void updateWriteMetrics(ShuffleDataFlushEvent event, long writeTime) {
     super.updateWriteMetrics(event, writeTime);
-    ShuffleServerMetrics.counterTotalHadoopWriteDataSize.inc(event.getSize());
+    Storage storage = event.getUnderStorage();
+    if (storage == null) {
+      LOG.warn("The storage owned by event: {} is null, this should not 
happen", event);
+      return;
+    }
+    
ShuffleServerMetrics.incHadoopStorageWriteDataSize(storage.getStorageHost(), 
event.getSize());
   }
 
   @Override
diff --git 
a/server/src/test/java/org/apache/uniffle/server/ShuffleServerMetricsTest.java 
b/server/src/test/java/org/apache/uniffle/server/ShuffleServerMetricsTest.java
index f19543bd..f7b3a571 100644
--- 
a/server/src/test/java/org/apache/uniffle/server/ShuffleServerMetricsTest.java
+++ 
b/server/src/test/java/org/apache/uniffle/server/ShuffleServerMetricsTest.java
@@ -132,6 +132,42 @@ public class ShuffleServerMetricsTest {
     assertTrue(bingo);
   }
 
+  @Test
+  public void testHadoopStorageWriteDataSize() {
+    // case1
+    String host1 = "hadoop-cluster01";
+    ShuffleServerMetrics.incHadoopStorageWriteDataSize(host1, 1000);
+    assertEquals(
+        1000.0,
+        ShuffleServerMetrics.counterTotalHadoopWriteDataSize
+            .labels(Constants.SHUFFLE_SERVER_VERSION, host1)
+            .get());
+
+    // case2
+    ShuffleServerMetrics.incHadoopStorageWriteDataSize(host1, 500);
+    assertEquals(
+        1500.0,
+        ShuffleServerMetrics.counterTotalHadoopWriteDataSize
+            .labels(Constants.SHUFFLE_SERVER_VERSION, host1)
+            .get());
+
+    // case3
+    String host2 = "hadoop-cluster2";
+    ShuffleServerMetrics.incHadoopStorageWriteDataSize(host2, 2000);
+    assertEquals(
+        2000.0,
+        ShuffleServerMetrics.counterTotalHadoopWriteDataSize
+            .labels(Constants.SHUFFLE_SERVER_VERSION, host2)
+            .get());
+
+    // case4
+    assertEquals(
+        3500.0,
+        ShuffleServerMetrics.counterTotalHadoopWriteDataSize
+            .labels(Constants.SHUFFLE_SERVER_VERSION, 
ShuffleServerMetrics.STORAGE_HOST_LABEL_ALL)
+            .get());
+  }
+
   @Test
   public void testStorageCounter() {
     // test for local storage

Reply via email to