This is an automated email from the ASF dual-hosted git repository.

roryqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new a6a721f49 [#1467] feat(server): introduce total hdfs write data size for huge partition (#1468)
a6a721f49 is described below

commit a6a721f4935a25c31dd93ca15bcd2a92b9ab1f4d
Author: Junfan Zhang <[email protected]>
AuthorDate: Fri Jan 19 22:30:30 2024 +0800

    [#1467] feat(server): introduce total hdfs write data size for huge partition (#1468)
    
    ### What changes were proposed in this pull request?
    
    Introduce a metric for the total HDFS write data size of huge partitions:
    1. The `total_hadoop_write_data_for_huge_partition` counter is introduced.
    
    ### Why are the changes needed?
    
    For #1467: this shows the size of the data spilled to HDFS for huge partitions.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing tests.
---
 .../uniffle/server/ShuffleServerMetrics.java       | 23 +++++++++++++++++++++-
 .../server/storage/HadoopStorageManager.java       |  3 ++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git 
a/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java 
b/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
index 2ab1a4ba1..0e305c2bb 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
@@ -82,6 +82,8 @@ public class ShuffleServerMetrics {
   private static final String TOTAL_FAILED_WRITTEN_EVENT_NUM = 
"total_failed_written_event_num";
   private static final String TOTAL_DROPPED_EVENT_NUM = 
"total_dropped_event_num";
   private static final String TOTAL_HADOOP_WRITE_DATA = 
"total_hadoop_write_data";
+  private static final String TOTAL_HADOOP_WRITE_DATA_FOR_HUGE_PARTITION =
+      "total_hadoop_write_data_for_huge_partition";
   private static final String TOTAL_LOCALFILE_WRITE_DATA = 
"total_localfile_write_data";
   private static final String LOCAL_DISK_PATH_LABEL = "local_disk_path";
   public static final String LOCAL_DISK_PATH_LABEL_ALL = "ALL";
@@ -197,6 +199,7 @@ public class ShuffleServerMetrics {
   public static Counter counterRemoteStorageFailedWrite;
   public static Counter counterRemoteStorageSuccessWrite;
   public static Counter counterTotalHadoopWriteDataSize;
+  public static Counter counterTotalHadoopWriteDataSizeForHugePartition;
   public static Counter counterTotalLocalFileWriteDataSize;
 
   private static String tags;
@@ -268,12 +271,25 @@ public class ShuffleServerMetrics {
     }
   }
 
-  public static void incHadoopStorageWriteDataSize(String storageHost, long 
size) {
+  public static void incHadoopStorageWriteDataSize(
+      String storageHost, long size, boolean isOwnedByHugePartition) {
     if (StringUtils.isEmpty(storageHost)) {
       return;
     }
     counterTotalHadoopWriteDataSize.labels(tags, storageHost).inc(size);
     counterTotalHadoopWriteDataSize.labels(tags, 
STORAGE_HOST_LABEL_ALL).inc(size);
+    if (isOwnedByHugePartition) {
+      counterTotalHadoopWriteDataSizeForHugePartition.labels(tags, 
storageHost).inc(size);
+      counterTotalHadoopWriteDataSizeForHugePartition
+          .labels(tags, STORAGE_HOST_LABEL_ALL)
+          .inc(size);
+    }
+  }
+
+  // only for test cases
+  @VisibleForTesting
+  public static void incHadoopStorageWriteDataSize(String storageHost, long 
size) {
+    incHadoopStorageWriteDataSize(storageHost, size, false);
   }
 
   private static void setUpMetrics() {
@@ -301,6 +317,11 @@ public class ShuffleServerMetrics {
     counterTotalHadoopWriteDataSize =
         metricsManager.addCounter(
             TOTAL_HADOOP_WRITE_DATA, Constants.METRICS_TAG_LABEL_NAME, 
STORAGE_HOST_LABEL);
+    counterTotalHadoopWriteDataSizeForHugePartition =
+        metricsManager.addCounter(
+            TOTAL_HADOOP_WRITE_DATA_FOR_HUGE_PARTITION,
+            Constants.METRICS_TAG_LABEL_NAME,
+            STORAGE_HOST_LABEL);
     counterTotalLocalFileWriteDataSize =
         metricsManager.addCounter(TOTAL_LOCALFILE_WRITE_DATA, 
LOCAL_DISK_PATH_LABEL);
 
diff --git 
a/server/src/main/java/org/apache/uniffle/server/storage/HadoopStorageManager.java
 
b/server/src/main/java/org/apache/uniffle/server/storage/HadoopStorageManager.java
index 443646dc5..496c48551 100644
--- 
a/server/src/main/java/org/apache/uniffle/server/storage/HadoopStorageManager.java
+++ 
b/server/src/main/java/org/apache/uniffle/server/storage/HadoopStorageManager.java
@@ -73,7 +73,8 @@ public class HadoopStorageManager extends 
SingleStorageManager {
       LOG.warn("The storage owned by event: {} is null, this should not 
happen", event);
       return;
     }
-    
ShuffleServerMetrics.incHadoopStorageWriteDataSize(storage.getStorageHost(), 
event.getSize());
+    ShuffleServerMetrics.incHadoopStorageWriteDataSize(
+        storage.getStorageHost(), event.getSize(), 
event.isOwnedByHugePartition());
   }
 
   @Override

Reply via email to