This is an automated email from the ASF dual-hosted git repository.
zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new 43bd09c65 [#1682] feat(server): Introduce localfile isWritable metric
(#1683)
43bd09c65 is described below
commit 43bd09c652889919ad888d31d4e3a5461c93f21d
Author: Junfan Zhang <[email protected]>
AuthorDate: Wed May 8 18:52:55 2024 +0800
[#1682] feat(server): Introduce localfile isWritable metric (#1683)
### What changes were proposed in this pull request?
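Introduce a `local_storage_is_writable` gauge for local file storage, labeled by each storage's base path. `LocalStorageChecker` sets it to 0 when the storage reports it can be written and to 1 otherwise.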
### Why are the changes needed?
For #1682
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing tests.
---
.../java/org/apache/uniffle/server/LocalStorageChecker.java | 10 ++++++++++
.../java/org/apache/uniffle/server/ShuffleServerMetrics.java | 4 ++++
2 files changed, 14 insertions(+)
diff --git
a/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java
b/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java
index 0554035d6..5be0eb392 100644
--- a/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java
+++ b/server/src/main/java/org/apache/uniffle/server/LocalStorageChecker.java
@@ -116,6 +116,12 @@ public class LocalStorageChecker extends Checker {
serviceUsedSpace.addAndGet(getServiceUsedSpace(storageInfo.storageDir));
storageInfo.updateStorageFreeSpace(free);
+
+ boolean isWritable = storageInfo.canWrite();
+ ShuffleServerMetrics.gaugeLocalStorageIsWritable
+ .labels(storageInfo.storage.getBasePath())
+ .set(isWritable ? 0 : 1);
+
if (storageInfo.checkIsSpaceEnough(total, free)) {
num.incrementAndGet();
}
@@ -255,6 +261,10 @@ public class LocalStorageChecker extends Checker {
return isHealthy;
}
+ boolean canWrite() { // Returns whatever the wrapped storage reports from its own canWrite().
+ return storage.canWrite(); // plain delegation to the storage field; no additional checks here
+ }
+
boolean checkStorageReadAndWrite() {
if (storage.isCorrupted()) {
return false;
diff --git
a/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
b/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
index bee4dd48c..140f76566 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java
@@ -77,6 +77,7 @@ public class ShuffleServerMetrics {
private static final String TOTAL_REQUIRE_READ_MEMORY_FAILED =
"total_require_read_memory_failed_num";
+ private static final String LOCAL_STORAGE_IS_WRITABLE =
"local_storage_is_writable";
private static final String LOCAL_STORAGE_TOTAL_DIRS_NUM =
"local_storage_total_dirs_num";
private static final String LOCAL_STORAGE_CORRUPTED_DIRS_NUM =
"local_storage_corrupted_dirs_num";
private static final String LOCAL_STORAGE_TOTAL_SPACE =
"local_storage_total_space";
@@ -187,6 +188,7 @@ public class ShuffleServerMetrics {
public static Gauge.Child gaugeHugePartitionNum;
public static Gauge.Child gaugeAppWithHugePartitionNum;
+ public static Gauge gaugeLocalStorageIsWritable;
public static Gauge.Child gaugeLocalStorageTotalDirsNum;
public static Gauge.Child gaugeLocalStorageCorruptedDirsNum;
public static Gauge.Child gaugeLocalStorageTotalSpace;
@@ -401,6 +403,8 @@ public class ShuffleServerMetrics {
counterTotalPartitionNum =
metricsManager.addLabeledCounter(TOTAL_PARTITION_NUM);
counterTotalHugePartitionNum =
metricsManager.addLabeledCounter(TOTAL_HUGE_PARTITION_NUM);
+ gaugeLocalStorageIsWritable =
+ metricsManager.addGauge(LOCAL_STORAGE_IS_WRITABLE,
LOCAL_DISK_PATH_LABEL);
gaugeLocalStorageTotalDirsNum =
metricsManager.addLabeledGauge(LOCAL_STORAGE_TOTAL_DIRS_NUM);
gaugeLocalStorageCorruptedDirsNum =
metricsManager.addLabeledGauge(LOCAL_STORAGE_CORRUPTED_DIRS_NUM);