This is an automated email from the ASF dual-hosted git repository.

xianjingfeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new 0cdb5239 [#1001] improvement: support get all metrics by one request 
(#1002)
0cdb5239 is described below

commit 0cdb5239764b16a6c08bd6bad81fecc9940dd3da
Author: xianjingfeng <[email protected]>
AuthorDate: Tue Jul 11 21:04:07 2023 +0800

    [#1001] improvement: support get all metrics by one request (#1002)
    
    ### What changes were proposed in this pull request?
    Support get all metrics by one request
    
    ### Why are the changes needed?
    Fix: https://github.com/apache/incubator-uniffle/issues/1001
    
    ### Does this PR introduce any user-facing change?
    /metrics and /prometheus/metrics will return all metrics of the server
    
    ### How was this patch tested?
    UT
---
 .../common/web/CoalescedCollectorRegistry.java     | 90 ++++++++++++++++++++++
 .../common/web/resource/BaseMetricResource.java    |  3 +
 .../common/web/resource/MetricResource.java        |  8 +-
 .../uniffle/coordinator/CoordinatorServer.java     |  4 +
 .../coordinator/metric/CoordinatorMetricsTest.java | 18 +++++
 docs/coordinator_guide.md                          | 10 ---
 docs/index.md                                      |  9 ++-
 docs/metrics_guide.md                              | 53 +++++++++++++
 docs/server_guide.md                               | 11 ---
 .../org/apache/uniffle/server/ShuffleServer.java   |  4 +
 10 files changed, 184 insertions(+), 26 deletions(-)

diff --git 
a/common/src/main/java/org/apache/uniffle/common/web/CoalescedCollectorRegistry.java
 
b/common/src/main/java/org/apache/uniffle/common/web/CoalescedCollectorRegistry.java
new file mode 100644
index 00000000..d949f4fc
--- /dev/null
+++ 
b/common/src/main/java/org/apache/uniffle/common/web/CoalescedCollectorRegistry.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.uniffle.common.web;
+
+import java.util.Enumeration;
+import java.util.Set;
+import java.util.function.Function;
+
+import io.prometheus.client.Collector;
+import io.prometheus.client.CollectorRegistry;
+
+public class CoalescedCollectorRegistry extends CollectorRegistry {
+  private final CollectorRegistry[] registries;
+
+  public CoalescedCollectorRegistry(CollectorRegistry... registries) {
+    this.registries = registries;
+  }
+
+  @Override
+  public Enumeration<Collector.MetricFamilySamples> metricFamilySamples() {
+    return new CoalescedEnumeration((index) -> 
registries[index].metricFamilySamples());
+  }
+
+  @Override
+  public Enumeration<Collector.MetricFamilySamples> 
filteredMetricFamilySamples(Set<String> includedNames) {
+    return new CoalescedEnumeration((index) -> registries[index]
+        .filteredMetricFamilySamples(includedNames));
+  }
+
+  @Override
+  public Double getSampleValue(String name) {
+    return this.getSampleValue(name, new String[0], new String[0]);
+  }
+
+  @Override
+  public Double getSampleValue(String name, String[] labelNames, String[] 
labelValues) {
+    Double ret = null;
+    for (CollectorRegistry collectorRegistry : registries) {
+      ret = collectorRegistry.getSampleValue(name, labelNames, labelValues);
+      if (ret != null) {
+        return ret;
+      }
+    }
+    return ret;
+  }
+
+  private class CoalescedEnumeration implements 
Enumeration<Collector.MetricFamilySamples> {
+    private final Function<Integer, 
Enumeration<Collector.MetricFamilySamples>> function;
+    Enumeration<Collector.MetricFamilySamples> currentEnumeration;
+    int index = 0;
+
+    CoalescedEnumeration(Function<Integer, 
Enumeration<Collector.MetricFamilySamples>> function) {
+      this.function = function;
+    }
+
+    @Override
+    public boolean hasMoreElements() {
+      if (currentEnumeration == null || !currentEnumeration.hasMoreElements()) 
{
+        if (index >= registries.length) {
+          return false;
+        }
+        currentEnumeration = function.apply(index++);
+      }
+      return currentEnumeration.hasMoreElements();
+    }
+
+    @Override
+    public Collector.MetricFamilySamples nextElement() {
+      if (!currentEnumeration.hasMoreElements()) {
+        return null;
+      }
+      return currentEnumeration.nextElement();
+    }
+  }
+}
diff --git 
a/common/src/main/java/org/apache/uniffle/common/web/resource/BaseMetricResource.java
 
b/common/src/main/java/org/apache/uniffle/common/web/resource/BaseMetricResource.java
index 66351131..9e25c63b 100644
--- 
a/common/src/main/java/org/apache/uniffle/common/web/resource/BaseMetricResource.java
+++ 
b/common/src/main/java/org/apache/uniffle/common/web/resource/BaseMetricResource.java
@@ -26,6 +26,9 @@ import 
org.apache.uniffle.common.exception.InvalidRequestException;
 public abstract class BaseMetricResource {
 
   protected CollectorRegistry getCollectorRegistry(ServletContext 
servletContext, String type) {
+    if (type == null) {
+      type = "all";
+    }
     CollectorRegistry registry = (CollectorRegistry) 
servletContext.getAttribute(
         CollectorRegistry.class.getCanonicalName() + "#" + type);
     if (registry == null) {
diff --git 
a/common/src/main/java/org/apache/uniffle/common/web/resource/MetricResource.java
 
b/common/src/main/java/org/apache/uniffle/common/web/resource/MetricResource.java
index 9979e6ed..d6d18be8 100644
--- 
a/common/src/main/java/org/apache/uniffle/common/web/resource/MetricResource.java
+++ 
b/common/src/main/java/org/apache/uniffle/common/web/resource/MetricResource.java
@@ -33,13 +33,19 @@ import org.apache.hbase.thirdparty.javax.ws.rs.core.Context;
 import org.apache.hbase.thirdparty.javax.ws.rs.core.MediaType;
 
 @Path("/metrics")
+@Produces({ MediaType.APPLICATION_JSON })
 public class MetricResource extends BaseMetricResource {
   @Context
   protected ServletContext servletContext;
 
+  @GET
+  @Path("/")
+  public MetricsJsonObj metrics(@QueryParam("name[]") Set<String> names) {
+    return metrics(null, names);
+  }
+
   @GET
   @Path("/{type}")
-  @Produces({ MediaType.APPLICATION_JSON })
   public MetricsJsonObj metrics(
       @PathParam("type") String type,
       @QueryParam("name[]") Set<String> names) {
diff --git 
a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
 
b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
index 9c32dc68..9210cf56 100644
--- 
a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
+++ 
b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
@@ -35,6 +35,7 @@ import org.apache.uniffle.common.rpc.ServerInterface;
 import org.apache.uniffle.common.security.SecurityConfig;
 import org.apache.uniffle.common.security.SecurityContextFactory;
 import org.apache.uniffle.common.util.RssUtils;
+import org.apache.uniffle.common.web.CoalescedCollectorRegistry;
 import org.apache.uniffle.common.web.JettyServer;
 import org.apache.uniffle.coordinator.metric.CoordinatorGrpcMetrics;
 import org.apache.uniffle.coordinator.metric.CoordinatorMetrics;
@@ -193,6 +194,9 @@ public class CoordinatorServer extends ReconfigurableBase {
         grpcMetrics.getCollectorRegistry());
     jettyServer.registerInstance(CollectorRegistry.class.getCanonicalName() + 
"#jvm",
         JvmMetrics.getCollectorRegistry());
+    jettyServer.registerInstance(CollectorRegistry.class.getCanonicalName() + 
"#all",
+        new 
CoalescedCollectorRegistry(CoordinatorMetrics.getCollectorRegistry(),
+            grpcMetrics.getCollectorRegistry(), 
JvmMetrics.getCollectorRegistry()));
   }
 
   private void registerMetrics() throws Exception {
diff --git 
a/coordinator/src/test/java/org/apache/uniffle/coordinator/metric/CoordinatorMetricsTest.java
 
b/coordinator/src/test/java/org/apache/uniffle/coordinator/metric/CoordinatorMetricsTest.java
index d0ebae2f..f35a58c4 100644
--- 
a/coordinator/src/test/java/org/apache/uniffle/coordinator/metric/CoordinatorMetricsTest.java
+++ 
b/coordinator/src/test/java/org/apache/uniffle/coordinator/metric/CoordinatorMetricsTest.java
@@ -21,6 +21,9 @@ import java.io.File;
 import java.io.FileWriter;
 import java.io.PrintWriter;
 import java.nio.file.Files;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
@@ -38,6 +41,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class CoordinatorMetricsTest {
 
+  private static final String METRICS_URL = "http://127.0.0.1:12345/metrics";;
   private static final String SERVER_METRICS_URL = 
"http://127.0.0.1:12345/metrics/server";;
   private static final String SERVER_JVM_URL = 
"http://127.0.0.1:12345/metrics/jvm";;
   private static final String SERVER_GRPC_URL = 
"http://127.0.0.1:12345/metrics/grpc";;
@@ -126,6 +130,20 @@ public class CoordinatorMetricsTest {
     assertEquals(9, actualObj.get("metrics").size());
   }
 
+  @Test
+  public void testAllMetrics() throws Exception {
+    String content = TestUtils.httpGet(METRICS_URL);
+    ObjectMapper mapper = new ObjectMapper();
+    JsonNode actualObj = mapper.readTree(content);
+    assertEquals(2, actualObj.size());
+    Iterator<JsonNode> metrics = actualObj.get("metrics").elements();
+    Set<String> metricNames = new HashSet<>();
+    metrics.forEachRemaining((metric) -> 
metricNames.add(metric.get("name").textValue()));
+    assertTrue(metricNames.contains("total_app_num"));
+    assertTrue(metricNames.contains("grpc_total"));
+    assertTrue(metricNames.contains("jvm_info"));
+  }
+
   private static void writeRemoteStorageConf(File cfgFile, String value) 
throws Exception {
     FileWriter fileWriter = new FileWriter(cfgFile);
     PrintWriter printWriter = new PrintWriter(fileWriter);
diff --git a/docs/coordinator_guide.md b/docs/coordinator_guide.md
index acf0ca00..f33d6084 100644
--- a/docs/coordinator_guide.md
+++ b/docs/coordinator_guide.md
@@ -127,16 +127,6 @@ AccessQuotaChecker is a checker when the number of 
concurrent tasks submitted by
 |rss.coordinator.quota.default.path|-|A configuration file for the number of 
apps for a user-defined user.|
 |rss.coordinator.quota.default.app.num|5|Default number of apps at user level.|
 
-### PrometheusPushGatewayMetricReporter settings
-PrometheusPushGatewayMetricReporter is one of the built-in metrics reporter, 
which will allow user pushes metrics to a [Prometheus 
Pushgateway](https://github.com/prometheus/pushgateway), which can be scraped 
by Prometheus.
-
-|Property Name|Default|        Description                                     
                                                                                
                                                                                
                                                                                
                                     |
-|---|---|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-|rss.metrics.reporter.class|org.apache.uniffle.common.metrics.<br/>prometheus.PrometheusPushGatewayMetricReporter|The
 class of metrics reporter.|
-|rss.metrics.prometheus.pushgateway.addr|-| The PushGateway server host URL 
including scheme, host name, and port.                                          
                                                                                
                                                                                
                                                      |
-|rss.metrics.prometheus.pushgateway.groupingkey|-| Specifies the grouping key 
which is the group and global labels of all metrics. The label name and value 
are separated by '=', and labels are separated by ';', e.g., k1=v1;k2=v2. 
Please ensure that your grouping key meets the [Prometheus 
requirements](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels).
 |
-|rss.metrics.prometheus.pushgateway.jobname|-| The job name under which 
metrics will be pushed.                                                         
                                                                                
                                                                                
                                                             |
-|rss.metrics.prometheus.pushgateway.report.interval.seconds|10| The interval 
in seconds for the reporter to report metrics.                                  
                                                                                
                                                                                
                                                                                
   |
 
 ## RESTful API(beta)
 
diff --git a/docs/index.md b/docs/index.md
index b0bb8479..26ffa85b 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -29,13 +29,14 @@ to store shuffle data on remote servers.
 
 More advanced details for Uniffle users are available in the following:
 
-- [Uniffle Coordinator Guide](coordinator_guide.html)
+- [Uniffle Coordinator Guide](coordinator_guide.md)
 
-- [Uniffle Shuffle Server Guide](server_guide.html)
-
-- [Uniffle Shuffle Client Guide](client_guide.html)
+- [Uniffle Shuffle Server Guide](server_guide.md)
 
+- [Uniffle Shuffle Client Guide](client_guide.md)
 
+- [Metrics Guide](metrics_guide.md)
+
 Here you can read API docs for Uniffle along with its submodules.
 
 - [Java API (Javadoc)](apidocs/index.html)
diff --git a/docs/metrics_guide.md b/docs/metrics_guide.md
new file mode 100644
index 00000000..4ad5a6d7
--- /dev/null
+++ b/docs/metrics_guide.md
@@ -0,0 +1,53 @@
+---
+layout: page
+displayTitle: Metrics Guide
+title: Metrics Guide
+description: Metrics Guide
+license: |
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+---
+# Metrics Guide
+
+## Summary
+This document describes how to collect metrics from the coordinator and shuffle servers.
+
+### Fetch metrics by REST API
+``` shell
+# For json format
+curl http://${SERVER_HOST}:${SERVER_HTTP_PORT}/metrics # fetch all metrics
+curl http://${SERVER_HOST}:${SERVER_HTTP_PORT}/metrics/server # only fetch 
server metrics
+curl http://${SERVER_HOST}:${SERVER_HTTP_PORT}/metrics/grpc # only fetch grpc 
metrics
+curl http://${SERVER_HOST}:${SERVER_HTTP_PORT}/metrics/jvm # only fetch jvm 
metrics
+
+# For Prometheus format
+curl http://${SERVER_HOST}:${SERVER_HTTP_PORT}/prometheus/metrics # fetch all metrics
+curl http://${SERVER_HOST}:${SERVER_HTTP_PORT}/prometheus/metrics/server # only 
fetch server metrics
+curl http://${SERVER_HOST}:${SERVER_HTTP_PORT}/prometheus/metrics/grpc # only 
fetch grpc metrics
+curl http://${SERVER_HOST}:${SERVER_HTTP_PORT}/prometheus/metrics/jvm # only 
fetch jvm metrics
+
+```
+
+### Report metrics to Prometheus automatically
+PrometheusPushGatewayMetricReporter is one of the built-in metrics reporters. 
It allows users to push metrics to a [Prometheus 
Pushgateway](https://github.com/prometheus/pushgateway), which can then be scraped 
by Prometheus.
+
+|Property Name|Default|        Description                                     
                                                                                
                                                                                
                                                                                
                                     |
+|---|---|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+|rss.metrics.reporter.class|org.apache.uniffle.common.metrics.<br/>prometheus.PrometheusPushGatewayMetricReporter|The
 class of metrics reporter.|
+|rss.metrics.prometheus.pushgateway.addr|-| The PushGateway server host URL 
including scheme, host name, and port.                                          
                                                                                
                                                                                
                                                      |
+|rss.metrics.prometheus.pushgateway.groupingkey|-| Specifies the grouping key 
which is the group and global labels of all metrics. The label name and value 
are separated by '=', and labels are separated by ';', e.g., k1=v1;k2=v2. 
Please ensure that your grouping key meets the [Prometheus 
requirements](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels).
 |
+|rss.metrics.prometheus.pushgateway.jobname|-| The job name under which 
metrics will be pushed.                                                         
                                                                                
                                                                                
                                                             |
+|rss.metrics.prometheus.pushgateway.report.interval.seconds|10| The interval 
in seconds for the reporter to report metrics.                                  
                                                                                
                                                                                
                                                                                
   |
+
diff --git a/docs/server_guide.md b/docs/server_guide.md
index 93f04854..5ab42b61 100644
--- a/docs/server_guide.md
+++ b/docs/server_guide.md
@@ -102,17 +102,6 @@ This document will introduce how to deploy Uniffle shuffle 
servers.
 |rss.server.decommission.shutdown|true| Whether shutdown the server after 
server is decommissioned                                                        
                                                                          |
 
 
-### PrometheusPushGatewayMetricReporter settings
-PrometheusPushGatewayMetricReporter is one of the built-in metrics reporter, 
which will allow user pushes metrics to a [Prometheus 
Pushgateway](https://github.com/prometheus/pushgateway), which can be scraped 
by Prometheus.
-
-|Property Name|Default|        Description                                     
                                                                                
                                                                                
                                                                                
                                     |
-|---|---|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-|rss.metrics.reporter.class|org.apache.uniffle.common.metrics.<br/>prometheus.PrometheusPushGatewayMetricReporter|The
 class of metrics reporter.|
-|rss.metrics.prometheus.pushgateway.addr|-| The PushGateway server host URL 
including scheme, host name, and port.                                          
                                                                                
                                                                                
                                                      |
-|rss.metrics.prometheus.pushgateway.groupingkey|-| Specifies the grouping key 
which is the group and global labels of all metrics. The label name and value 
are separated by '=', and labels are separated by ';', e.g., k1=v1;k2=v2. 
Please ensure that your grouping key meets the [Prometheus 
requirements](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels).
 |
-|rss.metrics.prometheus.pushgateway.jobname|-| The job name under which 
metrics will be pushed.                                                         
                                                                                
                                                                                
                                                             |
-|rss.metrics.prometheus.pushgateway.report.interval.seconds|10| The interval 
in seconds for the reporter to report metrics.                                  
                                                                                
                                                                                
                                                                                
   |
-
 ### Huge Partition Optimization
 A huge partition is a common problem for Spark/MR and so on, caused by data 
skew. And it can cause the shuffle server to become unstable. To solve this, we 
introduce some mechanisms to limit the writing of huge partitions to avoid 
affecting regular partitions, more details can be found in 
[ISSUE-378](https://github.com/apache/incubator-uniffle/issues/378). The basic 
rules for limiting large partitions are memory usage limits and flushing 
individual buffers directly to persistent storage.
 
diff --git a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java 
b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
index 9b5eadfd..02d7f755 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
@@ -49,6 +49,7 @@ import org.apache.uniffle.common.util.Constants;
 import org.apache.uniffle.common.util.ExitUtils;
 import org.apache.uniffle.common.util.RssUtils;
 import org.apache.uniffle.common.util.ThreadUtils;
+import org.apache.uniffle.common.web.CoalescedCollectorRegistry;
 import org.apache.uniffle.common.web.JettyServer;
 import org.apache.uniffle.server.buffer.ShuffleBufferManager;
 import org.apache.uniffle.server.netty.StreamServer;
@@ -214,6 +215,9 @@ public class ShuffleServer {
         grpcMetrics.getCollectorRegistry());
     jettyServer.registerInstance(CollectorRegistry.class.getCanonicalName() + 
"#jvm",
         JvmMetrics.getCollectorRegistry());
+    jettyServer.registerInstance(CollectorRegistry.class.getCanonicalName() + 
"#all",
+        new 
CoalescedCollectorRegistry(ShuffleServerMetrics.getCollectorRegistry(),
+            grpcMetrics.getCollectorRegistry(), 
JvmMetrics.getCollectorRegistry()));
 
     SecurityConfig securityConfig = null;
     if (shuffleServerConf.getBoolean(RSS_SECURITY_HADOOP_KERBEROS_ENABLE)) {

Reply via email to