advancedxy commented on code in PR #415: URL: https://github.com/apache/incubator-uniffle/pull/415#discussion_r1050467470
########## common/src/main/java/org/apache/uniffle/common/metrics/MetricReporterFactory.java: ########## @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.uniffle.common.metrics; + +import java.lang.reflect.Constructor; + +import org.apache.commons.lang3.StringUtils; + +import org.apache.uniffle.common.config.RssConf; + +public class MetricReporterFactory { + public static final String REPORT_CLASS = "rss.metrics.reporter.class"; Review Comment: seems like this should be defined as an RSS configuration, and be declared in RssConf? ########## common/src/main/java/org/apache/uniffle/common/metrics/prometheus/PrometheusPushGatewayMetricReporter.java: ########## @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.uniffle.common.metrics.prometheus; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import com.google.common.annotations.VisibleForTesting; +import io.prometheus.client.CollectorRegistry; +import io.prometheus.client.exporter.PushGateway; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.uniffle.common.config.RssConf; +import org.apache.uniffle.common.metrics.AbstractMetricReporter; +import org.apache.uniffle.common.util.ThreadUtils; + +public class PrometheusPushGatewayMetricReporter extends AbstractMetricReporter { + private static final Logger LOG = LoggerFactory.getLogger(PrometheusPushGatewayMetricReporter.class); + static final String PUSHGATEWAY_ADDR = "rss.metrics.prometheus.pushgateway.addr"; + static final String GROUPING_KEY = "rss.metrics.prometheus.pushgateway.groupingkey"; + static final String JOB_NAME = "rss.metrics.prometheus.pushgateway.jobname"; + static final String REPORT_INTEVAL = "rss.metrics.prometheus.pushgateway.report.interval.seconds"; + private ScheduledExecutorService scheduledExecutorService; + private PushGateway pushGateway; + + public PrometheusPushGatewayMetricReporter(RssConf conf, String instanceId) { + super(conf, instanceId); + } + + @Override + public void start() { + if (pushGateway == null) { + String address = conf.getString(PUSHGATEWAY_ADDR, null); + if 
(StringUtils.isEmpty(address)) { + throw new RuntimeException(PUSHGATEWAY_ADDR + " should not be empty!"); + } + pushGateway = new PushGateway(address); + } + String jobName = conf.getString(JOB_NAME, null); + if (StringUtils.isEmpty(jobName)) { + throw new RuntimeException(JOB_NAME + " should not be empty!"); + } + Map<String, String> groupingKey = parseGroupingKey(conf.getString(GROUPING_KEY, "")); + groupingKey.put("instance", instanceId); + int reportInterval = conf.getInteger(REPORT_INTEVAL, 10); + scheduledExecutorService = Executors.newScheduledThreadPool(1, + ThreadUtils.getThreadFactory("PrometheusPushGatewayMetricReporter-%d")); + scheduledExecutorService.scheduleWithFixedDelay(() -> { + for (CollectorRegistry registry : registryList) { + try { + pushGateway.push(registry, jobName, groupingKey); + } catch (Throwable e) { + LOG.error("Failed to send metrics to push gateway.", e); + } + } + }, 0, reportInterval, TimeUnit.SECONDS); + } + + @Override + public void stop() { + if (scheduledExecutorService != null) { + scheduledExecutorService.shutdownNow(); + } + } + + @VisibleForTesting + void setPushGateway(PushGateway pushGateway) { + this.pushGateway = pushGateway; + } + + static Map<String, String> parseGroupingKey(final String groupingKeyConfig) { + Map<String, String> groupingKey = new HashMap<>(); + if (!groupingKeyConfig.isEmpty()) { + String[] kvs = groupingKeyConfig.split(";"); + for (String kv : kvs) { + int idx = kv.indexOf("="); + if (idx < 0) { + LOG.warn("Invalid prometheusPushGateway groupingKey:{}, will be ignored", kv); + continue; + } + + String labelKey = kv.substring(0, idx); + String labelValue = kv.substring(idx + 1); + if (StringUtils.isEmpty(labelKey) + || StringUtils.isEmpty(labelValue)) { + LOG.warn( + "Invalid groupingKey {labelKey:{}, labelValue:{}} must not be empty", + labelKey, + labelValue); + continue; + } + groupingKey.put(labelKey, labelValue); + } + + return groupingKey; Review Comment: nit: this seems unnecessary, 
########## coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java: ########## @@ -182,6 +190,15 @@ private void registerMetrics() { jettyServer.addServlet( new CommonMetricsServlet(JvmMetrics.getCollectorRegistry(), true), "/prometheus/metrics/jvm"); + + String ip = RssUtils.getHostIp(); + int port = coordinatorConf.getInteger(CoordinatorConf.RPC_SERVER_PORT); + metricReporter = MetricReporterFactory.getMetricReporter(coordinatorConf, ip + "-" + port); Review Comment: could we set id for coordinator just like how shuffle server assembles it? Also another problem: it should be possible to set the port to zero and let the system decide which port is free. We may need to create an issue to track this case. ########## common/src/main/java/org/apache/uniffle/common/metrics/MetricReporterFactory.java: ########## @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.uniffle.common.metrics; + +import java.lang.reflect.Constructor; + +import org.apache.commons.lang3.StringUtils; + +import org.apache.uniffle.common.config.RssConf; + +public class MetricReporterFactory { + public static final String REPORT_CLASS = "rss.metrics.reporter.class"; + + public static MetricReporter getMetricReporter(RssConf conf, String instanceId) throws Exception { + String name = conf.getString(REPORT_CLASS, null); + if (StringUtils.isEmpty(name)) { + return null; + } + Class<?> klass = Class.forName(name); + Constructor<?> constructor; + constructor = klass.getConstructor(conf.getClass(), instanceId.getClass()); Review Comment: There are multiple similar code blocks to load a class dynamically. Could you make it a utility function in RssUtils? ########## server/src/main/java/org/apache/uniffle/server/ShuffleServer.java: ########## @@ -236,6 +243,13 @@ private void registerMetrics() { jettyServer.addServlet( new CommonMetricsServlet(JvmMetrics.getCollectorRegistry(), true), "/prometheus/metrics/jvm"); + + metricReporter = MetricReporterFactory.getMetricReporter(shuffleServerConf, id); Review Comment: If a user/administrator misconfigures the metric reporter class, this would throw an exception. I'd prefer to log the error and just disable the metrics reporter function. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
