This is an automated email from the ASF dual-hosted git repository.
wuzhiguo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/bigtop-manager.git
The following commit(s) were added to refs/heads/main by this push:
new a6d80e49 BIGTOP-4304: Support Prometheus configuration (#127)
a6d80e49 is described below
commit a6d80e49ecc21c649239a7142ec6191e63a738b0
Author: haopeng <[email protected]>
AuthorDate: Wed Dec 25 09:58:49 2024 +0800
BIGTOP-4304: Support Prometheus configuration (#127)
---
.../prometheus/configuration/prometheus-rule.xml | 65 ++++++++++++++++++
.../prometheus/configuration/prometheus.xml | 26 +++++--
.../infra/v1_0_0/prometheus/PrometheusParams.java | 80 ++++++++++++++++++++++
.../v1_0_0/prometheus/PrometheusServerScript.java | 4 +-
.../infra/v1_0_0/prometheus/PrometheusSetup.java | 37 ++++++++++
5 files changed, 204 insertions(+), 8 deletions(-)
diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml
new file mode 100644
index 00000000..e7ee51e1
--- /dev/null
+++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml
@@ -0,0 +1,65 @@
+<?xml version="1.0"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ https://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+-->
+
+<configuration>
+ <property>
+ <name>rules_file_name</name>
+ <value>prometheus_rules.yml</value>
+ <description>Rules file name</description>
+ </property>
+ <property>
+ <name>content</name>
+ <description>This is the freemarker template for rules file</description>
+ <value><![CDATA[
+groups:
+ # Recording rules group: Used to calculate and save new aggregated metrics
+ - name: example_recording_rules
+ interval: 1m # The frequency at which the rules are evaluated
+
+ rules:
+ # Recording rule: Calculate the average CPU usage over the last 5 minutes for each job
+ - record: job:cpu_usage:avg
+ expr: avg(rate(node_cpu_seconds_total{mode="user"}[5m])) by (job)
+ # This creates a new metric `job:cpu_usage:avg` representing the average CPU usage per job
+
+ # Alerting rules group: Used to trigger alerts based on conditions
+ - name: example_alerting_rules
+ interval: 1m # The frequency at which the alerting rules are evaluated
+
+ rules:
+ # Alerting rule: Trigger an alert if the average CPU usage is over 90% for the last 5 minutes
+ - alert: HighCpuUsage
+ expr: avg(rate(node_cpu_seconds_total{mode="user"}[5m])) by (instance) > 0.9
+ # This expression checks if the average CPU usage over the last 5 minutes for each instance is greater than 90%
+ for: 5m # The condition must hold true for 5 minutes before the alert is triggered
+ labels:
+ severity: critical # Set the severity of the alert as 'critical'
+ annotations:
+ summary: "CPU usage on instance {{ $labels.instance }} is over 90% for the last 5 minutes"
+ # Summary of the alert that will appear when it triggers
+ description: "The CPU usage on instance {{ $labels.instance }} has been over 90% for the past 5 minutes."
+ # Detailed description of the alert that will provide more context
+]]>
+ </value>
+ <attrs>
+ <type>longtext</type>
+ </attrs>
+ </property>
+</configuration>
\ No newline at end of file
diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
index 03d7e8a3..215bec8b 100644
--- a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
+++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
@@ -19,6 +19,11 @@
-->
<configuration>
+ <property>
+ <name>port</name>
+ <description>Port on which Prometheus server listens</description>
+ <value>9090</value>
+ </property>
<property>
<name>content</name>
<description>This is the freemarker template for prometheus.yml file</description>
@@ -31,17 +36,26 @@ global:
external_labels:
monitor: 'codelab-monitor'
+# Rule files specifies a list of globs. Rules and alerts are read from
+# all matching files.
+rule_files:
+<#if rules_file_name??>
+ - ${rules_file_name}
+</#if>
+
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- - job_name: 'prometheus'
-
- # Override the global default and scrape targets from this job every 5 seconds.
- scrape_interval: 5s
+<#list scrape_jobs as job>
+ - job_name: '${job.name}'
+<#if job.metrics_path??>
+ metrics_path: "${job.metrics_path}"
+</#if>
+ file_sd_configs:
+ - files: ['${job.targets_file}']
- static_configs:
- - targets: ['localhost:9090']
+</#list>
]]>
</value>
<attrs>
diff --git a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
index 946eb7d3..d9af54a3 100644
--- a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
+++ b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
@@ -19,7 +19,9 @@
package org.apache.bigtop.manager.stack.infra.v1_0_0.prometheus;
import org.apache.bigtop.manager.common.message.entity.payload.CommandPayload;
+import org.apache.bigtop.manager.stack.core.annotations.GlobalParams;
import org.apache.bigtop.manager.stack.core.spi.param.Params;
+import org.apache.bigtop.manager.stack.core.utils.LocalSettings;
import org.apache.bigtop.manager.stack.infra.param.InfraParams;
import com.google.auto.service.AutoService;
@@ -28,6 +30,10 @@ import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import java.text.MessageFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
@Getter
@Slf4j
@@ -35,16 +41,90 @@ import java.text.MessageFormat;
@NoArgsConstructor
public class PrometheusParams extends InfraParams {
+ protected final String PROMETHEUS_SELF_JOB_NAME = "prometheus";
+ protected final String BM_AGENT_JOB_NAME = "bm-agent";
+ protected final String BM_AGENT_PORT = "8081";
+
+ private Map<String, Object> prometheusScrapeJob;
+ private Map<String, Object> agentScrapeJob;
+ private List<Map<String, Object>> scrapeJobs;
+ private String prometheusPort;
+ private String prometheusContent;
+ private String prometheusRulesFilename;
+ private String prometheusRulesFileContent;
+
public PrometheusParams(CommandPayload commandPayload) {
super(commandPayload);
+ scrapeJobs = new ArrayList<>();
+ scrapeJobs.add(prometheusScrapeJob);
+ scrapeJobs.add(agentScrapeJob);
+ globalParamsMap.put("scrape_jobs", scrapeJobs);
+ globalParamsMap.put("rules_file_name", prometheusRulesFilename);
}
public String dataDir() {
return MessageFormat.format("{0}/data", serviceHome());
}
+ public String targetsConfigFile(String jobName) {
+ return MessageFormat.format("{0}/{1}_targets.json", confDir(), jobName);
+ }
+
@Override
public String getServiceName() {
return "prometheus";
}
+
+ protected List<String> getAllHost() {
+ List<String> ips = LocalSettings.hosts().get("all");
+ List<String> hosts = new ArrayList<>();
+ for (String ip : ips) {
+ hosts.add(MessageFormat.format("{0}:{1}", ip, BM_AGENT_PORT));
+ }
+ return hosts;
+ }
+
+ @GlobalParams
+ public Map<String, Object> prometheusJob() {
+ Map<String, Object> configuration = LocalSettings.configurations(getServiceName(), "prometheus");
+ prometheusPort = (String) configuration.get("port");
+ Map<String, Object> job = new HashMap<>();
+ job.put("name", PROMETHEUS_SELF_JOB_NAME);
+ job.put("targets_file", targetsConfigFile(PROMETHEUS_SELF_JOB_NAME));
+ job.put("targets_list", List.of(MessageFormat.format("localhost:{0}", prometheusPort)));
+ prometheusScrapeJob = job;
+ return configuration;
+ }
+
+ @GlobalParams
+ public Map<String, Object> agentJob() {
+ Map<String, Object> job = new HashMap<>();
+ job.put("name", BM_AGENT_JOB_NAME);
+ job.put("targets_file", targetsConfigFile(BM_AGENT_JOB_NAME));
+ job.put("targets_list", getAllHost());
+ job.put("metrics_path", "/actuator/prometheus");
+ agentScrapeJob = job;
+ return LocalSettings.configurations(getServiceName(), "prometheus");
+ }
+
+ @GlobalParams
+ public Map<String, Object> configs() {
+ Map<String, Object> configuration = LocalSettings.configurations(getServiceName(), "prometheus");
+
+ prometheusContent = (String) configuration.get("content");
+ return configuration;
+ }
+
+ @GlobalParams
+ public Map<String, Object> rules() {
+ Map<String, Object> configuration = LocalSettings.configurations(getServiceName(), "prometheus-rule");
+
+ prometheusRulesFilename = (String) configuration.get("rules_file_name");
+ prometheusRulesFileContent = (String) configuration.get("content");
+ return configuration;
+ }
+
+ public String listenAddress() {
+ return MessageFormat.format("0.0.0.0:{0}", prometheusPort);
+ }
}
diff --git a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
index a9e3f3c8..d98c84be 100644
--- a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
+++ b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
@@ -51,8 +51,8 @@ public class PrometheusServerScript extends AbstractServerScript {
configure(params);
PrometheusParams prometheusParams = (PrometheusParams) params;
String cmd = MessageFormat.format(
- "nohup {0}/prometheus --config.file={0}/prometheus.yml --storage.tsdb.path={0}/data > {0}/nohup.out 2>&1 &",
- prometheusParams.serviceHome());
+ "nohup {0}/prometheus --config.file={1}/prometheus.yml --web.listen-address={2} --storage.tsdb.path={0}/data > {0}/nohup.out 2>&1 &",
+ prometheusParams.serviceHome(), prometheusParams.confDir(), prometheusParams.listenAddress());
try {
ShellResult shellResult = LinuxOSUtils.sudoExecCmd(cmd, prometheusParams.user());
if (shellResult.getExitCode() != 0) {
diff --git a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
index 50e4af6c..89d015de 100644
--- a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
+++ b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
@@ -20,6 +20,7 @@ package org.apache.bigtop.manager.stack.infra.v1_0_0.prometheus;
import org.apache.bigtop.manager.common.constants.Constants;
import org.apache.bigtop.manager.common.shell.ShellResult;
+import org.apache.bigtop.manager.stack.core.enums.ConfigType;
import org.apache.bigtop.manager.stack.core.spi.param.Params;
import org.apache.bigtop.manager.stack.core.utils.linux.LinuxFileUtils;
@@ -27,17 +28,53 @@ import lombok.AccessLevel;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
+import java.text.MessageFormat;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
@Slf4j
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class PrometheusSetup {
+ @SuppressWarnings("unchecked")
public static ShellResult config(Params params) {
PrometheusParams prometheusParams = (PrometheusParams) params;
String user = prometheusParams.user();
String group = prometheusParams.group();
LinuxFileUtils.createDirectories(prometheusParams.dataDir(), user, group, Constants.PERMISSION_755, true);
+ LinuxFileUtils.createDirectories(prometheusParams.confDir(), user, group, Constants.PERMISSION_755, true);
+
+ LinuxFileUtils.toFileByTemplate(
+ prometheusParams.getPrometheusContent(),
+ MessageFormat.format("{0}/prometheus.yml", prometheusParams.confDir()),
+ user,
+ group,
+ Constants.PERMISSION_644,
+ prometheusParams.getGlobalParamsMap());
+
+ LinuxFileUtils.toFileByTemplate(
+ prometheusParams.getPrometheusRulesFileContent(),
+ MessageFormat.format(
+ "{0}/{1}", prometheusParams.confDir(), prometheusParams.getPrometheusRulesFilename()),
+ user,
+ group,
+ Constants.PERMISSION_644,
+ prometheusParams.getGlobalParamsMap());
+ for (int i = 0; i < prometheusParams.getScrapeJobs().size(); i++) {
+ Map<String, Object> job = prometheusParams.getScrapeJobs().get(i);
+ Map<String, List<String>> targets = new HashMap<>();
+ targets.put("targets", (List<String>) job.get("targets_list"));
+ LinuxFileUtils.toFile(
+ ConfigType.JSON,
+ (String) job.get("targets_file"),
+ user,
+ group,
+ Constants.PERMISSION_644,
+ List.of(targets));
+ }
return ShellResult.success("Prometheus Configure success!");
}
}