This is an automated email from the ASF dual-hosted git repository.

wuzhiguo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/bigtop-manager.git


The following commit(s) were added to refs/heads/main by this push:
     new a6d80e49 BIGTOP-4304: Support Prometheus configuration (#127)
a6d80e49 is described below

commit a6d80e49ecc21c649239a7142ec6191e63a738b0
Author: haopeng <[email protected]>
AuthorDate: Wed Dec 25 09:58:49 2024 +0800

    BIGTOP-4304: Support Prometheus configuration (#127)
---
 .../prometheus/configuration/prometheus-rule.xml   | 65 ++++++++++++++++++
 .../prometheus/configuration/prometheus.xml        | 26 +++++--
 .../infra/v1_0_0/prometheus/PrometheusParams.java  | 80 ++++++++++++++++++++++
 .../v1_0_0/prometheus/PrometheusServerScript.java  |  4 +-
 .../infra/v1_0_0/prometheus/PrometheusSetup.java   | 37 ++++++++++
 5 files changed, 204 insertions(+), 8 deletions(-)

diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml
new file mode 100644
index 00000000..e7ee51e1
--- /dev/null
+++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml
@@ -0,0 +1,65 @@
+<?xml version="1.0"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~    https://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+-->
+
+<configuration>
+    <property>
+        <name>rules_file_name</name>
+        <value>prometheus_rules.yml</value>
+        <description>Rules file name</description>
+    </property>
+    <property>
+        <name>content</name>
+        <description>This is the freemarker template for rules file</description>
+        <value><![CDATA[
+groups:
+  # Recording rules group: Used to calculate and save new aggregated metrics
+  - name: example_recording_rules
+    interval: 1m  # The frequency at which the rules are evaluated
+
+    rules:
+      # Recording rule: Calculate the average CPU usage over the last 5 minutes for each job
+      - record: job:cpu_usage:avg
+        expr: avg(rate(node_cpu_seconds_total{mode="user"}[5m])) by (job)
+        # This creates a new metric `job:cpu_usage:avg` representing the average CPU usage per job
+
+  # Alerting rules group: Used to trigger alerts based on conditions
+  - name: example_alerting_rules
+    interval: 1m  # The frequency at which the alerting rules are evaluated
+
+    rules:
+      # Alerting rule: Trigger an alert if the average CPU usage is over 90% for the last 5 minutes
+      - alert: HighCpuUsage
+        expr: avg(rate(node_cpu_seconds_total{mode="user"}[5m])) by (instance) > 0.9
+        # This expression checks if the average CPU usage over the last 5 minutes for each instance is greater than 90%
+        for: 5m  # The condition must hold true for 5 minutes before the alert is triggered
+        labels:
+          severity: critical  # Set the severity of the alert as 'critical'
+        annotations:
+          summary: "CPU usage on instance {{ $labels.instance }} is over 90% for the last 5 minutes"
+          # Summary of the alert that will appear when it triggers
+          description: "The CPU usage on instance {{ $labels.instance }} has been over 90% for the past 5 minutes."
+          # Detailed description of the alert that will provide more context
+]]>
+        </value>
+        <attrs>
+            <type>longtext</type>
+        </attrs>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
index 03d7e8a3..215bec8b 100644
--- a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
+++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
@@ -19,6 +19,11 @@
 -->
 
 <configuration>
+    <property>
+        <name>port</name>
+        <description>Port on which Prometheus server listens</description>
+        <value>9090</value>
+    </property>
     <property>
         <name>content</name>
         <description>This is the freemarker template for prometheus.yml file</description>
@@ -31,17 +36,26 @@ global:
   external_labels:
     monitor: 'codelab-monitor'
 
+# Rule files specifies a list of globs. Rules and alerts are read from
+# all matching files.
+rule_files:
+<#if rules_file_name??>
+  - ${rules_file_name}
+</#if>
+
 # A scrape configuration containing exactly one endpoint to scrape:
 # Here it's Prometheus itself.
 scrape_configs:
   # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
-  - job_name: 'prometheus'
-
-    # Override the global default and scrape targets from this job every 5 seconds.
-    scrape_interval: 5s
+<#list scrape_jobs as job>
+  - job_name: '${job.name}'
+<#if job.metrics_path??>
+    metrics_path: "${job.metrics_path}"
+</#if>
+    file_sd_configs:
+      - files: ['${job.targets_file}']
 
-    static_configs:
-      - targets: ['localhost:9090']
+</#list>
 ]]>
         </value>
         <attrs>
diff --git a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
index 946eb7d3..d9af54a3 100644
--- a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
+++ b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
@@ -19,7 +19,9 @@
 package org.apache.bigtop.manager.stack.infra.v1_0_0.prometheus;
 
 import org.apache.bigtop.manager.common.message.entity.payload.CommandPayload;
+import org.apache.bigtop.manager.stack.core.annotations.GlobalParams;
 import org.apache.bigtop.manager.stack.core.spi.param.Params;
+import org.apache.bigtop.manager.stack.core.utils.LocalSettings;
 import org.apache.bigtop.manager.stack.infra.param.InfraParams;
 
 import com.google.auto.service.AutoService;
@@ -28,6 +30,10 @@ import lombok.NoArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 
 import java.text.MessageFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 
 @Getter
 @Slf4j
@@ -35,16 +41,90 @@ import java.text.MessageFormat;
 @NoArgsConstructor
 public class PrometheusParams extends InfraParams {
 
+    protected final String PROMETHEUS_SELF_JOB_NAME = "prometheus";
+    protected final String BM_AGENT_JOB_NAME = "bm-agent";
+    protected final String BM_AGENT_PORT = "8081";
+
+    private Map<String, Object> prometheusScrapeJob;
+    private Map<String, Object> agentScrapeJob;
+    private List<Map<String, Object>> scrapeJobs;
+    private String prometheusPort;
+    private String prometheusContent;
+    private String prometheusRulesFilename;
+    private String prometheusRulesFileContent;
+
     public PrometheusParams(CommandPayload commandPayload) {
         super(commandPayload);
+        scrapeJobs = new ArrayList<>(); // jobs exposed to the prometheus.yml template as "scrape_jobs"
+        scrapeJobs.add(prometheusScrapeJob); // NOTE(review): only assigned in prometheusJob(); presumably super(...) has already invoked it - confirm, else this adds null
+        scrapeJobs.add(agentScrapeJob); // NOTE(review): only assigned in agentJob(); same ordering assumption
+        globalParamsMap.put("scrape_jobs", scrapeJobs);
+        globalParamsMap.put("rules_file_name", prometheusRulesFilename); // NOTE(review): assigned in rules(); same ordering assumption
     }
 
     public String dataDir() {
         return MessageFormat.format("{0}/data", serviceHome());
     }
 
+    public String targetsConfigFile(String jobName) { // returns "<confDir()>/<jobName>_targets.json"
+        return MessageFormat.format("{0}/{1}_targets.json", confDir(), jobName);
+    }
+
     @Override
     public String getServiceName() {
         return "prometheus";
     }
+
+    protected List<String> getAllHost() {
+        // Turns every entry registered under the "all" hosts key into an "<entry>:<BM_AGENT_PORT>" target.
+        List<String> allIps = LocalSettings.hosts().get("all");
+        List<String> agentTargets = new ArrayList<>(allIps.size());
+        allIps.forEach(
+                ip -> agentTargets.add(MessageFormat.format("{0}:{1}", ip, BM_AGENT_PORT)));
+        return agentTargets;
+    }
+
+    @GlobalParams
+    public Map<String, Object> prometheusJob() { // builds the self-scrape job and returns the "prometheus" config
+        Map<String, Object> prometheusConfig = LocalSettings.configurations(getServiceName(), "prometheus");
+        prometheusPort = (String) prometheusConfig.get("port"); // also read later by listenAddress()
+        Map<String, Object> selfJob = new HashMap<>();
+        selfJob.put("targets_file", targetsConfigFile(PROMETHEUS_SELF_JOB_NAME));
+        selfJob.put("targets_list", List.of(MessageFormat.format("localhost:{0}", prometheusPort)));
+        selfJob.put("name", PROMETHEUS_SELF_JOB_NAME);
+        prometheusScrapeJob = selfJob; // published only once fully populated
+        return prometheusConfig;
+    }
+
+    @GlobalParams
+    public Map<String, Object> agentJob() { // builds the agent scrape job and returns the "prometheus" config
+        Map<String, Object> bmAgentJob = new HashMap<>();
+        bmAgentJob.put("metrics_path", "/actuator/prometheus");
+        bmAgentJob.put("name", BM_AGENT_JOB_NAME);
+        bmAgentJob.put("targets_file", targetsConfigFile(BM_AGENT_JOB_NAME));
+        bmAgentJob.put("targets_list", getAllHost());
+        agentScrapeJob = bmAgentJob;
+        return LocalSettings.configurations(getServiceName(), "prometheus");
+    }
+
+    @GlobalParams
+    public Map<String, Object> configs() {
+        // Caches the "content" property (the prometheus.yml template) and returns the whole config map.
+        Map<String, Object> prometheusConfig = LocalSettings.configurations(getServiceName(), "prometheus");
+        prometheusContent = (String) prometheusConfig.get("content");
+        return prometheusConfig;
+    }
+
+    @GlobalParams
+    public Map<String, Object> rules() {
+        // Caches the rules file name and rules template from the "prometheus-rule" config, then returns it.
+        Map<String, Object> ruleConfig = LocalSettings.configurations(getServiceName(), "prometheus-rule");
+        prometheusRulesFilename = (String) ruleConfig.get("rules_file_name");
+        prometheusRulesFileContent = (String) ruleConfig.get("content");
+        return ruleConfig;
+    }
+
+    public String listenAddress() { // returns "0.0.0.0:<prometheusPort>"; the port field is assigned in prometheusJob()
+        return MessageFormat.format("0.0.0.0:{0}", prometheusPort);
+    }
 }
diff --git a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
index a9e3f3c8..d98c84be 100644
--- a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
+++ b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
@@ -51,8 +51,8 @@ public class PrometheusServerScript extends AbstractServerScript {
         configure(params);
         PrometheusParams prometheusParams = (PrometheusParams) params;
         String cmd = MessageFormat.format(
-                "nohup {0}/prometheus --config.file={0}/prometheus.yml 
--storage.tsdb.path={0}/data > {0}/nohup.out 2>&1 &",
-                prometheusParams.serviceHome());
+                "nohup {0}/prometheus --config.file={1}/prometheus.yml 
--web.listen-address={2} --storage.tsdb.path={0}/data > {0}/nohup.out 2>&1 &",
+                prometheusParams.serviceHome(), prometheusParams.confDir(), 
prometheusParams.listenAddress());
         try {
             ShellResult shellResult = LinuxOSUtils.sudoExecCmd(cmd, 
prometheusParams.user());
             if (shellResult.getExitCode() != 0) {
diff --git a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
index 50e4af6c..89d015de 100644
--- a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
+++ b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
@@ -20,6 +20,7 @@ package org.apache.bigtop.manager.stack.infra.v1_0_0.prometheus;
 
 import org.apache.bigtop.manager.common.constants.Constants;
 import org.apache.bigtop.manager.common.shell.ShellResult;
+import org.apache.bigtop.manager.stack.core.enums.ConfigType;
 import org.apache.bigtop.manager.stack.core.spi.param.Params;
 import org.apache.bigtop.manager.stack.core.utils.linux.LinuxFileUtils;
 
@@ -27,17 +28,53 @@ import lombok.AccessLevel;
 import lombok.NoArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 
+import java.text.MessageFormat;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 @Slf4j
 @NoArgsConstructor(access = AccessLevel.PRIVATE)
 public class PrometheusSetup {
 
+    @SuppressWarnings("unchecked")
     public static ShellResult config(Params params) {
         PrometheusParams prometheusParams = (PrometheusParams) params;
         String user = prometheusParams.user();
         String group = prometheusParams.group();
 
         LinuxFileUtils.createDirectories(prometheusParams.dataDir(), user, group, Constants.PERMISSION_755, true);
+        LinuxFileUtils.createDirectories(prometheusParams.confDir(), user, group, Constants.PERMISSION_755, true);
+        // Render the main configuration template to <confDir>/prometheus.yml using the global params map.
+        LinuxFileUtils.toFileByTemplate(
+                prometheusParams.getPrometheusContent(),
+                MessageFormat.format("{0}/prometheus.yml", prometheusParams.confDir()),
+                user,
+                group,
+                Constants.PERMISSION_644,
+                prometheusParams.getGlobalParamsMap());
+        // Render the rules template to <confDir>/<rules file name> with the same params map.
+        LinuxFileUtils.toFileByTemplate(
+                prometheusParams.getPrometheusRulesFileContent(),
+                MessageFormat.format(
+                        "{0}/{1}", prometheusParams.confDir(), prometheusParams.getPrometheusRulesFilename()),
+                user,
+                group,
+                Constants.PERMISSION_644,
+                prometheusParams.getGlobalParamsMap());
 
+        for (Map<String, Object> scrapeJob : prometheusParams.getScrapeJobs()) {
+            // Per job: write a one-element list wrapping {"targets" -> targets_list} to the job's targets_file.
+            Map<String, List<String>> targetGroup = new HashMap<>();
+            targetGroup.put("targets", (List<String>) scrapeJob.get("targets_list"));
+            LinuxFileUtils.toFile(
+                    ConfigType.JSON,
+                    (String) scrapeJob.get("targets_file"),
+                    user,
+                    group,
+                    Constants.PERMISSION_644,
+                    List.of(targetGroup));
+        }
         return ShellResult.success("Prometheus Configure success!");
     }
 }

Reply via email to