Copilot commented on code in PR #240: URL: https://github.com/apache/bigtop-manager/pull/240#discussion_r2200970537
########## bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/prometheus/PrometheusProxy.java: ########## @@ -0,0 +1,490 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.bigtop.manager.server.prometheus; + +import org.apache.bigtop.manager.server.model.vo.ClusterMetricsVO; +import org.apache.bigtop.manager.server.model.vo.HostMetricsVO; + +import org.springframework.http.MediaType; +import org.springframework.util.CollectionUtils; +import org.springframework.web.reactive.function.BodyInserters; +import org.springframework.web.reactive.function.client.WebClient; + +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.time.Duration; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public class PrometheusProxy { + + private final WebClient webClient; + + public static final String MEM_IDLE = "memIdle"; + public static final String MEM_TOTAL = "memTotal"; + public static final 
String DISK_IDLE = "diskFreeSpace"; + public static final String DISK_TOTAL = "diskTotalSpace"; + public static final String FILE_OPEN_DESCRIPTOR = "fileOpenDescriptor"; + public static final String FILE_TOTAL_DESCRIPTOR = "fileTotalDescriptor"; + public static final String CPU_LOAD_AVG_MIN_1 = "cpuLoadAvgMin_1"; + public static final String CPU_LOAD_AVG_MIN_5 = "cpuLoadAvgMin_5"; + public static final String CPU_LOAD_AVG_MIN_15 = "cpuLoadAvgMin_15"; + public static final String CPU_USAGE = "cpuUsage"; + public static final String PHYSICAL_CORES = "physical_cores"; + public static final String DISK_READ = "diskRead"; + public static final String DISK_WRITE = "diskWrite"; + + private static final ThreadLocal<List<String>> timestampCache = ThreadLocal.withInitial(ArrayList::new); + + public PrometheusProxy(String prometheusHost, Integer prometheusPort) { + this.webClient = WebClient.builder() + .baseUrl("http://" + prometheusHost + ":" + prometheusPort) + .build(); + } + + public PrometheusResponse query(String params) { + return webClient + .post() + .uri(uriBuilder -> uriBuilder.path("/api/v1/query").build()) + .contentType(MediaType.APPLICATION_FORM_URLENCODED) + .body(BodyInserters.fromFormData("query", params).with("timeout", "10")) + .retrieve() + .bodyToMono(PrometheusResponse.class) + .block(); + } + + public PrometheusResponse queryRange(String query, String start, String end, String step) { + return webClient + .post() + .uri(uriBuilder -> uriBuilder.path("/api/v1/query_range").build()) + .contentType(MediaType.APPLICATION_FORM_URLENCODED) + .body(BodyInserters.fromFormData("query", query) + .with("timeout", "10") + .with("start", start) + .with("end", end) + .with("step", step)) + .retrieve() + .bodyToMono(PrometheusResponse.class) + .block(); + } + + public HostMetricsVO queryAgentsInfo(String agentIpv4, String interval) { + timestampCache.set(getTimestampsList(processInternal(interval))); + + HostMetricsVO res = new HostMetricsVO(); + if 
(!agentIpv4.isBlank()) { + // Instant metrics + Map<String, BigDecimal> agentCpu = retrieveAgentCpu(agentIpv4); + Map<String, BigDecimal> agentMem = retrieveAgentMemory(agentIpv4); + Map<String, BigDecimal> agentDisk = retrieveAgentDisk(agentIpv4); + Map<String, BigDecimal> agentDiskIO = retrieveAgentDiskIO(agentIpv4); + + // Use physical cores to check if the metrics is starting collect + if (!agentCpu.containsKey(PHYSICAL_CORES)) { + return res; + } + + res.setCpuUsageCur( + agentCpu.get(CPU_USAGE).multiply(new BigDecimal("100")).toString()); + res.setMemoryUsageCur((agentMem.get(MEM_TOTAL).subtract(agentMem.get(MEM_IDLE))) + .divide(agentMem.get(MEM_TOTAL), 4, RoundingMode.HALF_UP) + .multiply(new BigDecimal("100")) + .toString()); + res.setDiskUsageCur(agentDisk + .get(DISK_TOTAL) + .subtract(agentDisk.get(DISK_IDLE)) + .divide(agentDisk.get(DISK_TOTAL), 4, RoundingMode.HALF_UP) + .multiply(new BigDecimal("100")) + .toString()); + res.setFileDescriptorUsage(agentCpu.get(FILE_OPEN_DESCRIPTOR) + .divide(agentCpu.get(FILE_TOTAL_DESCRIPTOR), 4, RoundingMode.HALF_UP) + .multiply(new BigDecimal("100")) + .toString()); + res.setDiskReadCur(agentDiskIO.get(DISK_READ).toString()); + res.setDiskWriteCur(agentDiskIO.get(DISK_WRITE).toString()); + + // Range metrics + Map<String, List<BigDecimal>> agentCpuInterval = retrieveAgentCpu(agentIpv4, interval); + Map<String, List<BigDecimal>> agentMemInterval = retrieveAgentMemory(agentIpv4, interval); + Map<String, List<BigDecimal>> agentDiskIOInterval = retrieveAgentDiskIO(agentIpv4, interval); + + res.setCpuUsage(convertList(agentCpuInterval.get(CPU_USAGE), 100)); + res.setSystemLoad1(convertList(agentCpuInterval.get(CPU_LOAD_AVG_MIN_1))); + res.setSystemLoad5(convertList(agentCpuInterval.get(CPU_LOAD_AVG_MIN_5))); + res.setSystemLoad15(convertList(agentCpuInterval.get(CPU_LOAD_AVG_MIN_15))); + res.setMemoryUsage(convertList(agentMemInterval.get("memUsage"), 100)); + 
res.setDiskRead(convertList(agentDiskIOInterval.get(DISK_READ))); + res.setDiskWrite(convertList(agentDiskIOInterval.get(DISK_WRITE))); + } + + res.setTimestamps(timestampCache.get()); + timestampCache.remove(); + return res; + } + + public ClusterMetricsVO queryClustersInfo(List<String> agentIpv4s, String interval) { + timestampCache.set(getTimestampsList(processInternal(interval))); + + ClusterMetricsVO res = new ClusterMetricsVO(); + if (!agentIpv4s.isEmpty()) { + BigDecimal totalPhysicalCores = new BigDecimal("0.0"); + BigDecimal totalMemSpace = new BigDecimal("0.0"); + + BigDecimal usedPhysicalCores = new BigDecimal("0.0"); + BigDecimal totalMemIdle = new BigDecimal("0.0"); + + List<BigDecimal> timeUsedCores = getEmptyList(); + List<BigDecimal> timeMemIdle = getEmptyList(); + + for (String agentIpv4 : agentIpv4s) { + // Instant Metrics + Map<String, BigDecimal> agentCpu = retrieveAgentCpu(agentIpv4); + Map<String, BigDecimal> agentMem = retrieveAgentMemory(agentIpv4); + + // Use physical cores to check if the metrics is starting collect + if (!agentCpu.containsKey(PHYSICAL_CORES)) { + return res; + } + + BigDecimal cpuUsage = agentCpu.get(CPU_USAGE); + BigDecimal physicalCores = agentCpu.get(PHYSICAL_CORES); + BigDecimal usedCores = cpuUsage.multiply(physicalCores); + BigDecimal memIdle = agentMem.get(MEM_IDLE); + BigDecimal memTotal = agentMem.get(MEM_TOTAL); + + totalPhysicalCores = totalPhysicalCores.add(physicalCores); + usedPhysicalCores = usedPhysicalCores.add(usedCores); + totalMemIdle = totalMemIdle.add(memIdle); + totalMemSpace = totalMemSpace.add(memTotal); + + // Range Metrics + List<BigDecimal> cpuUsageInterval = + retrieveAgentCpu(agentIpv4, interval).get(CPU_USAGE); + for (int i = 0; i < cpuUsageInterval.size(); i++) { + BigDecimal c = cpuUsageInterval.get(i); + if (c != null) { + c = c.multiply(physicalCores); + BigDecimal b = timeUsedCores.get(i); + if (b == null) { + b = new BigDecimal("0.0"); + } + + timeUsedCores.set(i, c.add(b)); + } + } + 
+ List<BigDecimal> memIdleInterval = + retrieveAgentMemory(agentIpv4, interval).get(MEM_IDLE); + for (int i = 0; i < memIdleInterval.size(); i++) { + BigDecimal m = memIdleInterval.get(i); + if (m != null) { + BigDecimal b = timeMemIdle.get(i); + if (b == null) { + b = new BigDecimal("0.0"); + } + + timeMemIdle.set(i, m.add(b)); + } + } + } + + // Instant Metrics + res.setCpuUsageCur(usedPhysicalCores + .divide(totalPhysicalCores, 4, RoundingMode.HALF_UP) + .multiply(new BigDecimal("100")) + .toString()); + res.setMemoryUsageCur(totalMemSpace + .subtract(totalMemIdle) + .divide(totalMemSpace, 4, RoundingMode.HALF_UP) + .multiply(new BigDecimal("100")) + .toString()); + + // Range Metrics + List<BigDecimal> cpuUsageList = getEmptyList(); + List<BigDecimal> memUsageList = getEmptyList(); + + for (int i = 0; i < timeUsedCores.size(); i++) { + BigDecimal usedCores = timeUsedCores.get(i); + if (usedCores != null) { + usedCores = usedCores.divide(totalPhysicalCores, 4, RoundingMode.HALF_UP); + cpuUsageList.set(i, usedCores); + } + } + + for (int i = 0; i < timeMemIdle.size(); i++) { + BigDecimal memIdle = timeMemIdle.get(i); + if (memIdle != null) { + memIdle = totalMemSpace.subtract(memIdle).divide(totalMemSpace, 4, RoundingMode.HALF_UP); + memUsageList.set(i, memIdle); + } + } + + res.setCpuUsage(convertList(cpuUsageList, 100)); + res.setMemoryUsage(convertList(memUsageList, 100)); + } + + res.setTimestamps(timestampCache.get()); + timestampCache.remove(); + return res; + } + + public Map<String, BigDecimal> retrieveAgentCpu(String iPv4addr) { + Map<String, BigDecimal> map = new HashMap<>(); + String params = String.format("agent_host_monitoring_cpu{iPv4addr=\"%s\"}", iPv4addr); + PrometheusResponse response = query(params); + for (PrometheusResult result : response.getData().getResult()) { + String key = result.getMetric().get("cpuUsage"); + map.put(key, new BigDecimal(result.getValue().get(1))); + } + + // Get common metrics + if 
(!CollectionUtils.isEmpty(response.getData().getResult())) { + Map<String, String> metric = response.getData().getResult().get(0).getMetric(); + map.put(PHYSICAL_CORES, new BigDecimal(metric.get(PHYSICAL_CORES))); + map.put(FILE_OPEN_DESCRIPTOR, new BigDecimal(metric.get(FILE_OPEN_DESCRIPTOR))); + map.put(FILE_TOTAL_DESCRIPTOR, new BigDecimal(metric.get(FILE_TOTAL_DESCRIPTOR))); + } Review Comment: There's no null check for `response` or `response.getData()` before accessing `getResult()`, which may lead to NullPointerExceptions if the Prometheus API call fails; add defensive null checks. ```suggestion if (response == null || response.getData() == null || CollectionUtils.isEmpty(response.getData().getResult())) { return map; // Return empty map if response or data is null } for (PrometheusResult result : response.getData().getResult()) { String key = result.getMetric().get("cpuUsage"); map.put(key, new BigDecimal(result.getValue().get(1))); } // Get common metrics Map<String, String> metric = response.getData().getResult().get(0).getMetric(); map.put(PHYSICAL_CORES, new BigDecimal(metric.get(PHYSICAL_CORES))); map.put(FILE_OPEN_DESCRIPTOR, new BigDecimal(metric.get(FILE_OPEN_DESCRIPTOR))); map.put(FILE_TOTAL_DESCRIPTOR, new BigDecimal(metric.get(FILE_TOTAL_DESCRIPTOR))); ``` ########## bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/helper/ComponentStageHelper.java: ########## @@ -99,13 +99,13 @@ public static List<Stage> createComponentStages( stages.add(new ComponentConfigureStage(stageContext)); break; case INIT: - stageContext = createStageContext(componentName, hostnames, commandDTO); + stageContext = createStageContext(componentName, List.of(hostnames.get(0)), commandDTO); stages.add(new ComponentInitStage(stageContext)); break; case PREPARE: // Prepare phase runs after component started, client component shouldn't create this. 
if (!StackUtils.isClientComponent(componentName)) { - stageContext = createStageContext(componentName, hostnames, commandDTO); + stageContext = createStageContext(componentName, List.of(hostnames.get(0)), commandDTO); Review Comment: Restricting INIT and PREPARE stages to only the first hostname may skip running stages on other hosts; consider passing the full hostnames list or explicitly documenting that change. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscribe@bigtop.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org