autumnust commented on a change in pull request #2864: GOBBLIN-1018: Report GC counts and durations from Gobblin containers … URL: https://github.com/apache/incubator-gobblin/pull/2864#discussion_r366541361
########## File path: gobblin-cluster/src/main/java/org/apache/gobblin/cluster/ContainerHealthMetricsService.java ########## @@ -94,35 +133,75 @@ protected void runOneIteration() throws Exception { this.totalSwapSpaceSize.set(this.operatingSystemMXBean.getTotalSwapSpaceSize()); this.freePhysicalMemSize.set(this.operatingSystemMXBean.getFreePhysicalMemorySize()); this.processHeapUsedSize.set(this.memoryMXBean.getHeapMemoryUsage().getUsed()); + + GcStats gcStats = collectGcStats(); + //Since GC Beans report accumulated counts/durations, we need to subtract the previous values to obtain the counts/durations + // since the last measurement time. + this.minorGcCount.set(gcStats.getMinorCount() - this.minorGcCount.get()); + this.minorGcDuration.set(gcStats.getMinorDuration() - this.minorGcDuration.get()); + this.majorGcCount.set(gcStats.getMajorCount() - this.majorGcCount.get()); + this.majorGcDuration.set(gcStats.getMajorDuration() - this.majorGcDuration.get()); + this.unknownGcCount.set(gcStats.getUnknownCount() - this.unknownGcCount.get()); + this.unknownGcDuration.set(gcStats.getUnknownDuration() - this.unknownGcDuration.get()); } protected List<ContextAwareGauge<Double>> buildGaugeList() { List<ContextAwareGauge<Double>> gaugeList = new ArrayList<>(); - gaugeList.add(RootMetricContext.get().newContextAwareGauge(ContainerHealthMetrics.PROCESS_CPU_LOAD, - () -> this.processCpuLoad.get())); - gaugeList.add(RootMetricContext.get().newContextAwareGauge(ContainerHealthMetrics.SYSTEM_CPU_LOAD, - () -> this.systemCpuLoad.get())); - gaugeList.add(RootMetricContext.get().newContextAwareGauge(ContainerHealthMetrics.SYSTEM_LOAD_AVG, - () -> this.systemLoadAvg.get())); - gaugeList.add(RootMetricContext.get().newContextAwareGauge(ContainerHealthMetrics.COMMITTED_VMEM_SIZE, - () -> Long.valueOf(this.committedVmemSize.get()).doubleValue())); - gaugeList.add(RootMetricContext.get().newContextAwareGauge(ContainerHealthMetrics.PROCESS_CPU_TIME, - () -> 
Long.valueOf(this.processCpuTime.get()).doubleValue())); - gaugeList.add(RootMetricContext.get().newContextAwareGauge(ContainerHealthMetrics.FREE_SWAP_SPACE_SIZE, - () -> Long.valueOf(this.freeSwapSpaceSize.get()).doubleValue())); - gaugeList.add(RootMetricContext.get().newContextAwareGauge(ContainerHealthMetrics.NUM_AVAILABLE_PROCESSORS, - () -> Long.valueOf(this.numAvailableProcessors.get()).doubleValue())); - gaugeList.add(RootMetricContext.get().newContextAwareGauge(ContainerHealthMetrics.TOTAL_PHYSICAL_MEM_SIZE, - () -> Long.valueOf(this.totalPhysicalMemSize.get()).doubleValue())); - gaugeList.add(RootMetricContext.get().newContextAwareGauge(ContainerHealthMetrics.TOTAL_SWAP_SPACE_SIZE, - () -> Long.valueOf(this.totalSwapSpaceSize.get()).doubleValue())); - gaugeList.add(RootMetricContext.get().newContextAwareGauge(ContainerHealthMetrics.FREE_PHYSICAL_MEM_SIZE, - () -> Long.valueOf(this.freePhysicalMemSize.get()).doubleValue())); - gaugeList.add(RootMetricContext.get().newContextAwareGauge(ContainerHealthMetrics.PROCESS_HEAP_USED_SIZE, - () -> Long.valueOf(this.processHeapUsedSize.get()).doubleValue())); + + gaugeList.add(getGauge(ContainerHealthMetrics.PROCESS_CPU_LOAD, this.processCpuLoad)); + gaugeList.add(getGauge(ContainerHealthMetrics.SYSTEM_CPU_LOAD, this.systemCpuLoad)); + gaugeList.add(getGauge(ContainerHealthMetrics.SYSTEM_LOAD_AVG, this.systemLoadAvg)); + gaugeList.add(getGauge(ContainerHealthMetrics.COMMITTED_VMEM_SIZE, this.committedVmemSize)); + gaugeList.add(getGauge(ContainerHealthMetrics.PROCESS_CPU_TIME, this.processCpuTime)); + gaugeList.add(getGauge(ContainerHealthMetrics.FREE_SWAP_SPACE_SIZE, this.freeSwapSpaceSize)); + gaugeList.add(getGauge(ContainerHealthMetrics.NUM_AVAILABLE_PROCESSORS, this.numAvailableProcessors)); + gaugeList.add(getGauge(ContainerHealthMetrics.TOTAL_PHYSICAL_MEM_SIZE, this.totalPhysicalMemSize)); + gaugeList.add(getGauge(ContainerHealthMetrics.TOTAL_SWAP_SPACE_SIZE, this.totalSwapSpaceSize)); + 
gaugeList.add(getGauge(ContainerHealthMetrics.FREE_PHYSICAL_MEM_SIZE, this.freePhysicalMemSize)); + gaugeList.add(getGauge(ContainerHealthMetrics.PROCESS_HEAP_USED_SIZE, this.processHeapUsedSize)); + gaugeList.add(getGauge(ContainerHealthMetrics.MINOR_GC_COUNT, this.minorGcCount)); + gaugeList.add(getGauge(ContainerHealthMetrics.MINOR_GC_DURATION, this.minorGcDuration)); + gaugeList.add(getGauge(ContainerHealthMetrics.MAJOR_GC_COUNT, this.majorGcCount)); + gaugeList.add(getGauge(ContainerHealthMetrics.MAJOR_GC_DURATION, this.majorGcDuration)); + gaugeList.add(getGauge(ContainerHealthMetrics.UNKNOWN_GC_COUNT, this.unknownGcCount)); + gaugeList.add(getGauge(ContainerHealthMetrics.UNKNOWN_GC_DURATION, this.unknownGcDuration)); return gaugeList; } + private ContextAwareGauge<Double> getGauge(String name, Object metric) { + if (metric instanceof AtomicLong) { + return RootMetricContext.get().newContextAwareGauge(name, () -> Long.valueOf(((AtomicLong) metric).get()).doubleValue()); Review comment: I have no strong opinion here, but using `doubleValue()` for a `Long` value can save us from using `instanceof`, which I believe should be avoided as much as possible in OOP. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services