Github user Ethanlm commented on a diff in the pull request: https://github.com/apache/storm/pull/2754#discussion_r209067011 --- Diff: storm-server/src/main/java/org/apache/storm/daemon/nimbus/Nimbus.java --- @@ -2891,33 +2884,26 @@ public void launchServer() throws Exception { .mapToDouble(SupervisorResources::getTotalCpu) .sum()); StormMetricsRegistry.registerGauge("nimbus:longest-scheduling-time-ms", () -> { + //We want to update longest scheduling time in real time in case scheduler get stuck + // It's normal to see some very minor jiggling in value as race condition may happen Long currTime = Time.nanoTime(); - Long startTime = schedulingStartTime.get(); - //There could be race condition here but seems trivial, elapsed is - // guaranteed to be no longer than real elapsed time of scheduling - Long longest = longestSchedulingTime.get(); - if (startTime != null) { - longest = currTime - startTime > longest ? currTime - startTime : longest; - } - //To millis. How should I put the constant for magic numbers? - return longest * 1e-6; + Long startTime = schedulingStartTimeNs.get(); + return TimeUnit.NANOSECONDS.toMillis(startTime == null ? + longestSchedulingTime.get() : Math.max(currTime - startTime, longestSchedulingTime.get())); }); StormMetricsRegistry.registerMeter("nimbus:num-launched").mark(); StormMetricsRegistry.startMetricsReporters(conf); - //IntelliJ suggests clusterConsumerExceutors always non null (unnecessary if statement) - if (clusterConsumerExceutors != null) { - timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.STORM_CLUSTER_METRICS_CONSUMER_PUBLISH_INTERVAL_SECS)), - () -> { - try { - if (isLeader()) { - sendClusterMetricsToExecutors(); - } - } catch (Exception e) { - throw new RuntimeException(e); + timer.scheduleRecurring(0, ObjectReader.getInt(conf.get(DaemonConfig.STORM_CLUSTER_METRICS_CONSUMER_PUBLISH_INTERVAL_SECS)), + () -> { + try { + if (isLeader()) { + sendClusterMetricsToExecutors(); --- End diff -- looks like a typo here?
---