[FLINK-5340] [metrics] Add an uptime and downtime metric to the Execution Graph.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/719d0cf1
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/719d0cf1
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/719d0cf1

Branch: refs/heads/master
Commit: 719d0cf19664556e62f808469fb641127c3f4410
Parents: 231bec8
Author: Stephan Ewen <[email protected]>
Authored: Wed Mar 29 18:24:13 2017 +0200
Committer: Stephan Ewen <[email protected]>
Committed: Wed Mar 29 21:52:47 2017 +0200

----------------------------------------------------------------------
 .../executiongraph/ExecutionGraphBuilder.java   |  4 +
 .../executiongraph/metrics/DownTimeGauge.java   | 79 ++++++++++++++++++++
 .../executiongraph/metrics/UpTimeGauge.java     | 67 +++++++++++++++++
 3 files changed, 150 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/719d0cf1/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java
----------------------------------------------------------------------
diff --git 
a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java
 
b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java
index f1da8bd..a6455f5 100644
--- 
a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java
+++ 
b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraphBuilder.java
@@ -33,7 +33,9 @@ import 
org.apache.flink.runtime.checkpoint.CompletedCheckpointStore;
 import org.apache.flink.runtime.checkpoint.CheckpointStatsTracker;
 import org.apache.flink.runtime.client.JobExecutionException;
 import org.apache.flink.runtime.client.JobSubmissionException;
+import org.apache.flink.runtime.executiongraph.metrics.DownTimeGauge;
 import org.apache.flink.runtime.executiongraph.metrics.RestartTimeGauge;
+import org.apache.flink.runtime.executiongraph.metrics.UpTimeGauge;
 import org.apache.flink.runtime.executiongraph.restart.RestartStrategy;
 import org.apache.flink.runtime.instance.SlotProvider;
 import org.apache.flink.runtime.jobgraph.JobGraph;
@@ -247,6 +249,8 @@ public class ExecutionGraphBuilder {
                // create all the metrics for the Execution Graph
 
                metrics.gauge(RestartTimeGauge.METRIC_NAME, new 
RestartTimeGauge(executionGraph));
+               metrics.gauge(DownTimeGauge.METRIC_NAME, new 
DownTimeGauge(executionGraph));
+               metrics.gauge(UpTimeGauge.METRIC_NAME, new 
UpTimeGauge(executionGraph));
 
                return executionGraph;
        }

http://git-wip-us.apache.org/repos/asf/flink/blob/719d0cf1/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/DownTimeGauge.java
----------------------------------------------------------------------
diff --git 
a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/DownTimeGauge.java
 
b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/DownTimeGauge.java
new file mode 100644
index 0000000..5f24587
--- /dev/null
+++ 
b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/DownTimeGauge.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.executiongraph.metrics;
+
+import org.apache.flink.metrics.Gauge;
+import org.apache.flink.runtime.executiongraph.ExecutionGraph;
+import org.apache.flink.runtime.jobgraph.JobStatus;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * A gauge that returns (in milliseconds) how long a job has not been running any
+ * more, in case it is in a failing/recovering situation. Running jobs naturally
+ * return a value of zero.
+ * 
+ * <p>For jobs that have never run (new, not yet scheduled jobs), this gauge returns
+ * {@value #NOT_YET_RUNNING}, and for jobs that are not running any more, it returns
+ * {@value #NO_LONGER_RUNNING}.
+ */
+public class DownTimeGauge implements Gauge<Long> {
+
+       public static final String METRIC_NAME = "downtime";
+
+       private static final long NOT_YET_RUNNING = 0L;
+
+       private static final long NO_LONGER_RUNNING = -1L;
+
+       // 
------------------------------------------------------------------------
+
+       private final ExecutionGraph eg;
+
+       public DownTimeGauge(ExecutionGraph executionGraph) {
+               this.eg = checkNotNull(executionGraph);
+       }
+
+       // 
------------------------------------------------------------------------
+
+       @Override
+       public Long getValue() {
+               final JobStatus status = eg.getState();
+
+               if (status == JobStatus.RUNNING) {
+                       // running right now - no downtime
+                       return 0L;
+               }
+               else if (status.isTerminalState()) {
+                       // not running any more -> finished or not on leader
+                       return NO_LONGER_RUNNING;
+               }
+               else {
+                       final long runningTimestamp = 
eg.getStatusTimestamp(JobStatus.RUNNING);
+                       if (runningTimestamp > 0) {
+                               // job was running at some point and is not running now
+                               // we use 'Math.max' here to avoid negative values when clocks change
+                               return Math.max(System.currentTimeMillis() - 
runningTimestamp, 0);
+                       }
+                       else {
+                               // job was never scheduled so far
+                               return NOT_YET_RUNNING;
+                       }
+               }
+       }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/flink/blob/719d0cf1/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/UpTimeGauge.java
----------------------------------------------------------------------
diff --git 
a/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/UpTimeGauge.java
 
b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/UpTimeGauge.java
new file mode 100644
index 0000000..d3f6224
--- /dev/null
+++ 
b/flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/metrics/UpTimeGauge.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.executiongraph.metrics;
+
+import org.apache.flink.metrics.Gauge;
+import org.apache.flink.runtime.executiongraph.ExecutionGraph;
+import org.apache.flink.runtime.jobgraph.JobStatus;
+
+import static org.apache.flink.util.Preconditions.checkNotNull;
+
+/**
+ * A gauge that returns (in milliseconds) how long a job has been running.
+ * 
+ * <p>For jobs that are not running any more, it returns {@value #NO_LONGER_RUNNING}.
+ */
+public class UpTimeGauge implements Gauge<Long> {
+
+       public static final String METRIC_NAME = "uptime";
+
+       private static final long NO_LONGER_RUNNING = -1L;
+
+       // 
------------------------------------------------------------------------
+
+       private final ExecutionGraph eg;
+
+       public UpTimeGauge(ExecutionGraph executionGraph) {
+               this.eg = checkNotNull(executionGraph);
+       }
+
+       // 
------------------------------------------------------------------------
+
+       @Override
+       public Long getValue() {
+               final JobStatus status = eg.getState();
+
+               if (status == JobStatus.RUNNING) {
+                       // running right now - report the uptime
+                       final long runningTimestamp = 
eg.getStatusTimestamp(JobStatus.RUNNING);
+                       // we use 'Math.max' here to avoid negative values when clocks change
+                       return Math.max(System.currentTimeMillis() - 
runningTimestamp, 0);
+               }
+               else if (status.isTerminalState()) {
+                       // not running any more -> finished or not on leader
+                       return NO_LONGER_RUNNING;
+               }
+               else {
+                       // not yet running or not up at the moment
+                       return 0L;
+               }
+       }
+}
\ No newline at end of file

Reply via email to