This is an automated email from the ASF dual-hosted git repository.
chetanm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/openwhisk.git
The following commit(s) were added to refs/heads/master by this push:
new 3e89aa5 OpenWhisk User Events (#4584)
3e89aa5 is described below
commit 3e89aa51f79598a5156910b4752f9ec82b13a7c0
Author: Cosmin Stanciu <[email protected]>
AuthorDate: Tue Sep 24 21:31:25 2019 -0700
OpenWhisk User Events (#4584)
The user event service aggregates the metric events sent on the
`events` topic and exposes them as Prometheus (or Kamon) metrics. Out-of-the-box
dashboards are provided for the Grafana/Prometheus mode, which provide detailed
insights into performance metrics at the cluster/namespace/action level.
---
.../apache/openwhisk/core/connector/Message.scala | 47 +-
.../openwhisk/core/entity/ActivationResult.scala | 17 +-
.../openwhisk/core/entity/WhiskActivation.scala | 4 +
core/monitoring/user-events/.dockerignore | 5 +
core/monitoring/user-events/Dockerfile | 34 +
core/monitoring/user-events/README.md | 55 +
core/monitoring/user-events/build.gradle | 53 +
.../compose/grafana/dashboards/global-metrics.json | 422 ++++++
.../grafana/dashboards/openwhisk_events.json | 1539 ++++++++++++++++++++
.../compose/grafana/dashboards/top-namespaces.json | 473 ++++++
.../grafana/provisioning/dashboards/dashboard.yml | 28 +
.../provisioning/datasources/datasource.yml | 67 +
.../user-events/compose/prometheus/prometheus.yml | 30 +
.../monitoring/user-events/images/demo_landing.png | Bin 0 -> 516563 bytes
core/monitoring/user-events/init.sh | 25 +
.../src/main/resources/application.conf | 52 +
.../user-events/src/main/resources/reference.conf | 27 +
.../src/main/resources/whisk-logback.xml | 25 +
.../core/monitoring/metrics/EventConsumer.scala | 145 ++
.../core/monitoring/metrics/KamonRecorder.scala | 111 ++
.../openwhisk/core/monitoring/metrics/Main.scala | 45 +
.../core/monitoring/metrics/MetricNames.scala | 52 +-
.../core/monitoring/metrics/OpenWhiskEvents.scala | 65 +
.../monitoring/metrics/PrometheusEventsApi.scala | 49 +
.../monitoring/metrics/PrometheusRecorder.scala | 250 ++++
.../core/monitoring/metrics/ApiTests.scala | 64 +
.../core/monitoring/metrics/EventsTestHelper.scala | 45 +
.../core/monitoring/metrics/KafkaSpecBase.scala | 56 +
.../monitoring/metrics/KamonRecorderTests.scala | 157 ++
.../monitoring/metrics/OpenWhiskEventsTests.scala | 84 ++
.../metrics/PrometheusRecorderTests.scala | 122 ++
docs/metrics.md | 3 +
settings.gradle | 1 +
tests/build.gradle | 1 +
.../apache/openwhisk/common/UserEventTests.scala | 6 +-
.../core/connector/test/EventMessageTests.scala | 8 +-
tools/jenkins/apache/dockerhub.groovy | 2 +-
37 files changed, 4122 insertions(+), 47 deletions(-)
diff --git
a/common/scala/src/main/scala/org/apache/openwhisk/core/connector/Message.scala
b/common/scala/src/main/scala/org/apache/openwhisk/core/connector/Message.scala
index 9a1a586..c8fb64d 100644
---
a/common/scala/src/main/scala/org/apache/openwhisk/core/connector/Message.scala
+++
b/common/scala/src/main/scala/org/apache/openwhisk/core/connector/Message.scala
@@ -21,6 +21,9 @@ import scala.util.Try
import spray.json._
import org.apache.openwhisk.common.TransactionId
import org.apache.openwhisk.core.entity._
+import scala.concurrent.duration._
+import java.util.concurrent.TimeUnit
+import org.apache.openwhisk.core.entity.ActivationResponse.statusForCode
/** Basic trait for messages that are sent on a message bus connector. */
trait Message {
@@ -283,22 +286,47 @@ object EventMessageBody extends DefaultJsonProtocol {
case class Activation(name: String,
statusCode: Int,
- duration: Long,
- waitTime: Long,
- initTime: Long,
+ duration: Duration,
+ waitTime: Duration,
+ initTime: Duration,
kind: String,
conductor: Boolean,
memory: Int,
causedBy: Option[String])
extends EventMessageBody {
- val typeName = "Activation"
+ val typeName = Activation.typeName
override def serialize = toJson.compactPrint
+ def entityPath: FullyQualifiedEntityName =
EntityPath(name).toFullyQualifiedEntityName
def toJson = Activation.activationFormat.write(this)
+
+ def status: String = statusForCode(statusCode)
+
+ def isColdStart: Boolean = initTime != Duration.Zero
+
+ def namespace: String = entityPath.path.root.name
+
+ def action: String = entityPath.fullPath.relativePath.get.namespace
+
}
object Activation extends DefaultJsonProtocol {
+
+ val typeName = "Activation"
def parse(msg: String) = Try(activationFormat.read(msg.parseJson))
+
+ // Durations are exchanged as a plain JSON number holding milliseconds.
+ private implicit val durationFormat: RootJsonFormat[Duration] = new RootJsonFormat[Duration] {
+   // Non-finite durations have no millisecond value, so they are written as 0.
+   override def write(obj: Duration): JsValue = obj match {
+     case o if o.isFinite => JsNumber(o.toMillis)
+     case _ => JsNumber.zero
+   }
+
+   // Zero and negative numbers collapse to Duration.Zero. Any non-numeric JSON value is
+   // rejected with spray-json's DeserializationException instead of a bare MatchError.
+   override def read(json: JsValue): Duration = json match {
+     case JsNumber(n) if n <= 0 => Duration.Zero
+     case JsNumber(n) => toDuration(n.longValue)
+     case other => deserializationError(s"Expected a duration in milliseconds as JsNumber, but got $other")
+   }
+ }
+
implicit val activationFormat =
jsonFormat(
Activation.apply _,
@@ -323,9 +351,9 @@ object Activation extends DefaultJsonProtocol {
Activation(
fqn,
a.response.statusCode,
- a.duration.getOrElse(0),
-
a.annotations.getAs[Long](WhiskActivation.waitTimeAnnotation).getOrElse(0),
-
a.annotations.getAs[Long](WhiskActivation.initTimeAnnotation).getOrElse(0),
+ toDuration(a.duration.getOrElse(0)),
+
toDuration(a.annotations.getAs[Long](WhiskActivation.waitTimeAnnotation).getOrElse(0)),
+
toDuration(a.annotations.getAs[Long](WhiskActivation.initTimeAnnotation).getOrElse(0)),
kind,
a.annotations.getAs[Boolean](WhiskActivation.conductorAnnotation).getOrElse(false),
a.annotations
@@ -335,6 +363,8 @@ object Activation extends DefaultJsonProtocol {
a.annotations.getAs[String](WhiskActivation.causedByAnnotation).toOption)
}
}
+
+ def toDuration(milliseconds: Long) = new FiniteDuration(milliseconds,
TimeUnit.MILLISECONDS)
}
case class Metric(metricName: String, metricValue: Long) extends
EventMessageBody {
@@ -344,6 +374,7 @@ case class Metric(metricName: String, metricValue: Long)
extends EventMessageBod
}
object Metric extends DefaultJsonProtocol {
+ val typeName = "Metric"
def parse(msg: String) = Try(metricFormat.read(msg.parseJson))
implicit val metricFormat = jsonFormat(Metric.apply _, "metricName",
"metricValue")
}
@@ -369,5 +400,5 @@ object EventMessage extends DefaultJsonProtocol {
}
}
- def parse(msg: String) = format.read(msg.parseJson)
+ def parse(msg: String) = Try(format.read(msg.parseJson))
}
diff --git
a/common/scala/src/main/scala/org/apache/openwhisk/core/entity/ActivationResult.scala
b/common/scala/src/main/scala/org/apache/openwhisk/core/entity/ActivationResult.scala
index 98f241d..e44a8ff 100644
---
a/common/scala/src/main/scala/org/apache/openwhisk/core/entity/ActivationResult.scala
+++
b/common/scala/src/main/scala/org/apache/openwhisk/core/entity/ActivationResult.scala
@@ -61,8 +61,23 @@ protected[core] object ActivationResponse extends
DefaultJsonProtocol {
val DeveloperError = 2 // action ran but failed to handle an error, or
action did not run and failed to initialize
val WhiskError = 3 // internal system error
+ val statusSuccess = "success"
+ val statusApplicationError = "application_error"
+ val statusDeveloperError = "action_developer_error"
+ val statusWhiskError = "whisk_internal_error"
+
+ protected[core] def statusForCode(code: Int) = {
+ require(code >= 0 && code <= 3)
+ code match {
+ case Success => statusSuccess
+ case ApplicationError => statusApplicationError
+ case DeveloperError => statusDeveloperError
+ case WhiskError => statusWhiskError
+ }
+ }
+
protected[core] def messageForCode(code: Int) = {
- require(code >= Success && code <= WhiskError)
+ require(code >= 0 && code <= 3)
code match {
case Success => "success"
case ApplicationError => "application error"
diff --git
a/common/scala/src/main/scala/org/apache/openwhisk/core/entity/WhiskActivation.scala
b/common/scala/src/main/scala/org/apache/openwhisk/core/entity/WhiskActivation.scala
index b275e8b..691acaf 100644
---
a/common/scala/src/main/scala/org/apache/openwhisk/core/entity/WhiskActivation.scala
+++
b/common/scala/src/main/scala/org/apache/openwhisk/core/entity/WhiskActivation.scala
@@ -145,6 +145,10 @@ object WhiskActivation
val conductorAnnotation = "conductor"
val timeoutAnnotation = "timeout"
+ val memory = "memory"
+ val duration = "duration"
+ val statusCode = "statusCode"
+
/** Some field names for compositions */
val actionField = "action"
val paramsField = "params"
diff --git a/core/monitoring/user-events/.dockerignore
b/core/monitoring/user-events/.dockerignore
new file mode 100644
index 0000000..a595535
--- /dev/null
+++ b/core/monitoring/user-events/.dockerignore
@@ -0,0 +1,5 @@
+*
+!transformEnvironment.sh
+!init.sh
+!build/distributions
+!Dockerfile
\ No newline at end of file
diff --git a/core/monitoring/user-events/Dockerfile
b/core/monitoring/user-events/Dockerfile
new file mode 100644
index 0000000..95b06ae
--- /dev/null
+++ b/core/monitoring/user-events/Dockerfile
@@ -0,0 +1,34 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM scala
+
+ENV UID=1001 \
+ NOT_ROOT_USER=owuser
+
+# Copy app jars
+ADD build/distributions/user-events.tar /
+
+COPY init.sh /
+RUN chmod +x init.sh
+
+RUN adduser -D -u ${UID} -h /home/${NOT_ROOT_USER} -s /bin/bash
${NOT_ROOT_USER}
+USER ${NOT_ROOT_USER}
+
+# Prometheus port
+EXPOSE 9095
+CMD ["./init.sh", "0"]
diff --git a/core/monitoring/user-events/README.md
b/core/monitoring/user-events/README.md
new file mode 100644
index 0000000..5ed2127
--- /dev/null
+++ b/core/monitoring/user-events/README.md
@@ -0,0 +1,55 @@
+<!--
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+-->
+
+# 
+
+# OpenWhisk User Events
+
+This service connects to the `events` topic and publishes the events to various
services like Prometheus, Datadog, etc. via Kamon. Refer to [user specific
metrics][1] on how to enable them.
+
+
+## Local Run
+>First configure and run `openwhisk docker-compose` that can be found in the
[openwhisk-tools][2] project.
+
+- Start service inside the cluster (on the same docker-compose network:
`openwhisk_default`)
+- The service will be available on port `9095`
+- The endpoint for exposing the metrics for Prometheus can be found on
`/metrics`.
+
+## Usage
+
+The service needs the following env variables to be set
+
+- `KAFKA_HOSTS` - For local env it can be set to `172.17.0.1:9093`. When using
[OpenWhisk Devtools][2] based setup use `kafka`
+
+Integrations
+------------
+
+#### Prometheus
+The Docker container runs the service and exposes the metrics in the format
required by [Prometheus][3] on port `9095`.
+
+#### Grafana
+The `Openwhisk - Action Performance Metrics` [Grafana][4] dashboard is available
on localhost port `3000` at this address:
+http://localhost:3000/d/Oew1lvymk/openwhisk-action-performance-metrics
+
+The latest version of the dashboard can be found in
`compose/grafana/dashboards/openwhisk_events.json`.
+
+[1]:
https://github.com/apache/incubator-openwhisk/blob/master/docs/metrics.md#user-specific-metrics
+[2]:
https://github.com/apache/incubator-openwhisk-devtools/tree/master/docker-compose
+[3]: https://hub.docker.com/r/prom/prometheus/
+[4]: https://hub.docker.com/r/grafana/grafana/
diff --git a/core/monitoring/user-events/build.gradle
b/core/monitoring/user-events/build.gradle
new file mode 100644
index 0000000..d6ec836
--- /dev/null
+++ b/core/monitoring/user-events/build.gradle
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+apply plugin: 'scala'
+apply plugin: 'application'
+apply plugin: 'org.scoverage'
+
+ext.dockerImageName = 'user-events'
+apply from: "../../../gradle/docker.gradle"
+distDocker.dependsOn ':common:scala:distDocker', 'distTar'
+
+project.archivesBaseName = "openwhisk-user-events"
+
+repositories {
+ mavenCentral()
+}
+
+dependencies {
+ compile "org.scala-lang:scala-library:${gradle.scala.version}"
+ compile project(':common:scala')
+
+ compile 'com.typesafe.akka:akka-stream-kafka_2.12:0.22'
+
+ compile 'io.prometheus:simpleclient:0.6.0'
+ compile 'io.prometheus:simpleclient_common:0.6.0'
+
+ testCompile 'junit:junit:4.11'
+ testCompile 'org.scalatest:scalatest_2.12:3.0.1'
+ testCompile 'net.manub:scalatest-embedded-kafka_2.12:2.0.0'
+ testCompile 'com.typesafe.akka:akka-testkit_2.12:2.5.17'
+ testCompile 'com.typesafe.akka:akka-stream-testkit_2.12:2.5.17'
+ testCompile 'com.typesafe.akka:akka-http-testkit_2.12:10.1.5'
+}
+
+tasks.withType(ScalaCompile) {
+ scalaCompileOptions.additionalParameters = gradle.scala.compileFlags
+}
+
+mainClassName = "org.apache.openwhisk.core.monitoring.metrics.Main"
diff --git
a/core/monitoring/user-events/compose/grafana/dashboards/global-metrics.json
b/core/monitoring/user-events/compose/grafana/dashboards/global-metrics.json
new file mode 100644
index 0000000..77051f0
--- /dev/null
+++ b/core/monitoring/user-events/compose/grafana/dashboards/global-metrics.json
@@ -0,0 +1,422 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "Prometheus",
+ "description": "Total number of successful activations executed",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 4,
+ "x": 0,
+ "y": 0
+ },
+ "id": 4,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "#9ac48a",
+ "show": true
+ },
+ "tableColumn": "Value",
+ "targets": [
+ {
+ "expr":
"sum(increase(openwhisk_action_status{status=\"success\"}[${__range_s}s]))",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "0,1",
+ "title": "Successful Activations",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "total"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "Prometheus",
+ "description": "Total number of cold starts",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 6,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "#9ac48a",
+ "show": true
+ },
+ "tableColumn": "Value",
+ "targets": [
+ {
+ "expr":
"sum(increase(openwhisk_action_coldStarts_total[${__range_s}s]))",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "1",
+ "title": "Cold Starts",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorPrefix": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "Prometheus",
+ "description": "Total number of errors due to Runtime implementation",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 5,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "#9ac48a",
+ "show": true
+ },
+ "tableColumn": "Value",
+ "targets": [
+ {
+ "expr":
"sum(increase(openwhisk_action_status{status=\"whisk_internal_error\"}[${__range_s}s]))",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "0,1",
+ "title": "Internal Errors",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "folderId": null,
+ "gridPos": {
+ "h": 3,
+ "w": 11,
+ "x": 12,
+ "y": 0
+ },
+ "headings": true,
+ "id": 8,
+ "limit": 10,
+ "links": [],
+ "query": "",
+ "recent": false,
+ "search": true,
+ "starred": false,
+ "tags": ["openwhisk"],
+ "title": "Related Dashboards",
+ "type": "dashlist"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 3
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+ {
+ "dashboard": "OpenWhisk - Top Namespaces",
+ "keepTime": true,
+ "title": "OpenWhisk - Top Namespaces",
+ "type": "dashboard",
+ "url": "/d/RnvlchiZk/openwhisk-top-namespaces"
+ }
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(openwhisk_action_activations_total[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Activations",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": false,
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "openwhisk"
+ ],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "OpenWhisk - Global Metrics",
+ "uid": "Kw4jl2iZz",
+ "version": 8
+}
\ No newline at end of file
diff --git
a/core/monitoring/user-events/compose/grafana/dashboards/openwhisk_events.json
b/core/monitoring/user-events/compose/grafana/dashboards/openwhisk_events.json
new file mode 100644
index 0000000..e96d62f
--- /dev/null
+++
b/core/monitoring/user-events/compose/grafana/dashboards/openwhisk_events.json
@@ -0,0 +1,1539 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "5.4.3"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": "5.0.0"
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "5.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "Action performance metrics available for the users of
Openwhisk.",
+ "editable": true,
+ "gnetId": 9564,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1548707435650,
+ "links": [],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "Prometheus",
+ "decimals": 0,
+ "description": "Total number of activations in the selected time
interval",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 6,
+ "x": 0,
+ "y": 0
+ },
+ "id": 28,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(249, 186, 143, 0.15)",
+ "full": false,
+ "lineColor": "#ef843c",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr":
"sum(increase(openwhisk_action_activations_total{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[$interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Total activations",
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
+ "colors": [
+ "rgba(212, 74, 58, 0)",
+ "#508642",
+ "#299c46"
+ ],
+ "datasource": "Prometheus",
+ "decimals": 0,
+ "description": "Total number of successful activations executed",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 6,
+ "x": 6,
+ "y": 0
+ },
+ "id": 32,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(136, 253, 150, 0.18)",
+ "full": false,
+ "lineColor": "#7eb26d",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr":
"sum(increase(openwhisk_action_status{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",status=\"success\",initiator=~\"$initiator\"}[$interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "1",
+ "title": "Successful activations",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorPostfix": false,
+ "colorPrefix": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(41, 156, 70, 0)",
+ "#e24d42",
+ "#e24d42"
+ ],
+ "datasource": "Prometheus",
+ "decimals": 0,
+ "description": "Total number of error activations in the selected time
interval",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 6,
+ "x": 12,
+ "y": 0
+ },
+ "id": 34,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgb(243, 113, 104)",
+ "full": false,
+ "lineColor": "rgb(255, 194, 190)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr":
"sum(increase(openwhisk_action_status{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",status!=\"success\",initiator=~\"$initiator\"}[$interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "1",
+ "title": "Error activations",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
+ "colors": [
+ "rgba(41, 156, 70, 0)",
+ "#1f78c1",
+ "#1f78c1"
+ ],
+ "datasource": "Prometheus",
+ "decimals": 0,
+ "description": "Total number of cold starts in the selected time
interval",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 6,
+ "x": 18,
+ "y": 0
+ },
+ "id": 30,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(81, 149, 206, 0.48)",
+ "full": false,
+ "lineColor": "rgb(122, 181, 231)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr":
"sum(increase(openwhisk_action_coldStarts_total{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[$interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "1",
+ "title": "Cold starts",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 2
+ },
+ "id": 16,
+ "panels": [],
+ "title": "General gauges",
+ "type": "row"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "Prometheus",
+ "decimals": 1,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 0,
+ "y": 3
+ },
+ "id": 6,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr":
"sum(increase(openwhisk_action_status{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",status=\"success\",initiator=~\"$initiator\"}[$interval]))
* 100 /
sum(increase(openwhisk_action_status{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[$interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A",
+ "target": ""
+ }
+ ],
+ "thresholds": "50,75,100",
+ "title": "Activation success rate",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "Prometheus",
+ "decimals": 1,
+ "format": "s",
+ "gauge": {
+ "maxValue": 60,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 6,
+ "y": 3
+ },
+ "id": 8,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "max(rate(openwhisk_action_duration_seconds_sum{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[30s]) / rate(openwhisk_action_duration_seconds_count{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\"}[30s]) > 0)",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "refId": "A",
+ "target": ""
+ }
+ ],
+ "thresholds": "20,40,60",
+ "title": "Action duration current",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "Prometheus",
+ "decimals": 1,
+ "format": "s",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 12,
+ "y": 3
+ },
+ "id": 26,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "max(rate(openwhisk_action_waitTime_seconds_sum{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[30s]) / rate(openwhisk_action_waitTime_seconds_count{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\"}[30s]) > 0)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "1000,2500,5000",
+ "title": "Action wait time current",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "columns": [
+ {
+ "text": "Current",
+ "value": "current"
+ }
+ ],
+ "datasource": "Prometheus",
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 18,
+ "y": 3
+ },
+ "id": 37,
+ "links": [],
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "Action name",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Metric",
+ "type": "string"
+ },
+ {
+ "alias": "Max memory",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 0,
+ "mappingType": 1,
+ "pattern": "Current",
+ "thresholds": [],
+ "type": "number",
+ "unit": "decmbytes"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "sum(increase(openwhisk_action_memory_sum{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[$__range])) by (action) / sum(increase(openwhisk_action_memory_count{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\"}[$__range])) by (action) > 0",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "{{action}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Action memory",
+ "transform": "timeseries_aggregations",
+ "type": "table"
+ },
+ {
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 9
+ },
+ "id": 14,
+ "title": "Activation result graph",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "fill": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 8,
+ "x": 0,
+ "y": 10
+ },
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(openwhisk_action_activations_total{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[1m])) by (action)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{action}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Activations",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "short",
+ "label": "activations",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 8,
+ "x": 8,
+ "y": 10
+ },
+ "id": 18,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(openwhisk_action_status{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",status=\"success\",initiator=~\"$initiator\"}[1m])) by (action)",
+ "format": "time_series",
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{action}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Activation success",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "short",
+ "label": "activations",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "decimals": 1,
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 8,
+ "x": 16,
+ "y": 10
+ },
+ "id": 20,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(openwhisk_action_status{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",status!=\"success\",initiator=~\"$initiator\"}[1m])) by (action,status)",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "{{action}}: {{status}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Activation errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "short",
+ "label": "activations",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 19
+ },
+ "id": 12,
+ "panels": [],
+ "title": "Duration graph",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 20
+ },
+ "id": 22,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "minSpan": 6,
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "repeatDirection": "h",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(openwhisk_action_duration_seconds_sum{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[30s]) * 1000 / rate(openwhisk_action_duration_seconds_count{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[30s]) ",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{action}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "ms",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 28
+ },
+ "id": 10,
+ "panels": [],
+ "title": "Init Time Graph",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 24,
+ "x": 0,
+ "y": 29
+ },
+ "id": 24,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(openwhisk_action_initTime_seconds_sum{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[30s]) * 1000 / rate(openwhisk_action_initTime_seconds_count{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[30s]) ",
+ "format": "time_series",
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{action}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Initialization time",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "ms",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 24,
+ "x": 0,
+ "y": 38
+ },
+ "id": 35,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(openwhisk_action_waitTime_seconds_sum{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[30s]) * 1000 / rate(openwhisk_action_waitTime_seconds_count{region=~\"$region\",stack=~\"$stack\",namespace=~\"$namespace\",action=~\"$action\",initiator=~\"$initiator\"}[30s]) ",
+ "format": "time_series",
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{action}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Wait time",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "ms",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "5s",
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": "",
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": "Prometheus",
+ "definition": "query_result(sum(increase(openwhisk_action_activations_total[$interval])) by (region) > 0)",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "region",
+ "options": [],
+ "query": "query_result(sum(increase(openwhisk_action_activations_total[$interval])) by (region) > 0)",
+ "refresh": 1,
+ "regex": "/.*region=\"(.*)\".*/",
+ "skipUrlSync": false,
+ "sort": 2,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": "",
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": "Prometheus",
+ "definition": "query_result(sum(increase(openwhisk_action_activations_total[$interval])) by (stack) > 0)",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "stack",
+ "options": [],
+ "query": "query_result(sum(increase(openwhisk_action_activations_total[$interval])) by (stack) > 0)",
+ "refresh": 1,
+ "regex": "/.*stack=\"(.*)\".*/",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "14257_51772",
+ "value": "14257_51772"
+ },
+ "datasource": "Prometheus",
+ "definition": "query_result(sum(increase(openwhisk_action_activations_total{namespace=~\"$namespace\"}[$interval])) by (initiator) > 0)",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "initiator",
+ "options": [],
+ "query": "query_result(sum(increase(openwhisk_action_activations_total{namespace=~\"$namespace\"}[$interval])) by (initiator) > 0)",
+ "refresh": 1,
+ "regex": "/.*initiator=\"(.*)\".*/",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "14257_51772",
+ "value": "14257_51772"
+ },
+ "datasource": "Prometheus",
+ "definition": "query_result(sum(increase(openwhisk_action_activations_total[$interval])) by (namespace))",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "namespace",
+ "options": [],
+ "query": "query_result(sum(increase(openwhisk_action_activations_total[$interval])) by (namespace))",
+ "refresh": 1,
+ "regex": "/.*namespace=\"(.*)\".*/",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": "",
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": "Prometheus",
+ "definition": "query_result(sum(increase(openwhisk_action_activations_total{namespace=~\"$namespace\"}[$interval])) by (action) > 0)",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "action",
+ "options": [],
+ "query": "query_result(sum(increase(openwhisk_action_activations_total{namespace=~\"$namespace\"}[$interval])) by (action) > 0)",
+ "refresh": 1,
+ "regex": "/.*action=\"(.*)\".*/",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "auto": true,
+ "auto_count": 1,
+ "auto_min": "1m",
+ "current": {
+ "text": "auto",
+ "value": "$__auto_interval_interval"
+ },
+ "hide": 2,
+ "label": null,
+ "name": "interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "auto",
+ "value": "$__auto_interval_interval"
+ },
+ {
+ "selected": false,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "skipUrlSync": false,
+ "type": "interval"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "tags": [
+ "openwhisk"
+ ],
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "OpenWhisk - Action Performance Metrics",
+ "uid": "Oew1lvymk",
+ "version": 1
+}
\ No newline at end of file
diff --git a/core/monitoring/user-events/compose/grafana/dashboards/top-namespaces.json b/core/monitoring/user-events/compose/grafana/dashboards/top-namespaces.json
new file mode 100644
index 0000000..e00db15
--- /dev/null
+++ b/core/monitoring/user-events/compose/grafana/dashboards/top-namespaces.json
@@ -0,0 +1,473 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "6.1.6"
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "table",
+ "name": "Table",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "columns": [],
+ "datasource": "Prometheus",
+ "description": "Top namespaces by activation count",
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "links": [],
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": false
+ },
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "Namespace",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTargetBlank": false,
+ "linkTooltip": "Metrics related to ${__cell}",
+ "linkUrl": "d/Oew1lvymk/openwhisk-action-performance-metrics?var-namespace=${__cell}&from=${__from}&to=${__to}",
+ "mappingType": 1,
+ "pattern": "namespace",
+ "thresholds": [],
+ "type": "string",
+ "unit": "short"
+ },
+ {
+ "alias": "Activation Count",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(10, sum by(namespace)(increase(openwhisk_action_activations_total[${__range_s}s])))",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "title": "Top Namespaces",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "columns": [],
+ "datasource": "Prometheus",
+ "description": "Top memory sizes specified (in MB)",
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 10,
+ "w": 5,
+ "x": 12,
+ "y": 0
+ },
+ "id": 4,
+ "links": [],
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": false
+ },
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "Namespace",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTargetBlank": false,
+ "linkUrl": "d/Oew1lvymk/openwhisk-action-performance-metrics?var-namespace=${__cell}",
+ "mappingType": 1,
+ "pattern": "namespace",
+ "thresholds": [],
+ "type": "string",
+ "unit": "short"
+ },
+ {
+ "alias": "Activation Count",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(10, sum by(memory)(increase(openwhisk_action_activations_total[${__range_s}s])))",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "title": "Memory",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "columns": [],
+ "datasource": "Prometheus",
+ "description": "Top activation 'kind'",
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 10,
+ "w": 5,
+ "x": 17,
+ "y": 0
+ },
+ "id": 5,
+ "links": [],
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": false
+ },
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "Namespace",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTargetBlank": false,
+ "linkUrl": "d/Oew1lvymk/openwhisk-action-performance-metrics?var-namespace=${__cell}",
+ "mappingType": 1,
+ "pattern": "namespace",
+ "thresholds": [],
+ "type": "string",
+ "unit": "short"
+ },
+ {
+ "alias": "Activation Count",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(10, sum by(kind)(increase(openwhisk_action_activations_total[${__range_s}s])))",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "title": "Kind",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "columns": [],
+ "datasource": "Prometheus",
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 8,
+ "w": 22,
+ "x": 0,
+ "y": 10
+ },
+ "id": 3,
+ "links": [],
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": false
+ },
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "Namespace",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "namespace",
+ "thresholds": [],
+ "type": "string",
+ "unit": "short"
+ },
+ {
+ "alias": "Action",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTooltip": "Action ${__cell} details",
+ "linkUrl": "d/Oew1lvymk/openwhisk-action-performance-metrics?var-namespace=${__cell_2}&var-action=${__cell}&from=${__from}&to=${__to}",
+ "mappingType": 1,
+ "pattern": "action",
+ "thresholds": [],
+ "type": "string",
+ "unit": "short"
+ },
+ {
+ "alias": "Activation Count",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(10, sum by(namespace,action,kind,memory)(increase(openwhisk_action_activations_total[${__range_s}s])))",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "title": "Top Actions",
+ "transform": "table",
+ "type": "table"
+ }
+ ],
+ "schemaVersion": 18,
+ "style": "dark",
+ "tags": [
+ "openwhisk"
+ ],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "OpenWhisk - Top Namespaces",
+ "uid": "RnvlchiZk",
+ "version": 1
+}
\ No newline at end of file
diff --git a/core/monitoring/user-events/compose/grafana/provisioning/dashboards/dashboard.yml b/core/monitoring/user-events/compose/grafana/provisioning/dashboards/dashboard.yml
new file mode 100644
index 0000000..a6ea486
--- /dev/null
+++ b/core/monitoring/user-events/compose/grafana/provisioning/dashboards/dashboard.yml
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+apiVersion: 1
+
+providers:
+- name: 'Prometheus'
+ orgId: 1
+ folder: ''
+ type: file
+ disableDeletion: false
+ editable: true
+ options:
+ path: /var/lib/grafana/dashboards
diff --git a/core/monitoring/user-events/compose/grafana/provisioning/datasources/datasource.yml b/core/monitoring/user-events/compose/grafana/provisioning/datasources/datasource.yml
new file mode 100644
index 0000000..b67b13d
--- /dev/null
+++ b/core/monitoring/user-events/compose/grafana/provisioning/datasources/datasource.yml
@@ -0,0 +1,67 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# config file version
+apiVersion: 1
+
+# list of datasources that should be deleted from the database
+deleteDatasources:
+ - name: Prometheus
+ orgId: 1
+
+# list of datasources to insert/update depending
+# on what's available in the database
+datasources:
+ # <string, required> name of the datasource. Required
+- name: Prometheus
+ # <string, required> datasource type. Required
+ type: prometheus
+ # <string, required> access mode. direct or proxy. Required
+ access: proxy
+ # <int> org id. will default to orgId 1 if not specified
+ orgId: 1
+ # <string> url
+ url: http://prometheus:9090
+ # <string> database password, if used
+ password:
+ # <string> database user, if used
+ user:
+ # <string> database name, if used
+ database:
+ # <bool> enable/disable basic auth
+ basicAuth: true
+ # <string> basic auth username
+ basicAuthUser: admin
+ # <string> basic auth password
+ basicAuthPassword: foobar
+ # <bool> enable/disable with credentials headers
+ withCredentials:
+ # <bool> mark as default datasource. Max one per org
+ isDefault: true
+ # <map> fields that will be converted to json and stored in json_data
+ jsonData:
+ graphiteVersion: "1.1"
+ tlsAuth: false
+ tlsAuthWithCACert: false
+ # <string> json object of data that will be encrypted.
+ secureJsonData:
+ tlsCACert: "..."
+ tlsClientCert: "..."
+ tlsClientKey: "..."
+ version: 1
+ # <bool> allow users to edit datasources from the UI.
+ editable: true
diff --git a/core/monitoring/user-events/compose/prometheus/prometheus.yml b/core/monitoring/user-events/compose/prometheus/prometheus.yml
new file mode 100644
index 0000000..453ab57
--- /dev/null
+++ b/core/monitoring/user-events/compose/prometheus/prometheus.yml
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+global:
+ scrape_interval: 10s
+ evaluation_interval: 10s
+
+scrape_configs:
+ - job_name: 'prometheus-server'
+ static_configs:
+ - targets: ['localhost:9090']
+
+ - job_name: 'openwhisk-metrics'
+ static_configs:
+ - targets: ['user-events:9095']
+
diff --git a/core/monitoring/user-events/images/demo_landing.png
b/core/monitoring/user-events/images/demo_landing.png
new file mode 100644
index 0000000..9cfcf23
Binary files /dev/null and
b/core/monitoring/user-events/images/demo_landing.png differ
diff --git a/core/monitoring/user-events/init.sh
b/core/monitoring/user-events/init.sh
new file mode 100644
index 0000000..9da8864
--- /dev/null
+++ b/core/monitoring/user-events/init.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+./copyJMXFiles.sh
+
+export CACHE_INVALIDATOR_OPTS
+CACHE_INVALIDATOR_OPTS="$CACHE_INVALIDATOR_OPTS $(./transformEnvironment.sh)"
+
+exec user-events/bin/user-events "$@"
diff --git a/core/monitoring/user-events/src/main/resources/application.conf
b/core/monitoring/user-events/src/main/resources/application.conf
new file mode 100644
index 0000000..8c8cd3e
--- /dev/null
+++ b/core/monitoring/user-events/src/main/resources/application.conf
@@ -0,0 +1,52 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+akka.kafka.consumer {
+ # Properties defined by org.apache.kafka.clients.consumer.ConsumerConfig
+ # can be defined in this configuration section.
+ kafka-clients {
+ group.id = "kamon"
+
+ auto.offset.reset = "earliest"
+
+ # Disable auto-commit by default
+ enable.auto.commit = false
+
+ bootstrap.servers = ${?KAFKA_HOSTS}
+ }
+}
+
+kamon {
+ metric {
+ tick-interval = 15 seconds
+ }
+ prometheus {
+ # We expose the metrics endpoint over akka http, so the default embedded server is disabled
+ start-embedded-http-server = no
+ }
+
+ system-metrics {
+ # disable the host metrics as we are only interested in JVM metrics
+ host.enabled = false
+ }
+
+ environment {
+ # Identifier for this service. To keep it backward compatible it is set to match the previous
+ # statsd name
+ service = "user-events"
+ }
+}
diff --git a/core/monitoring/user-events/src/main/resources/reference.conf
b/core/monitoring/user-events/src/main/resources/reference.conf
new file mode 100644
index 0000000..6f7d1c2
--- /dev/null
+++ b/core/monitoring/user-events/src/main/resources/reference.conf
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+whisk {
+ user-events {
+ # Server port
+ port = 9095
+
+ # Enables KamonRecorder so that metrics are also sent to Kamon supported backends
+ # like DataDog
+ enable-kamon = false
+ }
+}
diff --git a/core/monitoring/user-events/src/main/resources/whisk-logback.xml
b/core/monitoring/user-events/src/main/resources/whisk-logback.xml
new file mode 100644
index 0000000..983f5ef
--- /dev/null
+++ b/core/monitoring/user-events/src/main/resources/whisk-logback.xml
@@ -0,0 +1,25 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+<included>
+ <contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
+ <resetJUL>true</resetJUL>
+ </contextListener>
+
+ <!-- Kafka -->
+ <logger name="org.apache.kafka" level="ERROR" />
+</included>
\ No newline at end of file
diff --git
a/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/EventConsumer.scala
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/EventConsumer.scala
new file mode 100644
index 0000000..7b8c594
--- /dev/null
+++
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/EventConsumer.scala
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import java.lang.management.ManagementFactory
+
+import akka.Done
+import akka.actor.ActorSystem
+import akka.kafka.ConsumerMessage.CommittableOffsetBatch
+import akka.kafka.scaladsl.Consumer
+import akka.kafka.scaladsl.Consumer.DrainingControl
+import akka.kafka.{ConsumerSettings, Subscriptions}
+import akka.stream.ActorMaterializer
+import akka.stream.scaladsl.{Keep, Sink}
+import javax.management.ObjectName
+import org.apache.kafka.clients.consumer.ConsumerConfig
+import kamon.Kamon
+import kamon.metric.MeasurementUnit
+
+import scala.concurrent.{ExecutionContext, Future}
+import scala.concurrent.duration._
+import org.apache.openwhisk.core.connector.{Activation, EventMessage, Metric}
+import org.apache.openwhisk.core.entity.ActivationResponse
+
+/**
+ * Sink for the user events read by [[EventConsumer]].
+ *
+ * Both hooks receive the decoded event body together with the namespace carried by the
+ * enclosing event message.
+ */
+trait MetricRecorder {
+
+  /** Invoked for every event whose body is an [[Activation]]. */
+  def processActivation(activation: Activation, initiatorNamespace: String): Unit
+
+  /** Invoked for every event whose body is a [[Metric]]. */
+  def processMetric(metric: Metric, initiatorNamespace: String): Unit
+}
+
+/**
+ * Reads the user events topic and hands every decoded event to the given recorders.
+ *
+ * The Kafka stream is started as soon as an instance is created. Besides delegating to the
+ * recorders, the consumer maintains a set of global Kamon metrics (event rates, activation
+ * status, cold starts, timings) and periodically publishes the consumer lag.
+ *
+ * @param settings  Kafka consumer settings; the client id is overridden with [[EventConsumer.id]]
+ * @param recorders recorders notified for every activation and metric event
+ */
+case class EventConsumer(settings: ConsumerSettings[String, String], recorders: Seq[MetricRecorder])(
+  implicit system: ActorSystem,
+  materializer: ActorMaterializer) {
+  import EventConsumer._
+
+  private implicit val ec: ExecutionContext = system.dispatcher
+
+  //Record the rate of events received
+  private val activationCounter = Kamon.counter("openwhisk.userevents.global.activations")
+  private val metricCounter = Kamon.counter("openwhisk.userevents.global.metric")
+
+  private val statusCounter = Kamon.counter("openwhisk.userevents.global.status")
+  private val coldStartCounter = Kamon.counter("openwhisk.userevents.global.coldStarts")
+
+  private val statusSuccess = statusCounter.refine("status" -> ActivationResponse.statusSuccess)
+  private val statusFailure = statusCounter.refine("status" -> "failure")
+  private val statusApplicationError = statusCounter.refine("status" -> ActivationResponse.statusApplicationError)
+  private val statusDeveloperError = statusCounter.refine("status" -> ActivationResponse.statusDeveloperError)
+  private val statusInternalError = statusCounter.refine("status" -> ActivationResponse.statusWhiskError)
+
+  private val waitTime = Kamon.histogram("openwhisk.userevents.global.waitTime", MeasurementUnit.time.milliseconds)
+  private val initTime = Kamon.histogram("openwhisk.userevents.global.initTime", MeasurementUnit.time.milliseconds)
+  private val duration = Kamon.histogram("openwhisk.userevents.global.duration", MeasurementUnit.time.milliseconds)
+
+  private val lagGauge = Kamon.gauge("openwhisk.userevents.consumer.lag")
+
+  /** Cancels the lag reporter, then drains the stream and stops the Kafka consumer. */
+  def shutdown(): Future[Done] = {
+    lagRecorder.cancel()
+    control.drainAndShutdown()(system.dispatcher)
+  }
+
+  /** True as long as the consumer control has not reported shutdown. */
+  def isRunning: Boolean = !control.isShutdown.isCompleted
+
+  //TODO Use RestartSource
+  // Each record is processed first; its offset is then committed in batches of up to 20 offsets.
+  private val control: DrainingControl[Done] = Consumer
+    .committableSource(updatedSettings, Subscriptions.topics(userEventTopic))
+    .map { msg =>
+      processEvent(msg.record.value())
+      msg.committableOffset
+    }
+    .batch(max = 20, CommittableOffsetBatch(_))(_.updated(_))
+    .mapAsync(3)(_.commitScaladsl())
+    .toMat(Sink.ignore)(Keep.both)
+    .mapMaterializedValue(DrainingControl.apply)
+    .run()
+
+  // The lag is read over JMX and that lookup can fail (e.g. the consumer MBean is not registered).
+  // An exception escaping a repeated scheduler task stops all further executions, so the failure
+  // is contained here and the next tick simply tries again.
+  private val lagRecorder =
+    system.scheduler.schedule(10.seconds, 10.seconds) {
+      try lagGauge.set(consumerLag)
+      catch {
+        case scala.util.control.NonFatal(e) =>
+          system.log.debug("Unable to read the consumer lag: {}", e.toString)
+      }
+    }
+
+  /**
+   * Decodes one raw event, bumps the matching global counter and forwards the body to all recorders.
+   *
+   * NOTE(review): an `eventType` other than the two handled below raises a MatchError inside `map`;
+   * how that surfaces depends on the container returned by `EventMessage.parse` - confirm.
+   */
+  private def processEvent(value: String): Unit = {
+    EventMessage
+      .parse(value)
+      .map { e =>
+        e.eventType match {
+          case Activation.typeName => activationCounter.increment()
+          case Metric.typeName     => metricCounter.increment()
+        }
+        e
+      }
+      .foreach { e =>
+        e.body match {
+          case a: Activation =>
+            recorders.foreach(_.processActivation(a, e.namespace))
+            updateGlobalMetrics(a)
+          case m: Metric =>
+            recorders.foreach(_.processMetric(m, e.namespace))
+        }
+      }
+  }
+
+  /** Updates the cluster wide status, cold start and timing metrics for a single activation. */
+  private def updateGlobalMetrics(a: Activation): Unit = {
+    a.status match {
+      case ActivationResponse.statusSuccess          => statusSuccess.increment()
+      case ActivationResponse.statusApplicationError => statusApplicationError.increment()
+      case ActivationResponse.statusDeveloperError   => statusDeveloperError.increment()
+      case ActivationResponse.statusWhiskError       => statusInternalError.increment()
+      case _                                         => //Ignore for now
+    }
+
+    // "failure" aggregates every non-success status, including ones not matched above
+    if (a.status != ActivationResponse.statusSuccess) statusFailure.increment()
+    // initTime is only recorded for cold starts
+    if (a.isColdStart) {
+      coldStartCounter.increment()
+      initTime.record(a.initTime.toMillis)
+    }
+
+    waitTime.record(a.waitTime.toMillis)
+    duration.record(a.duration.toMillis)
+  }
+
+  // The fixed client id makes the consumer's JMX metrics addressable by EventConsumer.consumerLag
+  private def updatedSettings = settings.withProperty(ConsumerConfig.CLIENT_ID_CONFIG, id)
+}
+
+/** Constants and JMX access shared by all [[EventConsumer]] instances. */
+object EventConsumer {
+
+  /** Name of the Kafka topic the consumer subscribes to. */
+  val userEventTopic = "events"
+
+  /** Kafka client id of the consumer; it is also part of the MBean name queried by [[consumerLag]]. */
+  val id = "event-consumer"
+
+  private val mbeanServer = ManagementFactory.getPlatformMBeanServer
+  private val fetchMetricsName =
+    new ObjectName(s"kafka.consumer:type=consumer-fetch-manager-metrics,client-id=$id")
+
+  /**
+   * Current value of the `records-lag-max` attribute of the consumer's fetch manager MBean,
+   * truncated to a `Long` and never lower than zero.
+   */
+  def consumerLag: Long = {
+    val maxLag = mbeanServer.getAttribute(fetchMetricsName, "records-lag-max").asInstanceOf[Double]
+    math.max(maxLag.toLong, 0L)
+  }
+}
diff --git
a/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/KamonRecorder.scala
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/KamonRecorder.scala
new file mode 100644
index 0000000..d0c0c67
--- /dev/null
+++
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/KamonRecorder.scala
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import akka.event.slf4j.SLF4JLogging
+import org.apache.openwhisk.core.connector.{Activation, Metric}
+import kamon.Kamon
+import kamon.metric.MeasurementUnit
+
+import scala.collection.concurrent.TrieMap
+
+/** Metric names used when recording through Kamon; segments are dot separated. */
+trait KamonMetricNames extends MetricNames {
+  // Per action metrics
+  val activationMetric: String = "openwhisk.action.activations"
+  val coldStartMetric: String = "openwhisk.action.coldStarts"
+  val waitTimeMetric: String = "openwhisk.action.waitTime"
+  val initTimeMetric: String = "openwhisk.action.initTime"
+  val durationMetric: String = "openwhisk.action.duration"
+  val statusMetric: String = "openwhisk.action.status"
+
+  // Rate limit metrics
+  val concurrentLimitMetric: String = "openwhisk.action.limit.concurrent"
+  val timedLimitMetric: String = "openwhisk.action.limit.timed"
+}
+
+/**
+ * [[MetricRecorder]] which records activations and limit events as tagged Kamon metrics.
+ *
+ * The refined (tagged) instruments are created once per distinct tag combination and cached.
+ */
+object KamonRecorder extends MetricRecorder with KamonMetricNames with SLF4JLogging {
+  // Keyed by every value that ends up in a tag (action name, kind, memory, initiator). Keying by
+  // the action name alone would reuse the instruments created for the first caller and report
+  // later activations under that caller's initiator/kind/memory tags.
+  private val activationMetrics = new TrieMap[(String, String, String, String), ActivationKamonMetrics]
+  // Keyed by the namespace passed to processMetric
+  private val limitMetrics = new TrieMap[String, LimitKamonMetrics]
+
+  override def processActivation(activation: Activation, initiatorNamespace: String): Unit = {
+    lookup(activation, initiatorNamespace).record(activation)
+  }
+
+  override def processMetric(metric: Metric, initiatorNamespace: String): Unit = {
+    val limitMetric = limitMetrics.getOrElseUpdate(initiatorNamespace, LimitKamonMetrics(initiatorNamespace))
+    limitMetric.record(metric)
+  }
+
+  /**
+   * Returns the cached metrics holder matching the activation's tags, creating it on first use.
+   *
+   * @param activation         activation whose name, kind and memory select the holder
+   * @param initiatorNamespace namespace reported as the initiator tag
+   */
+  def lookup(activation: Activation, initiatorNamespace: String): ActivationKamonMetrics = {
+    val kind = activation.kind
+    val memory = activation.memory.toString
+    val key = (activation.name, kind, memory, initiatorNamespace)
+    activationMetrics.getOrElseUpdate(
+      key,
+      ActivationKamonMetrics(activation.namespace, activation.action, kind, memory, initiatorNamespace))
+  }
+
+  /** Rate limit counters tagged with the given namespace. */
+  case class LimitKamonMetrics(namespace: String) {
+    private val concurrentLimit = Kamon.counter(concurrentLimitMetric).refine(actionNamespace -> namespace)
+    private val timedLimit = Kamon.counter(timedLimitMetric).refine(actionNamespace -> namespace)
+
+    /** Increments the counter selected by the metric name; unknown names are only logged. */
+    def record(m: Metric): Unit = {
+      m.metricName match {
+        case "ConcurrentRateLimit" => concurrentLimit.increment()
+        case "TimedRateLimit"      => timedLimit.increment()
+        case x                     => log.warn(s"Unknown limit $x")
+      }
+    }
+  }
+
+  /**
+   * Kamon instruments for one combination of action and initiator.
+   *
+   * Only the activation counter carries the kind and memory tags; all other instruments are
+   * tagged with namespace, initiator and action name.
+   */
+  case class ActivationKamonMetrics(namespace: String,
+                                    action: String,
+                                    kind: String,
+                                    memory: String,
+                                    initiator: String) {
+    private val activationTags =
+      Map(
+        actionNamespace -> namespace,
+        initiatorNamespace -> initiator,
+        actionName -> action,
+        actionKind -> kind,
+        actionMemory -> memory)
+    private val tags = Map(actionNamespace -> namespace, initiatorNamespace -> initiator, actionName -> action)
+
+    private val activations = Kamon.counter(activationMetric).refine(activationTags)
+    private val coldStarts = Kamon.counter(coldStartMetric).refine(tags)
+    private val waitTime = Kamon.histogram(waitTimeMetric, MeasurementUnit.time.milliseconds).refine(tags)
+    private val initTime = Kamon.histogram(initTimeMetric, MeasurementUnit.time.milliseconds).refine(tags)
+    private val duration = Kamon.histogram(durationMetric, MeasurementUnit.time.milliseconds).refine(tags)
+
+    /** Records one activation: count, cold start data, timings and status. */
+    def record(a: Activation): Unit = {
+      activations.increment()
+
+      if (a.isColdStart) {
+        coldStarts.increment()
+        initTime.record(a.initTime.toMillis)
+      }
+
+      //waitTime may be zero for activations which are part of sequence
+      waitTime.record(a.waitTime.toMillis)
+      duration.record(a.duration.toMillis)
+
+      // The status is only known per activation, hence the counter is refined on every call
+      Kamon.counter(statusMetric).refine(tags + (actionStatus -> a.status)).increment()
+    }
+  }
+}
diff --git
a/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/Main.scala
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/Main.scala
new file mode 100644
index 0000000..9c1b932
--- /dev/null
+++
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/Main.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import akka.actor.ActorSystem
+import akka.http.scaladsl.Http
+import akka.stream.ActorMaterializer
+import kamon.Kamon
+
+import scala.concurrent.duration.DurationInt
+import scala.concurrent.{Await, ExecutionContextExecutor, Future}
+
+/** Entry point of the user-events service. */
+object Main {
+
+  /** Loads the Kamon reporters, creates the actor system, starts the service and registers the shutdown hook. */
+  def main(args: Array[String]): Unit = {
+    Kamon.loadReportersFromConfig()
+    implicit val system: ActorSystem = ActorSystem("events-actor-system")
+    implicit val materializer: ActorMaterializer = ActorMaterializer()
+    val binding = OpenWhiskEvents.start(system.settings.config)
+    addShutdownHook(binding)
+  }
+
+  /**
+   * Registers a JVM shutdown hook which unbinds the HTTP server and then waits for the
+   * actor system to terminate, each for at most 30 seconds.
+   */
+  private def addShutdownHook(binding: Future[Http.ServerBinding])(implicit actorSystem: ActorSystem,
+                                                                    materializer: ActorMaterializer): Unit = {
+    implicit val ec: ExecutionContextExecutor = actorSystem.dispatcher
+    sys.addShutdownHook {
+      // flatMap (not map): unbind() itself returns a future, and mapping would produce a nested
+      // future whose outer layer completes immediately, so the hook would not wait for the unbind.
+      Await.result(binding.flatMap(_.unbind()), 30.seconds)
+      // NOTE(review): nothing in this block terminates the actor system; presumably Akka's
+      // coordinated shutdown does - confirm.
+      Await.result(actorSystem.whenTerminated, 30.seconds)
+    }
+  }
+}
diff --git a/settings.gradle
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/MetricNames.scala
similarity index 53%
copy from settings.gradle
copy to
core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/MetricNames.scala
index 02e46b8..d82b49b 100644
--- a/settings.gradle
+++
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/MetricNames.scala
@@ -15,35 +15,23 @@
* limitations under the License.
*/
-include 'common:scala'
-
-include 'core:controller'
-include 'core:invoker'
-include 'core:cosmosdb:cache-invalidator'
-include 'core:standalone'
-
-include 'tests'
-include 'tests:performance:gatling_tests'
-
-include 'tools:actionProxy'
-include 'tools:ow-utils'
-include 'tools:dev'
-
-include 'tools:admin'
-
-rootProject.name = 'openwhisk'
-
-gradle.ext.scala = [
- version: '2.12.9',
- compileFlags: ['-feature', '-unchecked', '-deprecation',
'-Xfatal-warnings', '-Ywarn-unused-import']
-]
-
-gradle.ext.scalafmt = [
- version: '1.5.0',
- config: new File(rootProject.projectDir, '.scalafmt.conf')
-]
-
-gradle.ext.akka = [version : '2.5.22']
-gradle.ext.akka_http = [version : '10.1.8']
-
-gradle.ext.curator = [version:'4.0.0']
+package org.apache.openwhisk.core.monitoring.metrics
+
+/**
+ * Tag keys shared by all recorders plus the metric names each backend has to provide.
+ */
+trait MetricNames {
+  // Tag keys
+  val actionNamespace: String = "namespace"
+  val initiatorNamespace: String = "initiator"
+  val actionName: String = "action"
+  val actionStatus: String = "status"
+  val actionMemory: String = "memory"
+  val actionKind: String = "kind"
+
+  // Names of the per action metrics
+  def activationMetric: String
+  def coldStartMetric: String
+  def waitTimeMetric: String
+  def initTimeMetric: String
+  def durationMetric: String
+  def statusMetric: String
+
+  // Names of the rate limit metrics
+  def concurrentLimitMetric: String
+  def timedLimitMetric: String
+}
diff --git
a/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/OpenWhiskEvents.scala
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/OpenWhiskEvents.scala
new file mode 100644
index 0000000..8e963e5
--- /dev/null
+++
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/OpenWhiskEvents.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import akka.actor.{ActorSystem, CoordinatedShutdown}
+import akka.event.slf4j.SLF4JLogging
+import akka.http.scaladsl.Http
+import akka.kafka.ConsumerSettings
+import akka.stream.ActorMaterializer
+import com.typesafe.config.Config
+import kamon.Kamon
+import kamon.prometheus.PrometheusReporter
+import kamon.system.SystemMetrics
+import org.apache.kafka.common.serialization.StringDeserializer
+import pureconfig.loadConfigOrThrow
+
+import scala.concurrent.Future
+
+/** Wires up Kamon, the Kafka event consumer and the HTTP endpoint of the user-events service. */
+object OpenWhiskEvents extends SLF4JLogging {
+
+  /** Settings read from `whisk.user-events`. */
+  case class MetricConfig(port: Int, enableKamon: Boolean)
+
+  /**
+   * Starts metric collection, the event consumer and the HTTP server.
+   *
+   * @param config configuration used for Kamon, the service settings and the Kafka consumer
+   * @return the binding of the HTTP server, completed once the server is bound
+   */
+  def start(config: Config)(implicit system: ActorSystem,
+                            materializer: ActorMaterializer): Future[Http.ServerBinding] = {
+    // Kamon setup: the Prometheus reporter is registered explicitly so that it can be exposed below
+    Kamon.reconfigure(config)
+    val prometheusReporter = new PrometheusReporter()
+    Kamon.addReporter(prometheusReporter)
+    SystemMetrics.startCollecting()
+
+    val metricConfig = loadConfigOrThrow[MetricConfig](config, "whisk.user-events")
+
+    // The Prometheus recorder is always active, the Kamon recorder only when enabled
+    val prometheusRecorder = PrometheusRecorder(prometheusReporter)
+    val optionalRecorders: Seq[MetricRecorder] = if (metricConfig.enableKamon) Seq(KamonRecorder) else Seq.empty
+    val recorders: Seq[MetricRecorder] = prometheusRecorder +: optionalRecorders
+
+    val consumerSettings = eventConsumerSettings(defaultConsumerConfig(config))
+    val eventConsumer = EventConsumer(consumerSettings, recorders)
+
+    // Stop consuming before the HTTP service gets unbound
+    CoordinatedShutdown(system).addTask(CoordinatedShutdown.PhaseBeforeServiceUnbind, "shutdownConsumer") { () =>
+      eventConsumer.shutdown()
+    }
+
+    val port = metricConfig.port
+    val api = new PrometheusEventsApi(eventConsumer, prometheusRecorder)
+    val httpBinding = Http().bindAndHandle(api.routes, "0.0.0.0", port)
+    httpBinding.foreach { _ =>
+      log.info(s"Started the http server on http://localhost:$port")
+    }(system.dispatcher)
+    httpBinding
+  }
+
+  /** Builds String/String consumer settings from the given consumer configuration. */
+  def eventConsumerSettings(config: Config): ConsumerSettings[String, String] =
+    ConsumerSettings(config, new StringDeserializer, new StringDeserializer)
+
+  /** Extracts the `akka.kafka.consumer` section from the global configuration. */
+  def defaultConsumerConfig(globalConfig: Config): Config = globalConfig.getConfig("akka.kafka.consumer")
+}
diff --git
a/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/PrometheusEventsApi.scala
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/PrometheusEventsApi.scala
new file mode 100644
index 0000000..b4b9a2b
--- /dev/null
+++
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/PrometheusEventsApi.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import akka.http.scaladsl.model.StatusCodes.ServiceUnavailable
+import akka.http.scaladsl.model.{ContentType, MessageEntity}
+import akka.http.scaladsl.server.Directives._
+import akka.http.scaladsl.server.Route
+
+/** Source of the metrics report served by the `metrics` endpoint. */
+trait PrometheusExporter {
+
+  /** Returns the current report as an HTTP entity. */
+  def getReport(): MessageEntity
+}
+
+object PrometheusExporter {
+  private val textV4Value = "text/plain; version=0.0.4; charset=utf-8"
+
+  /** Content type built from the constant above; the constant parses, so `right.get` is safe. */
+  val textV4: ContentType = ContentType.parse(textV4Value).right.get
+}
+
+/**
+ * HTTP API of the service. Both routes answer GET requests only.
+ *
+ *  - `ping`: replies `pong` while the consumer is running, 503 otherwise
+ *  - `metrics`: the report obtained from the exporter
+ *
+ * @param consumer   consumer whose running state backs the `ping` route
+ * @param prometheus exporter providing the report for the `metrics` route
+ */
+class PrometheusEventsApi(consumer: EventConsumer, prometheus: PrometheusExporter) {
+
+  private def pingRoute: Route = path("ping") {
+    if (!consumer.isRunning) complete((ServiceUnavailable, "Consumer not running"))
+    else complete("pong")
+  }
+
+  private def metricsRoute: Route = path("metrics") {
+    encodeResponse {
+      complete(prometheus.getReport())
+    }
+  }
+
+  val routes: Route = get {
+    pingRoute ~ metricsRoute
+  }
+}
diff --git
a/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/PrometheusRecorder.scala
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/PrometheusRecorder.scala
new file mode 100644
index 0000000..42be2e4
--- /dev/null
+++
b/core/monitoring/user-events/src/main/scala/org/apache/openwhisk/core/monitoring/metrics/PrometheusRecorder.scala
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import java.io.StringWriter
+import java.util
+import java.util.concurrent.TimeUnit
+
+import akka.event.slf4j.SLF4JLogging
+import akka.http.scaladsl.model.{HttpEntity, MessageEntity}
+import akka.stream.scaladsl.{Concat, Source}
+import akka.util.ByteString
+import org.apache.openwhisk.core.connector.{Activation, Metric}
+import io.prometheus.client.exporter.common.TextFormat
+import io.prometheus.client.{CollectorRegistry, Counter, Gauge, Histogram}
+import kamon.prometheus.PrometheusReporter
+import org.apache.openwhisk.core.entity.ActivationResponse
+
+import scala.collection.JavaConverters._
+import scala.collection.concurrent.TrieMap
+import scala.concurrent.duration.Duration
+
+/**
+ * Metric names under which the `PrometheusRecorder` companion object registers its collectors.
+ */
+trait PrometheusMetricNames extends MetricNames {
+ // Per-action metrics: counters (activations, cold starts, status) and histograms (times, memory)
+ val activationMetric = "openwhisk_action_activations_total"
+ val coldStartMetric = "openwhisk_action_coldStarts_total"
+ val waitTimeMetric = "openwhisk_action_waitTime_seconds"
+ val initTimeMetric = "openwhisk_action_initTime_seconds"
+ val durationMetric = "openwhisk_action_duration_seconds"
+ val statusMetric = "openwhisk_action_status"
+ // NOTE: registered through the `histogram` helper in object PrometheusRecorder
+ val memoryMetric = "openwhisk_action_memory"
+
+ // Per-namespace counters, incremented for `ConcurrentRateLimit` / `TimedRateLimit` metric events
+ val concurrentLimitMetric = "openwhisk_action_limit_concurrent_total"
+ val timedLimitMetric = "openwhisk_action_limit_timed_total"
+}
+
+/**
+ * [[MetricRecorder]] which records activation and limit events on the collectors registered in
+ * the companion object, and which exposes them as a [[PrometheusExporter]].
+ *
+ * The report streams the samples of `CollectorRegistry.defaultRegistry` followed by the scrape
+ * data of the supplied Kamon reporter.
+ *
+ * @param kamon Kamon Prometheus reporter whose scrape data is appended to every report
+ */
+case class PrometheusRecorder(kamon: PrometheusReporter)
+    extends MetricRecorder
+    with PrometheusExporter
+    with SLF4JLogging {
+  import PrometheusRecorder._
+
+  // (initiator namespace, activation name, kind, memory): every value that determines the label
+  // values an ActivationPromMetrics instance is bound to
+  private type ActivationKey = (String, String, String, String)
+
+  private val activationMetrics = new TrieMap[ActivationKey, ActivationPromMetrics]
+  private val limitMetrics = new TrieMap[String, LimitPromMetrics]
+
+  /** Records one activation event on the metrics bound to its action and initiator. */
+  override def processActivation(activation: Activation, initiatorNamespace: String): Unit = {
+    lookup(activation, initiatorNamespace).record(activation)
+  }
+
+  /** Records one metric event on the limit counters of the initiating namespace. */
+  override def processMetric(metric: Metric, initiatorNamespace: String): Unit = {
+    val limitMetric = limitMetrics.getOrElseUpdate(initiatorNamespace, LimitPromMetrics(initiatorNamespace))
+    limitMetric.record(metric)
+  }
+
+  /** Builds the response entity, typed as [[PrometheusExporter.textV4]], from a fresh source. */
+  override def getReport(): MessageEntity =
+    HttpEntity(PrometheusExporter.textV4, createSource())
+
+  /**
+   * Returns the cached metrics for the given activation, creating them on first use.
+   *
+   * The cache is keyed on all label-determining values. Keying on the activation name alone would
+   * hand back an instance bound to the first seen initiator, kind and memory, so activations
+   * triggered by another initiator (or after a kind/memory change) would be counted under stale
+   * label values.
+   */
+  private def lookup(activation: Activation, initiatorNamespace: String): ActivationPromMetrics = {
+    //TODO Unregister unused actions
+    val kind = activation.kind
+    val memory = activation.memory.toString
+    val key: ActivationKey = (initiatorNamespace, activation.name, kind, memory)
+    activationMetrics.getOrElseUpdate(
+      key,
+      ActivationPromMetrics(activation.namespace, activation.action, kind, memory, initiatorNamespace))
+  }
+
+  /** Limit counters bound to a single namespace label value. */
+  case class LimitPromMetrics(namespace: String) {
+    private val concurrentLimit = concurrentLimitCounter.labels(namespace)
+    private val timedLimit = timedLimitCounter.labels(namespace)
+
+    /** Increments the counter matching the metric name; unknown names are only logged. */
+    def record(m: Metric): Unit = {
+      m.metricName match {
+        case "ConcurrentRateLimit" => concurrentLimit.inc()
+        case "TimedRateLimit"      => timedLimit.inc()
+        case x                     => log.warn(s"Unknown limit $x")
+      }
+    }
+  }
+
+  /** Collector children bound to the label values of one action as invoked by one initiator. */
+  case class ActivationPromMetrics(namespace: String,
+                                   action: String,
+                                   kind: String,
+                                   memory: String,
+                                   initiatorNamespace: String) {
+    private val activations = activationCounter.labels(namespace, initiatorNamespace, action, kind, memory)
+    private val coldStarts = coldStartCounter.labels(namespace, initiatorNamespace, action)
+    private val waitTime = waitTimeHisto.labels(namespace, initiatorNamespace, action)
+    private val initTime = initTimeHisto.labels(namespace, initiatorNamespace, action)
+    private val duration = durationHisto.labels(namespace, initiatorNamespace, action)
+
+    // memoryGauge is created through the `histogram` helper of the companion, hence `observe` below
+    private val gauge = memoryGauge.labels(namespace, initiatorNamespace, action)
+
+    // Children for the known status values are resolved once up front
+    private val statusSuccess =
+      statusCounter.labels(namespace, initiatorNamespace, action, ActivationResponse.statusSuccess)
+    private val statusApplicationError =
+      statusCounter.labels(namespace, initiatorNamespace, action, ActivationResponse.statusApplicationError)
+    private val statusDeveloperError =
+      statusCounter.labels(namespace, initiatorNamespace, action, ActivationResponse.statusDeveloperError)
+    private val statusInternalError =
+      statusCounter.labels(namespace, initiatorNamespace, action, ActivationResponse.statusWhiskError)
+
+    /** Updates all collectors for a single activation. */
+    def record(a: Activation): Unit = {
+      gauge.observe(a.memory)
+
+      activations.inc()
+
+      // Init time is only observed for cold starts
+      if (a.isColdStart) {
+        coldStarts.inc()
+        initTime.observe(seconds(a.initTime))
+      }
+
+      //waitTime may be zero for activations which are part of sequence
+      waitTime.observe(seconds(a.waitTime))
+      duration.observe(seconds(a.duration))
+
+      a.status match {
+        case ActivationResponse.statusSuccess          => statusSuccess.inc()
+        case ActivationResponse.statusApplicationError => statusApplicationError.inc()
+        case ActivationResponse.statusDeveloperError   => statusDeveloperError.inc()
+        case ActivationResponse.statusWhiskError       => statusInternalError.inc()
+        // Any other status value gets its child resolved on demand
+        case x => statusCounter.labels(namespace, initiatorNamespace, action, x).inc()
+      }
+    }
+  }
+
+  //Returns a floating point number
+  private def seconds(time: Duration): Double = time.toUnit(TimeUnit.SECONDS)
+
+  // Java client samples first, Kamon scrape data afterwards
+  private def createSource() =
+    Source.combine(createJavaClientSource(), createKamonSource())(Concat(_)).map(ByteString(_))
+
+  /**
+   * Enables streaming the prometheus metric data without building the whole report in memory
+   */
+  private def createJavaClientSource() =
+    Source
+      .fromIterator(() => CollectorRegistry.defaultRegistry.metricFamilySamples().asScala)
+      .map { sample =>
+        //Stream string representation of one sample at a time
+        val writer = new StringWriter()
+        TextFormat.write004(writer, singletonEnumeration(sample))
+        writer.toString
+      }
+
+  private def createKamonSource() = Source.single(kamon.scrapeData())
+
+  // Single element enumeration as required by TextFormat.write004
+  private def singletonEnumeration[A](value: A): util.Enumeration[A] =
+    util.Collections.enumeration(util.Collections.singletonList(value))
+}
+
+/**
+ * Collectors used by [[PrometheusRecorder]]. Each collector is built and registered via
+ * `register()` when this object is initialized.
+ */
+object PrometheusRecorder extends PrometheusMetricNames {
+  // Label names shared by every per-action collector
+  private val actionLabels = Seq(actionNamespace, initiatorNamespace, actionName)
+
+  private val activationCounter =
+    counter(activationMetric, "Activation Count", actionLabels ++ Seq(actionKind, actionMemory): _*)
+
+  private val coldStartCounter = counter(coldStartMetric, "Cold start counts", actionLabels: _*)
+
+  private val statusCounter =
+    counter(statusMetric, "Activation failure status type", actionLabels :+ actionStatus: _*)
+
+  private val waitTimeHisto = histogram(waitTimeMetric, "Internal system hold time", actionLabels: _*)
+
+  private val initTimeHisto =
+    histogram(initTimeMetric, "Time it took to initialize an action, e.g. docker init", actionLabels: _*)
+
+  private val durationHisto =
+    histogram(durationMetric, "Actual time the action code was running", actionLabels: _*)
+
+  // Despite its name this collector is a histogram
+  private val memoryGauge =
+    histogram(memoryMetric, "Memory consumption of the action containers", actionLabels: _*)
+
+  // The limit counters carry the namespace label only
+  private val concurrentLimitCounter =
+    counter(concurrentLimitMetric, "a user has exceeded its limit for concurrent invocations", actionNamespace)
+
+  private val timedLimitCounter =
+    counter(
+      timedLimitMetric,
+      "the user has reached its per minute limit for the number of invocations",
+      actionNamespace)
+
+  /** Builds and registers a counter with the given name, help text and label names. */
+  private def counter(name: String, help: String, tags: String*) = {
+    val builder = Counter.build().name(name).help(help)
+    builder.labelNames(tags: _*).register()
+  }
+
+  /** Builds and registers a gauge with the given name, help text and label names. */
+  private def gauge(name: String, help: String, tags: String*) = {
+    val builder = Gauge.build().name(name).help(help)
+    builder.labelNames(tags: _*).register()
+  }
+
+  /** Builds and registers a histogram with the given name, help text and label names. */
+  private def histogram(name: String, help: String, tags: String*) = {
+    val builder = Histogram.build().name(name).help(help)
+    builder.labelNames(tags: _*).register()
+  }
+}
diff --git
a/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/ApiTests.scala
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/ApiTests.scala
new file mode 100644
index 0000000..a2cd5f7
--- /dev/null
+++
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/ApiTests.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import akka.http.scaladsl.model.headers.HttpEncodings._
+import akka.http.scaladsl.model.headers.{`Accept-Encoding`,
`Content-Encoding`, HttpEncoding, HttpEncodings}
+import akka.http.scaladsl.model.{HttpCharsets, HttpEntity, HttpResponse}
+import akka.http.scaladsl.testkit.ScalatestRouteTest
+import org.junit.runner.RunWith
+import org.scalatest.concurrent.ScalaFutures
+import org.scalatest.junit.JUnitRunner
+import org.scalatest.matchers.Matcher
+import org.scalatest.{FlatSpec, Matchers}
+
+import scala.concurrent.duration.DurationInt
+
+/**
+ * Route level tests of [[PrometheusEventsApi]] using a consumer pointed at a fixed local port
+ * and a stub exporter.
+ */
+@RunWith(classOf[JUnitRunner])
+class ApiTests extends FlatSpec with Matchers with ScalatestRouteTest with EventsTestHelper with ScalaFutures {
+  implicit val timeoutConfig = PatienceConfig(1.minute)
+  behavior of "EventsApi"
+
+  it should "respond ping request" in {
+    withApi { api =>
+      Get("/ping") ~> api.routes ~> check {
+        //Due to retries using a random port does not immediately result in failure
+        handled shouldBe true
+      }
+    }
+  }
+
+  it should "respond metrics request" in {
+    withApi { api =>
+      Get("/metrics") ~> `Accept-Encoding`(gzip) ~> api.routes ~> check {
+        contentType.charsetOption shouldBe Some(HttpCharsets.`UTF-8`)
+        contentType.mediaType.params("version") shouldBe "0.0.4"
+        response should haveContentEncoding(gzip)
+      }
+    }
+  }
+
+  // Creates consumer and api, runs the given checks and shuts the consumer down afterwards
+  private def withApi(checks: PrometheusEventsApi => Unit): Unit = {
+    val consumer = createConsumer(56754, system.settings.config)
+    checks(new PrometheusEventsApi(consumer, createExporter()))
+    consumer.shutdown().futureValue
+  }
+
+  // Matches on the first encoding of the Content-Encoding header, `identity` if the header is absent
+  private def haveContentEncoding(encoding: HttpEncoding): Matcher[HttpResponse] =
+    be(encoding).compose[HttpResponse] { resp =>
+      resp.header[`Content-Encoding`].fold(HttpEncodings.identity)(_.encodings.head)
+    }
+
+  // Stub exporter serving a fixed payload
+  private def createExporter(): PrometheusExporter = new PrometheusExporter {
+    override def getReport() = HttpEntity(PrometheusExporter.textV4, "foo".getBytes)
+  }
+}
diff --git
a/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/EventsTestHelper.scala
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/EventsTestHelper.scala
new file mode 100644
index 0000000..71b8d2e
--- /dev/null
+++
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/EventsTestHelper.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import java.net.ServerSocket
+
+import akka.actor.ActorSystem
+import akka.stream.ActorMaterializer
+import com.typesafe.config.Config
+import kamon.prometheus.PrometheusReporter
+
+/**
+ * Helpers shared by the user-events tests.
+ */
+trait EventsTestHelper {
+
+  /**
+   * Creates an event consumer whose bootstrap server is `localhost:kport`.
+   *
+   * @param kport Kafka port on localhost
+   * @param globalConfig config from which the default consumer config is derived
+   * @param recorder recorder passed to the consumer
+   */
+  protected def createConsumer(kport: Int,
+                               globalConfig: Config,
+                               recorder: MetricRecorder = PrometheusRecorder(new PrometheusReporter))(
+    implicit system: ActorSystem,
+    materializer: ActorMaterializer) = {
+    val consumerConfig = OpenWhiskEvents.defaultConsumerConfig(globalConfig)
+    val bootstrapServers = s"localhost:$kport"
+    val settings = OpenWhiskEvents.eventConsumerSettings(consumerConfig).withBootstrapServers(bootstrapServers)
+    EventConsumer(settings, Seq(recorder))
+  }
+
+  /** Returns a port obtained by binding a server socket to port 0; the socket is closed again. */
+  protected def freePort(): Int = {
+    val probe = new ServerSocket(0)
+    try {
+      probe.getLocalPort
+    } finally {
+      probe.close()
+    }
+  }
+}
diff --git
a/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/KafkaSpecBase.scala
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/KafkaSpecBase.scala
new file mode 100644
index 0000000..3549159
--- /dev/null
+++
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/KafkaSpecBase.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import akka.actor.ActorSystem
+import akka.stream.ActorMaterializer
+import akka.testkit.TestKit
+import net.manub.embeddedkafka.EmbeddedKafka
+import org.scalatest._
+import org.scalatest.concurrent.{Eventually, IntegrationPatience, ScalaFutures}
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.concurrent.duration.{DurationInt, FiniteDuration}
+
+/**
+ * Base class of the specs in this package which mix in `EmbeddedKafka`. It provides a test actor
+ * system named `test`, a materializer, a logger and a one minute patience config.
+ */
+abstract class KafkaSpecBase
+ extends TestKit(ActorSystem("test"))
+ with Suite
+ with Matchers
+ with ScalaFutures
+ with FlatSpecLike
+ with EmbeddedKafka
+ with IntegrationPatience
+ with BeforeAndAfterAll
+ with BeforeAndAfterEach
+ with Eventually
+ with EventsTestHelper { this: Suite =>
+ val log: Logger = LoggerFactory.getLogger(getClass)
+ // Upper bound used by `futureValue` and friends
+ implicit val timeoutConfig = PatienceConfig(1.minute)
+
+ implicit val materializer = ActorMaterializer()
+
+ // Logs the duration (and optional reason) and then blocks the calling thread for that long
+ def sleep(time: FiniteDuration, msg: String = ""): Unit = {
+ log.info(s"sleeping $time $msg")
+ Thread.sleep(time.toMillis)
+ }
+
+ // NOTE(review): `shutdown()` is presumably TestKit's, stopping the actor system - confirm
+ override protected def afterAll(): Unit = {
+ super.afterAll()
+ shutdown()
+ }
+}
diff --git
a/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/KamonRecorderTests.scala
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/KamonRecorderTests.scala
new file mode 100644
index 0000000..e25d938
--- /dev/null
+++
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/KamonRecorderTests.scala
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import java.time.Duration
+
+import com.typesafe.config.{Config, ConfigFactory}
+import kamon.metric.{PeriodSnapshot, PeriodSnapshotAccumulator}
+import kamon.util.Registration
+import kamon.{Kamon, MetricReporter}
+import net.manub.embeddedkafka.EmbeddedKafkaConfig
+import org.junit.runner.RunWith
+import org.scalatest.BeforeAndAfterEach
+import org.scalatest.junit.JUnitRunner
+import org.apache.openwhisk.core.connector.{Activation, EventMessage}
+import org.apache.openwhisk.core.entity.{ActivationResponse, Subject, UUID}
+
+import scala.concurrent.duration._
+
+/**
+ * Publishes activation events to an embedded Kafka and checks, through a Kamon test reporter,
+ * that `KamonRecorder` turned them into the expected counters and histograms.
+ */
+@RunWith(classOf[JUnitRunner])
+class KamonRecorderTests extends KafkaSpecBase with BeforeAndAfterEach with KamonMetricNames {
+  val sleepAfterProduce: FiniteDuration = 4.seconds
+  var reporterReg: Registration = _
+
+  // Use a short tick interval and register a fresh test reporter for every test
+  override protected def beforeEach(): Unit = {
+    super.beforeEach()
+    TestReporter.reset()
+    val newConfig = ConfigFactory.parseString("""kamon {
+                                                | metric {
+                                                | tick-interval = 50 ms
+                                                | optimistic-tick-alignment = no
+                                                | }
+                                                |}""".stripMargin).withFallback(ConfigFactory.load())
+    Kamon.reconfigure(newConfig)
+    reporterReg = Kamon.addReporter(TestReporter)
+  }
+
+  // Deregister the reporter and restore the default Kamon configuration
+  override protected def afterEach(): Unit = {
+    reporterReg.cancel()
+    Kamon.reconfigure(ConfigFactory.load())
+    super.afterEach()
+  }
+
+  behavior of "KamonConsumer"
+
+  val namespace = "whisk.system"
+  val initiator = "testNS"
+  val actionWithCustomPackage = "apimgmt/createApi"
+  val actionWithDefaultPackage = "createApi"
+  val kind = "nodejs:10"
+  val memory = 256
+
+  it should "push user events to kamon" in {
+    val kconfig = EmbeddedKafkaConfig(kafkaPort = 0, zooKeeperPort = 0)
+    withRunningKafkaOnFoundPort(kconfig) { implicit actualConfig =>
+      createCustomTopic(EventConsumer.userEventTopic)
+
+      val consumer = createConsumer(actualConfig.kafkaPort, system.settings.config, KamonRecorder)
+
+      publishStringMessageToKafka(
+        EventConsumer.userEventTopic,
+        newActivationEvent(s"$namespace/$actionWithCustomPackage").serialize)
+
+      publishStringMessageToKafka(
+        EventConsumer.userEventTopic,
+        newActivationEvent(s"$namespace/$actionWithDefaultPackage").serialize)
+
+      sleep(sleepAfterProduce, "sleeping post produce")
+      consumer.shutdown().futureValue
+      sleep(4.second, "sleeping for Kamon reporters to get invoked")
+
+      // Custom package
+      TestReporter.counter(activationMetric, actionWithCustomPackage).size shouldBe 1
+      TestReporter
+        .counter(activationMetric, actionWithCustomPackage)
+        .count(_.tags.get(actionMemory).contains(memory.toString)) shouldBe 1
+      TestReporter
+        .counter(activationMetric, actionWithCustomPackage)
+        .count(_.tags.get(actionKind).contains(kind)) shouldBe 1
+      TestReporter
+        .counter(statusMetric, actionWithCustomPackage)
+        .count(_.tags.get(actionStatus).contains(ActivationResponse.statusDeveloperError)) shouldBe 1
+      TestReporter.counter(coldStartMetric, actionWithCustomPackage).size shouldBe 1
+      TestReporter.histogram(waitTimeMetric, actionWithCustomPackage).size shouldBe 1
+      TestReporter.histogram(initTimeMetric, actionWithCustomPackage).size shouldBe 1
+      TestReporter.histogram(durationMetric, actionWithCustomPackage).size shouldBe 1
+
+      // Default package
+      TestReporter.histogram(durationMetric, actionWithDefaultPackage).size shouldBe 1
+    }
+  }
+
+  // Activation event for the given action name, sent on behalf of `initiator`
+  private def newActivationEvent(name: String) =
+    EventMessage(
+      namespace,
+      Activation(name, 2, 3.millis, 5.millis, 11.millis, kind, false, memory, None),
+      Subject("testuser"),
+      initiator,
+      UUID("test"),
+      Activation.typeName)
+
+  /**
+   * Reporter which accumulates every period snapshot so that the test can inspect the metrics
+   * reported so far.
+   */
+  private object TestReporter extends MetricReporter {
+    var snapshotAccumulator = new PeriodSnapshotAccumulator(Duration.ofDays(1), Duration.ZERO)
+    override def reportPeriodSnapshot(snapshot: PeriodSnapshot): Unit = {
+      snapshotAccumulator.add(snapshot)
+    }
+
+    override def start(): Unit = {}
+    override def stop(): Unit = {}
+    override def reconfigure(config: Config): Unit = {}
+
+    // Drops everything accumulated so far
+    def reset(): Unit = {
+      snapshotAccumulator = new PeriodSnapshotAccumulator(Duration.ofDays(1), Duration.ZERO)
+    }
+
+    // Counters with the given name tagged with the test namespace, initiator and given action.
+    // Metrics lacking one of the tags are filtered out instead of failing the lookup.
+    def counter(name: String, action: String) = {
+      snapshotAccumulator
+        .peek()
+        .metrics
+        .counters
+        .filter(_.name == name)
+        .filter(_.tags.get(actionNamespace).contains(namespace))
+        .filter(_.tags.get(initiatorNamespace).contains(initiator))
+        .filter(_.tags.get(actionName).contains(action))
+    }
+
+    // Histograms with the given name tagged with the test namespace, initiator and given action.
+    // Metrics lacking one of the tags are filtered out instead of failing the lookup.
+    def histogram(name: String, action: String) = {
+      snapshotAccumulator
+        .peek()
+        .metrics
+        .histograms
+        .filter(_.name == name)
+        .filter(_.tags.get(actionNamespace).contains(namespace))
+        .filter(_.tags.get(initiatorNamespace).contains(initiator))
+        .filter(_.tags.get(actionName).contains(action))
+    }
+  }
+}
diff --git
a/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/OpenWhiskEventsTests.scala
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/OpenWhiskEventsTests.scala
new file mode 100644
index 0000000..0354c42
--- /dev/null
+++
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/OpenWhiskEventsTests.scala
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import akka.http.scaladsl.Http
+import akka.http.scaladsl.model.{HttpRequest, StatusCodes}
+import akka.http.scaladsl.unmarshalling.Unmarshal
+import com.typesafe.config.ConfigFactory
+import kamon.Kamon
+import net.manub.embeddedkafka.EmbeddedKafkaConfig
+import org.junit.runner.RunWith
+import org.scalatest.junit.JUnitRunner
+import scala.concurrent.duration._
+
+import scala.util.Try
+
+/**
+ * Starts the service against an embedded Kafka and exercises its HTTP endpoints.
+ */
+@RunWith(classOf[JUnitRunner])
+class OpenWhiskEventsTests extends KafkaSpecBase {
+  behavior of "Server"
+
+  it should "start working http server" in {
+    val kconfig = EmbeddedKafkaConfig(kafkaPort = 0, zooKeeperPort = 0)
+    withRunningKafkaOnFoundPort(kconfig) { implicit actualConfig =>
+      val httpPort = freePort()
+      val kafkaPort = actualConfig.kafkaPort
+      // Point the consumer at the embedded Kafka and the server at the free port
+      val overrides = ConfigFactory.parseString(s"""
+           | akka.kafka.consumer.kafka-clients {
+           | bootstrap.servers = "localhost:$kafkaPort"
+           | }
+           | kamon {
+           | metric {
+           | tick-interval = 50 ms
+           | optimistic-tick-alignment = no
+           | }
+           | }
+           | whisk {
+           | user-events {
+           | port = $httpPort
+           | }
+           | }
+           | """.stripMargin)
+      val config = overrides.withFallback(system.settings.config)
+
+      val binding = OpenWhiskEvents.start(config).futureValue
+      get("localhost", httpPort, "/ping") shouldBe Some((StatusCodes.OK, "pong"))
+
+      //Check if metrics using Kamon API gets included in consolidated Prometheus
+      Kamon.counter("fooTest").increment(42)
+      sleep(1.second)
+      val metricsBody = get("localhost", httpPort, "/metrics").get._2
+      metricsBody should include("fooTest")
+
+      binding.unbind().futureValue
+    }
+  }
+
+  /**
+   * Issues a GET request and returns status and body, or `None` if the request itself failed.
+   */
+  def get(host: String, port: Int, path: String = "/") = {
+    val request = HttpRequest(uri = s"http://$host:$port$path")
+    Try(Http().singleRequest(request).futureValue).toOption
+      .map(res => (res.status, Unmarshal(res).to[String].futureValue))
+  }
+}
diff --git
a/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/PrometheusRecorderTests.scala
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/PrometheusRecorderTests.scala
new file mode 100644
index 0000000..a2c9732
--- /dev/null
+++
b/core/monitoring/user-events/src/test/scala/org/apache/openwhisk/core/monitoring/metrics/PrometheusRecorderTests.scala
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.openwhisk.core.monitoring.metrics
+
+import io.prometheus.client.CollectorRegistry
+import net.manub.embeddedkafka.EmbeddedKafkaConfig
+import org.junit.runner.RunWith
+import org.scalatest.BeforeAndAfterEach
+import org.scalatest.junit.JUnitRunner
+import org.apache.openwhisk.core.connector.{Activation, EventMessage}
+import org.apache.openwhisk.core.entity.{ActivationResponse, Subject, UUID}
+
+import scala.concurrent.duration._
+
+/**
+ * Publishes activation events to an embedded Kafka and checks the sample values which the
+ * default recorder left in `CollectorRegistry.defaultRegistry`.
+ */
+@RunWith(classOf[JUnitRunner])
+class PrometheusRecorderTests extends KafkaSpecBase with BeforeAndAfterEach with PrometheusMetricNames {
+  val sleepAfterProduce: FiniteDuration = 4.seconds
+
+  behavior of "PrometheusConsumer"
+  val namespace = "whisk.system"
+  val initiator = "testNS"
+  val actionWithCustomPackage = "apimgmt/createApiOne"
+  val actionWithDefaultPackage = "createApi"
+  val kind = "nodejs:10"
+  val memory = "256"
+
+  it should "push user events to kamon" in {
+    val kconfig = EmbeddedKafkaConfig(kafkaPort = 0, zooKeeperPort = 0)
+    withRunningKafkaOnFoundPort(kconfig) { implicit actualConfig =>
+      createCustomTopic(EventConsumer.userEventTopic)
+
+      val consumer = createConsumer(actualConfig.kafkaPort, system.settings.config)
+      Seq(actionWithCustomPackage, actionWithDefaultPackage).foreach { action =>
+        publishStringMessageToKafka(
+          EventConsumer.userEventTopic,
+          newActivationEvent(s"$namespace/$action", kind, memory, initiator).serialize)
+      }
+
+      // Custom package
+      sleep(sleepAfterProduce, "sleeping post produce")
+      consumer.shutdown().futureValue
+      counterTotal(activationMetric, actionWithCustomPackage) shouldBe 1
+      counter(coldStartMetric, actionWithCustomPackage) shouldBe 1
+      counterStatus(statusMetric, actionWithCustomPackage, ActivationResponse.statusDeveloperError) shouldBe 1
+
+      histogramCount(waitTimeMetric, actionWithCustomPackage) shouldBe 1
+      histogramSum(waitTimeMetric, actionWithCustomPackage) shouldBe (0.03 +- 0.001)
+
+      histogramCount(initTimeMetric, actionWithCustomPackage) shouldBe 1
+      histogramSum(initTimeMetric, actionWithCustomPackage) shouldBe (433.433 +- 0.01)
+
+      histogramCount(durationMetric, actionWithCustomPackage) shouldBe 1
+      histogramSum(durationMetric, actionWithCustomPackage) shouldBe (1.254 +- 0.01)
+
+      gauge(memoryMetric, actionWithCustomPackage) shouldBe 1
+
+      // Default package
+      counterTotal(activationMetric, actionWithDefaultPackage) shouldBe 1
+    }
+  }
+
+  // Activation event for the given action name, kind and memory, sent on behalf of `initiator`
+  private def newActivationEvent(name: String, kind: String, memory: String, initiator: String) = {
+    val activation = Activation(name, 2, 1254.millis, 30.millis, 433433.millis, kind, false, memory.toInt, None)
+    EventMessage("test", activation, Subject("testuser"), initiator, UUID("test"), Activation.typeName)
+  }
+
+  // Label names carried by every per-action sample
+  private def actionLabels = Array("namespace", "initiator", "action")
+
+  // Sample of the default registry carrying the action labels plus the given extra labels
+  private def sample(name: String, action: String, extra: (String, String)*) =
+    CollectorRegistry.defaultRegistry.getSampleValue(
+      name,
+      actionLabels ++ extra.map(_._1),
+      Array(namespace, initiator, action) ++ extra.map(_._2))
+
+  // The memory metric is looked up through its `_count` sample
+  private def gauge(name: String, action: String) = sample(s"${name}_count", action)
+
+  private def counter(name: String, action: String) = sample(name, action)
+
+  private def counterTotal(name: String, action: String) =
+    sample(name, action, "kind" -> kind, "memory" -> memory)
+
+  private def counterStatus(name: String, action: String, status: String) =
+    sample(name, action, "status" -> status)
+
+  private def histogramCount(name: String, action: String) = sample(s"${name}_count", action)
+
+  private def histogramSum(name: String, action: String) = sample(s"${name}_sum", action).doubleValue()
+}
diff --git a/docs/metrics.md b/docs/metrics.md
index 9d00b38..19b6b94 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -331,3 +331,6 @@ Metric:
```
{"body":{"metricName":"ConcurrentInvocations","metricValue":1},"eventType":"Metric","source":"controller0","subject":"guest","timestamp":1524476104419,"userId":"23bc46b1-71f6-4ed5-8c54-816aa4f8c502","namespace":"guest"}
```
+
+### User-events consumer service
+All user metrics can be consumed and published to various services such as Prometheus, Datadog, etc. via Kamon by using the [user-events service](https://github.com/apache/openwhisk/tree/master/core/monitoring/user-events/README.md).
diff --git a/settings.gradle b/settings.gradle
index 02e46b8..fc9b0b4 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -21,6 +21,7 @@ include 'core:controller'
include 'core:invoker'
include 'core:cosmosdb:cache-invalidator'
include 'core:standalone'
+include 'core:monitoring:user-events'
include 'tests'
include 'tests:performance:gatling_tests'
diff --git a/tests/build.gradle b/tests/build.gradle
index 4590e77..5def20f 100644
--- a/tests/build.gradle
+++ b/tests/build.gradle
@@ -205,6 +205,7 @@ dependencies {
compile project(':core:controller')
compile project(':core:invoker')
compile project(':core:cosmosdb:cache-invalidator')
+ compile project(':core:monitoring:user-events')
compile project(':tools:admin')
swaggerCodegen 'io.swagger:swagger-codegen-cli:2.3.1'
diff --git a/tests/src/test/scala/org/apache/openwhisk/common/UserEventTests.scala b/tests/src/test/scala/org/apache/openwhisk/common/UserEventTests.scala
index f321254..fa3fa88 100644
--- a/tests/src/test/scala/org/apache/openwhisk/common/UserEventTests.scala
+++ b/tests/src/test/scala/org/apache/openwhisk/common/UserEventTests.scala
@@ -74,13 +74,13 @@ class UserEventTests extends FlatSpec with Matchers with WskTestHelpers with Str
case (_, _, _, msg) => EventMessage.parse(new String(msg, StandardCharsets.UTF_8))
}
received.map(event => {
- event.body match {
+ event.get.body match {
case a: Activation =>
Seq(a.statusCode) should contain oneOf (0, 1, 2, 3)
- event.source should fullyMatch regex "(invoker|controller)\\d+".r
+ event.get.source should fullyMatch regex "(invoker|controller)\\d+".r
case m: Metric =>
Seq(m.metricName) should contain oneOf ("ConcurrentInvocations", "ConcurrentRateLimit", "TimedRateLimit")
- event.source should fullyMatch regex "controller\\d+".r
+ event.get.source should fullyMatch regex "controller\\d+".r
}
})
// produce at least 2 events - an Activation and a 'ConcurrentInvocations' Metric
diff --git a/tests/src/test/scala/org/apache/openwhisk/core/connector/test/EventMessageTests.scala b/tests/src/test/scala/org/apache/openwhisk/core/connector/test/EventMessageTests.scala
index bf59c67..031304b 100644
--- a/tests/src/test/scala/org/apache/openwhisk/core/connector/test/EventMessageTests.scala
+++ b/tests/src/test/scala/org/apache/openwhisk/core/connector/test/EventMessageTests.scala
@@ -18,6 +18,7 @@
package org.apache.openwhisk.core.connector.test
import java.time.Instant
+import java.util.concurrent.TimeUnit
import org.junit.runner.RunWith
import org.scalatest.{FlatSpec, Matchers}
@@ -57,7 +58,7 @@ class EventMessageTests extends FlatSpec with Matchers {
it should "transform an activation into an event body" in {
Activation.from(fullActivation) shouldBe Success(
- Activation("ns2/a", 0, 123, 5, 10, "testkind", false, 128, Some("sequence")))
+ Activation("ns2/a", 0, toDuration(123), toDuration(5), toDuration(10), "testkind", false, 128, Some("sequence")))
}
it should "fail transformation if needed annotations are missing" in {
@@ -75,6 +76,9 @@ class EventMessageTests extends FlatSpec with Matchers {
WhiskActivation.pathAnnotation,
"ns2/a"))
- Activation.from(a) shouldBe Success(Activation("ns2/a", 0, 0, 0, 0, "testkind", false, 0, None))
+ Activation.from(a) shouldBe Success(
+ Activation("ns2/a", 0, toDuration(0), toDuration(0), toDuration(0), "testkind", false, 0, None))
}
+
+ def toDuration(milliseconds: Long) = new FiniteDuration(milliseconds, TimeUnit.MILLISECONDS)
}
diff --git a/tools/jenkins/apache/dockerhub.groovy b/tools/jenkins/apache/dockerhub.groovy
index 4b075f3..3072fec 100644
--- a/tools/jenkins/apache/dockerhub.groovy
+++ b/tools/jenkins/apache/dockerhub.groovy
@@ -29,7 +29,7 @@ node('xenial&&!H21&&!H22&&!H11&&!ubuntu-eu3') {
withCredentials([usernamePassword(credentialsId: 'openwhisk_dockerhub', passwordVariable: 'DOCKER_PASSWORD', usernameVariable: 'DOCKER_USER')]) {
sh 'docker login -u ${DOCKER_USER} -p ${DOCKER_PASSWORD}'
}
- def PUSH_CMD = "./gradlew :core:controller:distDocker :core:invoker:distDocker :tools:ow-utils:distDocker -PdockerRegistry=docker.io -PdockerImagePrefix=openwhisk"
+ def PUSH_CMD = "./gradlew :core:controller:distDocker :core:invoker:distDocker :core:monitoring:user-events:distDocker :tools:ow-utils:distDocker -PdockerRegistry=docker.io -PdockerImagePrefix=openwhisk"
def gitCommit = sh(returnStdout: true, script: 'git rev-parse HEAD').trim()
def shortCommit = gitCommit.take(7)
sh "./gradlew clean"