Ori.livneh has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/202362

Change subject: [WIP] Add a script for storing NavTiming metrics using RRD
......................................................................

[WIP] Add a script for storing NavTiming metrics using RRD

This script provides simple, minimal and robust storage for a small set of
time-series metrics. The idea is not to replace a full-fledged
time-series database but to implement the bare minimum subset of
features required to power a site like <https://status.github.com/>.

rrd-navtiming subscribes to NavigationTiming events via EventLogging and it
updates a pair of RRD files in its working directory: mobile.rrd
and desktop.rrd. If the files do not exist, they are created.

Change-Id: Id6e662a77d024363882bd54bd45c4aa5445d5503
---
A modules/webperf/files/rrd-navtiming.py
1 file changed, 184 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/62/202362/1

diff --git a/modules/webperf/files/rrd-navtiming.py b/modules/webperf/files/rrd-navtiming.py
new file mode 100644
index 0000000..0f37195
--- /dev/null
+++ b/modules/webperf/files/rrd-navtiming.py
@@ -0,0 +1,184 @@
+# -*- coding: utf-8 -*-
+"""
+  rrd-navtiming
+  ~~~~~~~~~~~~~
+
+  This script provides simple, minimal and robust storage for a small
+  set of time-series metrics. The idea is not to replace a full-fledged
+  time-series database but to implement the bare minimum subset of
+  features required to power a site like <https://status.github.com/>.
+
+  rrd-navtiming subscribes to NavigationTiming events via EventLogging
+  and it updates a pair of RRD files in its working directory: mobile.rrd
+  and desktop.rrd. If the files do not exist, they are created.
+
+  Usage:
+
+      rrd-navtiming EVENTLOGGING_ENDPOINT
+
+  For example:
+
+      rrd-navtiming tcp://eventlog1001.eqiad.wmnet:8600
+
+  Requirements:
+
+      * eventlogging
+        https://github.com/wikimedia/mediawiki-extensions-EventLogging/
+      * rrdtool
+        http://oss.oetiker.ch/rrdtool/prog/rrdpython.en.html
+
+  Copyright 2015 Ori Livneh <[email protected]>
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+"""
+import sys
+reload(sys)
+sys.setdefaultencoding('utf-8')
+
+import bisect
+import heapq
+import logging
+
+import eventlogging
+import rrdtool
+
+if len(sys.argv) != 2:
+    sys.exit('Usage: %s EVENTLOGGING_ENDPOINT' % sys.argv[0])
+
+logging.basicConfig(format='[%(asctime)-15s] %(message)s', level=logging.INFO)
+
+PLATFORMS = ('mobile', 'desktop')
+
+METRICS = (
+    'responseStart',  # Time to user agent receiving first byte
+    'firstPaint',     # Time to initial render
+    'domComplete',    # Time to DOM Complete event
+    'loadEventEnd',   # Time to load event completion
+)
+
+# Size of sliding window, in seconds.
+WINDOW_SIZE = 300
+
+# Aggregation intervals.
+INTERVALS = (
+    60 * 60,                # Hour
+    60 * 60 * 24,           # Day
+    60 * 60 * 24 * 7,       # Week
+    60 * 60 * 24 * 30,      # Month
+    60 * 60 * 24 * 365.25,  # Year
+)
+
+# Store 120 values at each resolution. This makes graphing simpler,
+# because we're always working with a fixed number of points.
+ROWS = 120
+
+# We will push an aggregate value as often as we need in order to have
+# ROWS many values at the smallest INTERVAL.
+STEP = INTERVALS[0] / ROWS
+
+# Set the maximum acceptable interval between samples ("heartbeat") to a
+# full day. This means RRD will record an estimate for missing samples as
+# long as it has at least one sample from the last 24h to go by. If we go
+# longer than 24h without reporting a measurement, RRD will record a value
+# of UNKNOWN instead.
+HEARTBEAT = 86400
+
+# The expected range for measurements is 0 - 30,000 milliseconds.
+MIN, MAX = 0, 30000
+
+SOURCES = ['DS:%s:GAUGE:%d:%d:%d' % (metric, HEARTBEAT, MIN, MAX)
+           for metric in METRICS]
+
+ARCHIVES = ['RRA:AVERAGE:0.5:%d:%d' % (interval_length / ROWS, ROWS)
+            for interval_length in INTERVALS]
+
+
+def median(sorted_list):
+    """Compute the median of a sorted list."""
+    if not sorted_list:
+        raise ValueError('Cannot compute median of empty list.')
+    length = len(sorted_list)
+    index = (length - 1) // 2
+    if length % 2:
+        return sorted_list[index]
+    sum_of_terms = sorted_list[index] + sorted_list[index + 1]
+    return sum_of_terms / 2.0
+
+
+def accumulate(event_data):
+    """Group samples by metric and platform and compute medians."""
+    data = {p: {m: [] for m in METRICS} for p in PLATFORMS}
+    for timestamp, event in event_data:
+        platform = 'mobile' if 'mobileMode' in event else 'desktop'
+        for metric in METRICS:
+            value = event.get(metric)
+            if value:
+                bisect.insort(data[platform][metric], value)
+    for platform in PLATFORMS:
+        for metric in METRICS:
+            values = data[platform][metric]
+            data[platform][metric] = median(values)
+    return data
+
+
+# Create RRD files.
+for platform in PLATFORMS:
+    rrd_file = platform + '.rrd'
+    args = [
+        rrd_file,
+        '--no-overwrite',
+        '--step', str(STEP),
+        '--start', 'N'
+    ] + SOURCES + ARCHIVES
+    try:
+        rrdtool.create(*args)
+    except rrdtool.OperationalError as e:
+        if not e.message.endswith('File exists'):
+            raise
+
+# Ensure we wait and accumulate data for a full WINDOW_SIZE
+# before we report any metrics.
+last_update = time.time() + WINDOW_SIZE
+
+events = eventlogging.connect(sys.argv[1])
+
+for meta in events.filter(schema='NavigationTiming'):
+    sample = meta['timestamp'], meta['event']
+    heapq.heappush(heap, sample)
+
+    now = time.time()
+
+    # Prune old entries. Python's heapq is a min heap,
+    # meaning heap[0] is always the oldest entry.
+    cutoff = now - WINDOW_SIZE
+    while heap[0][0] < cutoff:
+        heapq.heappop(heap)
+
+    # Check if we should push updates to RRD.
+    if now - last_update >= STEP:
+        last_update = now
+        try:
+            data = accumulate(heap)
+        except ValueError:
+            # We don't have any data for one or more metrics.
+            # We have to give RRD a full update or nothing, so move on.
+            continue
+
+        # Actually push updates.
+        for platform in PLATFORMS:
+            rrd_file = platform + '.rrd'
+            values = data[platform]
+            update = 'N:' + ':'.join(str(values[m]) for m in METRICS)
+            logging.info('%s: %s', rrd_file, update)
+            rrdtool.update(rrd_file, update)

-- 
To view, visit https://gerrit.wikimedia.org/r/202362
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id6e662a77d024363882bd54bd45c4aa5445d5503
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ori.livneh <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to