Ori.livneh has uploaded a new change for review.
https://gerrit.wikimedia.org/r/269086
Change subject: varnish: report response age to StatsD
......................................................................
varnish: report response age to StatsD
e10801bfe8 added a 'Backend-Timing' response header to all Apache responses.
We can use it to derive the age of a Varnish response (i.e., how long it has
been sitting in the cache, if at all). This change adds a varnishlog.py-backed
metric reporter that sends the response age of 1 in every 1000 responses on
the frontend varnishes.
Change-Id: I4574a43b8b6eef66564c87459c224c27827460c1
---
M modules/role/manifests/cache/statsd.pp
A modules/varnish/files/varnishage
A modules/varnish/manifests/logging/age.pp
A modules/varnish/templates/initscripts/varnishage.systemd.erb
4 files changed, 175 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet
refs/changes/86/269086/1
diff --git a/modules/role/manifests/cache/statsd.pp
b/modules/role/manifests/cache/statsd.pp
index e631820..d64cc26 100644
--- a/modules/role/manifests/cache/statsd.pp
+++ b/modules/role/manifests/cache/statsd.pp
@@ -23,4 +23,10 @@
::varnish::logging::rls { 'rls':
statsd_server => 'statsd.eqiad.wmnet',
}
+
+ # Derive (and report to StatsD) the response age from
+ # the 'Backend-Timing' header set by Apache.
+ ::varnish::logging::age { 'age':
+ statsd_server => 'statsd.eqiad.wmnet',
+ }
}
diff --git a/modules/varnish/files/varnishage b/modules/varnish/files/varnishage
new file mode 100755
index 0000000..d4affcb
--- /dev/null
+++ b/modules/varnish/files/varnishage
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+ varnishage
+ ~~~~~~~~~~
+
+ Wikimedia Foundation Apaches are configured to emit a "Backend-Timing"
+ header, which contains the time the request was received and the time it
+ took to process it. From this, the age of the object in the cache can be
+ determined. This script reports object ages to a StatsD host.
+
+ Usage: varnishage [--statsd-server HOST:PORT]
+ [--metric-key KEY]
+ [--varnish-name INSTANCE]
+ [--sampling-ratio RATIO]
+
+ --statsd-server HOST:PORT StatsD server address, in HOST:PORT format
+ (default: none; only print metrics).
+
+ --metric-key KEY Metric name (default: "varnish.response_age").
+
+ --varnish-name INSTANCE Varnish instance name (default: "frontend").
+
+ --sampling-ratio RATIO Sampling ratio (default: 0.001). A value of
+ 0.001 means send/print one in every 1000
+ observations.
+
+ This script assumes that the system clock is set to UTC on both
+ the backend server and localhost.
+
+ Copyright 2015 Ori Livneh <[email protected]>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+"""
+from __future__ import division
+
+import argparse
+import random
+import re
+import socket
+import time
+
+import varnishlog
+
+
+SOCK = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+
+
+def parse_statsd_server_string(server_string):
+ parsed = urlparse.urlparse('//' + server_string)
+ return parsed.hostname, parsed.port or 8125
+
+
+def parse_metric_key(metric_key):
+ metric_key = metric_key.strip('.')
+ if not metric_key:
+ raise ValueError('metric key must not be empty')
+ return metric_key
+
+
+def parse_ratio(ratio):
+ ratio = float(ratio)
+ if ratio <= 0 or ratio > 1:
+ raise ValueError('ratio must be greater than zero and less than'
+ ' or equal to one (got: %f)', ratio)
+ return ratio
+
+
+ap = argparse.ArgumentParser(
+ description='Backend-Timing metric reporter',
+ epilog='If no statsd server is specified, prints stats to stdout instead.'
+)
+ap.add_argument('--metric-key', type=parse_metric_key,
+ default='varnish.response_age',
+ help='metric key (default: "varnish.response_age")')
+ap.add_argument('--statsd-server', type=parse_statsd_server_string,
+ default=None, help='statsd server (default: none)')
+ap.add_argument('--varnish-name', default='frontend',
+ help='varnish instance name (default: "frontend")')
+ap.add_argument('--sampling-ratio', type=parse_ratio, default=0.001,
+ help='sampling ratio (default: 0.001)')
+args = ap.parse_args()
+
+
+def vsl_callback(transaction_id, tag, record, remote_party):
+ if random.random() > args.sampling_ratio:
+ return
+
+ # Backend-Timing header values have the format 'D=NNN, t=NNN', where 't'
+ # is a timestamp for when the request was received and 'D' is how long
+ # it took to begin serving the request, both measured in microseconds.
+ resp_time, req_start = map(int, re.findall(r'\d+', record))
+ resp_start = req_start + resp_time
+
+ resp_age_ms = (time.time() * 1e3) - (resp_start / 1e3)
+
+ metric = '%s:%.2f|ms' % (args.metric_key, resp_age_ms)
+ if args.statsd_server is not None:
+ SOCK.sendto(metric.encode('utf-8'), args.statsd_server)
+ print(metric)
+
+
+varnishlog.varnishlog((
+ ('i', 'TxHeader'),
+ ('I', 'Backend-Timing'),
+ ('n', args.varnish_name),
+ ('c', None),
+), vsl_callback)
diff --git a/modules/varnish/manifests/logging/age.pp
b/modules/varnish/manifests/logging/age.pp
new file mode 100644
index 0000000..f93f38a
--- /dev/null
+++ b/modules/varnish/manifests/logging/age.pp
@@ -0,0 +1,41 @@
+# == Define: varnish::logging::age
+#
+# Report response age to StatsD (where "response age" is the difference
+# between the current time and the time at which the response was generated
+# by an Apache backend.
+#
+# === Parameters
+#
+# [*statsd_server*]
+# StatsD server address, in "host:port" format.
+# Defaults to localhost:8125.
+#
+# === Examples
+#
+# varnish::logging::age {
+# statsd_server => 'statsd.eqiad.wmnet:8125
+# }
+#
+define varnish::logging::age( $statsd_server = 'statsd' ) {
+ include varnish::common
+
+ file { '/usr/local/bin/varnishage':
+ source => 'puppet:///modules/varnish/varnishage',
+ owner => 'root',
+ group => 'root',
+ mode => '0555',
+ require =>
File['/usr/local/lib/python2.7/dist-packages/varnishlog.py'],
+ notify => Service['varnishage'],
+ }
+
+ base::service_unit { 'varnishage':
+ ensure => present,
+ systemd => true,
+ strict => false,
+ template_name => 'varnishage',
+ require => File['/usr/local/bin/varnishage'],
+ service_params => {
+ enable => true,
+ },
+ }
+}
diff --git a/modules/varnish/templates/initscripts/varnishage.systemd.erb
b/modules/varnish/templates/initscripts/varnishage.systemd.erb
new file mode 100644
index 0000000..529e08f
--- /dev/null
+++ b/modules/varnish/templates/initscripts/varnishage.systemd.erb
@@ -0,0 +1,9 @@
+[Unit]
+Description=Varnish Response Age StatsD Reporter
+After=varnish-frontend.service
+
+[Service]
+ExecStart=/usr/local/bin/varnishage --statsd-server=<%= @statsd_server %>
+
+[Install]
+WantedBy=multi-user.target
--
To view, visit https://gerrit.wikimedia.org/r/269086
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I4574a43b8b6eef66564c87459c224c27827460c1
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ori.livneh <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits