Ori.livneh has uploaded a new change for review. https://gerrit.wikimedia.org/r/269086
Change subject: varnish: report response age to StatsD ...................................................................... varnish: report response age to StatsD e10801bfe8 added a 'Backend-Timing' response header to all Apache responses. We can use it to derive the age of a Varnish response (i.e., how long it has been sitting in the cache, if at all). This change adds a varnishlog.py-backed metric reporter that sends the response age of 1 in every 1000 responses on the frontend varnishes. Change-Id: I4574a43b8b6eef66564c87459c224c27827460c1 --- M modules/role/manifests/cache/statsd.pp A modules/varnish/files/varnishage A modules/varnish/manifests/logging/age.pp A modules/varnish/templates/initscripts/varnishage.systemd.erb 4 files changed, 175 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/86/269086/1 diff --git a/modules/role/manifests/cache/statsd.pp b/modules/role/manifests/cache/statsd.pp index e631820..d64cc26 100644 --- a/modules/role/manifests/cache/statsd.pp +++ b/modules/role/manifests/cache/statsd.pp @@ -23,4 +23,10 @@ ::varnish::logging::rls { 'rls': statsd_server => 'statsd.eqiad.wmnet', } + + # Derive (and report to StatsD) the response age from + # the 'Backend-Timing' header set by Apache. + ::varnish::logging::age { 'age': + statsd_server => 'statsd.eqiad.wmnet', + } } diff --git a/modules/varnish/files/varnishage b/modules/varnish/files/varnishage new file mode 100755 index 0000000..d4affcb --- /dev/null +++ b/modules/varnish/files/varnishage @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + varnishage + ~~~~~~~~~~ + + Wikimedia Foundation Apaches are configured to emit a "Backend-Timing" + header, which contains the time the request was received and the time it + took to process it. From this, the age of the object in the cache can be + determined. This script reports object ages to a StatsD host. + + Usage: varnishage [--statsd-server HOST:PORT] + [--metric-key KEY] + [--varnish-name INSTANCE] + [--sampling-ratio RATIO] + + --statsd-server HOST:PORT StatsD server address, in HOST:PORT format + (default: none; only print metrics). + + --metric-key KEY Metric name (default: "varnish.response_age"). + + --varnish-name INSTANCE Varnish instance name (default: "frontend"). + + --sampling-ratio RATIO Sampling ratio (default: 0.001). A value of + 0.001 means send/print one in every 1000 + observations. + + This script assumes that the system clock is set to UTC on both + the backend server and localhost. + + Copyright 2015 Ori Livneh <o...@wikimedia.org> + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +""" +from __future__ import division + +import argparse +import random +import re +import socket +import time + +import varnishlog + + +SOCK = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + + +def parse_statsd_server_string(server_string): + parsed = urlparse.urlparse('//' + server_string) + return parsed.hostname, parsed.port or 8125 + + +def parse_metric_key(metric_key): + metric_key = metric_key.strip('.') + if not metric_key: + raise ValueError('metric key must not be empty') + return metric_key + + +def parse_ratio(ratio): + ratio = float(ratio) + if ratio <= 0 or ratio > 1: + raise ValueError('ratio must be greater than zero and less than' + ' or equal to one (got: %f)', ratio) + return ratio + + +ap = argparse.ArgumentParser( + description='Backend-Timing metric reporter', + epilog='If no statsd server is specified, prints stats to stdout instead.' +) +ap.add_argument('--metric-key', type=parse_metric_key, + default='varnish.response_age', + help='metric key (default: "varnish.response_age")') +ap.add_argument('--statsd-server', type=parse_statsd_server_string, + default=None, help='statsd server (default: none)') +ap.add_argument('--varnish-name', default='frontend', + help='varnish instance name (default: "frontend")') +ap.add_argument('--sampling-ratio', type=parse_ratio, default=0.001, + help='sampling ratio (default: 0.001)') +args = ap.parse_args() + + +def vsl_callback(transaction_id, tag, record, remote_party): + if random.random() > args.sampling_ratio: + return + + # Backend-Timing header values have the format 'D=NNN, t=NNN', where 't' + # is a timestamp for when the request was received and 'D' is how long + # it took to begin serving the request, both measured in microseconds. + resp_time, req_start = map(int, re.findall(r'\d+', record)) + resp_start = req_start + resp_time + + resp_age_ms = (time.time() * 1e3) - (resp_start / 1e3) + + metric = '%s:%.2f|ms' % (args.metric_key, resp_age_ms) + if args.statsd_server is not None: + SOCK.sendto(metric.encode('utf-8'), args.statsd_server) + print(metric) + + +varnishlog.varnishlog(( + ('i', 'TxHeader'), + ('I', 'Backend-Timing'), + ('n', args.varnish_name), + ('c', None), +), vsl_callback) diff --git a/modules/varnish/manifests/logging/age.pp b/modules/varnish/manifests/logging/age.pp new file mode 100644 index 0000000..f93f38a --- /dev/null +++ b/modules/varnish/manifests/logging/age.pp @@ -0,0 +1,41 @@ +# == Define: varnish::logging::age +# +# Report response age to StatsD (where "response age" is the difference +# between the current time and the time at which the response was generated +# by an Apache backend. +# +# === Parameters +# +# [*statsd_server*] +# StatsD server address, in "host:port" format. +# Defaults to localhost:8125. +# +# === Examples +# +# varnish::logging::age { +# statsd_server => 'statsd.eqiad.wmnet:8125 +# } +# +define varnish::logging::age( $statsd_server = 'statsd' ) { + include varnish::common + + file { '/usr/local/bin/varnishage': + source => 'puppet:///modules/varnish/varnishage', + owner => 'root', + group => 'root', + mode => '0555', + require => File['/usr/local/lib/python2.7/dist-packages/varnishlog.py'], + notify => Service['varnishage'], + } + + base::service_unit { 'varnishage': + ensure => present, + systemd => true, + strict => false, + template_name => 'varnishage', + require => File['/usr/local/bin/varnishage'], + service_params => { + enable => true, + }, + } +} diff --git a/modules/varnish/templates/initscripts/varnishage.systemd.erb b/modules/varnish/templates/initscripts/varnishage.systemd.erb new file mode 100644 index 0000000..529e08f --- /dev/null +++ b/modules/varnish/templates/initscripts/varnishage.systemd.erb @@ -0,0 +1,9 @@ +[Unit] +Description=Varnish Response Age StatsD Reporter +After=varnish-frontend.service + +[Service] +ExecStart=/usr/local/bin/varnishage --statsd-server=<%= @statsd_server %> + +[Install] +WantedBy=multi-user.target -- To view, visit https://gerrit.wikimedia.org/r/269086 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I4574a43b8b6eef66564c87459c224c27827460c1 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Ori.livneh <o...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits