Ema has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/399176 )
Change subject: Smarter Varnish slow log
......................................................................
Smarter Varnish slow log
Using python-logstash allows us to send structured data
to ELK. Additionally, we can leverage varnishlog, which
has more options than varnishncsa.
Bug: T181315
Change-Id: I08851a84857783cfacc75768a3c0216633aa9242
---
M modules/profile/manifests/cache/base.pp
A modules/varnish/files/varnishslowlog.py
M modules/varnish/manifests/common.pp
M modules/varnish/manifests/instance.pp
M modules/varnish/manifests/logging.pp
D modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb
A modules/varnish/templates/initscripts/varnishslowlog.systemd.erb
7 files changed, 193 insertions(+), 25 deletions(-)
Approvals:
Ema: Looks good to me, approved
jenkins-bot: Verified
diff --git a/modules/profile/manifests/cache/base.pp
b/modules/profile/manifests/cache/base.pp
index 6fc84a6..179a534 100644
--- a/modules/profile/manifests/cache/base.pp
+++ b/modules/profile/manifests/cache/base.pp
@@ -22,6 +22,7 @@
$be_runtime_params = hiera('profile::cache::base::be_runtime_params', []),
$logstash_host = hiera('logstash_host', undef),
$logstash_syslog_port = hiera('logstash_syslog_port', undef),
+ $logstash_json_lines_port = hiera('logstash_json_lines_port', undef),
$log_slow_request_threshold =
hiera('profile::cache::base::log_slow_request_threshold', '60.0'),
$allow_iptables = hiera('profile::cache::base::allow_iptables', false),
) {
@@ -78,6 +79,8 @@
fe_runtime_params => $fe_runtime_params,
be_runtime_params => $be_runtime_params,
log_slow_request_threshold => $log_slow_request_threshold,
+ logstash_host => $logstash_host,
+ logstash_json_lines_port => $logstash_json_lines_port,
}
class { [
diff --git a/modules/varnish/files/varnishslowlog.py
b/modules/varnish/files/varnishslowlog.py
new file mode 100644
index 0000000..349e347
--- /dev/null
+++ b/modules/varnish/files/varnishslowlog.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+ VarnishSlowLog
+ ~~~~~~~~~~~~~~
+ VarnishSlowLog is responsible for gathering slow requests from varnishlog
+ and sending them to logstash.
+
+ Copyright 2016-2017 Emanuele Rocca <[email protected]>
+ Copyright 2017 Gilles Dubuc <[email protected]>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+"""
+
+import argparse
+import logging
+import logstash
+import os
+import sys
+import urlparse
+
+from subprocess import PIPE, Popen
+
+
+def parse_logstash_server_string(server_string):
+ """Convert logstash server string into (hostname, port) tuple."""
+ parsed = urlparse.urlparse('//' + server_string)
+ return parsed.hostname, parsed.port or 12202
+
+
+class VarnishSlowLog(object):
+ description = 'Varnish slow log logstash logger'
+
+ def __init__(self, argument_list):
+ """Parse CLI arguments.
+
+ argument_list is a list such as ['--foo', 'FOO', '--bar', 'BAR']"""
+ ap = argparse.ArgumentParser(
+ description=self.description,
+ epilog='If no logstash server is specified, '
+ 'prints log entries to stdout instead.')
+
+ ap.add_argument('--logstash-server', help='logstash server',
+ type=parse_logstash_server_string, default=None)
+
+ ap.add_argument('--slow-threshold', help='varnish fetch duration
threshold',
+ type=float, default=10.0)
+
+ ap.add_argument('--transaction-timeout', help='varnish transaction
timeout',
+ type=int, default=600)
+
+ ap.add_argument('--varnishd-instance-name', help='varnishd instance
name',
+ default=None)
+
+ ap.add_argument('--varnishlog-path', help='varnishlog full path',
+ default='/usr/bin/varnishlog')
+
+ self.args = ap.parse_args(argument_list)
+
+ self.cmd = [
+ self.args.varnishlog_path,
+ # VSL query matching anything but purges
+ '-q', 'ReqMethod ne "PURGE" and Timestamp:Fetch[3] > %f' %
self.args.slow_threshold,
+ # Set maximum Varnish transaction duration to track
+ '-T', '%d' % self.args.transaction_timeout
+ ]
+
+ self.layer = 'backend'
+
+ if self.args.varnishd_instance_name:
+ self.cmd.extend(['-n', self.args.varnishd_instance_name])
+ self.layer = self.args.varnishd_instance_name
+
+ if self.args.logstash_server:
+ handler = logstash.LogstashHandler(
+ self.args.logstash_server[0],
+ port=self.args.logstash_server[1],
+ version=1,
+ message_type='varnishslowlog'
+ )
+ else:
+ handler = logging.StreamHandler(sys.stdout)
+
+ handler.setLevel(logging.DEBUG)
+
+ self.logger = logging.getLogger('varnishslowlog')
+ self.logger.setLevel(logging.DEBUG)
+ self.logger.addHandler(handler)
+
+ self.tx = {}
+
+ def handle_line(self, line):
+ splitline = line.split(None, 2)
+
+ # Ignore any line that doesn't have at least the 2 parts expected
+ # for a transacation data field
+ if len(splitline) < 2:
+ return
+
+ # Ignore any line that doesn't contain transaction data
+ if splitline[0] != '-':
+ return
+
+ tag = splitline[1]
+
+ if len(splitline) > 2: # The End tag has no 3rd part
+ value = splitline[2]
+
+ if tag == 'Begin':
+ splitagain = value.split()
+ self.tx = {'id': splitagain[1], 'layer': self.layer}
+ elif tag == 'End':
+ if 'request-Host' in self.tx and 'http-url' in self.tx:
+ self.logger.info(self.tx['request-Host'] +
self.tx['http-url'], extra=self.tx)
+ self.tx = {}
+ elif tag == 'ReqMethod':
+ self.tx['http-method'] = value
+ elif tag == 'ReqURL':
+ self.tx['http-url'] = value
+ elif tag == 'ReqProtocol':
+ self.tx['http-protocol'] = value
+ elif tag == 'RespStatus':
+ self.tx['http-status'] = value
+ elif tag == 'Timestamp':
+ splitagain = value.split()
+ ts_name = splitagain[0][:-1].lower()
+ self.tx['time-' + ts_name] = float(splitagain[3])
+ elif tag == 'ReqHeader':
+ splitagain = value.split(None, 1)
+ header_name = splitagain[0][:-1]
+ header_value = splitagain[1]
+ self.tx['request-' + header_name] = header_value
+ elif tag == 'RespHeader':
+ splitagain = value.split(None, 1)
+ header_name = splitagain[0][:-1]
+ header_value = splitagain[1]
+ self.tx['response-' + header_name] = header_value
+
+ def main(self):
+ """Execute the command specified in self.cmd and call handle_record for
+ each output line produced by the command"""
+ p = Popen(self.cmd, stdout=PIPE, bufsize=-1)
+
+ try:
+ while True:
+ line = p.stdout.readline().rstrip('\n')
+ self.handle_line(line)
+ except KeyboardInterrupt:
+ os.waitpid(p.pid, 0)
+
+
+if __name__ == "__main__":
+ VarnishSlowLog(sys.argv[1:]).main()
diff --git a/modules/varnish/manifests/common.pp
b/modules/varnish/manifests/common.pp
index 9550fdf..8d0f0aa 100644
--- a/modules/varnish/manifests/common.pp
+++ b/modules/varnish/manifests/common.pp
@@ -3,6 +3,8 @@
$fe_runtime_params=[],
$be_runtime_params=[],
$log_slow_request_threshold='60.0',
+ $logstash_host=undef,
+ $logstash_json_lines_port=undef,
) {
require ::varnish::packages
diff --git a/modules/varnish/manifests/instance.pp
b/modules/varnish/manifests/instance.pp
index 882e781..f7d9473 100644
--- a/modules/varnish/manifests/instance.pp
+++ b/modules/varnish/manifests/instance.pp
@@ -168,21 +168,18 @@
},
}
- # Log slow requests to syslog
- if $instance_name == '' {
- $slow_requests_programname = 'varnish-be-slowreqs'
- } else {
- $slow_requests_programname = 'varnish-fe-slowreqs'
- }
+ require_package('python-logstash')
- systemd::service { "varnish${instancesuffix}-slowreqs":
+ systemd::service { "varnish${instancesuffix}-slowlog":
ensure => present,
- content => systemd_template('varnish-slowreqs'),
+ content => systemd_template('varnishslowlog'),
restart => true,
service_params => {
require => Service["varnish${instancesuffix}"],
enable => true,
},
+ require => File['/usr/local/bin/varnishslowlog'],
+ subscribe => File['/usr/local/bin/varnishslowlog'],
}
# This mechanism with the touch/rm conditionals in the pair of execs
diff --git a/modules/varnish/manifests/logging.pp
b/modules/varnish/manifests/logging.pp
index 95491fd..afa3346 100644
--- a/modules/varnish/manifests/logging.pp
+++ b/modules/varnish/manifests/logging.pp
@@ -91,4 +91,11 @@
key_prefix => "varnish.${::site}.${cache_cluster}.xcache",
statsd_server => $statsd_host,
}
+
+ file { '/usr/local/bin/varnishslowlog':
+ source => 'puppet:///modules/varnish/varnishslowlog.py',
+ owner => 'root',
+ group => 'root',
+ mode => '0555',
+ }
}
diff --git a/modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb
b/modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb
deleted file mode 100644
index 619ffc7..0000000
--- a/modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb
+++ /dev/null
@@ -1,17 +0,0 @@
-[Unit]
-Description=Varnish <%= @inst %> Slow Requests
-After=varnish<%= @instancesuffix %>.service
-Requires=varnish<%= @instancesuffix %>.service
-
-[Service]
-StandardOutput=syslog
-StandardError=syslog
-SyslogIdentifier=<%= @slow_requests_programname %>
-Restart=always
-# Note the usage of the %% specifier here.
-# See systemd.unit(5), section SPECIFIERS.
-ExecStart=/usr/bin/varnishncsa <%= @extraopts %> -q 'ReqMethod ne "PURGE" and
Timestamp:Resp[2] > <%= scope['::varnish::common::log_slow_request_threshold']
%>' -F '%{VSL:Timestamp:Start[1]}x Start: %{VSL:Timestamp:Start[3]}x Req:
%{VSL:Timestamp:Req[3]}x Fetch: %{VSL:Timestamp:Fetch[3]}x Process:
%{VSL:Timestamp:Process[3]}x Resp: %{VSL:Timestamp:Resp[3]}x %%r %%s'
-
-[Install]
-WantedBy=multi-user.target
-
diff --git a/modules/varnish/templates/initscripts/varnishslowlog.systemd.erb
b/modules/varnish/templates/initscripts/varnishslowlog.systemd.erb
new file mode 100644
index 0000000..e848983
--- /dev/null
+++ b/modules/varnish/templates/initscripts/varnishslowlog.systemd.erb
@@ -0,0 +1,12 @@
+[Unit]
+Description=Varnish <%= @inst %> Slow Requests
+After=varnish<%= @instancesuffix %>.service
+Requires=varnish<%= @instancesuffix %>.service
+
+[Service]
+ExecStart=/usr/local/bin/varnishslowlog <% if @instance_name and
@instance_name != '' -%> --varnishd-instance-name <%= @instance_name %> <% end
%> --slow-threshold <%= scope['::varnish::common::log_slow_request_threshold']
%> --logstash-server=<%= scope['::varnish::common::logstash_host'] %>:<%=
scope['::varnish::common::logstash_json_lines_port'] %>
+Restart=always
+RestartSec=5s
+
+[Install]
+WantedBy=multi-user.target
--
To view, visit https://gerrit.wikimedia.org/r/399176
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I08851a84857783cfacc75768a3c0216633aa9242
Gerrit-PatchSet: 9
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Gilles <[email protected]>
Gerrit-Reviewer: BBlack <[email protected]>
Gerrit-Reviewer: Ema <[email protected]>
Gerrit-Reviewer: Gilles <[email protected]>
Gerrit-Reviewer: Volans <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits