Ema has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/399176 )
Change subject: Smarter Varnish slow log ...................................................................... Smarter Varnish slow log Using python-logstash allows us to send structured data to ELK. Additionally, we can leverage varnishlog, which has more options than varnishncsa. Bug: T181315 Change-Id: I08851a84857783cfacc75768a3c0216633aa9242 --- M modules/profile/manifests/cache/base.pp A modules/varnish/files/varnishslowlog.py M modules/varnish/manifests/common.pp M modules/varnish/manifests/instance.pp M modules/varnish/manifests/logging.pp D modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb A modules/varnish/templates/initscripts/varnishslowlog.systemd.erb 7 files changed, 193 insertions(+), 25 deletions(-) Approvals: Ema: Looks good to me, approved jenkins-bot: Verified diff --git a/modules/profile/manifests/cache/base.pp b/modules/profile/manifests/cache/base.pp index 6fc84a6..179a534 100644 --- a/modules/profile/manifests/cache/base.pp +++ b/modules/profile/manifests/cache/base.pp @@ -22,6 +22,7 @@ $be_runtime_params = hiera('profile::cache::base::be_runtime_params', []), $logstash_host = hiera('logstash_host', undef), $logstash_syslog_port = hiera('logstash_syslog_port', undef), + $logstash_json_lines_port = hiera('logstash_json_lines_port', undef), $log_slow_request_threshold = hiera('profile::cache::base::log_slow_request_threshold', '60.0'), $allow_iptables = hiera('profile::cache::base::allow_iptables', false), ) { @@ -78,6 +79,8 @@ fe_runtime_params => $fe_runtime_params, be_runtime_params => $be_runtime_params, log_slow_request_threshold => $log_slow_request_threshold, + logstash_host => $logstash_host, + logstash_json_lines_port => $logstash_json_lines_port, } class { [ diff --git a/modules/varnish/files/varnishslowlog.py b/modules/varnish/files/varnishslowlog.py new file mode 100644 index 0000000..349e347 --- /dev/null +++ b/modules/varnish/files/varnishslowlog.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + VarnishSlowLog + ~~~~~~~~~~~~~~ + VarnishSlowLog is responsible for gathering slow requests from varnishlog + and sending them to logstash. + + Copyright 2016-2017 Emanuele Rocca <e...@wikimedia.org> + Copyright 2017 Gilles Dubuc <gil...@wikimedia.org> + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +""" + +import argparse +import logging +import logstash +import os +import sys +import urlparse + +from subprocess import PIPE, Popen + + +def parse_logstash_server_string(server_string): + """Convert logstash server string into (hostname, port) tuple.""" + parsed = urlparse.urlparse('//' + server_string) + return parsed.hostname, parsed.port or 12202 + + +class VarnishSlowLog(object): + description = 'Varnish slow log logstash logger' + + def __init__(self, argument_list): + """Parse CLI arguments. + + argument_list is a list such as ['--foo', 'FOO', '--bar', 'BAR']""" + ap = argparse.ArgumentParser( + description=self.description, + epilog='If no logstash server is specified, ' + 'prints log entries to stdout instead.') + + ap.add_argument('--logstash-server', help='logstash server', + type=parse_logstash_server_string, default=None) + + ap.add_argument('--slow-threshold', help='varnish fetch duration threshold', + type=float, default=10.0) + + ap.add_argument('--transaction-timeout', help='varnish transaction timeout', + type=int, default=600) + + ap.add_argument('--varnishd-instance-name', help='varnishd instance name', + default=None) + + ap.add_argument('--varnishlog-path', help='varnishlog full path', + default='/usr/bin/varnishlog') + + self.args = ap.parse_args(argument_list) + + self.cmd = [ + self.args.varnishlog_path, + # VSL query matching anything but purges + '-q', 'ReqMethod ne "PURGE" and Timestamp:Fetch[3] > %f' % self.args.slow_threshold, + # Set maximum Varnish transaction duration to track + '-T', '%d' % self.args.transaction_timeout + ] + + self.layer = 'backend' + + if self.args.varnishd_instance_name: + self.cmd.extend(['-n', self.args.varnishd_instance_name]) + self.layer = self.args.varnishd_instance_name + + if self.args.logstash_server: + handler = logstash.LogstashHandler( + self.args.logstash_server[0], + port=self.args.logstash_server[1], + version=1, + message_type='varnishslowlog' + ) + else: + handler = logging.StreamHandler(sys.stdout) + + handler.setLevel(logging.DEBUG) + + self.logger = logging.getLogger('varnishslowlog') + self.logger.setLevel(logging.DEBUG) + self.logger.addHandler(handler) + + self.tx = {} + + def handle_line(self, line): + splitline = line.split(None, 2) + + # Ignore any line that doesn't have at least the 2 parts expected + # for a transacation data field + if len(splitline) < 2: + return + + # Ignore any line that doesn't contain transaction data + if splitline[0] != '-': + return + + tag = splitline[1] + + if len(splitline) > 2: # The End tag has no 3rd part + value = splitline[2] + + if tag == 'Begin': + splitagain = value.split() + self.tx = {'id': splitagain[1], 'layer': self.layer} + elif tag == 'End': + if 'request-Host' in self.tx and 'http-url' in self.tx: + self.logger.info(self.tx['request-Host'] + self.tx['http-url'], extra=self.tx) + self.tx = {} + elif tag == 'ReqMethod': + self.tx['http-method'] = value + elif tag == 'ReqURL': + self.tx['http-url'] = value + elif tag == 'ReqProtocol': + self.tx['http-protocol'] = value + elif tag == 'RespStatus': + self.tx['http-status'] = value + elif tag == 'Timestamp': + splitagain = value.split() + ts_name = splitagain[0][:-1].lower() + self.tx['time-' + ts_name] = float(splitagain[3]) + elif tag == 'ReqHeader': + splitagain = value.split(None, 1) + header_name = splitagain[0][:-1] + header_value = splitagain[1] + self.tx['request-' + header_name] = header_value + elif tag == 'RespHeader': + splitagain = value.split(None, 1) + header_name = splitagain[0][:-1] + header_value = splitagain[1] + self.tx['response-' + header_name] = header_value + + def main(self): + """Execute the command specified in self.cmd and call handle_record for + each output line produced by the command""" + p = Popen(self.cmd, stdout=PIPE, bufsize=-1) + + try: + while True: + line = p.stdout.readline().rstrip('\n') + self.handle_line(line) + except KeyboardInterrupt: + os.waitpid(p.pid, 0) + + +if __name__ == "__main__": + VarnishSlowLog(sys.argv[1:]).main() diff --git a/modules/varnish/manifests/common.pp b/modules/varnish/manifests/common.pp index 9550fdf..8d0f0aa 100644 --- a/modules/varnish/manifests/common.pp +++ b/modules/varnish/manifests/common.pp @@ -3,6 +3,8 @@ $fe_runtime_params=[], $be_runtime_params=[], $log_slow_request_threshold='60.0', + $logstash_host=undef, + $logstash_json_lines_port=undef, ) { require ::varnish::packages diff --git a/modules/varnish/manifests/instance.pp b/modules/varnish/manifests/instance.pp index 882e781..f7d9473 100644 --- a/modules/varnish/manifests/instance.pp +++ b/modules/varnish/manifests/instance.pp @@ -168,21 +168,18 @@ }, } - # Log slow requests to syslog - if $instance_name == '' { - $slow_requests_programname = 'varnish-be-slowreqs' - } else { - $slow_requests_programname = 'varnish-fe-slowreqs' - } + require_package('python-logstash') - systemd::service { "varnish${instancesuffix}-slowreqs": + systemd::service { "varnish${instancesuffix}-slowlog": ensure => present, - content => systemd_template('varnish-slowreqs'), + content => systemd_template('varnishslowlog'), restart => true, service_params => { require => Service["varnish${instancesuffix}"], enable => true, }, + require => File['/usr/local/bin/varnishslowlog'], + subscribe => File['/usr/local/bin/varnishslowlog'], } # This mechanism with the touch/rm conditionals in the pair of execs diff --git a/modules/varnish/manifests/logging.pp b/modules/varnish/manifests/logging.pp index 95491fd..afa3346 100644 --- a/modules/varnish/manifests/logging.pp +++ b/modules/varnish/manifests/logging.pp @@ -91,4 +91,11 @@ key_prefix => "varnish.${::site}.${cache_cluster}.xcache", statsd_server => $statsd_host, } + + file { '/usr/local/bin/varnishslowlog': + source => 'puppet:///modules/varnish/varnishslowlog.py', + owner => 'root', + group => 'root', + mode => '0555', + } } diff --git a/modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb b/modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb deleted file mode 100644 index 619ffc7..0000000 --- a/modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb +++ /dev/null @@ -1,17 +0,0 @@ -[Unit] -Description=Varnish <%= @inst %> Slow Requests -After=varnish<%= @instancesuffix %>.service -Requires=varnish<%= @instancesuffix %>.service - -[Service] -StandardOutput=syslog -StandardError=syslog -SyslogIdentifier=<%= @slow_requests_programname %> -Restart=always -# Note the usage of the %% specifier here. -# See systemd.unit(5), section SPECIFIERS. -ExecStart=/usr/bin/varnishncsa <%= @extraopts %> -q 'ReqMethod ne "PURGE" and Timestamp:Resp[2] > <%= scope['::varnish::common::log_slow_request_threshold'] %>' -F '%{VSL:Timestamp:Start[1]}x Start: %{VSL:Timestamp:Start[3]}x Req: %{VSL:Timestamp:Req[3]}x Fetch: %{VSL:Timestamp:Fetch[3]}x Process: %{VSL:Timestamp:Process[3]}x Resp: %{VSL:Timestamp:Resp[3]}x %%r %%s' - -[Install] -WantedBy=multi-user.target - diff --git a/modules/varnish/templates/initscripts/varnishslowlog.systemd.erb b/modules/varnish/templates/initscripts/varnishslowlog.systemd.erb new file mode 100644 index 0000000..e848983 --- /dev/null +++ b/modules/varnish/templates/initscripts/varnishslowlog.systemd.erb @@ -0,0 +1,12 @@ +[Unit] +Description=Varnish <%= @inst %> Slow Requests +After=varnish<%= @instancesuffix %>.service +Requires=varnish<%= @instancesuffix %>.service + +[Service] +ExecStart=/usr/local/bin/varnishslowlog <% if @instance_name and @instance_name != '' -%> --varnishd-instance-name <%= @instance_name %> <% end %> --slow-threshold <%= scope['::varnish::common::log_slow_request_threshold'] %> --logstash-server=<%= scope['::varnish::common::logstash_host'] %>:<%= scope['::varnish::common::logstash_json_lines_port'] %> +Restart=always +RestartSec=5s + +[Install] +WantedBy=multi-user.target -- To view, visit https://gerrit.wikimedia.org/r/399176 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I08851a84857783cfacc75768a3c0216633aa9242 Gerrit-PatchSet: 9 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Gilles <gdu...@wikimedia.org> Gerrit-Reviewer: BBlack <bbl...@wikimedia.org> Gerrit-Reviewer: Ema <e...@wikimedia.org> Gerrit-Reviewer: Gilles <gdu...@wikimedia.org> Gerrit-Reviewer: Volans <rcocci...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits