Gilles has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/399176 )
Change subject: Smarter Varnish slow log ...................................................................... Smarter Varnish slow log Using python-logstash allows us to send structured data to ELK. Additionally, we can leverage varnishlog, which has more options than varnishncsa. Bug: T181315 Change-Id: I08851a84857783cfacc75768a3c0216633aa9242 --- M hieradata/common.yaml M modules/profile/manifests/cache/base.pp A modules/varnish/files/varnishslowlog.py M modules/varnish/manifests/common.pp M modules/varnish/manifests/instance.pp D modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb A modules/varnish/templates/initscripts/varnishslowlog.systemd.erb 7 files changed, 189 insertions(+), 19 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/76/399176/1 diff --git a/hieradata/common.yaml b/hieradata/common.yaml index 4ff228a..7349b31 100644 --- a/hieradata/common.yaml +++ b/hieradata/common.yaml @@ -520,6 +520,7 @@ logstash_host: "logstash.svc.eqiad.wmnet" logstash_syslog_port: 10514 logstash_gelf_port: 12201 +logstash_json_port: 11514 udp2log_aggregator: "udplog:8420" tcpircbot_host: 'icinga.wikimedia.org' diff --git a/modules/profile/manifests/cache/base.pp b/modules/profile/manifests/cache/base.pp index 6fc84a6..a8508ff 100644 --- a/modules/profile/manifests/cache/base.pp +++ b/modules/profile/manifests/cache/base.pp @@ -22,6 +22,7 @@ $be_runtime_params = hiera('profile::cache::base::be_runtime_params', []), $logstash_host = hiera('logstash_host', undef), $logstash_syslog_port = hiera('logstash_syslog_port', undef), + $logstash_json_port = hiera('logstash_json_port', undef), $log_slow_request_threshold = hiera('profile::cache::base::log_slow_request_threshold', '60.0'), $allow_iptables = hiera('profile::cache::base::allow_iptables', false), ) { @@ -78,6 +79,8 @@ fe_runtime_params => $fe_runtime_params, be_runtime_params => $be_runtime_params, log_slow_request_threshold => $log_slow_request_threshold, + logstash_host => $logstash_host, + logstash_json_port => $logstash_json_port, } class { [ diff --git a/modules/varnish/files/varnishslowlog.py b/modules/varnish/files/varnishslowlog.py new file mode 100644 index 0000000..588fc06 --- /dev/null +++ b/modules/varnish/files/varnishslowlog.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + VarnishSlowLog + ~~~~~~~~~~~~~~ + VarnishSlowLog is responsible for gathering slow requests from varnishlog + and sending them to logstash. + + Copyright 2016-2017 Emanuele Rocca <e...@wikimedia.org> + Copyright 2017 Gilles Dubuc <gil...@wikimedia.org> + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +""" + +import argparse +import logging +import logstash +import os +import sys +import urlparse + +from subprocess import PIPE, Popen + + +def parse_logstash_server_string(server_string): + """Convert logstash server string into (hostname, port) tuple.""" + parsed = urlparse.urlparse('//' + server_string) + return parsed.hostname, parsed.port or 12202 + + +class VarnishSlowLog(object): + description = 'Varnish slow log logstash logger' + + def __init__(self, argument_list): + """Parse CLI arguments. + + argument_list is a list such as ['--foo', 'FOO', '--bar', 'BAR']""" + ap = argparse.ArgumentParser( + description=self.description, + epilog='If no logstash server is specified, ' + 'prints log entries to stdout instead.') + + ap.add_argument('--logstash-server', help='logstash server', + type=parse_logstash_server_string, default=None) + + ap.add_argument('--slow-threshold', help='varnish fetch duration threshold', + type=float, default=10.0) + + ap.add_argument('--transaction-timeout', help='varnish transaction timeout', + type=int, default=600) + + ap.add_argument('--varnishd-instance-name', help='varnishd instance name', + default=None) + + ap.add_argument('--varnishlog-path', help='varnishlog full path', + default='/usr/bin/varnishlog') + + self.args = ap.parse_args(argument_list) + + self.cmd = [self.args.varnishlog_path, + # VSL query matching anything but purges + '-q', 'ReqMethod ne "PURGE" and Timestamp:Fetch[3] > %f' % self.args.slow_threshold, + # Set maximum Varnish transaction duration to track + '-T', '%d' % self.args.transaction_timeout] + + self.layer = 'backend' + + if self.args.varnishd_instance_name: + self.cmd.extend(['-n', self.args.varnishd_instance_name]) + self.layer = self.args.varnishd_instance_name + + if self.args.logstash_server: + handler = logstash.LogstashHandler( + self.args.logstash_server[0], + port=self.args.logstash_server[1], + version=1, + message_type='varnishslowlog' + ) + else: + handler = logging.StreamHandler(sys.stdout) + + handler.setLevel(logging.DEBUG) + + self.logger = logging.getLogger('varnishslowlog') + self.logger.setLevel(logging.DEBUG) + self.logger.addHandler(handler) + + self.tx = {} + + def handle_line(self, line): + splitline = line.split(None, 2) + + # Ignore any line that doesn't have at least the 2 parts expected + # for a transacation data field + if len(splitline) < 2: + return + + # Ignore any line that doesn't contain transaction data + if splitline[0] != '-': + return + + tag = splitline[1] + + if len(splitline) > 2: # The End tag has no 3rd part + value = splitline[2] + + if tag == 'Begin': + splitagain = value.split() + self.tx = {'id' : splitagain[1], 'layer' : self.layer} + elif tag == 'End': + if self.tx.has_key('request-Host') and self.tx.has_key('http-url'): + self.logger.info(self.tx['request-Host'] + self.tx['http-url'], extra=self.tx) + self.tx = {} + elif tag == 'ReqMethod': + self.tx['http-method'] = value + elif tag == 'ReqURL': + self.tx['http-url'] = value + elif tag == 'ReqProtocol': + self.tx['http-protocol'] = value + elif tag == 'RespStatus': + self.tx['http-status'] = value + elif tag == 'Timestamp': + splitagain = value.split() + ts_name = splitagain[0][:-1].lower() + self.tx['time-' + ts_name ] = float(splitagain[3]) + elif tag == 'ReqHeader': + splitagain = value.split(None, 1) + header_name = splitagain[0][:-1] + header_value = splitagain[1] + self.tx['request-' + header_name] = header_value + elif tag == 'RespHeader': + splitagain = value.split(None, 1) + header_name = splitagain[0][:-1] + header_value = splitagain[1] + self.tx['response-' + header_name] = header_value + + def main(self): + """Execute the command specified in self.cmd and call handle_record for + each output line produced by the command""" + p = Popen(self.cmd, stdout=PIPE, bufsize=-1) + + try: + while True: + line = p.stdout.readline().rstrip('\n') + self.handle_line(line) + except KeyboardInterrupt: + os.waitpid(p.pid, 0) + +if __name__ == "__main__": + VarnishSlowLog(sys.argv[1:]).main() \ No newline at end of file diff --git a/modules/varnish/manifests/common.pp b/modules/varnish/manifests/common.pp index 9550fdf..2881d5d 100644 --- a/modules/varnish/manifests/common.pp +++ b/modules/varnish/manifests/common.pp @@ -3,6 +3,8 @@ $fe_runtime_params=[], $be_runtime_params=[], $log_slow_request_threshold='60.0', + $logstash_host=undef, + $logstash_json_port=undef, ) { require ::varnish::packages diff --git a/modules/varnish/manifests/instance.pp b/modules/varnish/manifests/instance.pp index 882e781..82dcc2a 100644 --- a/modules/varnish/manifests/instance.pp +++ b/modules/varnish/manifests/instance.pp @@ -175,14 +175,22 @@ $slow_requests_programname = 'varnish-fe-slowreqs' } - systemd::service { "varnish${instancesuffix}-slowreqs": + file { '/usr/local/bin/varnishslowlog': + source => 'puppet:///modules/varnish/varnishslowlog.py', + owner => 'root', + group => 'root', + mode => '0555', + } + + systemd::service { "varnish${instancesuffix}-slowlog": ensure => present, - content => systemd_template('varnish-slowreqs'), + content => systemd_template('varnishslowlog'), restart => true, service_params => { require => Service["varnish${instancesuffix}"], enable => true, }, + require => File["/usr/local/bin/varnishslowlog"], } # This mechanism with the touch/rm conditionals in the pair of execs diff --git a/modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb b/modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb deleted file mode 100644 index 619ffc7..0000000 --- a/modules/varnish/templates/initscripts/varnish-slowreqs.systemd.erb +++ /dev/null @@ -1,17 +0,0 @@ -[Unit] -Description=Varnish <%= @inst %> Slow Requests -After=varnish<%= @instancesuffix %>.service -Requires=varnish<%= @instancesuffix %>.service - -[Service] -StandardOutput=syslog -StandardError=syslog -SyslogIdentifier=<%= @slow_requests_programname %> -Restart=always -# Note the usage of the %% specifier here. -# See systemd.unit(5), section SPECIFIERS. -ExecStart=/usr/bin/varnishncsa <%= @extraopts %> -q 'ReqMethod ne "PURGE" and Timestamp:Resp[2] > <%= scope['::varnish::common::log_slow_request_threshold'] %>' -F '%{VSL:Timestamp:Start[1]}x Start: %{VSL:Timestamp:Start[3]}x Req: %{VSL:Timestamp:Req[3]}x Fetch: %{VSL:Timestamp:Fetch[3]}x Process: %{VSL:Timestamp:Process[3]}x Resp: %{VSL:Timestamp:Resp[3]}x %%r %%s' - -[Install] -WantedBy=multi-user.target - diff --git a/modules/varnish/templates/initscripts/varnishslowlog.systemd.erb b/modules/varnish/templates/initscripts/varnishslowlog.systemd.erb new file mode 100644 index 0000000..bbd65e3 --- /dev/null +++ b/modules/varnish/templates/initscripts/varnishslowlog.systemd.erb @@ -0,0 +1,12 @@ +[Unit] +Description=Varnish <%= @inst %> Slow Requests +After=varnish<%= @instancesuffix %>.service +Requires=varnish<%= @instancesuffix %>.service + +[Service] +ExecStart=/usr/local/bin/varnishslowlog --varnishd-instance-name <%= @instance_name %> --slow-threshold <%= scope['::varnish::common::log_slow_request_threshold'] %> --logstash-server=<%= scope['::varnish::common::logstash_host'] %>:<%= scope['::varnish::common::logstash_json_port'] %> +Restart=always +RestartSec=5s + +[Install] +WantedBy=multi-user.target -- To view, visit https://gerrit.wikimedia.org/r/399176 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I08851a84857783cfacc75768a3c0216633aa9242 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Gilles <gdu...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits