Giuseppe Lavagetto has submitted this change and it was merged. Change subject: Adding tendril::maintenance class ......................................................................
Adding tendril::maintenance class This puppetizes the cron jobs that are currently running on terbium, related to refresh some statistics about tendril. This module has also been added to mariadb::maintenance, and this one is being applied to terbium (and temporarelly, to a separate mw host). Change-Id: I8b5e6c6c0073804188ac562eb8839ce9157491ca --- M hieradata/hosts/terbium.yaml M manifests/role/db.pp M manifests/role/mariadb.pp M manifests/site.pp A modules/tendril/files/tendril-cron-5m.pl A modules/tendril/files/tendril-queries.pl A modules/tendril/manifests/maintenance.pp A modules/tendril/templates/tendril.cnf.erb 8 files changed, 266 insertions(+), 10 deletions(-) Approvals: Giuseppe Lavagetto: Verified; Looks good to me, approved jenkins-bot: Verified diff --git a/hieradata/hosts/terbium.yaml b/hieradata/hosts/terbium.yaml index a214c82..3a7f17f 100644 --- a/hieradata/hosts/terbium.yaml +++ b/hieradata/hosts/terbium.yaml @@ -24,3 +24,4 @@ mediawiki::maintenance::purge_securepoll::ensure: absent mediawiki::maintenance::jobqueue_stats::ensure: absent mediawiki::maintenance::updatequerypages::ensure: absent +tendril::maintenance::ensure: absent diff --git a/manifests/role/db.pp b/manifests/role/db.pp index 20d2342..db93664 100644 --- a/manifests/role/db.pp +++ b/manifests/role/db.pp @@ -116,11 +116,3 @@ content => template('mysql_wmf/skrillex.yaml.erb'), } } - -class role::db::maintenance { - include mysql - - package { 'percona-toolkit': - ensure => latest, - } -} diff --git a/manifests/role/mariadb.pp b/manifests/role/mariadb.pp index 3abb082..844fb2d 100644 --- a/manifests/role/mariadb.pp +++ b/manifests/role/mariadb.pp @@ -756,3 +756,20 @@ rule => 'saddr 10.64.0.15 proto tcp dport (3306) ACCEPT;', } } + +class role::mariadb::maintenance { + # TODO: check if both of these are still needed + include mysql + package { 'percona-toolkit': + ensure => latest, + } + + # place from which tendril-related cron jobs are run + include passwords::tendril + + class { 'tendril::maintenance': + tendril_host => 'db1011.eqiad.wmnet', + tendril_user => 'watchdog', + tendril_password => $passwords::tendril::db_pass, + } +} diff --git a/manifests/site.pp b/manifests/site.pp index 2dfe43d..c732ed4 100644 --- a/manifests/site.pp +++ b/manifests/site.pp @@ -1915,7 +1915,7 @@ # mw1152 is the experimental HAT script runner node 'mw1152.eqiad.wmnet' { - role mediawiki::maintenance, db::maintenance, mediawiki::generic_monitoring + role mediawiki::maintenance, mariadb::maintenance, mediawiki::generic_monitoring include role::noc include standard include ldap::role::client::labs @@ -2416,7 +2416,7 @@ # https://wikitech.wikimedia.org/wiki/Terbium node 'terbium.eqiad.wmnet' { - role db::maintenance, mediawiki::maintenance, backup::host + role mariadb::maintenance, mediawiki::maintenance, backup::host include ldap::role::client::labs diff --git a/modules/tendril/files/tendril-cron-5m.pl b/modules/tendril/files/tendril-cron-5m.pl new file mode 100755 index 0000000..88858f7 --- /dev/null +++ b/modules/tendril/files/tendril-cron-5m.pl @@ -0,0 +1,84 @@ +#!/usr/bin/perl + +use strict; +use DBI; +use Socket; +use Digest::MD5 qw(md5 md5_hex md5_base64); + +my $config = $ARGV[0]; + +my $dbi = "DBI:mysql:;mysql_read_default_file=$config;mysql_read_default_group=tendril"; +my $db = DBI->connect($dbi, undef, undef) or die("db?"); +$db->do("SET NAMES 'utf8';"); + +my $servers = $db->prepare("select id, host, port from servers"); + +$servers->execute(); + +while (my $row = $servers->fetchrow_hashref()) +{ + my $server_id = $row->{id}; + my $host = $row->{host}; + my $port = $row->{port}; + + my ($lock) = $db->selectrow_array("select get_lock('tendril-cron-5m-$server_id', 1)"); + + if ($lock == 1) + { + print "$host:$port\n"; + + my $select = $db->prepare("select id, host, info, md5(info) as info_md5 from processlist_query_log where server_id = ? and info is not null and checksum is null and stamp > now() - interval 3 day group by id, host, info"); + if ($select->execute($server_id)) + { + while (my $row = $select->fetchrow_hashref()) + { + if ($row->{host} =~ /^(\d+\.\d+\.\d+\.\d+):\d+$/) + { + my $ipv4 = $1; + my $iaddr = inet_aton($ipv4); + if (my $host = gethostbyaddr($iaddr, AF_INET)) + { + my $replace = $db->prepare("replace into dns (host, ipv4) values (?, ?)"); + $replace->execute($host, $ipv4); + $replace->finish(); + } + } + + my $query = $row->{info}; + $query =~ s/"(?:[^"\\]|\\.)*"/?/ig; + $query =~ s/'(?:[^'\\]|\\.)*'/?/ig; + $query =~ s/\b([0-9]+)\b/?/ig; + $query =~ s/\/\*.*?\*\///ig; + $query =~ s/\s+/ /ig; + $query =~ s/^\s+//ig; + $query =~ s/\s+$//ig; + $query =~ s/[(][?,'" ]+?[)]/?LIST?/g; + + + my $update = $db->prepare("update processlist_query_log set checksum = md5(?) where server_id = ? and id = ? and checksum is null and md5(info) = ? and stamp > now() - interval 3 day"); + my $rs = $update->execute($query, $server_id, $row->{id}, $row->{info_md5}); + $update->finish(); + + print "."; + } + } + print "\n"; + $select->finish(); + + if (my $ipv4packed = gethostbyname($host)) + { + my $ipv4 = inet_ntoa($ipv4packed); + + my $update = $db->prepare("update servers set ipv4 = ? where id = ?"); + $update->execute($ipv4, $server_id); + $update->finish(); + + my $replace = $db->prepare("replace into dns (host, ipv4) values (?, ?)"); + $replace->execute($host, $ipv4); + $replace->finish(); + } + + $db->do("select release_lock('tendril-cron-5m-$server_id')"); + } +} +$servers->finish(); diff --git a/modules/tendril/files/tendril-queries.pl b/modules/tendril/files/tendril-queries.pl new file mode 100755 index 0000000..2227cd5 --- /dev/null +++ b/modules/tendril/files/tendril-queries.pl @@ -0,0 +1,37 @@ +#!/usr/bin/perl + +use strict; +use DBI; +use Socket; +use Digest::MD5 qw(md5 md5_hex md5_base64); + +my $config = $ARGV[0]; + +my $dbi = "DBI:mysql:;mysql_read_default_file=$config;mysql_read_default_group=tendril"; +my $db = DBI->connect($dbi, undef, undef) or die("db?"); +$db->do("SET NAMES 'utf8';"); + +my $select = $db->prepare("select checksum, content from queries where footprint is null"); +if ($select->execute()) +{ + while (my $row = $select->fetchrow_hashref()) + { + my $query = $row->{content}; + $query =~ s/"(?:[^"\\]|\\.)*"/?/ig; + $query =~ s/'(?:[^'\\]|\\.)*'/?/ig; + $query =~ s/\b([0-9]+)\b/?/ig; + $query =~ s/\/\*.*?\*\///ig; + $query =~ s/\s+/ /ig; + $query =~ s/^\s+//ig; + $query =~ s/\s+$//ig; + $query =~ s/[(][?,'" ]+?[)]/?LIST?/g; + + + my $update = $db->prepare("update queries set footprint = md5(?), template = ? where checksum = ?"); + my $rs = $update->execute($query, $query, $row->{checksum}); + $update->finish(); + + print $row->{content}."\n"; + } +} +$select->finish(); diff --git a/modules/tendril/manifests/maintenance.pp b/modules/tendril/manifests/maintenance.pp new file mode 100644 index 0000000..bb73610 --- /dev/null +++ b/modules/tendril/manifests/maintenance.pp @@ -0,0 +1,114 @@ +# This class sets up the cron jobs needed for the correct execution +# of the tendril web interface and database +# It does not require being run on the same server than the web +# or the database, but it requires having mysql access + +class tendril::maintenance ( + $ensure = present, + $tendril_host, + $tendril_user, + $tendril_password, + $tendril_database = 'tendril', + $tendril_port = 3306, + $wd_user = undef, + $wd_password = undef, + ){ + + # We want to control if cron is running, not if the scripts are installed. + Cron { + ensure => $ensure + } + + File { + ensure => present + } + + $watchdog_user = $wd_user ? { + undef => $tendril_user, + default => $wd_user, + } + $watchdog_password = $wd_password ? { + undef => $tendril_password, + default => $wd_password, + } + + group { 'tendril': + ensure => present, + name => 'tendril', + } + + user { 'tendril': + ensure => present, + gid => 'tendril', + shell => '/bin/false', + home => '/tmp', + system => true, + require => Group['tendril'], + } + + file { '/usr/local/bin/tendril-cron-5m.pl': + owner => 'tendril', + group => 'tendril', + mode => '0750', + source => 'puppet:///modules/tendril/tendril-cron-5m.pl', + } + + file { '/var/log/tendril-cron-5m.log': + owner => 'tendril', + group => 'tendril', + mode => '0640', + } + + file { '/usr/local/bin/tendril-queries.pl': + owner => 'tendril', + group => 'tendril', + mode => '0750', + source => 'puppet:///modules/tendril/tendril-queries.pl', + } + + file { '/var/log/tendril-queries.log': + owner => 'tendril', + group => 'tendril', + mode => '0640', + } + + file { '/var/log/tendril-queries.err': + owner => 'tendril', + group => 'tendril', + mode => '0640', + } + + file { '/etc/mysql/tendril.cnf': + owner => 'tendril', + group => 'tendril', + mode => '0640', + content => template('tendril/tendril.cnf.erb'), + } + + cron { 'tendril-cron-5m': + user => 'tendril', + minute => '*/5', + command => '/usr/local/bin/tendril-cron-5m.pl \ +/etc/mysql/tendril.cnf > /var/log/tendril-cron-5m.log 2> \ +/var/log/tendril-cron-5m.err', + require => [ + File['/usr/local/bin/tendril-cron-5m.pl'], + File['/var/log/tendril-cron-5m.log'], + File['/var/log/tendril-cron-5m.err'], + ] + } + + cron { 'tendril-queries': + user => 'tendril', + minute => '*/5', + command => '/usr/local/bin/tendril-queries.pl \ +/etc/mysql/tendril.cnf > /var/log/tendril-queries.log 2> \ +/var/log/tendril-queries.err', + require => [ + File['/usr/local/bin/tendril-queries.pl'], + File['/var/log/tendril-queries.log'], + File['/var/log/tendril-queries.err'], + File['/etc/mysql/tendril.cfg'], + ] + } +} diff --git a/modules/tendril/templates/tendril.cnf.erb b/modules/tendril/templates/tendril.cnf.erb new file mode 100644 index 0000000..a124e59 --- /dev/null +++ b/modules/tendril/templates/tendril.cnf.erb @@ -0,0 +1,11 @@ +[tendril] +host = <%=@tendril_host%> +port = <%=@tendril_port%> +database = <%=@tendril_db%> +user = <%=@tendril_user%> +password = <%=@tendril_password%> + +[watchdog] +database = information_schema +user = <%=@watchdog_user%> +password = <%=@watchdog_password%> -- To view, visit https://gerrit.wikimedia.org/r/252208 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I8b5e6c6c0073804188ac562eb8839ce9157491ca Gerrit-PatchSet: 14 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Jcrespo <jcre...@wikimedia.org> Gerrit-Reviewer: Giuseppe Lavagetto <glavage...@wikimedia.org> Gerrit-Reviewer: Jcrespo <jcre...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits