Giuseppe Lavagetto has submitted this change and it was merged.

Change subject: Adding tendril::maintenance class
......................................................................


Adding tendril::maintenance class

This puppetizes the cron jobs that are currently running on
terbium, related to refresh some statistics about tendril.

This module has also been added to mariadb::maintenance,
and this one is being applied to terbium (and temporarelly,
to a separate mw host).

Change-Id: I8b5e6c6c0073804188ac562eb8839ce9157491ca
---
M hieradata/hosts/terbium.yaml
M manifests/role/db.pp
M manifests/role/mariadb.pp
M manifests/site.pp
A modules/tendril/files/tendril-cron-5m.pl
A modules/tendril/files/tendril-queries.pl
A modules/tendril/manifests/maintenance.pp
A modules/tendril/templates/tendril.cnf.erb
8 files changed, 266 insertions(+), 10 deletions(-)

Approvals:
  Giuseppe Lavagetto: Verified; Looks good to me, approved
  jenkins-bot: Verified



diff --git a/hieradata/hosts/terbium.yaml b/hieradata/hosts/terbium.yaml
index a214c82..3a7f17f 100644
--- a/hieradata/hosts/terbium.yaml
+++ b/hieradata/hosts/terbium.yaml
@@ -24,3 +24,4 @@
 mediawiki::maintenance::purge_securepoll::ensure: absent
 mediawiki::maintenance::jobqueue_stats::ensure: absent
 mediawiki::maintenance::updatequerypages::ensure: absent
+tendril::maintenance::ensure: absent
diff --git a/manifests/role/db.pp b/manifests/role/db.pp
index 20d2342..db93664 100644
--- a/manifests/role/db.pp
+++ b/manifests/role/db.pp
@@ -116,11 +116,3 @@
         content => template('mysql_wmf/skrillex.yaml.erb'),
     }
 }
-
-class role::db::maintenance {
-    include mysql
-
-    package { 'percona-toolkit':
-        ensure => latest,
-    }
-}
diff --git a/manifests/role/mariadb.pp b/manifests/role/mariadb.pp
index 3abb082..844fb2d 100644
--- a/manifests/role/mariadb.pp
+++ b/manifests/role/mariadb.pp
@@ -756,3 +756,20 @@
         rule => 'saddr 10.64.0.15 proto tcp dport (3306) ACCEPT;',
     }
 }
+
+class role::mariadb::maintenance {
+    # TODO: check if both of these are still needed
+    include mysql
+    package { 'percona-toolkit':
+        ensure => latest,
+    }
+
+    # place from which tendril-related cron jobs are run
+    include passwords::tendril
+
+    class { 'tendril::maintenance':
+        tendril_host => 'db1011.eqiad.wmnet',
+        tendril_user => 'watchdog',
+        tendril_password => $passwords::tendril::db_pass,
+    }
+}
diff --git a/manifests/site.pp b/manifests/site.pp
index 2dfe43d..c732ed4 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -1915,7 +1915,7 @@
 
 # mw1152 is the experimental HAT script runner
 node 'mw1152.eqiad.wmnet' {
-    role mediawiki::maintenance, db::maintenance, mediawiki::generic_monitoring
+    role mediawiki::maintenance, mariadb::maintenance, 
mediawiki::generic_monitoring
     include role::noc
     include standard
     include ldap::role::client::labs
@@ -2416,7 +2416,7 @@
 
 # https://wikitech.wikimedia.org/wiki/Terbium
 node 'terbium.eqiad.wmnet' {
-    role db::maintenance, mediawiki::maintenance, backup::host
+    role mariadb::maintenance, mediawiki::maintenance, backup::host
 
     include ldap::role::client::labs
 
diff --git a/modules/tendril/files/tendril-cron-5m.pl 
b/modules/tendril/files/tendril-cron-5m.pl
new file mode 100755
index 0000000..88858f7
--- /dev/null
+++ b/modules/tendril/files/tendril-cron-5m.pl
@@ -0,0 +1,84 @@
+#!/usr/bin/perl
+
+use strict;
+use DBI;
+use Socket;
+use Digest::MD5 qw(md5 md5_hex md5_base64);
+
+my $config = $ARGV[0];
+
+my $dbi = 
"DBI:mysql:;mysql_read_default_file=$config;mysql_read_default_group=tendril";
+my $db  = DBI->connect($dbi, undef, undef) or die("db?");
+$db->do("SET NAMES 'utf8';");
+
+my $servers = $db->prepare("select id, host, port from servers");
+
+$servers->execute();
+
+while (my $row = $servers->fetchrow_hashref())
+{
+       my $server_id = $row->{id};
+       my $host = $row->{host};
+       my $port = $row->{port};
+
+       my ($lock) = $db->selectrow_array("select 
get_lock('tendril-cron-5m-$server_id', 1)");
+
+       if ($lock == 1)
+       {
+               print "$host:$port\n";
+
+               my $select = $db->prepare("select id, host, info, md5(info) as 
info_md5 from processlist_query_log where server_id = ? and info is not null 
and checksum is null and stamp > now() - interval 3 day group by id, host, 
info");
+               if ($select->execute($server_id))
+               {
+                       while (my $row = $select->fetchrow_hashref())
+                       {
+                               if ($row->{host} =~ 
/^(\d+\.\d+\.\d+\.\d+):\d+$/)
+                               {
+                                       my $ipv4 = $1;
+                                       my $iaddr = inet_aton($ipv4);
+                                       if (my $host = gethostbyaddr($iaddr, 
AF_INET))
+                                       {
+                                               my $replace = 
$db->prepare("replace into dns (host, ipv4) values (?, ?)");
+                                               $replace->execute($host, $ipv4);
+                                               $replace->finish();
+                                       }
+                               }
+
+                               my $query = $row->{info};
+                               $query =~ s/"(?:[^"\\]|\\.)*"/?/ig;
+                               $query =~ s/'(?:[^'\\]|\\.)*'/?/ig;
+                               $query =~ s/\b([0-9]+)\b/?/ig;
+                               $query =~ s/\/\*.*?\*\///ig;
+                               $query =~ s/\s+/ /ig;
+                               $query =~ s/^\s+//ig;
+                               $query =~ s/\s+$//ig;
+                               $query =~ s/[(][?,'" ]+?[)]/?LIST?/g;
+
+
+                               my $update = $db->prepare("update 
processlist_query_log set checksum = md5(?) where server_id = ? and id = ? and 
checksum is null and md5(info) = ? and stamp > now() - interval 3 day");
+                               my $rs = $update->execute($query, $server_id, 
$row->{id}, $row->{info_md5});
+                               $update->finish();
+
+                               print ".";
+                       }
+               }
+               print "\n";
+               $select->finish();
+
+               if (my $ipv4packed = gethostbyname($host))
+               {
+                       my $ipv4 = inet_ntoa($ipv4packed);
+
+                       my $update = $db->prepare("update servers set ipv4 = ? 
where id = ?");
+                       $update->execute($ipv4, $server_id);
+                       $update->finish();
+
+                       my $replace = $db->prepare("replace into dns (host, 
ipv4) values (?, ?)");
+                       $replace->execute($host, $ipv4);
+                       $replace->finish();
+               }
+
+               $db->do("select release_lock('tendril-cron-5m-$server_id')");
+       }
+}
+$servers->finish();
diff --git a/modules/tendril/files/tendril-queries.pl 
b/modules/tendril/files/tendril-queries.pl
new file mode 100755
index 0000000..2227cd5
--- /dev/null
+++ b/modules/tendril/files/tendril-queries.pl
@@ -0,0 +1,37 @@
+#!/usr/bin/perl
+
+use strict;
+use DBI;
+use Socket;
+use Digest::MD5 qw(md5 md5_hex md5_base64);
+
+my $config = $ARGV[0];
+
+my $dbi = 
"DBI:mysql:;mysql_read_default_file=$config;mysql_read_default_group=tendril";
+my $db  = DBI->connect($dbi, undef, undef) or die("db?");
+$db->do("SET NAMES 'utf8';");
+
+my $select = $db->prepare("select checksum, content from queries where 
footprint is null");
+if ($select->execute())
+{
+    while (my $row = $select->fetchrow_hashref())
+    {
+        my $query = $row->{content};
+        $query =~ s/"(?:[^"\\]|\\.)*"/?/ig;
+        $query =~ s/'(?:[^'\\]|\\.)*'/?/ig;
+        $query =~ s/\b([0-9]+)\b/?/ig;
+        $query =~ s/\/\*.*?\*\///ig;
+        $query =~ s/\s+/ /ig;
+        $query =~ s/^\s+//ig;
+        $query =~ s/\s+$//ig;
+        $query =~ s/[(][?,'" ]+?[)]/?LIST?/g;
+
+
+        my $update = $db->prepare("update queries set footprint = md5(?), 
template = ? where checksum = ?");
+        my $rs = $update->execute($query, $query, $row->{checksum});
+        $update->finish();
+
+        print $row->{content}."\n";
+    }
+}
+$select->finish();
diff --git a/modules/tendril/manifests/maintenance.pp 
b/modules/tendril/manifests/maintenance.pp
new file mode 100644
index 0000000..bb73610
--- /dev/null
+++ b/modules/tendril/manifests/maintenance.pp
@@ -0,0 +1,114 @@
+# This class sets up the cron jobs needed for the correct execution
+# of the tendril web interface and database
+# It does not require being run on the same server than the web
+# or the database, but it requires having mysql access
+
+class tendril::maintenance (
+    $ensure = present,
+    $tendril_host,
+    $tendril_user,
+    $tendril_password,
+    $tendril_database = 'tendril',
+    $tendril_port = 3306,
+    $wd_user = undef,
+    $wd_password = undef,
+    ){
+
+    # We want to control if cron is running, not if the scripts are installed.
+    Cron {
+        ensure => $ensure
+    }
+
+    File {
+        ensure => present
+    }
+
+    $watchdog_user = $wd_user ? {
+        undef   => $tendril_user,
+        default => $wd_user,
+    }
+    $watchdog_password = $wd_password ? {
+        undef   => $tendril_password,
+        default => $wd_password,
+    }
+
+    group { 'tendril':
+        ensure => present,
+        name   => 'tendril',
+    }
+
+    user { 'tendril':
+        ensure  => present,
+        gid     => 'tendril',
+        shell   => '/bin/false',
+        home    => '/tmp',
+        system  => true,
+        require => Group['tendril'],
+    }
+
+    file { '/usr/local/bin/tendril-cron-5m.pl':
+        owner  => 'tendril',
+        group  => 'tendril',
+        mode   => '0750',
+        source => 'puppet:///modules/tendril/tendril-cron-5m.pl',
+    }
+
+    file { '/var/log/tendril-cron-5m.log':
+        owner  => 'tendril',
+        group  => 'tendril',
+        mode   => '0640',
+    }
+
+    file { '/usr/local/bin/tendril-queries.pl':
+        owner  => 'tendril',
+        group  => 'tendril',
+        mode   => '0750',
+        source => 'puppet:///modules/tendril/tendril-queries.pl',
+    }
+
+    file { '/var/log/tendril-queries.log':
+        owner  => 'tendril',
+        group  => 'tendril',
+        mode   => '0640',
+    }
+
+    file { '/var/log/tendril-queries.err':
+        owner  => 'tendril',
+        group  => 'tendril',
+        mode   => '0640',
+    }
+
+    file { '/etc/mysql/tendril.cnf':
+        owner   => 'tendril',
+        group   => 'tendril',
+        mode    => '0640',
+        content => template('tendril/tendril.cnf.erb'),
+    }
+
+    cron { 'tendril-cron-5m':
+        user    => 'tendril',
+        minute  => '*/5',
+        command => '/usr/local/bin/tendril-cron-5m.pl \
+/etc/mysql/tendril.cnf > /var/log/tendril-cron-5m.log 2> \
+/var/log/tendril-cron-5m.err',
+        require => [
+            File['/usr/local/bin/tendril-cron-5m.pl'],
+            File['/var/log/tendril-cron-5m.log'],
+            File['/var/log/tendril-cron-5m.err'],
+        ]
+    }
+
+    cron { 'tendril-queries':
+        user    => 'tendril',
+        minute  => '*/5',
+        command => '/usr/local/bin/tendril-queries.pl \
+/etc/mysql/tendril.cnf > /var/log/tendril-queries.log 2> \
+/var/log/tendril-queries.err',
+        require => [
+            File['/usr/local/bin/tendril-queries.pl'],
+            File['/var/log/tendril-queries.log'],
+            File['/var/log/tendril-queries.err'],
+            File['/etc/mysql/tendril.cfg'],
+        ]
+    }
+}
diff --git a/modules/tendril/templates/tendril.cnf.erb 
b/modules/tendril/templates/tendril.cnf.erb
new file mode 100644
index 0000000..a124e59
--- /dev/null
+++ b/modules/tendril/templates/tendril.cnf.erb
@@ -0,0 +1,11 @@
+[tendril]
+host      = <%=@tendril_host%>
+port      = <%=@tendril_port%>
+database  = <%=@tendril_db%> 
+user      = <%=@tendril_user%>
+password  = <%=@tendril_password%>
+
+[watchdog]
+database  = information_schema
+user      = <%=@watchdog_user%>
+password  = <%=@watchdog_password%>

-- 
To view, visit https://gerrit.wikimedia.org/r/252208
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I8b5e6c6c0073804188ac562eb8839ce9157491ca
Gerrit-PatchSet: 14
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Jcrespo <jcre...@wikimedia.org>
Gerrit-Reviewer: Giuseppe Lavagetto <glavage...@wikimedia.org>
Gerrit-Reviewer: Jcrespo <jcre...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to