Lcarr has submitted this change and it was merged.

Change subject: Add ganglia graph for global jobqueue length
......................................................................


Add ganglia graph for global jobqueue length

Adapted from the en.wiki job queue check that runs on spence, but run on hume
to avoid relying on spence having a MediaWiki install.
The --totalonly option was added in I0d09961cee333f5c36848277ffa02565fb406efb.
Uses the mwdeploy user and "ensure => present", per ^demon.
While rebasing, restored the jobqueue check for en.wiki, but moved it to
icinga.pp and made it run on hume as well (it was on spence), per Leslie.

Change-Id: I4b67f60a62a370ea327f7fa68eea9ca444baa3bc
---
M manifests/ganglia.pp
M manifests/misc/icinga.pp
M manifests/nagios.pp
M manifests/site.pp
4 files changed, 46 insertions(+), 1 deletion(-)

Approvals:
  Lcarr: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/manifests/ganglia.pp b/manifests/ganglia.pp
index a7110e5..429eaab 100644
--- a/manifests/ganglia.pp
+++ b/manifests/ganglia.pp
@@ -618,3 +618,24 @@
                content => template($template),
        }
 }
+
+# Copied from nagios::ganglia::monitor::enwiki
+# Will run on hume to use the local MediaWiki install so that we can use
+# maintenance scripts recycling DB connections and taking a few secs, not mins
+class misc::monitoring::jobqueue {
+       # Publishes the total printed by getJobQueueLengths.php --totalonly to ganglia via gmetric; mwscript is called by absolute path in both jobs since cron typically runs with a minimal PATH.
+       cron {
+               all_jobqueue_length:
+                       command => "/usr/bin/gmetric --name='Global JobQueue length' --type=int32 --conf=/etc/ganglia/gmond.conf --value=$(/usr/local/bin/mwscript getJobQueueLengths.php --totalonly | grep -oE '[0-9]+') > /dev/null 2>&1",
+                       user => mwdeploy,
+                       ensure => present;
+       }
+       # duplicating the above job to experiment with gmetric's host spoofing so as to
+       # gather these metrics in a fake host called "www.wikimedia.org"
+       cron {
+               all_jobqueue_length_spoofed:
+                       command => "/usr/bin/gmetric --name='Global JobQueue length' --type=int32 --conf=/etc/ganglia/gmond.conf --spoof 'www.wikimedia.org:www.wikimedia.org' --value=$(/usr/local/bin/mwscript getJobQueueLengths.php --totalonly | grep -oE '[0-9]+') > /dev/null 2>&1",
+                       user => mwdeploy,
+                       ensure => present;
+       }
+}
\ No newline at end of file
diff --git a/manifests/misc/icinga.pp b/manifests/misc/icinga.pp
index 9bd63f8..71e7f52 100644
--- a/manifests/misc/icinga.pp
+++ b/manifests/misc/icinga.pp
@@ -861,6 +861,29 @@
   }
 }
 
+# Used to be called nagios::ganglia::monitor::enwiki
+class misc::monitoring::enwikijobqueue {
+       # Cron jobs that run "select count(*) from job" against enwiki on db36.pmtpa.wmnet and publish the count to ganglia via gmetric.
+       include passwords::nagios::mysql
+       $ganglia_mysql_enwiki_pass = $passwords::nagios::mysql::mysql_enwiki_pass
+       $ganglia_mysql_enwiki_user = $passwords::nagios::mysql::mysql_enwiki_user
+       # Password is actually the same for all clusters and wikis, not en.wiki only
+       # NOTE(review): -p<password> on the mysql command line ends up in the crontab and is likely visible in the process list; consider a defaults file instead.
+       cron {
+               enwiki_jobqueue_length:
+                       command => "/usr/bin/gmetric --name='enwiki JobQueue length' --type=int32 --conf=/etc/ganglia/gmond.conf --value=$(mysql --batch --skip-column-names -u $ganglia_mysql_enwiki_user -p$ganglia_mysql_enwiki_pass -h db36.pmtpa.wmnet enwiki -e 'select count(*) from job') > /dev/null 2>&1",
+                       user => root,
+                       ensure => present;
+       }
+       # duplicating the above job to experiment with gmetric's host spoofing so as to gather these metrics in a fake host called "en.wikipedia.org"
+       cron {
+               enwiki_jobqueue_length_spoofed:
+                       command => "/usr/bin/gmetric --name='enwiki JobQueue length' --type=int32 --conf=/etc/ganglia/gmond.conf --spoof 'en.wikipedia.org:en.wikipedia.org' --value=$(mysql --batch --skip-column-names -u $ganglia_mysql_enwiki_user -p$ganglia_mysql_enwiki_pass -h db36.pmtpa.wmnet enwiki -e 'select count(*) from job') > /dev/null 2>&1",
+                       user => root,
+                       ensure => present;
+       }
+}
+
 # global monitoring groups - formerly misc/nagios.pp
 
 @monitor_group { 'misc_eqiad': description => 'eqiad misc servers' }
diff --git a/manifests/nagios.pp b/manifests/nagios.pp
index 11fed05..39e58c7 100644
--- a/manifests/nagios.pp
+++ b/manifests/nagios.pp
@@ -175,7 +175,6 @@
        }
 }
 
-
 class nagios::gsbmonitoring {
        @monitor_host { "google": ip_address => "74.125.225.84" }
 
diff --git a/manifests/site.pp b/manifests/site.pp
index 2ae3d99..6596c7b 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -1066,6 +1066,8 @@
                nfs::netapp::home,
                nfs::upload,
                misc::deployment::scap_scripts,
+               misc::monitoring::enwikijobqueue,
+               misc::monitoring::jobqueue,
                admins::roots,
                admins::mortals,
                admins::restricted,

-- 
To view, visit https://gerrit.wikimedia.org/r/37441
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I4b67f60a62a370ea327f7fa68eea9ca444baa3bc
Gerrit-PatchSet: 9
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Nemo bis <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
Gerrit-Reviewer: Lcarr <[email protected]>
Gerrit-Reviewer: Nemo bis <[email protected]>
Gerrit-Reviewer: Reedy <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to