Dzahn has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/385484 )

Change subject: Revert "apache: remove ganglia monitoring"
......................................................................


Revert "apache: remove ganglia monitoring"

This reverts commit 6d16aa43a2d14e6a8cd914be962c060756d6d3fc.

Change-Id: I41bcb2fde4474ffa0d5ab9658b573e8725507428
---
M hieradata/role/common/mediawiki/appserver.yaml
M hieradata/role/common/mediawiki/appserver/api.yaml
M hieradata/role/common/mediawiki/appserver/canary_api.yaml
M hieradata/role/common/mediawiki/canary_appserver.yaml
M hieradata/role/common/mediawiki/imagescaler.yaml
M hieradata/role/common/mediawiki/jobrunner.yaml
M hieradata/role/common/mediawiki/memcached.yaml
M hieradata/role/common/mediawiki/videoscaler.yaml
A modules/apache/files/apache_status.py
A modules/apache/files/apache_status.pyconf
M modules/apache/manifests/monitoring.pp
11 files changed, 573 insertions(+), 8 deletions(-)

Approvals:
  Dzahn: Verified; Looks good to me, approved



diff --git a/hieradata/role/common/mediawiki/appserver.yaml b/hieradata/role/common/mediawiki/appserver.yaml
index 7d14664..28e1f2f 100644
--- a/hieradata/role/common/mediawiki/appserver.yaml
+++ b/hieradata/role/common/mediawiki/appserver.yaml
@@ -18,4 +18,3 @@
 apache::logrotate::rotate: 12
 nutcracker::verbosity: "4"
 role::mediawiki::webserver::tls: true
-standard::has_ganglia: false
diff --git a/hieradata/role/common/mediawiki/appserver/api.yaml b/hieradata/role/common/mediawiki/appserver/api.yaml
index 7f01396..1195204 100644
--- a/hieradata/role/common/mediawiki/appserver/api.yaml
+++ b/hieradata/role/common/mediawiki/appserver/api.yaml
@@ -18,4 +18,3 @@
 apache::logrotate::rotate: 12
 nutcracker::verbosity: "4"
 role::mediawiki::webserver::tls: true
-standard::has_ganglia: false
diff --git a/hieradata/role/common/mediawiki/appserver/canary_api.yaml b/hieradata/role/common/mediawiki/appserver/canary_api.yaml
index 575a080..99cde6f 100644
--- a/hieradata/role/common/mediawiki/appserver/canary_api.yaml
+++ b/hieradata/role/common/mediawiki/appserver/canary_api.yaml
@@ -23,4 +23,3 @@
 apache::logrotate::rotate: 12
 nutcracker::verbosity: "4"
 role::mediawiki::webserver::tls: true
-standard::has_ganglia: false
diff --git a/hieradata/role/common/mediawiki/canary_appserver.yaml b/hieradata/role/common/mediawiki/canary_appserver.yaml
index 10e7a9e..a33df65 100644
--- a/hieradata/role/common/mediawiki/canary_appserver.yaml
+++ b/hieradata/role/common/mediawiki/canary_appserver.yaml
@@ -23,4 +23,3 @@
 apache::logrotate::rotate: 12
 nutcracker::verbosity: "4"
 role::mediawiki::webserver::tls: true
-standard::has_ganglia: false
diff --git a/hieradata/role/common/mediawiki/imagescaler.yaml b/hieradata/role/common/mediawiki/imagescaler.yaml
index 7a6a277..201ab70 100644
--- a/hieradata/role/common/mediawiki/imagescaler.yaml
+++ b/hieradata/role/common/mediawiki/imagescaler.yaml
@@ -13,4 +13,3 @@
       light_process_count: "10"
 apache::mpm::mpm: worker
 role::mediawiki::webserver::tls: true
-standard::has_ganglia: false
diff --git a/hieradata/role/common/mediawiki/jobrunner.yaml b/hieradata/role/common/mediawiki/jobrunner.yaml
index 10b4bf2..aae358f 100644
--- a/hieradata/role/common/mediawiki/jobrunner.yaml
+++ b/hieradata/role/common/mediawiki/jobrunner.yaml
@@ -20,4 +20,3 @@
 role::lvs::realserver::pools:
   hhvm:
     lvs_name: jobrunner
-standard::has_ganglia: false
diff --git a/hieradata/role/common/mediawiki/memcached.yaml b/hieradata/role/common/mediawiki/memcached.yaml
index c695d12..70221d7 100644
--- a/hieradata/role/common/mediawiki/memcached.yaml
+++ b/hieradata/role/common/mediawiki/memcached.yaml
@@ -22,4 +22,3 @@
 profile::memcached::extended_options:
   - 'slab_reassign'
 profile::memcached::port: '11211'
-standard::has_ganglia: false
diff --git a/hieradata/role/common/mediawiki/videoscaler.yaml b/hieradata/role/common/mediawiki/videoscaler.yaml
index 253052e..5fbe653 100644
--- a/hieradata/role/common/mediawiki/videoscaler.yaml
+++ b/hieradata/role/common/mediawiki/videoscaler.yaml
@@ -12,4 +12,3 @@
       connection_timeout_seconds: 86400
       thread_count: 15
   max_execution_time: 86400
-standard::has_ganglia: false
diff --git a/modules/apache/files/apache_status.py b/modules/apache/files/apache_status.py
new file mode 100755
index 0000000..15c0f2b
--- /dev/null
+++ b/modules/apache/files/apache_status.py
@@ -0,0 +1,439 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import time
+import urllib2
+import traceback
+import re
+import copy
+
+# global to store state for "total accesses"
+METRICS = {
+    'time': 0,
+    'data': {}
+}
+
+LAST_METRICS = copy.deepcopy(METRICS)
+METRICS_CACHE_MAX = 5
+
+# Metric prefix
+NAME_PREFIX = "ap_"
+SSL_NAME_PREFIX = "apssl_"
+
+SERVER_STATUS_URL = ""
+
+descriptors = list()
+Desc_Skel = {}
+Scoreboard = {
+    NAME_PREFIX + 'waiting': {
+        'key': '_',
+        'desc': 'Waiting for Connection',
+    },
+    NAME_PREFIX + 'starting': {
+        'key': 'S',
+        'desc': 'Starting up',
+    },
+    NAME_PREFIX + 'reading_request': {
+        'key': 'R',
+        'desc': 'Reading Request',
+    },
+    NAME_PREFIX + 'sending_reply': {
+        'key': 'W',
+        'desc': 'Sending Reply',
+    },
+    NAME_PREFIX + 'keepalive': {
+        'key': 'K',
+        'desc': 'Keepalive (read)',
+    },
+    NAME_PREFIX + 'dns_lookup': {
+        'key': 'D',
+        'desc': 'DNS Lookup',
+    },
+    NAME_PREFIX + 'closing': {
+        'key': 'C',
+        'desc': 'Closing connection',
+    },
+    NAME_PREFIX + 'logging': {
+        'key': 'L',
+        'desc': 'Logging',
+    },
+    NAME_PREFIX + 'gracefully_fin': {
+        'key': 'G',
+        'desc': 'Gracefully finishing',
+    },
+    NAME_PREFIX + 'idle': {
+        'key': 'I',
+        'desc': 'Idle cleanup of worker',
+    },
+    NAME_PREFIX + 'open_slot': {
+        'key': '.',
+        'desc': 'Open slot with no current process',
+    },
+}
+Scoreboard_bykey = dict([(v["key"], k) for (k, v) in Scoreboard.iteritems()])
+
+SSL_REGEX = re.compile(
+    '^(cache type:) (.*)(<b>)(?P<shared_mem>[0-9]+)(</b> bytes, current sessio'
+    'ns: <b>)(?P<current_sessions>[0-9]+)(</b><br>subcaches: <b>)(?P<num_subca'
+    'ches>[0-9]+)(</b>, indexes per subcache: <b>)(?P<indexes_per_subcache>[0-'
+    '9]+)(</b><br>)(.*)(<br>index usage: <b>)(?P<index_usage>[0-9]+)(%</b>, ca'
+    'che usage: <b>)(?P<cache_usage>[0-9]+)(%</b><br>total sessions stored sin'
+    'ce starting: <b>)(?P<sessions_stored>[0-9]+)(</b><br>total sessions expir'
+    'ed since starting: <b>)(?P<sessions_expired>[0-9]+)(</b><br>total \(pre-e'
+    'xpiry\) sessions scrolled out of the cache: <b>)(?P<sessions_scrolled_out'
+    'of_cache>[0-9]+)(</b><br>total retrieves since starting: <b>)(?P<retrieve'
+    's_hit>[0-9]+)(</b> hit, <b>)(?P<retrieves_miss>[0-9]+)(</b> miss<br>total'
+    ' removes since starting: <b>)(?P<removes_hit>[0-9]+)(</b> hit, <b>)(?P<re'
+    'moves_miss>[0-9]+)'
+)
+
+Metric_Map = {
+    'Uptime': NAME_PREFIX + "uptime",
+    'IdleWorkers': NAME_PREFIX + "idle_workers",
+    'BusyWorkers': NAME_PREFIX + "busy_workers",
+    'Total kBytes': NAME_PREFIX + "bytes",
+    'CPULoad': NAME_PREFIX + "cpuload",
+    "Total Accesses": NAME_PREFIX + "rps"
+}
+
+
+def get_metrics():
+
+    global METRICS, LAST_METRICS, SERVER_STATUS_URL, COLLECT_SSL
+
+    if (time.time() - METRICS['time']) > METRICS_CACHE_MAX:
+
+        metrics = dict([(k, 0) for k in Scoreboard.keys()])
+
+        # This is the short server-status. Lacks SSL metrics
+        try:
+            req = urllib2.Request(SERVER_STATUS_URL + "?auto")
+
+            # Download the status file
+            res = urllib2.urlopen(req)
+
+            for line in res:
+                split_line = line.rstrip().split(": ")
+                long_metric_name = split_line[0]
+                if long_metric_name == "Scoreboard":
+                    for sck in split_line[1]:
+                        metrics[Scoreboard_bykey[sck]] += 1
+                else:
+                    if long_metric_name in Metric_Map:
+                        metric_name = Metric_Map[long_metric_name]
+                    else:
+                        metric_name = long_metric_name
+                    metrics[metric_name] = split_line[1]
+
+        except urllib2.URLError:
+            traceback.print_exc()
+
+        # If we are collecting SSL metrics we'll do
+        if COLLECT_SSL:
+
+            try:
+                req2 = urllib2.Request(SERVER_STATUS_URL)
+
+                # Download the status file
+                res = urllib2.urlopen(req2)
+
+                for line in res:
+                    regMatch = SSL_REGEX.match(line)
+                    if regMatch:
+                        linebits = regMatch.groupdict()
+                        for key in linebits:
+                            # print SSL_NAME_PREFIX + key + "=" + linebits[key]
+                            metrics[SSL_NAME_PREFIX + key] = linebits[key]
+
+            except urllib2.URLError:
+                traceback.print_exc()
+
+        LAST_METRICS = copy.deepcopy(METRICS)
+        METRICS = {
+            'time': time.time(),
+            'data': metrics
+        }
+
+    return [METRICS, LAST_METRICS]
+
+
+def get_value(name):
+    """Return a value for the requested metric"""
+
+    metrics = get_metrics()[0]
+
+    try:
+        result = metrics['data'][name]
+    except StandardError:
+        result = 0
+
+    return result
+
+
+def get_delta(name):
+    """Return change over time for the requested metric"""
+
+    # get metrics
+    [curr_metrics, last_metrics] = get_metrics()
+
+    # If it's ap_bytes metric multiply result by 1024
+    if name == NAME_PREFIX + "bytes":
+        multiplier = 1024
+    else:
+        multiplier = 1
+
+    try:
+        delta = (
+            multiplier * (
+                float(curr_metrics['data'][name]) -
+                float(last_metrics['data'][name])
+            ) / (curr_metrics['time'] - last_metrics['time'])
+        )
+        if delta < 0:
+            print name + " is less 0"
+            delta = 0
+    except KeyError:
+        delta = 0.0
+
+    return delta
+
+
+def create_desc(prop):
+    d = Desc_Skel.copy()
+    for k, v in prop.iteritems():
+        d[k] = v
+    return d
+
+
+def metric_init(params):
+    global descriptors, Desc_Skel, SERVER_STATUS_URL, COLLECT_SSL
+
+    print '[apache_status] Received the following parameters'
+    print params
+
+    if "metric_group" not in params:
+        params["metric_group"] = "apache"
+
+    Desc_Skel = {
+        'name': 'XXX',
+        'call_back': get_value,
+        'time_max': 60,
+        'value_type': 'uint',
+        'units': 'proc',
+        'slope': 'both',
+        'format': '%d',
+        'description': 'XXX',
+        'groups': params["metric_group"],
+    }
+
+    if "refresh_rate" not in params:
+        params["refresh_rate"] = 15
+
+    if "url" not in params:
+        params["url"] = "http://localhost:7070/server-status";
+
+    if "collect_ssl" not in params:
+        params["collect_ssl"] = False
+
+    SERVER_STATUS_URL = params["url"]
+    COLLECT_SSL = params["collect_ssl"]
+
+    # IP:HOSTNAME
+    if "spoof_host" in params:
+        Desc_Skel["spoof_host"] = params["spoof_host"]
+
+    descriptors.append(create_desc({
+        "name": NAME_PREFIX + "rps",
+        "value_type": "float",
+        "units": "req/sec",
+        "call_back": get_delta,
+        "format": "%.3f",
+        "description": "request per second",
+    }))
+
+    descriptors.append(create_desc({
+        "name": NAME_PREFIX + "bytes",
+        "value_type": "float",
+        "units": "bytes/sec",
+        "call_back": get_delta,
+        "format": "%.3f",
+        "description": "bytes transferred per second",
+    }))
+
+    descriptors.append(create_desc({
+        "name": NAME_PREFIX + "cpuload",
+        "value_type": "float",
+        "units": "pct",
+        "format": "%.6f",
+        "call_back": get_value,
+        "description": "Pct of time CPU utilized",
+    }))
+
+    descriptors.append(create_desc({
+        "name": NAME_PREFIX + "busy_workers",
+        "value_type": "uint",
+        "units": "threads",
+        "format": "%u",
+        "call_back": get_value,
+        "description": "Busy threads",
+    }))
+
+    descriptors.append(create_desc({
+        "name": NAME_PREFIX + "idle_workers",
+        "value_type": "uint",
+        "units": "threads",
+        "format": "%u",
+        "call_back": get_value,
+        "description": "Idle threads",
+    }))
+
+    descriptors.append(create_desc({
+        "name": NAME_PREFIX + "uptime",
+        "value_type": "uint",
+        "units": "seconds",
+        "format": "%u",
+        "call_back": get_value,
+        "description": "Uptime",
+    }))
+
+    for k, v in Scoreboard.iteritems():
+        descriptors.append(create_desc({
+            "name": k,
+            "call_back": get_value,
+            "description": v["desc"],
+        }))
+
+    ##########################################################################
+    # SSL metrics
+    ##########################################################################
+    if params['collect_ssl']:
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "shared_mem",
+            "value_type": "float",
+            "units": "bytes",
+            "format": "%.3f",
+            "call_back": get_value,
+            "description": "Shared memory",
+        }))
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "current_sessions",
+            "value_type": "uint",
+            "units": "sessions",
+            "format": "%u",
+            "call_back": get_value,
+            "description": "Current sessions",
+        }))
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "num_subcaches",
+            "value_type": "uint",
+            "units": "subcaches",
+            "format": "%u",
+            "call_back": get_value,
+            "description": "Number of subcaches",
+        }))
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "indexes_per_subcache",
+            "value_type": "float",
+            "units": "indexes",
+            "format": "%.3f",
+            "call_back": get_value,
+            "description": "Subcaches",
+        }))
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "index_usage",
+            "value_type": "float",
+            "units": "pct",
+            "format": "%.3f",
+            "call_back": get_value,
+            "description": "Index usage",
+        }))
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "cache_usage",
+            "value_type": "float",
+            "units": "pct",
+            "format": "%.3f",
+            "call_back": get_value,
+            "description": "Cache usage",
+        }))
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "sessions_stored",
+            "value_type": "float",
+            "units": "sessions/sec",
+            "format": "%.3f",
+            "call_back": get_delta,
+            "description": "Sessions stored",
+        }))
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "sessions_expired",
+            "value_type": "float",
+            "units": "sessions/sec",
+            "format": "%.3f",
+            "call_back": get_delta,
+            "description": "Sessions expired",
+        }))
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "retrieves_hit",
+            "value_type": "float",
+            "units": "retrieves/sec",
+            "format": "%.3f",
+            "call_back": get_delta,
+            "description": "Retrieves Hit",
+        }))
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "retrieves_miss",
+            "value_type": "float",
+            "units": "retrieves/sec",
+            "format": "%.3f",
+            "call_back": get_delta,
+            "description": "Retrieves Miss",
+        }))
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "removes_hit",
+            "value_type": "float",
+            "units": "removes/sec",
+            "format": "%.3f",
+            "call_back": get_delta,
+            "description": "Removes Hit",
+        }))
+
+        descriptors.append(create_desc({
+            "name": SSL_NAME_PREFIX + "removes_miss",
+            "value_type": "float",
+            "units": "removes/sec",
+            "format": "%.3f",
+            "call_back": get_delta,
+            "description": "Removes Miss",
+        }))
+
+        return descriptors
+
+
+if __name__ == '__main__':
+    try:
+        params = {
+            'url': 'http://localhost:7070/server-status',
+            'collect_ssl': False
+        }
+        metric_init(params)
+        while True:
+            for d in descriptors:
+                v = d['call_back'](d['name'])
+                if d['name'] == NAME_PREFIX + "rps":
+                    print 'value for %s is %.4f' % (d['name'], v)
+                else:
+                    print 'value for %s is %s' % (d['name'], v)
+            time.sleep(15)
+    except KeyboardInterrupt:
+        os._exit(1)
diff --git a/modules/apache/files/apache_status.pyconf b/modules/apache/files/apache_status.pyconf
new file mode 100644
index 0000000..ca5fecd
--- /dev/null
+++ b/modules/apache/files/apache_status.pyconf
@@ -0,0 +1,113 @@
+modules {
+  module {
+    name     = "apache_status"
+    language = "python"
+    param url {
+        value = "http://127.0.0.1:80/server-status";
+    }
+
+    # Which metric group should these metrics be put into
+    param metric_group {
+        value = "apache"
+    }
+
+    # Collecting SSL metrics under Apache 2.2 appears to cause a memory leak
+    # in mod_status. Watch Apache memory utilization if you enable them
+    param collect_ssl {
+        value = False
+    }
+
+
+  }
+}
+
+collection_group {
+  collect_every  = 30
+  time_threshold = 90
+
+  metric {
+    name  = "ap_busy_workers"
+    title = "Busy Threads"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_idle_workers"
+    title = "Idle Threads"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_logging"
+    title = "Logging"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_open_slot"
+    title = "Open slot with no current process"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_reading_request"
+    title = "Reading Request"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_waiting"
+    title = "Waiting for Connection"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_sending_reply"
+    title = "Sending Reply"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_idle"
+    title = "Idle cleanup of worker"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_dns_lookup"
+    title = "DNS Lookup"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_closing"
+    title = "Closing connection"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_starting"
+    title = "Starting up"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_gracefully_fin"
+    title = "Gracefully finishing"
+    value_threshold = 0
+  }
+  metric {
+    name  = "ap_keepalive"
+    title = "Keepalive (read)"
+    value_threshold = 0
+  }
+
+  metric {
+    name  = "ap_rps"
+    title = "Requests per second"
+    value_threshold = 0.0
+  }
+
+  metric {
+    name  = "ap_cpuload"
+    title = "Pct of time CPU utilized"
+    value_threshold = 0.0
+  }
+
+#  Uncomment if you are collecting SSL metrics
+#  metric {
+#      name_match = "apssl_(.+)"
+#      value_threshold = 0.0
+#  }
+
+
+}
diff --git a/modules/apache/manifests/monitoring.pp b/modules/apache/manifests/monitoring.pp
index cfdb447..aebe2f3 100644
--- a/modules/apache/manifests/monitoring.pp
+++ b/modules/apache/manifests/monitoring.pp
@@ -9,6 +9,27 @@
     include ::apache::mod::status
     include ::standard
 
+    if $::standard::has_ganglia {
+        include ::ganglia
+
+        file { '/usr/lib/ganglia/python_modules/apache_status.py':
+            source  => 'puppet:///modules/apache/apache_status.py',
+            owner   => 'root',
+            group   => 'root',
+            mode    => '0444',
+            require => Package['ganglia-monitor'],
+        }
+
+        file { '/etc/ganglia/conf.d/apache_status.pyconf':
+            source  => 'puppet:///modules/apache/apache_status.pyconf',
+            owner   => 'root',
+            group   => 'root',
+            mode    => '0444',
+            require => File['/usr/lib/ganglia/python_modules/apache_status.py'],
+            notify  => Service['ganglia-monitor'],
+        }
+    }
+
     # Use `links -dump http://127.0.0.1/server-status` to generate
     # an Apache status report.
     require_package('links')

-- 
To view, visit https://gerrit.wikimedia.org/r/385484
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I41bcb2fde4474ffa0d5ab9658b573e8725507428
Gerrit-PatchSet: 2
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Dzahn <dz...@wikimedia.org>
Gerrit-Reviewer: Dzahn <dz...@wikimedia.org>
Gerrit-Reviewer: Filippo Giunchedi <fgiunch...@wikimedia.org>
Gerrit-Reviewer: Giuseppe Lavagetto <glavage...@wikimedia.org>
Gerrit-Reviewer: Volans <rcocci...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to