Filippo Giunchedi has submitted this change and it was merged.

Change subject: swift-drive-audit: import icehouse version
......................................................................


swift-drive-audit: import icehouse version

Update our version of swift-drive-audit with the version from icehouse. Our
local changes will be imported back in the next commit, so that we have a
clean patch to send upstream.

Change-Id: Iaa1ed71033784bd22b96bd24d55e01027c755401
---
M files/swift/usr.bin.swift-drive-audit
1 file changed, 85 insertions(+), 55 deletions(-)

Approvals:
  Andrew Bogott: Looks good to me, but someone else must approve
  Filippo Giunchedi: Verified; Looks good to me, approved
  jenkins-bot: Verified



diff --git a/files/swift/usr.bin.swift-drive-audit b/files/swift/usr.bin.swift-drive-audit
index 46adae0..bb9974e 100755
--- a/files/swift/usr.bin.swift-drive-audit
+++ b/files/swift/usr.bin.swift-drive-audit
@@ -1,5 +1,5 @@
-#!/usr/bin/env python
-# Copyright (c) 2010-2012 OpenStack, LLC.
+#!/usr/bin/python
+# Copyright (c) 2010-2012 OpenStack Foundation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,34 +14,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Exit codes:
-#  0  - no errors
-#  1  - syntax error in calling (eg no config file)
-#  2  - filesystems unmounted
-#  4  - errors found but device not unmounted
-#
-#  When multiple errors are found, exit code will be the bitwise OR of
-#  the errors found.
-
 import datetime
+import glob
 import os
 import re
 import subprocess
 import sys
 from ConfigParser import ConfigParser
 
-from swift.common.utils import get_logger
+from swift.common.utils import backward, get_logger
 
-
-# To search for more types of errors, add the regex to the list below
-error_re = [
-    re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
-    re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
-]
-# Exit code constants here
-ERROR_SYNTAX = 1
-ERROR_UNMOUNTED = 2
-ERROR_NO_UNMOUNTING = 4
 
 def get_devices(device_dir, logger):
     devices = []
@@ -55,10 +37,10 @@
             device['block_device'] = block_device
             try:
                 device_num = os.stat(block_device).st_rdev
-            except OSError, e:
+            except OSError:
                 # If we can't stat the device, then something weird is going on
                 logger.error("Error: Could not stat %s!" %
-                    block_device)
+                             block_device)
                 continue
             device['major'] = str(os.major(device_num))
             device['minor'] = str(os.minor(device_num))
@@ -66,32 +48,70 @@
     for line in open('/proc/partitions').readlines()[2:]:
         major, minor, blocks, kernel_device = line.strip().split()
         device = [d for d in devices
-            if d['major'] == major and d['minor'] == minor]
+                  if d['major'] == major and d['minor'] == minor]
         if device:
             device[0]['kernel_device'] = kernel_device
     return devices
 
 
-def get_errors(minutes):
+def get_errors(error_re, log_file_pattern, minutes):
+    # Assuming log rotation is being used, we need to examine
+    # recently rotated files in case the rotation occurred
+    # just before the script is being run - the data we are
+    # looking for may have rotated.
+    #
+    # The globbing used before would not work with all out-of-box
+    # distro setup for logrotate and syslog therefore moving this
+    # to the config where one can set it with the desired
+    # globbing pattern.
+    log_files = [f for f in glob.glob(log_file_pattern)]
+    log_files.sort()
+
+    now_time = datetime.datetime.now()
+    end_time = now_time - datetime.timedelta(minutes=minutes)
+    # kern.log does not contain the year so we need to keep
+    # track of the year and month in case the year recently
+    # ticked over
+    year = now_time.year
+    prev_entry_month = now_time.month
     errors = {}
-    start_time = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
-    for line in open('/var/log/kern.log'):
-        if '[    0.000000]' in line:
-            # Ignore anything before the last boot
-            errors = {}
-            continue
-        log_time_string = '%s %s' % (start_time.year,
-                                     ' '.join(line.split()[:3]))
+
+    reached_old_logs = False
+    for path in log_files:
         try:
-            # don't die on bad log lines.
-            log_time = datetime.datetime.strptime(
-                log_time_string, '%Y %b %d %H:%M:%S')
-        except:
-            continue
-        if log_time > start_time:
-            for err in error_re:
-                for device in err.findall(line):
-                    errors[device] = errors.get(device, 0) + 1
+            f = open(path)
+        except IOError:
+            logger.error("Error: Unable to open " + path)
+            print("Unable to open " + path)
+            sys.exit(1)
+        for line in backward(f):
+            if '[    0.000000]' in line \
+                or 'KERNEL supported cpus:' in line \
+                    or 'BIOS-provided physical RAM map:' in line:
+                # Ignore anything before the last boot
+                reached_old_logs = True
+                break
+            # Solves the problem with year change - kern.log does not
+            # keep track of the year.
+            log_time_entry = line.split()[:3]
+            if log_time_entry[0] == 'Dec' and prev_entry_month == 'Jan':
+                year -= 1
+            prev_entry_month = log_time_entry[0]
+            log_time_string = '%s %s' % (year, ' '.join(log_time_entry))
+            try:
+                log_time = datetime.datetime.strptime(
+                    log_time_string, '%Y %b %d %H:%M:%S')
+            except ValueError:
+                continue
+            if log_time > end_time:
+                for err in error_re:
+                    for device in err.findall(line):
+                        errors[device] = errors.get(device, 0) + 1
+            else:
+                reached_old_logs = True
+                break
+        if reached_old_logs:
+            break
     return errors
 
 
@@ -113,24 +133,40 @@
         conf_path = sys.argv[1]
     except Exception:
         print "Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1]
-        sys.exit(ERROR_SYNTAX)
+        sys.exit(1)
     if not c.read(conf_path):
         print "Unable to read config file %s" % conf_path
-        sys.exit(ERROR_SYNTAX)
+        sys.exit(1)
     conf = dict(c.items('drive-audit'))
     device_dir = conf.get('device_dir', '/srv/node')
     minutes = int(conf.get('minutes', 60))
     error_limit = int(conf.get('error_limit', 1))
+    log_file_pattern = conf.get('log_file_pattern',
+                                '/var/log/kern.*[!.][!g][!z]')
+    error_re = []
+    for conf_key in conf:
+        if conf_key.startswith('regex_pattern_'):
+            error_pattern = conf[conf_key]
+            try:
+                r = re.compile(error_pattern)
+            except re.error:
+                sys.exit('Error: unable to compile regex pattern "%s"' %
+                         error_pattern)
+            error_re.append(r)
+    if not error_re:
+        error_re = [
+            re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
+            re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
+        ]
     conf['log_name'] = conf.get('log_name', 'drive-audit')
     logger = get_logger(conf, log_route='drive-audit')
     devices = get_devices(device_dir, logger)
     logger.debug("Devices found: %s" % str(devices))
     if not devices:
         logger.error("Error: No devices found!")
-    errors = get_errors(minutes)
+    errors = get_errors(error_re, log_file_pattern, minutes)
     logger.debug("Errors found: %s" % str(errors))
     unmounts = 0
-    exitcode = 0
     for kernel_device, count in errors.items():
         if count >= error_limit:
             device = \
@@ -139,17 +175,11 @@
                 mount_point = device[0]['mount_point']
                 if mount_point.startswith(device_dir):
                     logger.info("Unmounting %s with %d errors" %
-                        (mount_point, count))
+                                (mount_point, count))
                     subprocess.call(['umount', '-fl', mount_point])
                     logger.info("Commenting out %s from /etc/fstab" %
-                        (mount_point))
+                                (mount_point))
                     comment_fstab(mount_point)
                     unmounts += 1
-                    exitcode = exitcode | ERROR_UNMOUNTED
-            else:
-                exitcode = exitcode | ERROR_NO_UNMOUNTING
-                logger.info("Errors found but device unavailable: %s:%s" %
-                        (kernel_device, count))
     if unmounts == 0:
         logger.info("No drives were unmounted")
-    sys.exit(exitcode)

-- 
To view, visit https://gerrit.wikimedia.org/r/155244
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iaa1ed71033784bd22b96bd24d55e01027c755401
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Filippo Giunchedi <[email protected]>
Gerrit-Reviewer: Andrew Bogott <[email protected]>
Gerrit-Reviewer: Filippo Giunchedi <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to