Filippo Giunchedi has submitted this change and it was merged.
Change subject: swift-drive-audit: import icehouse version
......................................................................
swift-drive-audit: import icehouse version
Update our version of swift-drive-audit with the version from icehouse.
Our local changes will be imported back in the next commit, so that we have
a clean patch to send upstream.
Change-Id: Iaa1ed71033784bd22b96bd24d55e01027c755401
---
M files/swift/usr.bin.swift-drive-audit
1 file changed, 85 insertions(+), 55 deletions(-)
Approvals:
Andrew Bogott: Looks good to me, but someone else must approve
Filippo Giunchedi: Verified; Looks good to me, approved
jenkins-bot: Verified
diff --git a/files/swift/usr.bin.swift-drive-audit b/files/swift/usr.bin.swift-drive-audit
index 46adae0..bb9974e 100755
--- a/files/swift/usr.bin.swift-drive-audit
+++ b/files/swift/usr.bin.swift-drive-audit
@@ -1,5 +1,5 @@
-#!/usr/bin/env python
-# Copyright (c) 2010-2012 OpenStack, LLC.
+#!/usr/bin/python
+# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,34 +14,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Exit codes:
-# 0 - no errors
-# 1 - syntax error in calling (eg no config file)
-# 2 - filesystems unmounted
-# 4 - errors found but device not unmounted
-#
-# When multiple errors are found, exit code will be the bitwise OR of
-# the errors found.
-
import datetime
+import glob
import os
import re
import subprocess
import sys
from ConfigParser import ConfigParser
-from swift.common.utils import get_logger
+from swift.common.utils import backward, get_logger
-
-# To search for more types of errors, add the regex to the list below
-error_re = [
- re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
- re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
-]
-# Exit code constants here
-ERROR_SYNTAX = 1
-ERROR_UNMOUNTED = 2
-ERROR_NO_UNMOUNTING = 4
def get_devices(device_dir, logger):
devices = []
@@ -55,10 +37,10 @@
device['block_device'] = block_device
try:
device_num = os.stat(block_device).st_rdev
- except OSError, e:
+ except OSError:
# If we can't stat the device, then something weird is going on
logger.error("Error: Could not stat %s!" %
- block_device)
+ block_device)
continue
device['major'] = str(os.major(device_num))
device['minor'] = str(os.minor(device_num))
@@ -66,32 +48,70 @@
for line in open('/proc/partitions').readlines()[2:]:
major, minor, blocks, kernel_device = line.strip().split()
device = [d for d in devices
- if d['major'] == major and d['minor'] == minor]
+ if d['major'] == major and d['minor'] == minor]
if device:
device[0]['kernel_device'] = kernel_device
return devices
-def get_errors(minutes):
+def get_errors(error_re, log_file_pattern, minutes):
+ # Assuming log rotation is being used, we need to examine
+ # recently rotated files in case the rotation occurred
+ # just before the script is being run - the data we are
+ # looking for may have rotated.
+ #
+ # The globbing used before would not work with all out-of-box
+ # distro setup for logrotate and syslog therefore moving this
+ # to the config where one can set it with the desired
+ # globbing pattern.
+ log_files = [f for f in glob.glob(log_file_pattern)]
+ log_files.sort()
+
+ now_time = datetime.datetime.now()
+ end_time = now_time - datetime.timedelta(minutes=minutes)
+ # kern.log does not contain the year so we need to keep
+ # track of the year and month in case the year recently
+ # ticked over
+ year = now_time.year
+ prev_entry_month = now_time.month
errors = {}
- start_time = datetime.datetime.now() - datetime.timedelta(minutes=minutes)
- for line in open('/var/log/kern.log'):
- if '[ 0.000000]' in line:
- # Ignore anything before the last boot
- errors = {}
- continue
- log_time_string = '%s %s' % (start_time.year,
- ' '.join(line.split()[:3]))
+
+ reached_old_logs = False
+ for path in log_files:
try:
- # don't die on bad log lines.
- log_time = datetime.datetime.strptime(
- log_time_string, '%Y %b %d %H:%M:%S')
- except:
- continue
- if log_time > start_time:
- for err in error_re:
- for device in err.findall(line):
- errors[device] = errors.get(device, 0) + 1
+ f = open(path)
+ except IOError:
+ logger.error("Error: Unable to open " + path)
+ print("Unable to open " + path)
+ sys.exit(1)
+ for line in backward(f):
+ if '[ 0.000000]' in line \
+ or 'KERNEL supported cpus:' in line \
+ or 'BIOS-provided physical RAM map:' in line:
+ # Ignore anything before the last boot
+ reached_old_logs = True
+ break
+ # Solves the problem with year change - kern.log does not
+ # keep track of the year.
+ log_time_entry = line.split()[:3]
+ if log_time_entry[0] == 'Dec' and prev_entry_month == 'Jan':
+ year -= 1
+ prev_entry_month = log_time_entry[0]
+ log_time_string = '%s %s' % (year, ' '.join(log_time_entry))
+ try:
+ log_time = datetime.datetime.strptime(
+ log_time_string, '%Y %b %d %H:%M:%S')
+ except ValueError:
+ continue
+ if log_time > end_time:
+ for err in error_re:
+ for device in err.findall(line):
+ errors[device] = errors.get(device, 0) + 1
+ else:
+ reached_old_logs = True
+ break
+ if reached_old_logs:
+ break
return errors
@@ -113,24 +133,40 @@
conf_path = sys.argv[1]
except Exception:
print "Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1]
- sys.exit(ERROR_SYNTAX)
+ sys.exit(1)
if not c.read(conf_path):
print "Unable to read config file %s" % conf_path
- sys.exit(ERROR_SYNTAX)
+ sys.exit(1)
conf = dict(c.items('drive-audit'))
device_dir = conf.get('device_dir', '/srv/node')
minutes = int(conf.get('minutes', 60))
error_limit = int(conf.get('error_limit', 1))
+ log_file_pattern = conf.get('log_file_pattern',
+ '/var/log/kern.*[!.][!g][!z]')
+ error_re = []
+ for conf_key in conf:
+ if conf_key.startswith('regex_pattern_'):
+ error_pattern = conf[conf_key]
+ try:
+ r = re.compile(error_pattern)
+ except re.error:
+ sys.exit('Error: unable to compile regex pattern "%s"' %
+ error_pattern)
+ error_re.append(r)
+ if not error_re:
+ error_re = [
+ re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
+ re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
+ ]
conf['log_name'] = conf.get('log_name', 'drive-audit')
logger = get_logger(conf, log_route='drive-audit')
devices = get_devices(device_dir, logger)
logger.debug("Devices found: %s" % str(devices))
if not devices:
logger.error("Error: No devices found!")
- errors = get_errors(minutes)
+ errors = get_errors(error_re, log_file_pattern, minutes)
logger.debug("Errors found: %s" % str(errors))
unmounts = 0
- exitcode = 0
for kernel_device, count in errors.items():
if count >= error_limit:
device = \
@@ -139,17 +175,11 @@
mount_point = device[0]['mount_point']
if mount_point.startswith(device_dir):
logger.info("Unmounting %s with %d errors" %
- (mount_point, count))
+ (mount_point, count))
subprocess.call(['umount', '-fl', mount_point])
logger.info("Commenting out %s from /etc/fstab" %
- (mount_point))
+ (mount_point))
comment_fstab(mount_point)
unmounts += 1
- exitcode = exitcode | ERROR_UNMOUNTED
- else:
- exitcode = exitcode | ERROR_NO_UNMOUNTING
- logger.info("Errors found but device unavailable: %s:%s" %
- (kernel_device, count))
if unmounts == 0:
logger.info("No drives were unmounted")
- sys.exit(exitcode)
--
To view, visit https://gerrit.wikimedia.org/r/155244
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Iaa1ed71033784bd22b96bd24d55e01027c755401
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Filippo Giunchedi <[email protected]>
Gerrit-Reviewer: Andrew Bogott <[email protected]>
Gerrit-Reviewer: Filippo Giunchedi <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits