ArielGlenn has submitted this change and it was merged.

Change subject: logs audit via salt module
......................................................................


logs audit via salt module

split into two files, one for the remote audit of all hosts and
one for the local audit on a given host, plus a stanza added
into the salt module which is just a wrapper around the local
audit call

next up: do the same for homes audit

Change-Id: I8702bf2bd79b0e51325ac6337a39b2224697f16d
---
M dataretention/data_auditor.py
A dataretention/retention/locallogaudit.py
M dataretention/retention/remotefileauditor.py
A dataretention/retention/remotelogauditor.py
M dataretention/retention/retentionaudit.py
5 files changed, 509 insertions(+), 22 deletions(-)

Approvals:
  ArielGlenn: Verified; Looks good to me, approved



diff --git a/dataretention/data_auditor.py b/dataretention/data_auditor.py
index 67d9335..0d85c0f 100644
--- a/dataretention/data_auditor.py
+++ b/dataretention/data_auditor.py
@@ -3,8 +3,9 @@
 sys.path.append('/srv/audits/retention/scripts/')
 
 from retention.cli import CommandLine
-from retention.auditor import LogsAuditor, HomesAuditor
+from retention.auditor import HomesAuditor
 from retention.remotefileauditor import RemoteFilesAuditor
+from retention.remotelogauditor import RemoteLogsAuditor
 from retention.examiner import FileExaminer, DirExaminer
 
 def usage(message=None):
@@ -246,11 +247,11 @@
         usage("'oldest' argument may only be used with logs audit")
 
     if audit_type == 'logs':
-        logsaudit = LogsAuditor(hosts_expr, audit_type, prettyprint,
-                                oldest_only, show_sample_content, dirsizes,
-                                show_system_logs,
-                                summary_report, depth, files_to_check, 
ignore_also,
-                                timeout, maxfiles, store_filepath, verbose)
+        logsaudit = RemoteLogsAuditor(hosts_expr, audit_type, prettyprint,
+                                      oldest_only, show_sample_content, 
dirsizes,
+                                      show_system_logs,
+                                      summary_report, depth, files_to_check, 
ignore_also,
+                                      timeout, maxfiles, store_filepath, 
verbose)
         report, ignored = logsaudit.audit_hosts()
         if interactive:
             cmdline = CommandLine(store_filepath, timeout, audit_type, 
hosts_expr)
diff --git a/dataretention/retention/locallogaudit.py 
b/dataretention/retention/locallogaudit.py
new file mode 100644
index 0000000..19d42fc
--- /dev/null
+++ b/dataretention/retention/locallogaudit.py
@@ -0,0 +1,295 @@
+import os
+import sys
+import glob
+
+sys.path.append('/srv/audits/retention/scripts/')
+
+import retention.utils
+import retention.magic
+from retention.config import Config
+from retention.fileinfo import LogInfo, LogUtils
+from retention.localfileaudit import LocalFilesAuditor
+
+global_keys = [key for key, value_unused in
+               sys.modules[__name__].__dict__.items()]
+
+
class LocalLogsAuditor(LocalFilesAuditor):
    '''
    audit of log files on the local host: report logs that are old,
    unrotated, or otherwise kept longer than the configured cutoff
    '''
    def __init__(self, audit_type,
                 oldest=False,
                 show_content=False, show_system_logs=False,
                 dirsizes=False, depth=2,
                 to_check=None, ignore_also=None,
                 timeout=60, maxfiles=None):
        '''
        constructor; see LocalFilesAuditor for the shared arguments

        oldest           -- report only the oldest log in each group
                            of rotated logs
        show_system_logs -- audit logs under /var/log instead of
                            skipping them
        '''
        super(LocalLogsAuditor, self).__init__(audit_type,
                                               show_content, dirsizes,
                                               depth, to_check, ignore_also,
                                               timeout, maxfiles)

        self.oldest_only = oldest
        self.show_system_logs = show_system_logs
        if self.show_system_logs:
            # /var/log is in the ignore list by default; drop it so
            # system logs get audited too
            self.ignored['files'].pop("/var/log")
        self.display_from_dict = LogInfo.display_from_dict

    @staticmethod
    def get_rotated_freq(rotated):
        '''
        turn the value you get out of logrotate
        conf files for 'rotated' into a one
        char string suitable for our reports;
        returns None if the value is not a known frequency
        '''
        if rotated == 'weekly':
            freq = 'w'
        elif rotated == 'daily':
            freq = 'd'
        elif rotated == 'monthly':
            freq = 'm'
        elif rotated == 'yearly':
            freq = 'y'
        else:
            freq = None
        return freq

    @staticmethod
    def get_rotated_keep(line):
        '''
        given a logrotate 'rotate <count>' line, return the count
        as a string, or None if the line is malformed
        '''
        fields = line.split()
        if len(fields) == 2:
            keep = fields[1]
        else:
            keep = None
        return keep

    @staticmethod
    def parse_logrotate_contents(contents,
                                 default_freq='-', default_keep='-'):
        '''
        parse the text of a logrotate config file and return a dict
        of log pathname to [rotation frequency, number kept,
        notifempty flag], using the supplied defaults for entries
        that do not set their own values
        '''
        lines = contents.split('\n')
        # simple two-state parse: collect log names until '{', then
        # collect directives until '}'
        state = 'want_lbracket'
        logs = {}
        freq = default_freq
        keep = default_keep
        notifempty = '-'
        log_group = []
        for line in lines:
            if line.startswith('#'):
                continue
            line = line.strip()
            if not line:
                continue
            if state == 'want_lbracket':
                if line.endswith('{'):
                    state = 'want_rbracket'
                    line = line[:-1].strip()
                    if not line:
                        continue
                if not line.startswith('/'):
                    # probably a directive or a blank line
                    continue
                if '*' in line:
                    log_group.extend(glob.glob(
                        os.path.join(Config.cf['rotate_basedir'], line)))
                else:
                    log_group.append(line)
            elif state == 'want_rbracket':
                tmp_freq = LocalLogsAuditor.get_rotated_freq(line)
                if tmp_freq:
                    freq = tmp_freq
                    continue
                elif line.startswith('rotate'):
                    tmp_keep = LocalLogsAuditor.get_rotated_keep(line)
                    if tmp_keep:
                        keep = tmp_keep
                elif line == 'notifempty':
                    notifempty = 'T'
                elif line.endswith('}'):
                    state = 'want_lbracket'
                    for log in log_group:
                        logs[log] = [freq, keep, notifempty]
                    freq = default_freq
                    keep = default_keep
                    notifempty = '-'
                    log_group = []
        return logs

    def get_logrotate_defaults(self):
        '''
        read the main logrotate config file and return the global
        (rotation frequency, number kept) defaults, skipping any
        per-log stanzas in braces
        '''
        # use a context manager so the file handle is closed promptly
        # (the original leaked the handle until gc)
        with open(Config.cf['rotate_mainconf']) as conf:
            contents = conf.read()
        lines = contents.split('\n')
        skip = False
        freq = '-'
        keep = '-'
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if line.endswith('{'):
                skip = True
                continue
            elif line.endswith('}'):
                skip = False
                continue
            elif skip:
                continue
            tmp_freq = LocalLogsAuditor.get_rotated_freq(line)
            if tmp_freq:
                freq = tmp_freq
                continue
            elif line.startswith('rotate'):
                tmp_keep = LocalLogsAuditor.get_rotated_keep(line)
                if tmp_keep:
                    keep = tmp_keep

        return freq, keep

    def find_rotated_logs(self):
        '''
        gather all names of log files from logrotate
        config files
        '''
        rotated_logs = {}
        default_freq, default_keep = self.get_logrotate_defaults()
        with open(Config.cf['rotate_mainconf']) as conf:
            rotated_logs.update(LocalLogsAuditor.parse_logrotate_contents(
                conf.read(), default_freq, default_keep))
        for fname in os.listdir(Config.cf['rotate_basedir']):
            pathname = os.path.join(Config.cf['rotate_basedir'], fname)
            if os.path.isfile(pathname):
                with open(pathname) as conf:
                    rotated_logs.update(
                        LocalLogsAuditor.parse_logrotate_contents(
                            conf.read(), default_freq, default_keep))
        return rotated_logs

    def check_mysqlconf(self):
        '''
        check how long mysql logs are kept around, returning a text
        blob of warnings about expiry settings (empty if no issues)
        '''
        # note that I also see my.cnf.s3 and we don't check those (yet)
        output = ''
        for filename in Config.cf['mysqlconf']:
            found = False
            try:
                with open(filename) as conf:
                    contents = conf.read()
            except (IOError, OSError):
                # file or directory probably doesn't exist
                continue
            lines = contents.split('\n')
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                if line.startswith('datadir'):
                    fields = line.split('=', 1)
                    fields = [field.strip() for field in fields]
                    if fields[0] != 'datadir':
                        continue
                    if not fields[1].startswith('/'):
                        continue
                    datadir = fields[1]
                    # strip trailing slash if needed
                    if len(datadir) > 1 and datadir.endswith('/'):
                        datadir = datadir[:-1]
                    # we can skip all bin logs, relay logs, and pid files
                    # in this directory. anything else should get looked at.
                    if '.' in self.hostname:
                        hostname = self.hostname.split('.')[0]
                    else:
                        hostname = self.hostname
                    ignore_these = [hostname + '-bin', hostname + '-relay-bin',
                                    hostname + '.pid', hostname + '-bin.index',
                                    hostname + '-relay-bin.index']

                    # add these files to ignore list; a one line report on
                    # mysql log expiry configuration is sufficient
                    if datadir not in self.ignored['files']:
                        self.ignored['files'][datadir] = ignore_these
                    else:
                        self.ignored['files'][datadir].extend(ignore_these)
                    # skip the subdirectories in here, they will be full
                    # of mysql dbs
                    # (fixed: this used to write to self.ignored['files'],
                    # clobbering the entries just added above, even though
                    # the membership check was against self.ignored['dirs'])
                    if datadir not in self.ignored['dirs']:
                        self.ignored['dirs'][datadir] = ['*']
                    else:
                        self.ignored['dirs'][datadir].append('*')

                if line.startswith('expire_logs_days'):
                    fields = line.split('=', 1)
                    fields = [field.strip() for field in fields]
                    if fields[0] != 'expire_logs_days':
                        continue
                    if not fields[1].isdigit():
                        continue
                    found = True
                    # cutoff is in seconds; compare in days
                    if int(fields[1]) > Config.cf['cutoff'] / 86400:
                        if output:
                            output = output + '\n'
                        output = output + (
                            'WARNING: some mysql logs expired after %s days in %s'
                            % (fields[1], filename))
            if not found:
                if output:
                    output = output + '\n'
                output = (output +
                          'WARNING: some mysql logs never expired in ' +
                          filename)
        return output

    def do_local_audit(self):
        '''
        run the logs audit on the local host and return the report
        as a text blob; note that no summary report is done for a
        single host, for logs we summarize across hosts
        '''
        mysql_issues = self.check_mysqlconf()
        result = []
        if mysql_issues:
            result.append(mysql_issues)

        open_files = LocalFilesAuditor.get_open_files()
        rotated = self.find_rotated_logs()

        all_files = {}
        files = self.find_all_files()

        for (fname, fstat) in files:
            all_files[fname] = LogInfo(fname, self.magic, fstat)
            all_files[fname].load_file_info(self.today, self.cutoff,
                                            open_files, rotated)

        all_files_sorted = sorted(all_files,
                                  key=lambda f: all_files[f].path)
        last_log_normalized = ''
        last_log = ''
        age = 0

        if all_files:
            # column widths for the formatted report
            max_name_length = max([len(all_files[fname].path)
                                   for fname in all_files]) + 2
            max_norm_length = max([len(all_files[fname].normalized)
                                   for fname in all_files]) + 2

        for fname in all_files_sorted:
            if self.contains(all_files[fname].filetype,
                             Config.cf['ignored_types']):
                continue

            if (self.oldest_only and
                    all_files[fname].normalized == last_log_normalized):
                # still doing the same group of logs; keep only the
                # oldest member of the group
                if all_files[fname].age <= age:
                    continue
                else:
                    age = all_files[fname].age
                    last_log = fname
            else:
                if last_log:
                    result.append(all_files[last_log].format_output(
                        self.show_sample_content,
                        False, max_name_length, max_norm_length))

                # starting new set of logs (maybe first set)
                last_log_normalized = all_files[fname].normalized
                last_log = fname
                age = all_files[fname].age

        if last_log:
            result.append(all_files[last_log].format_output(
                self.show_sample_content,
                False, max_name_length, max_norm_length))
        output = "\n".join(result) + "\n"
        return output

    def normalize(self, fname):
        '''
        return the log name with rotation suffixes etc. stripped,
        delegating to LogUtils
        '''
        return LogUtils.normalize(fname)
diff --git a/dataretention/retention/remotefileauditor.py 
b/dataretention/retention/remotefileauditor.py
index 3762bfa..76540c5 100644
--- a/dataretention/retention/remotefileauditor.py
+++ b/dataretention/retention/remotefileauditor.py
@@ -138,22 +138,6 @@
             hosts, "test.ping", expr_form=expr_type)
 
         self.set_up_max_files(maxfiles)
-        fileaudit_args = [self.show_sample_content,
-                          self.dirsizes,
-                          self.depth - 1,
-                          self.to_check,
-                          ",".join(self.ignore_also) if self.ignore_also is 
not None else None,
-                          self.timeout,
-                          self.MAX_FILES]
-
-        self.runner = Runner(hosts_expr,
-                             self.expanded_hosts,
-                             self.audit_type,
-                             fileaudit_args,
-                             self.show_sample_content,
-                             self.to_check,
-                             self.timeout,
-                             self.verbose)
 
         self.perhost_raw = None
         if 
os.path.exists('/srv/audits/retention/scripts/audit_files_perhost_config.py'):
@@ -178,6 +162,27 @@
         self.magic.load()
         self.summary = None
         self.display_from_dict = FileInfo.display_from_dict
+
def get_audit_args(self):
    '''
    assemble the argument list handed to the remote file audit
    invocation, in the order the remote script expects
    '''
    if self.ignore_also is not None:
        ignores = ",".join(self.ignore_also)
    else:
        ignores = None
    return [self.show_sample_content,
            self.dirsizes,
            self.depth - 1,
            self.to_check,
            ignores,
            self.timeout,
            self.MAX_FILES]
+
def set_up_runner(self):
    '''
    create the Runner that will carry out the audit on the
    expanded list of remote hosts
    '''
    runner_args = (self.hosts_expr,
                   self.expanded_hosts,
                   self.audit_type,
                   self.get_audit_args(),
                   self.show_sample_content,
                   self.to_check,
                   self.timeout,
                   self.verbose)
    self.runner = Runner(*runner_args)
 
     def set_up_max_files(self, maxfiles):
         '''
@@ -465,6 +470,7 @@
             print "WARNING: failed to load json from host"
 
     def audit_hosts(self):
+        self.set_up_runner()
         result = self.runner.run_remotely()
         if result is None:
             print "WARNING: failed to get output from audit script on any host"
diff --git a/dataretention/retention/remotelogauditor.py 
b/dataretention/retention/remotelogauditor.py
new file mode 100644
index 0000000..279091b
--- /dev/null
+++ b/dataretention/retention/remotelogauditor.py
@@ -0,0 +1,170 @@
+import sys
+import json
+
+sys.path.append('/srv/audits/retention/scripts/')
+
+import retention.utils
+import retention.magic
+from retention.fileinfo import LogInfo
+from retention.utils import JsonHelper
+from retention.remotefileauditor import RemoteFilesAuditor
+
+
+global_keys = [key for key, value_unused in
+               sys.modules[__name__].__dict__.items()]
+
+class RemoteLogsAuditor(RemoteFilesAuditor):
+    def __init__(self, hosts_expr, audit_type, prettyprint=False,
+                 oldest=False,
+                 show_content=False, show_system_logs=False,
+                 dirsizes=False, summary_report=False, depth=2,
+                 to_check=None, ignore_also=None,
+                 timeout=60, maxfiles=None, store_filepath=None,
+                 verbose=False):
+        super(RemoteLogsAuditor, self).__init__(hosts_expr, audit_type, 
prettyprint,
+                                                show_content, dirsizes,
+                                                summary_report, depth,
+                                                to_check, ignore_also, timeout,
+                                                maxfiles, store_filepath, 
verbose)
+        self.oldest_only = oldest
+        self.show_system_logs = show_system_logs
+        if self.show_system_logs:
+            self.ignored['files'].pop("/var/log")
+        self.display_from_dict = LogInfo.display_from_dict
+
+    def get_audit_args(self):
+        # fixme check if locallogauditor wants the oldest_only param
+        audit_args = [self.oldest_only,
+                      self.show_sample_content,
+                      self.show_system_logs,
+                      self.dirsizes,
+                      self.depth - 1,
+                      self.to_check,
+                      ",".join(self.ignore_also) if self.ignore_also is not 
None else None,
+                      self.timeout,
+                      self.MAX_FILES]
+        return audit_args
+
+    def display_summary(self, audit_results):
+        logs = {}
+        hosts_count = 0
+        all_hosts = audit_results.keys()
+        hosts_count = len(all_hosts)
+
+        for host in all_hosts:
+            output = None
+            if audit_results[host]:
+                try:
+                    lines = audit_results[host].split('\n')
+                    output = []
+                    for line in lines:
+                        if line == "":
+                            continue
+                        elif (line.startswith("WARNING:") or
+                              line.startswith("INFO:")):
+                            print 'host:', host
+                            print line
+                            continue
+                        output.append(json.loads(
+                            line, object_hook=JsonHelper.decode_dict))
+                except:
+                    if output is not None:
+                        print output
+                    else:
+                        print audit_results[host]
+                    print "WARNING: failed to load json from host", host
+                    continue
+            if output is None:
+                continue
+            for item in output:
+                log_name = item['normalized']
+                if not item['normalized'] in logs:
+                    logs[log_name] = {}
+                    logs[log_name]['old'] = set()
+                    logs[log_name]['maybe_old'] = set()
+                    logs[log_name]['unrot'] = set()
+                    logs[log_name]['notifempty'] = set()
+                if item['old'] == 'T':
+                    logs[log_name]['old'].add(host)
+                elif item['old'] == '-':
+                    logs[log_name]['maybe_old'].add(host)
+                if item['rotated'].startswith('F'):
+                    logs[log_name]['unrot'].add(host)
+                if item['notifempty'] == 'T':
+                    logs[log_name]['notifempty'].add(host)
+        sorted_lognames = sorted(logs.keys())
+        for logname in sorted_lognames:
+            old_count = len(logs[logname]['old'])
+            if not old_count:
+                maybe_old_count = len(logs[logname]['maybe_old'])
+            else:
+                maybe_old_count = 0  # we don't care about possibles now
+            unrot_count = len(logs[logname]['unrot'])
+            notifempty_count = len(logs[logname]['notifempty'])
+            RemoteLogsAuditor.display_variance_info(old_count, hosts_count,
+                                                    logs[logname]['old'],
+                                                    'old', logname)
+            RemoteLogsAuditor.display_variance_info(maybe_old_count, 
hosts_count,
+                                                    logs[logname]['maybe_old'],
+                                                    'maybe old', logname)
+            RemoteLogsAuditor.display_variance_info(unrot_count, hosts_count,
+                                                    logs[logname]['unrot'],
+                                                    'unrotated', logname)
+            RemoteLogsAuditor.display_variance_info(notifempty_count, 
hosts_count,
+                                                    
logs[logname]['notifempty'],
+                                                    'notifempty', logname)
+
+    @staticmethod
+    def display_variance_info(stat_count, hosts_count,
+                              host_list, stat_name, logname):
+        '''
+        assuming most stats are going to be the same across
+        a group of hosts, try to show just the variances
+        from the norm
+        '''
+        if stat_count == 0:
+            return
+
+        percentage = stat_count * 100 / float(hosts_count)
+
+        if stat_count == 1:
+            output_line = ("1 host has %s as %s" %
+                           (logname, stat_name))
+        else:
+            output_line = ("%s (%.2f%%) hosts have %s as %s" %
+                           (stat_count, percentage,
+                            logname, stat_name))
+
+        if percentage < .20 or stat_count < 6:
+            output_line += ': ' + ','.join(host_list)
+
+        print output_line
+
+    def display_remote_host(self, result):
+        '''
+        given the (json) output from the salt run on the remote
+        host, format it nicely and display it
+        '''
+        try:
+            lines = result.split('\n')
+            files = []
+            for line in lines:
+                if line == "":
+                    continue
+                elif line.startswith("WARNING:") or line.startswith("INFO:"):
+                    print line
+                else:
+                    files.append(json.loads(
+                        line, object_hook=JsonHelper.decode_dict))
+
+            if files == []:
+                return
+            path_justify = max([len(finfo['path']) for finfo in files]) + 2
+            norm_justify = max([len(finfo['normalized']) for finfo in files]) 
+ 2
+            for finfo in files:
+                self.display_from_dict(finfo, self.show_sample_content,
+                                       path_justify, norm_justify)
+        except:
+            print "WARNING: failed to load json from host:", result
+
+
diff --git a/dataretention/retention/retentionaudit.py 
b/dataretention/retention/retentionaudit.py
index b7fefc5..5c3e6c1 100644
--- a/dataretention/retention/retentionaudit.py
+++ b/dataretention/retention/retentionaudit.py
@@ -5,6 +5,9 @@
 sys.path.append('/srv/audits/retention/scripts/')
 
 from retention.localfileaudit import LocalFilesAuditor
+from retention.locallogaudit import LocalLogsAuditor
+
+log = logging.getLogger(__name__)
 
 def fileaudit_host(show_content, dirsizes, depth,
                    to_check, ignore_also, timeout,
@@ -15,3 +18,15 @@
                                  maxfiles)
     result = fauditor.do_local_audit()
     return result
+
def logaudit_host(oldest, show_content, show_system_logs,
                  dirsizes, depth,
                  to_check, ignore_also, timeout,
                  maxfiles):
    '''
    run the logs audit locally on this host, returning the
    report as a text blob
    '''
    auditor = LocalLogsAuditor('logs', oldest, show_content,
                               show_system_logs, dirsizes, depth,
                               to_check, ignore_also, timeout,
                               maxfiles)
    return auditor.do_local_audit()

-- 
To view, visit https://gerrit.wikimedia.org/r/233454
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I8702bf2bd79b0e51325ac6337a39b2224697f16d
Gerrit-PatchSet: 2
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to