ArielGlenn has submitted this change and it was merged.

Change subject: more pylint: move dup code for file ignore check into its own 
method take care of unused imports in borrowed code for 'magic' more pylint and 
misc cleanups
......................................................................


more pylint: move dup code for file ignore check into its own method
take care of unused imports in borrowed code for 'magic'
more pylint and misc cleanups

move mysql conf file parsing out to its own class
move log rotation conf parsing out to its own class
move a number of static methods out to module methods
break out some code chunks and turn them into functions
move more static methods out to module methods
turn some instance attrs into class attrs
remove an unused import
lower case for instance attributes

Change-Id: I10ffd55689812cffc8eed7624561b21e70d0c8a1
---
M dataretention/retention/cli.py
M dataretention/retention/ignores.py
M dataretention/retention/localfileaudit.py
M dataretention/retention/locallogaudit.py
M dataretention/retention/magic.py
M dataretention/retention/remotefileauditor.py
M dataretention/retention/remotehomeauditor.py
M dataretention/retention/remotelogauditor.py
M dataretention/retention/rule.py
9 files changed, 370 insertions(+), 315 deletions(-)

Approvals:
  ArielGlenn: Verified; Looks good to me, approved



diff --git a/dataretention/retention/cli.py b/dataretention/retention/cli.py
index 0ff6c4b..643d6a2 100644
--- a/dataretention/retention/cli.py
+++ b/dataretention/retention/cli.py
@@ -8,7 +8,6 @@
 from clouseau.retention.rule import RuleStore
 import retention.remotefileauditor
 from clouseau.retention.locallogaudit import LocalLogsAuditor
-from clouseau.retention.fileinfo import FileInfo
 import clouseau.retention.fileinfo
 from clouseau.retention.utils import JsonHelper
 import clouseau.retention.config
diff --git a/dataretention/retention/ignores.py 
b/dataretention/retention/ignores.py
index abf1028..622087f 100644
--- a/dataretention/retention/ignores.py
+++ b/dataretention/retention/ignores.py
@@ -81,6 +81,15 @@
         return True
     return False
 
+def check_file_ignoredtype(ignored, igntype, checker, basename, basedir):
+    if '*' in ignored[igntype]:
+        if checker(basename, ignored[igntype]['*']):
+            return True
+        if basedir in ignored[igntype]:
+            if checker(basename, ignored[igntype][basedir]):
+                return True
+    return False
+
 def file_is_ignored(fname, basedir, ignored):
     '''
     pass normalized name (abs path), basedir (location audited),
@@ -92,30 +101,25 @@
     basename = os.path.basename(fname)
 
     if 'prefixes' in ignored:
-        if '*' in ignored['prefixes']:
-            if clouseau.retention.fileutils.startswith(basename, 
ignored['prefixes']['*']):
-                return True
-        if basedir in ignored['prefixes']:
-            if clouseau.retention.fileutils.startswith(
-                    basename, ignored['prefixes'][basedir]):
-                return True
+        if  check_file_ignoredtype(
+                ignored, 'prefixes', clouseau.retention.fileutils.startswith,
+                basename, basedir):
+            return True
 
     if 'extensions' in ignored:
-        if '*' in ignored['extensions']:
-            if clouseau.retention.fileutils.endswith(basename, 
ignored['extensions']['*']):
-                return True
-        if basedir in ignored['extensions']:
-            if clouseau.retention.fileutils.endswith(
-                    basename, ignored['extensions'][basedir]):
-                return True
+        if  check_file_ignoredtype(
+                ignored, 'extensions', clouseau.retention.fileutils.endswith,
+                basename, basedir):
+            return True
 
     if 'files' in ignored:
+        if  check_file_ignoredtype(
+                ignored, 'files', clouseau.retention.fileutils.endswith,
+                basename, basedir):
+            return True
+
         if basename in ignored['files']:
             return True
-        if '*' in ignored['files']:
-            if clouseau.retention.fileutils.endswith(basename, 
ignored['files']['*']):
-                return True
-
         if '/' in ignored['files']:
             if fname in ignored['files']['/']:
                 return True
@@ -123,9 +127,6 @@
                     fname, [w for w in ignored['files']['/'] if '*' in w]):
                 return True
 
-        if basedir in ignored['files']:
-            if clouseau.retention.fileutils.endswith(basename, 
ignored['files'][basedir]):
-                return True
     return False
 
 def get_home_dirs(confdir, locations):
diff --git a/dataretention/retention/localfileaudit.py 
b/dataretention/retention/localfileaudit.py
index caed1d5..c2a0f18 100644
--- a/dataretention/retention/localfileaudit.py
+++ b/dataretention/retention/localfileaudit.py
@@ -64,7 +64,7 @@
 
         self.ignored = self.ignores.merge([ignore_also_ignoreds, 
ignored_from_export], hostname)
 
-        self.MAX_FILES = maxfiles
+        self.max_files = maxfiles
         self.set_up_max_files()
 
     def set_up_max_files(self):
@@ -78,11 +78,11 @@
         their filetype, etc; processing then goes much quicker
         '''
 
-        if self.MAX_FILES is None:
+        if self.max_files is None:
             if self.dirsizes:
-                self.MAX_FILES = 1000
+                self.max_files = 1000
             else:
-                self.MAX_FILES = 100
+                self.max_files = 100
 
     def set_up_to_check(self, to_check):
         '''
@@ -183,7 +183,7 @@
             path = os.path.join(base, fname)
             if self.file_is_wanted(path, location):
                 count += 1
-                if count > self.MAX_FILES:
+                if count > self.max_files:
                     if self.dirsizes:
                         self.warn_dirsize(base)
                     else:
@@ -282,7 +282,7 @@
                              base, p), wildcard_dirs, exact=False))]
             count = self.process_files_from_path(location, base, files,
                                                  count, temp_results)
-            if count > self.MAX_FILES:
+            if count > self.max_files:
                 return
 
         results.extend(temp_results)
@@ -311,7 +311,7 @@
     def warn_dirsize(self, path):
         fields = path.split(os.path.sep)
         print ("WARNING: directory %s has more than %d files"
-               % (os.path.sep.join(fields[:self.depth + 1]), self.MAX_FILES))
+               % (os.path.sep.join(fields[:self.depth + 1]), self.max_files))
 
     def do_local_audit(self):
         open_files = clouseau.retention.fileutils.get_open_files()
diff --git a/dataretention/retention/locallogaudit.py 
b/dataretention/retention/locallogaudit.py
index 7e64a96..a264a5d 100644
--- a/dataretention/retention/locallogaudit.py
+++ b/dataretention/retention/locallogaudit.py
@@ -10,6 +10,256 @@
 import clouseau.retention.fileutils
 import clouseau.retention.magic
 
+def get_rotated_freq(rotated):
+    '''
+    turn the value you get out of logrotate
+    conf files for 'rotated' into a one
+    char string suitable for our reports
+    '''
+    if rotated == 'weekly':
+        freq = 'w'
+    elif rotated == 'daily':
+        freq = 'd'
+    elif rotated == 'monthly':
+        freq = 'm'
+    elif rotated == 'yearly':
+        freq = 'y'
+    else:
+        freq = None
+    return freq
+
+def get_rotated_keep(line):
+    fields = line.split()
+    if len(fields) == 2:
+        keep = fields[1]
+    else:
+        keep = None
+    return keep
+
+def get_logs(line):
+    if not line:
+        return []
+    # path may be in double quotes
+    if line.startswith('"') and line.endswith('"'):
+        line = line[1:-1]
+    # ignore paths that start with ~, skip anything not a path
+    if not line.startswith('/'):
+        # probably a directive or a blank line
+        return []
+    # wildcard allowed in path
+    if '*' in line:
+        return (glob.glob(
+            os.path.join(clouseau.retention.config.conf['rotate_basedir'], 
line)))
+    else:
+        return [line]
+
+def get_logrotate_defaults():
+    contents = open(clouseau.retention.config.conf['rotate_mainconf']).read()
+    lines = contents.split('\n')
+    skip = False
+    freq = '-'
+    keep = '-'
+    for line in lines:
+        line = line.strip()
+        if not line:
+            continue
+        if line.endswith('{'):
+            skip = True
+            continue
+        elif line.endswith('}'):
+            skip = False
+            continue
+        elif skip:
+            continue
+        tmp_freq = get_rotated_freq(line)
+        if tmp_freq:
+            freq = tmp_freq
+            continue
+        elif line.startswith('rotate'):
+            tmp_keep = get_rotated_keep(line)
+            if tmp_keep:
+                keep = tmp_keep
+    return freq, keep
+
+def find_rotated_logs(confdir):
+    '''
+    gather all names of log files from logrotate
+    config files
+    '''
+    rotated_logs = {}
+    default_freq, default_keep = get_logrotate_defaults()
+    parser = LogRotParser(confdir, default_freq, default_keep)
+    rotated_logs.update(parser.parse(
+        open(clouseau.retention.config.conf['rotate_mainconf']).read()))
+    for fname in os.listdir(clouseau.retention.config.conf['rotate_basedir']):
+        pathname = 
os.path.join(clouseau.retention.config.conf['rotate_basedir'], fname)
+        if os.path.isfile(pathname):
+            rotated_logs.update(parser.parse(open(pathname).read()))
+    return rotated_logs
+
+def get_mysqldir_ignores(hostname):
+    # we can skip all bin logs, relay logs, and pid files in this
+    # directory. anything else should get looked at.
+    ignore_these = [hostname + '-bin', hostname + '-relay-bin',
+                    hostname + '.pid', hostname + '-bin.index',
+                    hostname + '-relay-bin.index']
+    # add these files to ignore list; a one line report on
+    # mysql log expiry configuration is sufficient
+    file_ignores = ignore_these
+    dir_ignores = ['*']
+    return file_ignores, dir_ignores
+
+def get_datadir(line):
+    datadir = None
+    fields = line.split('=', 1)
+    fields = [field.strip() for field in fields]
+    if fields[0] == 'datadir' and fields[1].startswith('/'):
+        datadir = fields[1]
+        # strip trailing slash if needed
+        if len(datadir) > 1 and datadir.endswith('/'):
+            datadir = datadir[:-1]
+            if not datadir:
+                datadir = None
+    return datadir
+
+def get_expire_days(line, filename):
+    found = False
+    text = None
+    if line.startswith('expire_logs_days'):
+        fields = line.split('=', 1)
+        fields = [field.strip() for field in fields]
+        if fields[0] == 'expire_logs_days' and fields[1].isdigit():
+            found = True
+            if int(fields[1]) > clouseau.retention.config.conf['cutoff'] / 
86400:
+                text = ('WARNING: some mysql logs expired after %s days in %s'
+                        % (fields[1], filename))
+    return found, text
+
+
+class LogRotParser(object):
+    def __init__(self, confdir, default_freq='-', default_keep='-'):
+        self.confdir = confdir
+        self.default_freq = default_freq
+        self.default_keep = default_keep
+        self.freq = None
+        self.keep = None
+        self.notifempty = None
+        self.log_group = None
+        self.clear()
+
+    def clear(self):
+        self.freq = self.default_freq
+        self.keep = self.default_keep
+        self.notifempty = '-'
+        self.log_group = []
+
+# fixme too many branches 16/12
+# we have a lot of dup code now. pull out somehow
+
+    def get_logrot_directives(self, line):
+        if line.startswith('rotate'):
+            tmp_keep = get_rotated_keep(line)
+            if tmp_keep:
+                self.keep = tmp_keep
+        elif line == 'notifempty':
+            self.notifempty = 'T'
+        else:
+            tmp_freq = get_rotated_freq(line)
+            if tmp_freq:
+                self.freq = tmp_freq
+
+    def parse(self, contents):
+        # state indicates the string we look for next
+        clouseau.retention.config.set_up_conf(self.confdir)
+        lines = contents.split('\n')
+        state = '{'
+        self.clear()
+        logs = {}
+        log_group = []
+        for line in lines:
+            line = line.strip()
+            if line.startswith('#') or not line:
+                continue
+            if state == '{':
+                # looking for lines like '/var/account/pacct {'
+                # or '/var/log/syslog { }'
+                if line.endswith('{'):
+                    state = '}'
+                    line = line[:-1].strip()
+                    log_group.extend(get_logs(line))
+                elif line.endswith('}'):
+                    if line[:-1].rstrip().endswith('{'):
+                        line = line[:-1].rstrip()
+                        log_group.extend(get_logs(line))
+                        state = '{'
+                        for log in log_group:
+                            logs[log] = [self.freq, self.keep, self.notifempty]
+                        self.clear()
+            elif state == '}':
+                if line.endswith('}'):
+                    state = '{'
+                    for log in log_group:
+                        logs[log] = [self.freq, self.keep, self.notifempty]
+                    self.clear()
+                else:
+                    self.get_logrot_directives(line)
+        return logs
+
+
+class MySqlCfParser(object):
+    def __init__(self, confdir):
+        self.confdir = confdir
+
+    def check_mysqlconf(self):
+        '''
+        check how long mysql logs are kept around
+        '''
+        clouseau.retention.config.set_up_conf(self.confdir)
+        mysql_ignores = clouseau.retention.ignores.init_ignored()
+
+        # note that I also see my.cnf.s3 and we don't check those (yet)
+        hostname = socket.getfqdn()
+        if '.' in hostname:
+            hostname = hostname.split('.')[0]
+        output = ''
+        for filename in clouseau.retention.config.conf['mysqlconf']:
+            expires = False
+            datadir = None
+            try:
+                contents = open(filename).read()
+            except:
+                # file or directory probably doesn't exist
+                continue
+            lines = contents.split('\n')
+            for line in lines:
+                line = line.strip()
+                if not line:
+                    continue
+                result = get_datadir(line)
+                if result is not None:
+                    datadir = result
+                    file_ignores, dir_ignores = get_mysqldir_ignores(hostname)
+                    for igntype in 'files', 'dirs':
+                        if datadir not in mysql_ignores[igntype]:
+                            mysql_ignores[igntype][datadir] = []
+
+                    mysql_ignores['files'][datadir].extend(file_ignores)
+                    mysql_ignores['dirs'][datadir].extend(dir_ignores)
+                    continue
+                result, message = get_expire_days(line, filename)
+                if message is not None:
+                    if output:
+                        output = output + '\n'
+                    output = output + message
+                if result is not None:
+                    expires = result
+
+                if expires and (datadir is not None):
+                    break
+
+        return mysql_ignores, output
+
+
 class LocalLogsAuditor(LocalFilesAuditor):
     def __init__(self, audit_type, confdir=None,
                  oldest=False,
@@ -30,219 +280,8 @@
 
         self.display_from_dict = LogInfo.display_from_dict
 
-    @staticmethod
-    def get_rotated_freq(rotated):
-        '''
-        turn the value you get out of logrotate
-        conf files for 'rotated' into a one
-        char string suitable for our reports
-        '''
-        if rotated == 'weekly':
-            freq = 'w'
-        elif rotated == 'daily':
-            freq = 'd'
-        elif rotated == 'monthly':
-            freq = 'm'
-        elif rotated == 'yearly':
-            freq = 'y'
-        else:
-            freq = None
-        return freq
-
-    @staticmethod
-    def get_rotated_keep(line):
-        fields = line.split()
-        if len(fields) == 2:
-            keep = fields[1]
-        else:
-            keep = None
-        return keep
-
-    def parse_logrotate_contents(self, contents,
-                                 default_freq='-', default_keep='-'):
-        clouseau.retention.config.set_up_conf(self.confdir)
-        lines = contents.split('\n')
-        state = 'want_lbracket'
-        logs = {}
-        freq = default_freq
-        keep = default_keep
-        notifempty = '-'
-        log_group = []
-        for line in lines:
-            if line.startswith('#'):
-                continue
-            line = line.strip()
-            if not line:
-                continue
-            if state == 'want_lbracket':
-                if line.endswith('{'):
-                    state = 'want_rbracket'
-                    line = line[:-1].strip()
-                    if not line:
-                        continue
-                if not line.startswith('/'):
-                    # probably a directive or a blank line
-                    continue
-                if '*' in line:
-                    log_group.extend(glob.glob(
-                        
os.path.join(clouseau.retention.config.conf['rotate_basedir'], line)))
-                else:
-                    log_group.append(line)
-            elif state == 'want_rbracket':
-                tmp_freq = LocalLogsAuditor.get_rotated_freq(line)
-                if tmp_freq:
-                    freq = tmp_freq
-                    continue
-                elif line.startswith('rotate'):
-                    tmp_keep = LocalLogsAuditor.get_rotated_keep(line)
-                    if tmp_keep:
-                        keep = tmp_keep
-                elif line == 'notifempty':
-                    notifempty = 'T'
-                elif line.endswith('}'):
-                    state = 'want_lbracket'
-                    for log in log_group:
-                        logs[log] = [freq, keep, notifempty]
-                    freq = default_freq
-                    keep = default_keep
-                    notifempty = '-'
-                    log_group = []
-        return logs
-
-    def get_logrotate_defaults(self):
-        contents = 
open(clouseau.retention.config.conf['rotate_mainconf']).read()
-        lines = contents.split('\n')
-        skip = False
-        freq = '-'
-        keep = '-'
-        for line in lines:
-            line = line.strip()
-            if not line:
-                continue
-            if line.endswith('{'):
-                skip = True
-                continue
-            elif line.endswith('}'):
-                skip = False
-                continue
-            elif skip:
-                continue
-            tmp_freq = LocalLogsAuditor.get_rotated_freq(line)
-            if tmp_freq:
-                freq = tmp_freq
-                continue
-            elif line.startswith('rotate'):
-                tmp_keep = LocalLogsAuditor.get_rotated_keep(line)
-                if tmp_keep:
-                    keep = tmp_keep
-
-        return freq, keep
-
-    def find_rotated_logs(self):
-        '''
-        gather all names of log files from logrotate
-        config files
-        '''
-        rotated_logs = {}
-        default_freq, default_keep = self.get_logrotate_defaults()
-        rotated_logs.update(self.parse_logrotate_contents(
-            open(clouseau.retention.config.conf['rotate_mainconf']).read(),
-            default_freq, default_keep))
-        for fname in 
os.listdir(clouseau.retention.config.conf['rotate_basedir']):
-            pathname = 
os.path.join(clouseau.retention.config.conf['rotate_basedir'], fname)
-            if os.path.isfile(pathname):
-                rotated_logs.update(self.parse_logrotate_contents(
-                    open(pathname).read(), default_freq, default_keep))
-        return rotated_logs
-
-    def check_mysqlconf(self):
-        '''
-        check how long mysql logs are kept around
-        '''
-        mysql_ignores = clouseau.retention.ignores.init_ignored()
-
-        # note that I also see my.cnf.s3 and we don't check those (yet)
-        hostname = socket.getfqdn()
-        output = ''
-        for filename in clouseau.retention.config.conf['mysqlconf']:
-            found = False
-            try:
-                contents = open(filename).read()
-            except:
-                # file or directory probably doesn't exist
-                continue
-            lines = contents.split('\n')
-            for line in lines:
-                line = line.strip()
-                if not line:
-                    continue
-                if line.startswith('datadir'):
-                    fields = line.split('=', 1)
-                    fields = [field.strip() for field in fields]
-                    if fields[0] != 'datadir':
-                        continue
-                    if not fields[1].startswith('/'):
-                        continue
-                    datadir = fields[1]
-                    # strip trailing slash if needed
-                    if len(datadir) > 1 and datadir.endswith('/'):
-                        datadir = datadir[:-1]
-                    # we can skip all bin logs, relay logs, and pid files in 
this
-                    # directory. anything else should get looked at.
-                    if '.' in hostname:
-                        hostname = hostname.split('.')[0]
-                    ignore_these = [hostname + '-bin', hostname + '-relay-bin',
-                                    hostname + '.pid', hostname + '-bin.index',
-                                    hostname + '-relay-bin.index']
-
-                    # add these files to ignore list; a one line report on
-                    # mysql log expiry configuration is sufficient
-
-                    if datadir not in mysql_ignores['files']:
-                        mysql_ignores['files'][datadir] = ignore_these
-                    else:
-                        mysql_ignores['files'][datadir].extend(ignore_these)
-                    # skip the subdirectories in here, they will be full of 
mysql dbs
-                    if datadir not in mysql_ignores['dirs']:
-                        mysql_ignores['files'][datadir] = ['*']
-                    else:
-                        mysql_ignores['files'][datadir].append('*')
-
-                if line.startswith('expire_logs_days'):
-                    fields = line.split('=', 1)
-                    fields = [field.strip() for field in fields]
-                    if fields[0] != 'expire_logs_days':
-                        continue
-                    if not fields[1].isdigit():
-                        continue
-                    found = True
-                    if int(fields[1]) > 
clouseau.retention.config.conf['cutoff'] / 86400:
-                        if output:
-                            output = output + '\n'
-                        output = output + ('WARNING: some mysql logs expired 
after %s days in %s'
-                                           % (fields[1], filename))
-            if not found:
-                if output:
-                    output = output + '\n'
-                output = output + 'WARNING: some mysql logs never expired in ' 
+ filename
-
-        self.ignored = self.ignores.merge([self.ignored, mysql_ignores])
-
-        return output
-
-    def do_local_audit(self):
-        '''
-        note that no summary report is done for a  single host,
-        for logs we summarize across hosts
-        '''
-        mysql_issues = self.check_mysqlconf()
-        result = []
-        if mysql_issues:
-            result.append(mysql_issues)
-
-        open_files = clouseau.retention.fileutils.get_open_files()
-        rotated = self.find_rotated_logs()
-
+    # fixme this name no good after 'find_all_files' right?
+    def get_all_files(self, cutoff, open_files, rotated):
         all_files = {}
         files = self.find_all_files()
 
@@ -251,8 +290,27 @@
         today = time.time()
         for (fname, stat) in files:
             all_files[fname] = LogInfo(fname, magic, stat)
-            all_files[fname].load_file_info(today, 
clouseau.retention.config.conf['cutoff'],
-                                            open_files, rotated)
+            all_files[fname].load_file_info(today, cutoff, open_files, rotated)
+        return all_files
+
+    def do_local_audit(self):
+        '''
+        note that no summary report is done for a  single host,
+        for logs we summarize across hosts
+        '''
+        mysqlcfparser = MySqlCfParser(self.confdir)
+        mysql_ignores, mysql_issues = mysqlcfparser.check_mysqlconf()
+        self.ignored = self.ignores.merge([self.ignored, mysql_ignores])
+
+        result = []
+        if mysql_issues:
+            result.append(mysql_issues)
+
+        open_files = clouseau.retention.fileutils.get_open_files()
+        rotated = find_rotated_logs(self.confdir)
+
+        all_files = 
self.get_all_files(clouseau.retention.config.conf['cutoff'],
+                                       open_files, rotated)
 
         all_files_sorted = sorted(all_files,
                                   key=lambda f: all_files[f].path)
@@ -275,9 +333,7 @@
             if (self.oldest_only and
                     all_files[fname].normalized == last_log_normalized):
                 # still doing the same group of logs
-                if fage <= age:
-                    continue
-                else:
+                if fage > age:
                     age = fage
                     last_log = fname
             else:
diff --git a/dataretention/retention/magic.py b/dataretention/retention/magic.py
index 96b484c..c0b8393 100644
--- a/dataretention/retention/magic.py
+++ b/dataretention/retention/magic.py
@@ -4,7 +4,7 @@
 
 import ctypes
 
-from ctypes import *
+from ctypes import c_int, c_char_p, c_void_p, c_size_t, Structure, POINTER
 from ctypes.util import find_library
 
 
diff --git a/dataretention/retention/remotefileauditor.py 
b/dataretention/retention/remotefileauditor.py
index 9012af0..8392c0c 100644
--- a/dataretention/retention/remotefileauditor.py
+++ b/dataretention/retention/remotefileauditor.py
@@ -136,7 +136,7 @@
         self.expanded_hosts = client.cmd_expandminions(
             hosts, "test.ping", expr_form=expr_type)
 
-        self.MAX_FILES = maxfiles
+        self.max_files = maxfiles
         self.set_up_max_files(maxfiles)
 
         self.cdb = RuleStore(self.store_filepath)
@@ -156,7 +156,7 @@
                       self.depth - 1,
                       self.to_check,
                       self.ignore_also,
-                      self.MAX_FILES]
+                      self.max_files]
         return audit_args
 
     def set_up_runner(self):
@@ -185,11 +185,11 @@
 
         if maxfiles is None:
             if self.dirsizes:
-                self.MAX_FILES = 1000
+                self.max_files = 1000
             else:
-                self.MAX_FILES = 100
+                self.max_files = 100
         else:
-            self.MAX_FILES = maxfiles
+            self.max_files = maxfiles
 
     def set_up_and_export_rule_store(self):
         hosts = self.cdb.store_db_list_all_hosts()
diff --git a/dataretention/retention/remotehomeauditor.py 
b/dataretention/retention/remotehomeauditor.py
index 4615bce..813151d 100644
--- a/dataretention/retention/remotehomeauditor.py
+++ b/dataretention/retention/remotehomeauditor.py
@@ -34,7 +34,7 @@
                       self.depth - 1,
                       self.to_check,
                       ",".join(self.ignore_also) if self.ignore_also is not 
None else None,
-                      self.MAX_FILES]
+                      self.max_files]
         return audit_args
 
     def display_host_summary(self):
diff --git a/dataretention/retention/remotelogauditor.py 
b/dataretention/retention/remotelogauditor.py
index 2c45553..5e3b4b2 100644
--- a/dataretention/retention/remotelogauditor.py
+++ b/dataretention/retention/remotelogauditor.py
@@ -53,7 +53,7 @@
                       self.depth - 1,
                       self.to_check,
                       self.ignore_also,
-                      self.MAX_FILES]
+                      self.max_files]
         return audit_args
 
     def display_summary(self, audit_results):
diff --git a/dataretention/retention/rule.py b/dataretention/retention/rule.py
index 51ce11c..04f2e3b 100644
--- a/dataretention/retention/rule.py
+++ b/dataretention/retention/rule.py
@@ -17,6 +17,14 @@
         newparam = param
     return newparam
 
+def get_tablename(host):
+    '''
+    each host's rules are stored in a separate table,
+    get and return the table name for the given host
+    the hostname should be the fqdn
+    '''
+    return RuleStore.TABLE + "_" + host.replace('-', '__').replace('.', '_')
+
 def from_unicode(param):
     '''
     convert a parameter from unicode back to bytes it is not already
@@ -30,6 +38,41 @@
         if newparam is None:
             newparam = param
     return newparam
+
+def check_params(params, fieldlist=None, show=True):
+    if fieldlist is None:
+        fieldlist = RuleStore.FIELDS.keys()
+    err = False
+    for field in fieldlist:
+        if field not in params:
+            if show:
+                print "WARNING: missing field %s" % field
+                print "WARNING: received:", params
+            err = True
+        else:
+            ftype = RuleStore.FIELDS[field]
+            # fixme what about path, no sanitizing there, this is bad.
+            # same for hostname, no checking...
+            if ftype == 'integer' and not params[field].isdigit():
+                if show:
+                    print ("WARNING: bad param %s, should be number: %s"
+                           % (field, params[field]))
+                err = True
+            elif ftype == 'text':
+                if field == 'type' and params[field] not in RuleStore.TYPES:
+                    if show:
+                        print "WARNING: bad type %s specified" % params[field]
+                        err = True
+                elif (field == 'status' and
+                      params[field] not in Status.STATUSES):
+                    if show:
+                        print ("WARNING: bad status %s specified" %
+                               params[field])
+                    err = True
+    if err:
+        return False
+    else:
+        return True
 
 
 class RuleStore(object):
@@ -59,6 +102,9 @@
     though they have U status (unknown)
     '''
 
+    TABLE = 'filestatus'
+    FIELDS = {'basedir': 'text', 'name': 'text',
+              'type': 'text', 'status': 'text'}
     TYPES_TO_TEXT = {'D': 'dir', 'F': 'file', 'L': 'link', 'U': 'unknown'}
     TYPES = TYPES_TO_TEXT.keys()
 
@@ -70,22 +116,10 @@
         on an as needed basis
         '''
 
-        self.TABLE = 'filestatus'
-        self.FIELDS = {'basedir': 'text', 'name': 'text',
-                       'type': 'text', 'status': 'text'}
         self.storename = storename
         self.store_db = None
         self.crs = None
         self.known_hosts = None
-
-    def get_tablename(self, host):
-        '''
-        each host's rules are stored in a separate table,
-        get and return the table name for the given host
-        the hostname should be the fqdn
-        '''
-
-        return self.TABLE + "_" + host.replace('-', '__').replace('.', '_')
 
     def get_salt_known_hosts(self):
         '''
@@ -124,7 +158,7 @@
                     '''CREATE TABLE %s
                     (`basedir` text, `name` text, `type` text,
                     `status` text, PRIMARY KEY (`basedir`, `name`))'''
-                    % self.get_tablename(host))
+                    % get_tablename(host))
                 self.store_db.commit()
 
     def no_table_for_host(self, host):
@@ -149,47 +183,12 @@
         else:
             return False
 
-    def check_params(self, params, fieldlist=None, show=True):
-        if fieldlist is None:
-            fieldlist = self.FIELDS.keys()
-        err = False
-        for field in fieldlist:
-            if field not in params:
-                if show:
-                    print "WARNING: missing field %s" % field
-                    print "WARNING: received:", params
-                err = True
-            else:
-                ftype = self.FIELDS[field]
-                # fixme what about path, no sanitizing there, this is bad.
-                # same for hostname, no checking...
-                if ftype == 'integer' and not params[field].isdigit():
-                    if show:
-                        print ("WARNING: bad param %s, should be number: %s"
-                               % (field, params[field]))
-                    err = True
-                elif ftype == 'text':
-                    if field == 'type' and params[field] not in 
RuleStore.TYPES:
-                        if show:
-                            print "WARNING: bad type %s specified" % 
params[field]
-                            err = True
-                    elif (field == 'status' and
-                          params[field] not in Status.STATUSES):
-                        if show:
-                            print ("WARNING: bad status %s specified" %
-                                   params[field])
-                        err = True
-        if err:
-            return False
-        else:
-            return True
-
     def store_db_check_host_table(self, host):
         '''
         check if a table for the specific host exists, returning True if so
         '''
         self.crs.execute("SELECT name FROM sqlite_master WHERE type='table' 
AND name='%s'"
-                         % self.get_tablename(host))
+                         % get_tablename(host))
         self.store_db.commit()
         result = self.crs.fetchone()
         if result is None:
@@ -205,11 +204,11 @@
         with values for those fields
 
         '''
-        if not self.check_params(params):
+        if not check_params(params):
             print "WARNING: bad parameters specified"
 
         self.crs.execute("INSERT INTO %s VALUES (?, ?, ?, ?)"
-                         % self.get_tablename(host),
+                         % get_tablename(host),
                          (to_unicode(params['basedir']),
                           to_unicode(params['name']),
                           params['type'],
@@ -223,11 +222,11 @@
         of a file/dir, and the params passed in should be a dict
         with values for those fields
         '''
-        if not self.check_params(params):
+        if not check_params(params):
             print "WARNING: bad params passed", params
 
         self.crs.execute("INSERT OR REPLACE INTO  %s VALUES (?, ?, ?, ?)"
-                         % self.get_tablename(host),
+                         % get_tablename(host),
                          (to_unicode(params['basedir']),
                           to_unicode(params['name']),
                           params['type'],
@@ -262,12 +261,12 @@
             clause, params_to_sub = RuleStore.params_to_string(where_params)
             query = ("SELECT %s FROM %s WHERE %s"
                      % (",".join(from_params),
-                        self.get_tablename(host),
+                        get_tablename(host),
                         clause))
         else:
             query = ("SELECT %s FROM %s"
                      % (",".join(from_params),
-                        self.get_tablename(host)))
+                        get_tablename(host)))
             params_to_sub = None
 
         if params_to_sub:
@@ -301,12 +300,12 @@
         that status will be deleted
         '''
         # fixme quoting
-        if (not self.check_params(params, ['basedir', 'name'], show=False)
-                and not self.check_params(params, ['status'], show=False)):
+        if (not check_params(params, ['basedir', 'name'], show=False)
+                and not check_params(params, ['status'], show=False)):
             print "WARNING: bad params passed", params
         clause, params_to_sub = RuleStore.params_to_string(params)
         query = ("DELETE FROM %s WHERE %s"
-                 % (self.get_tablename(host),
+                 % (get_tablename(host),
                     clause))
         self.crs.execute(query, params_to_sub)
         self.store_db.commit()
@@ -341,7 +340,7 @@
 
         tables = [row[0] for row in crs.fetchall()]
         for table in tables:
-            if table.startswith(self.TABLE + "_"):
-                hosts.append(table[len(self.TABLE + "_"):].
+            if table.startswith(RuleStore.TABLE + "_"):
+                hosts.append(table[len(RuleStore.TABLE + "_"):].
                              replace('__', '-').replace('_', '.'))
         return hosts

-- 
To view, visit https://gerrit.wikimedia.org/r/233473
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I10ffd55689812cffc8eed7624561b21e70d0c8a1
Gerrit-PatchSet: 2
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to