ArielGlenn has submitted this change and it was merged.
Change subject: move rule static methods out to a separate utils file
......................................................................
move rule static methods out to a separate utils file
Change-Id: Ie73f7215a8ff4be9c9a079e03f465b393b3de329
---
M dataretention/retention/cli.py
M dataretention/retention/localfileaudit.py
M dataretention/retention/remotefileauditor.py
M dataretention/retention/rule.py
A dataretention/retention/ruleutils.py
M dataretention/rulestore.py
6 files changed, 464 insertions(+), 481 deletions(-)
Approvals:
ArielGlenn: Verified; Looks good to me, approved
diff --git a/dataretention/retention/cli.py b/dataretention/retention/cli.py
index df474b7..3b09b1f 100644
--- a/dataretention/retention/cli.py
+++ b/dataretention/retention/cli.py
@@ -21,6 +21,7 @@
from retention.config import Config
from retention.examiner import DirExaminer, FileExaminer
import retention.fileutils
+import retention.ruleutils
class LocalIgnores(object):
'''
@@ -138,7 +139,7 @@
if hosts is None:
hosts = self.cdb.store_db_list_all_hosts()
for host in hosts:
- self.perhost_rules_from_store = Rule.get_rules(
+ self.perhost_rules_from_store = retention.ruleutils.get_rules(
self.cdb, host, Status.text_to_status('good'))
if self.perhost_rules_from_store is not None:
@@ -812,7 +813,7 @@
print 'skipping %s, not in current dir listing' % entry
print self.current_dir_contents_dict
continue
- filetype = Rule.entrytype_to_text(
+ filetype = retention.ruleutils.entrytype_to_text(
self.current_dir_contents_dict[entry]['type'])
if filetype == 'link':
print 'No need to mark', file_expr, 'links are always skipped'
@@ -821,7 +822,7 @@
print 'Not a dir or regular file, no need to mark, skipping'
continue
status = Status.text_to_status('good')
- Rule.do_add_rule(self.cdb, file_expr, filetype, status, self.host)
+ retention.ruleutils.do_add_rule(self.cdb, file_expr, filetype,
status, self.host)
return True
def check_rules_path(self, rules_path):
@@ -866,11 +867,11 @@
path = os.path.join(self.current_dir, path)
if path[-1] == os.path.sep:
path = path[:-1]
- filetype = Rule.text_to_entrytype('dir')
+ filetype = retention.ruleutils.text_to_entrytype('dir')
else:
- filetype = Rule.text_to_entrytype('file')
+ filetype = retention.ruleutils.text_to_entrytype('file')
- Rule.do_add_rule(self.cdb, path, filetype, status, self.host)
+ retention.ruleutils.do_add_rule(self.cdb, path, filetype, status,
self.host)
# update the ignores list since we have a new rule
self.perhost_ignores_from_rules = {}
self.get_perhost_ignores_from_rules([self.host])
@@ -899,23 +900,23 @@
if prefix == "":
prefix = "/"
if status == 'a' or status == 'A':
- Rule.show_rules(self.cdb, self.host, prefix=prefix)
+ retention.ruleutils.show_rules(self.cdb, self.host,
prefix=prefix)
return True
elif status[0].upper() in Status.STATUSES:
- Rule.show_rules(self.cdb, self.host, status[0].upper(),
- prefix=prefix)
+ retention.ruleutils.show_rules(self.cdb, self.host,
status[0].upper(),
+ prefix=prefix)
return True
elif command == 'D' or command == 'd':
if not self.current_dir_contents_list:
self.get_dir_contents(self.current_dir, self.batchno)
- Rule.get_rules_for_path(self.cdb, self.current_dir, self.host)
+ retention.ruleutils.get_rules_for_path(self.cdb, self.current_dir,
self.host)
return True
elif command == 'C' or command == 'c':
if not self.current_dir_contents_list:
self.get_dir_contents(self.current_dir, self.batchno)
- Rule.get_rules_for_entries(self.cdb, self.current_dir,
- self.current_dir_contents_dict,
- self.host)
+ retention.ruleutils.get_rules_for_entries(self.cdb,
self.current_dir,
+
self.current_dir_contents_dict,
+ self.host)
return True
elif command == 'R' or command == 'r':
# fixme need different completer here I think, that
@@ -929,7 +930,7 @@
path = os.path.join(self.current_dir, path)
if path[-1] == os.path.sep:
path = path[:-1]
- Rule.do_remove_rule(self.cdb, path, self.host)
+ retention.ruleutils.do_remove_rule(self.cdb, path, self.host)
# update the ignores list since we removed a rule
self.perhost_ignores_from_rules = {}
self.get_perhost_ignores_from_rules([self.host])
@@ -943,7 +944,7 @@
if not self.check_rules_path(rules_path):
print "bad rules file path specified, aborting"
else:
- Rule.import_rules(self.cdb, rules_path, self.host)
+ retention.ruleutils.import_rules(self.cdb, rules_path,
self.host)
return True
elif command == 'E' or command == 'e':
readline.set_completer(None)
@@ -954,7 +955,7 @@
if not self.check_rules_path(rules_path):
print "bad rules file path specified, aborting"
else:
- Rule.export_rules(self.cdb, rules_path, self.host)
+ retention.ruleutils.export_rules(self.cdb, rules_path,
self.host)
return True
elif command == 'Q' or command == 'q':
print "quitting this level"
@@ -1116,9 +1117,9 @@
if command in Status.STATUSES:
# this option is invoked on a directory so
# type is dir every time
- Rule.do_add_rule(self.cdb, dir_path,
- Rule.text_to_entrytype('dir'),
- command, self.host)
+ retention.ruleutils.do_add_rule(self.cdb, dir_path,
+
retention.ruleutils.text_to_entrytype('dir'),
+ command, self.host)
return None
elif command == 'Q' or command == 'q':
return None
diff --git a/dataretention/retention/localfileaudit.py
b/dataretention/retention/localfileaudit.py
index 5a19be9..24b54c2 100644
--- a/dataretention/retention/localfileaudit.py
+++ b/dataretention/retention/localfileaudit.py
@@ -14,6 +14,7 @@
from retention.config import Config
from retention.fileinfo import FileInfo
import retention.fileutils
+import retention.ruleutils
class LocalFilesAuditor(object):
@@ -172,10 +173,10 @@
for rule in self.perhost_rules_from_store[host]:
path = os.path.join(rule['basedir'], rule['name'])
if rule['status'] == 'good':
- if Rule.entrytype_to_text(rule['type']) == 'dir':
+ if retention.ruleutils.entrytype_to_text(rule['type'])
== 'dir':
if path not in self.ignored['dirs']['/']:
self.ignored['dirs']['/'].append(path)
- elif Rule.entrytype_to_text(rule['type']) == 'file':
+ elif
retention.ruleutils.entrytype_to_text(rule['type']) == 'file':
if path not in self.ignored['files']['/']:
self.ignored['files']['/'].append(path)
else:
diff --git a/dataretention/retention/remotefileauditor.py
b/dataretention/retention/remotefileauditor.py
index aa45231..040b58b 100644
--- a/dataretention/retention/remotefileauditor.py
+++ b/dataretention/retention/remotefileauditor.py
@@ -17,6 +17,7 @@
from retention.utils import JsonHelper
from retention.runner import Runner
from retention.localfileaudit import LocalFilesAuditor
+import retention.ruleutils
global_keys = [key for key, value_unused in
sys.modules[__name__].__dict__.items()]
@@ -105,8 +106,6 @@
store_filepath: full path to rule store (sqlite3 db)
verbose: show informative messages during processing
'''
-
- global rules
self.hosts_expr = hosts_expr
self.audit_type = audit_type
@@ -211,7 +210,7 @@
os.makedirs(where_to_put, 0755)
for host in hosts:
nicepath = os.path.join(where_to_put, host + ".conf")
- Rule.export_rules(self.cdb, nicepath, host)
+ retention.ruleutils.export_rules(self.cdb, nicepath, host)
def set_up_ignored(self):
'''
@@ -272,10 +271,10 @@
continue
if entry[-1] == os.path.sep:
entry = entry[:-1]
- entry_type = Rule.text_to_entrytype('dir')
+ entry_type =
retention.ruleutils.text_to_entrytype('dir')
else:
- entry_type = Rule.text_to_entrytype('file')
- rule = Rule.get_rule_as_json(
+ entry_type =
retention.ruleutils.text_to_entrytype('file')
+ rule = retention.ruleutils.get_rule_as_json(
entry, entry_type, status)
rules[host].append(rule)
return rules
@@ -287,7 +286,7 @@
rulescode = "rules = {}\n\n"
rulescode += "rules['%s'] = [\n" % host
rulescode += (indent +
- (",\n%s" % (indent + indent)).join(rules[host]) + "\n")
+ (",\n%s" % (indent + indent)).join(rules[host]) +
"\n")
rulescode += "]\n"
with open("/srv/salt/audits/retention/configs/%s_store.py" % host,
"w+") as fp:
@@ -335,7 +334,7 @@
if basedir in basedirs or basedir == '*':
sys.stderr.write("INFO: " + ','.join(
self.ignored['extensions'][basedir])
- + " in " + basedir + '\n')
+ + " in " + basedir + '\n')
def normalize(self, fname):
'''
@@ -482,7 +481,7 @@
hostlist = report.keys()
for host in hostlist:
try:
- problem_rules = Rule.get_rules(self.cdb, host,
Status.text_to_status('problem'))
+ problem_rules = retention.ruleutils.get_rules(self.cdb, host,
Status.text_to_status('problem'))
except:
print 'WARNING: problem retrieving problem rules for host',
host
problem_rules = None
@@ -495,9 +494,9 @@
if dirs_problem is not None:
dirs_problem = list(set(dirs_problem))
for dirname in dirs_problem:
- Rule.do_add_rule(self.cdb, dirname,
- Rule.text_to_entrytype('dir'),
- Status.text_to_status('problem'), host)
+ retention.ruleutils.do_add_rule(self.cdb, dirname,
+
retention.ruleutils.text_to_entrytype('dir'),
+
Status.text_to_status('problem'), host)
if dirs_skipped is not None:
dirs_skipped = list(set(dirs_skipped))
@@ -505,8 +504,6 @@
if dirname in dirs_problem or dirname in existing_problems:
# problem report overrides 'too many to audit'
continue
- Rule.do_add_rule(self.cdb, dirname,
- Rule.text_to_entrytype('dir'),
- Status.text_to_status('unreviewed'), host)
-
-
+ retention.ruleutils.do_add_rule(self.cdb, dirname,
+
retention.ruleutils.text_to_entrytype('dir'),
+
Status.text_to_status('unreviewed'), host)
diff --git a/dataretention/retention/rule.py b/dataretention/retention/rule.py
index 7b3149a..c46e670 100644
--- a/dataretention/retention/rule.py
+++ b/dataretention/retention/rule.py
@@ -7,6 +7,34 @@
from retention.saltclientplus import LocalClientPlus
from retention.status import Status
+def to_unicode(param):
+ '''
+ convert a parameter to unicode if it is not already
+ '''
+ newparam = param
+ if not isinstance(param, unicode):
+ try:
+ newparam = unicode(param, 'utf-8')
+ except:
+ pass
+ if newparam is None:
+ newparam = param
+ return newparam
+
+def from_unicode(param):
+ '''
+ convert a parameter from unicode back to bytes it is not already
+ '''
+ newparam = param
+ if isinstance(param, unicode):
+ try:
+ newparam = param.encode('utf-8', 'replace')
+ except:
+ pass
+ if newparam is None:
+ newparam = param
+ return newparam
+
class Rule(object):
'''
manage rules, i.e. tuples (status, abspath, type)
@@ -25,437 +53,6 @@
STATE_START = 0
STATE_EXPECT_STATUS = 1
STATE_EXPECT_ENTRIES = 2
-
- @staticmethod
- def to_unicode(param):
- '''
- convert a parameter to unicode if it is not already
- '''
- newparam = param
- if not isinstance(param, unicode):
- try:
- newparam = unicode(param, 'utf-8')
- except:
- pass
- if newparam is None:
- newparam = param
- return newparam
-
- @staticmethod
- def from_unicode(param):
- '''
- convert a parameter from unicode back to bytes it is not already
- '''
- newparam = param
- if isinstance(param, unicode):
- try:
- newparam = param.encode('utf-8', 'replace')
- except:
- pass
- if newparam is None:
- newparam = param
- return newparam
-
- @staticmethod
- def get_rules_for_entries(cdb, path, path_entries, host, quiet=False):
- rules = Rule.get_rules_for_path(cdb, path, host, True)
- for entry in path_entries:
- rules.extend(Rule.get_rules_for_path(cdb, entry, host, True))
-
- paths_kept = []
- uniq = []
- for rule in rules:
- if rule['path'] not in paths_kept:
- paths_kept.append(rule['path'])
- uniq.append(rule)
-
- if not quiet:
- uniq_sorted = sorted(uniq, key=lambda r: r['path'])
- for rule in uniq_sorted:
- print rule
- return uniq_sorted
-
- @staticmethod
- def format_rules_for_export(rules_list, indent_count):
- if len(rules_list) == 0:
- return "[]"
-
- spaces = " " * 4
- indent = spaces * indent_count
- return ("[\n" + indent + spaces +
- (",\n" + indent + spaces).join(
- ["'" + rule['path'].replace("'", r"\'") + "'"
- for rule in rules_list]
- )
- + "\n" + indent + "]")
-
- @staticmethod
- def import_rule_list(cdb, entries, status, host):
- '''
- import status rules for a list of files or dirs
- - anything not ending in '/' is considered to be a file
- - files/dirs must be specified by full path, anything else
- will be skipped
- - failures to add to rule store are reported but processing continues
- '''
- for entry in entries:
- if entry[0] != os.path.sep:
- print "relative path in rule, skipping:", entry
- continue
- if entry[-1] == '/':
- entry_type = Rule.text_to_entrytype('dir')
- entry = entry[:-1]
- else:
- entry_type = Rule.text_to_entrytype('file')
- try:
- Rule.do_add_rule(cdb, entry, entry_type,
- status, host)
- except:
- exc_type, exc_value, exc_traceback = sys.exc_info()
- sys.stderr.write(repr(traceback.format_exception(
- exc_type, exc_value, exc_traceback)))
- sys.stderr.write("Couldn't add rule for %s to rule store\n" %
- entry)
-
- @staticmethod
- def import_handle_status(line):
- '''
- see if the line passed is a status def line
- returns status found (if any) and next state
- '''
- for stat in Status.status_cf:
- result = Status.status_cf[stat][1].match(line)
- if result is not None:
- if "]" in result.group(0):
- return None, Rule.STATE_EXPECT_STATUS
- else:
- return stat, Rule.STATE_EXPECT_ENTRIES
- return None, None
-
- @staticmethod
- def import_rules(cdb, rules_path, host):
- # we don't toss all existing rules, these get merged into
- # the rules already in the rules store
-
- # it is possible to bork the list of files by deliberately
- # including a file/dir with a newline in the name; this will
- # just mean that your rule doesn't cover the files/dirs you want.
- try:
- rules_text = open(rules_path).read()
- except:
- exc_type, exc_value, exc_traceback = sys.exc_info()
- sys.stderr.write(repr(traceback.format_exception(
- exc_type, exc_value, exc_traceback)))
- sys.stderr.write("Couldn't read rules from %s.\n" % rules_path)
- return
-
- lines = rules_text.split("\n")
- state = Rule.STATE_START
- rules = {}
- active = None
- for line in lines:
- if Rule.comment_expr.match(line) or Rule.blank_expr.match(line):
- continue
- elif state == Rule.STATE_START:
- if not Rule.first_line_expected.match(line):
- print "unexpected line in rules file, wanted "
- print "'dir_rules = ...', aborting:"
- print line
- return
- else:
- state = Rule.STATE_EXPECT_STATUS
- elif state == Rule.STATE_EXPECT_STATUS:
- if Rule.last_line_expected.match(line):
- # done parsing file
- break
- active, state = Rule.import_handle_status(line)
- if state == Rule.STATE_EXPECT_STATUS:
- continue
- elif state == Rule.STATE_EXPECT_ENTRIES:
- rules[active] = []
- elif state is None:
- # not a status with empty list, not a status
- # expecting entries on following lines, bail
- print "unexpected line in rules file, aborting:"
- print line
- return
- elif state == Rule.STATE_EXPECT_ENTRIES:
- if Rule.entry_expr.match(line):
- result = Rule.entry_expr.match(line)
- rules[active].append(result.group(1))
- elif Rule.end_entries_expr.match(line):
- active = None
- state = Rule.STATE_EXPECT_STATUS
- else:
- active, state = Rule.import_handle_status(line)
- if state == Rule.STATE_EXPECT_STATUS:
- # end of entries with crap syntax, we forgive
- continue
- elif state == Rule.STATE_EXPECT_ENTRIES:
- # found a status line with empty list.
- # so end of these entries ayways
- state = Rule.STATE_EXPECT_STATUS
- continue
- elif state is None:
- # not an entry, not a status, not end of entries
- print "unexpected line in rules file, wanted entry, "
- print "status or entry end marker, aborting:"
- print line
- return
- else:
- print "unexpected line in rules file, aborting:"
- print line
- return
-
- for status in Status.status_cf:
- if status in rules:
- Rule.import_rule_list(
- cdb, rules[status],
- Status.status_cf[status][0], host)
-
- @staticmethod
- def do_remove_rule(cdb, path, host):
- cdb.store_db_delete({'basedir': os.path.dirname(path),
- 'name': os.path.basename(path)},
- host)
-
- @staticmethod
- def do_remove_rules(cdb, status, host):
- cdb.store_db_delete({'status': status},
- host)
-
- @staticmethod
- def do_add_rule(cdb, path, rtype, status, host):
- cdb.store_db_replace({'basedir': os.path.dirname(path),
- 'name': os.path.basename(path),
- 'type': rtype,
- 'status': status},
- host)
-
- @staticmethod
- def check_host_table_exists(cdb, host):
- return cdb.store_db_check_host_table(host)
-
- @staticmethod
- def normalize_path(path, ptype):
- '''
- make sure the path ends in '/' if it's dir type, otherwise
- that it does not, return the normalized path
- '''
- if ptype == 'dir':
- if path[-1] != os.path.sep:
- path = path + os.path.sep
- else:
- if path[-1] == os.path.sep:
- path = path[:-1]
- return path
-
- @staticmethod
- def export_rules(cdb, rules_path, host, status=None):
- # would be nice to be able to only export some rules. whatever
-
- rules = Rule.get_rules(cdb, host, status)
- sorted_rules = {}
- for stext in Status.STATUS_TEXTS:
- sorted_rules[stext] = []
- for rule in rules:
- if rule['status'] in Status.STATUS_TEXTS:
- rule['path'] = Rule.normalize_path(rule['path'], rule['type'])
- sorted_rules[rule['status']].append(rule)
- else:
- continue
-
- output = "dir_rules = {\n"
- for status in Status.STATUS_TEXTS:
- output += " '%s': %s,\n" % (
- status, Rule.format_rules_for_export(sorted_rules[status], 2))
- output += "}\n"
- try:
- filep = open(rules_path, "w")
- filep.write("# -*- coding: utf-8 -*-\n")
- filep.write(output)
- filep.close()
- except:
- exc_type, exc_value, exc_traceback = sys.exc_info()
- sys.stderr.write(repr(traceback.format_exception(
- exc_type, exc_value, exc_traceback)))
- sys.stderr.write("Couldn't save rules into %s.\n" % rules_path)
-
- @staticmethod
- def entrytype_to_text(abbrev):
- if abbrev in Rule.TYPES:
- return Rule.TYPES_TO_TEXT[abbrev]
- else:
- return None
-
- @staticmethod
- def text_to_entrytype(fullname):
- for key in Rule.TYPES_TO_TEXT:
- if Rule.TYPES_TO_TEXT[key] == fullname:
- return key
- return None
-
- @staticmethod
- def row_to_rule(row):
- # ('/home/ariel/wmf/security', '/home/ariel/wmf/security/openjdk6',
'D', 'G')
- (basedir, name, entrytype, status) = row
- basedir = Rule.from_unicode(basedir)
- name = Rule.from_unicode(name)
- rule = {'path': os.path.join(basedir, name),
- 'type': Rule.entrytype_to_text(entrytype),
- 'status': Status.status_to_text(status)}
- return rule
-
- @staticmethod
- def get_rules(cdb, host, status=None):
- if status:
- crs = cdb.store_db_select(['basedir', 'name', 'type', 'status'],
- {'status': status}, host)
- else:
- crs = cdb.store_db_select(['basedir', 'name', 'type', 'status'],
- None, host)
- rules = []
- rows = RuleStore.store_db_get_all_rows(crs)
- for row in rows:
- rules.append(Rule.row_to_rule(row))
- return rules
-
- @staticmethod
- def show_rules(cdb, host, status=None, prefix=None):
- rules = Rule.get_rules(cdb, host, status)
- if rules:
- rules_sorted = sorted(rules, key=lambda r: r['path'])
- for rule in rules_sorted:
- if prefix is None or rule['path'].startswith(prefix):
- print rule
-
- @staticmethod
- def get_rules_with_prefix(cdb, path, host):
- '''
- retrieve all rules where the basedir starts with the specified path
- '''
- # prefixes...
- crs = cdb.store_db_select(['basedir', 'name', 'type', 'status'],
- {'basedir': path}, host)
- rules = []
- rows = RuleStore.store_db_get_all_rows(crs)
- for row in rows:
- rules.append(Rule.row_to_rule(row))
- return rules
-
- @staticmethod
- def check_rule_prefixes(rows):
- '''
- separate out the rules with wildcards in the name field
- and those without
- '''
- text = []
- wildcards = []
- if rows is None:
- return text, wildcards
-
- for row in rows:
- if '*' in os.path.basename(row['path']):
- wildcards.append(row)
- else:
- text.append(row)
- return text, wildcards
-
- @staticmethod
- def rule_is_prefix(basedir, name, path, wildcard=False):
- '''
- if the dir part of the rule entry plus the basename is
- a proper path prefix of the specified path (followed by the
- path separator, or it's the exact path), return True, else False
-
- wildcard matches are done only for a single wildcard in the name
- component of the rule entry and does not cross a directory path
- component i.e. basedir = /a/b and name = c* will not match
- path /a/b/cow/dog but will match /a/b/cow
- '''
- if not wildcard:
- if path.startswith(os.path.join(basedir, name) + os.path.sep):
- return True
- elif path == os.path.join(basedir, name):
- return True
- else:
- rulepath = os.path.join(basedir, name)
- if len(rulepath) >= len(path):
- return False
-
- left, right = rulepath.split('*', 1)
- if path.startswith(left):
- if path.endswith(right):
- if os.path.sep not in path[len(left): -1 * len(right)]:
- return True
- return False
-
- @staticmethod
- def get_rules_for_path(cdb, path, host, quiet=False):
- # get all paths starting from / and descending to the specified path
- prefixes = Rule.get_prefixes(path)
- rows = []
- # get all entries where the dir part of the path is a prefix and the
- # name part of the path will be checked to see if it is the next dir
- # elt in the path or wildcard matches it
-
- for pref in prefixes:
- rows.extend(Rule.get_rules_with_prefix(cdb, pref, host))
- # split out the rules with wildcards in the basename from the rest
- regulars, wildcards = Rule.check_rule_prefixes(rows)
- keep = []
- paths_kept = []
- for plain in regulars:
- if Rule.rule_is_prefix(os.path.dirname(plain['path']),
- os.path.basename(plain['path']), path):
- if plain['path'] not in paths_kept:
- keep.append(plain)
- paths_kept.append(plain['path'])
- for wild in wildcards:
- if Rule.rule_is_prefix(os.path.dirname(wild['path']),
- os.path.basename(wild['path']),
- path, wildcard=True):
- if wild['path'] not in paths_kept:
- keep.append(wild)
- paths_kept.append(wild['path'])
-
- if len(keep) == 0:
- keep_sorted = keep
- else:
- keep_sorted = sorted(keep, key=lambda r: r['path'])
- if not quiet:
- print "No rules for directory"
- else:
- for rule in keep_sorted:
- print rule
- return keep_sorted
-
- @staticmethod
- def get_prefixes(path):
- '''
- given an absolute path like /a/b/c, return the list of all paths
- starting from / and descending to the specified path
- i.e. if given '/a/b/c', would return ['/', '/a', '/a/b', 'a/b/c']
- for relative paths or empty paths we return an empty prefix list
- '''
- if not path or path[0] != '/':
- return []
- fields = path.split(os.path.sep)
- prefix = "/"
- prefixes = [prefix]
- for field in fields:
- if field:
- prefix = os.path.join(prefix, field)
- prefixes.append(prefix)
- return prefixes
-
- @staticmethod
- def get_rule_as_json(path, ptype, status):
- rule = {'basedir': os.path.dirname(path),
- 'name': os.path.basename(path),
- 'type': ptype,
- 'status': status}
- return json.dumps(rule)
class RuleStore(object):
@@ -633,8 +230,8 @@
self.crs.execute("INSERT INTO %s VALUES (?, ?, ?, ?)"
% self.get_tablename(host),
- (Rule.to_unicode(params['basedir']),
- Rule.to_unicode(params['name']),
+ (to_unicode(params['basedir']),
+ to_unicode(params['name']),
params['type'],
params['status']))
self.store_db.commit()
@@ -651,8 +248,8 @@
self.crs.execute("INSERT OR REPLACE INTO %s VALUES (?, ?, ?, ?)"
% self.get_tablename(host),
- (Rule.to_unicode(params['basedir']),
- Rule.to_unicode(params['name']),
+ (to_unicode(params['basedir']),
+ to_unicode(params['name']),
params['type'],
params['status']))
self.store_db.commit()
diff --git a/dataretention/retention/ruleutils.py
b/dataretention/retention/ruleutils.py
new file mode 100644
index 0000000..b23086f
--- /dev/null
+++ b/dataretention/retention/ruleutils.py
@@ -0,0 +1,386 @@
+import os
+import sys
+import json
+import traceback
+from retention.status import Status
+import retention.rule
+from retention.rule import Rule, RuleStore
+
+def get_rules_for_entries(cdb, path, path_entries, host, quiet=False):
+ rules = get_rules_for_path(cdb, path, host, True)
+ for entry in path_entries:
+ rules.extend(get_rules_for_path(cdb, entry, host, True))
+
+ paths_kept = []
+ uniq = []
+ for rule in rules:
+ if rule['path'] not in paths_kept:
+ paths_kept.append(rule['path'])
+ uniq.append(rule)
+
+ if not quiet:
+ uniq_sorted = sorted(uniq, key=lambda r: r['path'])
+ for rule in uniq_sorted:
+ print rule
+ return uniq_sorted
+
+def format_rules_for_export(rules_list, indent_count):
+ if len(rules_list) == 0:
+ return "[]"
+
+ spaces = " " * 4
+ indent = spaces * indent_count
+ return ("[\n" + indent + spaces +
+ (",\n" + indent + spaces).join(
+ ["'" + rule['path'].replace("'", r"\'") + "'"
+ for rule in rules_list]
+ )
+ + "\n" + indent + "]")
+
+def import_rule_list(cdb, entries, status, host):
+ '''
+ import status rules for a list of files or dirs
+ - anything not ending in '/' is considered to be a file
+ - files/dirs must be specified by full path, anything else
+ will be skipped
+ - failures to add to rule store are reported but processing continues
+ '''
+ for entry in entries:
+ if entry[0] != os.path.sep:
+ print "relative path in rule, skipping:", entry
+ continue
+ if entry[-1] == '/':
+ entry_type = text_to_entrytype('dir')
+ entry = entry[:-1]
+ else:
+ entry_type = text_to_entrytype('file')
+ try:
+ do_add_rule(cdb, entry, entry_type,
+ status, host)
+ except:
+ exc_type, exc_value, exc_traceback = sys.exc_info()
+ sys.stderr.write(repr(traceback.format_exception(
+ exc_type, exc_value, exc_traceback)))
+ sys.stderr.write("Couldn't add rule for %s to rule store\n" %
+ entry)
+
+def import_handle_status(line):
+ '''
+ see if the line passed is a status def line
+ returns status found (if any) and next state
+ '''
+ for stat in Status.status_cf:
+ result = Status.status_cf[stat][1].match(line)
+ if result is not None:
+ if "]" in result.group(0):
+ return None, Rule.STATE_EXPECT_STATUS
+ else:
+ return stat, Rule.STATE_EXPECT_ENTRIES
+ return None, None
+
+def import_rules(cdb, rules_path, host):
+ # we don't toss all existing rules, these get merged into
+ # the rules already in the rules store
+
+ # it is possible to bork the list of files by deliberately
+ # including a file/dir with a newline in the name; this will
+ # just mean that your rule doesn't cover the files/dirs you want.
+ try:
+ rules_text = open(rules_path).read()
+ except:
+ exc_type, exc_value, exc_traceback = sys.exc_info()
+ sys.stderr.write(repr(traceback.format_exception(
+ exc_type, exc_value, exc_traceback)))
+ sys.stderr.write("Couldn't read rules from %s.\n" % rules_path)
+ return
+
+ lines = rules_text.split("\n")
+ state = Rule.STATE_START
+ rules = {}
+ active = None
+ for line in lines:
+ if Rule.comment_expr.match(line) or Rule.blank_expr.match(line):
+ continue
+ elif state == Rule.STATE_START:
+ if not Rule.first_line_expected.match(line):
+ print "unexpected line in rules file, wanted "
+ print "'dir_rules = ...', aborting:"
+ print line
+ return
+ else:
+ state = Rule.STATE_EXPECT_STATUS
+ elif state == Rule.STATE_EXPECT_STATUS:
+ if Rule.last_line_expected.match(line):
+ # done parsing file
+ break
+ active, state = import_handle_status(line)
+ if state == Rule.STATE_EXPECT_STATUS:
+ continue
+ elif state == Rule.STATE_EXPECT_ENTRIES:
+ rules[active] = []
+ elif state is None:
+ # not a status with empty list, not a status
+ # expecting entries on following lines, bail
+ print "unexpected line in rules file, aborting:"
+ print line
+ return
+ elif state == Rule.STATE_EXPECT_ENTRIES:
+ if Rule.entry_expr.match(line):
+ result = Rule.entry_expr.match(line)
+ rules[active].append(result.group(1))
+ elif Rule.end_entries_expr.match(line):
+ active = None
+ state = Rule.STATE_EXPECT_STATUS
+ else:
+ active, state = import_handle_status(line)
+ if state == Rule.STATE_EXPECT_STATUS:
+ # end of entries with crap syntax, we forgive
+ continue
+ elif state == Rule.STATE_EXPECT_ENTRIES:
+ # found a status line with empty list.
+ # so end of these entries ayways
+ state = Rule.STATE_EXPECT_STATUS
+ continue
+ elif state is None:
+ # not an entry, not a status, not end of entries
+ print "unexpected line in rules file, wanted entry, "
+ print "status or entry end marker, aborting:"
+ print line
+ return
+ else:
+ print "unexpected line in rules file, aborting:"
+ print line
+ return
+
+ for status in Status.status_cf:
+ if status in rules:
+ import_rule_list(
+ cdb, rules[status],
+ Status.status_cf[status][0], host)
+
+def do_remove_rule(cdb, path, host):
+ cdb.store_db_delete({'basedir': os.path.dirname(path),
+ 'name': os.path.basename(path)},
+ host)
+
+def do_remove_rules(cdb, status, host):
+ cdb.store_db_delete({'status': status},
+ host)
+
+def do_add_rule(cdb, path, rtype, status, host):
+ cdb.store_db_replace({'basedir': os.path.dirname(path),
+ 'name': os.path.basename(path),
+ 'type': rtype,
+ 'status': status},
+ host)
+
+def check_host_table_exists(cdb, host):
+ return cdb.store_db_check_host_table(host)
+
+def normalize_path(path, ptype):
+ '''
+ make sure the path ends in '/' if it's dir type, otherwise
+ that it does not, return the normalized path
+ '''
+ if ptype == 'dir':
+ if path[-1] != os.path.sep:
+ path = path + os.path.sep
+ else:
+ if path[-1] == os.path.sep:
+ path = path[:-1]
+ return path
+
+def export_rules(cdb, rules_path, host, status=None):
+ # would be nice to be able to only export some rules. whatever
+
+ rules = get_rules(cdb, host, status)
+ sorted_rules = {}
+ for stext in Status.STATUS_TEXTS:
+ sorted_rules[stext] = []
+ for rule in rules:
+ if rule['status'] in Status.STATUS_TEXTS:
+ rule['path'] = normalize_path(rule['path'], rule['type'])
+ sorted_rules[rule['status']].append(rule)
+ else:
+ continue
+
+ output = "dir_rules = {\n"
+ for status in Status.STATUS_TEXTS:
+ output += " '%s': %s,\n" % (
+ status, format_rules_for_export(sorted_rules[status], 2))
+ output += "}\n"
+ try:
+ filep = open(rules_path, "w")
+ filep.write("# -*- coding: utf-8 -*-\n")
+ filep.write(output)
+ filep.close()
+ except:
+ exc_type, exc_value, exc_traceback = sys.exc_info()
+ sys.stderr.write(repr(traceback.format_exception(
+ exc_type, exc_value, exc_traceback)))
+ sys.stderr.write("Couldn't save rules into %s.\n" % rules_path)
+
+def entrytype_to_text(abbrev):
+ if abbrev in Rule.TYPES:
+ return Rule.TYPES_TO_TEXT[abbrev]
+ else:
+ return None
+
+def text_to_entrytype(fullname):
+ for key in Rule.TYPES_TO_TEXT:
+ if Rule.TYPES_TO_TEXT[key] == fullname:
+ return key
+ return None
+
+def row_to_rule(row):
+ # ('/home/ariel/wmf/security', '/home/ariel/wmf/security/openjdk6', 'D',
'G')
+ (basedir, name, entrytype, status) = row
+ basedir = retention.rule.from_unicode(basedir)
+ name = retention.rule.from_unicode(name)
+ rule = {'path': os.path.join(basedir, name),
+ 'type': entrytype_to_text(entrytype),
+ 'status': Status.status_to_text(status)}
+ return rule
+
+def get_rules(cdb, host, status=None):
+ if status:
+ crs = cdb.store_db_select(['basedir', 'name', 'type', 'status'],
+ {'status': status}, host)
+ else:
+ crs = cdb.store_db_select(['basedir', 'name', 'type', 'status'],
+ None, host)
+ rules = []
+ rows = RuleStore.store_db_get_all_rows(crs)
+ for row in rows:
+ rules.append(row_to_rule(row))
+ return rules
+
+def show_rules(cdb, host, status=None, prefix=None):
+ rules = get_rules(cdb, host, status)
+ if rules:
+ rules_sorted = sorted(rules, key=lambda r: r['path'])
+ for rule in rules_sorted:
+ if prefix is None or rule['path'].startswith(prefix):
+ print rule
+
+def get_rules_with_prefix(cdb, path, host):
+ '''
+ retrieve all rules where the basedir starts with the specified path
+ '''
+ # prefixes...
+ crs = cdb.store_db_select(['basedir', 'name', 'type', 'status'],
+ {'basedir': path}, host)
+ rules = []
+ rows = RuleStore.store_db_get_all_rows(crs)
+ for row in rows:
+ rules.append(row_to_rule(row))
+ return rules
+
+def check_rule_prefixes(rows):
+ '''
+ separate out the rules with wildcards in the name field
+ and those without
+ '''
+ text = []
+ wildcards = []
+ if rows is None:
+ return text, wildcards
+
+ for row in rows:
+ if '*' in os.path.basename(row['path']):
+ wildcards.append(row)
+ else:
+ text.append(row)
+ return text, wildcards
+
+def rule_is_prefix(basedir, name, path, wildcard=False):
+ '''
+ if the dir part of the rule entry plus the basename is
+ a proper path prefix of the specified path (followed by the
+ path separator, or it's the exact path), return True, else False
+
+ wildcard matches are done only for a single wildcard in the name
+ component of the rule entry and does not cross a directory path
+ component i.e. basedir = /a/b and name = c* will not match
+ path /a/b/cow/dog but will match /a/b/cow
+ '''
+ if not wildcard:
+ if path.startswith(os.path.join(basedir, name) + os.path.sep):
+ return True
+ elif path == os.path.join(basedir, name):
+ return True
+ else:
+ rulepath = os.path.join(basedir, name)
+ if len(rulepath) >= len(path):
+ return False
+
+ left, right = rulepath.split('*', 1)
+ if path.startswith(left):
+ if path.endswith(right):
+ if os.path.sep not in path[len(left): -1 * len(right)]:
+ return True
+ return False
+
+def get_rules_for_path(cdb, path, host, quiet=False):
+ # get all paths starting from / and descending to the specified path
+ prefixes = get_prefixes(path)
+ rows = []
+ # get all entries where the dir part of the path is a prefix and the
+ # name part of the path will be checked to see if it is the next dir
+ # elt in the path or wildcard matches it
+
+ for pref in prefixes:
+ rows.extend(get_rules_with_prefix(cdb, pref, host))
+ # split out the rules with wildcards in the basename from the rest
+ regulars, wildcards = check_rule_prefixes(rows)
+ keep = []
+ paths_kept = []
+ for plain in regulars:
+ if rule_is_prefix(os.path.dirname(plain['path']),
+ os.path.basename(plain['path']), path):
+ if plain['path'] not in paths_kept:
+ keep.append(plain)
+ paths_kept.append(plain['path'])
+ for wild in wildcards:
+ if rule_is_prefix(os.path.dirname(wild['path']),
+ os.path.basename(wild['path']),
+ path, wildcard=True):
+ if wild['path'] not in paths_kept:
+ keep.append(wild)
+ paths_kept.append(wild['path'])
+
+ if len(keep) == 0:
+ keep_sorted = keep
+ else:
+ keep_sorted = sorted(keep, key=lambda r: r['path'])
+ if not quiet:
+ print "No rules for directory"
+ else:
+ for rule in keep_sorted:
+ print rule
+ return keep_sorted
+
+def get_prefixes(path):
+ '''
+ given an absolute path like /a/b/c, return the list of all paths
+ starting from / and descending to the specified path
+ i.e. if given '/a/b/c', would return ['/', '/a', '/a/b', 'a/b/c']
+ for relative paths or empty paths we return an empty prefix list
+ '''
+ if not path or path[0] != '/':
+ return []
+ fields = path.split(os.path.sep)
+ prefix = "/"
+ prefixes = [prefix]
+ for field in fields:
+ if field:
+ prefix = os.path.join(prefix, field)
+ prefixes.append(prefix)
+ return prefixes
+
+def get_rule_as_json(path, ptype, status):
+ rule = {'basedir': os.path.dirname(path),
+ 'name': os.path.basename(path),
+ 'type': ptype,
+ 'status': status}
+ return json.dumps(rule)
diff --git a/dataretention/rulestore.py b/dataretention/rulestore.py
index 9490f02..976590d 100644
--- a/dataretention/rulestore.py
+++ b/dataretention/rulestore.py
@@ -10,6 +10,7 @@
from retention.saltclientplus import LocalClientPlus
import retention.utils
+import retention.ruleutils
from retention.rule import Rule, RuleStore
from retention.status import Status
@@ -65,7 +66,7 @@
if path and path[-1] == os.path.sep:
path = path[:-1]
for host in hosts:
- Rule.show_rules(cdb, host, status, prefix=path)
+ retention.ruleutils.show_rules(cdb, host, status, prefix=path)
elif action == 'delete':
if path and path[-1] == os.path.sep:
@@ -76,13 +77,13 @@
print "would remove rule for %s in %s" % (path, hosts)
else:
for host in hosts:
- Rule.do_remove_rule(cdb, path, host)
+ retention.ruleutils.do_remove_rule(cdb, path, host)
elif status:
if dryrun:
print "would remove rules for status %s in %s" % (status,
hosts)
else:
for host in hosts:
- Rule.do_remove_rules(cdb, status, host)
+ retention.ruleutils.do_remove_rules(cdb, status, host)
elif action == 'add':
if status is None:
@@ -91,17 +92,17 @@
usage('path must be specified to add a rule')
if path[-1] == os.path.sep:
- rtype = Rule.text_to_entrytype('dir')
+ rtype = retention.ruleutils.text_to_entrytype('dir')
path = path[:-1]
else:
- rtype = Rule.text_to_entrytype('file')
+ rtype = retention.ruleutils.text_to_entrytype('file')
if dryrun:
print "would add rule for %s in %s with status %s of type %s" % (
hosts, path, status, rtype)
for host in hosts:
- Rule.do_add_rule(cdb, path, rtype, status, host)
+ retention.ruleutils.do_add_rule(cdb, path, rtype, status, host)
def main():
host = None
@@ -150,10 +151,10 @@
cdb.store_db_init(None)
hosts, htype = retention.utils.get_hosts_expr_type(host)
-
+
# if we are given one host, check that the host has a table or whine
if htype == 'glob' and '*' not in hosts:
- if not Rule.check_host_table_exists(cdb, host):
+ if not retention.ruleutils.check_host_table_exists(cdb, host):
usage('no such host in rule store, %s' % host)
elif htype == 'grain':
client = LocalClientPlus()
--
To view, visit https://gerrit.wikimedia.org/r/233457
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie73f7215a8ff4be9c9a079e03f465b393b3de329
Gerrit-PatchSet: 2
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits