Attached is a work-in-progress patch to support the --include and --exclude selection options in concert with --remove-older-than. This provides the functionality described in http://wiki.rdiff-backup.org/wiki/index.php/RemoveOlderThanAllowsSubdirectories and http://wiki.rdiff-backup.org/wiki/index.php/RemoveSpecifiedFiles. I'm mainly posting this for feedback on the implementation, particularly the changes in selection.py and metadata.py.

AFAICT, the main pieces left are munging the file_statistics files and loosening validation. Regarding the validation: currently, --remove-older-than checks that it will not remove more than one increment. I think this check should be loosened when --remove-older-than is used with selection options, but other than simply removing the check, I don't have a lot of ideas. Thoughts?

Thanks,
JoshN
--- rdiff_backup/Main.py	12 Oct 2008 02:21:29 -0000	1.121
+++ rdiff_backup/Main.py	27 Oct 2008 02:06:49 -0000
@@ -716,10 +716,17 @@
 	rootrp = require_root_set(rootrp, 0)
 	rot_require_rbdir_base(rootrp)
 
+	# Validate that the selection options are valid
+	for select_opt in select_opts:
+		if select_opt[0] != '--include' and \
+				select_opt[0] != '--exclude':
+			Log.FatalError("Only --include and --exclude are "
+				"supported with --remove-older-than.")
+
 	time = rot_check_time(remove_older_than_string)
 	if time is None: return
 	Log("Actual remove older than time: %s" % (time,), 6)
-	manage.delete_earlier_than(Globals.rbdir, time)
+	manage.delete_earlier_than(Globals.rbdir, time, select_opts)
 
 def rot_check_time(time_string):
 	"""Check remove older than time_string, return time in seconds"""
--- rdiff_backup/manage.py	7 Jul 2007 22:43:34 -0000	1.13
+++ rdiff_backup/manage.py	27 Oct 2008 02:06:49 -0000
@@ -22,6 +22,7 @@
 from __future__ import generators
 from log import Log
 import Globals, Time, static, statistics, restore, selection, FilenameMapping
+import metadata
 
 
 class ManageException(Exception): pass
@@ -80,7 +81,7 @@
 	result.append("Current mirror: %s" % Time.timetopretty(mirror_time))
 	return "\n".join(result)
 
-def delete_earlier_than(baserp, time):
+def delete_earlier_than(baserp, time, select_opts):
 	"""Deleting increments older than time in directory baserp
 
 	time is in seconds.  It will then delete any empty directories
@@ -88,9 +89,9 @@
 	rdiff-backup-data directory should be the root of the tree.
 
 	"""
-	baserp.conn.manage.delete_earlier_than_local(baserp, time)
+	baserp.conn.manage.delete_earlier_than_local(baserp, time, select_opts)
 
-def delete_earlier_than_local(baserp, time):
+def delete_earlier_than_local(baserp, time, select_opts):
 	"""Like delete_earlier_than, but run on local connection for speed"""
 	assert baserp.conn is Globals.local_connection
 	def yield_files(rp):
@@ -100,13 +101,37 @@
 					yield sub_rp
 		yield rp
 
-	for rp in yield_files(baserp):
-		if ((rp.isincfile() and rp.getinctime() < time) or
-			(rp.isdir() and not rp.listdir())):
-			Log("Deleting increment file %s" % rp.path, 5)
-			rp.delete()
-
+	if not select_opts:
+		# Simple remove. Delete all files with timestamp older
+		# than time.
+		for rp in yield_files(baserp):
+			if ((rp.isincfile() and rp.getinctime() < time) or
+				(rp.isdir() and not rp.listdir())):
 
+				Log("Deleting increment file %s" % rp.path, 5)
+				rp.delete()
+	else:
+		# Remove with selection options. Remove all increments
+		# that match, then modify metadata files to match.
+		select = selection.Select(baserp.append_path('increments'), True)
+		select.ParseArgs(select_opts, [])
+
+		for rp in yield_files(baserp.append_path('increments')):
+			if ((rp.isincfile() and rp.getinctime() < time) or
+				(rp.isdir() and not rp.listdir())):
+
+				if select.Select(rp) == 1: # File matched
+					Log("Deleting increment file %s" % rp.path, 5)
+					rp.delete()
+
+		# Process metadata
+		select = selection.Select(baserp)
+		select.ParseArgs(select_opts, [])
+		metadata.SetManager()
+		def callback(rorp):
+			return select.Select(rorp) == 1
+		metadata.rewrite_meta_files(time, callback)
+		
 class IncObj:
 	"""Increment object - represent a completed increment"""
 	def __init__(self, incrp):
--- rdiff_backup/metadata.py	27 Sep 2008 00:17:24 -0000	1.32
+++ rdiff_backup/metadata.py	27 Oct 2008 02:06:49 -0000
@@ -429,6 +429,61 @@
 	_extractor = RorpExtractor
 	_object_to_record = staticmethod(RORP2Record)
 
+def rewrite_meta_files(beforetime, callback):
+	""" Rewrites the various metadata files, removing historical
+	increments for rorp's where callback returns True.
+
+	This function is more complicated because of the metadata
+	snapshot files. rewrite_meta_files goes from the latest
+	metadata information, and works earlier. Whenever it encounters
+	an entry in a snapshot file, it finds what that information
+	should be, based on later metadata files, and puts that in
+	the new file."""
+
+	meta_base = Globals.rbdir.append_path('mirror_metadata')
+	metatimes = restore.get_inclist(meta_base)
+	metatimes = [file.getinctime() for file in metatimes]
+	metatimes.sort()
+	metatimes.reverse()
+
+	single_manager = Manager()
+
+	prevtime = None
+	for time in metatimes:
+		if time < beforetime:
+			inc_type = ManagerObj.get_meta_inctype(time)
+			reader = single_manager.GetAtTime(time, None)
+			writer = ManagerObj.GetWriter(inc_type, time)
+			if inc_type != 'snapshot':
+				# We're rewriting a diff file. Since the entries in these
+				# files work like the increments (they only exist if
+				# there's a change), we can just write the entries that
+				# don't match the callback.
+				for rorp in reader:
+					if not callback(rorp):
+						writer.write_object(rorp)
+			else:
+				# We're rewriting a snapshot file. Iterate through both
+				# this snapshot and the metadata as it existed at the
+				# previous backup. For each rorp, if it matches the
+				# callback, use the previous version (effectively
+				# removing this backup's increment), otherwise use this
+				# backup's version.
+				assert not prevtime is None
+				prev_reader = ManagerObj.GetAtTime(prevtime, None)
+				iter = rorpiter.Collate2Iters(reader, prev_reader)
+				for this_rorp, prev_rorp in iter:
+					rorp = this_rorp or prev_rorp
+					if callback(rorp):
+						# Use previous version
+						if prev_rorp:
+							writer.write_object(prev_rorp)
+					else:
+						if this_rorp:
+							writer.write_object(this_rorp)
+
+			writer.close()
+		prevtime = time
 
 class CombinedWriter:
 	"""Used for simultaneously writting metadata, eas, and acls"""
@@ -482,6 +537,12 @@
 		if self.prefixmap.has_key(incbase): self.prefixmap[incbase].append(rp)
 		else: self.prefixmap[incbase] = [rp]
 
+	def get_meta_inctype(self, time):
+		metas = filter(lambda x: x.getinctime() == time,
+					self.prefixmap['mirror_metadata'])
+		assert len(metas) == 1, metas
+		return metas[0].getinctype()
+
 	def _iter_helper(self, prefix, flatfileclass, time, restrict_index):
 		"""Used below to find the right kind of file by time"""
 		if not self.timerpmap.has_key(time): return None
@@ -690,3 +751,4 @@
 
 
 import eas_acls, win_acls # put at bottom to avoid python circularity bug
+import restore
--- rdiff_backup/selection.py	4 Sep 2008 23:36:20 -0000	1.47
+++ rdiff_backup/selection.py	27 Oct 2008 02:06:49 -0000
@@ -79,12 +79,13 @@
 	# This re should not match normal filenames, but usually just globs
 	glob_re = re.compile("(.*[*?[\\\\]|ignorecase\\:)", re.I | re.S)
 
-	def __init__(self, rootrp):
+	def __init__(self, rootrp, use_incr_name=False):
 		"""Select initializer.  rpath is the root directory"""
 		assert isinstance(rootrp, rpath.RPath)
 		self.selection_functions = []
 		self.rpath = rootrp
 		self.prefix = self.rpath.path
+		self.use_incr_name = use_incr_name
 
 	def set_iter(self, sel_func = None):
 		"""Initialize more variables, get ready to iterate
@@ -537,6 +538,15 @@
 		sel_func.name = "%s size %d" % (min_max and "Maximum" or "Minimum", size)
 		return sel_func
 
+	def get_filename(self, rp):
+		if self.use_incr_name and rp.isincfile():
+			return rp.getincbase().path
+		else:
+			if hasattr(rp, 'path'):
+				return rp.path
+			else:
+				return self.prefix + '/'.join(rp.index)
+
 	def glob_get_sf(self, glob_str, include):
 		"""Return selection function given by glob string"""
 		assert include == 0 or include == 1
@@ -614,12 +624,12 @@
 							   "|".join(self.glob_get_prefix_res(glob_str)))
 
 		def include_sel_func(rp):
-			if glob_comp_re.match(rp.path): return 1
-			elif scan_comp_re.match(rp.path): return 2
+			if glob_comp_re.match(self.get_filename(rp)): return 1
+			elif scan_comp_re.match(self.get_filename(rp)): return 2
 			else: return None
 
 		def exclude_sel_func(rp):
-			if glob_comp_re.match(rp.path): return 0
+			if glob_comp_re.match(self.get_filename(rp)): return 0
 			else: return None
 
 		# Check to make sure prefix is ok
_______________________________________________
rdiff-backup-users mailing list at [email protected]
http://lists.nongnu.org/mailman/listinfo/rdiff-backup-users
Wiki URL: http://rdiff-backup.solutionsfirst.com.au/index.php/RdiffBackupWiki

Reply via email to