-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Chris Lieb wrote:
> I am setting up Bacula right now for the first time and have pretty much
> everything working, but have one nagging concern.  Since Bacula was
> designed around tape backup systems, it supports recycling of volumes so
> that they can be used again after a certain period of time.  However, I
> do not think that this is very useful for a hard drive-based backup setup.
> 
> My setup currently is using the three pool system (Full-Diff-Incr) that
> is mentioned in the Bacula documentation.  My labeling scheme produces a
> unique label for every volume (1 job per volume).  When a volume reaches
> a certain age, based on the pool that it is in, I want the volume to be
> deleted from disk instead of recycled.  You can purge files from the
> database as part of the configuration, but I don't see any way to have
> Bacula delete volumes over a certain age.
> 
> How can this be achieved?  I am using Bacula 2.4.4 for my director,
> storage, and all file daemons, with the director and storage daemons
> running on a current Linux install and the backups being stored onto
> their own hard drive.
> 
> I can post config files if needed.
> 
> Thanks,
> Chris Lieb

For anyone interested, here is a Python script, tested on Python 2.5,
that will delete any backups that are marked as recyclable and have been
held past their retention time, based on the Label Date (can easily be
changed to use First Written[0] or Last Written[1]).  I'm not very
experienced with Python, so there could be better ways to do this that I
missed and I could very well have abused Python :).  Comments are welcome.

Enjoy,
Chris

[0] Patch to begin retention period on First Written

- --- delete-old-bacula-volume.py 2009-02-19 11:04:12.000000000 -0600
+++ delete-old-bacula-volume2.py        2009-02-19 11:11:43.000000000 -0600
@@ -36 +36 @@
- -               return self.label_date + self.retention
+               return self.first_written + self.retention

[1] Patch to begin retention period on Last Written

- --- delete-old-bacula-volume.py 2009-02-19 11:04:12.000000000 -0600
+++ delete-old-bacula-volume2.py        2009-02-19 11:11:43.000000000 -0600
@@ -36 +36 @@
- -               return self.label_date + self.retention
+               return self.last_written + self.retention
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (MingW32)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iQEcBAEBAgAGBQJJnZR9AAoJEJWxx7fgsD+CWtsH+wVAuucYInpIbeozMjgHpBKf
LJ4pIQlQ1jPt3RjyzFdHqkCqO57eJxVyKHdvl1UA9FKtgQbCG7A++UQqqS84t/5A
UgFLKGHPSN8C39v47P2ts6+hamelKn8Kg7a1m+ZzuTGEFPOf5xNS3DL6Dg3UJDzB
CPR4gOLilxFfB654VnrZ2kPoJazArcBko3wrQNXU0r9zuZbtDcfWq+wuQFbb4OUM
cCMjjpIoC0J+lZFDJwHQfCItAl6tAgEWvuckjZUCCnXtmhWL57jChvAZcR0AgJD6
DSAroMt4w5hLK7rHQPHJ2uz0eZuCtLbui0hUeuaNllwllf1h1SOInBjSDJPU/UQ=
=fFRD
-----END PGP SIGNATURE-----
#!/usr/bin/env python

from datetime import datetime, timedelta

class Volume:
        '''One row of the Bacula catalog's Media table.

        The constructor takes the already-split, whitespace-stripped cells of
        a row in this column order: MediaId, VolumeName, FirstWritten,
        LastWritten, LabelDate, Recycle, VolRetention.
        '''

        # Layout of the timestamp columns as they are parsed from the table.
        TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

        media_id = 0
        name = ''
        first_written = None    # datetime, or None when the cell is not a date
        last_written = None     # datetime, or None when the cell is not a date
        label_date = None       # datetime, or None when the cell is not a date
        recycle = False         # True when the Recycle column is non-zero
        retention = None        # timedelta built from the VolRetention seconds

        def __init__(self, data):
                '''Build a volume from a sequence of seven string cells.

                Raises ValueError if MediaId, Recycle or VolRetention is not
                an integer, and IndexError if fewer than seven cells are given.
                '''
                self.media_id = int(data[0])
                self.name = data[1]
                self.first_written = self._parse_timestamp(data[2])
                self.last_written = self._parse_timestamp(data[3])
                self.label_date = self._parse_timestamp(data[4])
                self.recycle = int(data[5]) != 0
                # The retention is printed with thousands separators
                # (e.g. "2,592,000"), so strip them before converting.
                self.retention = timedelta(seconds=int(data[6].replace(',', '')))

        def _parse_timestamp(self, text):
                '''Return text parsed as a datetime, or None if it is not one.

                A cell that is not a real timestamp (for example an all-zero
                date or NULL) must not abort the whole run; such a volume is
                simply never considered deletable.
                '''
                try:
                        return datetime.strptime(text, self.TIMESTAMP_FORMAT)
                except (TypeError, ValueError):
                        return None

        def can_delete(self):
                '''Return True if the volume is recyclable and past its
                retention period'''
                if not self.recycle:
                        return False
                expires = self.delete_date()
                # An unknown expiry date is treated as "keep the volume".
                return expires is not None and expires < datetime.now()

        def delete_date(self):
                '''Return the moment the retention period ends, or None when
                the label date is unknown'''
                if self.label_date is None:
                        return None
                return self.label_date + self.retention

        def __repr__(self):
                return ('Volume[' + str(self.media_id) + ']: ' + self.name +
                        ' (del-after: ' + str(self.delete_date()) + ')')

def callff(cmd):
        '''Call a command and fail fast if the command does not return 0
        (success)

        cmd is a command line that is run through the shell.  Returns None
        when the command succeeds.  On failure an error is written to stderr
        and SystemExit is raised with a non-zero status, so the failure is
        visible to whatever invoked this script (cron, a wrapper, ...).
        '''
        import sys
        from subprocess import call

        ret = call(cmd, shell=True)
        if ret != 0:
                sys.stderr.write("Error occurred (" + str(ret) + ")\n")
                # call() reports death-by-signal as a negative number, which
                # is not a usable exit status; fall back to 1 in that case.
                sys.exit(ret if ret > 0 else 1)
                
def get_volumes_from_bconsole():
        '''Get a list of volumes in Bacula using bconsole

        Runs an SQL query against the Media table through bconsole, picks the
        data rows out of the ASCII table it prints, and returns one Volume per
        row (an empty list when no table is found in the output).
        '''
        from subprocess import Popen, PIPE

        get_media_script = ('sqlquery\n'
                            'select MediaId, VolumeName, FirstWritten, '
                            'LastWritten, LabelDate, Recycle, VolRetention '
                            'from Media;\n\nquit\n')

        # Feed the script on stdin and drain stdout with communicate();
        # waiting for the process before reading a pipe can block forever
        # once the output no longer fits in the pipe buffer.
        p = Popen(['/usr/sbin/bconsole'], stdin=PIPE, stdout=PIPE,
                  universal_newlines=True)
        output = p.communicate(get_media_script)[0]

        # The table is framed by three '+---+' border lines: above the title
        # row, between the title row and the data, and below the data.
        rows = []
        in_title_row = False
        in_table = False
        for line in output.splitlines():
                if line.startswith('+'):
                        if in_table:
                                in_table = False
                        elif in_title_row:
                                in_title_row = False
                                in_table = True
                        else:
                                in_title_row = True
                elif in_table and line.startswith('|'):
                        rows.append(line)

        # Each row looks like '| a | b | ... |'; the pieces before the first
        # and after the last '|' are empty and are dropped.
        return [Volume([cell.strip() for cell in row.split('|')[1:-1]])
                for row in rows]
        
def get_volumes_to_delete(volume_list):
        '''Return, in their original order, the volumes whose can_delete()
        is true'''
        return [candidate for candidate in volume_list if candidate.can_delete()]

def delete_volumes(volume_list):
        '''Delete the volumes from the Bacula catalog and from disk

        For every volume in volume_list the catalog entry is deleted through
        bconsole first, then the file of the same name is removed from the
        storage daemon's directory.  Nothing is run when volume_list is empty.

        Raises RuntimeError if the storage directory cannot be determined or
        if bconsole exits with a non-zero status, and OSError if a volume file
        exists but cannot be removed.
        '''
        import os
        import re
        from subprocess import Popen, PIPE

        bconsole = '/usr/sbin/bconsole'

        def run_bconsole(script):
                '''Feed a script to bconsole; return (exit status, output)'''
                # communicate() drains stdout while waiting, so a long status
                # report cannot block the child on a full pipe.
                p = Popen([bconsole], stdin=PIPE, stdout=PIPE,
                          universal_newlines=True)
                output = p.communicate(script)[0]
                return p.returncode, output

        def get_storage_dir():
                '''Get the directory that volumes are stored in'''
                get_storage_dir_script = 'status storage\nquit\n'
                output = run_bconsole(get_storage_dir_script)[1]

                # the line we're interested in directly follows the
                # 'Device status:' heading
                device_line = None
                take_next = False
                for line in output.splitlines():
                        if take_next:
                                device_line = line
                                break
                        if line.startswith('Device status:'):
                                take_next = True

                # pull out the path used by the storage daemon, e.g.
                #   Device "FileStorage" (/backup) is not open.
                found = None
                if device_line is not None:
                        found = re.search(r'Device "[^"]+" \(([^)]+)\)',
                                          device_line)
                if found is None:
                        # Never guess: a wrong directory here would make the
                        # disk cleanup act on the wrong path.
                        raise RuntimeError('Could not determine the storage '
                                           'directory from bconsole output')
                return found.group(1)

        def delete_from_catalog(volume):
                '''Delete a volume from the Bacula catalog'''
                # NOTE(review): bconsole may ask for a yes/no confirmation
                # after this command; verify against the installed Bacula
                # version that the deletion really goes through.
                delete_volume_script = ('delete media volume=' + volume.name +
                                        '\nquit\n')

                # delete the volume from the catalog; raise error if not
                # successful
                status = run_bconsole(delete_volume_script)[0]
                if status != 0:
                        raise RuntimeError('bconsole exited with status ' +
                                           str(status) + ' while deleting ' +
                                           'volume ' + volume.name)

        def delete_from_disk(volume, location):
                '''Delete a volume from disk'''
                # Removing the file directly (no shell) keeps odd characters
                # in a path from being interpreted; a file that is already
                # gone is not an error, matching 'rm -f'.
                path = os.path.join(location, volume.name)
                if os.path.lexists(path):
                        os.remove(path)

        if not volume_list:
                return

        storage = get_storage_dir()
        for volume in volume_list:
                delete_from_catalog(volume)
                delete_from_disk(volume, storage)
        
if __name__ == '__main__':
        # Script entry point: list the catalog's volumes, keep the ones that
        # may be deleted, then delete those.
        expired = get_volumes_to_delete(get_volumes_from_bconsole())
        delete_volumes(expired)

Attachment: delete-old-bacula-volume.py.sig
Description: Binary data

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
Bacula-users mailing list
Bacula-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-users

Reply via email to