#! /usr/bin/env python
#
# Maintenance for amanda on-disk backups
#
# Copyright (C) 2002 Georg C. F. Greve <greve@gnu.org>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#  
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#  
#   Copies of the GNU General Public License can be obtained at
#    http://www.gnu.org/licenses/gpl.html
#   or by writing to Free Software Foundation, Inc., 59 Temple Place,
#   Suite 330, Boston, MA 02111-1307 USA 
#

# The program has proven useful to its author on a Debian GNU/Linux
# system with file names of "<host>.<partition>.<level>[.<part>]"
# in directories of <YYYYMMDD>.
#
# If your system is non-Unix or is using another directory or file
# name convention, this program is not likely to work without modification;
# in fact your chances to delete good data increase. You have been warned.

import sys, os, string, re, time

# Path to backups.
backuproot = "/backup"

# How many backups to keep of every level.
keep = 2

# Which logfile to use for output.
logfile = "/var/log/amanda/maintenance.log"

# Name of file for index of backups generated after each run.
indexfile = "Index"

# What to do about incomplete backups?
#
# If set to 0: IGNORE incomplete backups,
#              which may end up cluttering the hard drive
#
# If set to 1: DELETE incomplete backups,
#              which may create problems if run while
#              a backup is in progress.
enforce_cleanliness = 0

###
### *** No configuration option below this line ***
###

def c_mul(a, b):
    """Multiply a by b and wrap the product around to 32 bits.

    Used to calculate hashes.
    Reference: http://effbot.org/zone/python-hash.htm

    a -- first factor; converted with int() before multiplying
    b -- second factor

    Returns the low 32 bits of the product as a non-negative integer.
    """
    # Masking the product directly gives the value that used to be obtained
    # by formatting it with hex() and feeding the text back through eval().
    return int((int(a) * b) & 0xFFFFFFFF)

### --- File Classes ---
class FileID:
    """Class to provide ID for backup files.

    The class is not multi-file aware, because even if a backup
    is split, it is still one entity.

    Two FileID objects are equal when directory, host, partition and
    level are all equal; ordering and hashing use the same four values
    in that order, so equal IDs address the same dictionary entry."""
    def __init__(self, directory, host, partition, level):
        """Initialize FileID.

        directory -- directory the backup file lives in
        host      -- host part of the backup file name
        partition -- partition part of the backup file name
        level     -- backup level as an integer"""
        self.directory=directory
        self.host=host
        self.partition=partition
        self.level=level

    def _key(self):
        """Return the tuple all comparisons and the hash are based on."""
        return (self.directory, self.host, self.partition, self.level)

    def __cmp__(self, other):
        """Compare two FileID objects; return -1, 0 or 1.

        Kept for cmp()-style callers.  Tuples compare element by element,
        which is the directory/host/partition/level order used before."""
        mine=self._key()
        theirs=(other.directory, other.host, other.partition, other.level)
        return (mine > theirs) - (mine < theirs)

    def __eq__(self, other):
        """Return whether both IDs describe the same backup."""
        if not isinstance(other, FileID):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        """Return whether the IDs describe different backups."""
        if not isinstance(other, FileID):
            return NotImplemented
        return self._key() != other._key()

    def __lt__(self, other):
        """Return whether this ID sorts before other."""
        if not isinstance(other, FileID):
            return NotImplemented
        return self._key() < other._key()

    def __le__(self, other):
        """Return whether this ID sorts before or equal to other."""
        if not isinstance(other, FileID):
            return NotImplemented
        return self._key() <= other._key()

    def __gt__(self, other):
        """Return whether this ID sorts after other."""
        if not isinstance(other, FileID):
            return NotImplemented
        return self._key() > other._key()

    def __ge__(self, other):
        """Return whether this ID sorts after or equal to other."""
        if not isinstance(other, FileID):
            return NotImplemented
        return self._key() >= other._key()

    def __hash__(self):
        """Return hash for FileID object, consistent with equality."""
        return hash(self._key())

    def __str__(self):
        """String output of FileID."""
        # "<host>:<partition>" is padded to 20 columns so entries line up.
        label="%s:%s" % (self.host,self.partition)
        return "%s (level %d)(in %s)" % (label.ljust(20),
                                         self.level,
                                         self.directory)

    def __repr__(self):
        """Full representation of FileID."""
        return "FileID" + repr(self._key())

    
class File:
    """Class for each backup 'file,' where a file may consist
    of multiple parts (since amanda might have split it).

    Attributes:
    id    -- ID object of the backup; its directory, host, partition and
             level attributes determine the file name on disk
    parts -- sorted list of suffixes of the additional split parts
    time  -- time stamp of the base file as reported by os.stat()"""
    def __init__(self, id, part):
        """Initialize File object.

        id   -- ID object of this backup
        part -- list of part suffixes known so far (may be empty)

        Raises OSError if the base file cannot be stat()ed."""
        self.id=id
        # Work on a copy so that addpart() never changes the caller's list.
        self.parts=list(part)
        # Element 9 of the stat result is st_ctime.
        self.time=os.stat(self._basepath())[9]

    def _basepath(self):
        """Return "<directory>/<host>.<partition>.<level>" for this backup."""
        return ".".join([self.id.directory+os.sep+self.id.host,
                         self.id.partition,
                         "%d"%self.id.level])

    def __str__(self):
        """String output of File."""
        return ("%s(from %s) %s") % (self.id,
                                     time.ctime(self.time),
                                     self.parts)

    def __hash__(self):
        """Return hash for File object. Equal to its FileID object."""
        return hash(self.id)

    def __cmp__(self, other):
        """Compare two File objects by comparing their FileID objects.

        Returns -1, 0 or 1."""
        return (self.id > other.id) - (self.id < other.id)

    def __eq__(self, other):
        """Two File objects are equal when their IDs are equal."""
        if not isinstance(other, File):
            return NotImplemented
        return self.id == other.id

    def __ne__(self, other):
        """Two File objects differ when their IDs differ."""
        if not isinstance(other, File):
            return NotImplemented
        return not self.id == other.id

    def addpart(self, suffix):
        """Add further parts (split-files) to a File object.

        suffix -- list of part suffixes to add; parts stay sorted."""
        self.parts.extend(suffix)
        self.parts.sort()

    def delete(self):
        """Remove all parts for this backup-file.

        Unlinks the base file first, then "<base>.<suffix>" for every
        known part.  Errors from os.unlink() are not caught."""
        base=self._basepath()
        os.unlink(base)
        for suffix in self.parts:
            os.unlink(".".join([base,suffix]))


class LOF:
    """Class for list of files.
    Wraps around a dictionary.

    Item access (obj[key], obj[key]=value, del obj[key]) goes to the
    wrapped dictionary, and any attribute not found on the wrapper itself
    (keys, has_key, get, ...) is looked up on that dictionary."""
    def __init__(self):
        """Initialize dictionary of files."""
        # Stored through __dict__ directly because __setattr__ below
        # redirects ordinary attribute assignment to the wrapped dictionary.
        self.__dict__['dict']={}

    def __getattr__(self,name):
        """Look up attributes missing on the wrapper on the dictionary."""
        # 'dict' itself is only missing on an instance whose __init__ has
        # not run (e.g. while copy or pickle rebuild it).  Without this
        # guard self.dict would re-enter __getattr__ without end.
        if name == 'dict':
            raise AttributeError(name)
        return getattr(self.dict,name)

    def __setattr__(self,name,value):
        """Forward attribute assignment to the wrapped dictionary."""
        return setattr(self.dict,name,value)

    def __delattr__(self,name):
        """Forward attribute deletion to the wrapped dictionary."""
        return delattr(self.dict,name)

    def __getitem__(self,id):
        """Return the entry stored under id; KeyError if there is none."""
        return self.dict[id]

    def __setitem__(self,id,value):
        """Store value under id."""
        self.dict[id]=value

    def __delitem__(self,id):
        """Remove the entry stored under id; KeyError if there is none."""
        del self.dict[id]
    

### --- Partition Classes ---

class PartitionID:
    """Class to provide ID for backed up partitions.

    Two PartitionID objects are equal when host and partition are equal;
    ordering and hashing use the same two values in that order, so equal
    IDs address the same dictionary entry."""
    def __init__(self, host, partition):
        """Initialize PartitionID.

        host      -- name of the backed up host
        partition -- name of the backed up partition"""
        self.host=host
        self.partition=partition

    def _key(self):
        """Return the tuple all comparisons and the hash are based on."""
        return (self.host, self.partition)

    def __cmp__(self, other):
        """Compare two PartitionID objects; return -1, 0 or 1.

        Kept for cmp()-style callers.  Host is compared first, then
        partition."""
        mine=self._key()
        theirs=(other.host, other.partition)
        return (mine > theirs) - (mine < theirs)

    def __eq__(self, other):
        """Return whether both IDs name the same partition."""
        if not isinstance(other, PartitionID):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        """Return whether the IDs name different partitions."""
        if not isinstance(other, PartitionID):
            return NotImplemented
        return self._key() != other._key()

    def __lt__(self, other):
        """Return whether this ID sorts before other."""
        if not isinstance(other, PartitionID):
            return NotImplemented
        return self._key() < other._key()

    def __le__(self, other):
        """Return whether this ID sorts before or equal to other."""
        if not isinstance(other, PartitionID):
            return NotImplemented
        return self._key() <= other._key()

    def __gt__(self, other):
        """Return whether this ID sorts after other."""
        if not isinstance(other, PartitionID):
            return NotImplemented
        return self._key() > other._key()

    def __ge__(self, other):
        """Return whether this ID sorts after or equal to other."""
        if not isinstance(other, PartitionID):
            return NotImplemented
        return self._key() >= other._key()

    def __hash__(self):
        """Return hash for PartitionID object, consistent with equality."""
        return hash(self._key())

    def __str__(self):
        """String output of PartitionID."""
        return ("[%s:%s]") % (self.host,self.partition)

    def __repr__(self):
        """Full representation of PartitionID."""
        return "PartitionID" + repr(self._key())

        
class Partition:
    """Class for each backed up partition.

    Attributes:
    id    -- ID object of the partition
    files -- list of the FileID objects of this partition's backups,
             in the order they were added (callers may re-sort it)"""
    def __init__(self, id, fileid):
        """Initialize backup Partition object.

        id     -- ID object of the partition
        fileid -- FileID object of the first backup found for it"""
        self.id=id
        self.files=[fileid]

    def append(self, fileid):
        """Append new FileID object for a backup of this Partition."""
        self.files.append(fileid)

    def __str__(self):
        """String output of Partition."""
        return ("%s\n\t%s") % (self.id,self.files)

    def __repr__(self):
        """Full representation of Partition."""
        return "Partition" + repr((self.id,self.files))

def by_inv_date(a,b):
    """Comparison function sorting FileID objects newest first.

    a, b -- keys into the global lof; the time attribute of the
            associated File objects is compared

    Returns 1 if a is older than b, -1 if a is newer and 0 if both have
    the same time, so list.sort(by_inv_date) yields the newest backup
    first."""
    time_a=lof[a].time
    time_b=lof[b].time
    # Strict comparisons: equal time stamps must compare as equal in both
    # directions, otherwise the comparison is inconsistent.
    if time_a < time_b: return 1
    if time_a > time_b: return -1
    return 0

def by_level(a,b):
    """Comparison function sorting FileID objects by ascending level.

    a, b -- objects with a level attribute

    Returns 1 if a has the higher level, -1 if it has the lower level
    and 0 if both levels are equal."""
    # Strict comparisons on both sides, so equal levels really yield 0.
    return (a.level > b.level) - (a.level < b.level)


class LOP:
    """Class for list of partitions.
    Wraps around a dictionary.

    Item access (obj[key], obj[key]=value, del obj[key]) goes to the
    wrapped dictionary, and any attribute not found on the wrapper itself
    (keys, has_key, get, ...) is looked up on that dictionary."""
    def __init__(self):
        """Initialize dictionary of partitions."""
        # Stored through __dict__ directly because __setattr__ below
        # redirects ordinary attribute assignment to the wrapped dictionary.
        self.__dict__['dict']={}

    def __getattr__(self,name):
        """Look up attributes missing on the wrapper on the dictionary."""
        # 'dict' itself is only missing on an instance whose __init__ has
        # not run (e.g. while copy or pickle rebuild it).  Without this
        # guard self.dict would re-enter __getattr__ without end.
        if name == 'dict':
            raise AttributeError(name)
        return getattr(self.dict,name)

    def __setattr__(self,name,value):
        """Forward attribute assignment to the wrapped dictionary."""
        return setattr(self.dict,name,value)

    def __delattr__(self,name):
        """Forward attribute deletion to the wrapped dictionary."""
        return delattr(self.dict,name)

    def __getitem__(self,id):
        """Return the entry stored under id; KeyError if there is none."""
        return self.dict[id]

    def __setitem__(self,id,value):
        """Store value under id."""
        self.dict[id]=value

    def __delitem__(self,id):
        """Remove the entry stored under id; KeyError if there is none."""
        del self.dict[id]
    

###
### Begin Program
###

# open the log for appending; buffer size 0 requests unbuffered writes
log = open(logfile, "a", 0)
log.write("\n***** " + time.asctime() + ": maintenance.py starting up\n")

# STEP 1: Find directories containing backups

# Collect the entries directly below the backup root
# whose names consist of exactly 8 digits (<YYYYMMDD>)
directories = [D for D in os.listdir(backuproot)
               if re.match("^[0-9]{8}$", D)]


# STEP 2: Create list of all backup files on hard disk

# List will be available globally in lof
lof = LOF()

def walkdir(arg,dir,path):
    """Visitor for os.path.walk(): register the backup files of one directory.

    arg  -- unused; part of the os.path.walk() callback signature
    dir  -- directory currently being visited
    path -- names of the entries in dir (names only, without directory)

    Names of the form "<host>.<partition>.<level>[.<part>...]" are entered
    into the global lof, further parts being added to the File already
    listed under the same FileID.  Names ending in ".tmp" are treated as
    incomplete backups and deleted or ignored depending on
    enforce_cleanliness.  Anything else is logged and ignored."""
    log.write("Walking directory %s\n" % dir)
    for val in path:
        # first rough sanity check for file name(s)
        if not re.match(r"^[A-Za-z]+\.[_A-Za-z]+\.[0-9]",val):
            log.write("Inconsistent filename \"%s\". Ignored.\n" % val)
            continue
        splitval=val.split(".")
        # The pattern only guarantees that the level field starts with a
        # digit; reject names whose level is not a plain number.
        try:
            level=int(splitval[2])
        except ValueError:
            log.write("Inconsistent filename \"%s\". Ignored.\n" % val)
            continue
        fileid=FileID(dir,splitval[0],splitval[1],level)
        # is this a real backup or a temporary file?
        if splitval[-1]=="tmp":
            log.write("Incomplete backup found: \"%s.\" " % val)
            if enforce_cleanliness:
                # val is only the file name; it has to be joined with the
                # visited directory, or the unlink would be resolved against
                # the current working directory.
                os.unlink(os.path.join(dir,val))
                log.write("Deleted.\n")
            else:
                log.write("Ignored.\n")
        # if it is a complete backup - is it an additional part of
        # a backup already in list?
        elif lof.has_key(fileid):
            if splitval[3:]:
                lof[fileid].addpart(splitval[3:])
        # otherwise assume it is a complete backup that we have
        # come across for the first time and add it to the list
        else:
            lof[fileid]=File(fileid,splitval[3:])

# Let walkdir visit every backup directory found in STEP 1
for D in directories:
    os.path.walk(backuproot + os.sep + D, walkdir, None)

# Print list of files - activate for debugging purposes only
#for I in lof.keys():
#    print lof[I]


# STEP 3: Sort the files into list of backed up partitions

# List will be available globally in lop
lop = LOP()

for I in lof.keys():
    id = PartitionID(lof[I].id.host, lof[I].id.partition)
    if not lop.has_key(id):
        # first backup seen for this host/partition pair
        lop[id] = Partition(id, I)
    else:
        lop[id].append(I)

# Log Partition information, each partition's backups newest first
log.write("\nPartitions:\n")
for PID in lop.keys():
    lop[PID].files.sort(by_inv_date)
    log.write("\t%s\n" % lop[PID])
log.write("\nDeleting obsolete backups:\n")


# STEP 4: Determine which files we want to delete
#
# Each partition's backups are walked from newest to oldest.  A backup is
# kept when its level is lower than every level seen so far, or when it has
# the same level as the lowest one seen and fewer than `keep` backups of
# that level have been kept.  Everything else is deleted: backups of a
# higher level than a newer backup, and surplus backups of the same level.
for PID in lop.keys():

    # Sort the backup files for this partition by their inverse date
    # (by_inv_date puts the newest backup first)
    lop[PID].files.sort(by_inv_date)

    # Setup run variables
    # last:   lowest level seen so far; starts at 10 so that the first
    #         backup counts as "lower" (assumes levels stay below 10 --
    #         TODO confirm against the amanda configuration)
    # ctr:    how many more backups of level `last` may still be kept
    # delete: FileIDs selected for removal
    last=10
    ctr=keep-1
    delete=[]

    # Loop to find delete-worthy backup files
    for FID in lop[PID].files:
        # Backup of lower level than the last one we know of?
        if FID.level<last:
            # remember level
            last=FID.level
            # reset counter
            ctr=keep-1
        else:
            # Higher level than a newer backup, or quota for this
            # level already used up?
            if FID.level>last or ctr==0:
                # add to "to be deleted" list
                delete.append(FID)
            elif ctr > 0:
                # Same level and quota left: keep this backup.
                # Reduce counter by 1, continue loop
                ctr-=1

    # Loop to delete delete-worthy backup files
    # (collected first so the list is not modified while iterating over it)
    for FID in delete:
        log.write("\tDEL %s\n" % lof[FID])
        # drop it from the partition, remove the files from disk,
        # then forget the File object
        lop[PID].files.remove(FID)
        lof[FID].delete()
        del lof[FID]


# STEP 5: Search for empty directories and delete them
log.write("\nSearching for empty directories:\n")
for D in directories:
    if os.listdir(backuproot + os.sep + D):
        # directory still has entries - leave it alone
        continue
    os.rmdir(backuproot + os.sep + D)
    log.write("\tDirectory %s empty. Removed.\n" % D)

# STEP 6: Write current backup index
log.write("\nWriting backup index information... ")

# IDs of all known partitions, to be sorted alphabetically
pid_list = lop.keys()
def by_host_partition(a,b):
    """Comparison function sorting PartitionID objects alphabetically.

    a, b -- objects with host and partition attributes

    Compares by host first and by partition second.  Returns -1, 0 or 1;
    0 only if both host and partition are equal."""
    left=(a.host, a.partition)
    right=(b.host, b.partition)
    # Strict comparisons on both sides, so equal IDs really yield 0.
    return (left > right) - (left < right)
pid_list.sort(by_host_partition)

# open file for index
# NOTE: from here on the name indexfile is bound to the open file object,
# no longer to the configured file name
indexfile = open(backuproot + os.sep + indexfile, "w", 0)
indexfile.write("Index of " + time.asctime() + "\n\n")

# one paragraph per partition, its backups listed newest first
for PID in pid_list:
    lop[PID].files.sort(by_inv_date)
    indexfile.write("%s\n" % PID)
    for FID in lop[PID].files:
        indexfile.write(
            "\tLevel %d made at %s available in %s\n"
            % (FID.level, time.ctime(lof[FID].time), FID.directory))
    indexfile.write("\n")

# close index file
indexfile.close()

# STEP 7: Terminate
log.write("Done.\n")
log.close()
