I wrote a script to do this for me. I call it bacula-du, since it
accepts many of the same options as du(1) and prints its output in the
same format.

Usage: bacula-du [OPTIONS] -j JOBID
Summarize disk usage of directories included in the backup JOBID

Options are:
  -a, --all             write counts for all files, not just directories
  -b, --bytes           use size in octets rather than number of blocks
  -B, --block-size=SIZE report SIZE-byte blocks (default 1Ki)
  -m                    like --block-size=1Mi
  -S, --separate-dirs   do not include size of subdirectories
  -t, --threshold=SIZE  skip output for files or directories with usage
                        below SIZE
  -L, --largest=NUM     only print NUM largest directories/files

SIZE may be (or may be an integer optionally followed by) one of following:
k (1000), Ki (1024), M (1000*1000), Mi (1024*1024), G, Gi, T, Ti, P, Pi.

I hope others can find it useful.
-- 
Kjetil T. Homme
Redpill Linpro AS - Changing the game
#! /usr/bin/perl -w

# bacula-du 1.0
# Written by Kjetil Torgrim Homme <kjetil.ho...@redpill-linpro.com>
# Released under GPLv3 or the same terms as Bacula itself

# Print the command-line synopsis and option summary on standard output,
# then terminate the program with exit status 64.  Never returns.
sub usage {
    my $help_text = <<"_END_";
Usage: $0 [OPTIONS] -j JOBID
Summarize disk usage of directories included in the backup JOBID

Options are:
  -a, --all             write counts for all files, not just directories
  -b, --bytes           use size in octets rather than number of blocks
  -B, --block-size=SIZE report SIZE-byte blocks (default 1Ki)
  -m                    like --block-size=1Mi
  -S, --separate-dirs   do not include size of subdirectories
  -t, --threshold=SIZE  skip output for files or directories with usage
                        below SIZE
  -L, --largest=NUM     only print NUM largest directories/files

SIZE may be (or may be an integer optionally followed by) one of following:
k (1000), Ki (1024), M (1000*1000), Mi (1024*1024), G, Gi, T, Ti, P, Pi.
_END_
    print $help_text;
    exit(64);
}

use strict;
use DBD::mysql;
use DBI;
use MIME::Base64;
use Getopt::Long qw(:config bundling no_ignore_case);
use Data::Dumper;

# Catalog connection settings.  These are passed to DBI->connect further
# down; $db and $dbhost are also used in the connection-failure message.
# The active configuration selects the PostgreSQL driver (DBI:Pg).
my $dbhost = "localhost";
my $db = "bacula";
my $dsn = "DBI:Pg:dbname=$db;host=$dbhost";
my $dbuser = "postgres";
my $dbpass = "";
# Suggestion for MySQL:
# my $dsn = "DBI:mysql:database=mysql;mysql_read_default_group=clientp";
# my $dbuser = "mysql";
# my $dbpass = undef;
# NOTE(review): the suggested MySQL DSN names the database "mysql" rather
# than $db ("bacula") -- confirm the intended database name before using it.

#######################

# Lookup table: each character of the 64-symbol alphabet maps to its
# position (0..63) in the alphabet string.
my $alphabet =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
my %base64;
$base64{substr($alphabet, $_, 1)} = $_ for 0 .. length($alphabet) - 1;

# Decode a string of alphabet characters into an integer.  The characters
# are read left to right as base-64 digits, most significant first, so
# "B" is 1 and "BA" is 64.  An empty string decodes to 0.
sub decode_bacula_base64 {
    my $value = 0;
    foreach my $digit (split("", $_[0])) {
        $value = ($value << 6) + $base64{$digit};
    }
    return $value;
}

# Return the decoded value of the eighth whitespace-separated field
# (index 7) of an LStat string -- per this function's name, the file size.
sub extract_size_from_lstat {
    my ($lstat) = @_;
    my @fields = split(" ", $lstat);
    return decode_bacula_base64($fields[7]);
}

# Return 512 times the decoded value of the tenth whitespace-separated
# field (index 9) of an LStat string.
# NOTE(review): presumably that field is a count of 512-byte blocks, making
# the result the allocated size in octets -- confirm against Bacula's
# LStat layout.
sub extract_blocks_from_lstat {
    my ($lstat) = @_;
    my @fields = split(" ", $lstat);
    my $blocks = decode_bacula_base64($fields[9]);
    return 512 * $blocks;
}

# Convert a SIZE argument into a plain number.
#
# Accepted forms:
#   - a bare non-negative integer             ("4096")
#   - a unit on its own, meaning one of it    ("Mi" => 1024**2)
#   - an integer followed by a unit           ("10k" => 10000)
# A trailing "B" after the unit is tolerated ("10kB", "2MiB").
# k, M, G, T, P are powers of 1000; Ki (or ki), Mi, Gi, Ti, Pi are powers
# of 1024.
#
# Returns the resulting number.  Dies with "Can't parse: ..." when the
# argument is in none of the accepted forms or names an unknown unit.
sub convert_units {
    my $num = shift;

    my %units = ("k" => 1000**1, "Ki" => 1024**1, "ki" => 1024**1,
                 "M" => 1000**2, "Mi" => 1024**2,
                 "G" => 1000**3, "Gi" => 1024**3,
                 "T" => 1000**4, "Ti" => 1024**4,
                 "P" => 1000**5, "Pi" => 1024**5);

    if ($num =~ /^(\d*)([kKMGTP]i?)B?$/) {
        my ($count, $unit) = ($1, $2);
        # The pattern admits spellings that have no table entry (such as
        # "K"); reject them rather than multiplying by an undefined value,
        # which would silently produce 0.
        die "Can't parse: $num\n" unless exists $units{$unit};
        # Only an absent count defaults to 1.  Testing the count for truth
        # would turn an explicit "0" into 1 as well.
        $num = (length($count) ? $count : 1) * $units{$unit};
    } elsif ($num !~ /^\d+$/) {
        die "Can't parse: $num\n";
    }
    return $num;
}

### main program resumes

# Command-line handling.  Defaults first, then GetOptions overrides them;
# any parse error or a missing/zero JOBID prints the usage text and exits.
my $threshold = 1; # omit 0 octet sized files/directories by default
my $blocksize = 1024;
my ($jobid, $all, $bytes, $separate_dirs, $largest);

GetOptions("jobid|j=i" => \$jobid,
           "threshold|t=s" => \$threshold,
           "separate-dirs|S" => \$separate_dirs,
           "all|a" => \$all,
           "bytes|b" => \$bytes,
           "block-size|B=s" => \$blocksize,
           "largest|L=i" => \$largest,
           "m" => sub { $blocksize = "1Mi" },
    ) || usage();

usage() unless $jobid;

# Both values may carry a unit suffix; turn them into plain numbers.
$threshold = convert_units($threshold);
$blocksize = convert_units($blocksize);

# The report divides every size by the block size, so a value of zero would
# abort with a division-by-zero error only after the whole catalog has been
# read.  Refuse it up front instead.
die "Block size must be greater than zero\n" if $blocksize == 0;


# Suffix appended to an encoded field, indexed by its length modulo 4, so
# that the padded string has a length that is a multiple of four.
my @padding = ("", "A==", "==", "=");

# Alternative decoder for the eighth whitespace-separated field (index 7)
# of an LStat string: pad the field, run it through MIME::Base64's
# decode_base64, and read the resulting bytes as one big-endian integer.
# Nothing else in this file calls this function.
sub extract_size_from_lstat_foo {
    my ($lstat) = @_;
    my $encoded = (split(" ", $lstat))[7];
    my $padded = $encoded . $padding[length($encoded) % 4];

    my $value = 0;
    $value = ($value << 8) + $_ for unpack("C*", decode_base64($padded));
    return $value;
}

# Choose the LStat decoder used for every row: the size field when
# --bytes was given, otherwise the block-based figure.
my $extract_size;
if ($bytes) {
    $extract_size = \&extract_size_from_lstat;
} else {
    $extract_size = \&extract_blocks_from_lstat;
}

# Open the catalog connection with AutoCommit switched off; on failure,
# report on standard error and leave with exit status 2.
my $dbh = DBI->connect($dsn, $dbuser, $dbpass, {AutoCommit => 0});
if (!$dbh) {
    print STDERR "Could not connect to database $db on host $dbhost\n";
    exit 2;
}

# Fetch path, file name and LStat for every file recorded for the job.
# The job id is bound through a placeholder rather than interpolated into
# the SQL text.  The connection is opened without RaiseError, so prepare
# and execute signal failure by returning false; check both, otherwise a
# failed query would be indistinguishable from a job with no files.
my $sth = $dbh->prepare("
   SELECT p.Path, fn.Name, LStat
   FROM Path p
     JOIN File f ON f.PathId = p.PathId
     JOIN Filename fn ON f.FilenameId = fn.FilenameId
   WHERE f.JobId = ?")
    or die "Could not prepare query: " . $dbh->errstr . "\n";
$sth->execute($jobid)
    or die "Could not execute query: " . $sth->errstr . "\n";

# Read every row of the result set and accumulate sizes in %du, keyed by
# path.  Each file's size is charged to its own directory and, unless
# --separate-dirs was given, to every ancestor directory as well.  With
# --all, each file additionally gets an entry of its own.
my %du;
my $rowcount = 0;
while (my ($path, $fname, $lstat) = $sth->fetchrow_array) {
    my $size = $extract_size->($lstat);
    # print STDERR "Got '$path' size $size\n";

    # When the name is empty, "$path$fname" is the same key as $path, so
    # adding to both would count the row twice under --all.
    $du{"$path$fname"} += $size if $all && $fname ne '';
    $du{$path} += $size;

    unless ($separate_dirs) {
        # Strip one trailing "component/" at a time and charge each parent.
        # The loop ends when nothing more can be stripped, so a path that
        # never reduces to "/" (empty, relative, or drive-letter style such
        # as "C:/dir/") terminates instead of spinning forever.
        while ($path =~ s,[^/]+/$,,) {
            last if $path eq '';
            $du{$path} += $size;
        }
    }

    # Progress is reported for every row, including under --separate-dirs.
    if ((++$rowcount % 1000) == 0) {
        print STDERR "got $rowcount rows\r";
    }
}
$dbh->disconnect();
print STDERR "done reading database.\n";

# --largest=NUM: raise the threshold to the NUM-th largest accumulated
# size, unless the threshold already in force is higher than that.
if ($largest) {
    my @ascending = sort { $a <=> $b } values %du;
    my $cutoff = 0;
    $cutoff = $ascending[-$largest] if $largest < @ascending;
    if (!$threshold || $threshold <= $cutoff) {
        $threshold = $cutoff;
    }
}

# Print one line per entry at or above the threshold: the size in blocks,
# rounded up, followed by the path.  Sorting is done on the path with '~'
# appended, which places a parent directory after its children (any
# character that sorts after '/' would serve).
foreach my $entry (sort { "$a~" cmp "$b~" } keys %du) {
    my $usage = $du{$entry};
    next if $usage < $threshold;
    my $blocks = ($usage + $blocksize - 1) / $blocksize;
    printf("%9d %s\n", $blocks, $entry);
}
------------------------------------------------------------------------------
Colocation vs. Managed Hosting
A question and answer guide to determining the best fit
for your organization - today and in the future.
http://p.sf.net/sfu/internap-sfd2d
_______________________________________________
Bacula-users mailing list
Bacula-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-users

Reply via email to