I wrote a script to do this for me. I call it bacula-du, since it
accepts many of the same options as du(1) and prints its output in the same format.
Usage: bacula-du [OPTIONS] -j JOBID
Summarize disk usage of directories included in the backup JOBID
Options are:
-a, --all write counts for all files, not just directories
-b, --bytes use size in octets rather than number of blocks
-B, --block-size=SIZE report SIZE-byte blocks (default 1Ki)
-m like --block-size=1Mi
-S, --separate-dirs do not include size of subdirectories
-t, --threshold=SIZE skip output for files or directories with usage
below SIZE
-L, --largest=NUM only print NUM largest directories/files
SIZE may be (or may be an integer optionally followed by) one of following:
k (1000), Ki (1024), M (1000*1000), Mi (1024*1024), G, Gi, T, Ti, P, Pi.
I hope others can find it useful.
--
Kjetil T. Homme
Redpill Linpro AS - Changing the game
#! /usr/bin/perl -w
# bacula-du 1.0
# Written by Kjetil Torgrim Homme <kjetil.ho...@redpill-linpro.com>
# Released under GPLv3 or the same terms as Bacula itself
# Print the command-line synopsis and terminate the program.
#
# This sub is only called when the command line is unusable (option
# parsing failed or -j is missing), so the text is written to STDERR:
# that keeps it out of any pipeline that consumes the du-style report
# on STDOUT.
#
# Takes no arguments and never returns; the process exits with status
# 64 (EX_USAGE in sysexits.h).
sub usage {
    print STDERR <<"_END_";
Usage: $0 [OPTIONS] -j JOBID
Summarize disk usage of directories included in the backup JOBID
Options are:
-a, --all write counts for all files, not just directories
-b, --bytes use size in octets rather than number of blocks
-B, --block-size=SIZE report SIZE-byte blocks (default 1Ki)
-m like --block-size=1Mi
-S, --separate-dirs do not include size of subdirectories
-t, --threshold=SIZE skip output for files or directories with usage
below SIZE
-L, --largest=NUM only print NUM largest directories/files
SIZE may be (or may be an integer optionally followed by) one of following:
k (1000), Ki (1024), M (1000*1000), Mi (1024*1024), G, Gi, T, Ti, P, Pi.
_END_
    exit(64);
}
use strict;
use DBD::mysql;
use DBI;
use MIME::Base64;
use Getopt::Long qw(:config bundling no_ignore_case);
use Data::Dumper;
# Catalog connection settings.  Edit these to match the local Bacula
# installation; as shipped they point at a PostgreSQL catalog.
my ($dbhost, $db)     = ("localhost", "bacula");
my ($dbuser, $dbpass) = ("postgres", "");
my $dsn = "DBI:Pg:dbname=$db;host=$dbhost";

# For a MySQL catalog the author suggests these values instead:
#   my $dsn = "DBI:mysql:database=mysql;mysql_read_default_group=clientp";
#   my $dbuser = "mysql";
#   my $dbpass = undef;
#######################
# Lookup table for the base64 digit alphabet used in LStat fields:
# maps each digit character to its value ('A' => 0 ... '/' => 63).
# Built inside a bare block so the helper list does not leak into the
# rest of the file.
my %base64;
{
    my @digits = split("",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
    @base64{@digits} = (0 .. $#digits);
}

# Decode one number written in Bacula's base64 notation.
#
# This is a positional encoding, not MIME base64: every character is
# one base-64 digit, most significant first, so there is no padding and
# no byte alignment.  A leading '-' marks a negative value, which is
# the form Bacula's own from_base64() accepts.
#
# Argument: the encoded string.  An undefined or empty argument (what
#           a caller gets from a too-short LStat line) decodes to 0.
# Returns:  the decoded integer.
sub decode_bacula_base64 {
    my ($encoded) = @_;
    return 0 unless defined $encoded && length $encoded;

    # Work on the copy, so the caller's string is never modified.
    my $negative = ($encoded =~ s/^-//);

    my $acc = 0;
    for my $digit (split("", $encoded)) {
        $acc <<= 6;
        $acc += $base64{$digit};
    }
    return $negative ? -$acc : $acc;
}
# Return the size in octets recorded in an LStat string.
#
# Argument: the whitespace-separated LStat value of one File row.
# Returns:  the decoded value of field 7 (counting from 0), which is
#           the st_size slot in the order Bacula writes stat fields.
sub extract_size_from_lstat {
    my ($lstat) = @_;
    my @fields = split(" ", $lstat);
    return decode_bacula_base64($fields[7]);
}
# Return the allocated size in octets recorded in an LStat string.
#
# Argument: the whitespace-separated LStat value of one File row.
# Returns:  the decoded value of field 9 (counting from 0) multiplied
#           by 512, i.e. the block count converted to octets.
sub extract_blocks_from_lstat {
    my ($lstat) = @_;
    my @fields = split(" ", $lstat);
    my $blocks = decode_bacula_base64($fields[9]);
    return 512 * $blocks;
}
# Convert a SIZE argument such as "1Mi", "20k", "Gi" or "4096" into a
# plain number of octets.
#
# Accepted forms:
#   - a bare non-negative integer, returned unchanged;
#   - an optional integer followed by a unit suffix and an optional
#     trailing "B".  A missing integer means 1, so "Ki" is 1024.
# Suffixes without "i" are powers of 1000, suffixes with "i" are powers
# of 1024.  "K" and "ki" are accepted as lenient spellings of "k" and
# "Ki" respectively.
#
# Argument: the SIZE string.
# Returns:  the size in octets.
# Dies:     with "Can't parse: ..." when the string matches neither form.
sub convert_units {
    my $num = shift;
    my %units = ("k" => 1000**1, "Ki" => 1024**1, "ki" => 1024**1,
                 # The pattern below lets "K" through, so it needs a
                 # multiplier too; without one "-B K" became 0.
                 "K" => 1000**1,
                 "M" => 1000**2, "Mi" => 1024**2,
                 "G" => 1000**3, "Gi" => 1024**3,
                 "T" => 1000**4, "Ti" => 1024**4,
                 "P" => 1000**5, "Pi" => 1024**5);
    if ($num =~ /^(\d*)([kKMGTP]i?)B?$/) {
        my ($count, $suffix) = ($1, $2);
        # Test for an absent count by length, not truth: "0k" has an
        # explicit count of zero and must stay zero.
        $count = 1 unless length $count;
        $num = $count * $units{$suffix};
    }
    elsif ($num !~ /^\d+$/) {
        die "Can't parse: $num\n";
    }
    return $num;
}
### main program resumes

# Defaults.  Both sizes may be given on the command line with a unit
# suffix and are normalised to octet counts by convert_units() below.
my $threshold = 1; # omit 0 octet sized files/directories by default
my $blocksize = 1024;
my ($jobid, $all, $bytes, $separate_dirs, $largest);

# Any unknown or malformed option ends the program via usage().
GetOptions("jobid|j=i" => \$jobid,
           "threshold|t=s" => \$threshold,
           "separate-dirs|S" => \$separate_dirs,
           "all|a" => \$all,
           "bytes|b" => \$bytes,
           "block-size|B=s" => \$blocksize,
           "largest|L=i" => \$largest,
           "m" => sub { $blocksize = "1Mi" },
    ) || usage();

# The job id is the one mandatory argument.
usage() unless $jobid;

$threshold = convert_units($threshold);
$blocksize = convert_units($blocksize);

# Every total in the report is divided by the block size, so a zero
# value (for instance "-B 0") has to be refused here instead of
# surfacing later as "Illegal division by zero".
die "Block size must be greater than zero\n" unless $blocksize > 0;
# Suffix to append to a base64 string, indexed by (length % 4), so that
# the result is a multiple of four characters long.
my @padding = ("", "A==", "==", "=");

# Alternative size extractor built on MIME::Base64.
#
# Takes field 7 (counting from 0) of the whitespace-separated LStat
# string, pads it with the matching @padding entry, decodes it with
# decode_base64() and folds the resulting bytes, first byte most
# significant, into one integer.
#
# Nothing in the visible script calls this sub.
#
# Argument: the LStat string of one File row.
# Returns:  the integer assembled from the decoded bytes.
sub extract_size_from_lstat_foo {
    my ($lstat) = @_;
    my $b64 = (split(" ", $lstat))[7];
    my $padded = $b64 . $padding[length($b64) % 4];

    my $acc = 0;
    for my $byte (unpack("C*", decode_base64($padded))) {
        $acc = ($acc << 8) + $byte;
    }
    return $acc;
}
# Pick the per-row size extractor once, before the read loop:
# --bytes selects the recorded file size, otherwise the block count
# converted to octets is used.
my $extract_size;
if ($bytes) {
    $extract_size = \&extract_size_from_lstat;
}
else {
    $extract_size = \&extract_blocks_from_lstat;
}
# Open the catalog connection; give up with exit status 2 if that fails.
my $dbh = DBI->connect($dsn, $dbuser, $dbpass, { AutoCommit => 0 });
if (!$dbh) {
    print STDERR "Could not connect to database $db on host $dbhost\n";
    exit 2;
}
# Fetch path, file name and encoded stat record for every file of the
# requested job.  The job id is bound through a placeholder instead of
# being pasted into the SQL text, and both steps are checked: without
# that a failed query would produce an empty report rather than an
# error.
my $sth = $dbh->prepare("
SELECT p.Path, fn.Name, LStat
FROM Path p
JOIN File f ON f.PathId = p.PathId
JOIN Filename fn ON f.FilenameId = fn.FilenameId
WHERE f.JobId = ?")
    or die "Could not prepare catalog query: " . $dbh->errstr . "\n";
$sth->execute($jobid)
    or die "Could not run catalog query for job $jobid: " . $sth->errstr . "\n";
# %du maps a path (a directory or, with --all, a file) to its
# accumulated size in octets.
my %du;
my $rowcount = 0;
while (my ($path, $fname, $lstat) = $sth->fetchrow_array) {
    my $size = $extract_size->($lstat);
    # print STDERR "Got '$path' size $size\n";

    # With --all every file gets an entry of its own.  A row with an
    # empty name would use the same key as its directory, which is
    # credited on the next line anyway, so skip it here to avoid
    # counting that row twice.
    $du{"$path$fname"} += $size if $all && length $fname;
    $du{$path} += $size;

    # Unless --separate-dirs was given, credit every ancestor directory
    # too.  The loop strips one trailing "name/" component per round and
    # stops as soon as nothing is left to strip, so it also terminates
    # for paths that never reduce to "/" (e.g. "C:/Users/").
    unless ($separate_dirs) {
        my $ancestor = $path;
        while ($ancestor =~ s,[^/]+/$,, && length($ancestor)) {
            $du{$ancestor} += $size;
        }
    }

    # Progress indicator; counted for every row, whatever the options.
    if ((++$rowcount % 1000) == 0) {
        print STDERR "got $rowcount rows\r";
    }
}
$dbh->disconnect();
print STDERR "done reading database.\n";
# --largest=NUM: raise the threshold to the size of the NUM-th largest
# entry, unless the threshold already in force is stricter than that.
if ($largest) {
    my @ascending = sort { $a <=> $b } values %du;

    # With fewer entries than requested there is nothing to cut off.
    my $cutoff = 0;
    $cutoff = $ascending[-$largest] if $largest < @ascending;

    if (!$threshold || $threshold <= $cutoff) {
        $threshold = $cutoff;
    }
}
# Print the report.  Sorting on the name with '~' appended places a
# parent directory below its children; any character that sorts after
# '/' would serve equally well.
my @ordered = sort { "$a~" cmp "$b~" } keys %du;
for my $path (@ordered) {
    my $usage = $du{$path};
    next if $usage < $threshold;

    # Adding (block size - 1) before dividing makes the %d conversion
    # round partial blocks up.
    my $blocks = ($usage + $blocksize - 1) / $blocksize;
    printf("%9d %s\n", $blocks, $path);
}
------------------------------------------------------------------------------
Colocation vs. Managed Hosting
A question and answer guide to determining the best fit
for your organization - today and in the future.
http://p.sf.net/sfu/internap-sfd2d
_______________________________________________
Bacula-users mailing list
Bacula-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-users