Tracy R Reed wrote:
> Device:            tps   Blk_read/s   Blk_wrtn/s   Blk_read   Blk_wrtn
> hda               0.00         0.00         0.00          0          0
> sda             969.39       146.94     13069.39        144      12808
>
> How on earth could it be writing so much more than it is reading? That
> is quite puzzling. If I strace the bacula-fd or bacula-sd processes they
> are just sitting in a select. I never see them doing anything else. But
> the spool file is growing so I know it is making progress.
>   

A little more info: I noticed further down in the iostat output that it
says all of the write IO is going to dm-10 which means the 10th device
manager device which in my lvm setup is  /var. This happens to be where
the bacula db is located. If I do an strace on bacula-dir when I first
start up bacula I see:


access("/var/bacula/working/bacula.db-journal", F_OK) = -1 ENOENT (No
such file or directory)
fstat(3, {st_mode=S_IFREG|0640, st_size=4776960, ...}) = 0
lseek(3, 0, SEEK_SET)                   = 0
read(3, "SQLite format 3\0\4\0\1\1\0@  \0\0\254\350\0\0\0\0"..., 1024) =
1024
fcntl(3, F_SETLK, {type=F_WRLCK, whence=SEEK_SET, start=1073741825,
len=1}) = 0
access("/var/bacula/working/bacula.db-journal", F_OK) = -1 ENOENT (No
such file or directory)
open("/var/bacula/working/bacula.db-journal", O_RDWR|O_CREAT|O_EXCL,
0644) = 4
fstat(4, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
open("/var/bacula/working", O_RDONLY)   = 5
lseek(4, 0, SEEK_SET)                   = 0
open("/dev/urandom", O_RDONLY)          = 6
read(6, "5\370\2c\347\311j\5\37bmH\341\23n;s\263\17o\206\nS9ANE"...,
256) = 256
close(6)                                = 0
write(4, "\331\325\5\371 \241c\327\0\0\0\0004\333\3217\0\0\0229\0"...,
24) = 24
lseek(4, 511, SEEK_SET)                 = 511
write(4, "\0", 1)                       = 1
lseek(3, 24576, SEEK_SET)               = 24576
read(3, "\r\0\0\0\3\3v\0\3\323\3\245\3v\0\0\0\0\0\0\0\0\0\0\0\0"...,
1024) = 1024
write(4, "\0\0\0\31\r\0\0\0\3\3v\0\3\323\3\245\3v\0\0\0\0\0\0\0\0"...,
1032) = 1032
write(4, "\0\0\0\1SQLite format 3\0\4\0\1\1\0@  \0\0\254\350"..., 1032)
= 1032
fsync(4)                                = 0
fsync(5)                                = 0
close(5)                                = 0
lseek(4, 8, SEEK_SET)                   = 8
write(4, "\0\0\0\2", 4)                 = 4
lseek(4, 2576, SEEK_SET)                = 2576
fdatasync(4)                            = 0
fcntl(3, F_SETLK, {type=F_WRLCK, whence=SEEK_SET, start=1073741824,
len=1}) = 0
fcntl(3, F_SETLK, {type=F_WRLCK, whence=SEEK_SET, start=1073741826,
len=510}) = 0
lseek(3, 0, SEEK_SET)                   = 0
write(3, "SQLite format 3\0\4\0\1\1\0@  \0\0\254\351\0\0\0\0"..., 1024)
= 1024
lseek(3, 24576, SEEK_SET)               = 24576
write(3, "\r\0\0\0\3\3v\0\3\323\3\245\3v\0\0\0\0\0\0\0\0\0\0\0\0"...,
1024) = 1024
fsync(3)                                = 0
close(4)                                = 0
unlink("/var/bacula/working/bacula.db-journal") = 0
fcntl(3, F_SETLK, {type=F_RDLCK, whence=SEEK_SET, start=1073741826,
len=510}) = 0
fcntl(3, F_SETLK, {type=F_UNLCK, whence=SEEK_SET, start=1073741824,
len=2}) = 0
fcntl(3, F_SETLK, {type=F_UNLCK, whence=SEEK_SET, start=0, len=0}) = 0
fcntl(3, F_SETLK, {type=F_RDLCK, whence=SEEK_SET, start=1073741824,
len=1}) = 0
fcntl(3, F_SETLK, {type=F_RDLCK, whence=SEEK_SET, start=1073741826,
len=510}) = 0
fcntl(3, F_SETLK, {type=F_UNLCK, whence=SEEK_SET, start=1073741824,
len=1}) = 0
access("/var/bacula/working/bacula.db-journal", F_OK) = -1 ENOENT (No
such file or directory)

and it just continues like this constantly trying to access
bacula.db-journal. Anyone know why it does this? But I can only strace
for a second or so before:

unlink("/var/bacula/working/bacula.db-journal") = 0
fcntl(3, F_SETLK, {type=F_RDLCK, whence=SEEK_SET, start=1073741826,
len=510}) = 0
fcntl(3, F_SETLK, {type=F_UNLCK, whence=SEEK_SET, start=1073741824,
len=2}) = 0
fcntl(3, F_SETLK, {type=F_UNLCK, whence=SEEK_SET, start=0, len=0}) = 0
close(3)                                = 0
open("/dev/null", O_RDONLY)             = 3
close(3)                                = 0
clone(child_stack=0,
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0x2abca5182100) = 32156
--- SIGCHLD (Child exited) @ 0 (0) ---
rt_sigreturn(0x11)                      = 32156
exit_group(0)                           = ?
Process 32155 detached

Why is bacula-dir cloning itself? I noticed sometimes during use of
strace -f -p <pid> when I try to ctrl-c to exit that bacula-dir
completely hung. I would get things like this in the bacula log:

18-Jun 04:00 home-dir: Fatal Error at bnet_server.c:157 because: Error
in select: Unknown error 514
18-Jun 04:00 home-dir: Fatal Error because: Bacula interrupted by signal
11: Segmentation violation

But during a successful run of strace -f -p we get:

[pid 32633] access("/var/bacula/working/bacula.db-journal", F_OK) = -1
ENOENT (No such file or directory)
[pid 32633] open("/var/bacula/working/bacula.db-journal",
O_RDWR|O_CREAT|O_EXCL, 0644) = 6
[pid 32633] fstat(6, {st_mode=S_IFREG|0640, st_size=0, ...}) = 0
[pid 32633] open("/var/bacula/working", O_RDONLY) = 12
[pid 32633] lseek(6, 0, SEEK_SET)       = 0
[pid 32633] write(6, "\331\325\5\371
\241c\327\0\0\0\0\242\\\243\354\0\0\25\372"..., 24) = 24
[pid 32633] lseek(6, 511, SEEK_SET)     = 511
[pid 32633] write(6, "\0", 1)           = 1
[pid 32633] lseek(7, 5120, SEEK_SET)    = 5120
[pid 32633] read(7,
"\5\0\0\0$\3\21\0\0\0\25\221\3\372\3\364\3\356\3\350\3\342"..., 1024) = 1024
[pid 32633] lseek(7, 7168, SEEK_SET)    = 7168
[pid 32633] read(7,
"\2\3\246\0\v\3>\6\0\0\f\376\3j\3\\\3>\3M\3y\3\342\3\323"..., 1024) = 1024
[pid 32633] lseek(7, 6144, SEEK_SET)    = 6144
[pid 32633] read(7,
"\2\0\0\0\5\3\312\0\0\0\25\331\3\366\3\354\3\341\3\326\3"..., 1024) = 1024
[pid 32633] lseek(7, 5652480, SEEK_SET) = 5652480
[pid 32633] read(7,
"\5\0\0\0i\1!\0\0\0\25\372\0016\1=\1D\1K\1R\1Y\1`\1g\1n"..., 1024) = 1024
[pid 32633] lseek(7, 5760000, SEEK_SET) = 5760000
[pid 32633] read(7,
"\r\0\0\0\6\1\355\0\3\264\3S\3\1\2\257\2N\1\355\0\0\0\0"..., 1024) = 1024
[pid 32633] lseek(7, 5726208, SEEK_SET) = 5726208
[pid 32633] read(7,
"\2\0\0\0&\0028\0\0\0\25\362\2P\2\\\2h\2t\2\200\2\214\2"..., 1024) = 1024
[pid 32633] lseek(7, 5751808, SEEK_SET) = 5751808
[pid 32633] read(7,
"\n\0\0\0e\0\330\0\0\340\0\350\0\360\0\370\1\0\1\10\1\20"..., 1024) = 1024
[pid 32633] write(6,
"\0\0\25\362\n\0\0\0e\0\330\0\0\340\0\350\0\360\0\370\1"..., 1032) = 1032
[pid 32633] write(6,
"\0\0\25\331\2\0\0\0&\0028\0\0\0\25\362\2P\2\\\2h\2t\2\200"..., 1032) = 1032
[pid 32633] lseek(7, 5725184, SEEK_SET) = 5725184
[pid 32633] read(7,
"\n\0\0\0e\0\330\0\0\330\0\340\0\350\0\360\0\370\1\0\1\10"..., 1024) = 1024
[pid 32633] lseek(7, 5740544, SEEK_SET) = 5740544
[pid 32633] read(7,
"\n\0\0\0d\0\340\0\0\340\0\350\0\360\0\370\1\0\1\10\1\20"..., 1024) = 1024
[pid 32633] write(6,
"\0\0\25\330\n\0\0\0e\0\330\0\0\330\0\340\0\350\0\360\0"..., 1032) = 1032
[pid 32633] write(6,
"\0\0\25\347\n\0\0\0d\0\340\0\0\340\0\350\0\360\0\370\1"..., 1032) = 1032
[pid 32633] lseek(7, 2056192, SEEK_SET) = 2056192
[pid 32633] read(7,
"\2\0\0\0008\0\265\0\0\0\5\6\0\364\1\3\1\22\0010\0\325\1"..., 1024) = 1024
[pid 32633] lseek(7, 4612096, SEEK_SET) = 4612096
[pid 32633] read(7,
"\n\0\0\0I\0\315\0\1\201\1\214\1\227\1\243\1\256\1u\1\271"..., 1024) = 1024
[pid 32633] write(6,
"\0\0\21\231\n\0\0\0I\0\315\0\1\201\1\214\1\227\1\243\1"..., 1032) = 1032
[pid 32633] write(6,
"\0\0\25\372\r\0\0\0\6\1\355\0\3\264\3S\3\1\2\257\2N\1\355"..., 1032) = 1032
[pid 32633] write(6, "\0\0\0\1SQLite format 3\0\4\0\1\1\0@ 
\0\0\312\305"..., 1032) = 1032
[pid 32633] fsync(6)                    = 0
[pid 32633] fsync(12)                   = 0
[pid 32633] close(12)                   = 0
[pid 32633] lseek(6, 8, SEEK_SET)       = 8
[pid 32633] write(6, "\0\0\0\7", 4)     = 4
[pid 32633] lseek(6, 7736, SEEK_SET)    = 7736
[pid 32633] fdatasync(6)                = 0
[pid 32633] fcntl(7, F_SETLK, {type=F_WRLCK, whence=SEEK_SET,
start=1073741824, len=1}) = 0
[pid 32633] fcntl(7, F_SETLK, {type=F_WRLCK, whence=SEEK_SET,
start=1073741826, len=510}) = 0
[pid 32633] lseek(7, 0, SEEK_SET)       = 0
[pid 32633] write(7, "SQLite format 3\0\4\0\1\1\0@ 
\0\0\312\306\0\0\0\0"..., 1024) = 1024
[pid 32633] lseek(7, 5760000, SEEK_SET) = 5760000
[pid 32633] write(7,
"\r\0\0\0\7\1\237\0\3\264\3S\3\1\2\257\2N\1\355\1\237\0"..., 1024) = 1024
[pid 32633] lseek(7, 5726208, SEEK_SET) = 5726208
[pid 32633] write(7,
"\2\0\0\0&\0028\0\0\0\25\362\2P\2\\\2h\2t\2\200\2\214\2"..., 1024) = 1024
[pid 32633] lseek(7, 5751808, SEEK_SET) = 5751808
[pid 32633] write(7,
"\n\0\0\0e\0\330\0\0\330\0\340\0\350\0\360\0\370\1\0\1\10"..., 1024) = 1024
[pid 32633] lseek(7, 5725184, SEEK_SET) = 5725184
[pid 32633] write(7,
"\n\0\0\0e\0\330\0\0\330\0\340\0\350\0\360\0\370\1\0\1\10"..., 1024) = 1024
[pid 32633] lseek(7, 5740544, SEEK_SET) = 5740544
[pid 32633] write(7,
"\n\0\0\0e\0\330\0\0\330\0\340\0\350\0\360\0\370\1\0\1\10"..., 1024) = 1024
[pid 32633] lseek(7, 4612096, SEEK_SET) = 4612096
[pid 32633] write(7,
"\n\0\0\0J\0\301\0\1\201\1\214\1\227\1\243\1\256\1u\1\271"..., 1024) = 1024
[pid 32633] fsync(7)                    = 0
[pid 32633] close(6)                    = 0
[pid 32633] unlink("/var/bacula/working/bacula.db-journal") = 0
[pid 32633] fcntl(7, F_SETLK, {type=F_RDLCK, whence=SEEK_SET,
start=1073741826, len=510}) = 0
[pid 32633] fcntl(7, F_SETLK, {type=F_UNLCK, whence=SEEK_SET,
start=1073741824, len=2}) = 0
[pid 32633] fcntl(7, F_SETLK, {type=F_UNLCK, whence=SEEK_SET, start=0,
len=0}) = 0
[pid 32633] read(10, "\0\0\0\261", 4)   = 4
[pid 32633] read(10, "UpdCat Job=home.2006-06-18_03.55"..., 177) = 177

So fd 6 is the journal and we write a ton of stuff into it as fast as we
can judging from the way the strace scrolls by. What is on fd 7? I can't
tell. But it looks like we are writing a ton of data and doing a ton of
syncs on the journal. This has got to be what is killing my backup
performance. We are doing way more IO talking to the db for some reason
than we are doing the actual backup!

I am on a 64 bit platform and I know when I first tried bacula Kern had
to help me out with a couple of bugfixes to get it to compile. Could
there be some remaining performance problems or general undesireable
behavior lurking because of 64 bittedness as well?

-- 
Tracy R Reed
http://ultraviolet.org



_______________________________________________
Bacula-users mailing list
Bacula-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/bacula-users

Reply via email to