Package: backuppc
Version: 2.1.2-4
Package: rsync
Version: 2.6.7-1
amd64 server, i386 clients. All uniprocessor, Debian unstable.
Reverting the clients to rsync 2.6.6-1 solves the problem. Obviously,
I'm not sure who's breaking the rules, but BackupPC is crashing while the
rsync server on the backup client is logging a polite error message about
the backup server disconnecting, so I'm pointing the finger at backuppc.
Rsync on the server is 2.6.7-1.
I had to comment out the ping test in order to strace the backup process,
and with that done, a failed backup proceeds as follows:
Running
strace -f -o /tmp/blog -s128 su backuppc -c
"/usr/share/backuppc/bin/BackupPC_dump -v -i $CLIENT"
produces
started incr dump, share=backup
Connected to $CLIENT:873, remote version 29
Connected to module backup
Sending args: --server --sender --numeric-ids --perms --owner --group --devices
--links --times --block-size=2048 --recursive --exclude=/proc --exclude=/tmp
--exclude=/temp --exclude=/media --exclude=/mnt --exclude=/var/tmp
--exclude=/sys --exclude=/cdrom --exclude=/cd . .
Xfer PIDs are now 8320
xferPids 8320
and then within a few seconds the kernel logs
BackupPC_dump[8322]: segfault at 0000000000000000 rip 00002b66cd83a4d0 rsp
00007fffffbdc3d8 error 4
BackupPC_dump[8332]: segfault at 0000000000000000 rip 00002b66cd83a4d0 rsp
00007fffffbdc3d8 error 4
BackupPC_dump[8323]: segfault at 0000000000000000 rip 00002b12c48bc4d0 rsp
00007ffffffe3358 error 4
BackupPC_dump[8345]: segfault at 0000000000000000 rip 00002b12c48bc4d0 rsp
00007ffffffe3358 error 4
The strace ends without much fanfare:
8320 open("/var/lib/backuppc/pc/$CLIENT/102/fbackup/fboot/fdev/ffd0u820",
O_RDONLY) = 6
8320 ioctl(6, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fffff8ec4f0) = -1 ENOTTY
(Inappropriate ioctl for device)
8320 lseek(6, 0, SEEK_CUR) = 0
8320 fstat(6, {st_mode=S_IFREG|0640, st_size=12, ...}) = 0
8320 fcntl(6, F_SETFD, FD_CLOEXEC) = 0
8320 mmap(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0x2b31bcc03000
8320 read(6, "x^3\32215\2\0\1\354\0\306", 131072) = 12
8320 read(6, "", 131072) = 0
8320 close(6) = 0
8320 munmap(0x2b31bcc03000, 135168) = 0
8320 open("/var/lib/backuppc/pc/$CLIENT/102/fbackup/fboot/fdev/ffd0u830",
O_RDONLY) = 6
8320 ioctl(6, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fffff8ec4f0) = -1 ENOTTY
(Inappropriate ioctl for device)
8320 lseek(6, 0, SEEK_CUR) = 0
8320 fstat(6, {st_mode=S_IFREG|0640, st_size=12, ...}) = 0
8320 fcntl(6, F_SETFD, FD_CLOEXEC) = 0
8320 mmap(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0x2b31bcc03000
8320 read(6, "x^3\3221\263\0\0\1\364\0\315", 131072) = 12
8320 read(6, "", 131072) = 0
8320 close(6) = 0
8320 munmap(0x2b31bcc03000, 135168) = 0
8320 open("/var/lib/backuppc/pc/$CLIENT/102/fbackup/fboot/fdev/ffull",
O_RDONLY) = 6
8320 ioctl(6, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fffff8ec4f0) = -1 ENOTTY
(Inappropriate ioctl for device)
8320 lseek(6, 0, SEEK_CUR) = 0
8320 fstat(6, {st_mode=S_IFREG|0640, st_size=11, ...}) = 0
8320 fcntl(6, F_SETFD, FD_CLOEXEC) = 0
8320 mmap(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0x2b31bcc03000
8320 read(6, "x^3\3241\7\0\1%\0\225", 131072) = 11
8320 read(6, "", 131072) = 0
8320 close(6) = 0
8320 munmap(0x2b31bcc03000, 135168) = 0
8320 open("/var/lib/backuppc/pc/$CLIENT/102/fbackup/fboot/fdev/ffwdata",
O_RDONLY) = 6
8320 ioctl(6, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fffff8ec4f0) = -1 ENOTTY
(Inappropriate ioctl for device)
8320 lseek(6, 0, SEEK_CUR) = 0
8320 fstat(6, {st_mode=S_IFREG|0640, st_size=12, ...}) = 0
8320 fcntl(6, F_SETFD, FD_CLOEXEC) = 0
8320 mmap(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0x2b31bcc03000
8320 read(6, "x^36\3231\6\0\1\375\0\311", 131072) = 12
8320 read(6, "", 131072) = 0
8320 close(6) = 0
8320 munmap(0x2b31bcc03000, 135168) = 0
8320 open("/var/lib/backuppc/pc/$CLIENT/102/fbackup/fboot/fdev/fgpmdata",
O_RDONLY) = 6
8320 ioctl(6, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fffff8ec4f0) = -1 ENOTTY
(Inappropriate ioctl for device)
8320 lseek(6, 0, SEEK_CUR) = 0
8320 fstat(6, {st_mode=S_IFREG|0640, st_size=0, ...}) = 0
8320 fcntl(6, F_SETFD, FD_CLOEXEC) = 0
8320 mmap(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0x2b31bcc03000
8320 read(6, "", 131072) = 0
8320 close(6) = 0
8320 munmap(0x2b31bcc03000, 135168) = 0
8320 --- SIGSEGV (Segmentation fault) @ 0 (0) ---
For what it's worth, /fbackup/fboot/fdev/fgpmdata was an empty file,
while the others were mostly 12 bytes long.
(And reminds me that I don't need the repeat flag to gpm any more since 2.6
allows multiple readers of /dev/input/mice, but I'll leave it alone until
this problem is solved.)
The client is complaining that the server is hanging up:
Mar 22 01:11:32 $CLIENT rsyncd[24321]: connect from $SERVER.horizon.com
(192.35.100.xxx)
Mar 22 01:11:32 $CLIENT rsyncd[24321]: rsync on . from [EMAIL PROTECTED]
(192.35.100.xxx)
Mar 22 01:11:33 $CLIENT rsyncd[24321]: rsync: writefd_unbuffered failed to
write 4 bytes: phase "send_file_entry" [sender]: Connection reset by peer (104)
Mar 22 01:11:33 $CLIENT rsyncd[24321]: rsync error: error in rsync protocol
data stream (code 12) at io.c(1126) [sender]
I ran an strace of a different failing backup on a different machine, which
ended with:
32139 lstat64("etc/iproute2", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
32139 lstat64("etc/ircd-hybrid", {st_mode=S_IFDIR|0770, st_size=4096, ...}) = 0
32139 lstat64("etc/ldap", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
32139 lstat64("etc/linkchecker", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
32139 select(6, NULL, [5], NULL, {60, 0}) = 1 (out [5], left {60, 0})
32139 write(5, "\374\17\0\7", 4) = 4
32139 select(6, NULL, [5], NULL, {60, 0}) = 1 (out [5], left {60, 0})
32139 write(5,
"6\0\0\0\0N6\0\0\272\f\0017\0\0\0\0O6\0\0:\10\0037d0\0\0\0\0Utc9\0007\0\0\272\v\2p1\0\0\0\0\0017\0\0\272\f\0012\0\0\0\0\0027\0\0\272\f\0013\0\0\0\0\0037\0\0:\f\0014\0\0\0\0Vtc9\0047\0\0\272\f\0015\0\0\0\0\0057\0\0\272\f\0016\0\0\0\0\0067\0\0\272\f"...,
4092) = 4092
32139 lstat64("etc/logcheck", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
32139 lstat64("etc/logrotate.d", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
32139 lstat64("etc/lprng", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
32139 lstat64("etc/magicfilter", {st_mode=S_IFDIR|0755, st_size=8192, ...}) = 0
32139 lstat64("etc/minicom", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
32139 lstat64("etc/passwd", {st_mode=S_IFREG|0644, st_size=1701, ...}) = 0
32139 lstat64("etc/modutils", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
32139 lstat64("etc/mysql", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
32139 lstat64("etc/network", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
32139 lstat64("etc/news", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
32139 lstat64("etc/nut", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
... Lots of open(O_DIRECTORY), getdents(), lstat64(), and close() snipped ...
32139 lstat64("etc/X11/xkb/symbols/pc/ge_la", {st_mode=S_IFREG|0644,
st_size=1980, ...}) = 0
32139 lstat64("etc/X11/xkb/symbols/pc/ge_ru", {st_mode=S_IFREG|0644,
st_size=2597, ...}) = 0
32139 lstat64("etc/X11/xkb/symbols/pc/guj", {st_mode=S_IFREG|0644,
st_size=2739, ...}) = 0
32139 lstat64("etc/X11/xkb/symbols/pc/gur", {st_mode=S_IFREG|0644,
st_size=2696, ...}) = 0
32139 lstat64("etc/X11/xkb/symbols/pc/il_phonetic", {st_mode=S_IFREG|0644,
st_size=1754, ...}) = 0
32139 lstat64("etc/X11/xkb/symbols/pc/iu", {st_mode=S_IFREG|0644, st_size=2309,
...}) = 0
32139 lstat64("etc/X11/xkb/symbols/pc/kan", {st_mode=S_IFREG|0644,
st_size=2551, ...}) = 0
32139 lstat64("etc/X11/xkb/symbols/pc/lo", {st_mode=S_IFREG|0644, st_size=2784,
...}) = 0
32139 select(6, NULL, [5], NULL, {60, 0}) = 1 (out [5], left {60, 0})
32139 write(5, "\374\17\0\7", 4) = -1 ECONNRESET (Connection reset by
peer)
32139 time([1143013599]) = 1143013599
32139 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=1267, ...}) = 0
32139 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=1267, ...}) = 0
32139 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=1267, ...}) = 0
32139 send(3, "<28>Mar 22 02:46:39 rsyncd[32139]: rsync: writefd_unbuffered
failed to write 4 bytes: phase \"send_file_entry\" [sender]: Connecti"..., 151,
MSG_NOSIGNAL) = 151
32139 rt_sigaction(SIGUSR1, {SIG_IGN}, NULL, 8) = 0
32139 rt_sigaction(SIGUSR2, {SIG_IGN}, NULL, 8) = 0
32139 time([1143013599]) = 1143013599
32139 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=1267, ...}) = 0
32139 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=1267, ...}) = 0
32139 stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=1267, ...}) = 0
32139 send(3, "<28>Mar 22 02:46:39 rsyncd[32139]: rsync error: error in rsync
protocol data stream (code 12) at io.c(1126) [sender]\n", 117, MSG_NOSIGNAL) =
117
32139 exit_group(12) = ?
I could collect corresponding strace outputs if useful, but hopefully this
is enough to reproduce the problem. Unfortunately, the rsync 2.6.6 -> 2.6.7
changelog is quite large.
Thanks!
--
To UNSUBSCRIBE, email to [EMAIL PROTECTED]
with a subject of "unsubscribe". Trouble? Contact [EMAIL PROTECTED]