Another VM has the same problem, and new errors appear: # dmesg | grep one-vm-8099-disk-0 [20158467.625882] drbd one-vm-8099-disk-0: susp-io( no -> user) [20158469.308525] drbd one-vm-8099-disk-0: susp-io( user -> no) [20158469.309400] drbd one-vm-8099-disk-0/0 drbd1545: drbd_req_complete: Logic BUG rq_state: 8000, completion_ref = -1 [20158469.309504] drbd one-vm-8099-disk-0/0 drbd1545: drbd_req_complete: Logic BUG rq_state: 8000, completion_ref = -1 [20158469.309553] drbd one-vm-8099-disk-0/0 drbd1545: drbd_req_complete: Logic BUG rq_state: 8000, completion_ref = -1 [20158469.309647] drbd one-vm-8099-disk-0/0 drbd1545: drbd_req_complete: Logic BUG rq_state: 8000, completion_ref = -1 [20158469.309989] drbd one-vm-8099-disk-0/0 drbd1545: drbd_req_complete: Logic BUG rq_state: 8000, completion_ref = -1 [20158469.310208] drbd one-vm-8099-disk-0/0 drbd1545: drbd_req_complete: Logic BUG rq_state: 8000, completion_ref = -1 [20158469.310762] drbd one-vm-8099-disk-0/0 drbd1545: drbd_req_destroy: Logic BUG rq_state: 8000, completion_ref = -1
Best Regards, Andrei Kvapil On Fri, May 29, 2020 at 3:14 PM kvaps <[email protected]> wrote: > Hello, > > I'm not sure whether this bug has already been fixed in the latest DRBD version, but > it is better to report it. > I'm using 9.0.19-1 (6f5fa5d348a99e5eeb09d83c49853d72e614fd07) and kernel > 4.15.18-18-pve > > We run a weekly backup of all our resources; backups are made for > each resource as described here: > > https://github.com/LINBIT/linstor-server/issues/150#issuecomment-635942823 > > Thus, for each resource we create a new snapshot and deploy a new resource > from this snapshot; then the snapshot is removed, the backup is performed on the > newly created resource, and finally the resource created from the snapshot is removed. > > From time to time the VMs may get stuck forever, even though the resource is unsuspended > after the snapshot. > Such VMs can be killed only with the -9 signal (SIGKILL). But the resource stays in the Primary > role and can't be shut down: > > # drbdsetup status one-vm-7944-disk-0 --verbose --statistics > one-vm-7944-disk-0 node-id:3 role:Primary suspended:no > write-ordering:none > volume:0 minor:1509 disk:Diskless client:yes quorum:yes > size:272632908 read:0 written:0 al-writes:0 bm-writes:0 > upper-pending:4 lower-pending:0 al-suspended:no blocked:no > m13c28 node-id:0 connection:Connected role:Secondary congested:no > ap-in-flight:0 rs-in-flight:0 > volume:0 replication:Established peer-disk:UpToDate resync-suspended:no > received:2027182412 sent:1080354540 out-of-sync:0 pending:0 > unacked:0 > m14c10 node-id:1 connection:Connected role:Secondary congested:no > ap-in-flight:0 rs-in-flight:0 > volume:0 replication:Established peer-disk:UpToDate resync-suspended:no > received:2027184872 sent:1080354540 out-of-sync:0 pending:0 > unacked:0 > > # dmesg | grep one-vm-7944-disk-0 > [20157532.568950] drbd one-vm-7944-disk-0: susp-io( no -> user) > [20157534.979777] drbd one-vm-7944-disk-0: susp-io( user -> no) > > # drbdsetup secondary one-vm-7944-disk-0 > <stuck forever> > > strace log: > > 
execve("/usr/sbin/drbdsetup", ["drbdsetup", "secondary", > "one-vm-7944-disk-0"], 0x7ffc6b833b10 /* 16 vars */) = 0 > brk(NULL) = 0x56134ae0f000 > access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or > directory) > access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or > directory) > openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 > fstat(3, {st_mode=S_IFREG|0644, st_size=37110, ...}) = 0 > mmap(NULL, 37110, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f5155c12000 > close(3) = 0 > access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or > directory) > openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 > read(3, > "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\260\34\2\0\0\0\0\0"..., > 832) = 832 > fstat(3, {st_mode=S_IFREG|0755, st_size=2030544, ...}) = 0 > mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = > 0x7f5155c10000 > mmap(NULL, 4131552, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) > = 0x7f5155604000 > mprotect(0x7f51557eb000, 2097152, PROT_NONE) = 0 > mmap(0x7f51559eb000, 24576, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e7000) = 0x7f51559eb000 > mmap(0x7f51559f1000, 15072, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f51559f1000 > close(3) = 0 > arch_prctl(ARCH_SET_FS, 0x7f5155c11580) = 0 > mprotect(0x7f51559eb000, 16384, PROT_READ) = 0 > mprotect(0x561349fd2000, 4096, PROT_READ) = 0 > mprotect(0x7f5155c1c000, 4096, PROT_READ) = 0 > munmap(0x7f5155c12000, 37110) = 0 > chdir("/") = 0 > stat("/proc/drbd", {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 > openat(AT_FDCWD, "/proc/drbd", O_RDONLY) = 3 > brk(NULL) = 0x56134ae0f000 > brk(0x56134ae30000) = 0x56134ae30000 > read(3, "version: 9.0.19-1 (api:2/proto:8"..., 4095) = 170 > close(3) = 0 > socket(AF_NETLINK, SOCK_DGRAM, NETLINK_GENERIC) = 3 > setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1048576], 4) = 0 > setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0 > bind(3, 
{sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0 > getsockname(3, {sa_family=AF_NETLINK, nl_pid=8512, nl_groups=00000000}, > [12]) = 0 > write(3, " \0\0\0\20\0\1\0\340\t\321^@!\0\0\3\2\0\0\t\0\2\0drbd\0\0\0\0", > 32) = 32 > poll([{fd=3, events=POLLIN}], 1, 3000) = 1 ([{fd=3, revents=POLLIN}]) > recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, > msg_namelen=12, msg_iov=[{iov_base={{len=816, type=nlctrl, flags=0, > seq=1590757856, pid=8512}, > "\x01\x02\x00\x00\x09\x00\x02\x00\x64\x72\x62\x64\x00\x00\x00\x00\x06\x00\x01\x00\x1f\x00\x00\x00\x08\x00\x03\x00\x02\x00\x00\x00"...}, > iov_len=8192}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, MSG_PEEK) = > 816 > poll([{fd=3, events=POLLIN}], 1, 3000) = 1 ([{fd=3, revents=POLLIN}]) > recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, > msg_namelen=12, msg_iov=[{iov_base={{len=816, type=nlctrl, flags=0, > seq=1590757856, pid=8512}, > "\x01\x02\x00\x00\x09\x00\x02\x00\x64\x72\x62\x64\x00\x00\x00\x00\x06\x00\x01\x00\x1f\x00\x00\x00\x08\x00\x03\x00\x02\x00\x00\x00"...}, > iov_len=8192}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 816 > write(3, > "8\0\0\0\37\0\1\0\341\t\321^@!\0\0\17\2\0\0\377\377\377\377\0\0\0\0\34\0\2\0"..., > 56 > > Best Regards, > Andrei Kvapil >
_______________________________________________ Star us on GITHUB: https://github.com/LINBIT drbd-user mailing list [email protected] https://lists.linbit.com/mailman/listinfo/drbd-user
