[Qemu-devel] [Bug 1807052] Re: Qemu hangs during migration

2018-12-05 Thread Matthew Schumacher
If I remove iothreads and writeback caching, it seems more reliable, but
I can still get it to hang.

This time the source server shows the VM as running, and the backtrace
looks like:

(gdb) bt full
#0  0x7f27eab0028c in __lll_lock_wait () at /lib64/libpthread.so.0
#1  0x7f27eaaf9d35 in pthread_mutex_lock () at /lib64/libpthread.so.0
#2  0x00865419 in qemu_mutex_lock_impl (mutex=mutex@entry=0x115b8e0 <qemu_global_mutex>, file=file@entry=0x8fdf14 "/tmp/qemu-3.0.0/cpus.c", line=line@entry=1768) at util/qemu-thread-posix.c:66
        err = <optimized out>
        __PRETTY_FUNCTION__ = "qemu_mutex_lock_impl"
        __func__ = "qemu_mutex_lock_impl"
#3  0x00477578 in qemu_mutex_lock_iothread () at /tmp/qemu-3.0.0/cpus.c:1768
#4  0x008622b0 in main_loop_wait (timeout=<optimized out>) at util/main-loop.c:236
        context = 0x1e72810
        ret = 1
        ret = 1
        timeout = 4294967295
        timeout_ns = <optimized out>
#5  0x008622b0 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497
        ret = 1
        timeout = 4294967295
        timeout_ns = <optimized out>
#6  0x00595dee in main_loop () at vl.c:1866
#7  0x0041f35d in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4644
        i = <optimized out>
        snapshot = 0
        linux_boot = <optimized out>
        initrd_filename = 0x0
        kernel_filename = <optimized out>
        kernel_cmdline = <optimized out>
        boot_order = 0x918f44 "cad"
        boot_once = 0x0
        ds = <optimized out>
        opts = <optimized out>
        machine_opts = <optimized out>
        icount_opts = <optimized out>
        accel_opts = 0x0
        olist = <optimized out>
        optind = 71
        optarg = 0x7fff5edcff69 "timestamp=on"
        loadvm = 0x0
        machine_class = 0x0
        cpu_model = 0x7fff5edcf88a "Skylake-Server-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,umip=on,pku=on,ssbd=on,xsaves=on,topoext=on,hv_time,hv_relaxed,hv_vapic,hv_spinlocks=0x1fff,hv_vpindex,hv_runtime,hv_synic,hv_stimer"...
        vga_model = 0x0
        qtest_chrdev = 0x0
        qtest_log = 0x0
        pid_file = <optimized out>
        incoming = 0x7fff5edcff0a "defer"
        userconfig = <optimized out>
        nographic = false
        display_remote = <optimized out>
        log_mask = <optimized out>
        log_file = <optimized out>
        trace_file = <optimized out>
        maxram_size = 4294967296
        ram_slots = 0
        vmstate_dump_file = 0x0
        main_loop_err = 0x0
---Type <return> to continue, or q <return> to quit---
        err = 0x0
        list_data_dirs = false
        dir = <optimized out>
        dirs = <optimized out>
        bdo_queue = {sqh_first = 0x0, sqh_last = 0x7fff5edcd670}
        __func__ = "main"


Dest server is paused, and looks like this:

#0  0x7f11c48bc3c1 in ppoll () at /lib64/libc.so.6
#1  0x00861659 in qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>, timeout=timeout@entry=2999892383) at util/qemu-timer.c:334
        ts = {tv_sec = 2, tv_nsec = 999892383}
Python Exception <class 'OverflowError'> That operation is not available on integers of more than 8 bytes.:
#2  0x008622a4 in main_loop_wait (timeout=<optimized out>) at util/main-loop.c:233
        context = 0x2342810
        ret = <optimized out>
        ret = -1295074913
        timeout = 4294967295
        timeout_ns = <optimized out>
#3  0x008622a4 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497
        ret = -1295074913
        timeout = 4294967295
        timeout_ns = <optimized out>
#4  0x00595dee in main_loop () at vl.c:1866
#5  0x0041f35d in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4644
        i = <optimized out>
        snapshot = 0
        linux_boot = <optimized out>
        initrd_filename = 0x0
        kernel_filename = <optimized out>
        kernel_cmdline = <optimized out>
        boot_order = 0x918f44 "cad"
        boot_once = 0x0
        ds = <optimized out>
        opts = <optimized out>
        machine_opts = <optimized out>
        icount_opts = <optimized out>
        accel_opts = 0x0
        olist = <optimized out>
        optind = 71
        optarg = 0x7ffe6b899f69 "timestamp=on"
        loadvm = 0x0
        machine_class = 0x0
        cpu_model = 0x7ffe6b89988a "Skylake-Server-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,umip=on,pku=on,ssbd=on,xsaves=on,topoext=on,hv_time,hv_relaxed,hv_vapic,hv_spinlocks=0x1fff,hv_vpindex,hv_runtime,hv_synic,hv_stimer"...
        vga_model = 0x0
        qtest_chrdev = 0x0
        qtest_log = 0x0
        pid_file = <optimized out>
        incoming = 0x7ffe6b899f0a "defer"
        userconfig = <optimized out>
        nographic = false
        display_remote = <optimized out>
        log_mask = <optimized out>
        log_file = <optimized out>
        trace_file = <optimized out>
        maxram_size = 4294967296
        ram_slots = 0
        vmstate_dump_file = 0x0
        main_loop_err = 0x0
        err = 0x0
        list_data_dirs = false
        dir = <optimized out>
        dirs = <optimized out>
        bdo_queue = {sqh_first = 0x0, sqh_last = 0x7ffe6b8988e0}
---Type <return> to continue, or q <return> to quit---
        __func__ = "main"

Honestly, this looks pretty much like the same bug.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1807052

Title:
  Qemu hangs during migration

Status in QEMU:
  New

Bug description:
  Source server: linux 4.19.5 qemu-3.0.0 from source, libvirt 4.9
  Dest server: linux 4.18.19 qemu-3.0.0 from source, libvirt 4.9

  

[Qemu-devel] [Bug 1807052] [NEW] Qemu hangs during migration

2018-12-05 Thread Matthew Schumacher
Public bug reported:

Source server: linux 4.19.5 qemu-3.0.0 from source, libvirt 4.9
Dest server: linux 4.18.19 qemu-3.0.0 from source, libvirt 4.9

When this VM is running on the source server:

/usr/bin/qemu-system-x86_64 -name guest=testvm,debug-threads=on -S
-object
secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-13-testvm
/master-key.aes -machine pc-q35-3.0,accel=kvm,usb=off,dump-guest-
core=off -cpu Skylake-Server-
IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,umip=on,pku=on,ssbd=on,xsaves=on,topoext=on,hv_time,hv_relaxed,hv_vapic,hv_spinlocks=0x1fff,hv_vpindex,hv_runtime,hv_synic,hv_stimer,hv_reset,hv_vendor_id=KVM
Hv -m 4096 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1
-object iothread,id=iothread1 -uuid 3b00b788-ee91-4e45-80a6-c7319da71225
-no-user-config -nodefaults -chardev
socket,id=charmonitor,fd=23,server,nowait -mon
chardev=charmonitor,id=monitor,mode=control -rtc
base=localtime,driftfix=slew -global kvm-pit.lost_tick_policy=delay -no-
hpet -no-shutdown -boot strict=on -device pcie-root-
port,port=0x10,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,addr=0x2
-device pcie-root-
port,port=0x11,chassis=2,id=pci.2,bus=pcie.0,addr=0x2.0x1 -device pcie-
pci-bridge,id=pci.3,bus=pci.1,addr=0x0 -device pcie-root-
port,port=0x12,chassis=4,id=pci.4,bus=pcie.0,addr=0x2.0x2 -device pcie-
root-port,port=0x13,chassis=5,id=pci.5,bus=pcie.0,addr=0x2.0x3 -device
piix3-usb-uhci,id=usb,bus=pci.3,addr=0x1 -device virtio-scsi-
pci,iothread=iothread1,id=scsi0,bus=pci.4,addr=0x0 -drive
file=/dev/zvol/datastore/vm/testvm-vda,format=raw,if=none,id=drive-
scsi0-0-0-0,cache=writeback,aio=threads -device scsi-
hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-
scsi0-0-0-0,id=scsi0-0-0-0,bootindex=2,write-cache=on -drive if=none,id
=drive-sata0-0-4,media=cdrom,readonly=on -device ide-cd,bus=ide.4,drive
=drive-sata0-0-4,id=sata0-0-4,bootindex=1 -netdev
tap,fd=25,id=hostnet0,vhost=on,vhostfd=26 -device virtio-net-
pci,netdev=hostnet0,id=net0,mac=52:54:00:a2:b7:a1,bus=pci.2,addr=0x0
-device usb-tablet,id=input0,bus=usb.0,port=1 -vnc 127.0.0.1:0 -device
cirrus-vga,id=video0,bus=pcie.0,addr=0x1 -s -sandbox
on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny
-msg timestamp=on

I try to migrate it and its disks to the other side:

virsh migrate --live --undefinesource --persistent --verbose --copy-
storage-all testvm qemu+ssh://wasvirt1/system

We get to 99%, then it hangs with both sides in the paused state.

Source server is stuck here:
(gdb) bt full
#0  0x7f327994f3c1 in ppoll () at /lib64/libc.so.6
#1  0x0086167b in qemu_poll_ns (fds=<optimized out>, nfds=nfds@entry=1, timeout=<optimized out>) at util/qemu-timer.c:322
#2  0x00863302 in aio_poll (ctx=0x21044e0, blocking=blocking@entry=true) at util/aio-posix.c:629
        node = <optimized out>
        i = <optimized out>
        ret = 0
        progress = <optimized out>
        timeout = <optimized out>
        start = <optimized out>
        __PRETTY_FUNCTION__ = "aio_poll"
#3  0x007e0d52 in nbd_client_close (bs=0x2ba2400) at block/nbd-client.c:62
        waited_ = <optimized out>
        wait_ = 0x2ba563c
        ctx_ = 0x2109bb0
        bs_ = 0x2ba2400
        client = 0x31287e0
        client = <optimized out>
        request = {handle = 0, from = 0, len = 0, flags = 0, type = 2}
#4  0x007e0d52 in nbd_client_close (bs=0x2ba2400) at block/nbd-client.c:965
        client = <optimized out>
        request = {handle = 0, from = 0, len = 0, flags = 0, type = 2}
#5  0x007de5ca in nbd_close (bs=<optimized out>) at block/nbd.c:491
        s = 0x31287e0
#6  0x007823d6 in bdrv_unref (bs=0x2ba2400) at block.c:3352
        ban = <optimized out>
        ban_next = <optimized out>
        child = <optimized out>
        next = <optimized out>
#7  0x007823d6 in bdrv_unref (bs=0x2ba2400) at block.c:3560
#8  0x007823d6 in bdrv_unref (bs=0x2ba2400) at block.c:4616
#9  0x00782403 in bdrv_unref (bs=0x2af96f0) at block.c:3359
        ban = <optimized out>
        ban_next = <optimized out>
        child = <optimized out>
        next = <optimized out>
#10 0x00782403 in bdrv_unref (bs=0x2af96f0) at block.c:3560
#11 0x00782403 in bdrv_unref (bs=0x2af96f0) at block.c:4616
#12 0x00785784 in block_job_remove_all_bdrv (job=job@entry=0x2f32570) at blockjob.c:200
        c = 0x23bac30
        l = 0x20dd330 = {0x23bac30, 0x2b89410}
#13 0x007ceb5f in mirror_exit (job=0x2f32570, opaque=0x7f326407a350) at block/mirror.c:700
        s = 0x2f32570
        bjob = 0x2f32570
        data = 0x7f326407a350
        bs_opaque = 0x30d5600
        replace_aio_context = <optimized out>
        src = 0x2131080
        target_bs = 0x2af96f0
        mirror_top_bs = 0x210eb70
        local_err = 0x0
#14 0x00786452 in job_defer_to_main_loop_bh (opaque=0x7f32640786a0) at job.c:973
        data = 0x7f32640786a0
        job = <optimized out>
        aio_context = 0x2109bb0
#15 0x0085fd3f in aio_bh_poll (ctx=ctx@entry=0x21044e0) at util/async.c:118
---Type <return> to continue, or q <return> to quit---
        bh = <optimized out>
        bhp = <optimized out>
        next = 0x2ea86e0
        ret = 1
        deleted = false
#16 0x008631b0 in aio_dispatch (ctx=0x21044e0) at

[Qemu-devel] [Bug 1793791] Re: Crash with nbd_reply_chunk_iter_receive: Assertion `chunk->flags & NBD_REPLY_FLAG_DONE' failed

2018-09-25 Thread Matthew Schumacher
Okay, this is probably a race condition bug.  If I remove

<iothreads>1</iothreads>

and iothread='1' from the disk, which causes the command to change from:

-device virtio-blk-
pci,iothread=iothread1,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-
disk0,id=virtio-disk0,bootindex=2,write-cache=on

to

-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-
disk0,id=virtio-disk0,bootindex=2,write-cache=on

I don't get crashes anymore.

So for sure it has something to do with iothreads.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1793791

Title:
  Crash with nbd_reply_chunk_iter_receive: Assertion `chunk->flags &
  NBD_REPLY_FLAG_DONE' failed

Status in QEMU:
  New

Bug description:
  Qemu version on both sides: 2.12.1
  Host A Linux: 4.9.76
  Host B Linux: 4.14.67

  While calling from Host A:
  virsh migrate virtualmachine qemu+ssh://hostB/system --live --undefinesource 
--persistent --verbose --copy-storage-all

  I get a qemu crash with:

  2018-09-21 16:12:23.073+: 14428: info : virObjectUnref:350 : 
OBJECT_UNREF: obj=0x7f922c03d990
  qemu-system-x86_64: block/nbd-client.c:606: nbd_reply_chunk_iter_receive: 
Assertion `chunk->flags & NBD_REPLY_FLAG_DONE' failed.
  2018-09-21 16:12:41.230+: shutting down, reason=crashed
  2018-09-21 16:12:52.900+: shutting down, reason=failed

  It doesn't do it every time, but most of the time.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1793791/+subscriptions



[Qemu-devel] [Bug 1793791] Re: Crash with nbd_reply_chunk_iter_receive: Assertion `chunk->flags & NBD_REPLY_FLAG_DONE' failed

2018-09-24 Thread Matthew Schumacher
From the core:

structured = {magic = 1732535960, flags = 0, type = 0, handle =
94174913593865, length = 0}

You would think that would pass:

chunk = &reply->structured;

if (chunk->type == NBD_REPLY_TYPE_NONE) {
    /* NBD_REPLY_FLAG_DONE is already checked in nbd_co_receive_one_chunk */
    assert(chunk->flags & NBD_REPLY_FLAG_DONE);
    goto break_loop;
}

Given:

#define NBD_REPLY_TYPE_NONE  0


Perhaps this is a problem with my compiler (or maybe that's an ignorant
guess).  I'm using:

gcc version 5.5.0 (GCC)
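
One thing jumps out of those numbers: 1732535960 is 0x67446698, which
(if I'm reading include/block/nbd.h right) is NBD_SIMPLE_REPLY_MAGIC,
not NBD_STRUCTURED_REPLY_MAGIC (0x668e33ef).  So the union contents look
like a simple reply viewed through the structured layout, which would
naturally give type == 0 and flags == 0.  A tiny decoder for the values
above (the constants are my assumption from nbd.h):

#include <stdint.h>
#include <stdio.h>

#define NBD_SIMPLE_REPLY_MAGIC      0x67446698
#define NBD_STRUCTURED_REPLY_MAGIC  0x668e33ef
#define NBD_REPLY_FLAG_DONE         (1 << 0)
#define NBD_REPLY_TYPE_NONE         0

int main(void)
{
    uint32_t magic = 1732535960;  /* structured.magic from the core */
    uint16_t flags = 0;           /* structured.flags */
    uint16_t type  = 0;           /* structured.type */

    printf("magic 0x%x: simple=%d structured=%d\n", magic,
           magic == NBD_SIMPLE_REPLY_MAGIC,
           magic == NBD_STRUCTURED_REPLY_MAGIC);  /* simple=1 structured=0 */

    /* the assert that fires: with flags == 0 this condition is false */
    if (type == NBD_REPLY_TYPE_NONE) {
        printf("DONE flag set: %d\n", !!(flags & NBD_REPLY_FLAG_DONE));
    }
    return 0;
}

In other words, the assert fails simply because flags is 0; the puzzle
is why a simple-reply-shaped buffer reached the structured path, not the
compiler.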




[Qemu-devel] [Bug 1793791] Re: Crash with nbd_reply_chunk_iter_receive: Assertion `chunk->flags & NBD_REPLY_FLAG_DONE' failed

2018-09-24 Thread Matthew Schumacher
I'm back to trying to figure this out.  I can't use migration with
storage copy until this bug is fixed, so I'm pretty motivated.  Today I
configured libvirt/qemu to dump core, and I compiled qemu with debugging
symbols.  Here is the backtrace.  I'm not sure it says anything we don't
already know.

I may try to hack in some more debugging later today, but my C is
terrible.  Any other ideas on ways I can help?

(gdb) bt full
#0  0x7f1a6a3313f8 in raise () at /lib64/libc.so.6
#1  0x7f1a6a332ffa in abort () at /lib64/libc.so.6
#2  0x7f1a6a329c17 in __assert_fail_base () at /lib64/libc.so.6
#3  0x7f1a6a329cc2 in  () at /lib64/libc.so.6
#4  0x55a6cba705a6 in nbd_reply_chunk_iter_receive (s=s@entry=0x55a6ce458200, iter=iter@entry=0x7f1945fe8890, handle=handle@entry=94174913593865, qiov=qiov@entry=0x0, reply=0x7f1945fe8800, reply@entry=0x0, payload=payload@entry=0x0) at block/nbd-client.c:606
        local_reply = {simple = {magic = 1732535960, error = 0, handle = 94174913593865}, structured = {magic = 1732535960, flags = 0, type = 0, handle = 94174913593865, length = 0}, {magic = 1732535960, _skip = 0, handle = 94174913593865}}
        chunk = 0x7f1945fe8800
        local_err = 0x0
        __func__ = "nbd_reply_chunk_iter_receive"
        __PRETTY_FUNCTION__ = "nbd_reply_chunk_iter_receive"
#5  0x55a6cba706d6 in nbd_co_request (errp=0x7f1945fe, handle=94174913593865, s=0x55a6ce458200) at block/nbd-client.c:634
        iter = {ret = 0, fatal = false, err = 0x0, done = false, only_structured = true}
        ret = <optimized out>
        local_err = 0x0
        client = 0x55a6ce458200
        __PRETTY_FUNCTION__ = "nbd_co_request"
#6  0x55a6cba706d6 in nbd_co_request (bs=bs@entry=0x55a6ce450130, request=request@entry=0x7f1945fe88e0, write_qiov=write_qiov@entry=0x0) at block/nbd-client.c:772
        ret = <optimized out>
        local_err = 0x0
        client = 0x55a6ce458200
        __PRETTY_FUNCTION__ = "nbd_co_request"
#7  0x55a6cba70cb5 in nbd_client_co_pwrite_zeroes (bs=0x55a6ce450130, offset=2483027968, bytes=16777216, flags=<optimized out>) at block/nbd-client.c:860
        client = <optimized out>
        request = {handle = 94174913593865, from = 2483027968, len = 16777216, flags = 0, type = 6}
        __PRETTY_FUNCTION__ = "nbd_client_co_pwrite_zeroes"
#8  0x55a6cba67f44 in bdrv_co_do_pwrite_zeroes (bs=bs@entry=0x55a6ce450130, offset=offset@entry=2483027968, bytes=bytes@entry=16777216, flags=flags@entry=6) at block/io.c:1410
        num = 16777216
        drv = 0x55a6cc3b0600
        qiov = {iov = 0x10, niov = -834338512, nalloc = 21926, size = 1831862272}
        iov = {iov_base = 0x0, iov_len = 0}
        ret = -95
        need_flush = false
        head = 0
        tail = 0
        max_write_zeroes = 33554432
        alignment = 512
        max_transfer = 16777216
        __PRETTY_FUNCTION__ = "bdrv_co_do_pwrite_zeroes"
#9  0x55a6cba68373 in bdrv_aligned_pwritev (req=req@entry=0x7f1945fe8b50, offset=offset@entry=2483027968, bytes=bytes@entry=16777216, align=align@entry=512, qiov=0x0, flags=6, child=0x55a6ce333f50, child=0x55a6ce333f50) at block/io.c:1522
        bs = 0x55a6ce450130
        drv = 0x55a6cc3b0600
        waited = <optimized out>
        ret = <optimized out>
        end_sector = 4882432
        bytes_remaining = 16777216
        max_transfer = 33554432
#10 0x55a6cba69a42 in bdrv_co_pwritev (req=0x7f1945fe8b50, flags=6, bytes=16777216, offset=2483027968, child=0x55a6ce333f50) at block/io.c:1625
        aligned_bytes = 16777216
        bs = 0x55a6ce450130
        buf = <optimized out>
        tail_padding_bytes = 0
---Type <return> to continue, or q <return> to quit---
        local_qiov = {iov = 0x0, niov = 0, nalloc = 0, size = 1825570816}
        align = 512
        head_padding_bytes = <optimized out>
        ret = 0
        iov = {iov_base = 0x7f1945fe8bc0, iov_len = 1}
        bs = 0x55a6ce450130
        req = {bs = 0x55a6ce450130, offset = 2483027968, bytes = 16777216, type = BDRV_TRACKED_WRITE, serialising = false, overlap_offset = 2483027968, overlap_bytes = 16777216, list = {le_next = 0x0, le_prev = 0x7f19452dbb80}, co = 0x7f1a5c003030, wait_queue = {entries = {sqh_first = 0x0, sqh_last = 0x7f1945fe8b98}}, waiting_for = 0x0}
        align = <optimized out>
        head_buf = 0x0
        tail_buf = 0x0
        local_qiov = {iov = 0x7f1945fe8bc0, niov = 1, nalloc = 0, size = 94171452932608}
        use_local_qiov = false
        ret = <optimized out>
        __PRETTY_FUNCTION__ = "bdrv_co_pwritev"
#11 0x55a6cba69a42 in bdrv_co_pwritev (child=child@entry=0x55a6ce333f50, offset=offset@entry=2483027968, bytes=bytes@entry=16777216, qiov=qiov@entry=0x0, flags=flags@entry=6) at block/io.c:1698
        bs = 0x55a6ce450130
        req = {bs = 0x55a6ce450130, offset = 2483027968, bytes = 16777216, type = BDRV_TRACKED_WRITE, serialising = false, overlap_offset = 2483027968, overlap_bytes = 16777216, list = {le_next = 0x0, le_prev = 0x7f19452dbb80}, co = 0x7f1a5c003030, wait_queue = {entries = {sqh_first = 0x0, sqh_last =

[Qemu-devel] [Bug 1793791] Re: Crash with nbd_reply_chunk_iter_receive: Assertion `chunk->flags & NBD_REPLY_FLAG_DONE' failed

2018-09-21 Thread Matthew Schumacher
Hi Eric,

Thanks for looking at this.

I looked at the nbd/server.c code and couldn't see how it could send a
NBD_REPLY_TYPE_NONE packet without setting the NBD_REPLY_FLAG_DONE bit.
The only place NBD_REPLY_TYPE_NONE is set is on line 1603:

set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE,
handle, 0);
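
For reference, here is the chunk header that call fills in, as I read it
(my transcription of NBDStructuredReplyChunk from include/block/nbd.h;
treat it as an assumption -- the real struct is QEMU_PACKED and the
fields go out big-endian):

#include <stdint.h>
#include <stdio.h>

typedef struct __attribute__((packed)) {
    uint32_t magic;   /* NBD_STRUCTURED_REPLY_MAGIC */
    uint16_t flags;   /* NBD_REPLY_FLAG_DONE on the final chunk */
    uint16_t type;    /* NBD_REPLY_TYPE_NONE here */
    uint64_t handle;  /* echoed from the request */
    uint32_t length;  /* 0: no payload follows */
} StructuredChunkSketch;

int main(void)
{
    printf("header bytes: %zu\n", sizeof(StructuredChunkSketch)); /* 20 */
    return 0;
}

Flags and type travel together in that one 20-byte header, so as far as
I can tell the server really cannot emit TYPE_NONE without DONE from the
call above.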

Anyway, here is the command line generated:

/usr/bin/qemu-system-x86_64 -name guest=dng-smokeping,debug-threads=on
-S -object
secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-7-dng-
smokeping/master-key.aes -machine pc-1.1,accel=kvm,usb=off,dump-guest-
core=off -cpu qemu64,pmu=off -m 4096 -realtime mlock=off -smp
2,sockets=2,cores=1,threads=1 -object iothread,id=iothread1 -uuid
3d0e1603-ad08-4876-9d9f-2d563fac07ea -no-user-config -nodefaults
-chardev socket,id=charmonitor,fd=26,server,nowait -mon
chardev=charmonitor,id=monitor,mode=control -rtc
base=localtime,clock=vm,driftfix=slew -global kvm-
pit.lost_tick_policy=delay -no-shutdown -boot strict=on -device piix3
-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive file=/datastore/vm/dng-
smokeping.raw,format=raw,if=none,id=drive-virtio-
disk0,cache=writeback,aio=threads -device virtio-blk-
pci,iothread=iothread1,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-
disk0,id=virtio-disk0,bootindex=2,write-cache=on -drive if=none,id
=drive-ide0-1-0,readonly=on -device ide-cd,bus=ide.1,unit=0,drive=drive-
ide0-1-0,id=ide0-1-0,bootindex=1 -netdev
tap,fd=28,id=hostnet0,vhost=on,vhostfd=29 -device virtio-net-
pci,netdev=hostnet0,id=net0,mac=52:54:00:1d:da:b9,bus=pci.0,addr=0x3
-device usb-tablet,id=input0,bus=usb.0,port=1 -vnc 0.0.0.0:59 -device
cirrus-vga,id=video0,bus=pci.0,addr=0x2 -device virtio-balloon-
pci,id=balloon0,bus=pci.0,addr=0x4 -sandbox
on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny
-msg timestamp=on


Is there some way to turn on NBD tracing?  I don't see any trace code
around the assert, so I'm guessing it would need to be written.  Is
there a log event in QMP?  Can that be used to trace what is going on?
If so, it would be easy to make libvirt log all of that, which should
tell us what is happening...

If that won't work, I can run the VM outside of libvirt and tell it to
migrate over the QMP socket.




[Qemu-devel] [Bug 1793791] Re: Crash with nbd_reply_chunk_iter_receive: Assertion `chunk->flags & NBD_REPLY_FLAG_DONE' failed

2018-09-21 Thread Matthew Schumacher
Tested with Qemu 3.0.0 and this still happens.

Also tested with kernel 4.9.128 on one side and 4.9.76 on the other,
thinking it might be a kernel 4.14 issue.




[Qemu-devel] [Bug 1793791] [NEW] Crash with nbd_reply_chunk_iter_receive: Assertion `chunk->flags & NBD_REPLY_FLAG_DONE' failed

2018-09-21 Thread Matthew Schumacher
Public bug reported:

Qemu version on both sides: 2.12.1
Host A Linux: 4.9.76
Host B Linux: 4.14.67

While calling from Host A:
virsh migrate virtualmachine qemu+ssh://hostB/system --live --undefinesource 
--persistent --verbose --copy-storage-all

I get a qemu crash with:

2018-09-21 16:12:23.073+: 14428: info : virObjectUnref:350 : OBJECT_UNREF: 
obj=0x7f922c03d990
qemu-system-x86_64: block/nbd-client.c:606: nbd_reply_chunk_iter_receive: 
Assertion `chunk->flags & NBD_REPLY_FLAG_DONE' failed.
2018-09-21 16:12:41.230+: shutting down, reason=crashed
2018-09-21 16:12:52.900+: shutting down, reason=failed

It doesn't do it every time, but most of the time.

** Affects: qemu
 Importance: Undecided
 Status: New




Re: [Qemu-devel] question: I found a qemu crash about migration

2018-01-12 Thread Matthew Schumacher
On 28.09.2017 19:01, Dr. David Alan Gilbert wrote:
> Hi,
>   This is a 'fun' bug;  I had a good chat to kwolf about it earlier.
> A proper fix really needs to be done together with libvirt so that we
> can sequence:
>a) The stopping of the CPU on the source
>b) The termination of the mirroring block job
>c) The inactivation of the block devices on the source
>(bdrv_inactivate_all)
>d) The activation of the block devices on the destination
>(bdrv_invalidate_cache_all)
>e) The start of the CPU on the destination
>
>
> It looks like you're hitting a race between b/c;  we've had races
> between c/d in the past and moved the bdrv_inactivate_all.
>
> During the discussion we ended up with two proposed solutions;
> both of them require one extra command and one extra migration
> capability.
>
> The block way
> -
>1) Add a new migration capability pause-at-complete
>2) Add a new migration state almost-complete
>3) After saving devices, if pause-at-complete is set,
>   transition to almost-complete
>4) Add a new command (migration-continue) that
>   causes the migration to inactivate the devices (c)
>   and send the final EOF to the destination.
>
> You set pause-at-complete, wait until migrate hits almost-complete;
> cleanup the mirror job, and then do migration-continue.  When it
> completes do 'cont' on the destination.
>
> The migration way
> -
>1) Stop doing (d) when the destination is started with -S
>   since it happens anyway when 'cont' is issued
>2) Add a new migration capability ext-manage-storage
>3) When 'ext-manage-storage' is set, we don't bother doing (c)
>4) Add a new command 'block-inactivate' on the source
>
> You set ext-manage-storage, do the migrate and when it's finished
> clean up the block job, block-inactivate on the source, and
> then cont on the destination.
>
>
> My worry about the 'block way' is that the point at which we
> do the pause seems pretty interesting;  it probably is best
> done after the final device save but before the inactivate,
> but could be done before it.  But it probably becomes API
> and something might become dependent on where we did it.
>
> I think Kevin's worry about the 'migration way' is that
> it's a bit of a block-specific fudge; which is probably right.
>
>
> I've not really thought what happens when you have a mix of shared and
> non-shared storage.
>
> Could we do any hack that isn't libvirt-visible for existing versions?
> I guess maybe hack drive-mirror so it interlocks with the migration
> code somehow to hold off on that inactivate?
>
> This code is visible probably from 2.9-ish with the new locking code;
> but really that b/c race has been there forever - there's maybe
> always the chance that the last few blocks of mirroring might have
> happened too late?
>
> Thoughts?
> What are the libvirt view on the preferred solution.
>
> Dave

Devs,

Did this issue ever get addressed?  I'm looking at the history for
mirror.c at https://github.com/qemu/qemu/commits/master/block/mirror.c
and I don't see anything that leads me to believe this was fixed.

I'm still unable to live migrate storage without risking corruption on
even a moderately loaded VM.

Thanks,
schu
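
For anyone skimming, the b/c race Dave describes comes down to this
ordering (a compilable toy, not QEMU code; the helper names are made up,
and the BDRV_O_INACTIVE value is the one quoted in bug 1712818):

#include <assert.h>

#define BDRV_O_INACTIVE 0x0800      /* per the bug 1712818 assertion text */

static int open_flags;              /* stand-in for bs->open_flags */

static void bdrv_inactivate_all_stub(void)
{
    open_flags |= BDRV_O_INACTIVE;  /* (c): source inactivates its disks */
}

static void mirror_late_write_stub(void)
{
    /* the check a write makes in bdrv_co_pwritev() */
    assert(!(open_flags & BDRV_O_INACTIVE));
}

int main(void)
{
    /* (a) CPU stopped ... (b) the mirror job SHOULD be torn down here */
    bdrv_inactivate_all_stub();     /* (c) runs first instead */
    mirror_late_write_stub();       /* straggling mirror write -> SIGABRT */
    return 0;
}

Run the (b) step before (c) and the assert cannot fire; that sequencing
is what both of Dave's proposed solutions are really buying.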



[Qemu-devel] [Bug 1712818] Re: live migration with storage encounter assert(!(bs->open_flags & BDRV_O_INACTIVE)) crashes

2017-12-28 Thread Matthew Schumacher
I see the same thing:

2017-12-28 21:36:26.837+: initiating migration
qemu-system-x86_64: block/io.c:1537: bdrv_co_pwritev: Assertion 
`!(bs->open_flags & BDRV_O_INACTIVE)' failed.
2017-12-28 21:36:40.516+: shutting down, reason=crashed

Running:
QEMU emulator version 2.10.1
libvirtd (libvirt) 3.10.0

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1712818

Title:
  live migration with storage encounter assert(!(bs->open_flags &
  BDRV_O_INACTIVE)) crashes

Status in QEMU:
  New

Bug description:
  The VM guest runs an iotest program, and I migrate it with virsh
  --copy-storage-all; then the qemu process on the source host crashes
  with the following message:

  kvm: block/io.c:1543: bdrv_co_pwritev: Assertion `!(bs->open_flags & 0x0800)' 
failed.
  2017-08-24 11:43:45.919+: shutting down, reason=crashed

  here is the release:
  qemu 2.7 & 2.10.rc3 were tested.
  libvirt 3.0.0 & 3.2.0 were tested.

  command line:
  src_host:virsh migrate --verbose --live --persistent --copy-storage-all 
vm-core qemu+ssh://dst_host/system

  Possible reason: after bdrv_inactivate_all() was called, the
  mirror_run coroutine still writes the remaining dirty disk data to the
  remote nbd server, which triggers the assertion.  But I don't know how
  to avoid the problem; help is needed!  Thanks.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1712818/+subscriptions



[Qemu-devel] [Bug 1570134] Re: While committing snapshot qemu crashes with SIGABRT

2016-04-22 Thread Matthew Schumacher
Running master as of this morning 4/22 and I'm not getting any more
crashes, and I'm flat beating on it.  RC3 still crashes on me, so
whatever the fix is, came after rc3.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1570134

Title:
  While committing snapshot qemu crashes with SIGABRT

Status in QEMU:
  New

Bug description:
  Information:

  OS: Slackware64-Current
  Compiled with: gcc version 5.3.0 (GCC)  / glibc 2.23
  Compiled using: 

  CFLAGS="-O2 -fPIC" \
  CXXFLAGS="-O2 -fPIC" \
  LDFLAGS="-L/usr/lib64" \
  ./configure \
--prefix=/usr \
--sysconfdir=/etc \
--localstatedir=/var \
--libdir=/usr/lib64 \
--enable-spice \
--enable-kvm \
--enable-glusterfs \
--enable-libiscsi \
--enable-libusb \
--target-list=x86_64-softmmu,i386-softmmu \
--enable-debug

  Source: qemu-2.5.1.tar.bz2

  Running as:

  /usr/bin/qemu-system-x86_64 -name test1,debug-threads=on -S -machine
  pc-1.1,accel=kvm,usb=off -m 4096 -realtime mlock=off -smp
  2,sockets=2,cores=1,threads=1 -uuid
  4b30ec13-6609-4a56-8731-d400c38189ef -no-user-config -nodefaults
  -chardev
  
socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-4-test1/monitor.sock,server,nowait
  -mon chardev=charmonitor,id=monitor,mode=control -rtc
  base=localtime,clock=vm,driftfix=slew -global kvm-
  pit.lost_tick_policy=discard -no-shutdown -boot strict=on -device
  piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive
  file=/datastore/vm/test1/test1.img,format=qcow2,if=none,id=drive-
  virtio-disk0 -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive
  =drive-virtio-disk0,id=virtio-disk0,bootindex=2 -drive if=none,id
  =drive-ide0-1-0,readonly=on -device ide-cd,bus=ide.1,unit=0,drive
  =drive-ide0-1-0,id=ide0-1-0,bootindex=1 -netdev
  tap,fd=23,id=hostnet0,vhost=on,vhostfd=25 -device virtio-net-
  pci,netdev=hostnet0,id=net0,mac=52:54:00:66:2e:0f,bus=pci.0,addr=0x3
  -vnc 0.0.0.0:0 -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 -device
  virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 -msg timestamp=on

  File system:  zfs v0.6.5.6

  While running: 
  virsh blockcommit test1 vda --active --pivot --verbose

  VM running very heavy IO load

  GDB reporting:

  #0  0x7fd80132c3f8 in raise () at /lib64/libc.so.6
  #1  0x7fd80132dffa in abort () at /lib64/libc.so.6
  #2  0x7fd801324c17 in __assert_fail_base () at /lib64/libc.so.6
  #3  0x7fd801324cc2 in  () at /lib64/libc.so.6
  #4  0x55d9918d7572 in bdrv_replace_in_backing_chain (old=0x55d993ed9c10, new=0x55d9931ccc10) at block.c:2096
          __PRETTY_FUNCTION__ = "bdrv_replace_in_backing_chain"
  #5  0x55d991911869 in mirror_exit (job=0x55d993fef830, opaque=0x55d999bbefe0) at block/mirror.c:376
          to_replace = 0x55d993ed9c10
          s = 0x55d993fef830
          data = 0x55d999bbefe0
          replace_aio_context = <optimized out>
          src = 0x55d993ed9c10
  #6  0x55d9918da1dc in block_job_defer_to_main_loop_bh (opaque=0x55d9940ce850) at blockjob.c:481
          data = 0x55d9940ce850
          aio_context = 0x55d9931a2610
  #7  0x55d9918d014b in aio_bh_poll (ctx=ctx@entry=0x55d9931a2610) at async.c:92
          bh = <optimized out>
          bhp = <optimized out>
          next = 0x55d99440f910
          ret = 1
  #8  0x55d9918dc8c0 in aio_dispatch (ctx=0x55d9931a2610) at aio-posix.c:305
          node = <optimized out>
          progress = false
  #9  0x55d9918d000e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at async.c:231
          ctx = <optimized out>
  #10 0x7fd8037cf787 in g_main_context_dispatch () at /usr/lib64/libglib-2.0.so.0
  #11 0x55d9918db03b in main_loop_wait () at main-loop.c:211
          context = 0x55d9931a3200
          pfds = <optimized out>
          ret = 0
          spin_counter = 1
          ret = 0
          timeout = 4294967295
          timeout_ns = <optimized out>
  #12 0x55d9918db03b in main_loop_wait (timeout=<optimized out>) at main-loop.c:256
          ret = 0
          spin_counter = 1
          ret = 0
          timeout = 4294967295
          timeout_ns = <optimized out>
  #13 0x55d9918db03b in main_loop_wait (nonblocking=<optimized out>) at main-loop.c:504
          ret = 0
          timeout = 4294967295
          timeout_ns = <optimized out>
  #14 0x55d991679cc4 in main () at vl.c:1923
          nonblocking = <optimized out>
          last_io = 2
          i = <optimized out>
          snapshot = <optimized out>
          linux_boot = <optimized out>
          initrd_filename = <optimized out>
          kernel_filename = <optimized out>
          kernel_cmdline = <optimized out>
          boot_order = <optimized out>
          boot_once = <optimized out>
          ds = <optimized out>
          cyls = <optimized out>
          heads = <optimized out>
          secs = <optimized out>
          translation = <optimized out>
          hda_opts = <optimized out>
          opts = <optimized out>
          machine_opts = <optimized out>
          icount_opts = <optimized out>
          olist = <optimized out>
          optind = 49
          optarg = 0x7fffc6d27f43 "timestamp=on"
          loadvm = <optimized out>
          machine_class = 0x55d993194d10
          cpu_model = <optimized out>
          vga_model = 0x0
          qtest_chrdev = <optimized out>
          qtest_log = <optimized out>
          pid_file = <optimized out>
          incoming = <optimized out>

[Qemu-devel] [Bug 1570134] Re: While committing snapshot qemu crashes with SIGABRT

2016-04-19 Thread Matthew Schumacher
Max,

Qemu still crashes for me, but the debug is again very different.  When
I attach to the qemu process from gdb, it is unable to provide a
backtrace when it crashes.  The log file is different too.  Any ideas?

qemu-system-x86_64: block.c:2307: bdrv_replace_in_backing_chain:
Assertion `!bdrv_requests_pending(old)' failed.

(gdb) attach 5563
Attaching to process 5563
Reading symbols from /usr/bin/qemu-system-x86_64...done.
Reading symbols from /usr/lib64/libepoxy.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libdrm.so.2...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libgbm.so.1...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libX11.so.6...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libz.so.1...(no debugging symbols found)...done.
Reading symbols from /lib64/libaio.so.1...(no debugging symbols found)...done.
Reading symbols from /usr/lib64/libiscsi.so.4...done.
Reading symbols from /usr/lib64/libcurl.so.4...(no debugging symbols 
found)...done.
Reading symbols from /lib64/libacl.so.1...(no debugging symbols found)...done.
Reading symbols from /usr/lib64/libgfapi.so.0...done.
Reading symbols from /usr/lib64/libglusterfs.so.0...done.
Reading symbols from /usr/lib64/libgfrpc.so.0...done.
Reading symbols from /usr/lib64/libgfxdr.so.0...done.
Reading symbols from /lib64/libuuid.so.1...(no debugging symbols found)...done.
Reading symbols from /usr/lib64/libssh2.so.1...done.
Reading symbols from /lib64/libbz2.so.1...(no debugging symbols found)...done.
Reading symbols from /usr/lib64/libpixman-1.so.0...(no debugging symbols 
found)...done.
Reading symbols from /lib64/libutil.so.1...(no debugging symbols found)...done.
Reading symbols from /lib64/libncurses.so.5...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libpng16.so.16...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libjpeg.so.62...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libsasl2.so.3...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libSDL-1.2.so.0...(no debugging symbols 
found)...done.
Reading symbols from /lib64/libpthread.so.0...(no debugging symbols 
found)...done.
[New LWP 5588]
[New LWP 5587]
[New LWP 5586]
[New LWP 5585]
[New LWP 5584]
[New LWP 5583]
[New LWP 5582]
[New LWP 5581]
[New LWP 5580]
[New LWP 5579]
[New LWP 5578]
[New LWP 5577]
[New LWP 5576]
[New LWP 5575]
[New LWP 5574]
[New LWP 5573]
[New LWP 5572]
[New LWP 5571]
[New LWP 5570]
[New LWP 5568]
[New LWP 5567]
[New LWP 5566]
[New LWP 5564]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Reading symbols from /usr/lib64/libvte.so.9...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libgtk-x11-2.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libgdk-x11-2.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libpangocairo-1.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libatk-1.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libgdk_pixbuf-2.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libpangoft2-1.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libpango-1.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libfontconfig.so.1...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libfreetype.so.6...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libgio-2.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libgobject-2.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libglib-2.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libcairo.so.2...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libXext.so.6...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libnettle.so.6...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libgnutls.so.30...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/liblzo2.so.2...done.
Reading symbols from /usr/lib64/libspice-server.so.1...done.
Reading symbols from /usr/lib64/libcacard.so.0...done.
Reading symbols from /usr/lib64/libusb-1.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /usr/lib64/libgthread-2.0.so.0...(no debugging symbols 
found)...done.
Reading symbols from /lib64/librt.so.1...(no debugging symbols found)...done.
Reading symbols from /usr/lib64/libstdc++.so.6...(no debugging symbols 
found)...done.
Reading symbols from /lib64/libm.so.6...(no debugging symbols found)...done.
Reading symbols from /usr/lib64/libgcc_s.so.1...(no debugging symbols 
found)...done.
Reading symbols from /lib64/libc.so.6...(no debugging symbols found)...done.

[Qemu-devel] [Bug 1570134] Re: While committing snapshot qemu crashes with SIGABRT

2016-04-18 Thread Matthew Schumacher
Thank you for working on this.  Super helpful to have someone looking at
this issue!

With those two patches applied to 2.6.0-rc2 I still get the following:

qemu-system-x86_64: block/mirror.c:342: mirror_iteration: Assertion
`hbitmap_next == next_sector' failed.

The line number confirms that qemu was patched before it was compiled.
Here is the full backtrace:

#0  0x7f4e5aa213f8 in raise () at /lib64/libc.so.6
#1  0x7f4e5aa22ffa in abort () at /lib64/libc.so.6
#2  0x7f4e5aa19c17 in __assert_fail_base () at /lib64/libc.so.6
#3  0x7f4e5aa19cc2 in  () at /lib64/libc.so.6
#4  0x564d5afc1dab in mirror_run (s=0x564d5eb9c2d0) at block/mirror.c:342
        hbitmap_next = <optimized out>
        next_sector = 29561984
        next_chunk = 230953
        nb_chunks = 4
        end = 209715200
        sectors_per_chunk = 128
        source = 0x564d5d273b00
        sector_num = 29561472
        delay_ns = 0
        delay_ns = 0
        cnt = <optimized out>
        should_complete = <optimized out>
        s = 0x564d5eb9c2d0
        data = <optimized out>
        bs = 0x564d5d273b00
        sector_num = <optimized out>
        end = <optimized out>
        length = <optimized out>
        last_pause_ns = <optimized out>
        bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false}
        backing_filename = "\000\060"
        ret = <optimized out>
        n = 1048576
        target_cluster_size = <optimized out>
        __PRETTY_FUNCTION__ = "mirror_run"
#5  0x564d5afc1dab in mirror_run (opaque=0x564d5eb9c2d0) at block/mirror.c:619
        delay_ns = 0
        cnt = <optimized out>
        should_complete = <optimized out>
        s = 0x564d5eb9c2d0
        data = <optimized out>
        bs = 0x564d5d273b00
        sector_num = <optimized out>
        end = <optimized out>
        length = <optimized out>
        last_pause_ns = <optimized out>
        bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false}
        backing_filename = "\000\060"
        ret = <optimized out>
        n = 1048576
        target_cluster_size = <optimized out>
        __PRETTY_FUNCTION__ = "mirror_run"
#6  0x564d5b027e4a in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at util/coroutine-ucontext.c:78
        self = 0x564d5eacc520
        co = 0x564d5eacc520
#7  0x7f4e5aa36560 in __start_context () at /lib64/libc.so.6
#8  0x7ffc151258c0 in  ()
#9  0x in  ()
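
To make the failing invariant concrete: mirror_iteration computes the
next sector it intends to copy, then asserts that the dirty-bitmap
iterator hands back exactly that sector.  A toy model of the mismatch
(not QEMU code; the iterator's return value is invented, next_sector is
the one from frame #4 above):

#include <assert.h>
#include <stdint.h>

/* stand-in for hbitmap_iter_next(); pretend the bitmap changed under
 * heavy guest IO after next_sector was chosen */
static int64_t hbitmap_iter_next_stub(void)
{
    return 29561472;  /* invented value, != next_sector */
}

int main(void)
{
    int64_t next_sector = 29561984;  /* from the backtrace */
    int64_t hbitmap_next = hbitmap_iter_next_stub();
    assert(hbitmap_next == next_sector);  /* fires, as in the report */
    return 0;
}

If anything can dirty or clear bits between those two steps, the values
diverge, which would fit this only showing up under heavy IO load.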

[Qemu-devel] [Bug 1570134] Re: While committing snapshot qemu crashes with SIGABRT

2016-04-15 Thread Matthew Schumacher
I just tested master, and it does the same as 2.6.0-rc.

The 2.6.0 branch crashes much faster than 2.5.x.

[Qemu-devel] [Bug 1570134] Re: While committing snapshot qemu crashes with SIGABRT

2016-04-14 Thread Matthew Schumacher
It still fails with ext4:

#0  0x7fbaa12b33f8 in raise () at /lib64/libc.so.6
#1  0x7fbaa12b4ffa in abort () at /lib64/libc.so.6
#2  0x7fbaa12abc17 in __assert_fail_base () at /lib64/libc.so.6
#3  0x7fbaa12abcc2 in  () at /lib64/libc.so.6
#4  0x5646b990f926 in mirror_run (s=0x5646bc50f480) at block/mirror.c:335
        next_sector = 36659200
        next_chunk = 286400
        nb_chunks = 80
        end = 209715200
        sectors_per_chunk = 128
        source = 0x5646bcb7
        sector_num = 36648960
        delay_ns = 0
        delay_ns = 0
        cnt = 15360
        should_complete = <optimized out>
        s = 0x5646bc50f480
        data = <optimized out>
        bs = 0x5646bcb7
        sector_num = <optimized out>
        end = <optimized out>
        length = <optimized out>
        last_pause_ns = <optimized out>
        bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false}
        backing_filename = "\000"
        ret = <optimized out>
        n = 1048576
        target_cluster_size = <optimized out>
        __PRETTY_FUNCTION__ = "mirror_run"
#5  0x5646b990f926 in mirror_run (opaque=0x5646bc50f480) at block/mirror.c:613
        delay_ns = 0
        cnt = 15360
        should_complete = <optimized out>
        s = 0x5646bc50f480
        data = <optimized out>
        bs = 0x5646bcb7
        sector_num = <optimized out>
        end = <optimized out>
        length = <optimized out>
        last_pause_ns = <optimized out>
        bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false}
        backing_filename = "\000"
        ret = <optimized out>
        n = 1048576
        target_cluster_size = <optimized out>
        __PRETTY_FUNCTION__ = "mirror_run"
#6  0x5646b997568a in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at util/coroutine-ucontext.c:78
        self = 0x5646bc5115b0
        co = 0x5646bc5115b0
#7  0x7fbaa12c8560 in __start_context () at /lib64/libc.so.6
#8  0x5646bd2b98b0 in  ()
#9  0x in  ()

qemu-system-x86_64: block/mirror.c:335: mirror_iteration: Assertion
`hbitmap_next == next_sector' failed.


I can't seem to get stable snapshotting and blockpull with a loaded VM.

Interestingly enough, the last command libvirt passes to qemu is:

2016-04-14 20:47:58.196+: 18932: debug : qemuMonitorJSONCommandWithFd:294 : 
Send command '{"execute":"query-block-jobs","id":"libvirt-69"}' for write with 
FD -1
2016-04-14 20:47:58.196+: 18932: info : qemuMonitorSend:1005 : 
QEMU_MONITOR_SEND_MSG: mon=0x7f1874001a30 
msg={"execute":"query-block-jobs","id":"libvirt-69"}
2016-04-14 20:47:58.197+: 18929: info : qemuMonitorIOWrite:529 : 
QEMU_MONITOR_IO_WRITE: mon=0x7f1874001a30 
buf={"execute":"query-block-jobs","id":"libvirt-69"}


Odd that it would SIGABRT on a simple query-block-jobs.

Even more interesting is that it crashes on the first or second or third
snapshot/block-commit cycle when using EXT4, but would sometimes go for
30-40 cycles on ZFS.

Any ideas?  I'm certainly willing to test and help in any way I can.

Thanks!


[Qemu-devel] [Bug 1570134] Re: While committing snapshot qemu crashes with SIGABRT

2016-04-14 Thread Matthew Schumacher
Sure, I did the same test and still got a SIGABRT, but the debug looks a
little different:

Backtrace:

#0  0x7f8f0d46a3f8 in raise () at /lib64/libc.so.6
#1  0x7f8f0d46bffa in abort () at /lib64/libc.so.6
#2  0x7f8f0d462c17 in __assert_fail_base () at /lib64/libc.so.6
#3  0x7f8f0d462cc2 in  () at /lib64/libc.so.6
#4  0x55ff4ce33926 in mirror_run (s=0x55ff4fc00dd0) at block/mirror.c:335
        next_sector = 31174784
        next_chunk = 243553
        nb_chunks = 29
        end = 209715200
        sectors_per_chunk = 128
        source = 0x55ff4e1eb050
        sector_num = 31171072
        delay_ns = 0
        delay_ns = 0
        cnt = 157184
        should_complete = <optimized out>
        s = 0x55ff4fc00dd0
        data = <optimized out>
        bs = 0x55ff4e1eb050
        sector_num = <optimized out>
        end = <optimized out>
        length = <optimized out>
        last_pause_ns = <optimized out>
        bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false}
        backing_filename = "\000\021"
        ret = <optimized out>
        n = 1048576
        target_cluster_size = <optimized out>
        __PRETTY_FUNCTION__ = "mirror_run"
#5  0x55ff4ce33926 in mirror_run (opaque=0x55ff4fc00dd0) at block/mirror.c:613
        delay_ns = 0
        cnt = 157184
        should_complete = <optimized out>
        s = 0x55ff4fc00dd0
        data = <optimized out>
        bs = 0x55ff4e1eb050
        sector_num = <optimized out>
        end = <optimized out>
        length = <optimized out>
        last_pause_ns = <optimized out>
        bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false}
        backing_filename = "\000\021"
        ret = <optimized out>
        n = 1048576
        target_cluster_size = <optimized out>
        __PRETTY_FUNCTION__ = "mirror_run"
#6  0x55ff4ce9968a in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at util/coroutine-ucontext.c:78
        self = 0x55ff4f6c2c80
        co = 0x55ff4f6c2c80
#7  0x7f8f0d47f560 in __start_context () at /lib64/libc.so.6
#8  0x7ffc759cb060 in  ()
#9  0x in  ()

I get this in the log:

qemu-system-x86_64: block/mirror.c:335: mirror_iteration: Assertion
`hbitmap_next == next_sector' failed.


The system was compiled like this:

Install prefix        /usr
BIOS directory        /usr/share/qemu
binary directory      /usr/bin
library directory     /usr/lib64
module directory      /usr/lib64/qemu
libexec directory     /usr/libexec
include directory     /usr/include
config directory      /etc
local state directory /var
Manual directory      /usr/share/man
ELF interp prefix     /usr/gnemul/qemu-%M
Source path           /tmp/qemu-2.6.0-rc1
C compiler            cc
Host C compiler       cc
C++ compiler          c++
Objective-C compiler  clang
ARFLAGS               rv
CFLAGS                -pthread -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -g -O2 -fPIC
QEMU_CFLAGS           -I/usr/include/pixman-1 -I$(SRC_PATH)/dtc/libfdt -DHAS_LIBSSH2_SFTP_FSYNC -fPIE -DPIE -m64 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common -Wendif-labels -Wmissing-include-dirs -Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration -Wold-style-definition -Wtype-limits -fstack-protector-strong -I/usr/include/p11-kit-1 -I/usr/include/libpng16 -I/usr/include/spice-server -I/usr/include/cacard -I/usr/include/nss -I/usr/include/nspr -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -I/usr/include/pixman-1 -I/usr/include/spice-1 -I/usr/include/cacard -I/usr/include/nss -I/usr/include/nspr -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -I/usr/include/libusb-1.0
LDFLAGS               -Wl,--warn-common -Wl,-z,relro -Wl,-z,now -pie -m64 -g -L/usr/lib64
make                  make
install               install
python                python -B
smbd                  /usr/sbin/smbd
module support        no
host CPU              x86_64
host big endian       no
target list           x86_64-softmmu i386-softmmu
tcg debug enabled     yes
gprof enabled         no
sparse enabled        no
strip binaries        no
profiler              no
static build          no
pixman                system
SDL support           yes
GTK support           yes
GTK GL support        no
GNUTLS support        yes
GNUTLS hash           yes
GNUTLS rnd            yes
libgcrypt             no
libgcrypt kdf         no
nettle                yes (3.2)
nettle kdf            yes
libtasn1              yes
VTE support           yes
curses support        yes
virgl support         no
curl support          yes
mingw32 support       no
Audio drivers         oss
Block whitelist (rw)
Block whitelist (ro)
VirtFS support        yes
VNC support           yes
VNC SASL support      yes
VNC JPEG support      yes
VNC PNG support       yes
xen support           no
brlapi support        no
bluez support         no
Documentation         yes
PIE                   yes
vde support           no
netmap support        no
Linux AIO support     yes
ATTR/XATTR support    yes
Install blobs         yes
KVM support           yes
RDMA

[Qemu-devel] [Bug 1570134] [NEW] While committing snapshot qemu crashes with SIGABRT

2016-04-13 Thread Matthew Schumacher
Public bug reported:

Information:

OS: Slackware64-Current
Compiled with: gcc version 5.3.0 (GCC)  / glibc 2.23
Compiled using: 

CFLAGS="-O2 -fPIC" \
CXXFLAGS="-O2 -fPIC" \
LDFLAGS="-L/usr/lib64" \
./configure \
  --prefix=/usr \
  --sysconfdir=/etc \
  --localstatedir=/var \
  --libdir=/usr/lib64 \
  --enable-spice \
  --enable-kvm \
  --enable-glusterfs \
  --enable-libiscsi \
  --enable-libusb \
  --target-list=x86_64-softmmu,i386-softmmu \
  --enable-debug

Source: qemu-2.5.1.tar.bz2

Running as:

/usr/bin/qemu-system-x86_64 -name test1,debug-threads=on -S -machine
pc-1.1,accel=kvm,usb=off -m 4096 -realtime mlock=off -smp
2,sockets=2,cores=1,threads=1 -uuid 4b30ec13-6609-4a56-8731-d400c38189ef
-no-user-config -nodefaults -chardev
socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-4-test1/monitor.sock,server,nowait
-mon chardev=charmonitor,id=monitor,mode=control -rtc
base=localtime,clock=vm,driftfix=slew -global kvm-
pit.lost_tick_policy=discard -no-shutdown -boot strict=on -device piix3
-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive
file=/datastore/vm/test1/test1.img,format=qcow2,if=none,id=drive-virtio-
disk0 -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-
virtio-disk0,id=virtio-disk0,bootindex=2 -drive if=none,id=drive-
ide0-1-0,readonly=on -device ide-cd,bus=ide.1,unit=0,drive=drive-
ide0-1-0,id=ide0-1-0,bootindex=1 -netdev
tap,fd=23,id=hostnet0,vhost=on,vhostfd=25 -device virtio-net-
pci,netdev=hostnet0,id=net0,mac=52:54:00:66:2e:0f,bus=pci.0,addr=0x3
-vnc 0.0.0.0:0 -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 -device
virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 -msg timestamp=on

File system:  zfs v0.6.5.6

While running: 
virsh blockcommit test1 vda --active --pivot --verbose

VM running very heavy IO load

GDB reporting:

#0  0x7fd80132c3f8 in raise () at /lib64/libc.so.6
#1  0x7fd80132dffa in abort () at /lib64/libc.so.6
#2  0x7fd801324c17 in __assert_fail_base () at /lib64/libc.so.6
#3  0x7fd801324cc2 in  () at /lib64/libc.so.6
#4  0x55d9918d7572 in bdrv_replace_in_backing_chain (old=0x55d993ed9c10, new=0x55d9931ccc10) at block.c:2096
        __PRETTY_FUNCTION__ = "bdrv_replace_in_backing_chain"
#5  0x55d991911869 in mirror_exit (job=0x55d993fef830, opaque=0x55d999bbefe0) at block/mirror.c:376
        to_replace = 0x55d993ed9c10
        s = 0x55d993fef830
        data = 0x55d999bbefe0
        replace_aio_context = <optimized out>
        src = 0x55d993ed9c10
#6  0x55d9918da1dc in block_job_defer_to_main_loop_bh (opaque=0x55d9940ce850) at blockjob.c:481
        data = 0x55d9940ce850
        aio_context = 0x55d9931a2610
#7  0x55d9918d014b in aio_bh_poll (ctx=ctx@entry=0x55d9931a2610) at async.c:92
        bh = <optimized out>
        bhp = <optimized out>
        next = 0x55d99440f910
        ret = 1
#8  0x55d9918dc8c0 in aio_dispatch (ctx=0x55d9931a2610) at aio-posix.c:305
        node = <optimized out>
        progress = false
#9  0x55d9918d000e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at async.c:231
        ctx = <optimized out>
#10 0x7fd8037cf787 in g_main_context_dispatch () at /usr/lib64/libglib-2.0.so.0
#11 0x55d9918db03b in main_loop_wait () at main-loop.c:211
        context = 0x55d9931a3200
        pfds = <optimized out>
        ret = 0
        spin_counter = 1
        ret = 0
        timeout = 4294967295
        timeout_ns = <optimized out>
#12 0x55d9918db03b in main_loop_wait (timeout=<optimized out>) at main-loop.c:256
        ret = 0
        spin_counter = 1
        ret = 0
        timeout = 4294967295
        timeout_ns = <optimized out>
#13 0x55d9918db03b in main_loop_wait (nonblocking=<optimized out>) at main-loop.c:504
        ret = 0
        timeout = 4294967295
        timeout_ns = <optimized out>
#14 0x55d991679cc4 in main () at vl.c:1923
        nonblocking = <optimized out>
        last_io = 2
        i = <optimized out>
        snapshot = <optimized out>
        linux_boot = <optimized out>
        initrd_filename = <optimized out>
        kernel_filename = <optimized out>
        kernel_cmdline = <optimized out>
        boot_order = <optimized out>
        boot_once = <optimized out>
        ds = <optimized out>
        cyls = <optimized out>
        heads = <optimized out>
        secs = <optimized out>
        translation = <optimized out>
        hda_opts = <optimized out>
        opts = <optimized out>
        machine_opts = <optimized out>
        icount_opts = <optimized out>
        olist = <optimized out>
        optind = 49
        optarg = 0x7fffc6d27f43 "timestamp=on"
        loadvm = <optimized out>
        machine_class = 0x55d993194d10
        cpu_model = <optimized out>
        vga_model = 0x0
        qtest_chrdev = <optimized out>
        qtest_log = <optimized out>
        pid_file = <optimized out>
        incoming = <optimized out>
        defconfig = <optimized out>
        userconfig = false
        log_mask = <optimized out>
        log_file = <optimized out>
        trace_events = <optimized out>
        trace_file = <optimized out>
        maxram_size = <optimized out>
        ram_slots = <optimized out>
        vmstate_dump_file = <optimized out>
        main_loop_err = 0x0
        err = 0x0
        __func__ = "main"
#15 0x55d991679cc4 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4699
        i = <optimized out>
        snapshot = <optimized out>
        linux_boot = <optimized out>
        initrd_filename = <optimized out>
        kernel_filename = <optimized out>
        kernel_cmdline = <optimized out>
        boot_order = <optimized out>
        boot_once = <optimized out>
        ds = <optimized out>
        cyls = <optimized out>
        heads = <optimized out>
        secs = <optimized out>
        translation = <optimized out>
        hda_opts = <optimized out>
        opts =