The big conversion of bdrv_read/write to coroutines caused the two homonymous callbacks in BlockDriver to become reentrant. It goes like this:
1) bdrv_read is now called in a coroutine, and calls bdrv_read or bdrv_pread. 2) the nested bdrv_read goes through the fast path in bdrv_rw_co_entry; 3) in the common case when the protocol is file, bdrv_co_do_readv calls bdrv_co_readv_em (and from here goes to bdrv_co_io_em), which yields until the AIO operation is complete; 4) if bdrv_read had been called from a bottom half, the main loop is free to iterate again: a device model or another bottom half can then come and call bdrv_read again. This applies to all four of read/write/flush/discard. It would also apply to is_allocated, but it is not used from within coroutines: besides qemu-img.c and qemu-io.c, which operate synchronously, the only user is the monitor. Copy-on-read will introduce a use in the block layer, and will require converting it. The solution is "simply" to convert all drivers to coroutines! We have nothing to do for the read-only drivers. For the others, we add a Rwlock that is taken around affected operations. Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- block/cow.c | 2 ++ block/nbd.c | 2 ++ block/vmdk.c | 2 ++ block/vpc.c | 2 ++ block/vvfat.c | 2 ++ 5 files changed, 10 insertions(+), 0 deletions(-) diff --git a/block/cow.c b/block/cow.c index 4cf543c..d27e0aa 100644 --- a/block/cow.c +++ b/block/cow.c @@ -42,6 +42,7 @@ struct cow_header_v2 { }; typedef struct BDRVCowState { + CoRwlock lock; int64_t cow_sectors_offset; } BDRVCowState; @@ -84,6 +85,7 @@ static int cow_open(BlockDriverState *bs, int flags) bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header); s->cow_sectors_offset = (bitmap_size + 511) & ~511; + qemu_co_rwlock_init(&s->lock); return 0; fail: return -1; diff --git a/block/nbd.c b/block/nbd.c index 76f04d8..ec8f086 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -47,6 +47,7 @@ #endif typedef struct BDRVNBDState { + CoRwlock lock; int sock; uint32_t nbdflags; off_t size; @@ -175,6 +176,7 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags) */ result = nbd_establish_connection(bs); + qemu_co_rwlock_init(&s->lock); return result; } diff --git a/block/vmdk.c b/block/vmdk.c index 12b38d2..6afd53e 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -90,6 +90,7 @@ typedef struct VmdkExtent { } VmdkExtent; typedef struct BDRVVmdkState { + CoRwlock lock; int desc_offset; bool cid_updated; uint32_t parent_cid; @@ -644,6 +645,7 @@ static int vmdk_open(BlockDriverState *bs, int flags) goto fail; } s->parent_cid = vmdk_read_cid(bs, 1); + qemu_co_rwlock_init(&s->lock); return ret; fail: diff --git a/block/vpc.c b/block/vpc.c index 549a632..7220488 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -110,6 +110,7 @@ struct vhd_dyndisk_header { }; typedef struct BDRVVPCState { + CoRwlock lock; uint8_t footer_buf[HEADER_SIZE]; uint64_t free_data_block_offset; int max_table_entries; @@ -226,6 +227,7 @@ static int vpc_open(BlockDriverState *bs, int flags) s->last_pagetable = -1; #endif + qemu_co_rwlock_init(&s->lock); return 0; fail: return err; diff --git a/block/vvfat.c b/block/vvfat.c index c567697..08a72ee 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -317,6 +317,7 @@ static void print_mapping(const struct mapping_t* mapping); /* here begins the real VVFAT driver */ typedef struct BDRVVVFATState { + CoRwlock lock; BlockDriverState* bs; /* pointer to parent */ unsigned int first_sectors_number; /* 1 for a single partition, 0x40 for a disk with partition table */ unsigned char first_sectors[0x40*0x200]; @@ -1063,6 +1064,7 @@ DLOG(if (stderr == NULL) { } // assert(is_consistent(s)); + qemu_co_rwlock_init(&s->lock); return 0; } -- 1.7.6