Am 12.05.2015 um 08:09 hat Fam Zheng geschrieben: > For zero write, callers pass in NULL qiov (qemu-io "write -z" or > scsi-disk "write same"). > > Commit fc3959e466 fixed bdrv_co_write_zeroes which is the common case > for this bug, but it still exists in bdrv_aio_write_zeroes. A simpler > fix would be in bdrv_co_do_pwritev which is the NULL dereference point > and covers both cases. > > So don't access it in bdrv_co_do_pwritev in this case, use three aligned > writes. > > Signed-off-by: Fam Zheng <f...@redhat.com> > --- > block/io.c | 95 > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 95 insertions(+) > > diff --git a/block/io.c b/block/io.c > index 4e5a92e..d766220 100644 > --- a/block/io.c > +++ b/block/io.c > @@ -1174,6 +1174,97 @@ static int coroutine_fn > bdrv_aligned_pwritev(BlockDriverState *bs, > return ret; > } > > +static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, > + int64_t offset, > + unsigned int bytes, > + BdrvRequestFlags flags) > +{ > + BdrvTrackedRequest req; > + uint8_t *buf = NULL; > + QEMUIOVector local_qiov; > + struct iovec iov; > + uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); > + unsigned int head_padding_bytes, tail_padding_bytes; > + int ret; > + > + head_padding_bytes = offset & (align - 1); > + tail_padding_bytes = align - ((offset + bytes) & (align - 1));
Don't we have macros for these calculations? > + tracked_request_begin(&req, bs, offset, bytes, true); Why duplicate this when it would already be the next line in bdrv_co_do_pwritev()? > + mark_request_serialising(&req, align); > + wait_serialising_requests(&req); So this patch serialises all zero writes, even if they are perfectly aligned? Why? Actually, even for misaligned requests, I think the part in the middle doesn't require any serialisation, only the RMW parts do. > + assert(flags & BDRV_REQ_ZERO_WRITE); > + if (head_padding_bytes || tail_padding_bytes) { > + buf = qemu_blockalign(bs, align); > + iov = (struct iovec) { > + .iov_base = buf, > + .iov_len = align, > + }; > + qemu_iovec_init_external(&local_qiov, &iov, 1); > + } > + if (head_padding_bytes) { > + uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes); > + > + /* RMW the unaligned part before head. */ > + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); > + ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, > + align, &local_qiov, 0); > + if (ret < 0) { > + goto fail; > + } > + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); > + > + memset(buf + head_padding_bytes, 0, zero_bytes); > + ret = bdrv_aligned_pwritev(bs, &req, offset & ~(align - 1), align, > + &local_qiov, > + flags & ~BDRV_REQ_ZERO_WRITE); > + if (ret < 0) { > + goto fail; > + } > + offset += zero_bytes; > + bytes -= zero_bytes; > + } > + > + assert((offset & (align - 1)) == 0); > + if (bytes >= align) { > + /* Write the aligned part in the middle. */ > + uint64_t aligned_bytes = bytes & ~(align - 1); > + ret = bdrv_aligned_pwritev(bs, &req, offset, aligned_bytes, > + NULL, flags); > + if (ret < 0) { > + goto fail; > + } > + bytes -= aligned_bytes; > + offset += aligned_bytes; > + } > + > + assert((offset & (align - 1)) == 0); > + if (bytes) { > + assert(align == tail_padding_bytes + bytes); > + /* RMW the unaligned part after tail. */ > + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); > + ret = bdrv_aligned_preadv(bs, &req, offset, align, > + align, &local_qiov, 0); > + if (ret < 0) { > + goto fail; > + } > + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); > + > + memset(buf, 0, bytes); > + printf("tail part %ld %d\n", offset, bytes); > + ret = bdrv_aligned_pwritev(bs, &req, offset, align, > + &local_qiov, flags & > ~BDRV_REQ_ZERO_WRITE); > + } > +fail: > + tracked_request_end(&req); > + if (buf) { > + qemu_vfree(buf); > + } > + return ret; > + > +} > + > /* > * Handle a write request in coroutine context > */ > @@ -1207,6 +1298,10 @@ static int coroutine_fn > bdrv_co_do_pwritev(BlockDriverState *bs, > bdrv_io_limits_intercept(bs, bytes, true); > } > > + if (!qiov) { > + return bdrv_co_do_zero_pwritev(bs, offset, bytes, flags); > + } > + > /* > * Align write if necessary by performing a read-modify-write cycle. > * Pad qiov with the read parts and be sure to have a tracked request not Kevin