02.10.2019 17:22, Andrey Shinkevich wrote: > QEMU currently supports writing compressed data of size less than or > equal to one cluster. This patch allows writing QCOW2 compressed data > that exceed one cluster. The implementation is simple, we split buffered > data into separate clusters and write them using the existing > functionality. For unaligned requests, we use a workaround that writes > the data without compression. > > Suggested-by: Pavel Butsykin <pbutsy...@virtuozzo.com> > Suggested-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com> > Signed-off-by: Andrey Shinkevich <andrey.shinkev...@virtuozzo.com> > --- > block/qcow2.c | 113 > +++++++++++++++++++++++++++++++++++++++++++--------------- > 1 file changed, 85 insertions(+), 28 deletions(-) > > diff --git a/block/qcow2.c b/block/qcow2.c > index 7961c05..54ccaf6 100644 > --- a/block/qcow2.c > +++ b/block/qcow2.c > @@ -4152,10 +4152,8 @@ fail: > return ret; > } > > -/* XXX: put compressed sectors first, then all the cluster aligned > - tables to avoid losing bytes in alignment */ > static coroutine_fn int > -qcow2_co_pwritev_compressed_part(BlockDriverState *bs, > +qcow2_co_pwritev_compressed_task(BlockDriverState *bs, > uint64_t offset, uint64_t bytes, > QEMUIOVector *qiov, size_t qiov_offset) > { > @@ -4165,36 +4163,14 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, > uint8_t *buf, *out_buf; > uint64_t cluster_offset; > > - if (has_data_file(bs)) { > - return -ENOTSUP; > - } > - > - if (bytes == 0) { > - /* align end of file to a sector boundary to ease reading with > - sector based I/Os */ > - int64_t len = bdrv_getlength(bs->file->bs); > - if (len < 0) { > - return len; > - } > - return bdrv_co_truncate(bs->file, len, PREALLOC_MODE_OFF, NULL); > - } > - > - if (offset_into_cluster(s, offset)) { > - return -EINVAL; > - } > + assert(bytes <= s->cluster_size);
I think we'd better assert assert(bytes == s->cluster_size || (bytes < s->cluster_size && (offset + bytes == bdrv_getlength()) to match old logic.jkjkj > > buf = qemu_blockalign(bs, s->cluster_size); > - if (bytes != s->cluster_size) { > - if (bytes > s->cluster_size || > - offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS) > - { > - qemu_vfree(buf); > - return -EINVAL; > - } > + if (bytes < s->cluster_size) { > /* Zero-pad last write if image size is not cluster aligned */ > memset(buf + bytes, 0, s->cluster_size - bytes); > } > - qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes); > + qemu_iovec_to_buf(qiov, qiov_offset, buf, s->cluster_size); Why did you changed it to s->cluster_size? bytes may be less than cluster > > out_buf = g_malloc(s->cluster_size); > > @@ -4228,6 +4204,9 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, > > BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED); > ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0); > + if (ret == -ENOTSUP) { > + ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0); > + } It should not be here: 1. write will unlikely return ENOTSUP 2. if you mean fallback to normal write if we failed to compress, it's done above already. > if (ret < 0) { > goto fail; > } > @@ -4239,6 +4218,84 @@ fail: > return ret; > } > > +static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task) > +{ > + Qcow2AioTask *t = container_of(task, Qcow2AioTask, task); > + > + assert(!t->cluster_type); and assert(!t->l2meta) > + > + return qcow2_co_pwritev_compressed_task(t->bs, t->offset, t->bytes, > t->qiov, > + t->qiov_offset); > +} > + > +/* > + * XXX: put compressed sectors first, then all the cluster aligned > + tables to avoid losing bytes in alignment > + */ > +static coroutine_fn int > +qcow2_co_pwritev_compressed_part(BlockDriverState *bs, > + uint64_t offset, uint64_t bytes, > + QEMUIOVector *qiov, size_t qiov_offset) > +{ > + BDRVQcow2State *s = bs->opaque; > + QCowL2Meta *l2meta = NULL; it's always NULL and actually unused. > + AioTaskPool *aio = NULL; > + uint64_t curr_off = 0; > + int ret; > + > + if (has_data_file(bs)) { > + return -ENOTSUP; > + } > + > + if (bytes == 0) { > + /* > + * align end of file to a sector boundary to ease reading with > + * sector based I/Os > + */ > + int64_t cluster_offset = bdrv_getlength(bs->file->bs); Hmm, I like old variable name 'len' more. > + if (cluster_offset < 0) { > + return cluster_offset; > + } > + return bdrv_co_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, > + NULL); > + } > + > + if (offset_into_cluster(s, offset)) { > + return -EINVAL; > + } > + > + while (bytes && aio_task_pool_status(aio) == 0) { > + uint32_t chunk_size = MIN(bytes, s->cluster_size); > + > + assert((curr_off & (BDRV_SECTOR_SIZE - 1)) == 0); > + assert((chunk_size & (BDRV_SECTOR_SIZE - 1)) == 0); first: always use QEMU_IS_ALIGNED instead second: why do you add these asserts, do we need them? > + > + if (!aio && chunk_size != bytes) { > + aio = aio_task_pool_new(QCOW2_MAX_WORKERS); > + } > + > + ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_compressed_task_entry, > + 0, 0, offset + curr_off, chunk_size, > + qiov, qiov_offset, l2meta); > + if (ret < 0) { > + break; > + } > + qiov_offset += chunk_size; > + curr_off += chunk_size; you may just offset += chunk_size and drop extra variable > + bytes -= chunk_size; > + } > + > + if (aio) { > + aio_task_pool_wait_all(aio); > + if (ret == 0) { > + ret = aio_task_pool_status(aio); > + } > + g_free(aio); > + } > + > + return ret; > +} > + > static int coroutine_fn > qcow2_co_preadv_compressed(BlockDriverState *bs, > uint64_t file_cluster_offset, > overall - OK -- Best regards, Vladimir