Re: [PATCH v6 2/4] block: introduce zone append write for zoned devices
On Fri, Mar 10, 2023 at 06:31:04PM +0800, Sam Li wrote: > A zone append command is a write operation that specifies the first > logical block of a zone as the write position. When writing to a zoned > block device using zone append, the byte offset of writes is pointing > to the write pointer of that zone. Upon completion the device will > respond with the position the data has been written in the zone. > > Signed-off-by: Sam Li > --- > block/block-backend.c | 60 +++ > block/file-posix.c| 54 +--- > block/io.c| 21 +++ > block/io_uring.c | 4 +++ > block/linux-aio.c | 3 ++ > block/raw-format.c| 8 + > include/block/block-io.h | 4 +++ > include/block/block_int-common.h | 5 +++ > include/block/raw-aio.h | 4 ++- > include/sysemu/block-backend-io.h | 9 + > 10 files changed, 166 insertions(+), 6 deletions(-) > > diff --git a/block/block-backend.c b/block/block-backend.c > index f70b08e3f6..28e8f5d778 100644 > --- a/block/block-backend.c > +++ b/block/block-backend.c > @@ -1888,6 +1888,45 @@ BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, > BlockZoneOp op, > return &acb->common; > } > > +static void coroutine_fn blk_aio_zone_append_entry(void *opaque) > +{ > +BlkAioEmAIOCB *acb = opaque; > +BlkRwCo *rwco = &acb->rwco; > + > +rwco->ret = blk_co_zone_append(rwco->blk, &acb->bytes, > + rwco->iobuf, rwco->flags); > +blk_aio_complete(acb); > +} > + > +BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset, > +QEMUIOVector *qiov, BdrvRequestFlags flags, > +BlockCompletionFunc *cb, void *opaque) { > +BlkAioEmAIOCB *acb; > +Coroutine *co; > +IO_CODE(); > + > +blk_inc_in_flight(blk); > +acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); > +acb->rwco = (BlkRwCo) { > +.blk= blk, > +.ret= NOT_DONE, > +.flags = flags, > +.iobuf = qiov, > +}; > +acb->bytes = *offset; > +acb->has_returned = false; > + > +co = qemu_coroutine_create(blk_aio_zone_append_entry, acb); > +aio_co_enter(blk_get_aio_context(blk), co); > +acb->has_returned = true; > +if (acb->rwco.ret != NOT_DONE) { > 
+replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), > + blk_aio_complete_bh, acb); > +} > + > +return &acb->common; > +} How is the resulting offset value communicated back to the caller? I see offset being read (dereferenced) but there is no write (assignment). Maybe this function should pass through acb->bytes = (int64_t)offset instead so that blk_co_zone_append() can modify the offset? signature.asc Description: PGP signature
Re: [PATCH v6 2/4] block: introduce zone append write for zoned devices
On Fri, 2023-03-10 at 18:31 +0800, Sam Li wrote: > A zone append command is a write operation that specifies the first > logical block of a zone as the write position. When writing to a zoned > block device using zone append, the byte offset of writes is pointing > to the write pointer of that zone. s/writes is pointing to the write pointer of that zone/the call may point at any position within the zone to which the data is being appended/ > Upon completion the device will > respond with the position the data s/position the data/position where the data/ > has been written in the zone. > > Signed-off-by: Sam Li With nits above, Reviewed-by: Dmitry Fomichev > --- > block/block-backend.c | 60 +++ > block/file-posix.c | 54 +--- > block/io.c | 21 +++ > block/io_uring.c | 4 +++ > block/linux-aio.c | 3 ++ > block/raw-format.c | 8 + > include/block/block-io.h | 4 +++ > include/block/block_int-common.h | 5 +++ > include/block/raw-aio.h | 4 ++- > include/sysemu/block-backend-io.h | 9 + > 10 files changed, 166 insertions(+), 6 deletions(-) > > diff --git a/block/block-backend.c b/block/block-backend.c > index f70b08e3f6..28e8f5d778 100644 > --- a/block/block-backend.c > +++ b/block/block-backend.c > @@ -1888,6 +1888,45 @@ BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, > BlockZoneOp op, > return &acb->common; > } > > +static void coroutine_fn blk_aio_zone_append_entry(void *opaque) > +{ > + BlkAioEmAIOCB *acb = opaque; > + BlkRwCo *rwco = &acb->rwco; > + > + rwco->ret = blk_co_zone_append(rwco->blk, &acb->bytes, > + rwco->iobuf, rwco->flags); > + blk_aio_complete(acb); > +} > + > +BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset, > + QEMUIOVector *qiov, BdrvRequestFlags flags, > + BlockCompletionFunc *cb, void *opaque) { > + BlkAioEmAIOCB *acb; > + Coroutine *co; > + IO_CODE(); > + > + blk_inc_in_flight(blk); > + acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); > + acb->rwco = (BlkRwCo) { > + .blk = blk, > + .ret = NOT_DONE, > + .flags = flags, > + .iobuf = qiov, > 
+ }; > + acb->bytes = *offset; > + acb->has_returned = false; > + > + co = qemu_coroutine_create(blk_aio_zone_append_entry, acb); > + aio_co_enter(blk_get_aio_context(blk), co); > + acb->has_returned = true; > + if (acb->rwco.ret != NOT_DONE) { > + replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), > + blk_aio_complete_bh, acb); > + } > + > + return &acb->common; > +} > + > /* > * Send a zone_report command. > * offset is a byte offset from the start of the device. No alignment > @@ -1939,6 +1978,27 @@ int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, > BlockZoneOp op, > return ret; > } > > +/* > + * Send a zone_append command. > + */ > +int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset, > + QEMUIOVector *qiov, BdrvRequestFlags flags) > +{ > + int ret; > + IO_CODE(); > + > + blk_inc_in_flight(blk); > + blk_wait_while_drained(blk); > + if (!blk_is_available(blk)) { > + blk_dec_in_flight(blk); > + return -ENOMEDIUM; > + } > + > + ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags); > + blk_dec_in_flight(blk); > + return ret; > +} > + > void blk_drain(BlockBackend *blk) > { > BlockDriverState *bs = blk_bs(blk); > diff --git a/block/file-posix.c b/block/file-posix.c > index 61ed769ac8..2ba9174778 100644 > --- a/block/file-posix.c > +++ b/block/file-posix.c > @@ -160,6 +160,7 @@ typedef struct BDRVRawState { > bool has_write_zeroes:1; > bool use_linux_aio:1; > bool use_linux_io_uring:1; > + int64_t *offset; /* offset of zone append operation */ > int page_cache_inconsistent; /* errno from fdatasync failure */ > bool has_fallocate; > bool needs_alignment; > @@ -1672,7 +1673,7 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData > *aiocb) > ssize_t len; > > len = RETRY_ON_EINTR( > - (aiocb->aio_type & QEMU_AIO_WRITE) ? > + (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) ? 
> qemu_pwritev(aiocb->aio_fildes, > aiocb->io.iov, > aiocb->io.niov, > @@ -1701,7 +1702,7 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData > *aiocb, char *buf) > ssize_t len; > > while (offset < aiocb->aio_nbytes) { > - if (aiocb->aio_type & QEMU_AIO_WRITE) { > + if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) { > len = pwrite(aiocb->aio_fildes, > (const char *)buf + offset, >
[PATCH v6 2/4] block: introduce zone append write for zoned devices
A zone append command is a write operation that specifies the first logical block of a zone as the write position. When writing to a zoned block device using zone append, the byte offset of writes is pointing to the write pointer of that zone. Upon completion the device will respond with the position the data has been written in the zone. Signed-off-by: Sam Li --- block/block-backend.c | 60 +++ block/file-posix.c| 54 +--- block/io.c| 21 +++ block/io_uring.c | 4 +++ block/linux-aio.c | 3 ++ block/raw-format.c| 8 + include/block/block-io.h | 4 +++ include/block/block_int-common.h | 5 +++ include/block/raw-aio.h | 4 ++- include/sysemu/block-backend-io.h | 9 + 10 files changed, 166 insertions(+), 6 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index f70b08e3f6..28e8f5d778 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1888,6 +1888,45 @@ BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op, return &acb->common; } +static void coroutine_fn blk_aio_zone_append_entry(void *opaque) +{ +BlkAioEmAIOCB *acb = opaque; +BlkRwCo *rwco = &acb->rwco; + +rwco->ret = blk_co_zone_append(rwco->blk, &acb->bytes, + rwco->iobuf, rwco->flags); +blk_aio_complete(acb); +} + +BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset, +QEMUIOVector *qiov, BdrvRequestFlags flags, +BlockCompletionFunc *cb, void *opaque) { +BlkAioEmAIOCB *acb; +Coroutine *co; +IO_CODE(); + +blk_inc_in_flight(blk); +acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); +acb->rwco = (BlkRwCo) { +.blk= blk, +.ret= NOT_DONE, +.flags = flags, +.iobuf = qiov, +}; +acb->bytes = *offset; +acb->has_returned = false; + +co = qemu_coroutine_create(blk_aio_zone_append_entry, acb); +aio_co_enter(blk_get_aio_context(blk), co); +acb->has_returned = true; +if (acb->rwco.ret != NOT_DONE) { +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + blk_aio_complete_bh, acb); +} + +return &acb->common; +} + /* * Send a zone_report command. 
* offset is a byte offset from the start of the device. No alignment @@ -1939,6 +1978,27 @@ int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op, return ret; } +/* + * Send a zone_append command. + */ +int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset, +QEMUIOVector *qiov, BdrvRequestFlags flags) +{ +int ret; +IO_CODE(); + +blk_inc_in_flight(blk); +blk_wait_while_drained(blk); +if (!blk_is_available(blk)) { +blk_dec_in_flight(blk); +return -ENOMEDIUM; +} + +ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags); +blk_dec_in_flight(blk); +return ret; +} + void blk_drain(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); diff --git a/block/file-posix.c b/block/file-posix.c index 61ed769ac8..2ba9174778 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -160,6 +160,7 @@ typedef struct BDRVRawState { bool has_write_zeroes:1; bool use_linux_aio:1; bool use_linux_io_uring:1; +int64_t *offset; /* offset of zone append operation */ int page_cache_inconsistent; /* errno from fdatasync failure */ bool has_fallocate; bool needs_alignment; @@ -1672,7 +1673,7 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb) ssize_t len; len = RETRY_ON_EINTR( -(aiocb->aio_type & QEMU_AIO_WRITE) ? +(aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) ? 
qemu_pwritev(aiocb->aio_fildes, aiocb->io.iov, aiocb->io.niov, @@ -1701,7 +1702,7 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf) ssize_t len; while (offset < aiocb->aio_nbytes) { -if (aiocb->aio_type & QEMU_AIO_WRITE) { +if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) { len = pwrite(aiocb->aio_fildes, (const char *)buf + offset, aiocb->aio_nbytes - offset, @@ -1794,7 +1795,7 @@ static int handle_aiocb_rw(void *opaque) } nbytes = handle_aiocb_rw_linear(aiocb, buf); -if (!(aiocb->aio_type & QEMU_AIO_WRITE)) { +if (!(aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))) { char *p = buf; size_t count = aiocb->aio_nbytes, copy; int i; @@ -2431,6 +2432,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, #if defined(CONFIG_BLKZONED) if (bs->bl.wps) { qemu_co_mutex_lock(&bs->bl.wps->colock);