Re: [RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.

2022-06-27 Thread Damien Le Moal
On 6/25/22 00:49, Stefan Hajnoczi wrote:
> On Fri, Jun 24, 2022 at 11:14:32AM +0800, Sam Li wrote:
>> Hi Stefan,
>>
>> Stefan Hajnoczi  于2022年6月20日周一 15:55写道:
>>>
>>> On Mon, Jun 20, 2022 at 11:36:11AM +0800, Sam Li wrote:
>>>
>>> Hi Sam,
>>> Is this version 2 of "[RFC v1] Add support for zoned device"? Please
>>> keep the email subject line the same (except for "v2", "v3", etc) so
>>> that it's clear which patch series this new version replaces.
>>>
 Fix some mistakes before. It can report a range of zones now.
>>>
>>> This looks like the description of what changed compared to v1. Please
>>> put the changelog below "---" in the future. When patch emails are
>>> merged by git-am(1) it keeps the text above "---" and discards the text
>>> below "---". The changelog is usually no longer useful once the patches
>>> are merged, so it should be located below the "---" line.
>>>
>>> The text above the "---" is the commit description (an explanation of
>>> why this commit is necessary). In this case the commit description
>>> should explain that this patch adds .bdrv_co_zone_report() and
>>> .bdrv_co_zone_mgmt() to BlockDriver so that zoned block devices can be
>>> supported.
>>>

 Signed-off-by: Sam Li 
 ---
  block/block-backend.c |  22 
  block/coroutines.h|   5 +
  block/file-posix.c| 182 ++
  block/io.c|  23 
  include/block/block-common.h  |  43 ++-
  include/block/block-io.h  |  13 +++
  include/block/block_int-common.h  |  20 
  qemu-io-cmds.c| 118 +++
  tests/qemu-iotests/tests/zoned.sh |  52 +
  9 files changed, 477 insertions(+), 1 deletion(-)
  create mode 100644 tests/qemu-iotests/tests/zoned.sh

 diff --git a/block/block-backend.c b/block/block-backend.c
 index e0e1aff4b1..20248e4a35 100644
 --- a/block/block-backend.c
 +++ b/block/block-backend.c
 @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB {
  int ret;
  } BlockBackendAIOCB;

 +
 +
>>>
>>> Please avoid whitespace changes in code that is otherwise untouched by
>>> your patch. Code changes can cause merge conflicts and they make it
>>> harder to use git-annotate(1), so only changes that are necessary should
>>> be included in a patch.
>>>
  static const AIOCBInfo block_backend_aiocb_info = {
  .get_aio_context = blk_aiocb_get_aio_context,
  .aiocb_size = sizeof(BlockBackendAIOCB),
 @@ -1810,6 +1812,25 @@ int blk_flush(BlockBackend *blk)
  return ret;
  }

>>>
>>> Please add a documentation comment for blk_co_zone_report() that
>>> explains how to use the functions and the purpose of the arguments. For
>>> example, does offset have to be the first byte in a zone or can it be
>>> any byte offset? What are the alignment requirements of offset and len?
>>> Why is nr_zones a pointer?
>>>
 +int blk_co_zone_report(BlockBackend *blk, int64_t offset, int64_t len,
>>>
>>> Functions that run in coroutine context must be labeled with
>>> coroutine_fn:
>>>
>>> int coroutine_fn blk_co_zone_report(...)
>>>
>>> This tells humans and tools that the function can only be called from a
>>> coroutine. There is a blog post about coroutines in QEMU here:
>>> https://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html
>>>
 +   int64_t *nr_zones,
 +   struct BlockZoneDescriptor *zones)
>>>
>>> QEMU coding style uses typedefs when defining structs, so "struct
>>> BlockZoneDescriptor *zones" should be written as "BlockZoneDescriptor
>>> *zones".
>>>
 +{
 +int ret;
>>>
>>> This function is called from the I/O code path, please mark it with:
>>>
>>>   IO_CODE();
>>>
>>> From include/block/block-io.h:
>>>
>>>   * I/O API functions. These functions are thread-safe, and therefore
>>>   * can run in any thread as long as the thread has called
>>>   * aio_context_acquire/release().
>>>   *
>>>   * These functions can only call functions from I/O and Common categories,
>>>   * but can be invoked by GS, "I/O or GS" and I/O APIs.
>>>   *
>>>   * All functions in this category must use the macro
>>>   * IO_CODE();
>>>   * to catch when they are accidentally called by the wrong API.
>>>
 +ret = bdrv_co_zone_report(blk->root->bs, offset, len, nr_zones, 
 zones);
>>>
>>> Please add blk_inc_in_flight(blk) and blk_dec_in_flight(blk) around this
>>> function call to ensure that zone report requests finish before I/O is
>>> drained (see bdrv_drained_begin()). This is necessary so that it's
>>> possible to wait for I/O requests, including zone report, to complete.
>>>
>>> Similar to blk_co_do_preadv() we need blk_wait_while_drained(blk),
>>> blk_check_byte_request(), and bdrv_inc_in_flight(bs) before calling
>>> bdrv_co_zone_report(). bdrv_dec_in_flight(bs) needs to be called after
>>> 

Re: [RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.

2022-06-24 Thread Sam Li
Stefan Hajnoczi  于2022年6月24日周五 23:50写道:
>
> On Fri, Jun 24, 2022 at 11:14:32AM +0800, Sam Li wrote:
> > Hi Stefan,
> >
> > Stefan Hajnoczi  于2022年6月20日周一 15:55写道:
> > >
> > > On Mon, Jun 20, 2022 at 11:36:11AM +0800, Sam Li wrote:
> > >
> > > Hi Sam,
> > > Is this version 2 of "[RFC v1] Add support for zoned device"? Please
> > > keep the email subject line the same (except for "v2", "v3", etc) so
> > > that it's clear which patch series this new version replaces.
> > >
> > > > Fix some mistakes before. It can report a range of zones now.
> > >
> > > This looks like the description of what changed compared to v1. Please
> > > put the changelog below "---" in the future. When patch emails are
> > > merged by git-am(1) it keeps the text above "---" and discards the text
> > > below "---". The changelog is usually no longer useful once the patches
> > > are merged, so it should be located below the "---" line.
> > >
> > > The text above the "---" is the commit description (an explanation of
> > > why this commit is necessary). In this case the commit description
> > > should explain that this patch adds .bdrv_co_zone_report() and
> > > .bdrv_co_zone_mgmt() to BlockDriver so that zoned block devices can be
> > > supported.
> > >
> > > >
> > > > Signed-off-by: Sam Li 
> > > > ---
> > > >  block/block-backend.c |  22 
> > > >  block/coroutines.h|   5 +
> > > >  block/file-posix.c| 182 ++
> > > >  block/io.c|  23 
> > > >  include/block/block-common.h  |  43 ++-
> > > >  include/block/block-io.h  |  13 +++
> > > >  include/block/block_int-common.h  |  20 
> > > >  qemu-io-cmds.c| 118 +++
> > > >  tests/qemu-iotests/tests/zoned.sh |  52 +
> > > >  9 files changed, 477 insertions(+), 1 deletion(-)
> > > >  create mode 100644 tests/qemu-iotests/tests/zoned.sh
> > > >
> > > > diff --git a/block/block-backend.c b/block/block-backend.c
> > > > index e0e1aff4b1..20248e4a35 100644
> > > > --- a/block/block-backend.c
> > > > +++ b/block/block-backend.c
> > > > @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB {
> > > >  int ret;
> > > >  } BlockBackendAIOCB;
> > > >
> > > > +
> > > > +
> > >
> > > Please avoid whitespace changes in code that is otherwise untouched by
> > > your patch. Code changes can cause merge conflicts and they make it
> > > harder to use git-annotate(1), so only changes that are necessary should
> > > be included in a patch.
> > >
> > > >  static const AIOCBInfo block_backend_aiocb_info = {
> > > >  .get_aio_context = blk_aiocb_get_aio_context,
> > > >  .aiocb_size = sizeof(BlockBackendAIOCB),
> > > > @@ -1810,6 +1812,25 @@ int blk_flush(BlockBackend *blk)
> > > >  return ret;
> > > >  }
> > > >
> > >
> > > Please add a documentation comment for blk_co_zone_report() that
> > > explains how to use the functions and the purpose of the arguments. For
> > > example, does offset have to be the first byte in a zone or can it be
> > > any byte offset? What are the alignment requirements of offset and len?
> > > Why is nr_zones a pointer?
> > >
> > > > +int blk_co_zone_report(BlockBackend *blk, int64_t offset, int64_t len,
> > >
> > > Functions that run in coroutine context must be labeled with
> > > coroutine_fn:
> > >
> > > int coroutine_fn blk_co_zone_report(...)
> > >
> > > This tells humans and tools that the function can only be called from a
> > > coroutine. There is a blog post about coroutines in QEMU here:
> > > https://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html
> > >
> > > > +   int64_t *nr_zones,
> > > > +   struct BlockZoneDescriptor *zones)
> > >
> > > QEMU coding style uses typedefs when defining structs, so "struct
> > > BlockZoneDescriptor *zones" should be written as "BlockZoneDescriptor
> > > *zones".
> > >
> > > > +{
> > > > +int ret;
> > >
> > > This function is called from the I/O code path, please mark it with:
> > >
> > >   IO_CODE();
> > >
> > > From include/block/block-io.h:
> > >
> > >   * I/O API functions. These functions are thread-safe, and therefore
> > >   * can run in any thread as long as the thread has called
> > >   * aio_context_acquire/release().
> > >   *
> > >   * These functions can only call functions from I/O and Common 
> > > categories,
> > >   * but can be invoked by GS, "I/O or GS" and I/O APIs.
> > >   *
> > >   * All functions in this category must use the macro
> > >   * IO_CODE();
> > >   * to catch when they are accidentally called by the wrong API.
> > >
> > > > +ret = bdrv_co_zone_report(blk->root->bs, offset, len, nr_zones, 
> > > > zones);
> > >
> > > Please add blk_inc_in_flight(blk) and blk_dec_in_flight(blk) around this
> > > function call to ensure that zone report requests finish before I/O is
> > > drained (see bdrv_drained_begin()). This is necessary so that it's
> > > possible to wait for I/O 

Re: [RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.

2022-06-24 Thread Stefan Hajnoczi
On Fri, Jun 24, 2022 at 11:14:32AM +0800, Sam Li wrote:
> Hi Stefan,
> 
> Stefan Hajnoczi  于2022年6月20日周一 15:55写道:
> >
> > On Mon, Jun 20, 2022 at 11:36:11AM +0800, Sam Li wrote:
> >
> > Hi Sam,
> > Is this version 2 of "[RFC v1] Add support for zoned device"? Please
> > keep the email subject line the same (except for "v2", "v3", etc) so
> > that it's clear which patch series this new version replaces.
> >
> > > Fix some mistakes before. It can report a range of zones now.
> >
> > This looks like the description of what changed compared to v1. Please
> > put the changelog below "---" in the future. When patch emails are
> > merged by git-am(1) it keeps the text above "---" and discards the text
> > below "---". The changelog is usually no longer useful once the patches
> > are merged, so it should be located below the "---" line.
> >
> > The text above the "---" is the commit description (an explanation of
> > why this commit is necessary). In this case the commit description
> > should explain that this patch adds .bdrv_co_zone_report() and
> > .bdrv_co_zone_mgmt() to BlockDriver so that zoned block devices can be
> > supported.
> >
> > >
> > > Signed-off-by: Sam Li 
> > > ---
> > >  block/block-backend.c |  22 
> > >  block/coroutines.h|   5 +
> > >  block/file-posix.c| 182 ++
> > >  block/io.c|  23 
> > >  include/block/block-common.h  |  43 ++-
> > >  include/block/block-io.h  |  13 +++
> > >  include/block/block_int-common.h  |  20 
> > >  qemu-io-cmds.c| 118 +++
> > >  tests/qemu-iotests/tests/zoned.sh |  52 +
> > >  9 files changed, 477 insertions(+), 1 deletion(-)
> > >  create mode 100644 tests/qemu-iotests/tests/zoned.sh
> > >
> > > diff --git a/block/block-backend.c b/block/block-backend.c
> > > index e0e1aff4b1..20248e4a35 100644
> > > --- a/block/block-backend.c
> > > +++ b/block/block-backend.c
> > > @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB {
> > >  int ret;
> > >  } BlockBackendAIOCB;
> > >
> > > +
> > > +
> >
> > Please avoid whitespace changes in code that is otherwise untouched by
> > your patch. Code changes can cause merge conflicts and they make it
> > harder to use git-annotate(1), so only changes that are necessary should
> > be included in a patch.
> >
> > >  static const AIOCBInfo block_backend_aiocb_info = {
> > >  .get_aio_context = blk_aiocb_get_aio_context,
> > >  .aiocb_size = sizeof(BlockBackendAIOCB),
> > > @@ -1810,6 +1812,25 @@ int blk_flush(BlockBackend *blk)
> > >  return ret;
> > >  }
> > >
> >
> > Please add a documentation comment for blk_co_zone_report() that
> > explains how to use the functions and the purpose of the arguments. For
> > example, does offset have to be the first byte in a zone or can it be
> > any byte offset? What are the alignment requirements of offset and len?
> > Why is nr_zones a pointer?
> >
> > > +int blk_co_zone_report(BlockBackend *blk, int64_t offset, int64_t len,
> >
> > Functions that run in coroutine context must be labeled with
> > coroutine_fn:
> >
> > int coroutine_fn blk_co_zone_report(...)
> >
> > This tells humans and tools that the function can only be called from a
> > coroutine. There is a blog post about coroutines in QEMU here:
> > https://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html
> >
> > > +   int64_t *nr_zones,
> > > +   struct BlockZoneDescriptor *zones)
> >
> > QEMU coding style uses typedefs when defining structs, so "struct
> > BlockZoneDescriptor *zones" should be written as "BlockZoneDescriptor
> > *zones".
> >
> > > +{
> > > +int ret;
> >
> > This function is called from the I/O code path, please mark it with:
> >
> >   IO_CODE();
> >
> > From include/block/block-io.h:
> >
> >   * I/O API functions. These functions are thread-safe, and therefore
> >   * can run in any thread as long as the thread has called
> >   * aio_context_acquire/release().
> >   *
> >   * These functions can only call functions from I/O and Common categories,
> >   * but can be invoked by GS, "I/O or GS" and I/O APIs.
> >   *
> >   * All functions in this category must use the macro
> >   * IO_CODE();
> >   * to catch when they are accidentally called by the wrong API.
> >
> > > +ret = bdrv_co_zone_report(blk->root->bs, offset, len, nr_zones, 
> > > zones);
> >
> > Please add blk_inc_in_flight(blk) and blk_dec_in_flight(blk) around this
> > function call to ensure that zone report requests finish before I/O is
> > drained (see bdrv_drained_begin()). This is necessary so that it's
> > possible to wait for I/O requests, including zone report, to complete.
> >
> > Similar to blk_co_do_preadv() we need blk_wait_while_drained(blk),
> > blk_check_byte_request(), and bdrv_inc_in_flight(bs) before calling
> > bdrv_co_zone_report(). bdrv_dec_in_flight(bs) needs to be called after
> > 

Re: [RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.

2022-06-23 Thread Sam Li
Hi Stefan,

Stefan Hajnoczi  于2022年6月20日周一 15:55写道:
>
> On Mon, Jun 20, 2022 at 11:36:11AM +0800, Sam Li wrote:
>
> Hi Sam,
> Is this version 2 of "[RFC v1] Add support for zoned device"? Please
> keep the email subject line the same (except for "v2", "v3", etc) so
> that it's clear which patch series this new version replaces.
>
> > Fix some mistakes before. It can report a range of zones now.
>
> This looks like the description of what changed compared to v1. Please
> put the changelog below "---" in the future. When patch emails are
> merged by git-am(1) it keeps the text above "---" and discards the text
> below "---". The changelog is usually no longer useful once the patches
> are merged, so it should be located below the "---" line.
>
> The text above the "---" is the commit description (an explanation of
> why this commit is necessary). In this case the commit description
> should explain that this patch adds .bdrv_co_zone_report() and
> .bdrv_co_zone_mgmt() to BlockDriver so that zoned block devices can be
> supported.
>
> >
> > Signed-off-by: Sam Li 
> > ---
> >  block/block-backend.c |  22 
> >  block/coroutines.h|   5 +
> >  block/file-posix.c| 182 ++
> >  block/io.c|  23 
> >  include/block/block-common.h  |  43 ++-
> >  include/block/block-io.h  |  13 +++
> >  include/block/block_int-common.h  |  20 
> >  qemu-io-cmds.c| 118 +++
> >  tests/qemu-iotests/tests/zoned.sh |  52 +
> >  9 files changed, 477 insertions(+), 1 deletion(-)
> >  create mode 100644 tests/qemu-iotests/tests/zoned.sh
> >
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > index e0e1aff4b1..20248e4a35 100644
> > --- a/block/block-backend.c
> > +++ b/block/block-backend.c
> > @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB {
> >  int ret;
> >  } BlockBackendAIOCB;
> >
> > +
> > +
>
> Please avoid whitespace changes in code that is otherwise untouched by
> your patch. Code changes can cause merge conflicts and they make it
> harder to use git-annotate(1), so only changes that are necessary should
> be included in a patch.
>
> >  static const AIOCBInfo block_backend_aiocb_info = {
> >  .get_aio_context = blk_aiocb_get_aio_context,
> >  .aiocb_size = sizeof(BlockBackendAIOCB),
> > @@ -1810,6 +1812,25 @@ int blk_flush(BlockBackend *blk)
> >  return ret;
> >  }
> >
>
> Please add a documentation comment for blk_co_zone_report() that
> explains how to use the functions and the purpose of the arguments. For
> example, does offset have to be the first byte in a zone or can it be
> any byte offset? What are the alignment requirements of offset and len?
> Why is nr_zones a pointer?
>
> > +int blk_co_zone_report(BlockBackend *blk, int64_t offset, int64_t len,
>
> Functions that run in coroutine context must be labeled with
> coroutine_fn:
>
> int coroutine_fn blk_co_zone_report(...)
>
> This tells humans and tools that the function can only be called from a
> coroutine. There is a blog post about coroutines in QEMU here:
> https://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html
>
> > +   int64_t *nr_zones,
> > +   struct BlockZoneDescriptor *zones)
>
> QEMU coding style uses typedefs when defining structs, so "struct
> BlockZoneDescriptor *zones" should be written as "BlockZoneDescriptor
> *zones".
>
> > +{
> > +int ret;
>
> This function is called from the I/O code path, please mark it with:
>
>   IO_CODE();
>
> From include/block/block-io.h:
>
>   * I/O API functions. These functions are thread-safe, and therefore
>   * can run in any thread as long as the thread has called
>   * aio_context_acquire/release().
>   *
>   * These functions can only call functions from I/O and Common categories,
>   * but can be invoked by GS, "I/O or GS" and I/O APIs.
>   *
>   * All functions in this category must use the macro
>   * IO_CODE();
>   * to catch when they are accidentally called by the wrong API.
>
> > +ret = bdrv_co_zone_report(blk->root->bs, offset, len, nr_zones, zones);
>
> Please add blk_inc_in_flight(blk) and blk_dec_in_flight(blk) around this
> function call to ensure that zone report requests finish before I/O is
> drained (see bdrv_drained_begin()). This is necessary so that it's
> possible to wait for I/O requests, including zone report, to complete.
>
> Similar to blk_co_do_preadv() we need blk_wait_while_drained(blk),
> blk_check_byte_request(), and bdrv_inc_in_flight(bs) before calling
> bdrv_co_zone_report(). bdrv_dec_in_flight(bs) needs to be called after
> bdrv_co_zone_report() returns.
>
After adding similar structure to blk_co_do_preadv(), zone operation
command will always fail at blk_wait_while_drained(blk) because
blk->inflight <= 0. Would it be ok to just remove
blk_wait_while_drained?

> > +return ret;
> > +}
> > +
> > +int 

Re: [RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.

2022-06-20 Thread Stefan Hajnoczi
On Mon, Jun 20, 2022 at 07:11:40PM +0900, Damien Le Moal wrote:
> On 6/20/22 16:55, Stefan Hajnoczi wrote:
> > On Mon, Jun 20, 2022 at 11:36:11AM +0800, Sam Li wrote:
> >> +uint32_t nr_zones;
> > 
> > Should this really be limited to 32-bit? For example, take 256 MB zones,
> > then the max nr_zones * 256 MB is much smaller than a uint64_t capacity
> > value. It seems safer to make this 64-bit, but maybe Dmitry, Hannes, or
> > Damien can tell us what to do here.
> 
> u32 is fine. We are nowhere near 4G zones :)
> The max out there today is 20TB SMR drive with 128MB zones. About 150,000
> zones. Nowhere near 4G limit. Linux kernel also uses unsigned int for
> number of zones everywhere.

Thanks!

Stefan


signature.asc
Description: PGP signature


Re: [RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.

2022-06-20 Thread Damien Le Moal
On 6/20/22 16:55, Stefan Hajnoczi wrote:
> On Mon, Jun 20, 2022 at 11:36:11AM +0800, Sam Li wrote:
> 
> Hi Sam,
> Is this version 2 of "[RFC v1] Add support for zoned device"? Please
> keep the email subject line the same (except for "v2", "v3", etc) so
> that it's clear which patch series this new version replaces.
> 
>> Fix some mistakes before. It can report a range of zones now.
> 
> This looks like the description of what changed compared to v1. Please
> put the changelog below "---" in the future. When patch emails are
> merged by git-am(1) it keeps the text above "---" and discards the text
> below "---". The changelog is usually no longer useful once the patches
> are merged, so it should be located below the "---" line.
> 
> The text above the "---" is the commit description (an explanation of
> why this commit is necessary). In this case the commit description
> should explain that this patch adds .bdrv_co_zone_report() and
> .bdrv_co_zone_mgmt() to BlockDriver so that zoned block devices can be
> supported.
> 
>>
>> Signed-off-by: Sam Li 
>> ---
>>  block/block-backend.c |  22 
>>  block/coroutines.h|   5 +
>>  block/file-posix.c| 182 ++
>>  block/io.c|  23 
>>  include/block/block-common.h  |  43 ++-
>>  include/block/block-io.h  |  13 +++
>>  include/block/block_int-common.h  |  20 
>>  qemu-io-cmds.c| 118 +++
>>  tests/qemu-iotests/tests/zoned.sh |  52 +
>>  9 files changed, 477 insertions(+), 1 deletion(-)
>>  create mode 100644 tests/qemu-iotests/tests/zoned.sh
>>
>> diff --git a/block/block-backend.c b/block/block-backend.c
>> index e0e1aff4b1..20248e4a35 100644
>> --- a/block/block-backend.c
>> +++ b/block/block-backend.c
>> @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB {
>>  int ret;
>>  } BlockBackendAIOCB;
>>  
>> +
>> +
> 
> Please avoid whitespace changes in code that is otherwise untouched by
> your patch. Code changes can cause merge conflicts and they make it
> harder to use git-annotate(1), so only changes that are necessary should
> be included in a patch.
> 
>>  static const AIOCBInfo block_backend_aiocb_info = {
>>  .get_aio_context = blk_aiocb_get_aio_context,
>>  .aiocb_size = sizeof(BlockBackendAIOCB),
>> @@ -1810,6 +1812,25 @@ int blk_flush(BlockBackend *blk)
>>  return ret;
>>  }
>>  
> 
> Please add a documentation comment for blk_co_zone_report() that
> explains how to use the functions and the purpose of the arguments. For
> example, does offset have to be the first byte in a zone or can it be
> any byte offset? What are the alignment requirements of offset and len?
> Why is nr_zones a pointer?
> 
>> +int blk_co_zone_report(BlockBackend *blk, int64_t offset, int64_t len,
> 
> Functions that run in coroutine context must be labeled with
> coroutine_fn:
> 
> int coroutine_fn blk_co_zone_report(...)
> 
> This tells humans and tools that the function can only be called from a
> coroutine. There is a blog post about coroutines in QEMU here:
> https://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html
> 
>> +   int64_t *nr_zones,
>> +   struct BlockZoneDescriptor *zones)
> 
> QEMU coding style uses typedefs when defining structs, so "struct
> BlockZoneDescriptor *zones" should be written as "BlockZoneDescriptor
> *zones".
> 
>> +{
>> +int ret;
> 
> This function is called from the I/O code path, please mark it with:
> 
>   IO_CODE();
> 
> From include/block/block-io.h:
> 
>   * I/O API functions. These functions are thread-safe, and therefore
>   * can run in any thread as long as the thread has called
>   * aio_context_acquire/release().
>   *
>   * These functions can only call functions from I/O and Common categories,
>   * but can be invoked by GS, "I/O or GS" and I/O APIs.
>   *
>   * All functions in this category must use the macro
>   * IO_CODE();
>   * to catch when they are accidentally called by the wrong API.
> 
>> +ret = bdrv_co_zone_report(blk->root->bs, offset, len, nr_zones, zones);
> 
> Please add blk_inc_in_flight(blk) and blk_dec_in_flight(blk) around this
> function call to ensure that zone report requests finish before I/O is
> drained (see bdrv_drained_begin()). This is necessary so that it's
> possible to wait for I/O requests, including zone report, to complete.
> 
> Similar to blk_co_do_preadv() we need blk_wait_while_drained(blk),
> blk_check_byte_request(), and bdrv_inc_in_flight(bs) before calling
> bdrv_co_zone_report(). bdrv_dec_in_flight(bs) needs to be called after
> bdrv_co_zone_report() returns.
> 
>> +return ret;
>> +}
>> +
>> +int blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op,
>> +int64_t offset, int64_t len)
>> +{
>> +int ret;
>> +ret = bdrv_co_zone_mgmt(blk->root->bs, op, offset, len);
>> +
>> +return ret;
>> +}
> 
> The same applies to this function.
> 
>> +
>> 

Re: [RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.

2022-06-20 Thread Stefan Hajnoczi
On Mon, Jun 20, 2022 at 11:36:11AM +0800, Sam Li wrote:

Hi Sam,
Is this version 2 of "[RFC v1] Add support for zoned device"? Please
keep the email subject line the same (except for "v2", "v3", etc) so
that it's clear which patch series this new version replaces.

> Fix some mistakes before. It can report a range of zones now.

This looks like the description of what changed compared to v1. Please
put the changelog below "---" in the future. When patch emails are
merged by git-am(1) it keeps the text above "---" and discards the text
below "---". The changelog is usually no longer useful once the patches
are merged, so it should be located below the "---" line.

The text above the "---" is the commit description (an explanation of
why this commit is necessary). In this case the commit description
should explain that this patch adds .bdrv_co_zone_report() and
.bdrv_co_zone_mgmt() to BlockDriver so that zoned block devices can be
supported.

> 
> Signed-off-by: Sam Li 
> ---
>  block/block-backend.c |  22 
>  block/coroutines.h|   5 +
>  block/file-posix.c| 182 ++
>  block/io.c|  23 
>  include/block/block-common.h  |  43 ++-
>  include/block/block-io.h  |  13 +++
>  include/block/block_int-common.h  |  20 
>  qemu-io-cmds.c| 118 +++
>  tests/qemu-iotests/tests/zoned.sh |  52 +
>  9 files changed, 477 insertions(+), 1 deletion(-)
>  create mode 100644 tests/qemu-iotests/tests/zoned.sh
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index e0e1aff4b1..20248e4a35 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB {
>  int ret;
>  } BlockBackendAIOCB;
>  
> +
> +

Please avoid whitespace changes in code that is otherwise untouched by
your patch. Code changes can cause merge conflicts and they make it
harder to use git-annotate(1), so only changes that are necessary should
be included in a patch.

>  static const AIOCBInfo block_backend_aiocb_info = {
>  .get_aio_context = blk_aiocb_get_aio_context,
>  .aiocb_size = sizeof(BlockBackendAIOCB),
> @@ -1810,6 +1812,25 @@ int blk_flush(BlockBackend *blk)
>  return ret;
>  }
>  

Please add a documentation comment for blk_co_zone_report() that
explains how to use the functions and the purpose of the arguments. For
example, does offset have to be the first byte in a zone or can it be
any byte offset? What are the alignment requirements of offset and len?
Why is nr_zones a pointer?

> +int blk_co_zone_report(BlockBackend *blk, int64_t offset, int64_t len,

Functions that run in coroutine context must be labeled with
coroutine_fn:

int coroutine_fn blk_co_zone_report(...)

This tells humans and tools that the function can only be called from a
coroutine. There is a blog post about coroutines in QEMU here:
https://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html

> +   int64_t *nr_zones,
> +   struct BlockZoneDescriptor *zones)

QEMU coding style uses typedefs when defining structs, so "struct
BlockZoneDescriptor *zones" should be written as "BlockZoneDescriptor
*zones".

> +{
> +int ret;

This function is called from the I/O code path, please mark it with:

  IO_CODE();

From include/block/block-io.h:

  * I/O API functions. These functions are thread-safe, and therefore
  * can run in any thread as long as the thread has called
  * aio_context_acquire/release().
  *
  * These functions can only call functions from I/O and Common categories,
  * but can be invoked by GS, "I/O or GS" and I/O APIs.
  *
  * All functions in this category must use the macro
  * IO_CODE();
  * to catch when they are accidentally called by the wrong API.

> +ret = bdrv_co_zone_report(blk->root->bs, offset, len, nr_zones, zones);

Please add blk_inc_in_flight(blk) and blk_dec_in_flight(blk) around this
function call to ensure that zone report requests finish before I/O is
drained (see bdrv_drained_begin()). This is necessary so that it's
possible to wait for I/O requests, including zone report, to complete.

Similar to blk_co_do_preadv() we need blk_wait_while_drained(blk),
blk_check_byte_request(), and bdrv_inc_in_flight(bs) before calling
bdrv_co_zone_report(). bdrv_dec_in_flight(bs) needs to be called after
bdrv_co_zone_report() returns.

> +return ret;
> +}
> +
> +int blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op,
> +int64_t offset, int64_t len)
> +{
> +int ret;
> +ret = bdrv_co_zone_mgmt(blk->root->bs, op, offset, len);
> +
> +return ret;
> +}

The same applies to this function.

> +
> +
>  void blk_drain(BlockBackend *blk)
>  {
>  BlockDriverState *bs = blk_bs(blk);
> @@ -2634,3 +2655,4 @@ int blk_make_empty(BlockBackend *blk, Error **errp)
>  
>  return bdrv_make_empty(blk->root, errp);
>  }
> +

Unrelated whitespace 

[RFC v2] Adding block layer APIs resembling Linux ZoneBlockDevice ioctls.

2022-06-19 Thread Sam Li
Fix some mistakes before. It can report a range of zones now.

Signed-off-by: Sam Li 
---
 block/block-backend.c |  22 
 block/coroutines.h|   5 +
 block/file-posix.c| 182 ++
 block/io.c|  23 
 include/block/block-common.h  |  43 ++-
 include/block/block-io.h  |  13 +++
 include/block/block_int-common.h  |  20 
 qemu-io-cmds.c| 118 +++
 tests/qemu-iotests/tests/zoned.sh |  52 +
 9 files changed, 477 insertions(+), 1 deletion(-)
 create mode 100644 tests/qemu-iotests/tests/zoned.sh

diff --git a/block/block-backend.c b/block/block-backend.c
index e0e1aff4b1..20248e4a35 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -104,6 +104,8 @@ typedef struct BlockBackendAIOCB {
 int ret;
 } BlockBackendAIOCB;
 
+
+
 static const AIOCBInfo block_backend_aiocb_info = {
 .get_aio_context = blk_aiocb_get_aio_context,
 .aiocb_size = sizeof(BlockBackendAIOCB),
@@ -1810,6 +1812,25 @@ int blk_flush(BlockBackend *blk)
 return ret;
 }
 
+int blk_co_zone_report(BlockBackend *blk, int64_t offset, int64_t len,
+   int64_t *nr_zones,
+   struct BlockZoneDescriptor *zones)
+{
+int ret;
+ret = bdrv_co_zone_report(blk->root->bs, offset, len, nr_zones, zones);
+return ret;
+}
+
+int blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op,
+int64_t offset, int64_t len)
+{
+int ret;
+ret = bdrv_co_zone_mgmt(blk->root->bs, op, offset, len);
+
+return ret;
+}
+
+
 void blk_drain(BlockBackend *blk)
 {
 BlockDriverState *bs = blk_bs(blk);
@@ -2634,3 +2655,4 @@ int blk_make_empty(BlockBackend *blk, Error **errp)
 
 return bdrv_make_empty(blk->root, errp);
 }
+
diff --git a/block/coroutines.h b/block/coroutines.h
index 830ecaa733..247326255f 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -80,6 +80,11 @@ int coroutine_fn
 blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
 
 int coroutine_fn blk_co_do_flush(BlockBackend *blk);
+int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
+int64_t len, int64_t *nr_zones,
+struct BlockZoneDescriptor *zones);
+int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, enum zone_op op,
+int64_t offset, int64_t len);
 
 
 /*
diff --git a/block/file-posix.c b/block/file-posix.c
index 48cd096624..71fd21f8ba 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -178,6 +178,137 @@ typedef struct BDRVRawReopenState {
 bool check_cache_dropped;
 } BDRVRawReopenState;
 
+/*
+ * parse_zone - Fill a zone descriptor
+ */
+static inline void parse_zone(struct BlockZoneDescriptor *zone,
+struct blk_zone *blkz) {
+zone->start = blkz->start << BDRV_SECTOR_BITS;
+zone->length = blkz->len << BDRV_SECTOR_BITS;
+zone->cap = blkz->capacity << BDRV_SECTOR_BITS;
+zone->wp = blkz->wp << BDRV_SECTOR_BITS;
+zone->type = blkz->type;
+zone->cond = blkz->type;
+}
+
+/**
+ * zone report - Get a zone block device's information in the
+ * form of an array of zone descriptors.
+ *
+ * @param bs: passing zone block device file descriptor
+ * @param zones: Space to hold zone information on reply
+ * @param offset: the location in the zone block device
+ * @return 0 on success, -1 on failure
+ */
+static int raw_co_zone_report(BlockDriverState *bs, int64_t offset, int64_t 
len,
+  int64_t *nr_zones,
+  struct BlockZoneDescriptor *zones) {
+BDRVRawState *s = bs->opaque;
+struct blk_zone_report *rep;
+struct BlockZoneDescriptor bzd;
+struct blk_zone *blkz;
+int64_t zone_size_mask, end, rep_size, nrz;
+int ret, n = 0, i = 0;
+
+printf("%s\n", "start to report zone");
+nrz = *nr_zones;
+rep_size = sizeof(struct blk_zone_report) + nrz * sizeof(struct blk_zone);
+rep = (struct blk_zone_report *)malloc(rep_size);
+if (!rep) {
+return -1;
+}
+
+zone_size_mask = zone_start_sector - 1;
+/* align up */
+end = ((offset + len + zone_size_mask) & (~zone_size_mask))
+>> BDRV_SECTOR_BITS;
+
+blkz = (struct blk_zone *)(rep + 1);
+while (offset < end) {
+memset(rep, 0, rep_size);
+rep->sector = offset;
+rep->nr_zones = nrz;
+
+ret = ioctl(s->fd, BLKREPORTZONE, rep);
+if (ret != 0) {
+ret = -errno;
+error_report("%d: ioctl BLKREPORTZONE at %ld failed %d",
+ s->fd, offset, errno);
+free(rep);
+return ret;
+}
+
+if (!rep->nr_zones) {
+break;
+}
+
+for (i = 0; i < rep->nr_zones; i++) {
+parse_zone(, [i]);
+if (zones) {
+memcpy([n], , sizeof(bzd));
+}
+}
+
+offset =