Re: [PATCH v2] virtio-blk: add DISCARD support to virtio-blk driver

2017-07-05 Thread Michael S. Tsirkin
On Wed, Jul 05, 2017 at 07:57:07AM +, Liu, Changpeng wrote:
> 
> 
> > -Original Message-
> > From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> > Sent: Tuesday, July 4, 2017 5:24 PM
> > To: Liu, Changpeng <changpeng@intel.com>; virtualization@lists.linux-
> > foundation.org
> > Cc: stefa...@gmail.com; h...@lst.de; m...@redhat.com
> > Subject: Re: [PATCH v2] virtio-blk: add DISCARD support to virtio-blk driver
> > 
> > 
> > 
> > On 05/07/2017 10:44, Changpeng Liu wrote:
> > > Currently virtio-blk driver does not provide discard feature flag, so the
> > > filesystems which built on top of the block device will not send discard
> > > command. This is okay for HDD backend, but it will impact the performance
> > > for SSD backend.
> > >
> > > Add a feature flag VIRTIO_BLK_F_DISCARD and command
> > VIRTIO_BLK_T_DISCARD
> > > to extend exist virtio-blk protocol, define 16 bytes discard descriptor
> > > for each discard segment, the discard segment defination aligns with
> > > SCSI or NVM Express protocols, virtio-blk driver will support multi-range
> > > discard request as well.
> > >
> > > Signed-off-by: Changpeng Liu <changpeng@intel.com>
> > 
> > Please include a patch for the specification.  Since we are at it, I
> Thanks Paolo, do you mean include a text file which describe the changes for 
> the specification?

Paolo answered that. But please also CC code patch to 
virtio-comm...@lists.oasis-open.org
.
This is a subscriber-only list so pls subscribe beforehand:
https://www.oasis-open.org/committees/tc_home.php?wg_abbrev=virtio#feedback


-- 
MST
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2] virtio-blk: add DISCARD support to virtio-blk driver

2017-07-05 Thread Paolo Bonzini


On 05/07/2017 09:57, Liu, Changpeng wrote:
>> Please include a patch for the specification.  Since we are at it, I
> Thanks Paolo, do you mean include a text file which describe the changes for 
> the specification?

The specification is hosted in an svn (Subversion) repository at
https://tools.oasis-open.org/version-control/svn/virtio.  You can
provide a patch and send it to virtio-comm...@lists.oasis-open.org.

Thanks,

Paolo

>> would like to have three operations defined using the same descriptor:
>>
>> - discard (SCSI UNMAP)
>>
>> - write zeroes (SCSI WRITE SAME without UNMAP flag)
>>
>> - write zeroes and possibly discard (SCSI WRITE SAME with UNMAP flag)
>>
>> The last two can use the same command VIRTIO_BLK_T_WRITE_ZEROES, using
>> the reserved field as a flags field.
>
> Will add write zeroes feature.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v2] virtio-blk: add DISCARD support to virtio-blk driver

2017-07-05 Thread Liu, Changpeng


> -Original Message-
> From: Paolo Bonzini [mailto:pbonz...@redhat.com]
> Sent: Tuesday, July 4, 2017 5:24 PM
> To: Liu, Changpeng <changpeng@intel.com>; virtualization@lists.linux-
> foundation.org
> Cc: stefa...@gmail.com; h...@lst.de; m...@redhat.com
> Subject: Re: [PATCH v2] virtio-blk: add DISCARD support to virtio-blk driver
> 
> 
> 
> On 05/07/2017 10:44, Changpeng Liu wrote:
> > Currently virtio-blk driver does not provide discard feature flag, so the
> > filesystems which built on top of the block device will not send discard
> > command. This is okay for HDD backend, but it will impact the performance
> > for SSD backend.
> >
> > Add a feature flag VIRTIO_BLK_F_DISCARD and command
> VIRTIO_BLK_T_DISCARD
> > to extend exist virtio-blk protocol, define 16 bytes discard descriptor
> > for each discard segment, the discard segment defination aligns with
> > SCSI or NVM Express protocols, virtio-blk driver will support multi-range
> > discard request as well.
> >
> > Signed-off-by: Changpeng Liu <changpeng@intel.com>
> 
> Please include a patch for the specification.  Since we are at it, I
Thanks Paolo, do you mean include a text file which describe the changes for 
the specification?
> would like to have three operations defined using the same descriptor:
> 
> - discard (SCSI UNMAP)
> 
> - write zeroes (SCSI WRITE SAME without UNMAP flag)
> 
> - write zeroes and possibly discard (SCSI WRITE SAME with UNMAP flag)
> 
> The last two can use the same command VIRTIO_BLK_T_WRITE_ZEROES, using
> the reserved field as a flags field.
Will add write zeroes feature.
> 
> Paolo
> 
> > ---
> >  drivers/block/virtio_blk.c  | 76
> +++--
> >  include/uapi/linux/virtio_blk.h | 19 +++
> >  2 files changed, 92 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> > index 0297ad7..8f0c614 100644
> > --- a/drivers/block/virtio_blk.c
> > +++ b/drivers/block/virtio_blk.c
> > @@ -172,10 +172,52 @@ static int virtblk_add_req(struct virtqueue *vq, 
> > struct
> virtblk_req *vbr,
> > return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
> >  }
> >
> > +static inline int virtblk_setup_discard(struct request *req)
> > +{
> > +   unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
> > +   u32 block_size = queue_logical_block_size(req->q);
> > +   struct virtio_blk_discard *range;
> > +   struct bio *bio;
> > +
> > +   if (block_size < 512 || !block_size)
> > +   return -1;
> > +
> > +   range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
> > +   if (!range)
> > +   return -1;
> > +
> > +   __rq_for_each_bio(bio, req) {
> > +   u64 slba = (bio->bi_iter.bi_sector << 9) / block_size;
> > +   u32 nlb = bio->bi_iter.bi_size / block_size;
> > +
> > +   range[n].reserved = cpu_to_le32(0);
> > +   range[n].nlba = cpu_to_le32(nlb);
> > +   range[n].slba = cpu_to_le64(slba);
> > +   n++;
> > +   }
> > +
> > +   if (WARN_ON_ONCE(n != segments)) {
> > +   kfree(range);
> > +   return -1;
> > +   }
> > +
> > +   req->special_vec.bv_page = virt_to_page(range);
> > +   req->special_vec.bv_offset = offset_in_page(range);
> > +   req->special_vec.bv_len = sizeof(*range) * segments;
> > +   req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> > +
> > +   return 0;
> > +}
> > +
> >  static inline void virtblk_request_done(struct request *req)
> >  {
> > struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
> >
> > +   if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> > +   kfree(page_address(req->special_vec.bv_page) +
> > + req->special_vec.bv_offset);
> > +   }
> > +
> > switch (req_op(req)) {
> > case REQ_OP_SCSI_IN:
> > case REQ_OP_SCSI_OUT:
> > @@ -237,6 +279,9 @@ static blk_status_t virtio_queue_rq(struct
> blk_mq_hw_ctx *hctx,
> > case REQ_OP_FLUSH:
> > type = VIRTIO_BLK_T_FLUSH;
> > break;
> > +   case REQ_OP_DISCARD:
> > +   type = VIRTIO_BLK_T_DISCARD;
> > +   break;
> > case REQ_OP_SCSI_IN:
> > case REQ_OP_SCSI_OUT:
> > type = VIRTIO_BLK_T_SCSI_CMD;
> > @@ -256,9 +301,15 @@ static blk_status_t virtio_queue_rq(struct
> blk_mq_hw_ctx *hctx,
> >
> > b

Re: [PATCH v2] virtio-blk: add DISCARD support to virtio-blk driver

2017-07-04 Thread Michael S. Tsirkin
On Tue, Jul 04, 2017 at 11:24:01AM +0200, Paolo Bonzini wrote:
> 
> 
> On 05/07/2017 10:44, Changpeng Liu wrote:
> > Currently virtio-blk driver does not provide discard feature flag, so the
> > filesystems which built on top of the block device will not send discard
> > command. This is okay for HDD backend, but it will impact the performance
> > for SSD backend.
> > 
> > Add a feature flag VIRTIO_BLK_F_DISCARD and command VIRTIO_BLK_T_DISCARD
> > to extend exist virtio-blk protocol, define 16 bytes discard descriptor
> > for each discard segment, the discard segment defination aligns with
> > SCSI or NVM Express protocols, virtio-blk driver will support multi-range
> > discard request as well.
> > 
> > Signed-off-by: Changpeng Liu 
> 
> Please include a patch for the specification.


Most importantly, please remember to copy virtio-...@lists.oasis-open.org
on anything that changes the host/guest interface.

> Since we are at it, I
> would like to have three operations defined using the same descriptor:
> 
> - discard (SCSI UNMAP)
> 
> - write zeroes (SCSI WRITE SAME without UNMAP flag)
> 
> - write zeroes and possibly discard (SCSI WRITE SAME with UNMAP flag)
> 
> The last two can use the same command VIRTIO_BLK_T_WRITE_ZEROES, using
> the reserved field as a flags field.
> 
> Paolo

> > ---
> >  drivers/block/virtio_blk.c  | 76 
> > +++--
> >  include/uapi/linux/virtio_blk.h | 19 +++
> >  2 files changed, 92 insertions(+), 3 deletions(-)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2] virtio-blk: add DISCARD support to virtio-blk driver

2017-07-04 Thread Paolo Bonzini


On 05/07/2017 10:44, Changpeng Liu wrote:
> Currently virtio-blk driver does not provide discard feature flag, so the
> filesystems which built on top of the block device will not send discard
> command. This is okay for HDD backend, but it will impact the performance
> for SSD backend.
> 
> Add a feature flag VIRTIO_BLK_F_DISCARD and command VIRTIO_BLK_T_DISCARD
> to extend exist virtio-blk protocol, define 16 bytes discard descriptor
> for each discard segment, the discard segment defination aligns with
> SCSI or NVM Express protocols, virtio-blk driver will support multi-range
> discard request as well.
> 
> Signed-off-by: Changpeng Liu 

Please include a patch for the specification.  Since we are at it, I
would like to have three operations defined using the same descriptor:

- discard (SCSI UNMAP)

- write zeroes (SCSI WRITE SAME without UNMAP flag)

- write zeroes and possibly discard (SCSI WRITE SAME with UNMAP flag)

The last two can use the same command VIRTIO_BLK_T_WRITE_ZEROES, using
the reserved field as a flags field.

Paolo

> ---
>  drivers/block/virtio_blk.c  | 76 
> +++--
>  include/uapi/linux/virtio_blk.h | 19 +++
>  2 files changed, 92 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 0297ad7..8f0c614 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -172,10 +172,52 @@ static int virtblk_add_req(struct virtqueue *vq, struct 
> virtblk_req *vbr,
>   return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
>  
> +static inline int virtblk_setup_discard(struct request *req)
> +{
> + unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
> + u32 block_size = queue_logical_block_size(req->q);
> + struct virtio_blk_discard *range;
> + struct bio *bio;
> +
> + if (block_size < 512 || !block_size)
> + return -1;
> +
> + range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
> + if (!range)
> + return -1;
> +
> + __rq_for_each_bio(bio, req) {
> + u64 slba = (bio->bi_iter.bi_sector << 9) / block_size;
> + u32 nlb = bio->bi_iter.bi_size / block_size;
> +
> + range[n].reserved = cpu_to_le32(0);
> + range[n].nlba = cpu_to_le32(nlb);
> + range[n].slba = cpu_to_le64(slba);
> + n++;
> + }
> +
> + if (WARN_ON_ONCE(n != segments)) {
> + kfree(range);
> + return -1;
> + }
> +
> + req->special_vec.bv_page = virt_to_page(range);
> + req->special_vec.bv_offset = offset_in_page(range);
> + req->special_vec.bv_len = sizeof(*range) * segments;
> + req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> +
> + return 0;
> +}
> +
>  static inline void virtblk_request_done(struct request *req)
>  {
>   struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>  
> + if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> + kfree(page_address(req->special_vec.bv_page) +
> +   req->special_vec.bv_offset);
> + }
> +
>   switch (req_op(req)) {
>   case REQ_OP_SCSI_IN:
>   case REQ_OP_SCSI_OUT:
> @@ -237,6 +279,9 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx 
> *hctx,
>   case REQ_OP_FLUSH:
>   type = VIRTIO_BLK_T_FLUSH;
>   break;
> + case REQ_OP_DISCARD:
> + type = VIRTIO_BLK_T_DISCARD;
> + break;
>   case REQ_OP_SCSI_IN:
>   case REQ_OP_SCSI_OUT:
>   type = VIRTIO_BLK_T_SCSI_CMD;
> @@ -256,9 +301,15 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx 
> *hctx,
>  
>   blk_mq_start_request(req);
>  
> + if (type == VIRTIO_BLK_T_DISCARD) {
> + err = virtblk_setup_discard(req);
> + if (err)
> + return BLK_STS_IOERR;
> + }
> +
>   num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
>   if (num) {
> - if (rq_data_dir(req) == WRITE)
> + if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD)
>   vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, 
> VIRTIO_BLK_T_OUT);
>   else
>   vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, 
> VIRTIO_BLK_T_IN);
> @@ -767,6 +818,25 @@ static int virtblk_probe(struct virtio_device *vdev)
>   if (!err && opt_io_size)
>   blk_queue_io_opt(q, blk_size * opt_io_size);
>  
> + if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> + q->limits.discard_alignment = blk_size;
> + q->limits.discard_granularity = blk_size;
> +
> + virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, 
> );
> + if (v)
> + blk_queue_max_discard_sectors(q, v);
> + else
> + blk_queue_max_discard_sectors(q, -1U);
> +
> + virtio_cread(vdev, struct 

[PATCH v2] virtio-blk: add DISCARD support to virtio-blk driver

2017-07-04 Thread Changpeng Liu
Currently virtio-blk driver does not provide discard feature flag, so the
filesystems which built on top of the block device will not send discard
command. This is okay for HDD backend, but it will impact the performance
for SSD backend.

Add a feature flag VIRTIO_BLK_F_DISCARD and command VIRTIO_BLK_T_DISCARD
to extend exist virtio-blk protocol, define 16 bytes discard descriptor
for each discard segment, the discard segment defination aligns with
SCSI or NVM Express protocols, virtio-blk driver will support multi-range
discard request as well.

Signed-off-by: Changpeng Liu 
---
 drivers/block/virtio_blk.c  | 76 +++--
 include/uapi/linux/virtio_blk.h | 19 +++
 2 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0297ad7..8f0c614 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -172,10 +172,52 @@ static int virtblk_add_req(struct virtqueue *vq, struct 
virtblk_req *vbr,
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
 }
 
+static inline int virtblk_setup_discard(struct request *req)
+{
+   unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
+   u32 block_size = queue_logical_block_size(req->q);
+   struct virtio_blk_discard *range;
+   struct bio *bio;
+
+   if (block_size < 512 || !block_size)
+   return -1;
+
+   range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
+   if (!range)
+   return -1;
+
+   __rq_for_each_bio(bio, req) {
+   u64 slba = (bio->bi_iter.bi_sector << 9) / block_size;
+   u32 nlb = bio->bi_iter.bi_size / block_size;
+
+   range[n].reserved = cpu_to_le32(0);
+   range[n].nlba = cpu_to_le32(nlb);
+   range[n].slba = cpu_to_le64(slba);
+   n++;
+   }
+
+   if (WARN_ON_ONCE(n != segments)) {
+   kfree(range);
+   return -1;
+   }
+
+   req->special_vec.bv_page = virt_to_page(range);
+   req->special_vec.bv_offset = offset_in_page(range);
+   req->special_vec.bv_len = sizeof(*range) * segments;
+   req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+   return 0;
+}
+
 static inline void virtblk_request_done(struct request *req)
 {
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 
+   if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+   kfree(page_address(req->special_vec.bv_page) +
+ req->special_vec.bv_offset);
+   }
+
switch (req_op(req)) {
case REQ_OP_SCSI_IN:
case REQ_OP_SCSI_OUT:
@@ -237,6 +279,9 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx 
*hctx,
case REQ_OP_FLUSH:
type = VIRTIO_BLK_T_FLUSH;
break;
+   case REQ_OP_DISCARD:
+   type = VIRTIO_BLK_T_DISCARD;
+   break;
case REQ_OP_SCSI_IN:
case REQ_OP_SCSI_OUT:
type = VIRTIO_BLK_T_SCSI_CMD;
@@ -256,9 +301,15 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx 
*hctx,
 
blk_mq_start_request(req);
 
+   if (type == VIRTIO_BLK_T_DISCARD) {
+   err = virtblk_setup_discard(req);
+   if (err)
+   return BLK_STS_IOERR;
+   }
+
num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
if (num) {
-   if (rq_data_dir(req) == WRITE)
+   if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD)
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, 
VIRTIO_BLK_T_OUT);
else
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, 
VIRTIO_BLK_T_IN);
@@ -767,6 +818,25 @@ static int virtblk_probe(struct virtio_device *vdev)
if (!err && opt_io_size)
blk_queue_io_opt(q, blk_size * opt_io_size);
 
+   if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+   q->limits.discard_alignment = blk_size;
+   q->limits.discard_granularity = blk_size;
+
+   virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, 
);
+   if (v)
+   blk_queue_max_discard_sectors(q, v);
+   else
+   blk_queue_max_discard_sectors(q, -1U);
+
+   virtio_cread(vdev, struct virtio_blk_config, max_discard_num, 
);
+   if (v)
+   blk_queue_max_discard_segments(q, v);
+   else
+   blk_queue_max_discard_segments(q, 256);
+
+   queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+   }
+
virtio_device_ready(vdev);
 
device_add_disk(>dev, vblk->disk);
@@ -874,14 +944,14 @@ static int virtblk_restore(struct virtio_device *vdev)
VIRTIO_BLK_F_SCSI,
 #endif
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
-