Re: [RFC v2] virtio: support packed ring

2018-04-23 Thread Tiwei Bie
On Tue, Apr 24, 2018 at 04:43:22AM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 24, 2018 at 09:37:47AM +0800, Tiwei Bie wrote:
> > On Tue, Apr 24, 2018 at 04:29:51AM +0300, Michael S. Tsirkin wrote:
> > > On Tue, Apr 24, 2018 at 09:16:38AM +0800, Tiwei Bie wrote:
> > > > On Tue, Apr 24, 2018 at 04:05:07AM +0300, Michael S. Tsirkin wrote:
> > > > > On Tue, Apr 24, 2018 at 08:54:52AM +0800, Jason Wang wrote:
> > > > > > 
> > > > > > 
> > > > > > On 2018年04月23日 17:29, Tiwei Bie wrote:
> > > > > > > On Mon, Apr 23, 2018 at 01:42:14PM +0800, Jason Wang wrote:
> > > > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > > > > > Hello everyone,
> > > > > > > > > 
> > > > > > > > > This RFC implements packed ring support for virtio driver.
> > > > > > > > > 
> > > > > > > > > The code was tested with DPDK vhost (testpmd/vhost-PMD) 
> > > > > > > > > implemented
> > > > > > > > > by Jens at 
> > > > > > > > > http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > > > > > > > > Minor changes are needed for the vhost code, e.g. to kick the 
> > > > > > > > > guest.
> > > > > > > > > 
> > > > > > > > > TODO:
> > > > > > > > > - Refinements and bug fixes;
> > > > > > > > > - Split into small patches;
> > > > > > > > > - Test indirect descriptor support;
> > > > > > > > > - Test/fix event suppression support;
> > > > > > > > > - Test devices other than net;
> > > > > > > > > 
> > > > > > > > > RFC v1 -> RFC v2:
> > > > > > > > > - Add indirect descriptor support - compile test only;
> > > > > > > > > - Add event suppression supprt - compile test only;
> > > > > > > > > - Move vring_packed_init() out of uapi (Jason, MST);
> > > > > > > > > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > > > > > > > > - Split vring_unmap_one() for packed ring and split ring 
> > > > > > > > > (Jason);
> > > > > > > > > - Avoid using '%' operator (Jason);
> > > > > > > > > - Rename free_head -> next_avail_idx (Jason);
> > > > > > > > > - Add comments for virtio_wmb() in virtqueue_add_packed() 
> > > > > > > > > (Jason);
> > > > > > > > > - Some other refinements and bug fixes;
> > > > > > > > > 
> > > > > > > > > Thanks!
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Tiwei Bie 
> > > > > > > > > ---
> > > > > > > > >drivers/virtio/virtio_ring.c   | 1094 
> > > > > > > > > +---
> > > > > > > > >include/linux/virtio_ring.h|8 +-
> > > > > > > > >include/uapi/linux/virtio_config.h |   12 +-
> > > > > > > > >include/uapi/linux/virtio_ring.h   |   61 ++
> > > > > > > > >4 files changed, 980 insertions(+), 195 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c 
> > > > > > > > > b/drivers/virtio/virtio_ring.c
> > > > > > > > > index 71458f493cf8..0515dca34d77 100644
> > > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > > @@ -58,14 +58,15 @@
> > > > > > > > [...]
> > > > > > > > 
> > > > > > > > > +
> > > > > > > > > + if (vq->indirect) {
> > > > > > > > > + u32 len;
> > > > > > > > > +
> > > > > > > > > + desc = vq->desc_state[head].indir_desc;
> > > > > > > > > + /* Free the indirect table, if any, now that 
> > > > > > > > > it's unmapped. */
> > > > > > > > > + if (!desc)
> > > > > > > > > + goto out;
> > > > > > > > > +
> > > > > > > > > + len = virtio32_to_cpu(vq->vq.vdev,
> > > > > > > > > +   
> > > > > > > > > vq->vring_packed.desc[head].len);
> > > > > > > > > +
> > > > > > > > > + BUG_ON(!(vq->vring_packed.desc[head].flags &
> > > > > > > > > +  cpu_to_virtio16(vq->vq.vdev, 
> > > > > > > > > VRING_DESC_F_INDIRECT)));
> > > > > > > > It looks to me spec does not force to keep 
> > > > > > > > VRING_DESC_F_INDIRECT here. So we
> > > > > > > > can safely remove this BUG_ON() here.
> > > > > > > > 
> > > > > > > > > + BUG_ON(len == 0 || len % sizeof(struct 
> > > > > > > > > vring_packed_desc));
> > > > > > > > Len could be ignored for used descriptor according to the spec, 
> > > > > > > > so we need
> > > > > > > > remove this BUG_ON() too.
> > > > > > > Yeah, you're right! The BUG_ON() isn't right. I'll remove it.
> > > > > > > And I think something related to this in the spec isn't very
> > > > > > > clear currently.
> > > > > > > 
> > > > > > > In the spec, there are below words:
> > > > > > > 
> > > > > > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L272
> > > > > > > """
> > > > > > > In descriptors with VIRTQ_DESC_F_INDIRECT set VIRTQ_DESC_F_WRITE
> > > > > > > is reserved and is ignored by the device.
> > > > > > > """
> > > > > > > 
> > > > > > > So when device writes back an used descriptor in this case,
> > > > > > > device may not set the VIRTQ_DESC_F_WRITE flag as the flag
> > > > > > > is reserved and 

Re: [RFC v2] virtio: support packed ring

2018-04-23 Thread Michael S. Tsirkin
On Tue, Apr 24, 2018 at 09:37:47AM +0800, Tiwei Bie wrote:
> On Tue, Apr 24, 2018 at 04:29:51AM +0300, Michael S. Tsirkin wrote:
> > On Tue, Apr 24, 2018 at 09:16:38AM +0800, Tiwei Bie wrote:
> > > On Tue, Apr 24, 2018 at 04:05:07AM +0300, Michael S. Tsirkin wrote:
> > > > On Tue, Apr 24, 2018 at 08:54:52AM +0800, Jason Wang wrote:
> > > > > 
> > > > > 
> > > > > On 2018年04月23日 17:29, Tiwei Bie wrote:
> > > > > > On Mon, Apr 23, 2018 at 01:42:14PM +0800, Jason Wang wrote:
> > > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > > > > Hello everyone,
> > > > > > > > 
> > > > > > > > This RFC implements packed ring support for virtio driver.
> > > > > > > > 
> > > > > > > > The code was tested with DPDK vhost (testpmd/vhost-PMD) 
> > > > > > > > implemented
> > > > > > > > by Jens at 
> > > > > > > > http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > > > > > > > Minor changes are needed for the vhost code, e.g. to kick the 
> > > > > > > > guest.
> > > > > > > > 
> > > > > > > > TODO:
> > > > > > > > - Refinements and bug fixes;
> > > > > > > > - Split into small patches;
> > > > > > > > - Test indirect descriptor support;
> > > > > > > > - Test/fix event suppression support;
> > > > > > > > - Test devices other than net;
> > > > > > > > 
> > > > > > > > RFC v1 -> RFC v2:
> > > > > > > > - Add indirect descriptor support - compile test only;
> > > > > > > > - Add event suppression supprt - compile test only;
> > > > > > > > - Move vring_packed_init() out of uapi (Jason, MST);
> > > > > > > > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > > > > > > > - Split vring_unmap_one() for packed ring and split ring 
> > > > > > > > (Jason);
> > > > > > > > - Avoid using '%' operator (Jason);
> > > > > > > > - Rename free_head -> next_avail_idx (Jason);
> > > > > > > > - Add comments for virtio_wmb() in virtqueue_add_packed() 
> > > > > > > > (Jason);
> > > > > > > > - Some other refinements and bug fixes;
> > > > > > > > 
> > > > > > > > Thanks!
> > > > > > > > 
> > > > > > > > Signed-off-by: Tiwei Bie 
> > > > > > > > ---
> > > > > > > >drivers/virtio/virtio_ring.c   | 1094 
> > > > > > > > +---
> > > > > > > >include/linux/virtio_ring.h|8 +-
> > > > > > > >include/uapi/linux/virtio_config.h |   12 +-
> > > > > > > >include/uapi/linux/virtio_ring.h   |   61 ++
> > > > > > > >4 files changed, 980 insertions(+), 195 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/drivers/virtio/virtio_ring.c 
> > > > > > > > b/drivers/virtio/virtio_ring.c
> > > > > > > > index 71458f493cf8..0515dca34d77 100644
> > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > @@ -58,14 +58,15 @@
> > > > > > > [...]
> > > > > > > 
> > > > > > > > +
> > > > > > > > +   if (vq->indirect) {
> > > > > > > > +   u32 len;
> > > > > > > > +
> > > > > > > > +   desc = vq->desc_state[head].indir_desc;
> > > > > > > > +   /* Free the indirect table, if any, now that 
> > > > > > > > it's unmapped. */
> > > > > > > > +   if (!desc)
> > > > > > > > +   goto out;
> > > > > > > > +
> > > > > > > > +   len = virtio32_to_cpu(vq->vq.vdev,
> > > > > > > > + 
> > > > > > > > vq->vring_packed.desc[head].len);
> > > > > > > > +
> > > > > > > > +   BUG_ON(!(vq->vring_packed.desc[head].flags &
> > > > > > > > +cpu_to_virtio16(vq->vq.vdev, 
> > > > > > > > VRING_DESC_F_INDIRECT)));
> > > > > > > It looks to me spec does not force to keep VRING_DESC_F_INDIRECT 
> > > > > > > here. So we
> > > > > > > can safely remove this BUG_ON() here.
> > > > > > > 
> > > > > > > > +   BUG_ON(len == 0 || len % sizeof(struct 
> > > > > > > > vring_packed_desc));
> > > > > > > Len could be ignored for used descriptor according to the spec, 
> > > > > > > so we need
> > > > > > > remove this BUG_ON() too.
> > > > > > Yeah, you're right! The BUG_ON() isn't right. I'll remove it.
> > > > > > And I think something related to this in the spec isn't very
> > > > > > clear currently.
> > > > > > 
> > > > > > In the spec, there are below words:
> > > > > > 
> > > > > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L272
> > > > > > """
> > > > > > In descriptors with VIRTQ_DESC_F_INDIRECT set VIRTQ_DESC_F_WRITE
> > > > > > is reserved and is ignored by the device.
> > > > > > """
> > > > > > 
> > > > > > So when device writes back an used descriptor in this case,
> > > > > > device may not set the VIRTQ_DESC_F_WRITE flag as the flag
> > > > > > is reserved and should be ignored.
> > > > > > 
> > > > > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L170
> > > > > > """
> > > > > > Element Length is reserved for used descriptors without the
> > > > > > VIRTQ_DESC_F_WRITE flag, and 

Re: [RFC v2] virtio: support packed ring

2018-04-23 Thread Tiwei Bie
On Tue, Apr 24, 2018 at 04:29:51AM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 24, 2018 at 09:16:38AM +0800, Tiwei Bie wrote:
> > On Tue, Apr 24, 2018 at 04:05:07AM +0300, Michael S. Tsirkin wrote:
> > > On Tue, Apr 24, 2018 at 08:54:52AM +0800, Jason Wang wrote:
> > > > 
> > > > 
> > > > On 2018年04月23日 17:29, Tiwei Bie wrote:
> > > > > On Mon, Apr 23, 2018 at 01:42:14PM +0800, Jason Wang wrote:
> > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > > > Hello everyone,
> > > > > > > 
> > > > > > > This RFC implements packed ring support for virtio driver.
> > > > > > > 
> > > > > > > The code was tested with DPDK vhost (testpmd/vhost-PMD) 
> > > > > > > implemented
> > > > > > > by Jens at 
> > > > > > > http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > > > > > > Minor changes are needed for the vhost code, e.g. to kick the 
> > > > > > > guest.
> > > > > > > 
> > > > > > > TODO:
> > > > > > > - Refinements and bug fixes;
> > > > > > > - Split into small patches;
> > > > > > > - Test indirect descriptor support;
> > > > > > > - Test/fix event suppression support;
> > > > > > > - Test devices other than net;
> > > > > > > 
> > > > > > > RFC v1 -> RFC v2:
> > > > > > > - Add indirect descriptor support - compile test only;
> > > > > > > - Add event suppression supprt - compile test only;
> > > > > > > - Move vring_packed_init() out of uapi (Jason, MST);
> > > > > > > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > > > > > > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > > > > > > - Avoid using '%' operator (Jason);
> > > > > > > - Rename free_head -> next_avail_idx (Jason);
> > > > > > > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > > > > > > - Some other refinements and bug fixes;
> > > > > > > 
> > > > > > > Thanks!
> > > > > > > 
> > > > > > > Signed-off-by: Tiwei Bie 
> > > > > > > ---
> > > > > > >drivers/virtio/virtio_ring.c   | 1094 
> > > > > > > +---
> > > > > > >include/linux/virtio_ring.h|8 +-
> > > > > > >include/uapi/linux/virtio_config.h |   12 +-
> > > > > > >include/uapi/linux/virtio_ring.h   |   61 ++
> > > > > > >4 files changed, 980 insertions(+), 195 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/virtio/virtio_ring.c 
> > > > > > > b/drivers/virtio/virtio_ring.c
> > > > > > > index 71458f493cf8..0515dca34d77 100644
> > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > @@ -58,14 +58,15 @@
> > > > > > [...]
> > > > > > 
> > > > > > > +
> > > > > > > + if (vq->indirect) {
> > > > > > > + u32 len;
> > > > > > > +
> > > > > > > + desc = vq->desc_state[head].indir_desc;
> > > > > > > + /* Free the indirect table, if any, now that it's 
> > > > > > > unmapped. */
> > > > > > > + if (!desc)
> > > > > > > + goto out;
> > > > > > > +
> > > > > > > + len = virtio32_to_cpu(vq->vq.vdev,
> > > > > > > +   vq->vring_packed.desc[head].len);
> > > > > > > +
> > > > > > > + BUG_ON(!(vq->vring_packed.desc[head].flags &
> > > > > > > +  cpu_to_virtio16(vq->vq.vdev, 
> > > > > > > VRING_DESC_F_INDIRECT)));
> > > > > > It looks to me spec does not force to keep VRING_DESC_F_INDIRECT 
> > > > > > here. So we
> > > > > > can safely remove this BUG_ON() here.
> > > > > > 
> > > > > > > + BUG_ON(len == 0 || len % sizeof(struct 
> > > > > > > vring_packed_desc));
> > > > > > Len could be ignored for used descriptor according to the spec, so 
> > > > > > we need
> > > > > > remove this BUG_ON() too.
> > > > > Yeah, you're right! The BUG_ON() isn't right. I'll remove it.
> > > > > And I think something related to this in the spec isn't very
> > > > > clear currently.
> > > > > 
> > > > > In the spec, there are below words:
> > > > > 
> > > > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L272
> > > > > """
> > > > > In descriptors with VIRTQ_DESC_F_INDIRECT set VIRTQ_DESC_F_WRITE
> > > > > is reserved and is ignored by the device.
> > > > > """
> > > > > 
> > > > > So when device writes back an used descriptor in this case,
> > > > > device may not set the VIRTQ_DESC_F_WRITE flag as the flag
> > > > > is reserved and should be ignored.
> > > > > 
> > > > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L170
> > > > > """
> > > > > Element Length is reserved for used descriptors without the
> > > > > VIRTQ_DESC_F_WRITE flag, and is ignored by drivers.
> > > > > """
> > > > > 
> > > > > And this is the way how driver ignores the `len` in an used
> > > > > descriptor.
> > > > > 
> > > > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L241
> > > > > """
> > > > > To increase ring capacity the driver can store a (read-only
> > > > > by the device) table of indirect descriptors 

Re: [RFC v2] virtio: support packed ring

2018-04-23 Thread Michael S. Tsirkin
On Tue, Apr 24, 2018 at 09:16:38AM +0800, Tiwei Bie wrote:
> On Tue, Apr 24, 2018 at 04:05:07AM +0300, Michael S. Tsirkin wrote:
> > On Tue, Apr 24, 2018 at 08:54:52AM +0800, Jason Wang wrote:
> > > 
> > > 
> > > On 2018年04月23日 17:29, Tiwei Bie wrote:
> > > > On Mon, Apr 23, 2018 at 01:42:14PM +0800, Jason Wang wrote:
> > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > > Hello everyone,
> > > > > > 
> > > > > > This RFC implements packed ring support for virtio driver.
> > > > > > 
> > > > > > The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
> > > > > > by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > > > > > Minor changes are needed for the vhost code, e.g. to kick the guest.
> > > > > > 
> > > > > > TODO:
> > > > > > - Refinements and bug fixes;
> > > > > > - Split into small patches;
> > > > > > - Test indirect descriptor support;
> > > > > > - Test/fix event suppression support;
> > > > > > - Test devices other than net;
> > > > > > 
> > > > > > RFC v1 -> RFC v2:
> > > > > > - Add indirect descriptor support - compile test only;
> > > > > > - Add event suppression supprt - compile test only;
> > > > > > - Move vring_packed_init() out of uapi (Jason, MST);
> > > > > > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > > > > > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > > > > > - Avoid using '%' operator (Jason);
> > > > > > - Rename free_head -> next_avail_idx (Jason);
> > > > > > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > > > > > - Some other refinements and bug fixes;
> > > > > > 
> > > > > > Thanks!
> > > > > > 
> > > > > > Signed-off-by: Tiwei Bie 
> > > > > > ---
> > > > > >drivers/virtio/virtio_ring.c   | 1094 
> > > > > > +---
> > > > > >include/linux/virtio_ring.h|8 +-
> > > > > >include/uapi/linux/virtio_config.h |   12 +-
> > > > > >include/uapi/linux/virtio_ring.h   |   61 ++
> > > > > >4 files changed, 980 insertions(+), 195 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/virtio/virtio_ring.c 
> > > > > > b/drivers/virtio/virtio_ring.c
> > > > > > index 71458f493cf8..0515dca34d77 100644
> > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > @@ -58,14 +58,15 @@
> > > > > [...]
> > > > > 
> > > > > > +
> > > > > > +   if (vq->indirect) {
> > > > > > +   u32 len;
> > > > > > +
> > > > > > +   desc = vq->desc_state[head].indir_desc;
> > > > > > +   /* Free the indirect table, if any, now that it's 
> > > > > > unmapped. */
> > > > > > +   if (!desc)
> > > > > > +   goto out;
> > > > > > +
> > > > > > +   len = virtio32_to_cpu(vq->vq.vdev,
> > > > > > + vq->vring_packed.desc[head].len);
> > > > > > +
> > > > > > +   BUG_ON(!(vq->vring_packed.desc[head].flags &
> > > > > > +cpu_to_virtio16(vq->vq.vdev, 
> > > > > > VRING_DESC_F_INDIRECT)));
> > > > > It looks to me spec does not force to keep VRING_DESC_F_INDIRECT 
> > > > > here. So we
> > > > > can safely remove this BUG_ON() here.
> > > > > 
> > > > > > +   BUG_ON(len == 0 || len % sizeof(struct 
> > > > > > vring_packed_desc));
> > > > > Len could be ignored for used descriptor according to the spec, so we 
> > > > > need
> > > > > remove this BUG_ON() too.
> > > > Yeah, you're right! The BUG_ON() isn't right. I'll remove it.
> > > > And I think something related to this in the spec isn't very
> > > > clear currently.
> > > > 
> > > > In the spec, there are below words:
> > > > 
> > > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L272
> > > > """
> > > > In descriptors with VIRTQ_DESC_F_INDIRECT set VIRTQ_DESC_F_WRITE
> > > > is reserved and is ignored by the device.
> > > > """
> > > > 
> > > > So when device writes back an used descriptor in this case,
> > > > device may not set the VIRTQ_DESC_F_WRITE flag as the flag
> > > > is reserved and should be ignored.
> > > > 
> > > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L170
> > > > """
> > > > Element Length is reserved for used descriptors without the
> > > > VIRTQ_DESC_F_WRITE flag, and is ignored by drivers.
> > > > """
> > > > 
> > > > And this is the way how driver ignores the `len` in an used
> > > > descriptor.
> > > > 
> > > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L241
> > > > """
> > > > To increase ring capacity the driver can store a (read-only
> > > > by the device) table of indirect descriptors anywhere in memory,
> > > > and insert a descriptor in the main virtqueue (with \field{Flags}
> > > > bit VIRTQ_DESC_F_INDIRECT on) that refers to a buffer element
> > > > containing this indirect descriptor table;
> > > > """
> > > > 
> > > > So the indirect descriptors in the table are read-only by
> > 

Re: [RFC v2] virtio: support packed ring

2018-04-23 Thread Tiwei Bie
On Tue, Apr 24, 2018 at 04:05:07AM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 24, 2018 at 08:54:52AM +0800, Jason Wang wrote:
> > 
> > 
> > On 2018年04月23日 17:29, Tiwei Bie wrote:
> > > On Mon, Apr 23, 2018 at 01:42:14PM +0800, Jason Wang wrote:
> > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > Hello everyone,
> > > > > 
> > > > > This RFC implements packed ring support for virtio driver.
> > > > > 
> > > > > The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
> > > > > by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > > > > Minor changes are needed for the vhost code, e.g. to kick the guest.
> > > > > 
> > > > > TODO:
> > > > > - Refinements and bug fixes;
> > > > > - Split into small patches;
> > > > > - Test indirect descriptor support;
> > > > > - Test/fix event suppression support;
> > > > > - Test devices other than net;
> > > > > 
> > > > > RFC v1 -> RFC v2:
> > > > > - Add indirect descriptor support - compile test only;
> > > > > - Add event suppression supprt - compile test only;
> > > > > - Move vring_packed_init() out of uapi (Jason, MST);
> > > > > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > > > > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > > > > - Avoid using '%' operator (Jason);
> > > > > - Rename free_head -> next_avail_idx (Jason);
> > > > > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > > > > - Some other refinements and bug fixes;
> > > > > 
> > > > > Thanks!
> > > > > 
> > > > > Signed-off-by: Tiwei Bie 
> > > > > ---
> > > > >drivers/virtio/virtio_ring.c   | 1094 
> > > > > +---
> > > > >include/linux/virtio_ring.h|8 +-
> > > > >include/uapi/linux/virtio_config.h |   12 +-
> > > > >include/uapi/linux/virtio_ring.h   |   61 ++
> > > > >4 files changed, 980 insertions(+), 195 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/virtio/virtio_ring.c 
> > > > > b/drivers/virtio/virtio_ring.c
> > > > > index 71458f493cf8..0515dca34d77 100644
> > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > @@ -58,14 +58,15 @@
> > > > [...]
> > > > 
> > > > > +
> > > > > + if (vq->indirect) {
> > > > > + u32 len;
> > > > > +
> > > > > + desc = vq->desc_state[head].indir_desc;
> > > > > + /* Free the indirect table, if any, now that it's 
> > > > > unmapped. */
> > > > > + if (!desc)
> > > > > + goto out;
> > > > > +
> > > > > + len = virtio32_to_cpu(vq->vq.vdev,
> > > > > +   vq->vring_packed.desc[head].len);
> > > > > +
> > > > > + BUG_ON(!(vq->vring_packed.desc[head].flags &
> > > > > +  cpu_to_virtio16(vq->vq.vdev, 
> > > > > VRING_DESC_F_INDIRECT)));
> > > > It looks to me spec does not force to keep VRING_DESC_F_INDIRECT here. 
> > > > So we
> > > > can safely remove this BUG_ON() here.
> > > > 
> > > > > + BUG_ON(len == 0 || len % sizeof(struct 
> > > > > vring_packed_desc));
> > > > Len could be ignored for used descriptor according to the spec, so we 
> > > > need
> > > > remove this BUG_ON() too.
> > > Yeah, you're right! The BUG_ON() isn't right. I'll remove it.
> > > And I think something related to this in the spec isn't very
> > > clear currently.
> > > 
> > > In the spec, there are below words:
> > > 
> > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L272
> > > """
> > > In descriptors with VIRTQ_DESC_F_INDIRECT set VIRTQ_DESC_F_WRITE
> > > is reserved and is ignored by the device.
> > > """
> > > 
> > > So when device writes back an used descriptor in this case,
> > > device may not set the VIRTQ_DESC_F_WRITE flag as the flag
> > > is reserved and should be ignored.
> > > 
> > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L170
> > > """
> > > Element Length is reserved for used descriptors without the
> > > VIRTQ_DESC_F_WRITE flag, and is ignored by drivers.
> > > """
> > > 
> > > And this is the way how driver ignores the `len` in an used
> > > descriptor.
> > > 
> > > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L241
> > > """
> > > To increase ring capacity the driver can store a (read-only
> > > by the device) table of indirect descriptors anywhere in memory,
> > > and insert a descriptor in the main virtqueue (with \field{Flags}
> > > bit VIRTQ_DESC_F_INDIRECT on) that refers to a buffer element
> > > containing this indirect descriptor table;
> > > """
> > > 
> > > So the indirect descriptors in the table are read-only by
> > > the device. And the only descriptor which is writeable by
> > > the device is the descriptor in the main virtqueue (with
> > > Flags bit VIRTQ_DESC_F_INDIRECT on). So if we ignore the
> > > `len` in this descriptor, we won't be able to get the
> > > length 

Re: [RFC v2] virtio: support packed ring

2018-04-23 Thread Jason Wang



On 2018年04月24日 09:05, Michael S. Tsirkin wrote:

+   if (vq->indirect) {
+   u32 len;
+
+   desc = vq->desc_state[head].indir_desc;
+   /* Free the indirect table, if any, now that it's unmapped. */
+   if (!desc)
+   goto out;
+
+   len = virtio32_to_cpu(vq->vq.vdev,
+ vq->vring_packed.desc[head].len);
+
+   BUG_ON(!(vq->vring_packed.desc[head].flags &
+cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));

It looks to me spec does not force to keep VRING_DESC_F_INDIRECT here. So we
can safely remove this BUG_ON() here.


+   BUG_ON(len == 0 || len % sizeof(struct vring_packed_desc));

Len could be ignored for used descriptor according to the spec, so we need
remove this BUG_ON() too.

Yeah, you're right! The BUG_ON() isn't right. I'll remove it.
And I think something related to this in the spec isn't very
clear currently.

In the spec, there are below words:

https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L272
"""
In descriptors with VIRTQ_DESC_F_INDIRECT set VIRTQ_DESC_F_WRITE
is reserved and is ignored by the device.
"""

So when device writes back an used descriptor in this case,
device may not set the VIRTQ_DESC_F_WRITE flag as the flag
is reserved and should be ignored.

https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L170
"""
Element Length is reserved for used descriptors without the
VIRTQ_DESC_F_WRITE flag, and is ignored by drivers.
"""

And this is the way how driver ignores the `len` in an used
descriptor.

https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L241
"""
To increase ring capacity the driver can store a (read-only
by the device) table of indirect descriptors anywhere in memory,
and insert a descriptor in the main virtqueue (with \field{Flags}
bit VIRTQ_DESC_F_INDIRECT on) that refers to a buffer element
containing this indirect descriptor table;
"""

So the indirect descriptors in the table are read-only by
the device. And the only descriptor which is writeable by
the device is the descriptor in the main virtqueue (with
Flags bit VIRTQ_DESC_F_INDIRECT on). So if we ignore the
`len` in this descriptor, we won't be able to get the
length of the data written by the device.

So I think the `len` in this descriptor will carry the
length of the data written by the device (if the buffers
are writable to the device) even if the VIRTQ_DESC_F_WRITE
isn't set by the device. How do you think?

Yes I think so. But we'd better need clarification from Michael.

I think if you use a descriptor, and you want to supply len
to guest, you set VIRTQ_DESC_F_WRITE in the used descriptor.
Spec also says you must not set VIRTQ_DESC_F_INDIRECT then.
If that's a problem we could look at relaxing that last requirement -
does driver want INDIRECT in used descriptor to match
the value in the avail descriptor for some reason?



Looks not, so what I get it:

- device and set VIRTQ_DESC_F_WRITE flag for used descriptor when needed
- there no need to keep INDIRECT flag in used descriptor

So for the above case, we can just have a used descriptor with _F_WRITE 
but without INDIRECT flag.


Thanks



Re: [RFC v2] virtio: support packed ring

2018-04-23 Thread Michael S. Tsirkin
On Tue, Apr 24, 2018 at 08:54:52AM +0800, Jason Wang wrote:
> 
> 
> On 2018年04月23日 17:29, Tiwei Bie wrote:
> > On Mon, Apr 23, 2018 at 01:42:14PM +0800, Jason Wang wrote:
> > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > Hello everyone,
> > > > 
> > > > This RFC implements packed ring support for virtio driver.
> > > > 
> > > > The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
> > > > by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > > > Minor changes are needed for the vhost code, e.g. to kick the guest.
> > > > 
> > > > TODO:
> > > > - Refinements and bug fixes;
> > > > - Split into small patches;
> > > > - Test indirect descriptor support;
> > > > - Test/fix event suppression support;
> > > > - Test devices other than net;
> > > > 
> > > > RFC v1 -> RFC v2:
> > > > - Add indirect descriptor support - compile test only;
> > > > - Add event suppression supprt - compile test only;
> > > > - Move vring_packed_init() out of uapi (Jason, MST);
> > > > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > > > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > > > - Avoid using '%' operator (Jason);
> > > > - Rename free_head -> next_avail_idx (Jason);
> > > > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > > > - Some other refinements and bug fixes;
> > > > 
> > > > Thanks!
> > > > 
> > > > Signed-off-by: Tiwei Bie 
> > > > ---
> > > >drivers/virtio/virtio_ring.c   | 1094 
> > > > +---
> > > >include/linux/virtio_ring.h|8 +-
> > > >include/uapi/linux/virtio_config.h |   12 +-
> > > >include/uapi/linux/virtio_ring.h   |   61 ++
> > > >4 files changed, 980 insertions(+), 195 deletions(-)
> > > > 
> > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > index 71458f493cf8..0515dca34d77 100644
> > > > --- a/drivers/virtio/virtio_ring.c
> > > > +++ b/drivers/virtio/virtio_ring.c
> > > > @@ -58,14 +58,15 @@
> > > [...]
> > > 
> > > > +
> > > > +   if (vq->indirect) {
> > > > +   u32 len;
> > > > +
> > > > +   desc = vq->desc_state[head].indir_desc;
> > > > +   /* Free the indirect table, if any, now that it's 
> > > > unmapped. */
> > > > +   if (!desc)
> > > > +   goto out;
> > > > +
> > > > +   len = virtio32_to_cpu(vq->vq.vdev,
> > > > + vq->vring_packed.desc[head].len);
> > > > +
> > > > +   BUG_ON(!(vq->vring_packed.desc[head].flags &
> > > > +cpu_to_virtio16(vq->vq.vdev, 
> > > > VRING_DESC_F_INDIRECT)));
> > > It looks to me spec does not force to keep VRING_DESC_F_INDIRECT here. So 
> > > we
> > > can safely remove this BUG_ON() here.
> > > 
> > > > +   BUG_ON(len == 0 || len % sizeof(struct 
> > > > vring_packed_desc));
> > > Len could be ignored for used descriptor according to the spec, so we need
> > > remove this BUG_ON() too.
> > Yeah, you're right! The BUG_ON() isn't right. I'll remove it.
> > And I think something related to this in the spec isn't very
> > clear currently.
> > 
> > In the spec, there are below words:
> > 
> > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L272
> > """
> > In descriptors with VIRTQ_DESC_F_INDIRECT set VIRTQ_DESC_F_WRITE
> > is reserved and is ignored by the device.
> > """
> > 
> > So when device writes back an used descriptor in this case,
> > device may not set the VIRTQ_DESC_F_WRITE flag as the flag
> > is reserved and should be ignored.
> > 
> > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L170
> > """
> > Element Length is reserved for used descriptors without the
> > VIRTQ_DESC_F_WRITE flag, and is ignored by drivers.
> > """
> > 
> > And this is the way how driver ignores the `len` in an used
> > descriptor.
> > 
> > https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L241
> > """
> > To increase ring capacity the driver can store a (read-only
> > by the device) table of indirect descriptors anywhere in memory,
> > and insert a descriptor in the main virtqueue (with \field{Flags}
> > bit VIRTQ_DESC_F_INDIRECT on) that refers to a buffer element
> > containing this indirect descriptor table;
> > """
> > 
> > So the indirect descriptors in the table are read-only by
> > the device. And the only descriptor which is writeable by
> > the device is the descriptor in the main virtqueue (with
> > Flags bit VIRTQ_DESC_F_INDIRECT on). So if we ignore the
> > `len` in this descriptor, we won't be able to get the
> > length of the data written by the device.
> > 
> > So I think the `len` in this descriptor will carry the
> > length of the data written by the device (if the buffers
> > are writable to the device) even if the VIRTQ_DESC_F_WRITE
> > isn't set by the device. How do you think?
> 
> Yes I think so. But 

Re: [RFC v2] virtio: support packed ring

2018-04-23 Thread Jason Wang



On 2018年04月23日 17:29, Tiwei Bie wrote:

On Mon, Apr 23, 2018 at 01:42:14PM +0800, Jason Wang wrote:

On 2018年04月01日 22:12, Tiwei Bie wrote:

Hello everyone,

This RFC implements packed ring support for virtio driver.

The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
Minor changes are needed for the vhost code, e.g. to kick the guest.

TODO:
- Refinements and bug fixes;
- Split into small patches;
- Test indirect descriptor support;
- Test/fix event suppression support;
- Test devices other than net;

RFC v1 -> RFC v2:
- Add indirect descriptor support - compile test only;
- Add event suppression supprt - compile test only;
- Move vring_packed_init() out of uapi (Jason, MST);
- Merge two loops into one in virtqueue_add_packed() (Jason);
- Split vring_unmap_one() for packed ring and split ring (Jason);
- Avoid using '%' operator (Jason);
- Rename free_head -> next_avail_idx (Jason);
- Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
- Some other refinements and bug fixes;

Thanks!

Signed-off-by: Tiwei Bie 
---
   drivers/virtio/virtio_ring.c   | 1094 
+---
   include/linux/virtio_ring.h|8 +-
   include/uapi/linux/virtio_config.h |   12 +-
   include/uapi/linux/virtio_ring.h   |   61 ++
   4 files changed, 980 insertions(+), 195 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 71458f493cf8..0515dca34d77 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -58,14 +58,15 @@

[...]


+
+   if (vq->indirect) {
+   u32 len;
+
+   desc = vq->desc_state[head].indir_desc;
+   /* Free the indirect table, if any, now that it's unmapped. */
+   if (!desc)
+   goto out;
+
+   len = virtio32_to_cpu(vq->vq.vdev,
+ vq->vring_packed.desc[head].len);
+
+   BUG_ON(!(vq->vring_packed.desc[head].flags &
+cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));

It looks to me spec does not force to keep VRING_DESC_F_INDIRECT here. So we
can safely remove this BUG_ON() here.


+   BUG_ON(len == 0 || len % sizeof(struct vring_packed_desc));

Len could be ignored for used descriptor according to the spec, so we need
remove this BUG_ON() too.

Yeah, you're right! The BUG_ON() isn't right. I'll remove it.
And I think something related to this in the spec isn't very
clear currently.

In the spec, there are below words:

https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L272
"""
In descriptors with VIRTQ_DESC_F_INDIRECT set VIRTQ_DESC_F_WRITE
is reserved and is ignored by the device.
"""

So when device writes back an used descriptor in this case,
device may not set the VIRTQ_DESC_F_WRITE flag as the flag
is reserved and should be ignored.

https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L170
"""
Element Length is reserved for used descriptors without the
VIRTQ_DESC_F_WRITE flag, and is ignored by drivers.
"""

And this is the way how driver ignores the `len` in an used
descriptor.

https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L241
"""
To increase ring capacity the driver can store a (read-only
by the device) table of indirect descriptors anywhere in memory,
and insert a descriptor in the main virtqueue (with \field{Flags}
bit VIRTQ_DESC_F_INDIRECT on) that refers to a buffer element
containing this indirect descriptor table;
"""

So the indirect descriptors in the table are read-only by
the device. And the only descriptor which is writeable by
the device is the descriptor in the main virtqueue (with
Flags bit VIRTQ_DESC_F_INDIRECT on). So if we ignore the
`len` in this descriptor, we won't be able to get the
length of the data written by the device.

So I think the `len` in this descriptor will carry the
length of the data written by the device (if the buffers
are writable to the device) even if the VIRTQ_DESC_F_WRITE
isn't set by the device. How do you think?


Yes I think so. But we'd better need clarification from Michael.





The reason is we don't touch descriptor ring in the case of split, so
BUG_ON()s may help there.


+
+   for (j = 0; j < len / sizeof(struct vring_packed_desc); j++)
+   vring_unmap_one_packed(vq, [j]);
+
+   kfree(desc);
+   vq->desc_state[head].indir_desc = NULL;
+   } else if (ctx) {
+   *ctx = vq->desc_state[head].indir_desc;
+   }
+
+out:
+   return vq->desc_state[head].num;
+}
+
+static inline bool more_used_split(const struct vring_virtqueue *vq)
   {
return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 
vq->vring.used->idx);
   }
+static inline bool more_used_packed(const struct vring_virtqueue *vq)
+{
+   u16 last_used, 

Re: [RFC v2] virtio: support packed ring

2018-04-23 Thread Tiwei Bie
On Mon, Apr 23, 2018 at 01:42:14PM +0800, Jason Wang wrote:
> On 2018年04月01日 22:12, Tiwei Bie wrote:
> > Hello everyone,
> > 
> > This RFC implements packed ring support for virtio driver.
> > 
> > The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
> > by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > Minor changes are needed for the vhost code, e.g. to kick the guest.
> > 
> > TODO:
> > - Refinements and bug fixes;
> > - Split into small patches;
> > - Test indirect descriptor support;
> > - Test/fix event suppression support;
> > - Test devices other than net;
> > 
> > RFC v1 -> RFC v2:
> > - Add indirect descriptor support - compile test only;
> > - Add event suppression supprt - compile test only;
> > - Move vring_packed_init() out of uapi (Jason, MST);
> > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > - Avoid using '%' operator (Jason);
> > - Rename free_head -> next_avail_idx (Jason);
> > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > - Some other refinements and bug fixes;
> > 
> > Thanks!
> > 
> > Signed-off-by: Tiwei Bie 
> > ---
> >   drivers/virtio/virtio_ring.c   | 1094 
> > +---
> >   include/linux/virtio_ring.h|8 +-
> >   include/uapi/linux/virtio_config.h |   12 +-
> >   include/uapi/linux/virtio_ring.h   |   61 ++
> >   4 files changed, 980 insertions(+), 195 deletions(-)
> > 
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 71458f493cf8..0515dca34d77 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -58,14 +58,15 @@
> 
> [...]
> 
> > +
> > +   if (vq->indirect) {
> > +   u32 len;
> > +
> > +   desc = vq->desc_state[head].indir_desc;
> > +   /* Free the indirect table, if any, now that it's unmapped. */
> > +   if (!desc)
> > +   goto out;
> > +
> > +   len = virtio32_to_cpu(vq->vq.vdev,
> > + vq->vring_packed.desc[head].len);
> > +
> > +   BUG_ON(!(vq->vring_packed.desc[head].flags &
> > +cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
> 
> It looks to me spec does not force to keep VRING_DESC_F_INDIRECT here. So we
> can safely remove this BUG_ON() here.
> 
> > +   BUG_ON(len == 0 || len % sizeof(struct vring_packed_desc));
> 
> Len could be ignored for used descriptor according to the spec, so we need
> remove this BUG_ON() too.

Yeah, you're right! The BUG_ON() isn't right. I'll remove it.
And I think something related to this in the spec isn't very
clear currently.

In the spec, there are below words:

https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L272
"""
In descriptors with VIRTQ_DESC_F_INDIRECT set VIRTQ_DESC_F_WRITE
is reserved and is ignored by the device.
"""

So when device writes back an used descriptor in this case,
device may not set the VIRTQ_DESC_F_WRITE flag as the flag
is reserved and should be ignored.

https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L170
"""
Element Length is reserved for used descriptors without the
VIRTQ_DESC_F_WRITE flag, and is ignored by drivers.
"""

And this is the way how driver ignores the `len` in an used
descriptor.

https://github.com/oasis-tcs/virtio-spec/blob/d4fec517dfcf/packed-ring.tex#L241
"""
To increase ring capacity the driver can store a (read-only
by the device) table of indirect descriptors anywhere in memory,
and insert a descriptor in the main virtqueue (with \field{Flags}
bit VIRTQ_DESC_F_INDIRECT on) that refers to a buffer element
containing this indirect descriptor table;
"""

So the indirect descriptors in the table are read-only by
the device. And the only descriptor which is writeable by
the device is the descriptor in the main virtqueue (with
Flags bit VIRTQ_DESC_F_INDIRECT on). So if we ignore the
`len` in this descriptor, we won't be able to get the
length of the data written by the device.

So I think the `len` in this descriptor will carry the
length of the data written by the device (if the buffers
are writable to the device) even if the VIRTQ_DESC_F_WRITE
isn't set by the device. How do you think?


> 
> The reason is we don't touch descriptor ring in the case of split, so
> BUG_ON()s may help there.
> 
> > +
> > +   for (j = 0; j < len / sizeof(struct vring_packed_desc); j++)
> > +   vring_unmap_one_packed(vq, [j]);
> > +
> > +   kfree(desc);
> > +   vq->desc_state[head].indir_desc = NULL;
> > +   } else if (ctx) {
> > +   *ctx = vq->desc_state[head].indir_desc;
> > +   }
> > +
> > +out:
> > +   return vq->desc_state[head].num;
> > +}
> > +
> > +static inline bool more_used_split(const struct vring_virtqueue *vq)
> >   {
> > return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 

Re: [RFC v2] virtio: support packed ring

2018-04-22 Thread Jason Wang



On 2018年04月01日 22:12, Tiwei Bie wrote:

Hello everyone,

This RFC implements packed ring support for virtio driver.

The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
Minor changes are needed for the vhost code, e.g. to kick the guest.

TODO:
- Refinements and bug fixes;
- Split into small patches;
- Test indirect descriptor support;
- Test/fix event suppression support;
- Test devices other than net;

RFC v1 -> RFC v2:
- Add indirect descriptor support - compile test only;
- Add event suppression supprt - compile test only;
- Move vring_packed_init() out of uapi (Jason, MST);
- Merge two loops into one in virtqueue_add_packed() (Jason);
- Split vring_unmap_one() for packed ring and split ring (Jason);
- Avoid using '%' operator (Jason);
- Rename free_head -> next_avail_idx (Jason);
- Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
- Some other refinements and bug fixes;

Thanks!

Signed-off-by: Tiwei Bie 
---
  drivers/virtio/virtio_ring.c   | 1094 +---
  include/linux/virtio_ring.h|8 +-
  include/uapi/linux/virtio_config.h |   12 +-
  include/uapi/linux/virtio_ring.h   |   61 ++
  4 files changed, 980 insertions(+), 195 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 71458f493cf8..0515dca34d77 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -58,14 +58,15 @@
  


[...]


+
+   if (vq->indirect) {
+   u32 len;
+
+   desc = vq->desc_state[head].indir_desc;
+   /* Free the indirect table, if any, now that it's unmapped. */
+   if (!desc)
+   goto out;
+
+   len = virtio32_to_cpu(vq->vq.vdev,
+ vq->vring_packed.desc[head].len);
+
+   BUG_ON(!(vq->vring_packed.desc[head].flags &
+cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));


It looks to me spec does not force to keep VRING_DESC_F_INDIRECT here. 
So we can safely remove this BUG_ON() here.



+   BUG_ON(len == 0 || len % sizeof(struct vring_packed_desc));


Len could be ignored for used descriptor according to the spec, so we 
need remove this BUG_ON() too.


The reason is we don't touch descriptor ring in the case of split, so 
BUG_ON()s may help there.



+
+   for (j = 0; j < len / sizeof(struct vring_packed_desc); j++)
+   vring_unmap_one_packed(vq, [j]);
+
+   kfree(desc);
+   vq->desc_state[head].indir_desc = NULL;
+   } else if (ctx) {
+   *ctx = vq->desc_state[head].indir_desc;
+   }
+
+out:
+   return vq->desc_state[head].num;
+}
+
+static inline bool more_used_split(const struct vring_virtqueue *vq)
  {
return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 
vq->vring.used->idx);
  }
  
+static inline bool more_used_packed(const struct vring_virtqueue *vq)

+{
+   u16 last_used, flags;
+   bool avail, used;
+
+   if (vq->vq.num_free == vq->vring_packed.num)
+   return false;
+
+   last_used = vq->last_used_idx;
+   flags = virtio16_to_cpu(vq->vq.vdev,
+   vq->vring_packed.desc[last_used].flags);
+   avail = flags & VRING_DESC_F_AVAIL(1);
+   used = flags & VRING_DESC_F_USED(1);
+
+   return avail == used;
+}


This looks interesting, spec said:

"
Thus VIRTQ_DESC_F_AVAIL and VIRTQ_DESC_F_USED bits are different for an 
available descriptor and

equal for a used descriptor.
Note that this observation is mostly useful for sanity-checking as these 
are necessary but not sufficient
conditions - for example, all descriptors are zero-initialized. To 
detect used and available descriptors it is
possible for drivers and devices to keep track of the last observed 
value of VIRTQ_DESC_F_USED/VIRTQ_-
DESC_F_AVAIL. Other techniques to detect 
VIRTQ_DESC_F_AVAIL/VIRTQ_DESC_F_USED bit changes

might also be possible.
"

So it looks to me it was not sufficient, looking at the example codes in 
spec, do we need to track last seen used_wrap_counter here?


Thanks


Re: [RFC v2] virtio: support packed ring

2018-04-17 Thread Tiwei Bie
On Tue, Apr 17, 2018 at 06:54:51PM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 17, 2018 at 10:56:26PM +0800, Tiwei Bie wrote:
> > On Tue, Apr 17, 2018 at 05:04:59PM +0300, Michael S. Tsirkin wrote:
> > > On Tue, Apr 17, 2018 at 08:47:16PM +0800, Tiwei Bie wrote:
> > > > On Tue, Apr 17, 2018 at 03:17:41PM +0300, Michael S. Tsirkin wrote:
> > > > > On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> > > > > > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > > > > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > > > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > > [...]
> > > > > > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, 
> > > > > > > > > > unsigned int head,
> > > > > > > > > > + void **ctx)
> > > > > > > > > > +{
> > > > > > > > > > +   struct vring_packed_desc *desc;
> > > > > > > > > > +   unsigned int i, j;
> > > > > > > > > > +
> > > > > > > > > > +   /* Clear data ptr. */
> > > > > > > > > > +   vq->desc_state[head].data = NULL;
> > > > > > > > > > +
> > > > > > > > > > +   i = head;
> > > > > > > > > > +
> > > > > > > > > > +   for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > > > > > +   desc = >vring_packed.desc[i];
> > > > > > > > > > +   vring_unmap_one_packed(vq, desc);
> > > > > > > > > > +   desc->flags = 0x0;
> > > > > > > > > Looks like this is unnecessary.
> > > > > > > > It's safer to zero it. If we don't zero it, after we
> > > > > > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > > > > > this function, the desc is still available to the
> > > > > > > > device.
> > > > > > > 
> > > > > > > Well detach_unused_buf_packed() should be called after device is 
> > > > > > > stopped,
> > > > > > > otherwise even if you try to clear, there will still be a window 
> > > > > > > that device
> > > > > > > may use it.
> > > > > > 
> > > > > > This is not about whether the device has been stopped or
> > > > > > not. We don't have other places to re-initialize the ring
> > > > > > descriptors and wrap_counter. So they need to be set to
> > > > > > the correct values when doing detach_unused_buf.
> > > > > > 
> > > > > > Best regards,
> > > > > > Tiwei Bie
> > > > > 
> > > > > find vqs is the time to do it.
> > > > 
> > > > The .find_vqs() will call .setup_vq() which will eventually
> > > > call vring_create_virtqueue(). It's a different case. Here
> > > > we're talking about re-initializing the descs and updating
> > > > the wrap counter when detaching the unused descs (In this
> > > > case, split ring just needs to decrease vring.avail->idx).
> > > > 
> > > > Best regards,
> > > > Tiwei Bie
> > > 
> > > There's no requirement that  virtqueue_detach_unused_buf re-initializes
> > > the descs. It happens on cleanup path just before drivers delete the
> > > vqs.
> > 
> > Cool, I wasn't aware of it. I saw split ring decrease
> > vring.avail->idx after detaching an unused desc, so I
> > thought detaching unused desc also needs to make sure
> > that the ring state will be updated correspondingly.
> 
> 
> Hmm. You are right. Seems to be out console driver being out of spec.
> Will have to look at how to fix that :(
> 
> It was done here:
> 
> Commit b3258ff1d6086bd2b9eeb556844a868ad7d49bc8
> Author: Amit Shah 
> Date:   Wed Mar 16 19:12:10 2011 +0530
> 
> virtio: Decrement avail idx on buffer detach
> 
> When detaching a buffer from a vq, the avail.idx value should be
> decremented as well.
> 
> This was noticed by hot-unplugging a virtio console port and then
> plugging in a new one on the same number (re-using the vqs which were
> just 'disowned').  qemu reported
> 
>'Guest moved used index from 0 to 256'
> 
> when any IO was attempted on the new port.
> 
> CC: sta...@kernel.org
> Reported-by: juzhang 
> Signed-off-by: Amit Shah 
> Signed-off-by: Rusty Russell 
> 
> The spec is quite explicit though:
>   A driver MUST NOT decrement the available idx on a live virtqueue (ie. 
> there is no way to “unexpose”
>   buffers).
> 

Hmm.. Got it. Thanks!

Best regards,
Tiwei Bie


> 
> 
> 
> 
> > If there is no such requirement, do you think it's OK
> > to remove below two lines:
> > 
> > vq->avail_idx_shadow--;
> > vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
> > 
> > from virtqueue_detach_unused_buf(), and we could have
> > one generic function to handle both rings:
> > 
> > void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > unsigned int num, i;
> > void *buf;
> > 
> > START_USE(vq);
> > 
> > num = vq->packed ? vq->vring_packed.num : vq->vring.num;
> > 
> > for (i = 0; i < num; i++) {
> > if 

Re: [RFC v2] virtio: support packed ring

2018-04-17 Thread Michael S. Tsirkin
On Tue, Apr 17, 2018 at 10:56:26PM +0800, Tiwei Bie wrote:
> On Tue, Apr 17, 2018 at 05:04:59PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Apr 17, 2018 at 08:47:16PM +0800, Tiwei Bie wrote:
> > > On Tue, Apr 17, 2018 at 03:17:41PM +0300, Michael S. Tsirkin wrote:
> > > > On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> > > > > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > > > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > [...]
> > > > > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, 
> > > > > > > > > unsigned int head,
> > > > > > > > > +   void **ctx)
> > > > > > > > > +{
> > > > > > > > > + struct vring_packed_desc *desc;
> > > > > > > > > + unsigned int i, j;
> > > > > > > > > +
> > > > > > > > > + /* Clear data ptr. */
> > > > > > > > > + vq->desc_state[head].data = NULL;
> > > > > > > > > +
> > > > > > > > > + i = head;
> > > > > > > > > +
> > > > > > > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > > > > + desc = >vring_packed.desc[i];
> > > > > > > > > + vring_unmap_one_packed(vq, desc);
> > > > > > > > > + desc->flags = 0x0;
> > > > > > > > Looks like this is unnecessary.
> > > > > > > It's safer to zero it. If we don't zero it, after we
> > > > > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > > > > this function, the desc is still available to the
> > > > > > > device.
> > > > > > 
> > > > > > Well detach_unused_buf_packed() should be called after device is 
> > > > > > stopped,
> > > > > > otherwise even if you try to clear, there will still be a window 
> > > > > > that device
> > > > > > may use it.
> > > > > 
> > > > > This is not about whether the device has been stopped or
> > > > > not. We don't have other places to re-initialize the ring
> > > > > descriptors and wrap_counter. So they need to be set to
> > > > > the correct values when doing detach_unused_buf.
> > > > > 
> > > > > Best regards,
> > > > > Tiwei Bie
> > > > 
> > > > find vqs is the time to do it.
> > > 
> > > The .find_vqs() will call .setup_vq() which will eventually
> > > call vring_create_virtqueue(). It's a different case. Here
> > > we're talking about re-initializing the descs and updating
> > > the wrap counter when detaching the unused descs (In this
> > > case, split ring just needs to decrease vring.avail->idx).
> > > 
> > > Best regards,
> > > Tiwei Bie
> > 
> > There's no requirement that  virtqueue_detach_unused_buf re-initializes
> > the descs. It happens on cleanup path just before drivers delete the
> > vqs.
> 
> Cool, I wasn't aware of it. I saw split ring decrease
> vring.avail->idx after detaching an unused desc, so I
> thought detaching unused desc also needs to make sure
> that the ring state will be updated correspondingly.


Hmm. You are right. Seems to be out console driver being out of spec.
Will have to look at how to fix that :(

It was done here:

Commit b3258ff1d6086bd2b9eeb556844a868ad7d49bc8
Author: Amit Shah 
Date:   Wed Mar 16 19:12:10 2011 +0530

virtio: Decrement avail idx on buffer detach

When detaching a buffer from a vq, the avail.idx value should be
decremented as well.

This was noticed by hot-unplugging a virtio console port and then
plugging in a new one on the same number (re-using the vqs which were
just 'disowned').  qemu reported

   'Guest moved used index from 0 to 256'

when any IO was attempted on the new port.

CC: sta...@kernel.org
Reported-by: juzhang 
Signed-off-by: Amit Shah 
Signed-off-by: Rusty Russell 

The spec is quite explicit though:
A driver MUST NOT decrement the available idx on a live virtqueue (ie. 
there is no way to “unexpose”
buffers).





> If there is no such requirement, do you think it's OK
> to remove below two lines:
> 
> vq->avail_idx_shadow--;
> vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
> 
> from virtqueue_detach_unused_buf(), and we could have
> one generic function to handle both rings:
> 
> void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
> {
>   struct vring_virtqueue *vq = to_vvq(_vq);
>   unsigned int num, i;
>   void *buf;
> 
>   START_USE(vq);
> 
>   num = vq->packed ? vq->vring_packed.num : vq->vring.num;
> 
>   for (i = 0; i < num; i++) {
>   if (!vq->desc_state[i].data)
>   continue;
>   /* detach_buf clears data, so grab it now. */
>   buf = vq->desc_state[i].data;
>   detach_buf(vq, i, NULL);
>   END_USE(vq);
>   return buf;
>   }
>   /* That should have freed everything. */
>   

Re: [RFC v2] virtio: support packed ring

2018-04-17 Thread Tiwei Bie
On Tue, Apr 17, 2018 at 05:04:59PM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 17, 2018 at 08:47:16PM +0800, Tiwei Bie wrote:
> > On Tue, Apr 17, 2018 at 03:17:41PM +0300, Michael S. Tsirkin wrote:
> > > On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> > > > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > [...]
> > > > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, 
> > > > > > > > unsigned int head,
> > > > > > > > + void **ctx)
> > > > > > > > +{
> > > > > > > > +   struct vring_packed_desc *desc;
> > > > > > > > +   unsigned int i, j;
> > > > > > > > +
> > > > > > > > +   /* Clear data ptr. */
> > > > > > > > +   vq->desc_state[head].data = NULL;
> > > > > > > > +
> > > > > > > > +   i = head;
> > > > > > > > +
> > > > > > > > +   for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > > > +   desc = >vring_packed.desc[i];
> > > > > > > > +   vring_unmap_one_packed(vq, desc);
> > > > > > > > +   desc->flags = 0x0;
> > > > > > > Looks like this is unnecessary.
> > > > > > It's safer to zero it. If we don't zero it, after we
> > > > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > > > this function, the desc is still available to the
> > > > > > device.
> > > > > 
> > > > > Well detach_unused_buf_packed() should be called after device is 
> > > > > stopped,
> > > > > otherwise even if you try to clear, there will still be a window that 
> > > > > device
> > > > > may use it.
> > > > 
> > > > This is not about whether the device has been stopped or
> > > > not. We don't have other places to re-initialize the ring
> > > > descriptors and wrap_counter. So they need to be set to
> > > > the correct values when doing detach_unused_buf.
> > > > 
> > > > Best regards,
> > > > Tiwei Bie
> > > 
> > > find vqs is the time to do it.
> > 
> > The .find_vqs() will call .setup_vq() which will eventually
> > call vring_create_virtqueue(). It's a different case. Here
> > we're talking about re-initializing the descs and updating
> > the wrap counter when detaching the unused descs (In this
> > case, split ring just needs to decrease vring.avail->idx).
> > 
> > Best regards,
> > Tiwei Bie
> 
> There's no requirement that  virtqueue_detach_unused_buf re-initializes
> the descs. It happens on cleanup path just before drivers delete the
> vqs.

Cool, I wasn't aware of it. I saw split ring decrease
vring.avail->idx after detaching an unused desc, so I
thought detaching unused desc also needs to make sure
that the ring state will be updated correspondingly.

If there is no such requirement, do you think it's OK
to remove below two lines:

vq->avail_idx_shadow--;
vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);

from virtqueue_detach_unused_buf(), and we could have
one generic function to handle both rings:

void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
unsigned int num, i;
void *buf;

START_USE(vq);

num = vq->packed ? vq->vring_packed.num : vq->vring.num;

for (i = 0; i < num; i++) {
if (!vq->desc_state[i].data)
continue;
/* detach_buf clears data, so grab it now. */
buf = vq->desc_state[i].data;
detach_buf(vq, i, NULL);
END_USE(vq);
return buf;
}
/* That should have freed everything. */
BUG_ON(vq->vq.num_free != num);

END_USE(vq);
return NULL;
}

Best regards,
Tiwei Bie


Re: [RFC v2] virtio: support packed ring

2018-04-17 Thread Michael S. Tsirkin
On Tue, Apr 17, 2018 at 08:47:16PM +0800, Tiwei Bie wrote:
> On Tue, Apr 17, 2018 at 03:17:41PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> > > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > [...]
> > > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, 
> > > > > > > unsigned int head,
> > > > > > > +   void **ctx)
> > > > > > > +{
> > > > > > > + struct vring_packed_desc *desc;
> > > > > > > + unsigned int i, j;
> > > > > > > +
> > > > > > > + /* Clear data ptr. */
> > > > > > > + vq->desc_state[head].data = NULL;
> > > > > > > +
> > > > > > > + i = head;
> > > > > > > +
> > > > > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > > + desc = >vring_packed.desc[i];
> > > > > > > + vring_unmap_one_packed(vq, desc);
> > > > > > > + desc->flags = 0x0;
> > > > > > Looks like this is unnecessary.
> > > > > It's safer to zero it. If we don't zero it, after we
> > > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > > this function, the desc is still available to the
> > > > > device.
> > > > 
> > > > Well detach_unused_buf_packed() should be called after device is 
> > > > stopped,
> > > > otherwise even if you try to clear, there will still be a window that 
> > > > device
> > > > may use it.
> > > 
> > > This is not about whether the device has been stopped or
> > > not. We don't have other places to re-initialize the ring
> > > descriptors and wrap_counter. So they need to be set to
> > > the correct values when doing detach_unused_buf.
> > > 
> > > Best regards,
> > > Tiwei Bie
> > 
> > find vqs is the time to do it.
> 
> The .find_vqs() will call .setup_vq() which will eventually
> call vring_create_virtqueue(). It's a different case. Here
> we're talking about re-initializing the descs and updating
> the wrap counter when detaching the unused descs (In this
> case, split ring just needs to decrease vring.avail->idx).
> 
> Best regards,
> Tiwei Bie

There's no requirement that  virtqueue_detach_unused_buf re-initializes
the descs. It happens on cleanup path just before drivers delete the
vqs.

-- 
MST


Re: [RFC v2] virtio: support packed ring

2018-04-17 Thread Tiwei Bie
On Tue, Apr 17, 2018 at 03:17:41PM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > [...]
> > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned 
> > > > > > int head,
> > > > > > + void **ctx)
> > > > > > +{
> > > > > > +   struct vring_packed_desc *desc;
> > > > > > +   unsigned int i, j;
> > > > > > +
> > > > > > +   /* Clear data ptr. */
> > > > > > +   vq->desc_state[head].data = NULL;
> > > > > > +
> > > > > > +   i = head;
> > > > > > +
> > > > > > +   for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > +   desc = >vring_packed.desc[i];
> > > > > > +   vring_unmap_one_packed(vq, desc);
> > > > > > +   desc->flags = 0x0;
> > > > > Looks like this is unnecessary.
> > > > It's safer to zero it. If we don't zero it, after we
> > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > this function, the desc is still available to the
> > > > device.
> > > 
> > > Well detach_unused_buf_packed() should be called after device is stopped,
> > > otherwise even if you try to clear, there will still be a window that 
> > > device
> > > may use it.
> > 
> > This is not about whether the device has been stopped or
> > not. We don't have other places to re-initialize the ring
> > descriptors and wrap_counter. So they need to be set to
> > the correct values when doing detach_unused_buf.
> > 
> > Best regards,
> > Tiwei Bie
> 
> find vqs is the time to do it.

The .find_vqs() will call .setup_vq() which will eventually
call vring_create_virtqueue(). It's a different case. Here
we're talking about re-initializing the descs and updating
the wrap counter when detaching the unused descs (In this
case, split ring just needs to decrease vring.avail->idx).

Best regards,
Tiwei Bie


Re: [RFC v2] virtio: support packed ring

2018-04-17 Thread Michael S. Tsirkin
On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> [...]
> > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned 
> > > > > int head,
> > > > > +   void **ctx)
> > > > > +{
> > > > > + struct vring_packed_desc *desc;
> > > > > + unsigned int i, j;
> > > > > +
> > > > > + /* Clear data ptr. */
> > > > > + vq->desc_state[head].data = NULL;
> > > > > +
> > > > > + i = head;
> > > > > +
> > > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > + desc = >vring_packed.desc[i];
> > > > > + vring_unmap_one_packed(vq, desc);
> > > > > + desc->flags = 0x0;
> > > > Looks like this is unnecessary.
> > > It's safer to zero it. If we don't zero it, after we
> > > call virtqueue_detach_unused_buf_packed() which calls
> > > this function, the desc is still available to the
> > > device.
> > 
> > Well detach_unused_buf_packed() should be called after device is stopped,
> > otherwise even if you try to clear, there will still be a window that device
> > may use it.
> 
> This is not about whether the device has been stopped or
> not. We don't have other places to re-initialize the ring
> descriptors and wrap_counter. So they need to be set to
> the correct values when doing detach_unused_buf.
> 
> Best regards,
> Tiwei Bie

find vqs is the time to do it.

-- 
MST


Re: [RFC v2] virtio: support packed ring

2018-04-16 Thread Tiwei Bie
On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> On 2018年04月13日 15:15, Tiwei Bie wrote:
> > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > On 2018年04月01日 22:12, Tiwei Bie wrote:
[...]
> > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int 
> > > > head,
> > > > + void **ctx)
> > > > +{
> > > > +   struct vring_packed_desc *desc;
> > > > +   unsigned int i, j;
> > > > +
> > > > +   /* Clear data ptr. */
> > > > +   vq->desc_state[head].data = NULL;
> > > > +
> > > > +   i = head;
> > > > +
> > > > +   for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > +   desc = >vring_packed.desc[i];
> > > > +   vring_unmap_one_packed(vq, desc);
> > > > +   desc->flags = 0x0;
> > > Looks like this is unnecessary.
> > It's safer to zero it. If we don't zero it, after we
> > call virtqueue_detach_unused_buf_packed() which calls
> > this function, the desc is still available to the
> > device.
> 
> Well detach_unused_buf_packed() should be called after device is stopped,
> otherwise even if you try to clear, there will still be a window that device
> may use it.

This is not about whether the device has been stopped or
not. We don't have other places to re-initialize the ring
descriptors and wrap_counter. So they need to be set to
the correct values when doing detach_unused_buf.

Best regards,
Tiwei Bie


Re: [RFC v2] virtio: support packed ring

2018-04-16 Thread Michael S. Tsirkin
On Tue, Apr 17, 2018 at 10:24:32AM +0800, Jason Wang wrote:
> 
> 
> On 2018年04月17日 10:17, Michael S. Tsirkin wrote:
> > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > 
> > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > > Hello everyone,
> > > > > > 
> > > > > > This RFC implements packed ring support for virtio driver.
> > > > > > 
> > > > > > The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
> > > > > > by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > > > > > Minor changes are needed for the vhost code, e.g. to kick the guest.
> > > > > > 
> > > > > > TODO:
> > > > > > - Refinements and bug fixes;
> > > > > > - Split into small patches;
> > > > > > - Test indirect descriptor support;
> > > > > > - Test/fix event suppression support;
> > > > > > - Test devices other than net;
> > > > > > 
> > > > > > RFC v1 -> RFC v2:
> > > > > > - Add indirect descriptor support - compile test only;
> > > > > > - Add event suppression supprt - compile test only;
> > > > > > - Move vring_packed_init() out of uapi (Jason, MST);
> > > > > > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > > > > > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > > > > > - Avoid using '%' operator (Jason);
> > > > > > - Rename free_head -> next_avail_idx (Jason);
> > > > > > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > > > > > - Some other refinements and bug fixes;
> > > > > > 
> > > > > > Thanks!
> > > > > > 
> > > > > > Signed-off-by: Tiwei Bie 
> > > > > > ---
> > > > > > drivers/virtio/virtio_ring.c   | 1094 
> > > > > > +---
> > > > > > include/linux/virtio_ring.h|8 +-
> > > > > > include/uapi/linux/virtio_config.h |   12 +-
> > > > > > include/uapi/linux/virtio_ring.h   |   61 ++
> > > > > > 4 files changed, 980 insertions(+), 195 deletions(-)
> > > > [...]
> 
> [...]
> 
> > > > > It looks to me we should examine RING_EVENT_FLAGS_DESC in 
> > > > > desc_event_flags
> > > > > instead of vq->event here. Spec does not forces to use evenf_off and
> > > > > event_wrap if event index is enabled.
> > > > > 
> > > > > > +   // FIXME: fix this!
> > > > > > +   needs_kick = ((off_wrap >> 15) == vq->wrap_counter) &&
> > > > > > +vring_need_event(off_wrap & ~(1<<15), new, 
> > > > > > old);
> > > > > Why need a & here?
> > > > Because wrap_counter (the most significant bit in off_wrap)
> > > > isn't part of the index.
> > > > 
> > > > > > +   } else {
> > > > > Need a smp_rmb() to make sure desc_event_flags was checked before 
> > > > > flags.
> > > > I don't get your point, if my understanding is correct,
> > > > desc_event_flags is vq->vring_packed.device->flags. So
> > > > what's the "flags"?
> > > Sorry, I mean we need check device.flags before off_warp. So it needs an
> > > smp_rmb() in the middle.
> > It's best to just read all flags atomically as u32.
> 
> Yes it is.
> 
> > 
> > > It looks to me there's no guarantee that
> > > VRING_EVENT_F_DESC is set if event index is supported.
> > > 
> > > > > > +   needs_kick = (vq->vring_packed.device->flags !=
> > > > > > + cpu_to_virtio16(_vq->vdev, 
> > > > > > VRING_EVENT_F_DISABLE));
> > > > > > +   }
> > > > > > +   END_USE(vq);
> > > > > > +   return needs_kick;
> > > > > > +}
> > > > [...]
> > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned 
> > > > > > int head,
> > > > > > + void **ctx)
> > > > > > +{
> > > > > > +   struct vring_packed_desc *desc;
> > > > > > +   unsigned int i, j;
> > > > > > +
> > > > > > +   /* Clear data ptr. */
> > > > > > +   vq->desc_state[head].data = NULL;
> > > > > > +
> > > > > > +   i = head;
> > > > > > +
> > > > > > +   for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > +   desc = >vring_packed.desc[i];
> > > > > > +   vring_unmap_one_packed(vq, desc);
> > > > > > +   desc->flags = 0x0;
> > > > > Looks like this is unnecessary.
> > > > It's safer to zero it. If we don't zero it, after we
> > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > this function, the desc is still available to the
> > > > device.
> > > Well detach_unused_buf_packed() should be called after device is stopped,
> > > otherwise even if you try to clear, there will still be a window that 
> > > device
> > > may use it.
> > > 
> > > > > > +   i++;
> > > > > > +   if (i >= vq->vring_packed.num)
> > > > > > +   i = 0;
> > > > > > +   }
> > > > [...]
> > > > > > +static unsigned virtqueue_enable_cb_prepare_packed(struct 
> > > > > > virtqueue *_vq)
> > > > > > +{
> > > > > > +   struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > +   u16 last_used_idx, 

Re: [RFC v2] virtio: support packed ring

2018-04-16 Thread Jason Wang



On 2018年04月17日 10:17, Michael S. Tsirkin wrote:

On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:


On 2018年04月13日 15:15, Tiwei Bie wrote:

On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:

On 2018年04月01日 22:12, Tiwei Bie wrote:

Hello everyone,

This RFC implements packed ring support for virtio driver.

The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
Minor changes are needed for the vhost code, e.g. to kick the guest.

TODO:
- Refinements and bug fixes;
- Split into small patches;
- Test indirect descriptor support;
- Test/fix event suppression support;
- Test devices other than net;

RFC v1 -> RFC v2:
- Add indirect descriptor support - compile test only;
- Add event suppression supprt - compile test only;
- Move vring_packed_init() out of uapi (Jason, MST);
- Merge two loops into one in virtqueue_add_packed() (Jason);
- Split vring_unmap_one() for packed ring and split ring (Jason);
- Avoid using '%' operator (Jason);
- Rename free_head -> next_avail_idx (Jason);
- Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
- Some other refinements and bug fixes;

Thanks!

Signed-off-by: Tiwei Bie 
---
drivers/virtio/virtio_ring.c   | 1094 
+---
include/linux/virtio_ring.h|8 +-
include/uapi/linux/virtio_config.h |   12 +-
include/uapi/linux/virtio_ring.h   |   61 ++
4 files changed, 980 insertions(+), 195 deletions(-)

[...]


[...]


It looks to me we should examine RING_EVENT_FLAGS_DESC in desc_event_flags
instead of vq->event here. Spec does not forces to use evenf_off and
event_wrap if event index is enabled.


+   // FIXME: fix this!
+   needs_kick = ((off_wrap >> 15) == vq->wrap_counter) &&
+vring_need_event(off_wrap & ~(1<<15), new, old);

Why need a & here?

Because wrap_counter (the most significant bit in off_wrap)
isn't part of the index.


+   } else {

Need a smp_rmb() to make sure desc_event_flags was checked before flags.

I don't get your point, if my understanding is correct,
desc_event_flags is vq->vring_packed.device->flags. So
what's the "flags"?

Sorry, I mean we need check device.flags before off_warp. So it needs an
smp_rmb() in the middle.

It's best to just read all flags atomically as u32.


Yes it is.




It looks to me there's no guarantee that
VRING_EVENT_F_DESC is set if event index is supported.


+   needs_kick = (vq->vring_packed.device->flags !=
+ cpu_to_virtio16(_vq->vdev, 
VRING_EVENT_F_DISABLE));
+   }
+   END_USE(vq);
+   return needs_kick;
+}

[...]

+static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
+ void **ctx)
+{
+   struct vring_packed_desc *desc;
+   unsigned int i, j;
+
+   /* Clear data ptr. */
+   vq->desc_state[head].data = NULL;
+
+   i = head;
+
+   for (j = 0; j < vq->desc_state[head].num; j++) {
+   desc = >vring_packed.desc[i];
+   vring_unmap_one_packed(vq, desc);
+   desc->flags = 0x0;

Looks like this is unnecessary.

It's safer to zero it. If we don't zero it, after we
call virtqueue_detach_unused_buf_packed() which calls
this function, the desc is still available to the
device.

Well detach_unused_buf_packed() should be called after device is stopped,
otherwise even if you try to clear, there will still be a window that device
may use it.


+   i++;
+   if (i >= vq->vring_packed.num)
+   i = 0;
+   }

[...]

+static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
+{
+   struct vring_virtqueue *vq = to_vvq(_vq);
+   u16 last_used_idx, wrap_counter, off_wrap;
+
+   START_USE(vq);
+
+   last_used_idx = vq->last_used_idx;
+   wrap_counter = vq->wrap_counter;
+
+   if (last_used_idx > vq->next_avail_idx)
+   wrap_counter ^= 1;
+
+   off_wrap = last_used_idx | (wrap_counter << 15);
+
+   /* We optimistically turn back on interrupts, then check if there was
+* more to do. */
+   /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
+* either clear the flags bit or point the event index at the next
+* entry. Always do both to keep code simple. */
+   if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
+   vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC:
+VRING_EVENT_F_ENABLE;
+   vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
+   vq->event_flags_shadow);
+   }

A smp_wmb() is missed here?


+   vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev, 
off_wrap);

And according to the spec, it looks to me write a 

Re: [RFC v2] virtio: support packed ring

2018-04-16 Thread Michael S. Tsirkin
On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> 
> 
> On 2018年04月13日 15:15, Tiwei Bie wrote:
> > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > Hello everyone,
> > > > 
> > > > This RFC implements packed ring support for virtio driver.
> > > > 
> > > > The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
> > > > by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > > > Minor changes are needed for the vhost code, e.g. to kick the guest.
> > > > 
> > > > TODO:
> > > > - Refinements and bug fixes;
> > > > - Split into small patches;
> > > > - Test indirect descriptor support;
> > > > - Test/fix event suppression support;
> > > > - Test devices other than net;
> > > > 
> > > > RFC v1 -> RFC v2:
> > > > - Add indirect descriptor support - compile test only;
> > > > - Add event suppression supprt - compile test only;
> > > > - Move vring_packed_init() out of uapi (Jason, MST);
> > > > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > > > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > > > - Avoid using '%' operator (Jason);
> > > > - Rename free_head -> next_avail_idx (Jason);
> > > > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > > > - Some other refinements and bug fixes;
> > > > 
> > > > Thanks!
> > > > 
> > > > Signed-off-by: Tiwei Bie 
> > > > ---
> > > >drivers/virtio/virtio_ring.c   | 1094 
> > > > +---
> > > >include/linux/virtio_ring.h|8 +-
> > > >include/uapi/linux/virtio_config.h |   12 +-
> > > >include/uapi/linux/virtio_ring.h   |   61 ++
> > > >4 files changed, 980 insertions(+), 195 deletions(-)
> > [...]
> > > > +static struct vring_packed_desc *alloc_indirect_packed(struct 
> > > > virtqueue *_vq,
> > > > +  unsigned int 
> > > > total_sg,
> > > > +  gfp_t gfp)
> > > > +{
> > > > +   struct vring_packed_desc *desc;
> > > > +
> > > > +   /*
> > > > +* We require lowmem mappings for the descriptors because
> > > > +* otherwise virt_to_phys will give us bogus addresses in the
> > > > +* virtqueue.
> > > > +*/
> > > > +   gfp &= ~__GFP_HIGHMEM;
> > > > +
> > > > +   desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), 
> > > > gfp);
> > > Can we simply check vq->packed here to avoid duplicating helpers?
> > Then it would be something like this:
> > 
> > static void *alloc_indirect(struct virtqueue *_vq, unsigned int total_sg,
> > gfp_t gfp)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > void *data;
> > 
> > /*
> >  * We require lowmem mappings for the descriptors because
> >  * otherwise virt_to_phys will give us bogus addresses in the
> >  * virtqueue.
> >  */
> > gfp &= ~__GFP_HIGHMEM;
> > 
> > if (vq->packed) {
> > data = kmalloc(total_sg * sizeof(struct vring_packed_desc),
> > gfp);
> > if (!data)
> > return NULL;
> > } else {
> > struct vring_desc *desc;
> > unsigned int i;
> > 
> > desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
> > if (!desc)
> > return NULL;
> > 
> > for (i = 0; i < total_sg; i++)
> > desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
> > 
> > data = desc;
> > }
> > 
> > return data;
> > }
> > 
> > I would prefer to have two simpler helpers (and to the callers,
> > it's already very clear about which one they should call), i.e.
> > the current implementation:
> > 
> > static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue 
> > *_vq,
> >unsigned int total_sg,
> >gfp_t gfp)
> > {
> > struct vring_packed_desc *desc;
> > 
> > /*
> >  * We require lowmem mappings for the descriptors because
> >  * otherwise virt_to_phys will give us bogus addresses in the
> >  * virtqueue.
> >  */
> > gfp &= ~__GFP_HIGHMEM;
> > 
> > desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
> > 
> > return desc;
> > }
> > 
> > static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
> >unsigned int total_sg,
> >gfp_t gfp)
> > {
> > struct vring_desc *desc;
> > unsigned int i;
> > 
> > /*
> >  * We require lowmem mappings for the descriptors because
> >  * otherwise virt_to_phys will give us bogus addresses in the
> >  * virtqueue.
> >  */
> > gfp &= ~__GFP_HIGHMEM;
> > 
> > desc = kmalloc(total_sg * sizeof(struct 

Re: [RFC v2] virtio: support packed ring

2018-04-16 Thread Jason Wang



On 2018年04月13日 15:15, Tiwei Bie wrote:

On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:

On 2018年04月01日 22:12, Tiwei Bie wrote:

Hello everyone,

This RFC implements packed ring support for virtio driver.

The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
Minor changes are needed for the vhost code, e.g. to kick the guest.

TODO:
- Refinements and bug fixes;
- Split into small patches;
- Test indirect descriptor support;
- Test/fix event suppression support;
- Test devices other than net;

RFC v1 -> RFC v2:
- Add indirect descriptor support - compile test only;
- Add event suppression supprt - compile test only;
- Move vring_packed_init() out of uapi (Jason, MST);
- Merge two loops into one in virtqueue_add_packed() (Jason);
- Split vring_unmap_one() for packed ring and split ring (Jason);
- Avoid using '%' operator (Jason);
- Rename free_head -> next_avail_idx (Jason);
- Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
- Some other refinements and bug fixes;

Thanks!

Signed-off-by: Tiwei Bie 
---
   drivers/virtio/virtio_ring.c   | 1094 
+---
   include/linux/virtio_ring.h|8 +-
   include/uapi/linux/virtio_config.h |   12 +-
   include/uapi/linux/virtio_ring.h   |   61 ++
   4 files changed, 980 insertions(+), 195 deletions(-)

[...]

+static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
+  unsigned int total_sg,
+  gfp_t gfp)
+{
+   struct vring_packed_desc *desc;
+
+   /*
+* We require lowmem mappings for the descriptors because
+* otherwise virt_to_phys will give us bogus addresses in the
+* virtqueue.
+*/
+   gfp &= ~__GFP_HIGHMEM;
+
+   desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);

Can we simply check vq->packed here to avoid duplicating helpers?

Then it would be something like this:

static void *alloc_indirect(struct virtqueue *_vq, unsigned int total_sg,
gfp_t gfp)
{
struct vring_virtqueue *vq = to_vvq(_vq);
void *data;

/*
 * We require lowmem mappings for the descriptors because
 * otherwise virt_to_phys will give us bogus addresses in the
 * virtqueue.
 */
gfp &= ~__GFP_HIGHMEM;

if (vq->packed) {
data = kmalloc(total_sg * sizeof(struct vring_packed_desc),
gfp);
if (!data)
return NULL;
} else {
struct vring_desc *desc;
unsigned int i;

desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
if (!desc)
return NULL;

for (i = 0; i < total_sg; i++)
desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);

data = desc;
}

return data;
}

I would prefer to have two simpler helpers (and to the callers,
it's already very clear about which one they should call), i.e.
the current implementation:

static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
   unsigned int total_sg,
   gfp_t gfp)
{
struct vring_packed_desc *desc;

/*
 * We require lowmem mappings for the descriptors because
 * otherwise virt_to_phys will give us bogus addresses in the
 * virtqueue.
 */
gfp &= ~__GFP_HIGHMEM;

desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);

return desc;
}

static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
   unsigned int total_sg,
   gfp_t gfp)
{
struct vring_desc *desc;
unsigned int i;

/*
 * We require lowmem mappings for the descriptors because
 * otherwise virt_to_phys will give us bogus addresses in the
 * virtqueue.
 */
gfp &= ~__GFP_HIGHMEM;

desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
if (!desc)
return NULL;

for (i = 0; i < total_sg; i++)
desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
return desc;
}


Yeah, I miss that split version needs a desc list.




+
+   return desc;
+}

[...]

+static inline int virtqueue_add_packed(struct virtqueue *_vq,
+  struct scatterlist *sgs[],
+  unsigned int total_sg,
+  unsigned int out_sgs,
+  unsigned int in_sgs,
+

Re: [RFC v2] virtio: support packed ring

2018-04-14 Thread Tiwei Bie
On Fri, Apr 13, 2018 at 06:22:45PM +0300, Michael S. Tsirkin wrote:
> On Sun, Apr 01, 2018 at 10:12:16PM +0800, Tiwei Bie wrote:
> > +static inline bool more_used(const struct vring_virtqueue *vq)
> > +{
> > +   return vq->packed ? more_used_packed(vq) : more_used_split(vq);
> > +}
> > +
> > +void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, unsigned int *len,
> > + void **ctx)
> > +{
> > +   struct vring_virtqueue *vq = to_vvq(_vq);
> > +   void *ret;
> > +   unsigned int i;
> > +   u16 last_used;
> > +
> > +   START_USE(vq);
> > +
> > +   if (unlikely(vq->broken)) {
> > +   END_USE(vq);
> > +   return NULL;
> > +   }
> > +
> > +   if (!more_used(vq)) {
> > +   pr_debug("No more buffers in queue\n");
> > +   END_USE(vq);
> > +   return NULL;
> > +   }
> 
> So virtqueue_get_buf_ctx_split should only call more_used_split.

Yeah, you're right! Will fix this in the next version.

> 
> to avoid such issues I think we should lay out the code like this:
> 
> XXX_split
> 
> XXX_packed
> 
> XXX wrappers

I'll do it. Thanks for the suggestion!

> 
> > +/* The standard layout
> 
> I'd drop standard here.

Got it. I'll drop the word "standard".

> 
> > for the packed ring is a continuous chunk of memory
> > + * which looks like this.
> > + *
> > + * struct vring_packed
> > + * {
> 
> Can the opening bracket go on the prev line pls?

Sure.

> 
> > + * // The actual descriptors (16 bytes each)
> > + * struct vring_packed_desc desc[num];
> > + *
> > + * // Padding to the next align boundary.
> > + * char pad[];
> > + *
> > + * // Driver Event Suppression
> > + * struct vring_packed_desc_event driver;
> > + *
> > + * // Device Event Suppression
> > + * struct vring_packed_desc_event device;
> 
> Maybe that's how our driver does it but it's not based on spec
> so I don't think this belongs in the header.

I will move it to the place where vring_packed_init()
is defined.

> 
> > + * };
> > + */
> > +
> > +static inline unsigned vring_packed_size(unsigned int num, unsigned long 
> > align)
> > +{
> > +   return ((sizeof(struct vring_packed_desc) * num + align - 1)
> > +   & ~(align - 1)) + sizeof(struct vring_packed_desc_event) * 2;
> > +}
> > +
> 
> Cant say this API makes sense for me.

Hmm, do you have any suggestion? Also move it out of this header?

Thanks for the review! :)

Best regards,
Tiwei Bie

> 
> 
> >  #endif /* _UAPI_LINUX_VIRTIO_RING_H */
> > -- 
> > 2.11.0


Re: [RFC v2] virtio: support packed ring

2018-04-13 Thread Michael S. Tsirkin
On Sun, Apr 01, 2018 at 10:12:16PM +0800, Tiwei Bie wrote:
> +static inline bool more_used(const struct vring_virtqueue *vq)
> +{
> + return vq->packed ? more_used_packed(vq) : more_used_split(vq);
> +}
> +
> +void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, unsigned int *len,
> +   void **ctx)
> +{
> + struct vring_virtqueue *vq = to_vvq(_vq);
> + void *ret;
> + unsigned int i;
> + u16 last_used;
> +
> + START_USE(vq);
> +
> + if (unlikely(vq->broken)) {
> + END_USE(vq);
> + return NULL;
> + }
> +
> + if (!more_used(vq)) {
> + pr_debug("No more buffers in queue\n");
> + END_USE(vq);
> + return NULL;
> + }

So virtqueue_get_buf_ctx_split should only call more_used_split.

to avoid such issues I think we should lay out the code like this:

XXX_split

XXX_packed

XXX wrappers

> +/* The standard layout

I'd drop standard here.

> for the packed ring is a continuous chunk of memory
> + * which looks like this.
> + *
> + * struct vring_packed
> + * {

Can the opening bracket go on the prev line pls?

> + *   // The actual descriptors (16 bytes each)
> + *   struct vring_packed_desc desc[num];
> + *
> + *   // Padding to the next align boundary.
> + *   char pad[];
> + *
> + *   // Driver Event Suppression
> + *   struct vring_packed_desc_event driver;
> + *
> + *   // Device Event Suppression
> + *   struct vring_packed_desc_event device;

Maybe that's how our driver does it but it's not based on spec
so I don't think this belongs in the header.

> + * };
> + */
> +
> +static inline unsigned vring_packed_size(unsigned int num, unsigned long 
> align)
> +{
> + return ((sizeof(struct vring_packed_desc) * num + align - 1)
> + & ~(align - 1)) + sizeof(struct vring_packed_desc_event) * 2;
> +}
> +

Cant say this API makes sense for me.


>  #endif /* _UAPI_LINUX_VIRTIO_RING_H */
> -- 
> 2.11.0


Re: [RFC v2] virtio: support packed ring

2018-04-13 Thread Tiwei Bie
On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> On 2018年04月01日 22:12, Tiwei Bie wrote:
> > Hello everyone,
> > 
> > This RFC implements packed ring support for virtio driver.
> > 
> > The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
> > by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > Minor changes are needed for the vhost code, e.g. to kick the guest.
> > 
> > TODO:
> > - Refinements and bug fixes;
> > - Split into small patches;
> > - Test indirect descriptor support;
> > - Test/fix event suppression support;
> > - Test devices other than net;
> > 
> > RFC v1 -> RFC v2:
> > - Add indirect descriptor support - compile test only;
> > - Add event suppression supprt - compile test only;
> > - Move vring_packed_init() out of uapi (Jason, MST);
> > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > - Avoid using '%' operator (Jason);
> > - Rename free_head -> next_avail_idx (Jason);
> > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > - Some other refinements and bug fixes;
> > 
> > Thanks!
> > 
> > Signed-off-by: Tiwei Bie 
> > ---
> >   drivers/virtio/virtio_ring.c   | 1094 
> > +---
> >   include/linux/virtio_ring.h|8 +-
> >   include/uapi/linux/virtio_config.h |   12 +-
> >   include/uapi/linux/virtio_ring.h   |   61 ++
> >   4 files changed, 980 insertions(+), 195 deletions(-)
[...]
> > +static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue 
> > *_vq,
> > +  unsigned int total_sg,
> > +  gfp_t gfp)
> > +{
> > +   struct vring_packed_desc *desc;
> > +
> > +   /*
> > +* We require lowmem mappings for the descriptors because
> > +* otherwise virt_to_phys will give us bogus addresses in the
> > +* virtqueue.
> > +*/
> > +   gfp &= ~__GFP_HIGHMEM;
> > +
> > +   desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
> 
> Can we simply check vq->packed here to avoid duplicating helpers?

Then it would be something like this:

static void *alloc_indirect(struct virtqueue *_vq, unsigned int total_sg,
gfp_t gfp)
{
struct vring_virtqueue *vq = to_vvq(_vq);
void *data;

/*
 * We require lowmem mappings for the descriptors because
 * otherwise virt_to_phys will give us bogus addresses in the
 * virtqueue.
 */
gfp &= ~__GFP_HIGHMEM;

if (vq->packed) {
data = kmalloc(total_sg * sizeof(struct vring_packed_desc),
gfp);
if (!data)
return NULL;
} else {
struct vring_desc *desc;
unsigned int i;

desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
if (!desc)
return NULL;

for (i = 0; i < total_sg; i++)
desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);

data = desc;
}

return data;
}

I would prefer to have two simpler helpers (and to the callers,
it's already very clear about which one they should call), i.e.
the current implementation:

static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
   unsigned int total_sg,
   gfp_t gfp)
{
struct vring_packed_desc *desc;

/*
 * We require lowmem mappings for the descriptors because
 * otherwise virt_to_phys will give us bogus addresses in the
 * virtqueue.
 */
gfp &= ~__GFP_HIGHMEM;

desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);

return desc;
}

static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
   unsigned int total_sg,
   gfp_t gfp)
{
struct vring_desc *desc;
unsigned int i;

/*
 * We require lowmem mappings for the descriptors because
 * otherwise virt_to_phys will give us bogus addresses in the
 * virtqueue.
 */
gfp &= ~__GFP_HIGHMEM;

desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
if (!desc)
return NULL;

for (i = 0; i < total_sg; i++)
desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
return desc;
}

> 
> > +
> > +   return desc;
> > +}
[...]
> > +static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > +  struct scatterlist *sgs[],
> > +  unsigned int total_sg,
> > +  unsigned int out_sgs,
> > 

Re: [RFC v2] virtio: support packed ring

2018-04-12 Thread Jason Wang



On 2018年04月01日 22:12, Tiwei Bie wrote:

Hello everyone,

This RFC implements packed ring support for virtio driver.

The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
Minor changes are needed for the vhost code, e.g. to kick the guest.

TODO:
- Refinements and bug fixes;
- Split into small patches;
- Test indirect descriptor support;
- Test/fix event suppression support;
- Test devices other than net;

RFC v1 -> RFC v2:
- Add indirect descriptor support - compile test only;
- Add event suppression supprt - compile test only;
- Move vring_packed_init() out of uapi (Jason, MST);
- Merge two loops into one in virtqueue_add_packed() (Jason);
- Split vring_unmap_one() for packed ring and split ring (Jason);
- Avoid using '%' operator (Jason);
- Rename free_head -> next_avail_idx (Jason);
- Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
- Some other refinements and bug fixes;

Thanks!

Signed-off-by: Tiwei Bie 
---
  drivers/virtio/virtio_ring.c   | 1094 +---
  include/linux/virtio_ring.h|8 +-
  include/uapi/linux/virtio_config.h |   12 +-
  include/uapi/linux/virtio_ring.h   |   61 ++
  4 files changed, 980 insertions(+), 195 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 71458f493cf8..0515dca34d77 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -58,14 +58,15 @@
  
  struct vring_desc_state {

void *data; /* Data for callback. */
-   struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
+   void *indir_desc;   /* Indirect descriptor, if any. */
+   int num;/* Descriptor list length. */
  };
  
  struct vring_virtqueue {

struct virtqueue vq;
  
-	/* Actual memory layout for this queue */

-   struct vring vring;
+   /* Is this a packed ring? */
+   bool packed;
  
  	/* Can we use weak barriers? */

bool weak_barriers;
@@ -79,19 +80,45 @@ struct vring_virtqueue {
/* Host publishes avail event idx */
bool event;
  
-	/* Head of free buffer list. */

-   unsigned int free_head;
/* Number we've added since last sync. */
unsigned int num_added;
  
  	/* Last used index we've seen. */

u16 last_used_idx;
  
-	/* Last written value to avail->flags */

-   u16 avail_flags_shadow;
+   union {
+   /* Available for split ring */
+   struct {
+   /* Actual memory layout for this queue. */
+   struct vring vring;
  
-	/* Last written value to avail->idx in guest byte order */

-   u16 avail_idx_shadow;
+   /* Head of free buffer list. */
+   unsigned int free_head;
+
+   /* Last written value to avail->flags */
+   u16 avail_flags_shadow;
+
+   /* Last written value to avail->idx in
+* guest byte order. */
+   u16 avail_idx_shadow;
+   };
+
+   /* Available for packed ring */
+   struct {
+   /* Actual memory layout for this queue. */
+   struct vring_packed vring_packed;
+
+   /* Driver ring wrap counter. */
+   u8 wrap_counter;
+
+   /* Index of the next avail descriptor. */
+   unsigned int next_avail_idx;
+
+   /* Last written value to driver->flags in
+* guest byte order. */
+   u16 event_flags_shadow;
+   };
+   };
  
  	/* How to notify other side. FIXME: commonalize hcalls! */

bool (*notify)(struct virtqueue *vq);
@@ -201,8 +228,33 @@ static dma_addr_t vring_map_single(const struct 
vring_virtqueue *vq,
  cpu_addr, size, direction);
  }
  
-static void vring_unmap_one(const struct vring_virtqueue *vq,

-   struct vring_desc *desc)
+static void vring_unmap_one_split(const struct vring_virtqueue *vq,
+ struct vring_desc *desc)
+{
+   u16 flags;
+
+   if (!vring_use_dma_api(vq->vq.vdev))
+   return;
+
+   flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
+
+   if (flags & VRING_DESC_F_INDIRECT) {
+   dma_unmap_single(vring_dma_dev(vq),
+virtio64_to_cpu(vq->vq.vdev, desc->addr),
+virtio32_to_cpu(vq->vq.vdev, desc->len),
+(flags & VRING_DESC_F_WRITE) ?
+DMA_FROM_DEVICE : DMA_TO_DEVICE);
+   } else {
+   dma_unmap_page(vring_dma_dev(vq),
+  

Re: [RFC v2] virtio: support packed ring

2018-04-09 Thread Tiwei Bie
On Tue, Apr 10, 2018 at 10:55:25AM +0800, Jason Wang wrote:
> On 2018年04月01日 22:12, Tiwei Bie wrote:
> > Hello everyone,
> > 
> > This RFC implements packed ring support for virtio driver.
> > 
> > The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
> > by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > Minor changes are needed for the vhost code, e.g. to kick the guest.
> > 
> > TODO:
> > - Refinements and bug fixes;
> > - Split into small patches;
> > - Test indirect descriptor support;
> > - Test/fix event suppression support;
> > - Test devices other than net;
> > 
> > RFC v1 -> RFC v2:
> > - Add indirect descriptor support - compile test only;
> > - Add event suppression supprt - compile test only;
> > - Move vring_packed_init() out of uapi (Jason, MST);
> > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > - Avoid using '%' operator (Jason);
> > - Rename free_head -> next_avail_idx (Jason);
> > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > - Some other refinements and bug fixes;
> > 
> > Thanks!
> 
> Will try to review this later.
> 
> But it would be better if you can split it (more than 1000 lines is too big
> to be reviewed easily). E.g you can at least split it into three patches,
> new structures, datapath, and event suppression.
> 

No problem! It's on my TODO list. I'll get it done in the next version.

Thanks!


Re: [RFC v2] virtio: support packed ring

2018-04-09 Thread Jason Wang



On 2018年04月01日 22:12, Tiwei Bie wrote:

Hello everyone,

This RFC implements packed ring support for virtio driver.

The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
Minor changes are needed for the vhost code, e.g. to kick the guest.

TODO:
- Refinements and bug fixes;
- Split into small patches;
- Test indirect descriptor support;
- Test/fix event suppression support;
- Test devices other than net;

RFC v1 -> RFC v2:
- Add indirect descriptor support - compile test only;
- Add event suppression supprt - compile test only;
- Move vring_packed_init() out of uapi (Jason, MST);
- Merge two loops into one in virtqueue_add_packed() (Jason);
- Split vring_unmap_one() for packed ring and split ring (Jason);
- Avoid using '%' operator (Jason);
- Rename free_head -> next_avail_idx (Jason);
- Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
- Some other refinements and bug fixes;

Thanks!


Will try to review this later.

But it would be better if you can split it (more than 1000 lines is too 
big to be reviewed easily). E.g you can at least split it into three 
patches, new structures, datapath, and event suppression.


Thanks




Signed-off-by: Tiwei Bie 
---
  drivers/virtio/virtio_ring.c   | 1094 +---
  include/linux/virtio_ring.h|8 +-
  include/uapi/linux/virtio_config.h |   12 +-
  include/uapi/linux/virtio_ring.h   |   61 ++
  4 files changed, 980 insertions(+), 195 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 71458f493cf8..0515dca34d77 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -58,14 +58,15 @@
  
  struct vring_desc_state {

void *data; /* Data for callback. */
-   struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
+   void *indir_desc;   /* Indirect descriptor, if any. */
+   int num;/* Descriptor list length. */
  };
  
  struct vring_virtqueue {

struct virtqueue vq;
  
-	/* Actual memory layout for this queue */

-   struct vring vring;
+   /* Is this a packed ring? */
+   bool packed;
  
  	/* Can we use weak barriers? */

bool weak_barriers;
@@ -79,19 +80,45 @@ struct vring_virtqueue {
/* Host publishes avail event idx */
bool event;
  
-	/* Head of free buffer list. */

-   unsigned int free_head;
/* Number we've added since last sync. */
unsigned int num_added;
  
  	/* Last used index we've seen. */

u16 last_used_idx;
  
-	/* Last written value to avail->flags */

-   u16 avail_flags_shadow;
+   union {
+   /* Available for split ring */
+   struct {
+   /* Actual memory layout for this queue. */
+   struct vring vring;
  
-	/* Last written value to avail->idx in guest byte order */

-   u16 avail_idx_shadow;
+   /* Head of free buffer list. */
+   unsigned int free_head;
+
+   /* Last written value to avail->flags */
+   u16 avail_flags_shadow;
+
+   /* Last written value to avail->idx in
+* guest byte order. */
+   u16 avail_idx_shadow;
+   };
+
+   /* Available for packed ring */
+   struct {
+   /* Actual memory layout for this queue. */
+   struct vring_packed vring_packed;
+
+   /* Driver ring wrap counter. */
+   u8 wrap_counter;
+
+   /* Index of the next avail descriptor. */
+   unsigned int next_avail_idx;
+
+   /* Last written value to driver->flags in
+* guest byte order. */
+   u16 event_flags_shadow;
+   };
+   };
  
  	/* How to notify other side. FIXME: commonalize hcalls! */

bool (*notify)(struct virtqueue *vq);
@@ -201,8 +228,33 @@ static dma_addr_t vring_map_single(const struct 
vring_virtqueue *vq,
  cpu_addr, size, direction);
  }
  
-static void vring_unmap_one(const struct vring_virtqueue *vq,

-   struct vring_desc *desc)
+static void vring_unmap_one_split(const struct vring_virtqueue *vq,
+ struct vring_desc *desc)
+{
+   u16 flags;
+
+   if (!vring_use_dma_api(vq->vq.vdev))
+   return;
+
+   flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
+
+   if (flags & VRING_DESC_F_INDIRECT) {
+   dma_unmap_single(vring_dma_dev(vq),
+virtio64_to_cpu(vq->vq.vdev, desc->addr),
+virtio32_to_cpu(vq->vq.vdev, desc->len),
+