Re: [RFC v5 3/5] virtio_ring: add packed ring support

2018-05-29 Thread Jason Wang



On 2018年05月29日 13:11, Tiwei Bie wrote:

On Tue, May 29, 2018 at 11:18:57AM +0800, Jason Wang wrote:

On 2018年05月22日 16:16, Tiwei Bie wrote:

[...]

+static void detach_buf_packed(struct vring_virtqueue *vq,
+ unsigned int id, void **ctx)
+{
+   struct vring_desc_state_packed *state;
+   struct vring_packed_desc *desc;
+   unsigned int i;
+   int *next;
+
+   /* Clear data ptr. */
+   vq->desc_state_packed[id].data = NULL;
+
+   next = 
+   for (i = 0; i < vq->desc_state_packed[id].num; i++) {
+   state = >desc_state_packed[*next];
+   vring_unmap_state_packed(vq, state);
+   next = >desc_state_packed[*next].next;

Have a short discussion with Michael. It looks like the only thing we care
is DMA address and length here.

The length tracked by desc_state_packed[] is also necessary
when INDIRECT is used and the buffers are writable.


So a thought is to avoid using desc_state_packed() is vring_use_dma_api() is
false? Probably need another id allocator or just use desc_state_packed for
free id list.

I think it's a good suggestion. Thanks!

I won't track the addr/len/flags for non-indirect descs
when vring_use_dma_api() returns false.


+   }
+
+   vq->vq.num_free += vq->desc_state_packed[id].num;
+   *next = vq->free_head;

Using pointer seems not elegant and unnecessary here. You can just track
next in integer I think?

It's possible to use integer, but will need one more var
to track `prev` (desc_state_packed[prev].next == next in
this case).


Yes, please do this.




+   vq->free_head = id;
+
+   if (vq->indirect) {
+   u32 len;
+
+   /* Free the indirect table, if any, now that it's unmapped. */
+   desc = vq->desc_state_packed[id].indir_desc;
+   if (!desc)
+   return;
+
+   len = vq->desc_state_packed[id].len;
+   for (i = 0; i < len / sizeof(struct vring_packed_desc); i++)
+   vring_unmap_desc_packed(vq, [i]);
+
+   kfree(desc);
+   vq->desc_state_packed[id].indir_desc = NULL;
+   } else if (ctx) {
+   *ctx = vq->desc_state_packed[id].indir_desc;
+   }
   }
   static inline bool more_used_packed(const struct vring_virtqueue *vq)
   {
-   return false;
+   u16 last_used, flags;
+   u8 avail, used;
+
+   last_used = vq->last_used_idx;
+   flags = virtio16_to_cpu(vq->vq.vdev,
+   vq->vring_packed.desc[last_used].flags);
+   avail = !!(flags & VRING_DESC_F_AVAIL(1));
+   used = !!(flags & VRING_DESC_F_USED(1));
+
+   return avail == used && used == vq->used_wrap_counter;

Spec does not check avail == used? So there's probably a bug in either of
the two places.

In what condition that avail != used but used == vq_used_wrap_counter? A
buggy device or device set the two bits in two writes?

Currently, spec doesn't forbid devices to set the status
bits as: avail != used but used == vq_used_wrap_counter.
So I think driver cannot assume this won't happen.


Right, but I could not find a situation that device need to something 
like this. We should either forbid this in the spec or change the 
example code in the spec.


Let's see how Michael think about this.

Thanks

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [RFC v5 3/5] virtio_ring: add packed ring support

2018-05-28 Thread Tiwei Bie
On Tue, May 29, 2018 at 11:18:57AM +0800, Jason Wang wrote:
> On 2018年05月22日 16:16, Tiwei Bie wrote:
[...]
> > +static void detach_buf_packed(struct vring_virtqueue *vq,
> > + unsigned int id, void **ctx)
> > +{
> > +   struct vring_desc_state_packed *state;
> > +   struct vring_packed_desc *desc;
> > +   unsigned int i;
> > +   int *next;
> > +
> > +   /* Clear data ptr. */
> > +   vq->desc_state_packed[id].data = NULL;
> > +
> > +   next = 
> > +   for (i = 0; i < vq->desc_state_packed[id].num; i++) {
> > +   state = >desc_state_packed[*next];
> > +   vring_unmap_state_packed(vq, state);
> > +   next = >desc_state_packed[*next].next;
> 
> Have a short discussion with Michael. It looks like the only thing we care
> is DMA address and length here.

The length tracked by desc_state_packed[] is also necessary
when INDIRECT is used and the buffers are writable.

> 
> So a thought is to avoid using desc_state_packed() is vring_use_dma_api() is
> false? Probably need another id allocator or just use desc_state_packed for
> free id list.

I think it's a good suggestion. Thanks!

I won't track the addr/len/flags for non-indirect descs
when vring_use_dma_api() returns false.

> 
> > +   }
> > +
> > +   vq->vq.num_free += vq->desc_state_packed[id].num;
> > +   *next = vq->free_head;
> 
> Using pointer seems not elegant and unnecessary here. You can just track
> next in integer I think?

It's possible to use integer, but will need one more var
to track `prev` (desc_state_packed[prev].next == next in
this case).

> 
> > +   vq->free_head = id;
> > +
> > +   if (vq->indirect) {
> > +   u32 len;
> > +
> > +   /* Free the indirect table, if any, now that it's unmapped. */
> > +   desc = vq->desc_state_packed[id].indir_desc;
> > +   if (!desc)
> > +   return;
> > +
> > +   len = vq->desc_state_packed[id].len;
> > +   for (i = 0; i < len / sizeof(struct vring_packed_desc); i++)
> > +   vring_unmap_desc_packed(vq, [i]);
> > +
> > +   kfree(desc);
> > +   vq->desc_state_packed[id].indir_desc = NULL;
> > +   } else if (ctx) {
> > +   *ctx = vq->desc_state_packed[id].indir_desc;
> > +   }
> >   }
> >   static inline bool more_used_packed(const struct vring_virtqueue *vq)
> >   {
> > -   return false;
> > +   u16 last_used, flags;
> > +   u8 avail, used;
> > +
> > +   last_used = vq->last_used_idx;
> > +   flags = virtio16_to_cpu(vq->vq.vdev,
> > +   vq->vring_packed.desc[last_used].flags);
> > +   avail = !!(flags & VRING_DESC_F_AVAIL(1));
> > +   used = !!(flags & VRING_DESC_F_USED(1));
> > +
> > +   return avail == used && used == vq->used_wrap_counter;
> 
> Spec does not check avail == used? So there's probably a bug in either of
> the two places.
> 
> In what condition that avail != used but used == vq_used_wrap_counter? A
> buggy device or device set the two bits in two writes?

Currently, spec doesn't forbid devices to set the status
bits as: avail != used but used == vq_used_wrap_counter.
So I think driver cannot assume this won't happen.

> 
> >   }
[...]
> >   static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> >   {
> > -   return 0;
> > +   struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +   START_USE(vq);
> > +
> > +   /* We optimistically turn back on interrupts, then check if there was
> > +* more to do. */
> > +
> > +   if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > +   virtio_wmb(vq->weak_barriers);
> 
> Missing comments for the barrier.

Will add some comments.

> 
> > +   vq->event_flags_shadow = VRING_EVENT_F_ENABLE;
> > +   vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > +   vq->event_flags_shadow);
> > +   }
> > +
> > +   END_USE(vq);
> > +   return vq->last_used_idx;
> >   }
> >   static bool virtqueue_poll_packed(struct virtqueue *_vq, unsigned 
> > last_used_idx)
> >   {
> > -   return false;
> > +   struct vring_virtqueue *vq = to_vvq(_vq);
> > +   u8 avail, used;
> > +   u16 flags;
> > +
> > +   virtio_mb(vq->weak_barriers);
> > +   flags = virtio16_to_cpu(vq->vq.vdev,
> > +   vq->vring_packed.desc[last_used_idx].flags);
> > +   avail = !!(flags & VRING_DESC_F_AVAIL(1));
> > +   used = !!(flags & VRING_DESC_F_USED(1));
> > +
> > +   return avail == used && used == vq->used_wrap_counter;
> >   }
> >   static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
> >   {
> > -   return false;
> > +   struct vring_virtqueue *vq = to_vvq(_vq);
> > +
> > +   START_USE(vq);
> > +
> > +   /* We optimistically turn back on interrupts, then check if there was
> > +* more to do. */
> > +
> > +   if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > +   virtio_wmb(vq->weak_barriers);
> 
> Need comments for the barrier.

Will add some comments.

Best regards,
Tiwei Bie

Re: [RFC v5 3/5] virtio_ring: add packed ring support

2018-05-28 Thread Jason Wang



On 2018年05月22日 16:16, Tiwei Bie wrote:

This commit introduces the support (without EVENT_IDX) for
packed ring.

Signed-off-by: Tiwei Bie 
---
  drivers/virtio/virtio_ring.c | 474 ++-
  1 file changed, 468 insertions(+), 6 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index f5ef5f42a7cf..eb9fd5207a68 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -62,6 +62,12 @@ struct vring_desc_state {
  };
  
  struct vring_desc_state_packed {

+   void *data; /* Data for callback. */
+   struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
+   int num;/* Descriptor list length. */
+   dma_addr_t addr;/* Buffer DMA addr. */
+   u32 len;/* Buffer length. */
+   u16 flags;  /* Descriptor flags. */
int next;   /* The next desc state. */
  };
  
@@ -758,6 +764,72 @@ static inline unsigned vring_size_packed(unsigned int num, unsigned long align)

& ~(align - 1)) + sizeof(struct vring_packed_desc_event) * 2;
  }
  
+static void vring_unmap_state_packed(const struct vring_virtqueue *vq,

+struct vring_desc_state_packed *state)
+{
+   u16 flags;
+
+   if (!vring_use_dma_api(vq->vq.vdev))
+   return;
+
+   flags = state->flags;
+
+   if (flags & VRING_DESC_F_INDIRECT) {
+   dma_unmap_single(vring_dma_dev(vq),
+state->addr, state->len,
+(flags & VRING_DESC_F_WRITE) ?
+DMA_FROM_DEVICE : DMA_TO_DEVICE);
+   } else {
+   dma_unmap_page(vring_dma_dev(vq),
+  state->addr, state->len,
+  (flags & VRING_DESC_F_WRITE) ?
+  DMA_FROM_DEVICE : DMA_TO_DEVICE);
+   }
+}
+
+static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
+  struct vring_packed_desc *desc)
+{
+   u16 flags;
+
+   if (!vring_use_dma_api(vq->vq.vdev))
+   return;
+
+   flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
+
+   if (flags & VRING_DESC_F_INDIRECT) {
+   dma_unmap_single(vring_dma_dev(vq),
+virtio64_to_cpu(vq->vq.vdev, desc->addr),
+virtio32_to_cpu(vq->vq.vdev, desc->len),
+(flags & VRING_DESC_F_WRITE) ?
+DMA_FROM_DEVICE : DMA_TO_DEVICE);
+   } else {
+   dma_unmap_page(vring_dma_dev(vq),
+  virtio64_to_cpu(vq->vq.vdev, desc->addr),
+  virtio32_to_cpu(vq->vq.vdev, desc->len),
+  (flags & VRING_DESC_F_WRITE) ?
+  DMA_FROM_DEVICE : DMA_TO_DEVICE);
+   }
+}
+
+static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
+  unsigned int total_sg,
+  gfp_t gfp)
+{
+   struct vring_packed_desc *desc;
+
+   /*
+* We require lowmem mappings for the descriptors because
+* otherwise virt_to_phys will give us bogus addresses in the
+* virtqueue.
+*/
+   gfp &= ~__GFP_HIGHMEM;
+
+   desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
+
+   return desc;
+}
+
  static inline int virtqueue_add_packed(struct virtqueue *_vq,
   struct scatterlist *sgs[],
   unsigned int total_sg,
@@ -767,47 +839,437 @@ static inline int virtqueue_add_packed(struct virtqueue 
*_vq,
   void *ctx,
   gfp_t gfp)
  {
+   struct vring_virtqueue *vq = to_vvq(_vq);
+   struct vring_packed_desc *desc;
+   struct scatterlist *sg;
+   unsigned int i, n, descs_used, uninitialized_var(prev), err_idx;
+   __virtio16 uninitialized_var(head_flags), flags;
+   u16 head, avail_wrap_counter, id, cur;
+   bool indirect;
+
+   START_USE(vq);
+
+   BUG_ON(data == NULL);
+   BUG_ON(ctx && vq->indirect);
+
+   if (unlikely(vq->broken)) {
+   END_USE(vq);
+   return -EIO;
+   }
+
+#ifdef DEBUG
+   {
+   ktime_t now = ktime_get();
+
+   /* No kick or get, with .1 second between?  Warn. */
+   if (vq->last_add_time_valid)
+   WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
+   > 100);
+   vq->last_add_time = now;
+   vq->last_add_time_valid = true;
+   }
+#endif
+
+   BUG_ON(total_sg