Re: use a zero-copy approach for vmd virtio devices

2022-12-23 Thread Matthias Schmidt
Hi Dave,

* Dave Voutila wrote:
> While working back and forth with dlg@ on his idea of adding in async
> task queues to vmd devices, he started to introduce a cleaner way to
> handle virtqueues. In short, we can just track the host virtual address
> and read/write to that location.
> 
> This has the benefit of cleaning up a good chunk of code, mostly in the
> vioscsi device. If my count is correct, it's close to 100 lines
> removed.
> 
> Testing on a variety of systems, I see anywhere from a small (5%) to a
> large (50%) gain in things like vionet performance depending on the host
> hardware and the guest OS. (Linux guests push packets faster, on
> average.)
> 
> If/when this lands, it'll be easier to introduce the async taskq diff
> and also clean up more of the virtio codebase. Lots of duplicate logic
> in basic virtio-pci config handling, like updating queue address or
> size.
> 
> Testing is appreciated, but also looking for OKs.

I have your patch running on -current with OpenBSD -current, -stable and
Linux (voidlinux) guests.  Stable so far, no regressions and Linux "seems"
faster.

Cheers

Matthias



Re: use a zero-copy approach for vmd virtio devices

2022-12-23 Thread Dave Voutila


Dave Voutila  writes:

> While working back and forth with dlg@ on his idea of adding in async
> task queues to vmd devices, he started to introduce a cleaner way to
> handle virtqueues. In short, we can just track the host virtual address
> and read/write to that location.
>
> This has the benefit of cleaning up a good chunk of code, mostly in the
> vioscsi device. If my count is correct, it's close to 100 lines
> removed.
>
> Testing on a variety of systems, I see anywhere from a small (5%) to a
> large (50%) gain in things like vionet performance depending on the host
> hardware and the guest OS. (Linux guests push packets faster, on
> average.)
>
> If/when this lands, it'll be easier to introduce the async taskq diff
> and also clean up more of the virtio codebase. Lots of duplicate logic
> in basic virtio-pci config handling, like updating queue address or
> size.
>
> Testing is appreciated, but also looking for OKs.
>

Have had a few positive test reports off-list.

OK?

>
> diff refs/heads/master refs/heads/vmd-vq
> commit - 9f38b5b2272f7ca113158ec7943c959ae9b22683
> commit + 509bbee70e5a11cfb69b93202a24fb032c72ac43
> blob - aea2fc1d5e8903c97cb7076bfb9e5f265f416378
> blob + 3a9af7790b24914fb6e7716a049fd0428803af9b
> --- usr.sbin/vmd/vioscsi.c
> +++ usr.sbin/vmd/vioscsi.c
> @@ -81,6 +81,7 @@ vioscsi_next_ring_item(struct vioscsi_dev *dev, struct
>  {
>   used->ring[used->idx & VIOSCSI_QUEUE_MASK].id = idx;
>   used->ring[used->idx & VIOSCSI_QUEUE_MASK].len = desc->len;
> + __sync_synchronize();
>   used->idx++;
>
>   dev->vq[dev->cfg.queue_notify].last_avail =
> @@ -157,7 +158,7 @@ vioscsi_reg_name(uint8_t reg)
>   switch (reg) {
>   case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
>   case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
> - case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address";
> + case VIRTIO_CONFIG_QUEUE_PFN: return "queue pfn";
>   case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
>   case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
>   case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
> @@ -1672,8 +1673,8 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
>   DPRINTF("%s: guest feature set to %u",
>   __func__, dev->cfg.guest_feature);
>   break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - dev->cfg.queue_address = *data;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + dev->cfg.queue_pfn = *data;
>   vioscsi_update_qa(dev);
>   break;
>   case VIRTIO_CONFIG_QUEUE_SELECT:
> @@ -1692,7 +1693,7 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
>   if (dev->cfg.device_status == 0) {
>   log_debug("%s: device reset", __func__);
>   dev->cfg.guest_feature = 0;
> - dev->cfg.queue_address = 0;
> + dev->cfg.queue_pfn = 0;
>   vioscsi_update_qa(dev);
>   dev->cfg.queue_size = 0;
>   vioscsi_update_qs(dev);
> @@ -1988,8 +1989,8 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
>   case VIRTIO_CONFIG_GUEST_FEATURES:
>   *data = dev->cfg.guest_feature;
>   break;
> - case VIRTIO_CONFIG_QUEUE_ADDRESS:
> - *data = dev->cfg.queue_address;
> + case VIRTIO_CONFIG_QUEUE_PFN:
> + *data = dev->cfg.queue_pfn;
>   break;
>   case VIRTIO_CONFIG_QUEUE_SIZE:
>   if (sz == 4)
> @@ -2033,25 +2034,38 @@ vioscsi_update_qs(struct vioscsi_dev *dev)
>  void
>  vioscsi_update_qs(struct vioscsi_dev *dev)
>  {
> + struct virtio_vq_info *vq_info;
> +
>   /* Invalid queue? */
>   if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES) {
>   dev->cfg.queue_size = 0;
>   return;
>   }
>
> - /* Update queue address/size based on queue select */
> - dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
> - dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
> + vq_info = &dev->vq[dev->cfg.queue_select];
> +
> + /* Update queue pfn/size based on queue select */
> + dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
> + dev->cfg.queue_size = vq_info->qs;
>  }
>
>  void
>  vioscsi_update_qa(struct vioscsi_dev *dev)
>  {
> + struct virtio_vq_info *vq_info;
> + void *hva = NULL;
> +
>   /* Invalid queue? */
>   if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES)
>   return;
>
> - dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
> + vq_info = &dev->vq[dev->cfg.queue_select];
> + vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
> +
> + hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOSCSI_QUEUE_SIZE));
> + if (hva == NULL)
> + fatal("vioscsi_update_qa");
> + vq_info->q_hva = hva;
> +}

use a zero-copy approach for vmd virtio devices

2022-12-20 Thread Dave Voutila
While working back and forth with dlg@ on his idea of adding in async
task queues to vmd devices, he started to introduce a cleaner way to
handle virtqueues. In short, we can just track the host virtual address
and read/write to that location.

This has the benefit of cleaning up a good chunk of code, mostly in the
vioscsi device. If my count is correct, it's close to 100 lines
removed.

Testing on a variety of systems, I see anywhere from a small (5%) to a
large (50%) gain in things like vionet performance depending on the host
hardware and the guest OS. (Linux guests push packets faster, on
average.)

If/when this lands, it'll be easier to introduce the async taskq diff
and also clean up more of the virtio codebase. Lots of duplicate logic
in basic virtio-pci config handling, like updating queue address or
size.

Testing is appreciated, but also looking for OKs.


diff refs/heads/master refs/heads/vmd-vq
commit - 9f38b5b2272f7ca113158ec7943c959ae9b22683
commit + 509bbee70e5a11cfb69b93202a24fb032c72ac43
blob - aea2fc1d5e8903c97cb7076bfb9e5f265f416378
blob + 3a9af7790b24914fb6e7716a049fd0428803af9b
--- usr.sbin/vmd/vioscsi.c
+++ usr.sbin/vmd/vioscsi.c
@@ -81,6 +81,7 @@ vioscsi_next_ring_item(struct vioscsi_dev *dev, struct
 {
used->ring[used->idx & VIOSCSI_QUEUE_MASK].id = idx;
used->ring[used->idx & VIOSCSI_QUEUE_MASK].len = desc->len;
+   __sync_synchronize();
used->idx++;

dev->vq[dev->cfg.queue_notify].last_avail =
@@ -157,7 +158,7 @@ vioscsi_reg_name(uint8_t reg)
switch (reg) {
case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature";
case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature";
-   case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address";
+   case VIRTIO_CONFIG_QUEUE_PFN: return "queue pfn";
case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size";
case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select";
case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify";
@@ -1672,8 +1673,8 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
DPRINTF("%s: guest feature set to %u",
__func__, dev->cfg.guest_feature);
break;
-   case VIRTIO_CONFIG_QUEUE_ADDRESS:
-   dev->cfg.queue_address = *data;
+   case VIRTIO_CONFIG_QUEUE_PFN:
+   dev->cfg.queue_pfn = *data;
vioscsi_update_qa(dev);
break;
case VIRTIO_CONFIG_QUEUE_SELECT:
@@ -1692,7 +1693,7 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
if (dev->cfg.device_status == 0) {
log_debug("%s: device reset", __func__);
dev->cfg.guest_feature = 0;
-   dev->cfg.queue_address = 0;
+   dev->cfg.queue_pfn = 0;
vioscsi_update_qa(dev);
dev->cfg.queue_size = 0;
vioscsi_update_qs(dev);
@@ -1988,8 +1989,8 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
case VIRTIO_CONFIG_GUEST_FEATURES:
*data = dev->cfg.guest_feature;
break;
-   case VIRTIO_CONFIG_QUEUE_ADDRESS:
-   *data = dev->cfg.queue_address;
+   case VIRTIO_CONFIG_QUEUE_PFN:
+   *data = dev->cfg.queue_pfn;
break;
case VIRTIO_CONFIG_QUEUE_SIZE:
if (sz == 4)
@@ -2033,25 +2034,38 @@ vioscsi_update_qs(struct vioscsi_dev *dev)
 void
 vioscsi_update_qs(struct vioscsi_dev *dev)
 {
+   struct virtio_vq_info *vq_info;
+
/* Invalid queue? */
if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES) {
dev->cfg.queue_size = 0;
return;
}

-   /* Update queue address/size based on queue select */
-   dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa;
-   dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs;
+   vq_info = &dev->vq[dev->cfg.queue_select];
+
+   /* Update queue pfn/size based on queue select */
+   dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
+   dev->cfg.queue_size = vq_info->qs;
 }

 void
 vioscsi_update_qa(struct vioscsi_dev *dev)
 {
+   struct virtio_vq_info *vq_info;
+   void *hva = NULL;
+
/* Invalid queue? */
if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES)
return;

-   dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address;
+   vq_info = &dev->vq[dev->cfg.queue_select];
+   vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
+
+   hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOSCSI_QUEUE_SIZE));
+   if (hva == NULL)
+   fatal("vioscsi_update_qa");
+   vq_info->q_hva = hva;
 }

 /*
@@ -2067,