On Thu, 2008-10-30 at 14:24 -0500, Anthony Liguori wrote:
> Instead of using an event fd, perhaps you could just schedule a bottom
> half? I think that would be a whole lot cleaner.
Nice, I hadn't noticed the bottom halves. Much cleaner, indeed.
Results are a little better too:
                              | guest->host tput           | host->guest tput
netperf, 10x20s runs (Gb/s)   | min/ mean/  max/stddev     | min/ mean/  max/stddev
------------------------------+----------------------------+---------------------------
eventfd, 1k                   | 4.260/ 6.739/ 8.020/ 1.075 | 3.640/ 3.777/ 4.020/ 0.110
eventfd, 16k                  | 8.530/ 8.867/ 9.110/ 0.146 | 7.220/ 7.276/ 7.360/ 0.040
eventfd, 65k                  | 9.200/ 9.282/ 9.870/ 0.198 | 7.850/ 7.924/ 8.000/ 0.045
bottom half, bh, 1k           | 4.890/ 6.712/ 7.800/ 0.800 | 4.900/ 5.002/ 5.130/ 0.067
bottom half, bh, 16k          | 8.640/ 8.827/ 9.290/ 0.186 | 7.920/ 7.963/ 8.020/ 0.034
bottom half, bh, 65k          | 9.310/ 9.323/ 9.350/ 0.010 | 8.740/ 8.769/ 8.800/ 0.016
                              | guest->host %idle             | host->guest %idle
netperf, 10x20s runs (Gb/s)   | avg/cpu#0/cpu#1/cpu#2/cpu#3   | avg/cpu#0/cpu#1/cpu#2/cpu#3
------------------------------+--------------------------------+-----------------------------
eventfd, 1k                   | 57.68/99.81/99.96/18.76/21.91 | 65.49/99.84/99.97/15.20/52.46
eventfd, 16k                  | 61.66/99.85/99.98/13.01/40.83 | 61.43/99.79/99.93/ 0.97/47.61
eventfd, 65k                  | 62.57/99.99/99.95/13.13/39.28 | 60.15/99.81/99.97/ 1.27/43.10
bottom half, 1k               | 57.29/99.83/99.81/22.30/15.93 | 63.36/99.98/99.95/ 2.92/51.26
bottom half, 16k              | 62.21/99.81/99.96/15.74/42.40 | 60.21/99.81/99.97/ 0.92/43.78
bottom half, 65k              | 62.25/99.99/99.96/12.67/37.45 | 58.63/99.83/99.96/ 0.92/36.71
(With this approach, patches 2-4 aren't needed anymore, but they're
still worthwhile cleanups IMHO)
Cheers,
Mark.
From: Mark McLoughlin <[EMAIL PROTECTED]>
Subject: kvm: qemu: virtio-net: handle all tx in I/O thread without timer
By removing the tx timer altogether and doing all the copies in the
I/O thread, we can keep the I/O churning away in parallel with the
guest generating more I/O.
In my tests, this significantly increases guest->host throughput,
causes a minor increase in host->guest throughput, reduces CPU
utilization somewhat and greatly reduces roundtrip times.
Even aside from the benchmark results, removing the arbitrary 150us
timer is a nicer option than coming up with a heuristic to make it
vary according to load. Finally, on kernels which don't have a
suitably low posix timer latency, we won't be scuppered by effectively
having e.g. a 1ms timer.
Note, this highlights that the I/O thread may become a scalability
concern and we might want to consider e.g. an I/O thread per device.
Note also that when tuning for a specific workload, which CPU
the I/O thread is pinned to is important.
Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]>
---
qemu/hw/virtio-net.c | 60 +++++++++++++++++++++----------------------------
1 files changed, 26 insertions(+), 34 deletions(-)
diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index bc2ede6..bc198ef 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -35,8 +35,6 @@
#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */
#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */
-#define TX_TIMER_INTERVAL 150000 /* 150 us */
-
/* The config defining mac address (6 bytes) */
struct virtio_net_config
{
@@ -68,8 +66,7 @@ typedef struct VirtIONet
VirtQueue *rx_vq;
VirtQueue *tx_vq;
VLANClientState *vc;
- QEMUTimer *tx_timer;
- int tx_timer_active;
+ QEMUBH *tx_bh;
} VirtIONet;
/* TODO
@@ -227,13 +224,14 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
}
/* TX */
-static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
+static int virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
{
VirtQueueElement elem;
int has_vnet_hdr = tap_has_vnet_hdr(n->vc->vlan->first_client);
+ int num_packets = 0;
if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
- return;
+ return num_packets;
while (virtqueue_pop(vq, &elem)) {
ssize_t len = 0;
@@ -256,38 +254,29 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
virtqueue_push(vq, &elem, len);
virtio_notify(&n->vdev, vq);
+
+ num_packets++;
}
+
+ return num_packets;
}
static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIONet *n = to_virtio_net(vdev);
- if (n->tx_timer_active) {
- vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
- qemu_del_timer(n->tx_timer);
- n->tx_timer_active = 0;
- virtio_net_flush_tx(n, vq);
- } else {
- qemu_mod_timer(n->tx_timer,
- qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
- n->tx_timer_active = 1;
- vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
- }
+ vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+ qemu_bh_schedule(n->tx_bh);
}
-static void virtio_net_tx_timer(void *opaque)
+static void virtio_net_tx_bh(void *opaque)
{
VirtIONet *n = opaque;
- n->tx_timer_active = 0;
-
- /* Just in case the driver is not ready on more */
- if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
- return;
-
- n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
- virtio_net_flush_tx(n, n->tx_vq);
+ if (!virtio_net_flush_tx(n, n->tx_vq))
+ n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+ else
+ qemu_bh_schedule(n->tx_bh);
}
static void virtio_net_save(QEMUFile *f, void *opaque)
@@ -297,7 +286,6 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
virtio_save(&n->vdev, f);
qemu_put_buffer(f, n->mac, 6);
- qemu_put_be32(f, n->tx_timer_active);
}
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
@@ -310,12 +298,15 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
virtio_load(&n->vdev, f);
qemu_get_buffer(f, n->mac, 6);
- n->tx_timer_active = qemu_get_be32(f);
- if (n->tx_timer_active) {
- qemu_mod_timer(n->tx_timer,
- qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
- }
+ return 0;
+}
+
+static int virtio_net_uninit(PCIDevice *dev)
+{
+ VirtIONet *n = (VirtIONet *)dev;
+
+ qemu_bh_delete(n->tx_bh);
return 0;
}
@@ -341,8 +332,9 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
virtio_net_can_receive, n);
- n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
- n->tx_timer_active = 0;
+ n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
+
+ n->vdev.pci_dev.unregister = virtio_net_uninit;
register_savevm("virtio-net", virtio_net_id++, 1,
virtio_net_save, virtio_net_load, n);
--
1.5.4.3
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html