Multi-queue devices achieve the best performance when each vCPU has a
dedicated queue. This ensures that virtqueue used notifications are
handled on the same vCPU that submitted virtqueue buffers.  When another
vCPU handles the the notification an IPI will be necessary to wake the
submission vCPU and this incurs a performance overhead.

Provide a helper function that virtio-pci devices will use in later
patches to automatically select the optimal number of queues.

The function handles guests with large numbers of CPUs by limiting the
number of queues to fit within the following constraints:
1. The maximum number of MSI-X vectors.
2. The maximum number of virtqueues.

Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com>
---
 hw/virtio/virtio-pci.h |  9 +++++++++
 hw/virtio/virtio-pci.c | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index e2eaaa9182..91096f0291 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -243,4 +243,13 @@ typedef struct VirtioPCIDeviceTypeInfo {
 /* Register virtio-pci type(s).  @t must be static. */
 void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t);
 
+/**
+ * virtio_pci_optimal_num_queues:
+ * @fixed_queues: number of queues that are always present
+ *
+ * Returns: The optimal number of queues for a multi-queue device, excluding
+ * @fixed_queues.
+ */
+unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues);
+
 #endif
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 7bc8c1c056..c48570c03f 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -19,6 +19,7 @@
 
 #include "exec/memop.h"
 #include "standard-headers/linux/virtio_pci.h"
+#include "hw/boards.h"
 #include "hw/virtio/virtio.h"
 #include "migration/qemu-file-types.h"
 #include "hw/pci/pci.h"
@@ -2028,6 +2029,37 @@ void virtio_pci_types_register(const 
VirtioPCIDeviceTypeInfo *t)
     g_free(base_name);
 }
 
+unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues)
+{
+    /*
+     * 1:1 vq to vCPU mapping is ideal because the same vCPU that submitted
+     * virtqueue buffers can handle their completion. When a different vCPU
+     * handles completion it may need to IPI the vCPU that submitted the
+     * request and this adds overhead.
+     *
+     * Virtqueues consume guest RAM and MSI-X vectors. This is wasteful in
+     * guests with very many vCPUs and a device that is only used by a few
+     * vCPUs. Unfortunately optimizing that case requires manual pinning inside
+     * the guest, so those users might as well manually set the number of
+     * queues. There is no upper limit that can be applied automatically and
+     * doing so arbitrarily would result in a sudden performance drop once the
+     * threshold number of vCPUs is exceeded.
+     */
+    unsigned num_queues = current_machine->smp.cpus;
+
+    /*
+     * The maximum number of MSI-X vectors is PCI_MSIX_FLAGS_QSIZE + 1, but the
+     * config change interrupt and the fixed virtqueues must be taken into
+     * account too.
+     */
+    num_queues = MIN(num_queues, PCI_MSIX_FLAGS_QSIZE - fixed_queues);
+
+    /*
+     * There is a limit to how many virtqueues a device can have.
+     */
+    return MIN(num_queues, VIRTIO_QUEUE_MAX - fixed_queues);
+}
+
 /* virtio-pci-bus */
 
 static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
-- 
2.26.2

Reply via email to