From: Zhai, Edwin <[email protected]>

Allow an optional PCI segment (or domain) number, defaulting to 0, to be
specified in addition to the BDF when assigning a PCI device to a guest.

Signed-off-by: Zhai Edwin <[email protected]>
Acked-by: Chris Wright <[email protected]>
Signed-off-by: Marcelo Tosatti <[email protected]>

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 869b71e..315fc72 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -595,8 +595,8 @@ static int get_real_device_id(const char *devpath, uint16_t *val)
     return get_real_id(devpath, "device", val);
 }
 
-static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
-                           uint8_t r_dev, uint8_t r_func)
+static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg,
+                           uint8_t r_bus, uint8_t r_dev, uint8_t r_func)
 {
     char dir[128], name[128];
     int fd, r = 0, v;
@@ -609,8 +609,8 @@ static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
 
     dev->region_number = 0;
 
-    snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/0000:%02x:%02x.%x/",
-            r_bus, r_dev, r_func);
+    snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/",
+            r_seg, r_bus, r_dev, r_func);
 
     snprintf(name, sizeof(name), "%sconfig", dir);
 
@@ -752,9 +752,9 @@ static void free_assigned_device(AssignedDevice *dev)
     }
 }
 
-static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
+static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
 {
-    return (uint32_t)bus << 8 | (uint32_t)devfn;
+    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
 }
 
 static void assign_failed_examine(AssignedDevice *dev)
@@ -763,9 +763,8 @@ static void assign_failed_examine(AssignedDevice *dev)
     uint16_t vendor_id, device_id;
     int r;
 
-    /* XXX implement multidomain */
-    sprintf(dir, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/",
-             dev->host.bus, dev->host.dev, dev->host.func);
+    sprintf(dir, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
+            dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
 
     sprintf(name, "%sdriver", dir);
 
@@ -782,19 +781,19 @@ static void assign_failed_examine(AssignedDevice *dev)
     }
 
     fprintf(stderr, "*** The driver '%s' is occupying your device "
-                    "%02x:%02x.%x.\n",
-            ns, dev->host.bus, dev->host.dev, dev->host.func);
+                    "%04x:%02x:%02x.%x.\n",
+            ns, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
     fprintf(stderr, "***\n");
     fprintf(stderr, "*** You can try the following commands to free it:\n");
     fprintf(stderr, "***\n");
     fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/"
                     "new_id\n", vendor_id, device_id);
-    fprintf(stderr, "*** $ echo \"0000:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
+    fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
                     "%s/unbind\n",
-            dev->host.bus, dev->host.dev, dev->host.func, ns);
-    fprintf(stderr, "*** $ echo \"0000:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
+            dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func, ns);
+    fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
                     "pci-stub/bind\n",
-            dev->host.bus, dev->host.dev, dev->host.func);
+            dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
     fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub"
                     "/remove_id\n", vendor_id, device_id);
     fprintf(stderr, "***\n");
@@ -810,9 +809,20 @@ static int assign_device(AssignedDevice *dev)
     struct kvm_assigned_pci_dev assigned_dev_data;
     int r;
 
+#ifdef KVM_CAP_PCI_SEGMENT
+    /* Only pass non-zero PCI segment to capable module */
+    if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) &&
+        dev->h_segnr) {
+        fprintf(stderr, "Can't assign device inside non-zero PCI segment "
+                "as this KVM module doesn't support it.\n");
+        return -ENODEV;
+    }
+#endif
+
     memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
     assigned_dev_data.assigned_dev_id  =
-       calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+       calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
+    assigned_dev_data.segnr = dev->h_segnr;
     assigned_dev_data.busnr = dev->h_busnr;
     assigned_dev_data.devfn = dev->h_devfn;
 
@@ -867,7 +877,7 @@ static int assign_irq(AssignedDevice *dev)
 
     memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
     assigned_irq_data.assigned_dev_id =
-        calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+        calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
     assigned_irq_data.guest_irq = irq;
     assigned_irq_data.host_irq = dev->real_device.irq;
 #ifdef KVM_CAP_ASSIGN_DEV_IRQ
@@ -908,7 +918,7 @@ static void deassign_device(AssignedDevice *dev)
 
     memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
     assigned_dev_data.assigned_dev_id  =
-       calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+       calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
 
     r = kvm_deassign_pci_device(kvm_context, &assigned_dev_data);
     if (r < 0)
@@ -964,7 +974,7 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
 
     memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
     assigned_irq_data.assigned_dev_id  =
-        calc_assigned_dev_id(assigned_dev->h_busnr,
+        calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
                 (uint8_t)assigned_dev->h_devfn);
 
     if (assigned_dev->irq_requested_type) {
@@ -1048,7 +1058,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
         fprintf(stderr, "MSI-X entry number is zero!\n");
         return -EINVAL;
     }
-    msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_busnr,
+    msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
                                           (uint8_t)adev->h_devfn);
     msix_nr.entry_nr = entries_nr;
     r = kvm_assign_set_msix_nr(kvm_context, &msix_nr);
@@ -1121,7 +1131,7 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
 
     memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
     assigned_irq_data.assigned_dev_id  =
-            calc_assigned_dev_id(assigned_dev->h_busnr,
+            calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
                     (uint8_t)assigned_dev->h_devfn);
 
     if (assigned_dev->irq_requested_type) {
@@ -1317,12 +1327,13 @@ static int assigned_initfn(struct PCIDevice *pci_dev)
     uint8_t e_device, e_intx;
     int r;
 
-    if (!dev->host.bus && !dev->host.dev && !dev->host.func) {
+    if (!dev->host.seg && !dev->host.bus && !dev->host.dev && !dev->host.func) {
         qemu_error("pci-assign: error: no host device specified\n");
         goto out;
     }
 
-    if (get_real_device(dev, dev->host.bus, dev->host.dev, dev->host.func)) {
+    if (get_real_device(dev, dev->host.seg, dev->host.bus,
+                        dev->host.dev, dev->host.func)) {
         qemu_error("pci-assign: Error: Couldn't get real device (%s)!\n",
                    dev->dev.qdev.id);
         goto out;
@@ -1340,12 +1351,13 @@ static int assigned_initfn(struct PCIDevice *pci_dev)
     dev->intpin = e_intx;
     dev->run = 0;
     dev->girq = 0;
+    dev->h_segnr = dev->host.seg;
     dev->h_busnr = dev->host.bus;
     dev->h_devfn = PCI_DEVFN(dev->host.dev, dev->host.func);
 
     pacc = pci_alloc();
     pci_init(pacc);
-    dev->pdev = pci_get_dev(pacc, 0, dev->host.bus, dev->host.dev, dev->host.func);
+    dev->pdev = pci_get_dev(pacc, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
 
     if (pci_enable_capability_support(pci_dev, 0, NULL,
                     assigned_device_pci_cap_write_config,
@@ -1392,7 +1404,7 @@ static int parse_hostaddr(DeviceState *dev, Property *prop, const char *str)
     PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop);
     int rc;
 
-    rc = pci_parse_host_devaddr(str, &ptr->bus, &ptr->dev, &ptr->func);
+    rc = pci_parse_host_devaddr(str, &ptr->seg, &ptr->bus, &ptr->dev, &ptr->func);
     if (rc != 0)
         return -1;
     return 0;
@@ -1512,8 +1524,8 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev)
     char rom_file[64];
 
     snprintf(rom_file, sizeof(rom_file),
-             "/sys/bus/pci/devices/0000:%02x:%02x.%01x/rom",
-             dev->host.bus, dev->host.dev, dev->host.func);
+             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom",
+             dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
 
     if (access(rom_file, F_OK))
         return;
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index a231260..1cbfc36 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -37,6 +37,7 @@
 #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
 
 typedef struct PCIHostDevice {
+    int seg;
     int bus;
     int dev;
     int func;
@@ -82,6 +83,7 @@ typedef struct AssignedDevice {
     PCIDevRegions real_device;
     int run;
     int girq;
+    unsigned int h_segnr;
     unsigned char h_busnr;
     unsigned int h_devfn;
     int irq_requested_type;
diff --git a/hw/pci.c b/hw/pci.c
index 1fa7f1c..a274d3b 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -481,21 +481,48 @@ static int pci_parse_devaddr(const char *addr, int *domp, int *busp, unsigned *s
 }
 
 /*
- * Parse device bdf in device assignment command:
+ * Parse device seg and bdf in device assignment command:
  *
- * -pcidevice host=bus:dev.func
+ * -pcidevice host=[seg:]bus:dev.func
  *
- * Parse <bus>:<slot>.<func> return -1 on error
+ * Parse [seg:]<bus>:<slot>.<func> return -1 on error
  */
-int pci_parse_host_devaddr(const char *addr, int *busp,
+int pci_parse_host_devaddr(const char *addr, int *segp, int *busp,
                            int *slotp, int *funcp)
 {
     const char *p;
     char *e;
     int val;
-    int bus = 0, slot = 0, func = 0;
+    int seg = 0, bus = 0, slot = 0, func = 0;
 
+    /* parse optional seg */
     p = addr;
+    val = 0;
+    while (1) {
+        p = strchr(p, ':');
+        if (p) {
+            val++;
+            p++;
+        } else
+            break;
+    }
+    if (val <= 0 || val > 2)
+        return -1;
+
+    p = addr;
+    if (val == 2) {
+        val = strtoul(p, &e, 16);
+        if (e == p)
+            return -1;
+        if (*e == ':') {
+            seg = val;
+            p = e + 1;
+        }
+    } else
+        seg = 0;
+
+
+    /* parse bdf */
     val = strtoul(p, &e, 16);
     if (e == p)
        return -1;
@@ -517,12 +544,13 @@ int pci_parse_host_devaddr(const char *addr, int *busp,
     } else
        return -1;
 
-    if (bus > 0xff || slot > 0x1f || func > 0x7)
+    if (seg > 0xffff || bus > 0xff || slot > 0x1f || func > 0x7)
        return -1;
 
     if (*e)
        return -1;
 
+    *segp = seg;
     *busp = bus;
     *slotp = slot;
     *funcp = func;
diff --git a/hw/pci.h b/hw/pci.h
index 728f83f..c9e9d56 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -277,7 +277,7 @@ PCIBus *pci_get_bus_devfn(int *devfnp, const char *devaddr);
 int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp,
                      unsigned *slotp);
 
-int pci_parse_host_devaddr(const char *addr, int *busp,
+int pci_parse_host_devaddr(const char *addr, int *segp, int *busp,
                            int *slotp, int *funcp);
 
 void do_pci_info_print(Monitor *mon, const QObject *data);
diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h
index 3fd3371..6485981 100644
--- a/kvm/include/linux/kvm.h
+++ b/kvm/include/linux/kvm.h
@@ -498,6 +498,8 @@ struct kvm_ioeventfd {
 #define KVM_CAP_S390_PSW 42
 #define KVM_CAP_PPC_SEGSTATE 43
 
+#define KVM_CAP_PCI_SEGMENT 47
+
 #ifdef KVM_CAP_IRQ_ROUTING
 
 struct kvm_irq_routing_irqchip {
@@ -691,8 +693,9 @@ struct kvm_assigned_pci_dev {
        __u32 busnr;
        __u32 devfn;
        __u32 flags;
+       __u32 segnr;
        union {
-               __u32 reserved[12];
+               __u32 reserved[11];
        };
 };
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 47b6b81..5c1c398 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2031,7 +2031,7 @@ DEF("no-kvm-pit-reinjection", 0, QEMU_OPTION_no_kvm_pit_reinjection,
     "                disable KVM kernel mode PIT interrupt reinjection\n")
 #if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(TARGET_IA64) || defined(__linux__)
 DEF("pcidevice", HAS_ARG, QEMU_OPTION_pcidevice,
-    "-pcidevice host=bus:dev.func[,dma=none][,name=string]\n"
+    "-pcidevice host=[seg:]bus:dev.func[,dma=none][,name=string]\n"
     "                expose a PCI device to the guest OS\n"
     "                dma=none: don't perform any dma translations (default is to use an iommu)\n"
     "                'string' is used in log output\n")
--
To unsubscribe from this list: send the line "unsubscribe kvm-commits" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to