On Tuesday 30 December 2008 14:29:32 Sheng Yang wrote: > The most important part here is that we emulate a page of the MMIO region using a > page of memory. That's because the MSI-X table is located in that region and we > have to intercept accesses to it. >
Found some issues, would update this later. -- regards Yang, Sheng > Signed-off-by: Sheng Yang <[email protected]> > --- > qemu/hw/device-assignment.c | 275 > ++++++++++++++++++++++++++++++++++++++++++- qemu/hw/device-assignment.h | > 6 + > 2 files changed, 276 insertions(+), 5 deletions(-) > > diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c > index 2d3e67e..dc2020a 100644 > --- a/qemu/hw/device-assignment.c > +++ b/qemu/hw/device-assignment.c > @@ -146,6 +146,7 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, > int region_num, { > AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev); > AssignedDevRegion *region = &r_dev->v_addrs[region_num]; > + PCIRegion *real_region = &r_dev->real_device.regions[region_num]; > uint32_t old_ephys = region->e_physbase; > uint32_t old_esize = region->e_size; > int first_map = (region->e_size == 0); > @@ -164,10 +165,27 @@ static void assigned_dev_iomem_map(PCIDevice > *pci_dev, int region_num, TARGET_PAGE_ALIGN(old_esize)); > } > > - if (e_size > 0) > + if (e_size > 0) { > + /* deal with MSI-X MMIO page */ > + if (real_region->base_addr <= r_dev->msix_table_addr && > + real_region->base_addr + real_region->size >= > + r_dev->msix_table_addr) { > + int offset = r_dev->msix_table_addr - real_region->base_addr; > + ret = munmap(region->u.r_virtbase + offset, TARGET_PAGE_SIZE); > + if (ret == 0) > + DEBUG("munmap done, virt_base 0x%p\n", > + region->u.r_virtbase + offset); > + else { > + fprintf(stderr, "%s: fail munmap msix table!\n", > __func__); + exit(1); > + } > + cpu_register_physical_memory(e_phys + offset, > + TARGET_PAGE_SIZE, r_dev->mmio_index); > + } > ret = kvm_register_phys_mem(kvm_context, e_phys, > region->u.r_virtbase, > TARGET_PAGE_ALIGN(e_size), 0); > + } > > if (ret != 0) { > fprintf(stderr, "%s: Error: create new mapping failed\n", __func__); > @@ -570,7 +588,9 @@ void assigned_dev_update_irq(PCIDevice *d) > } > } > > -#if defined(KVM_CAP_DEVICE_MSI) && defined 
(KVM_CAP_GSI_MSG) > +#ifdef KVM_CAP_GSI_MSG > + > +#ifdef KVM_CAP_DEVICE_MSI > static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int > ctrl_pos) { > struct kvm_assigned_irq assigned_irq_data; > @@ -610,14 +630,140 @@ static void assigned_dev_update_msi(PCIDevice > *pci_dev, unsigned int ctrl_pos) } > #endif > > -void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t > address, +#ifdef KVM_CAP_DEVICE_MSIX > +static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) > +{ > + AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev); > + u16 entries_nr = 0, entries_max_nr; > + int pos = 0, i, r = 0; > + u32 msg_addr, msg_upper_addr, msg_data, msg_ctrl; > + struct kvm_assigned_msix_nr msix_nr; > + struct kvm_assigned_msix_entry msix_entry; > + struct kvm_assigned_gsi_msg gsi_msg; > + void *va = adev->msix_table_page; > + > + if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI) > + pos = PCI_CAPABILITY_CONFIG_MSI_LENGTH; > + entries_max_nr = pci_dev->cap.config[pos + 2]; > + entries_max_nr &= PCI_MSIX_TABSIZE; > + > + /* Get the usable entry number for allocating */ > + for (i = 0; i < entries_max_nr; i++) { > + memcpy(&msg_ctrl, va + i * 16 + 12, 4); > + /* 0x1 is mask bit for per vector */ > + if (msg_ctrl & 0x1) > + continue; > + memcpy(&msg_data, va + i * 16 + 8, 4); > + /* Ignore unused entry even it's unmasked */ > + if (msg_data == 0) > + continue; > + entries_nr ++; > + } > + > + msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_busnr, > + (uint8_t)adev->h_devfn); > + msix_nr.entry_nr = entries_nr; > + r = kvm_set_msix_nr(kvm_context, &msix_nr); > + if (r != 0) { > + fprintf(stderr, "fail to set MSI-X entry number for MSIX! 
%s\n", > + strerror(-r)); > + return r; > + } > + > + msix_entry.assigned_dev_id = msix_nr.assigned_dev_id; > + entries_nr = 0; > + for (i = 0; i < entries_max_nr; i++) { > + if (entries_nr >= msix_nr.entry_nr) > + break; > + memcpy(&msg_ctrl, va + i * 16 + 12, 4); > + if (msg_ctrl & 0x1) > + continue; > + memcpy(&msg_data, va + i * 16 + 8, 4); > + if (msg_data == 0) > + continue; > + > + memcpy(&msg_addr, va + i * 16, 4); > + memcpy(&msg_upper_addr, va + i * 16 + 4, 4); > + > + gsi_msg.gsi = 0; > + gsi_msg.msg.addr_lo = msg_addr; > + gsi_msg.msg.addr_hi = msg_upper_addr; > + gsi_msg.msg.data = msg_data; > + r = kvm_request_gsi_msg(kvm_context, &gsi_msg); > + if (r) { > + fprintf(stderr, "fail to request gsi msg for MSIX! %s\n", > + strerror(-r)); > + break; > + } > + DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, > msg_addr); + > + msix_entry.gsi = gsi_msg.gsi; > + msix_entry.entry = i; > + msix_entry.pos = entries_nr; > + r = kvm_set_msix_entry(kvm_context, &msix_entry); > + if (r) { > + fprintf(stderr, "fail to set MSI-X entry! 
%s\n", > strerror(-r)); + break; > + } > + DEBUG("MSI-X entry gsi 0x%x, entry %d, pos %d\n!", > + msix_entry.gsi, msix_entry.entry, msix_entry.pos); > + entries_nr ++; > + } > + > + return r; > +} > + > +static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int > ctrl_pos) +{ > + struct kvm_assigned_irq assigned_irq_data; > + AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, > dev); + uint16_t *ctrl_word = (uint16_t *)(pci_dev->cap.config + > ctrl_pos); + > + memset(&assigned_irq_data, 0, sizeof assigned_irq_data); > + assigned_irq_data.assigned_dev_id = > + calc_assigned_dev_id(assigned_dev->h_busnr, > + (uint8_t)assigned_dev->h_devfn); > + > + assigned_irq_data.flags = KVM_DEV_IRQ_ASSIGN_MSIX_ACTION; > + if (*ctrl_word & PCI_MSIX_ENABLE) { > + assigned_irq_data.flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX; > + if (assigned_dev_update_msix_mmio(pci_dev) < 0) { > + perror("assigned_dev_update_msix_mmio"); > + } > + } > + if (*ctrl_word & PCI_MSIX_MASK) > + assigned_irq_data.flags |= KVM_DEV_IRQ_ASSIGN_MASK_MSIX; > + > + if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) > + perror("assigned_dev_enable_msi"); > + > + if (assigned_irq_data.flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX) { > + assigned_dev->cap.state |= ASSIGNED_DEVICE_MSIX_ENABLED; > + *ctrl_word |= PCI_MSIX_ENABLE; > + } else { > + assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSIX_ENABLED; > + *ctrl_word &= ~PCI_MSIX_ENABLE; > + } > + if (assigned_irq_data.flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX) { > + assigned_dev->cap.state |= ASSIGNED_DEVICE_MSIX_MASKED; > + *ctrl_word |= PCI_MSIX_MASK; > + } else { > + assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSIX_MASKED; > + *ctrl_word &= ~PCI_MSIX_MASK; > + } > +} > +#endif > +#endif > + > +static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, > uint32_t address, uint32_t val, int len) > { > AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, > dev); unsigned int pos = pci_dev->cap.start, ctrl_pos; > > 
pci_default_cap_write_config(pci_dev, address, val, len); > -#if defined(KVM_CAP_DEVICE_MSI) && defined (KVM_CAP_GSI_MSG) > +#ifdef KVM_CAP_GSI_MSG > +#ifdef KVM_CAP_DEVICE_MSI > if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { > ctrl_pos = pos + PCI_MSI_FLAGS; > if (address <= ctrl_pos && address + len > ctrl_pos) > @@ -625,6 +771,17 @@ void assigned_device_pci_cap_write_config(PCIDevice > *pci_dev, uint32_t address, pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH; > } > #endif > +#ifdef KVM_CAP_DEVICE_MSIX > + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { > + ctrl_pos = pos + 3; > + if (address <= ctrl_pos && address + len > ctrl_pos) { > + ctrl_pos--; /* control is word long */ > + assigned_dev_update_msix(pci_dev, ctrl_pos - > pci_dev->cap.start); + } > + pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH; > + } > +#endif > +#endif > return; > } > > @@ -643,7 +800,9 @@ static void assigned_device_pci_cap_init(PCIDevice > *pci_dev) pci_init(pacc); > dev->pdev = pci_get_dev(pacc, 0, h_bus, h_dev, h_func); > pci_cleanup(pacc); > -#if defined(KVM_CAP_DEVICE_MSI) && defined (KVM_CAP_GSI_MSG) > + pci_fill_info(dev->pdev, PCI_FILL_BASES); > +#ifdef KVM_CAP_GSI_MSG > +#ifdef KVM_CAP_DEVICE_MSI > /* Expose MSI capability > * MSI capability is the 1st capability in cap.config */ > if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSI)) { > @@ -653,6 +812,107 @@ static void assigned_device_pci_cap_init(PCIDevice > *pci_dev) next_cap_pt = 1; > } > #endif > +#ifdef KVM_CAP_DEVICE_MSIX > + /* Expose MSI-X capability */ > + if (pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX)) { > + int pos, entry_nr, bar_nr; > + u32 msix_table_entry; > + dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX; > + pos = pci_find_cap_offset(dev->pdev, PCI_CAP_ID_MSIX); > + entry_nr = pci_read_word(dev->pdev, pos + 2) & PCI_MSIX_TABSIZE; > + pci_dev->cap.config[pci_dev->cap.length] = 0x11; > + pci_dev->cap.config[pci_dev->cap.length + 2] = entry_nr; > + msix_table_entry = pci_read_long(dev->pdev, 
pos + PCI_MSIX_TABLE); > + *(uint32_t *)(pci_dev->cap.config + > + pci_dev->cap.length + PCI_MSIX_TABLE) = > msix_table_entry; + *(uint32_t *)(pci_dev->cap.config + > + pci_dev->cap.length + PCI_MSIX_PBA) = > + pci_read_long(dev->pdev, pos + PCI_MSIX_PBA); > + bar_nr = msix_table_entry & PCI_MSIX_BIR; > + msix_table_entry &= ~PCI_MSIX_BIR; > + dev->msix_table_addr = dev->pdev->base_addr[bar_nr] + > msix_table_entry; + pci_dev->cap.config[next_cap_pt] = > + pci_dev->cap.start + pci_dev->cap.length; > + pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSIX_LENGTH; > + next_cap_pt += PCI_CAPABILITY_CONFIG_MSIX_LENGTH; > + } > +#endif > +#endif > +} > + > +static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr) > +{ > + AssignedDevice *adev = opaque; > + unsigned int offset = addr & 0xfff; > + void *page = adev->msix_table_page; > + uint32_t val = 0; > + > + DEBUG("read from MSI-X entry table mmio 0x%lx", addr); > + memcpy(&val, (void *)((char *)page + offset), 4); > + > + return val; > +} > + > +static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr) > +{ > + return ((msix_mmio_readl(opaque, addr & ~3)) >> > + (8 * (addr & 3))) & 0xff; > +} > + > +static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr) > +{ > + return ((msix_mmio_readl(opaque, addr & ~3)) >> > + (8 * (addr & 3))) & 0xffff; > +} > + > +static void msix_mmio_writel(void *opaque, > + target_phys_addr_t addr, uint32_t val) > +{ > + AssignedDevice *adev = opaque; > + unsigned int offset = addr & 0xfff; > + void *page = adev->msix_table_page; > + > + DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n", > + addr, val); > + memcpy((void *)((char *)page + offset), &val, 4); > +} > + > +static void msix_mmio_writew(void *opaque, > + target_phys_addr_t addr, uint32_t val) > +{ > + msix_mmio_writel(opaque, addr & ~3, > + (val & 0xffff) << (8*(addr & 3))); > +} > + > +static void msix_mmio_writeb(void *opaque, > + target_phys_addr_t addr, uint32_t val) > +{ > 
+ msix_mmio_writel(opaque, addr & ~3, > + (val & 0xff) << (8*(addr & 3))); > +} > + > +static CPUWriteMemoryFunc *msix_mmio_write[] = { > + msix_mmio_writeb, msix_mmio_writew, msix_mmio_writel > +}; > + > +static CPUReadMemoryFunc *msix_mmio_read[] = { > + msix_mmio_readb, msix_mmio_readw, msix_mmio_readl > +}; > + > +static int assigned_dev_register_msix_mmio(AssignedDevice *dev) > +{ > + /* handle exception */ > + dev->msix_table_page = mmap(NULL, 0x10000, > + PROT_READ|PROT_WRITE, > + MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); > + if (dev->msix_table_page == MAP_FAILED) { > + fprintf(stderr, "fail allocate msix_table_page! %s\n", > strerror(errno)); + return -EFAULT; > + } > + > + dev->mmio_index = cpu_register_io_memory(0, msix_mmio_read, > + msix_mmio_write, dev); > + return 0; > } > > struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus) > @@ -726,6 +986,11 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo > *adev, PCIBus *bus) assigned_device_pci_cap_write_config, > assigned_device_pci_cap_init); > > + /* intercept MSI-X entry page in the MMIO */ > + if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) > + if (assigned_dev_register_msix_mmio(dev)) > + return NULL; > + > return &dev->dev; > } > > diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h > index ea26de5..0db371a 100644 > --- a/qemu/hw/device-assignment.h > +++ b/qemu/hw/device-assignment.h > @@ -84,10 +84,16 @@ typedef struct { > struct pci_dev *pdev; > struct { > #define ASSIGNED_DEVICE_CAP_MSI (1 << 0) > +#define ASSIGNED_DEVICE_CAP_MSIX (1 << 1) > uint32_t available; > #define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0) > +#define ASSIGNED_DEVICE_MSIX_ENABLED (1 << 1) > +#define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2) > uint32_t state; > } cap; > + void *msix_table_page; > + target_phys_addr_t msix_table_addr; > + int mmio_index; > } AssignedDevice; > > typedef struct AssignedDevInfo AssignedDevInfo; -- To unsubscribe from this list: send the line "unsubscribe kvm" in 
the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
