On Mon, 2011-06-27 at 20:19 +0200, Jan Kiszka wrote:
> From: Jan Kiszka <[email protected]>
>
> This drastically simplifies config space access management: Instead of
> coding various range checks and merging bits, set up two access control
> bitmaps. One defines, which bits can be directly read from the device,
> the other allows direct write to the device, also with bit-granularity.
>
> The setup of those bitmaps, specifically the corner cases like the
> capability list status bit, is way more readable than the existing code.
> And the generic config access functions just need one additional logic
> to catch writes to the MSI/MSI-X control flags and call the related
> update handlers.
>
> Signed-off-by: Jan Kiszka <[email protected]>
> ---
> hw/device-assignment.c | 325 ++++++++++++++++-------------------------------
> hw/device-assignment.h | 3 +-
> 2 files changed, 113 insertions(+), 215 deletions(-)
>
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index 6a2ca8c..be199d2 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -63,35 +63,6 @@ static void assigned_dev_load_option_rom(AssignedDevice
> *dev);
>
> static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
>
> -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
> - uint32_t address,
> - uint32_t val, int len);
> -
> -static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev,
> - uint32_t address, int
> len);
> -
> -/* Merge the bits set in mask from mval into val. Both val and mval are
> - * at the same addr offset, pos is the starting offset of the mask. */
> -static uint32_t merge_bits(uint32_t val, uint32_t mval, uint8_t addr,
> - int len, uint8_t pos, uint32_t mask)
> -{
> - if (!ranges_overlap(addr, len, pos, 4)) {
> - return val;
> - }
> -
> - if (addr >= pos) {
> - mask >>= (addr - pos) * 8;
> - } else {
> - mask <<= (pos - addr) * 8;
> - }
> - mask &= 0xffffffffU >> (4 - len) * 8;
> -
> - val &= ~mask;
> - val |= (mval & mask);
> -
> - return val;
> -}
> -
> static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
> uint32_t addr, int len, uint32_t *val)
> {
> @@ -404,143 +375,6 @@ static uint8_t pci_find_cap_offset(PCIDevice *d,
> uint8_t cap, uint8_t start)
> return 0;
> }
>
> -static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
> - uint32_t val, int len)
> -{
> - int fd;
> - ssize_t ret;
> - AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
> -
> - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
> - ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
> - (uint16_t) address, val, len);
> -
> - if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) {
> - return assigned_device_pci_cap_write_config(d, address, val, len);
> - }
> -
> - if (ranges_overlap(address, len, PCI_COMMAND, 2)) {
> - pci_default_write_config(d, address, val, len);
> - /* Continue to program the card */
> - }
> -
> - /*
> - * Catch access to
> - * - base address registers
> - * - ROM base address & capability pointer
> - * - interrupt line & pin
> - */
> - if (ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) ||
> - ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) {
> - pci_default_write_config(d, address, val, len);
> - return;
> - } else if (ranges_overlap(address, len, PCI_CAPABILITY_LIST, 1) ||
> - ranges_overlap(address, len, PCI_INTERRUPT_LINE, 2)) {
> - uint32_t real_val;
> -
> - pci_default_write_config(d, address, val, len);
> -
> - /* Ensure that writes to overlapping areas we don't virtualize still
> - * hit the device. */
> - real_val = assigned_dev_pci_read(d, address, len);
> - val = merge_bits(val, real_val, address, len,
> - PCI_CAPABILITY_LIST, 0xff);
> - val = merge_bits(val, real_val, address, len,
> - PCI_INTERRUPT_LINE, 0xffff);
> - }
> -
> - DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
> - ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
> - (uint16_t) address, val, len);
> -
> - fd = pci_dev->real_device.config_fd;
> -
> -again:
> - ret = pwrite(fd, &val, len, address);
> - if (ret != len) {
> - if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
> - goto again;
> -
> - fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n",
> - __func__, ret, errno);
> -
> - exit(1);
> - }
> -}
> -
> -static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
> - int len)
> -{
> - uint32_t val = 0, virt_val;
> - int fd;
> - ssize_t ret;
> - AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
> -
> - if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) {
> - val = assigned_device_pci_cap_read_config(d, address, len);
> - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
> - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
> - return val;
> - }
> -
> - /*
> - * Catch access to
> - * - vendor & device ID
> - * - base address registers
> - * - ROM base address
> - */
> - if (ranges_overlap(address, len, PCI_VENDOR_ID, 4) ||
> - ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) ||
> - ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) {
> - val = pci_default_read_config(d, address, len);
> - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
> - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
> - return val;
> - }
> -
> - fd = pci_dev->real_device.config_fd;
> -
> -again:
> - ret = pread(fd, &val, len, address);
> - if (ret != len) {
> - if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
> - goto again;
> -
> - fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n",
> - __func__, ret, errno);
> -
> - exit(1);
> - }
> -
> - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
> - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
> -
> - if (pci_dev->emulate_cmd_mask) {
> - val = merge_bits(val, pci_default_read_config(d, address, len),
> - address, len, PCI_COMMAND,
> pci_dev->emulate_cmd_mask);
> - }
> -
> - /*
> - * Merge bits from virtualized
> - * - capability pointer
> - * - interrupt line & pin
> - */
> - virt_val = pci_default_read_config(d, address, len);
> - val = merge_bits(val, virt_val, address, len, PCI_CAPABILITY_LIST, 0xff);
> - val = merge_bits(val, virt_val, address, len, PCI_INTERRUPT_LINE,
> 0xffff);
> -
> - if (!pci_dev->cap.available) {
> - /* kill the special capabilities */
> - if (address == PCI_COMMAND && len == 4) {
> - val &= ~(PCI_STATUS_CAP_LIST << 16);
> - } else if (address == PCI_STATUS) {
> - val &= ~PCI_STATUS_CAP_LIST;
> - }
> - }
> -
> - return val;
> -}
> -
> static int assigned_dev_register_regions(PCIRegion *io_regions,
> unsigned long regions_num,
> AssignedDevice *pci_dev)
> @@ -790,7 +624,8 @@ again:
> /* dealing with virtual function device */
> snprintf(name, sizeof(name), "%sphysfn/", dir);
> if (!stat(name, &statbuf)) {
> - pci_dev->emulate_cmd_mask = 0xffff;
> + /* always provide the written value on readout */
> + memset(pci_dev->direct_config_read + PCI_COMMAND, 0xff, 2);
> }
>
> dev->region_number = r;
> @@ -1268,73 +1103,81 @@ static void assigned_dev_update_msix(PCIDevice
> *pci_dev)
> }
> }
>
> -/* There can be multiple VNDR capabilities per device, we need to find the
> - * one that starts closet to the given address without going over. */
> -static uint8_t find_vndr_start(PCIDevice *pci_dev, uint32_t address)
> +static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev,
> + uint32_t address, int len)
> {
> - uint8_t cap, pos;
> + AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> + uint32_t virt_val = pci_default_read_config(pci_dev, address, len);
> + uint32_t real_val, direct_mask;
>
> - for (cap = pos = 0;
> - (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos));
> - pos += PCI_CAP_LIST_NEXT) {
> - if (pos <= address) {
> - cap = MAX(pos, cap);
> - }
> + switch (len) {
> + case 4:
> + direct_mask =
> + pci_get_long(assigned_dev->direct_config_read + address);
> + break;
> + case 2:
> + direct_mask =
> + pci_get_word(assigned_dev->direct_config_read + address);
> + break;
> + case 1:
> + direct_mask =
> + pci_get_byte(assigned_dev->direct_config_read + address);
> + break;
> + default:
> + abort();
> }
> - return cap;
> -}
> -
> -static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev,
> - uint32_t address, int
> len)
> -{
> - uint8_t cap, cap_id = pci_dev->config_map[address];
> - uint32_t val;
> -
> - switch (cap_id) {
> -
> - case PCI_CAP_ID_VPD:
> - cap = pci_find_capability(pci_dev, cap_id);
> - val = assigned_dev_pci_read(pci_dev, address, len);
> - return merge_bits(val, pci_get_long(pci_dev->config + address),
> - address, len, cap + PCI_CAP_LIST_NEXT, 0xff);
> -
> - case PCI_CAP_ID_VNDR:
> - cap = find_vndr_start(pci_dev, address);
> - val = assigned_dev_pci_read(pci_dev, address, len);
> - return merge_bits(val, pci_get_long(pci_dev->config + address),
> - address, len, cap + PCI_CAP_LIST_NEXT, 0xff);
> + if (direct_mask) {
> + real_val = assigned_dev_pci_read(pci_dev, address, len);
> + return (virt_val & ~direct_mask) | (real_val & direct_mask);
> + } else {
> + return virt_val;
> }
> -
> - return pci_default_read_config(pci_dev, address, len);
> }
>
> -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
> - uint32_t address,
> - uint32_t val, int len)
> +static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t
> address,
> + uint32_t val, int len)
> {
> - uint8_t cap_id = pci_dev->config_map[address];
> + AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> + uint32_t direct_mask, full_access;
>
> pci_default_write_config(pci_dev, address, val, len);
> - switch (cap_id) {
> - case PCI_CAP_ID_MSI:
> +
> + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
> if (range_covers_byte(address, len,
> pci_dev->msi_cap + PCI_MSI_FLAGS)) {
> assigned_dev_update_msi(pci_dev);
> }
> - break;
> -
> - case PCI_CAP_ID_MSIX:
> + } else if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
> if (range_covers_byte(address, len,
> pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) {
> assigned_dev_update_msix(pci_dev);
> }
> - break;
> + }
>
> - case PCI_CAP_ID_VPD:
> - case PCI_CAP_ID_VNDR:
> - assigned_dev_pci_write(pci_dev, address, val, len);
> + switch (len) {
> + case 4:
> + direct_mask =
> + pci_get_long(assigned_dev->direct_config_write + address);
> + full_access = 0xffffffff;
> + break;
> + case 2:
> + direct_mask =
> + pci_get_word(assigned_dev->direct_config_write + address);
> + full_access = 0xffff;
> break;
> + case 1:
> + direct_mask =
> + pci_get_byte(assigned_dev->direct_config_write + address);
> + full_access = 0xff;
> + break;
> + default:
> + abort();
> + }
Shouldn't there be a if (!direct_mask) return; here?
> + if (direct_mask != full_access) {
> + val &= direct_mask;
> + val |= assigned_dev_pci_read(pci_dev, address, len) & ~direct_mask;
> }
> + assigned_dev_pci_write(pci_dev, address, val, len);
> }
>
> static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
> @@ -1404,6 +1247,10 @@ static int assigned_device_pci_cap_init(PCIDevice
> *pci_dev)
> return ret;
> }
>
> + /* direct read except for next cap pointer */
> + memset(dev->direct_config_read + pos, 0xff, PCI_PM_SIZEOF);
> + dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
> +
> pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS);
> pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI);
> pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc);
> @@ -1438,6 +1285,10 @@ static int assigned_device_pci_cap_init(PCIDevice
> *pci_dev)
> return ret;
> }
>
> + /* direct read except for next cap pointer */
> + memset(dev->direct_config_read + pos, 0xff, size);
> + dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
> +
> type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS);
> type = (type & PCI_EXP_FLAGS_TYPE) >> 8;
> if (type != PCI_EXP_TYPE_ENDPOINT &&
> @@ -1513,6 +1364,10 @@ static int assigned_device_pci_cap_init(PCIDevice
> *pci_dev)
> return ret;
> }
>
> + /* direct read except for next cap pointer */
> + memset(dev->direct_config_read + pos, 0xff, 8);
> + dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
> +
> /* Command register, clear upper bits, including extended modes */
> cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD);
> cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ |
> @@ -1534,6 +1389,13 @@ static int assigned_device_pci_cap_init(PCIDevice
> *pci_dev)
> if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8)) < 0)
> {
> return ret;
> }
> +
> + /* direct read except for next cap pointer */
> + memset(dev->direct_config_read + pos, 0xff, 8);
> + dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
> +
> + /* direct write for cap content */
> + memset(dev->direct_config_write + pos + 2, 0xff, 6);
> }
>
> /* Devices can have multiple vendor capabilities, get them all */
> @@ -1545,6 +1407,21 @@ static int assigned_device_pci_cap_init(PCIDevice
> *pci_dev)
> pos, len)) < 0) {
> return ret;
> }
> +
> + /* direct read except for next cap pointer */
> + memset(dev->direct_config_read + pos, 0xff, len);
> + dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
> +
> + /* direct write for cap content */
> + memset(dev->direct_config_write + pos + 2, 0xff, len - 2);
> + }
> +
> + /* If real and virtual capability list status bits differ, virtualize the
> + * access. */
> + if ((pci_get_word(pci_dev->config + PCI_STATUS) & PCI_STATUS_CAP_LIST) !=
> + (assigned_dev_pci_read_byte(pci_dev, PCI_STATUS) &
> + PCI_STATUS_CAP_LIST)) {
> + dev->direct_config_read[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
> }
>
> return 0;
> @@ -1694,6 +1571,26 @@ static int assigned_initfn(struct PCIDevice *pci_dev)
> return -1;
> }
>
> + /*
> + * Set up basic config space access control. Will be further refined
> during
> + * device initialization.
> + *
> + * Direct read/write access is grangted for:
> + * - status & command registers, revision ID & class code, cacheline
> size,
> + * latency timer, header type, BIST
> + * - cardbus CIS pointer, subsystem vendor & ID
> + * - reserved fields
> + * - Min_Gnt & Max_Lat
> + */
> + memset(dev->direct_config_read + PCI_COMMAND, 0xff, 12);
This is more efficient, but maybe we should memset each field for
grep-ability.
Nice change overall. Thanks,
Alex
> + memset(dev->direct_config_read + PCI_CARDBUS_CIS, 0xff, 8);
> + memset(dev->direct_config_read + PCI_CAPABILITY_LIST + 1, 0xff, 7);
> + memset(dev->direct_config_read + PCI_MIN_GNT, 0xff, 2);
> + memset(dev->direct_config_write + PCI_COMMAND, 0xff, 12);
> + memset(dev->direct_config_write + PCI_CARDBUS_CIS, 0xff, 8);
> + memset(dev->direct_config_write + PCI_CAPABILITY_LIST + 1, 0xff, 7);
> + memset(dev->direct_config_write + PCI_MIN_GNT, 0xff, 2);
> +
> if (get_real_device(dev, dev->host.seg, dev->host.bus,
> dev->host.dev, dev->host.func)) {
> error_report("pci-assign: Error: Couldn't get real device (%s)!",
> diff --git a/hw/device-assignment.h b/hw/device-assignment.h
> index 3d20207..ed5defb 100644
> --- a/hw/device-assignment.h
> +++ b/hw/device-assignment.h
> @@ -104,12 +104,13 @@ typedef struct AssignedDevice {
> #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
> uint32_t state;
> } cap;
> + uint8_t direct_config_read[PCI_CONFIG_SPACE_SIZE];
> + uint8_t direct_config_write[PCI_CONFIG_SPACE_SIZE];
> int irq_entries_nr;
> struct kvm_irq_routing_entry *entry;
> void *msix_table_page;
> target_phys_addr_t msix_table_addr;
> int mmio_index;
> - uint32_t emulate_cmd_mask;
> char *configfd_name;
> int32_t bootindex;
> QLIST_ENTRY(AssignedDevice) next;
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html