Amit Shah wrote:
> This patch has been contributed to by the following people:
>
> From: Or Sagi <[EMAIL PROTECTED]>
> From: Nir Peleg <[EMAIL PROTECTED]>
> From: Amit Shah <[EMAIL PROTECTED]>
> From: Ben-Ami Yassour <[EMAIL PROTECTED]>
> From: Weidong Han <[EMAIL PROTECTED]>
> From: Glauber de Oliveira Costa <[EMAIL PROTECTED]>
>
> With this patch, we can assign a device on the host machine to a
> guest.
>
> A new command-line option, -pcidevice is added.
> To invoke it for a device sitting at PCI bus:dev.fn 04:08.0, use this:
>
> -pcidevice host=04:08.0
>
> * The host driver for the device, if any, is to be removed before
> assigning the device (else device assignment will fail).
>
> * A device that shares IRQ with another host device cannot currently
> be assigned.
>
> * The RAW_IO capability is needed for this to work
>
> This works only with the in-kernel irqchip method; to use the
> userspace irqchip, a kernel module (irqhook) and some extra changes
> are needed.
>
> Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
> ---
> qemu/Makefile.target | 1 +
> qemu/hw/device-assignment.c | 619
> +++++++++++++++++++++++++++++++++++++++++++
> qemu/hw/device-assignment.h | 98 +++++++ qemu/hw/pc.c
> | 6 + qemu/hw/pci.c | 7 +
> qemu/vl.c | 18 ++
> 6 files changed, 749 insertions(+), 0 deletions(-)
> create mode 100644 qemu/hw/device-assignment.c
> create mode 100644 qemu/hw/device-assignment.h
>
> diff --git a/qemu/Makefile.target b/qemu/Makefile.target
> index d9bdeca..05a1d84 100644
> --- a/qemu/Makefile.target
> +++ b/qemu/Makefile.target
> @@ -621,6 +621,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
> OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
> OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
> OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
> +OBJS+= device-assignment.o
> ifeq ($(USE_KVM_PIT), 1)
> OBJS+= i8254-kvm.o
> endif
> diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
> new file mode 100644
> index 0000000..5ba21a0
> --- /dev/null
> +++ b/qemu/hw/device-assignment.c
> @@ -0,0 +1,619 @@
> +/*
> + * Copyright (c) 2007, Neocleus Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> modify it + * under the terms and conditions of the GNU General
> Public License, + * version 2, as published by the Free Software
> Foundation. + *
> + * This program is distributed in the hope it will be useful, but
> WITHOUT + * ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> General Public License for + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
> along with + * this program; if not, write to the Free Software
> Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA
> 02111-1307 USA. + *
> + *
> + * Assign a PCI device from the host to a guest VM.
> + *
> + * Adapted for KVM by Qumranet.
> + *
> + * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
> + * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
> + * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED])
> + * Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED])
> + */
> +#include <stdio.h>
> +#include <sys/io.h>
> +#include "qemu-kvm.h"
> +#include "hw.h"
> +#include "pc.h"
> +#include "sysemu.h"
> +#include "console.h"
> +#include <linux/kvm_para.h>
> +#include "device-assignment.h"
> +
> +/* From linux/ioport.h */
> +#define IORESOURCE_IO 0x00000100 /* Resource type */
> +#define IORESOURCE_MEM 0x00000200
> +#define IORESOURCE_IRQ 0x00000400
> +#define IORESOURCE_DMA 0x00000800
> +#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */
> +
> +/* #define DEVICE_ASSIGNMENT_DEBUG 1 */
> +
> +#ifdef DEVICE_ASSIGNMENT_DEBUG
> +#define DEBUG(fmt, args...) \
> + do { \
> + fprintf(stderr, "%s: " fmt, __func__ , ## args); \
> + } while (0)
> +#else
> +#define DEBUG(fmt, args...) do { } while(0)
> +#endif
> +
> +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
> + uint32_t value)
> +{
> + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> + uint32_t r_pio = (unsigned long)r_access->r_virtbase
> + + (addr - r_access->e_physbase);
> +
> + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x"
> + " r_virtbase=%08lx value=%08x\n",
> + __func__, r_pio, (int)r_access->e_physbase,
> + (unsigned long)r_access->r_virtbase, value);
> + outb(value, r_pio);
> +}
> +
> +static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
> + uint32_t value)
> +{
> + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> + uint32_t r_pio = (unsigned long)r_access->r_virtbase
> + + (addr - r_access->e_physbase);
> +
> + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x"
> + " r_virtbase=%08lx value=%08x\n",
> + __func__, r_pio, (int)r_access->e_physbase,
> + (unsigned long)r_access->r_virtbase, value);
> + outw(value, r_pio);
> +}
> +
> +static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
> + uint32_t value)
> +{
> + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> + uint32_t r_pio = (unsigned long)r_access->r_virtbase
> + + (addr - r_access->e_physbase);
> +
> + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x"
> + " r_virtbase=%08lx value=%08x\n",
> + __func__, r_pio, (int)r_access->e_physbase,
> + (unsigned long)r_access->r_virtbase, value);
> + outl(value, r_pio);
> +}
> +
> +static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t
> addr) +{
> + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> + uint32_t r_pio = (addr - r_access->e_physbase)
> + + (unsigned long)r_access->r_virtbase;
> + uint32_t value;
> +
> + value = inb(r_pio);
> + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x "
> + "r_virtbase=%08lx value=%08x\n",
> + __func__, r_pio, (int)r_access->e_physbase,
> + (unsigned long)r_access->r_virtbase, value);
> + return value;
> +}
> +
> +static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t
> addr) +{
> + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> + uint32_t r_pio = (addr - r_access->e_physbase)
> + + (unsigned long)r_access->r_virtbase;
> + uint32_t value;
> +
> + value = inw(r_pio);
> + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x "
> + "r_virtbase=%08lx value=%08x\n",
> + __func__, r_pio, (int)r_access->e_physbase,
> + (unsigned long)r_access->r_virtbase, value);
> + return value;
> +}
> +
> +static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t
> addr) +{
> + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> + uint32_t r_pio = (addr - r_access->e_physbase)
> + + (unsigned long)r_access->r_virtbase;
> + uint32_t value;
> +
> + value = inl(r_pio);
> + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x "
> + "r_virtbase=%08lx value=%08x\n",
> + __func__, r_pio, (int)r_access->e_physbase,
> + (unsigned long)r_access->r_virtbase, value);
> + return value;
> +}
> +
> +static void assigned_dev_iomem_map(PCIDevice *pci_dev, int
> region_num, + uint32_t e_phys,
> uint32_t e_size, int type) +{
> + AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
> + AssignedDevRegion *region = &r_dev->v_addrs[region_num];
> + int first_map = (region->e_size == 0);
> + int ret = 0;
> +
> + DEBUG("%s: e_phys=%08x r_virt=%x type=%d len=%08x region_num=%d
> \n", + __func__, e_phys, (uint32_t)region->r_virtbase, type,
> e_size, + region_num);
> +
> + region->e_physbase = e_phys;
> + region->e_size = e_size;
> +
> + /* FIXME: Add support for emulated MMIO for non-kvm guests */
> + if (kvm_enabled()) {
> + if (!first_map)
> + kvm_destroy_phys_mem(kvm_context, e_phys, e_size);
Is this a typo? Don't we need to destroy the originally registered address here (the previous e_physbase/e_size), rather than the new e_phys/e_size?
> + if (e_size > 0)
> + ret = kvm_register_phys_mem(kvm_context, e_phys,
> + region->r_virtbase, e_size,
> 0); + if (ret != 0)
> + fprintf(stderr, "%s: Error: create new mapping
> failed\n", __func__); + }
> +}
> +
> +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int
> region_num, + uint32_t addr,
> uint32_t size, int type) +{
> + AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
> + AssignedDevRegion *region = &r_dev->v_addrs[region_num];
> + int r;
> +
> + region->e_physbase = addr;
> + region->e_size = size;
> +
> + DEBUG("%s: e_phys=0x%x r_virt=%x type=0x%x len=%d region_num=%d
> \n", + __func__, addr, (uint32_t)region->r_virtbase, type,
> size, region_num); +
> + r = ioperm((uint32_t)region->r_virtbase, size, 1);
> + if (r < 0) {
> + perror("assigned_dev_ioport_map: ioperm");
> + return;
> + }
> +
> + register_ioport_read(addr, size, 1, assigned_dev_ioport_readb,
> + (void *) (r_dev->v_addrs + region_num));
> + register_ioport_read(addr, size, 2, assigned_dev_ioport_readw,
> + (void *) (r_dev->v_addrs + region_num));
> + register_ioport_read(addr, size, 4, assigned_dev_ioport_readl,
> + (void *) (r_dev->v_addrs + region_num));
> + register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb,
> + (void *) (r_dev->v_addrs + region_num));
> + register_ioport_write(addr, size, 2, assigned_dev_ioport_writew,
> + (void *) (r_dev->v_addrs + region_num));
> + register_ioport_write(addr, size, 4, assigned_dev_ioport_writel,
> + (void *) (r_dev->v_addrs + region_num));
> +}
> +
> +static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t
> address, + uint32_t val, int
> len) +{
> + int fd, r;
> +
> + DEBUG("%s: (%x.%x): address=%04x val=0x%08x len=%d\n",
> + __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
> + (uint16_t) address, val, len);
> +
> + if (address == 0x4) {
> + pci_default_write_config(d, address, val, len);
> + /* Continue to program the card */
> + }
> +
> + if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
> + address == 0x3c || address == 0x3d) {
> + /* used for update-mappings (BAR emulation) */
> + pci_default_write_config(d, address, val, len);
> + return;
> + }
> + DEBUG("%s: NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
> + __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
> + (uint16_t) address, val, len);
> + fd = ((AssignedDevice *)d)->real_device.config_fd;
> + r = lseek(fd, address, SEEK_SET);
> + if (r < 0) {
> + fprintf(stderr, "%s: bad seek, errno = %d\n", __func__,
> errno); + return;
> + }
> +again:
> + r = write(fd, &val, len);
> + if (r < 0) {
> + if (errno == EINTR || errno == EAGAIN)
> + goto again;
> + fprintf(stderr, "%s: write failed, errno = %d\n", __func__,
> errno); + }
> +}
> +
> +static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t
> address, + int len)
> +{
> + uint32_t val = 0;
> + int fd, r;
> +
> + if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
> + address == 0x3c || address == 0x3d) {
> + val = pci_default_read_config(d, address, len);
> + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
> + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address,
> val, len); + return val;
> + }
> +
> + /* vga specific, remove later */
> + if (address == 0xFC)
> + goto do_log;
> +
> + fd = ((AssignedDevice *)d)->real_device.config_fd;
> + r = lseek(fd, address, SEEK_SET);
> + if (r < 0) {
> + fprintf(stderr, "%s: bad seek, errno = %d\n", __func__,
> errno); + return val;
> + }
> +again:
> + r = read(fd, &val, len);
> + if (r < 0) {
> + if (errno == EINTR || errno == EAGAIN)
> + goto again;
> + fprintf(stderr, "%s: read failed, errno = %d\n",
> + __func__, errno);
> + }
> +do_log:
> + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
> + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val,
> len); +
> + /* kill the special capabilities */
> + if (address == 4 && len == 4)
> + val &= ~0x100000;
> + else if (address == 6)
> + val &= ~0x10;
> +
> + return val;
> +}
> +
> +static int assigned_dev_register_regions(PCIRegion *io_regions,
> + unsigned long regions_num,
> + AssignedDevice *pci_dev)
> +{
> + uint32_t i;
> + PCIRegion *cur_region = io_regions;
> +
> + for (i = 0; i < regions_num; i++, cur_region++) {
> + if (!cur_region->valid)
> + continue;
> + pci_dev->v_addrs[i].num = i;
> +
> + /* handle memory io regions */
> + if (cur_region->type & IORESOURCE_MEM) {
> + int t = cur_region->type & IORESOURCE_PREFETCH
> + ? PCI_ADDRESS_SPACE_MEM_PREFETCH
> + : PCI_ADDRESS_SPACE_MEM;
> +
> + /* map physical memory */
> + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
> + pci_dev->v_addrs[i].r_virtbase =
> + mmap(NULL,
> + (cur_region->size + 0xFFF) & 0xFFFFF000,
> + PROT_WRITE | PROT_READ, MAP_SHARED,
> + cur_region->resource_fd, (off_t) 0);
> +
> + if ((void *) -1 == pci_dev->v_addrs[i].r_virtbase) {
> + fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!"
> + "\n", __func__,
> + (uint32_t) (cur_region->base_addr));
> + return -1;
> + }
> + pci_dev->v_addrs[i].r_size = cur_region->size;
> + pci_dev->v_addrs[i].e_size = 0;
> +
> + /* add offset */
> + pci_dev->v_addrs[i].r_virtbase +=
> + (cur_region->base_addr & 0xFFF);
> +
> + pci_register_io_region((PCIDevice *) pci_dev, i,
> + cur_region->size, t,
> + assigned_dev_iomem_map);
> + continue;
> + }
> + /* handle port io regions */
> + pci_register_io_region((PCIDevice *) pci_dev, i,
> + cur_region->size,
> PCI_ADDRESS_SPACE_IO, +
> assigned_dev_ioport_map); +
> + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
> + pci_dev->v_addrs[i].r_virtbase =
> + (void *)(long)cur_region->base_addr;
> + /* not relevant for port io */
> + pci_dev->v_addrs[i].memory_index = 0;
> + }
> +
> + /* success */
> + return 0;
> +}
> +
> +static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
> + uint8_t r_dev, uint8_t r_func)
> +{
> + char dir[128], name[128], comp[16];
> + int fd, r = 0;
> + FILE *f;
> + unsigned long long start, end, size, flags;
> + PCIRegion *rp;
> + PCIDevRegions *dev = &pci_dev->real_device;
> +
> + dev->region_number = 0;
> +
> + snprintf(dir, 128, "/sys/bus/pci/devices/0000:%02x:%02x.%x/",
> + r_bus, r_dev, r_func);
> + strncpy(name, dir, 128);
> + strncat(name, "config", 6);
> + fd = open(name, O_RDWR);
> + if (fd == -1) {
> + fprintf(stderr, "%s: %s: %m\n", __func__, name);
> + return 1;
> + }
> + dev->config_fd = fd;
> +again:
> + r = read(fd, pci_dev->dev.config, sizeof(pci_dev->dev.config));
> + if (r < 0) {
> + if (errno == EINTR || errno == EAGAIN)
> + goto again;
> + fprintf(stderr, "%s: read failed, errno = %d\n", __func__,
> errno); + }
> + strncpy(name, dir, 128);
> + strncat(name, "resource", 8);
> +
> + f = fopen(name, "r");
> + if (f == NULL) {
> + fprintf(stderr, "%s: %s: %m\n", __func__, name);
> + return 1;
> + }
> + r = -1;
> + while (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) == 3)
> { + r++;
> + rp = dev->regions + r;
> + rp->valid = 0;
> + size = end - start + 1;
> + flags &= IORESOURCE_IO | IORESOURCE_MEM |
> IORESOURCE_PREFETCH; + if (size == 0 || (flags &
> ~IORESOURCE_PREFETCH) == 0) + continue;
> + if (flags & IORESOURCE_MEM) {
> + flags &= ~IORESOURCE_IO;
> + snprintf(comp, 16, "resource%d", r);
> + strncpy(name, dir, 128);
> + strncat(name, comp, 16);
> + fd = open(name, O_RDWR);
> + if (fd == -1)
> + continue; /* probably ROM */
> + rp->resource_fd = fd;
> + } else
> + flags &= ~IORESOURCE_PREFETCH;
> +
> + rp->type = flags;
> + rp->valid = 1;
> + rp->base_addr = start;
> + rp->size = size;
> + DEBUG("%s: region %d size %d start 0x%x type %d resource_fd
> %d\n", + __func__, r, rp->size, start, rp->type,
> rp->resource_fd); + }
> + fclose(f);
> +
> + dev->region_number = r;
> + return 0;
> +}
> +
> +static int disable_iommu;
> +int nr_assigned_devices;
> +static LIST_HEAD(, AssignedDevInfo) adev_head;
> +
> +static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
> +{
> + return (uint32_t)bus << 8 | (uint32_t)devfn;
> +}
> +
> +static AssignedDevice *register_real_device(PCIBus *e_bus,
> + const char *e_dev_name,
> + int e_devfn, uint8_t
> r_bus, + uint8_t r_dev,
> uint8_t r_func) +{
> + int r;
> + AssignedDevice *pci_dev;
> + uint8_t e_device, e_intx;
> +
> + DEBUG("%s: Registering real physical device %s (devfn=0x%x)\n",
> + __func__, e_dev_name, e_devfn);
> +
> + pci_dev = (AssignedDevice *)
> + pci_register_device(e_bus, e_dev_name,
> sizeof(AssignedDevice), + e_devfn,
> assigned_dev_pci_read_config, +
> assigned_dev_pci_write_config); + if (NULL == pci_dev) {
> + fprintf(stderr, "%s: Error: Couldn't register real device
> %s\n", + __func__, e_dev_name);
> + return NULL;
> + }
> + if (get_real_device(pci_dev, r_bus, r_dev, r_func)) {
> + fprintf(stderr, "%s: Error: Couldn't get real device
> (%s)!\n", + __func__, e_dev_name);
> + goto out;
> + }
> +
> + /* handle real device's MMIO/PIO BARs */
> + if (assigned_dev_register_regions(pci_dev->real_device.regions,
> +
> pci_dev->real_device.region_number, +
> pci_dev)) + goto out;
> +
> + /* handle interrupt routing */
> + e_device = (pci_dev->dev.devfn >> 3) & 0x1f;
> + e_intx = pci_dev->dev.config[0x3d] - 1;
> + pci_dev->intpin = e_intx;
> + pci_dev->run = 0;
> + pci_dev->girq = 0;
> + pci_dev->h_busnr = r_bus;
> + pci_dev->h_devfn = PCI_DEVFN(r_dev, r_func);
> +
> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT
> + if (kvm_enabled()) {
> + struct kvm_assigned_pci_dev assigned_dev_data;
> +
> + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
> + assigned_dev_data.assigned_dev_id =
> + calc_assigned_dev_id(pci_dev->h_busnr,
> + (uint32_t)pci_dev->h_devfn);
> + assigned_dev_data.busnr = pci_dev->h_busnr;
> + assigned_dev_data.devfn = pci_dev->h_devfn;
> +
> +#ifdef KVM_CAP_IOMMU
> + /* We always enable the IOMMU if present
> + * (or when not disabled on the command line)
> + */
> + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
> + if (r && !disable_iommu)
> + assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
> +#endif
> + r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
> + if (r < 0) {
> + fprintf(stderr, "Could not notify kernel about "
> + "assigned device \"%s\"\n", e_dev_name);
> + perror("register_real_device");
> + goto out;
> + }
> + }
> +#endif
> + term_printf("Registered host PCI device %02x:%02x.%1x "
> + "(\"%s\") as guest device %02x:%02x.%1x\n",
> + r_bus, r_dev, r_func, e_dev_name,
> + pci_bus_num(e_bus), e_device, r_func);
> +
> + return pci_dev;
> +out:
> +/* pci_unregister_device(&pci_dev->dev); */
> + return NULL;
> +}
> +
> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT
> +/* The pci config space got updated. Check if irq numbers have
> changed + * for our devices
> + */
> +void assigned_dev_update_irq(PCIDevice *d)
> +{
> + int irq, r;
> + AssignedDevice *assigned_dev;
> + AssignedDevInfo *adev;
> +
> + LIST_FOREACH(adev, &adev_head, next) {
> + assigned_dev = adev->assigned_dev;
> + irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin);
> + irq = piix_get_irq(irq);
> +
> + if (irq != assigned_dev->girq) {
> + struct kvm_assigned_irq assigned_irq_data;
> +
> + memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
> + assigned_irq_data.assigned_dev_id =
> + calc_assigned_dev_id(assigned_dev->h_busnr,
> + (uint8_t)
> assigned_dev->h_devfn); + assigned_irq_data.guest_irq =
> irq; + assigned_irq_data.host_irq =
> assigned_dev->real_device.irq; + r =
> kvm_assign_irq(kvm_context, &assigned_irq_data); + if (r <
> 0) { + perror("assigned_dev_update_irq");
> + fprintf(stderr, "Are you assigning a device "
> + "that shares IRQ with some other device?\n");
> + pci_unregister_device(&assigned_dev->dev);
> + /* FIXME: Delete node from list */
> + continue;
> + }
> + assigned_dev->girq = irq;
> + }
> + }
> +}
> +#endif
> +
> +struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus
> *bus) +{
> + adev->assigned_dev = register_real_device(bus,
> + adev->name, -1,
> + adev->bus,
> + adev->dev,
> + adev->func);
> + return &adev->assigned_dev->dev;
> +}
> +
> +int init_all_assigned_devices(PCIBus *bus)
> +{
> + struct AssignedDevInfo *adev;
> +
> + LIST_FOREACH(adev, &adev_head, next)
> + if (init_assigned_device(adev, bus) == NULL)
> + return -1;
> + return 0;
> +}
> +
> +/*
> + * Syntax to assign device:
> + *
> + * -pcidevice host=bus:dev.func[,dma=none][,name=string]
> + *
> + * Example:
> + * -pcidevice host=00:13.0,dma=none
> + *
> + * dma can currently only be 'none', which disables iommu support.
> + */
> +AssignedDevInfo *add_assigned_device(const char *arg)
> +{
> + char *cp, *cp1;
> + char device[8];
> + char dma[6];
> + int r;
> + AssignedDevInfo *adev;
> +
> + adev = qemu_mallocz(sizeof(AssignedDevInfo));
> + if (adev == NULL) {
> + fprintf(stderr, "%s: Out of memory\n", __func__);
> + return NULL;
> + }
> + r = get_param_value(device, sizeof(device), "host", arg);
> + r = get_param_value(adev->name, sizeof(adev->name), "name", arg);
> + if (!r)
> + strncpy(adev->name, device, 8);
> +
> +#ifdef KVM_CAP_IOMMU
> + r = get_param_value(dma, sizeof(dma), "dma", arg);
> + if (r && !strncmp(dma, "none", 4))
> + disable_iommu = 1;
> +#endif
> + cp = device;
> + adev->bus = strtoul(cp, &cp1, 16);
> + if (*cp1 != ':')
> + goto bad;
> + cp = cp1 + 1;
> +
> + adev->dev = strtoul(cp, &cp1, 16);
> + if (*cp1 != '.')
> + goto bad;
> + cp = cp1 + 1;
> +
> + adev->func = strtoul(cp, &cp1, 16);
> +
> + nr_assigned_devices++;
> + LIST_INSERT_HEAD(&adev_head, adev, next);
> + return adev;
> +bad:
> + fprintf(stderr, "pcidevice argument parse error; "
> + "please check the help text for usage\n");
> + qemu_free(adev);
> + return NULL;
> +}
> diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
> new file mode 100644
> index 0000000..e4148df
> --- /dev/null
> +++ b/qemu/hw/device-assignment.h
> @@ -0,0 +1,98 @@
> +/*
> + * Copyright (c) 2007, Neocleus Corporation.
> + * Copyright (c) 2007, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> modify it + * under the terms and conditions of the GNU General
> Public License, + * version 2, as published by the Free Software
> Foundation. + *
> + * This program is distributed in the hope it will be useful, but
> WITHOUT + * ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> General Public License for + * more details.
> + *
> + * You should have received a copy of the GNU General Public License
> along with + * this program; if not, write to the Free Software
> Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA
> 02111-1307 USA. + *
> + * Data structures for storing PCI state
> + *
> + * Adapted to kvm by Qumranet
> + *
> + * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
> + * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
> + * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED])
> + * Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED])
> + */
> +
> +#ifndef __DEVICE_ASSIGNMENT_H__
> +#define __DEVICE_ASSIGNMENT_H__
> +
> +#include <sys/mman.h>
> +#include "qemu-common.h"
> +#include "sys-queue.h"
> +#include "pci.h"
> +
> +/* From include/linux/pci.h in the kernel sources */
> +#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) &
> 0x07)) +
> +#define MAX_IO_REGIONS (6)
> +
> +typedef struct {
> + int type; /* Memory or port I/O */
> + int valid;
> + uint32_t base_addr;
> + uint32_t size; /* size of the region */
> + int resource_fd;
> +} PCIRegion;
> +
> +typedef struct {
> + uint8_t bus, dev, func; /* Bus inside domain, device and
> function */ + int irq; /* IRQ number */
> + uint16_t region_number; /* number of active regions */
> +
> + /* Port I/O or MMIO Regions */
> + PCIRegion regions[MAX_IO_REGIONS];
> + int config_fd;
> +} PCIDevRegions;
> +
> +typedef struct {
> + target_phys_addr_t e_physbase;
> + uint32_t memory_index;
> + void *r_virtbase; /* mmapped access address */
> + int num; /* our index within v_addrs[] */
> + uint32_t e_size; /* emulated size of region in bytes */
> + uint32_t r_size; /* real size of region in bytes */
> +} AssignedDevRegion;
> +
> +typedef struct {
> + PCIDevice dev;
> + int intpin;
> + uint8_t debug_flags;
> + AssignedDevRegion v_addrs[PCI_NUM_REGIONS];
> + PCIDevRegions real_device;
> + int run;
> + int girq;
> + unsigned char h_busnr;
> + unsigned int h_devfn;
> + int bound;
> +} AssignedDevice;
> +
> +typedef struct AssignedDevInfo AssignedDevInfo;
> +
> +struct AssignedDevInfo {
> + char name[15];
> + int bus;
> + int dev;
> + int func;
> + AssignedDevice *assigned_dev;
> + LIST_ENTRY(AssignedDevInfo) next;
> +};
> +
> +PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus);
> +int init_all_assigned_devices(PCIBus *bus);
> +AssignedDevInfo *add_assigned_device(const char *arg);
> +void assigned_dev_set_vector(int irq, int vector);
> +void assigned_dev_ack_mirq(int vector);
> +
> +#endif /* __DEVICE_ASSIGNMENT_H__ */
> diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
> index d559f0c..e0438ed 100644
> --- a/qemu/hw/pc.c
> +++ b/qemu/hw/pc.c
> @@ -33,6 +33,7 @@
> #include "boards.h"
> #include "console.h"
> #include "fw_cfg.h"
> +#include "device-assignment.h"
>
> #include "qemu-kvm.h"
>
> @@ -993,6 +994,11 @@ static void pc_init1(ram_addr_t ram_size, int
> vga_ram_size, }
> }
>
> + /* Initialize assigned devices */
> + if (pci_enabled)
> + if(init_all_assigned_devices(pci_bus))
> + exit(1);
> +
> rtc_state = rtc_init(0x70, i8259[8]);
>
> qemu_register_boot_set(pc_boot_set, rtc_state);
> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
> index c82cd20..f86a8a7 100644
> --- a/qemu/hw/pci.c
> +++ b/qemu/hw/pci.c
> @@ -50,6 +50,7 @@ struct PCIBus {
>
> static void pci_update_mappings(PCIDevice *d);
> static void pci_set_irq(void *opaque, int irq_num, int level);
> +void assigned_dev_update_irq(PCIDevice *d);
>
> target_phys_addr_t pci_mem_base;
> static int pci_irq_index;
> @@ -453,6 +454,12 @@ void pci_default_write_config(PCIDevice *d,
> val >>= 8;
> }
>
> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT
> + if (kvm_enabled() && qemu_kvm_irqchip_in_kernel() &&
> + address >= 0x60 && address <= 0x63)
> + assigned_dev_update_irq(d);
> +#endif
> +
> end = address + len;
> if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
> /* if the command register is modified, we must modify the
> mappings */
> diff --git a/qemu/vl.c b/qemu/vl.c
> index 388e79d..5a39d12 100644
> --- a/qemu/vl.c
> +++ b/qemu/vl.c
> @@ -38,6 +38,7 @@
> #include "qemu-char.h"
> #include "block.h"
> #include "audio/audio.h"
> +#include "hw/device-assignment.h"
> #include "migration.h"
> #include "balloon.h"
> #include "qemu-kvm.h"
> @@ -8692,6 +8693,12 @@ static void help(int exitcode)
> #endif
> "-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n"
> "-no-kvm-pit disable KVM kernel mode PIT\n"
> +#if defined(TARGET_I386) || defined(TARGET_X86_64) ||
> defined(__linux__) + "-pcidevice
> host=bus:dev.func[,dma=none][,name=\"string\"]\n" + "
> expose a PCI device to the guest OS.\n" + "
> dma=none: don't perform any dma translations (default is to use an
> iommu)\n" + " 'string' is used in log
> output.\n" +#endif #endif
> #ifdef TARGET_I386
> "-no-acpi disable ACPI\n"
> @@ -8811,6 +8818,9 @@ enum {
> QEMU_OPTION_no_kvm,
> QEMU_OPTION_no_kvm_irqchip,
> QEMU_OPTION_no_kvm_pit,
> +#if defined(TARGET_I386) || defined(TARGET_X86_64) ||
> defined(__linux__) + QEMU_OPTION_pcidevice,
> +#endif
> QEMU_OPTION_no_reboot,
> QEMU_OPTION_no_shutdown,
> QEMU_OPTION_show_cursor,
> @@ -8900,6 +8910,9 @@ static const QEMUOption qemu_options[] = {
> #endif
> { "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip },
> { "no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit },
> +#if defined(TARGET_I386) || defined(TARGET_X86_64) ||
> defined(__linux__) + { "pcidevice", HAS_ARG, QEMU_OPTION_pcidevice
> }, +#endif
> #endif
> #if defined(TARGET_PPC) || defined(TARGET_SPARC)
> { "g", 1, QEMU_OPTION_g },
> @@ -9844,6 +9857,11 @@ int main(int argc, char **argv)
> kvm_pit = 0;
> break;
> }
> +#if defined(TARGET_I386) || defined(TARGET_X86_64) ||
> defined(__linux__) + case QEMU_OPTION_pcidevice:
> + add_assigned_device(optarg);
> + break;
> +#endif
> #endif
> case QEMU_OPTION_usb:
> usb_enabled = 1;
> --
> 1.6.0.2
Best Regards,
Disheng, Su
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html