On Wed, Oct 29, 2008 at 12:27:19PM +0000, Mark McLoughlin wrote:
> On Wed, 2008-10-29 at 14:20 +0200, [EMAIL PROTECTED] wrote:
> > diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c
> > index b9067b8..27d5f02 100644
> > --- a/qemu/hw/piix_pci.c
> > +++ b/qemu/hw/piix_pci.c
> > @@ -246,9 +246,9 @@ static void piix3_set_irq(qemu_irq *pic, int
> > irq_num, int level)
> > int piix_get_irq(int pin)
> > {
> > if (piix3_dev)
> > - return piix3_dev->config[PIIX_CONFIG_IRQ_ROUTE + pin];
> > + return piix3_dev->config[0x60+pin];
> > if (piix4_dev)
> > - return piix4_dev->config[PIIX_CONFIG_IRQ_ROUTE + pin];
> > + return piix4_dev->config[0x60+pin];
> >
> > return 0;
> > }
>
> Another rebase mixup?
Argh. Indeed. Here's 5/6 again without the offending hunk. Hopefully
that's the last one for today or I might have to return my "git
competency" boy scout badge.
>From 1dd6f84986a4224635ad5a6f9edfa57b1c5a1e7b Mon Sep 17 00:00:00 2001
From: Muli Ben-Yehuda <[EMAIL PROTECTED]>
Date: Wed, 29 Oct 2008 14:12:08 +0200
Subject: [PATCH 5/6] device assignment: support for assigning PCI devices to
guests
This patch has been contributed to by the following people:
Or Sagi <[EMAIL PROTECTED]>
Nir Peleg <[EMAIL PROTECTED]>
Amit Shah <[EMAIL PROTECTED]>
Ben-Ami Yassour <[EMAIL PROTECTED]>
Weidong Han <[EMAIL PROTECTED]>
Glauber de Oliveira Costa <[EMAIL PROTECTED]>
Muli Ben-Yehuda <[EMAIL PROTECTED]>
With this patch, we can assign a device on the host machine to a
guest.
A new command-line option, -pcidevice is added.
To invoke it for a device sitting at PCI bus:dev.fn 04:08.0, use this:
-pcidevice host=04:08.0
* The host driver for the device, if any, is to be removed before
assigning the device (else device assignment will fail).
* A device that shares IRQ with another host device cannot currently
be assigned.
* The CAP_SYS_RAWIO capability is needed for this to work.
This works only with the in-kernel irqchip method; to use the
userspace irqchip, a kernel module (irqhook) and some extra changes
are needed.
Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]>
---
qemu/Makefile.target | 3 +
qemu/configure | 21 ++
qemu/hw/device-assignment.c | 616 +++++++++++++++++++++++++++++++++++++++++++
qemu/hw/device-assignment.h | 106 ++++++++
qemu/hw/pc.c | 18 ++
qemu/hw/pci.c | 8 +
qemu/hw/piix_pci.c | 4 +-
qemu/qemu-kvm.c | 13 +
qemu/qemu-kvm.h | 8 +
qemu/vl.c | 26 ++
10 files changed, 821 insertions(+), 2 deletions(-)
create mode 100644 qemu/hw/device-assignment.c
create mode 100644 qemu/hw/device-assignment.h
diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index d9bdeca..64d4e44 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -621,6 +621,9 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
+ifeq ($(USE_KVM_DEVICE_ASSIGNMENT), 1)
+OBJS+= device-assignment.o
+endif
ifeq ($(USE_KVM_PIT), 1)
OBJS+= i8254-kvm.o
endif
diff --git a/qemu/configure b/qemu/configure
index 922a156..618dbce 100755
--- a/qemu/configure
+++ b/qemu/configure
@@ -101,6 +101,7 @@ linux="no"
kqemu="no"
kvm="no"
kvm_cap_pit="no"
+kvm_cap_device_assignment="no"
profiler="no"
kernel_path=""
cocoa="no"
@@ -749,6 +750,9 @@ fi
# KVM probe
if test "$kvm" = "yes" ; then
+
+# test for KVM_CAP_PIT
+
cat > $TMPC <<EOF
#include <libkvm.h>
#ifndef KVM_CAP_PIT
@@ -759,6 +763,19 @@ EOF
if $cc $ARCH_CFLAGS $CFLAGS -I"$kernel_path"/include -o $TMPE ${OS_CFLAGS}
$TMPC 2> /dev/null ; then
kvm_cap_pit="yes"
fi
+
+# test for KVM_CAP_DEVICE_ASSIGNMENT
+
+cat > $TMPC <<EOF
+#include <libkvm.h>
+#ifndef KVM_CAP_DEVICE_ASSIGNMENT
+#error "kvm no device assignment capability"
+#endif
+int main(void) { return 0; }
+EOF
+ if $cc $ARCH_CFLAGS $CFLAGS -I"$kernel_path"/include -o $TMPE ${OS_CFLAGS}
$TMPC 2> /dev/null ; then
+ kvm_cap_device_assignment="yes"
+ fi
fi
##########################################
@@ -1515,6 +1532,10 @@ configure_kvm() {
echo "USE_KVM_PIT=1" >> $config_mak
echo "#define USE_KVM_PIT 1" >> $config_h
fi
+ if test $kvm_cap_device_assignment = "yes" ; then
+ echo "USE_KVM_DEVICE_ASSIGNMENT=1" >> $config_mak
+ echo "#define USE_KVM_DEVICE_ASSIGNMENT 1" >> $config_h
+ fi
disable_cpu_emulation
fi
}
diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
new file mode 100644
index 0000000..78b7e14
--- /dev/null
+++ b/qemu/hw/device-assignment.c
@@ -0,0 +1,616 @@
+/*
+ * Copyright (c) 2007, Neocleus Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *
+ * Assign a PCI device from the host to a guest VM.
+ *
+ * Adapted for KVM by Qumranet.
+ *
+ * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
+ * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
+ * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED])
+ * Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED])
+ * Copyright (C) 2008, IBM, Muli Ben-Yehuda ([EMAIL PROTECTED])
+ */
+#include <stdio.h>
+#include <sys/io.h>
+#include "qemu-kvm.h"
+#include "hw.h"
+#include "pc.h"
+#include "sysemu.h"
+#include "console.h"
+#include "device-assignment.h"
+
+/* From linux/ioport.h */
+#define IORESOURCE_IO 0x00000100 /* Resource type */
+#define IORESOURCE_MEM 0x00000200
+#define IORESOURCE_IRQ 0x00000400
+#define IORESOURCE_DMA 0x00000800
+#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */
+
+/* #define DEVICE_ASSIGNMENT_DEBUG 1 */
+
+#ifdef DEVICE_ASSIGNMENT_DEBUG
+#define DEBUG(fmt, ...) \
+ do { \
+ fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \
+ } while (0)
+#else
+#define DEBUG(fmt, ...) do { } while(0)
+#endif
+
+/*
+ * Translate a guest I/O port that falls inside an assigned region into the
+ * matching host port: the offset of addr from the region's emulated base
+ * (e_physbase) is applied to the region's real base port (u.r_baseport).
+ */
+static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr)
+{
+    uint32_t offset = addr - region->e_physbase;
+
+    return region->u.r_baseport + offset;
+}
+
+/*
+ * 8-bit port write handler: opaque is the AssignedDevRegion that owns the
+ * port; the value is forwarded to the translated host port with outb().
+ */
+static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
+                                       uint32_t value)
+{
+    AssignedDevRegion *region = opaque;
+    uint32_t host_port = guest_to_host_ioport(region, addr);
+
+    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+          host_port, (int)region->e_physbase,
+          (unsigned long)region->u.r_baseport, value);
+
+    outb(value, host_port);
+}
+
+/*
+ * 16-bit port write handler: opaque is the AssignedDevRegion that owns the
+ * port; the value is forwarded to the translated host port with outw().
+ */
+static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
+                                       uint32_t value)
+{
+    AssignedDevRegion *region = opaque;
+    uint32_t host_port = guest_to_host_ioport(region, addr);
+
+    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+          host_port, (int)region->e_physbase,
+          (unsigned long)region->u.r_baseport, value);
+
+    outw(value, host_port);
+}
+
+/*
+ * 32-bit port write handler: opaque is the AssignedDevRegion that owns the
+ * port; the value is forwarded to the translated host port with outl().
+ */
+static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
+                                       uint32_t value)
+{
+    AssignedDevRegion *region = opaque;
+    uint32_t host_port = guest_to_host_ioport(region, addr);
+
+    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+          host_port, (int)region->e_physbase,
+          (unsigned long)region->u.r_baseport, value);
+
+    outl(value, host_port);
+}
+
+/*
+ * 8-bit port read handler: opaque is the AssignedDevRegion that owns the
+ * port; the value is fetched from the translated host port with inb().
+ *
+ * The debug label now reads "r_baseport=" like the other five accessors
+ * (it was truncated to "r_=" here), so traces can be grepped uniformly.
+ */
+static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
+{
+    AssignedDevRegion *r_access = opaque;
+    uint32_t r_pio = guest_to_host_ioport(r_access, addr);
+    uint32_t value;
+
+    value = inb(r_pio);
+
+    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+          r_pio, (int)r_access->e_physbase,
+          (unsigned long)r_access->u.r_baseport, value);
+
+    return value;
+}
+
+/*
+ * 16-bit port read handler: opaque is the AssignedDevRegion that owns the
+ * port; the value is fetched from the translated host port with inw().
+ */
+static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
+{
+    AssignedDevRegion *region = opaque;
+    uint32_t host_port = guest_to_host_ioport(region, addr);
+    uint32_t value = inw(host_port);
+
+    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+          host_port, (int)region->e_physbase,
+          (unsigned long)region->u.r_baseport, value);
+
+    return value;
+}
+
+/*
+ * 32-bit port read handler: opaque is the AssignedDevRegion that owns the
+ * port; the value is fetched from the translated host port with inl().
+ */
+static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
+{
+    AssignedDevRegion *region = opaque;
+    uint32_t host_port = guest_to_host_ioport(region, addr);
+    uint32_t value = inl(host_port);
+
+    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+          host_port, (int)region->e_physbase,
+          (unsigned long)region->u.r_baseport, value);
+
+    return value;
+}
+
+/*
+ * BAR mapping callback for memory regions.
+ *
+ * Records the new guest address/size for the region, tears down the previous
+ * KVM mapping if one existed (a previous e_size of 0 means "never mapped"),
+ * and registers the mmap()ed host memory at the new guest address.  Exits
+ * the process if KVM refuses the new mapping.
+ */
+static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
+                                   uint32_t e_phys, uint32_t e_size, int type)
+{
+    AssignedDevice *adev = (AssignedDevice *) pci_dev;
+    AssignedDevRegion *region = &adev->v_addrs[region_num];
+    uint32_t prev_phys = region->e_physbase;
+    uint32_t prev_size = region->e_size;
+
+    DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n",
+          e_phys, region->u.r_virtbase, type, e_size, region_num);
+
+    region->e_physbase = e_phys;
+    region->e_size = e_size;
+
+    /* Drop the old slot before installing the new one. */
+    if (prev_size != 0)
+        kvm_destroy_phys_mem(kvm_context, prev_phys, prev_size);
+
+    /* Nothing to register for an empty region. */
+    if (e_size == 0)
+        return;
+
+    if (kvm_register_phys_mem(kvm_context, e_phys,
+                              region->u.r_virtbase, e_size, 0) != 0) {
+        fprintf(stderr, "%s: Error: create new mapping failed\n", __func__);
+        exit(1);
+    }
+}
+
+/*
+ * BAR mapping callback for port I/O regions.
+ *
+ * Records the new guest base/size, revokes the ioperm() grant made for the
+ * previous size (if the region was mapped before), grants access to the host
+ * port range on every vcpu, and installs the byte/word/long read and write
+ * handlers for the guest port range.
+ */
+static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
+                                    uint32_t addr, uint32_t size, int type)
+{
+    AssignedDevice *adev = (AssignedDevice *) pci_dev;
+    AssignedDevRegion *region = &adev->v_addrs[region_num];
+    uint32_t prev_size = region->e_size;
+    struct ioperm_data data;
+    int cpu;
+
+    region->e_physbase = addr;
+    region->e_size = size;
+
+    DEBUG("e_phys=0x%x r_baseport=%x type=0x%x len=%d region_num=%d \n",
+          addr, region->u.r_baseport, type, size, region_num);
+
+    memset(&data, 0, sizeof(data));
+    /* The host base port is not changed here; only the size may differ. */
+    data.start_port = region->u.r_baseport;
+
+    if (prev_size != 0) {
+        /* Region was mapped before: turn the old range off first. */
+        data.num = prev_size;
+        data.turn_on = 0;
+
+        for (cpu = 0; cpu < smp_cpus; ++cpu)
+            kvm_ioperm(qemu_kvm_cpu_env(cpu), &data);
+    }
+
+    data.num = size;
+    data.turn_on = 1;
+
+    for (cpu = 0; cpu < smp_cpus; ++cpu)
+        kvm_ioperm(qemu_kvm_cpu_env(cpu), &data);
+
+    register_ioport_read(addr, size, 1, assigned_dev_ioport_readb, region);
+    register_ioport_read(addr, size, 2, assigned_dev_ioport_readw, region);
+    register_ioport_read(addr, size, 4, assigned_dev_ioport_readl, region);
+    register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb, region);
+    register_ioport_write(addr, size, 2, assigned_dev_ioport_writew, region);
+    register_ioport_write(addr, size, 4, assigned_dev_ioport_writel, region);
+}
+
+/*
+ * Config-space write handler for an assigned device.
+ *
+ *  - Offset 0x4 is applied to the emulated config space and then also
+ *    written through to the real device.
+ *  - Offsets 0x10..0x24, 0x34, 0x3c and 0x3d are emulated only (used for
+ *    update-mappings / BAR emulation) and never reach the real device.
+ *  - Everything else is written to the real device through its config fd.
+ *
+ * A failed or short pwrite() terminates the process; EINTR/EAGAIN retry.
+ */
+static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
+                                          uint32_t val, int len)
+{
+    int cfg_fd;
+    ssize_t written;
+
+    DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
+          ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
+          (uint16_t) address, val, len);
+
+    /* Update the emulated copy, then continue to program the card. */
+    if (address == 0x4)
+        pci_default_write_config(d, address, val, len);
+
+    if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
+        address == 0x3c || address == 0x3d) {
+        /* used for update-mappings (BAR emulation) */
+        pci_default_write_config(d, address, val, len);
+        return;
+    }
+
+    DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
+          ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
+          (uint16_t) address, val, len);
+
+    cfg_fd = ((AssignedDevice *)d)->real_device.config_fd;
+
+    for (;;) {
+        written = pwrite(cfg_fd, &val, len, address);
+        if (written == len)
+            break;
+
+        /* Interrupted or would block: try the same write again. */
+        if ((written < 0) && (errno == EINTR || errno == EAGAIN))
+            continue;
+
+        fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n",
+                __func__, written, errno);
+
+        exit(1);
+    }
+}
+
+/*
+ * Config-space read handler for an assigned device.
+ *
+ *  - Offsets 0x10..0x24, 0x34, 0x3c and 0x3d come from the emulated config
+ *    space.
+ *  - Offset 0xFC is not read from the device and yields 0 (vga specific).
+ *  - Everything else is read from the real device through its config fd;
+ *    a failed or short pread() terminates the process, EINTR/EAGAIN retry.
+ *
+ * Before returning a value read from the device, the "special capabilities"
+ * bit is cleared: bit 20 of a 4-byte read at offset 4, or bit 4 of a read
+ * at offset 6.
+ */
+static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
+                                             int len)
+{
+    uint32_t val = 0;
+    ssize_t nread;
+
+    if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
+        address == 0x3c || address == 0x3d) {
+        val = pci_default_read_config(d, address, len);
+        DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
+              (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
+        return val;
+    }
+
+    /* vga specific, remove later: offset 0xFC skips the device read */
+    if (address != 0xFC) {
+        int cfg_fd = ((AssignedDevice *)d)->real_device.config_fd;
+
+        for (;;) {
+            nread = pread(cfg_fd, &val, len, address);
+            if (nread == len)
+                break;
+
+            /* Interrupted or would block: try the same read again. */
+            if ((nread < 0) && (errno == EINTR || errno == EAGAIN))
+                continue;
+
+            fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n",
+                    __func__, nread, errno);
+
+            exit(1);
+        }
+    }
+
+    DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
+          (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
+
+    /* kill the special capabilities */
+    if (address == 4 && len == 4)
+        val &= ~0x100000;
+    else if (address == 6)
+        val &= ~0x10;
+
+    return val;
+}
+
+/*
+ * Register every valid host region of the device as an emulated PCI BAR.
+ *
+ * Memory regions are mmap()ed from their sysfs resource fd (length rounded
+ * up to a 4K multiple) and registered with assigned_dev_iomem_map as the
+ * mapping callback; all other valid regions are registered as port I/O
+ * with assigned_dev_ioport_map.
+ *
+ * Returns 0 on success, -1 if a memory region could not be mmap()ed.
+ */
+static int assigned_dev_register_regions(PCIRegion *io_regions,
+                                         unsigned long regions_num,
+                                         AssignedDevice *pci_dev)
+{
+    uint32_t idx;
+
+    for (idx = 0; idx < regions_num; idx++) {
+        PCIRegion *host = &io_regions[idx];
+        AssignedDevRegion *slot = &pci_dev->v_addrs[idx];
+
+        if (!host->valid)
+            continue;
+        slot->num = idx;
+
+        if (host->type & IORESOURCE_MEM) {
+            /* handle memory io regions */
+            int space = host->type & IORESOURCE_PREFETCH
+                ? PCI_ADDRESS_SPACE_MEM_PREFETCH
+                : PCI_ADDRESS_SPACE_MEM;
+
+            /* map physical memory */
+            slot->e_physbase = host->base_addr;
+            slot->u.r_virtbase =
+                mmap(NULL,
+                     (host->size + 0xFFF) & 0xFFFFF000,
+                     PROT_WRITE | PROT_READ, MAP_SHARED,
+                     host->resource_fd, (off_t) 0);
+
+            if (slot->u.r_virtbase == MAP_FAILED) {
+                fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!"
+                        "\n", __func__,
+                        (uint32_t) (host->base_addr));
+                return -1;
+            }
+            slot->r_size = host->size;
+            slot->e_size = 0;
+
+            /* add the sub-page offset of the region's base address */
+            slot->u.r_virtbase += (host->base_addr & 0xFFF);
+
+            pci_register_io_region((PCIDevice *) pci_dev, idx,
+                                   host->size, space,
+                                   assigned_dev_iomem_map);
+        } else {
+            /* handle port io regions */
+            pci_register_io_region((PCIDevice *) pci_dev, idx,
+                                   host->size, PCI_ADDRESS_SPACE_IO,
+                                   assigned_dev_ioport_map);
+
+            slot->e_physbase = host->base_addr;
+            slot->u.r_baseport = host->base_addr;
+            /* not relevant for port io */
+            slot->memory_index = 0;
+        }
+    }
+
+    /* success */
+    return 0;
+}
+
+/*
+ * Open the host device 0000:<r_bus>:<r_dev>.<r_func> through sysfs and
+ * describe it in pci_dev->real_device.
+ *
+ *  - Opens <dir>/config read-write, keeps the fd in real_device.config_fd
+ *    and loads the initial config space into pci_dev->dev.config.
+ *  - Parses up to MAX_IO_REGIONS "start end flags" lines from <dir>/resource.
+ *    Memory regions additionally get <dir>/resourceN opened (regions whose
+ *    resource file cannot be opened are left invalid, "probably ROM").
+ *
+ * Returns 0 on success and 1 on failure.  On failure after the config file
+ * was opened, the config fd is closed again and config_fd is set to -1, so
+ * nothing is leaked and a failed config read is no longer reported as
+ * success with an unpopulated config space.
+ */
+static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
+                           uint8_t r_dev, uint8_t r_func)
+{
+    char dir[128], name[128];
+    int fd, r = 0;
+    FILE *f;
+    unsigned long long start, end, size, flags;
+    PCIRegion *rp;
+    PCIDevRegions *dev = &pci_dev->real_device;
+
+    dev->region_number = 0;
+
+    snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/0000:%02x:%02x.%x/",
+             r_bus, r_dev, r_func);
+
+    snprintf(name, sizeof(name), "%sconfig", dir);
+
+    fd = open(name, O_RDWR);
+    if (fd == -1) {
+        fprintf(stderr, "%s: %s: %m\n", __func__, name);
+        return 1;
+    }
+    dev->config_fd = fd;
+again:
+    r = read(fd, pci_dev->dev.config, sizeof(pci_dev->dev.config));
+    if (r < 0) {
+        if (errno == EINTR || errno == EAGAIN)
+            goto again;
+        fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno);
+        /* Without the config space the device cannot be emulated. */
+        goto fail_close_config;
+    }
+
+    snprintf(name, sizeof(name), "%sresource", dir);
+
+    f = fopen(name, "r");
+    if (f == NULL) {
+        fprintf(stderr, "%s: %s: %m\n", __func__, name);
+        goto fail_close_config;
+    }
+
+    for (r = 0; r < MAX_IO_REGIONS; r++) {
+        if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3)
+            break;
+
+        rp = dev->regions + r;
+        rp->valid = 0;
+        size = end - start + 1;
+        flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
+        /* Skip empty slots and anything that is neither I/O nor memory. */
+        if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0)
+            continue;
+        if (flags & IORESOURCE_MEM) {
+            flags &= ~IORESOURCE_IO;
+            snprintf(name, sizeof(name), "%sresource%d", dir, r);
+            fd = open(name, O_RDWR);
+            if (fd == -1)
+                continue; /* probably ROM */
+            rp->resource_fd = fd;
+        } else
+            flags &= ~IORESOURCE_PREFETCH;
+
+        rp->type = flags;
+        rp->valid = 1;
+        rp->base_addr = start;
+        rp->size = size;
+        DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
+              r, rp->size, start, rp->type, rp->resource_fd);
+    }
+    fclose(f);
+
+    dev->region_number = r;
+    return 0;
+
+fail_close_config:
+    close(dev->config_fd);
+    dev->config_fd = -1;
+    return 1;
+}
+
+static LIST_HEAD(, AssignedDevInfo) adev_head;
+
+/*
+ * Build the id used to name an assigned device to the kernel: the host bus
+ * number in bits 15..8 and the host devfn in bits 7..0.
+ */
+static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
+{
+    uint32_t id = (uint32_t)bus << 8;
+
+    id |= (uint32_t)devfn;
+    return id;
+}
+
+/* The pci config space got updated. Check if irq numbers have changed
+ * for our devices.
+ *
+ * For every device on adev_head the guest irq is recomputed from the
+ * device's interrupt pin; when it differs from the last value programmed
+ * (girq), the new guest irq is handed to the kernel with kvm_assign_irq().
+ *
+ * List entries without a device are skipped: adev->assigned_dev is only set
+ * once init_assigned_device() succeeded, and it is cleared here after a
+ * failed kvm_assign_irq() unregisters the device, so a later call does not
+ * touch the unregistered device again.
+ * NOTE(review): this assumes pci_unregister_device() releases the device --
+ * confirm against pci.c.
+ *
+ * The parameter d is currently unused.
+ */
+void assigned_dev_update_irq(PCIDevice *d)
+{
+    int irq, r;
+    AssignedDevice *assigned_dev;
+    AssignedDevInfo *adev;
+
+    LIST_FOREACH(adev, &adev_head, next) {
+        assigned_dev = adev->assigned_dev;
+        if (assigned_dev == NULL)
+            continue;
+
+        irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin);
+        irq = piix_get_irq(irq);
+
+        if (irq != assigned_dev->girq) {
+            struct kvm_assigned_irq assigned_irq_data;
+
+            memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
+            assigned_irq_data.assigned_dev_id =
+                calc_assigned_dev_id(assigned_dev->h_busnr,
+                                     (uint8_t) assigned_dev->h_devfn);
+            assigned_irq_data.guest_irq = irq;
+            assigned_irq_data.host_irq = assigned_dev->real_device.irq;
+            r = kvm_assign_irq(kvm_context, &assigned_irq_data);
+            if (r < 0) {
+                perror("assigned_dev_update_irq");
+                fprintf(stderr, "Are you assigning a device "
+                        "that shares IRQ with some other device?\n");
+                pci_unregister_device(&assigned_dev->dev);
+                /* Forget the device so it is never dereferenced again.
+                 * FIXME: Delete node from list */
+                adev->assigned_dev = NULL;
+                continue;
+            }
+            assigned_dev->girq = irq;
+        }
+    }
+}
+
+/*
+ * Create the emulated PCI device for one -pcidevice entry and hand the host
+ * device to the kernel.
+ *
+ * Steps: register an AssignedDevice on bus with the assigned-device config
+ * handlers, open the host device via sysfs, register its MMIO/PIO BARs,
+ * record interrupt pin and host location, and call kvm_assign_pci_device()
+ * (with the IOMMU flag when KVM_CAP_IOMMU is available and not disabled for
+ * this device).
+ *
+ * Returns the new PCIDevice on success and NULL on any failure.  Previously
+ * the failure paths returned the non-NULL device pointer, so the NULL check
+ * in init_all_assigned_devices() could never detect them.  adev->assigned_dev
+ * is only set on success.
+ */
+struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
+{
+    int r;
+    AssignedDevice *dev;
+    uint8_t e_intx;
+    struct kvm_assigned_pci_dev assigned_dev_data;
+
+    /* The emulated devfn is not known before registration; log the host
+     * location instead. */
+    DEBUG("Registering real physical device %s (host %02x:%02x.%x)\n",
+          adev->name, adev->bus, adev->dev, adev->func);
+
+    dev = (AssignedDevice *)
+        pci_register_device(bus, adev->name, sizeof(AssignedDevice),
+                            -1, assigned_dev_pci_read_config,
+                            assigned_dev_pci_write_config);
+    if (NULL == dev) {
+        fprintf(stderr, "%s: Error: Couldn't register real device %s\n",
+                __func__, adev->name);
+        return NULL;
+    }
+
+    if (get_real_device(dev, adev->bus, adev->dev, adev->func)) {
+        fprintf(stderr, "%s: Error: Couldn't get real device (%s)!\n",
+                __func__, adev->name);
+        goto fail;
+    }
+
+    /* handle real device's MMIO/PIO BARs */
+    if (assigned_dev_register_regions(dev->real_device.regions,
+                                      dev->real_device.region_number,
+                                      dev))
+        goto fail;
+
+    /* handle interrupt routing: config[0x3d] is 1-based, intpin 0-based */
+    e_intx = dev->dev.config[0x3d] - 1;
+    dev->intpin = e_intx;
+    dev->run = 0;
+    dev->girq = 0;
+    dev->h_busnr = adev->bus;
+    dev->h_devfn = PCI_DEVFN(adev->dev, adev->func);
+
+    memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
+    assigned_dev_data.assigned_dev_id =
+        calc_assigned_dev_id(dev->h_busnr, (uint32_t)dev->h_devfn);
+    assigned_dev_data.busnr = dev->h_busnr;
+    assigned_dev_data.devfn = dev->h_devfn;
+
+#ifdef KVM_CAP_IOMMU
+    /* We always enable the IOMMU if present
+     * (or when not disabled on the command line)
+     */
+    r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
+    if (r && !adev->disable_iommu)
+        assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
+#endif
+
+    r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
+    if (r < 0) {
+        fprintf(stderr, "Could not notify kernel about "
+                "assigned device \"%s\"\n", adev->name);
+        perror("register_real_device");
+        goto fail;
+    }
+
+    adev->assigned_dev = dev;
+    return &dev->dev;
+
+fail:
+    /* FIXME: the partially initialised device stays registered on the bus;
+     * the visible caller exits the process on failure. */
+    return NULL;
+}
+
+/*
+ * Initialise every device queued on adev_head on the given bus.
+ * Stops at the first device that init_assigned_device() rejects.
+ *
+ * Returns 0 when all devices were initialised, -1 otherwise.
+ */
+int init_all_assigned_devices(PCIBus *bus)
+{
+    struct AssignedDevInfo *info;
+
+    LIST_FOREACH(info, &adev_head, next) {
+        if (!init_assigned_device(info, bus))
+            return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Syntax to assign device:
+ *
+ * -pcidevice host=bus:dev.func[,dma=none][,name=Foo]
+ *
+ * Example:
+ * -pcidevice host=00:13.0,dma=none
+ *
+ * dma can currently only be 'none' to disable iommu support.
+ *
+ * Parses one -pcidevice argument, allocates an AssignedDevInfo for it and
+ * puts it at the head of adev_head.  bus, dev and func are parsed as
+ * hexadecimal.  When no name= is given, the host address string is used as
+ * the name.
+ *
+ * Returns the new entry, or NULL when allocation fails, when host= is
+ * missing or empty, or when the address is not of the form bus:dev.func.
+ */
+AssignedDevInfo *add_assigned_device(const char *arg)
+{
+    char *cp, *cp1;
+    char device[8];
+#ifdef KVM_CAP_IOMMU
+    char dma[6];
+#endif
+    int r;
+    AssignedDevInfo *adev;
+
+    adev = qemu_mallocz(sizeof(AssignedDevInfo));
+    if (adev == NULL) {
+        fprintf(stderr, "%s: Out of memory\n", __func__);
+        return NULL;
+    }
+
+    /* host= is mandatory; without it `device` holds nothing to parse.
+     * NOTE(review): relies on get_param_value() returning 0 when the key
+     * is absent, as the name= handling below already does. */
+    r = get_param_value(device, sizeof(device), "host", arg);
+    if (!r)
+        goto bad;
+
+    r = get_param_value(adev->name, sizeof(adev->name), "name", arg);
+    if (!r)
+        snprintf(adev->name, sizeof(adev->name), "%s", device);
+
+#ifdef KVM_CAP_IOMMU
+    r = get_param_value(dma, sizeof(dma), "dma", arg);
+    if (r && !strncmp(dma, "none", 4))
+        adev->disable_iommu = 1;
+#endif
+    cp = device;
+    adev->bus = strtoul(cp, &cp1, 16);
+    if (*cp1 != ':')
+        goto bad;
+    cp = cp1 + 1;
+
+    adev->dev = strtoul(cp, &cp1, 16);
+    if (*cp1 != '.')
+        goto bad;
+    cp = cp1 + 1;
+
+    adev->func = strtoul(cp, &cp1, 16);
+
+    LIST_INSERT_HEAD(&adev_head, adev, next);
+    return adev;
+bad:
+    fprintf(stderr, "pcidevice argument parse error; "
+            "please check the help text for usage\n");
+    qemu_free(adev);
+    return NULL;
+}
diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
new file mode 100644
index 0000000..d6caa67
--- /dev/null
+++ b/qemu/hw/device-assignment.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2007, Neocleus Corporation.
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Data structures for storing PCI state
+ *
+ * Adapted to kvm by Qumranet
+ *
+ * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
+ * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
+ * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED])
+ * Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED])
+ */
+
+#ifndef __DEVICE_ASSIGNMENT_H__
+#define __DEVICE_ASSIGNMENT_H__
+
+#include <sys/mman.h>
+#include "qemu-common.h"
+#include "sys-queue.h"
+#include "pci.h"
+
+/* From include/linux/pci.h in the kernel sources */
+#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+
+/* The number of BARs in the config space header */
+#define MAX_IO_REGIONS (6)
+
+typedef struct { /* one region (BAR) of the real host device */
+ int type; /* Memory or port I/O (IORESOURCE_* flag bits) */
+ int valid; /* non-zero when the region is usable; invalid slots are skipped */
+ uint32_t base_addr; /* start address or port taken from the host resource list */
+ uint32_t size; /* size of the region */
+ int resource_fd; /* fd of the region's resource file; opened for memory regions only */
+} PCIRegion;
+
+typedef struct { /* host-side description of the assigned device */
+ uint8_t bus, dev, func; /* Bus inside domain, device and function */
+ int irq; /* IRQ number */
+ uint16_t region_number; /* number of active regions */
+
+ /* Port I/O or MMIO Regions */
+ PCIRegion regions[MAX_IO_REGIONS];
+ int config_fd; /* fd used to read and write the real device's config space */
+} PCIDevRegions;
+
+typedef struct { /* per-BAR state of an assigned device */
+ target_phys_addr_t e_physbase; /* emulated (guest-side) base address or port */
+ uint32_t memory_index; /* not relevant for port I/O regions */
+ union {
+ void *r_virtbase; /* mmapped access address for memory regions */
+ uint32_t r_baseport; /* the real (host) base port for I/O regions */
+ } u;
+ int num; /* our index within v_addrs[] */
+ uint32_t e_size; /* emulated size of region in bytes */
+ uint32_t r_size; /* real size of region in bytes */
+} AssignedDevRegion;
+
+typedef struct { /* emulated PCI device backed by a real host device */
+ PCIDevice dev; /* must stay first: the struct is cast to and from PCIDevice */
+ int intpin; /* interrupt pin: config[0x3d] - 1 */
+ uint8_t debug_flags;
+ AssignedDevRegion v_addrs[PCI_NUM_REGIONS]; /* indexed by BAR number */
+ PCIDevRegions real_device; /* host device regions and config fd */
+ int run;
+ int girq; /* guest irq last passed to the kernel; 0 initially */
+ unsigned char h_busnr; /* host bus number */
+ unsigned int h_devfn; /* host devfn, built with PCI_DEVFN() */
+ int bound;
+} AssignedDevice;
+
+typedef struct AssignedDevInfo AssignedDevInfo;
+
+struct AssignedDevInfo { /* one parsed -pcidevice command-line entry */
+ char name[15]; /* name= value, or the host address string when absent */
+ int bus; /* host bus number (parsed as hex) */
+ int dev; /* host device number (parsed as hex) */
+ int func; /* host function number (parsed as hex) */
+ AssignedDevice *assigned_dev; /* set once the device was initialised successfully */
+ LIST_ENTRY(AssignedDevInfo) next; /* linkage on the assigned-device list */
+ int disable_iommu; /* set by dma=none */
+};
+
+PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus);
+AssignedDevInfo *add_assigned_device(const char *arg);
+int init_all_assigned_devices(PCIBus *bus);
+
+#define MAX_DEV_ASSIGN_CMDLINE 8
+
+extern const char *assigned_devices[MAX_DEV_ASSIGN_CMDLINE];
+extern int assigned_devices_index;
+
+#endif /* __DEVICE_ASSIGNMENT_H__ */
diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
index d559f0c..30bb5ea 100644
--- a/qemu/hw/pc.c
+++ b/qemu/hw/pc.c
@@ -33,6 +33,7 @@
#include "boards.h"
#include "console.h"
#include "fw_cfg.h"
+#include "device-assignment.h"
#include "qemu-kvm.h"
@@ -1157,6 +1158,23 @@ static void pc_init1(ram_addr_t ram_size, int
vga_ram_size,
if (pci_enabled)
virtio_balloon_init(pci_bus);
+
+#ifdef USE_KVM_DEVICE_ASSIGNMENT
+    if (kvm_enabled()) {
+        int i;
+        for (i = 0; i < assigned_devices_index; i++) {
+            /* add_assigned_device() returns a pointer, NULL on failure;
+             * comparing it with "< 0" never detected the error. */
+            if (add_assigned_device(assigned_devices[i]) == NULL) {
+                fprintf(stderr, "Warning: could not add assigned device %s\n",
+                        assigned_devices[i]);
+            }
+        }
+
+        if (init_all_assigned_devices(pci_bus)) {
+            fprintf(stderr, "Failed to initialize assigned devices\n");
+            exit (1);
+        }
+    }
+#endif /* USE_KVM_DEVICE_ASSIGNMENT */
}
static void pc_init_pci(ram_addr_t ram_size, int vga_ram_size,
diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
index c82cd20..75bc9a9 100644
--- a/qemu/hw/pci.c
+++ b/qemu/hw/pci.c
@@ -50,6 +50,7 @@ struct PCIBus {
static void pci_update_mappings(PCIDevice *d);
static void pci_set_irq(void *opaque, int irq_num, int level);
+void assigned_dev_update_irq(PCIDevice *d);
target_phys_addr_t pci_mem_base;
static int pci_irq_index;
@@ -453,6 +454,13 @@ void pci_default_write_config(PCIDevice *d,
val >>= 8;
}
+#ifdef USE_KVM_DEVICE_ASSIGNMENT
+ if (kvm_enabled() && qemu_kvm_irqchip_in_kernel() &&
+ address >= PIIX_CONFIG_IRQ_ROUTE &&
+ address < PIIX_CONFIG_IRQ_ROUTE + 4)
+ assigned_dev_update_irq(d);
+#endif /* USE_KVM_DEVICE_ASSIGNMENT */
+
end = address + len;
if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
/* if the command register is modified, we must modify the mappings */
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index c5f3f29..3b4f279 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -27,6 +27,7 @@ int kvm_pit = 1;
#include <sys/utsname.h>
#include <sys/syscall.h>
#include <sys/mman.h>
+#include <sys/io.h>
#define bool _Bool
#define false 0
@@ -1047,3 +1048,15 @@ int
qemu_kvm_unregister_coalesced_mmio(target_phys_addr_t addr,
{
return kvm_unregister_coalesced_mmio(kvm_context, addr, size);
}
+
+/*
+ * on_vcpu() callback: apply one ioperm() request, described by a
+ * struct ioperm_data, on the calling thread.
+ */
+static void kvm_do_ioperm(void *_data)
+{
+    struct ioperm_data *req = _data;
+
+    ioperm(req->start_port, req->num, req->turn_on);
+}
+
+/*
+ * Run the ioperm() request in data (a struct ioperm_data) on the vcpu env.
+ * Does nothing unless KVM is enabled and the system is marked ready.
+ */
+void kvm_ioperm(CPUState *env, void *data)
+{
+    if (!kvm_enabled() || !qemu_system_ready)
+        return;
+
+    on_vcpu(env, kvm_do_ioperm, data);
+}
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index a1d6646..1084cd6 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -93,6 +93,8 @@ int qemu_kvm_unregister_coalesced_mmio(target_phys_addr_t
addr,
void qemu_kvm_system_reset_request(void);
+void kvm_ioperm(CPUState *env, void *data);
+
#ifdef TARGET_PPC
int handle_powerpc_dcr_read(int vcpu, uint32_t dcrn, uint32_t *data);
int handle_powerpc_dcr_write(int vcpu,uint32_t dcrn, uint32_t data);
@@ -107,6 +109,12 @@ int handle_powerpc_dcr_write(int vcpu,uint32_t dcrn,
uint32_t data);
extern int kvm_allowed;
extern kvm_context_t kvm_context;
+struct ioperm_data { /* arguments for one ioperm() call, see kvm_ioperm() */
+ unsigned long start_port; /* first port of the range */
+ unsigned long num; /* number of ports in the range */
+ int turn_on; /* passed as ioperm()'s turn_on argument: 1 grants, 0 revokes */
+};
+
#define kvm_enabled() (kvm_allowed)
#define qemu_kvm_irqchip_in_kernel() kvm_irqchip_in_kernel(kvm_context)
#define qemu_kvm_pit_in_kernel() kvm_pit_in_kernel(kvm_context)
diff --git a/qemu/vl.c b/qemu/vl.c
index 388e79d..967cb98 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -38,6 +38,7 @@
#include "qemu-char.h"
#include "block.h"
#include "audio/audio.h"
+#include "hw/device-assignment.h"
#include "migration.h"
#include "balloon.h"
#include "qemu-kvm.h"
@@ -215,6 +216,8 @@ CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
int win2k_install_hack = 0;
#endif
int usb_enabled = 0;
+const char *assigned_devices[MAX_DEV_ASSIGN_CMDLINE];
+int assigned_devices_index;
static VLANState *first_vlan;
int smp_cpus = 1;
const char *vnc_display;
@@ -8692,6 +8695,12 @@ static void help(int exitcode)
#endif
"-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n"
"-no-kvm-pit disable KVM kernel mode PIT\n"
+#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__)
+ "-pcidevice host=bus:dev.func[,dma=none][,name=string]\n"
+ " expose a PCI device to the guest OS.\n"
+ " dma=none: don't perform any dma translations
(default is to use an iommu)\n"
+ " 'string' is used in log output.\n"
+#endif
#endif
#ifdef TARGET_I386
"-no-acpi disable ACPI\n"
@@ -8811,6 +8820,9 @@ enum {
QEMU_OPTION_no_kvm,
QEMU_OPTION_no_kvm_irqchip,
QEMU_OPTION_no_kvm_pit,
+#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__)
+ QEMU_OPTION_pcidevice,
+#endif
QEMU_OPTION_no_reboot,
QEMU_OPTION_no_shutdown,
QEMU_OPTION_show_cursor,
@@ -8900,6 +8912,9 @@ static const QEMUOption qemu_options[] = {
#endif
{ "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip },
{ "no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit },
+#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__)
+ { "pcidevice", HAS_ARG, QEMU_OPTION_pcidevice },
+#endif
#endif
#if defined(TARGET_PPC) || defined(TARGET_SPARC)
{ "g", 1, QEMU_OPTION_g },
@@ -9411,6 +9426,7 @@ int main(int argc, char **argv)
parallel_device_index = 0;
usb_devices_index = 0;
+ assigned_devices_index = 0;
nb_net_clients = 0;
nb_drives = 0;
@@ -9844,6 +9860,16 @@ int main(int argc, char **argv)
kvm_pit = 0;
break;
}
+#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__)
+ case QEMU_OPTION_pcidevice:
+ if (assigned_devices_index >= MAX_DEV_ASSIGN_CMDLINE) {
+ fprintf(stderr, "Too many assigned devices\n");
+ exit(1);
+ }
+ assigned_devices[assigned_devices_index] = optarg;
+ assigned_devices_index++;
+ break;
+#endif
#endif
case QEMU_OPTION_usb:
usb_enabled = 1;
--
1.5.6.5
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html