On Tue, 2008-08-26 at 18:29 +0300, Amit Shah wrote:
> From: Or Sagi <[EMAIL PROTECTED]>
> From: Nir Peleg <[EMAIL PROTECTED]>
> From: Amit Shah <[EMAIL PROTECTED]>
> From: Ben-Ami Yassour <[EMAIL PROTECTED]>
> From: Glauber de Oliveira Costa <[EMAIL PROTECTED]>
> With this patch, we can assign a device on the host machine to a
> guest.
> 
> A new command-line option, -pcidevice is added.
> For example, to invoke it for a device sitting at PCI bus:dev.fn
> 04:08.0 with host IRQ 18, use this:
> 
>       -pcidevice host=04:08.0
> 
> The host driver for the device, if any, is to be removed before
> assigning the device.
> 
> This works only with the in-kernel irqchip method; to use the
> userspace irqchip, a kernel module (irqhook) and some extra changes
> are needed.
> 
> Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
> ---
>  libkvm/libkvm-x86.c         |   14 +
>  libkvm/libkvm.h             |   27 ++
>  qemu/Makefile.target        |    1 +
>  qemu/hw/device-assignment.c |  600 +++++++++++++++++++++++++++++++++++++++++++
>  qemu/hw/device-assignment.h |   94 +++++++
>  qemu/hw/isa.h               |    2 +
>  qemu/hw/pc.c                |    9 +
>  qemu/hw/pci.c               |   12 +
>  qemu/hw/pci.h               |    1 +
>  qemu/hw/piix_pci.c          |   19 ++
>  qemu/vl.c                   |   18 ++
>  11 files changed, 797 insertions(+), 0 deletions(-)
>  create mode 100644 qemu/hw/device-assignment.c
>  create mode 100644 qemu/hw/device-assignment.h
> 
> diff --git a/libkvm/libkvm-x86.c b/libkvm/libkvm-x86.c
> index ea97bdd..831823b 100644
> --- a/libkvm/libkvm-x86.c
> +++ b/libkvm/libkvm-x86.c
> @@ -126,6 +126,20 @@ static int kvm_init_tss(kvm_context_t kvm)
>       return 0;
>  }
>  
> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT
> +int kvm_assign_pci_device(kvm_context_t kvm,
> +                       struct kvm_assigned_pci_dev *assigned_dev)
> +{
> +     return ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
> +}
> +
> +int kvm_assign_irq(kvm_context_t kvm,
> +                struct kvm_assigned_irq *assigned_irq)
> +{
> +     return ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq);
> +}
> +#endif
> +
>  int kvm_arch_create_default_phys_mem(kvm_context_t kvm,
>                                      unsigned long phys_mem_bytes,
>                                      void **vm_mem)
> diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h
> index 346eedf..b424af6 100644
> --- a/libkvm/libkvm.h
> +++ b/libkvm/libkvm.h
> @@ -658,4 +658,31 @@ int kvm_s390_interrupt(kvm_context_t kvm, int slot,
>  int kvm_s390_set_initial_psw(kvm_context_t kvm, int slot, psw_t psw);
>  int kvm_s390_store_status(kvm_context_t kvm, int slot, unsigned long addr);
>  #endif
> +
> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT
> +/*!
> + * \brief Notifies host kernel about a PCI device assigned to guest
> + *
> + * Used for PCI device assignment, this function notifies the host
> + * kernel about the assigning of the physical PCI device.
> + *
> + * \param kvm Pointer to the current kvm_context
> + * \param assigned_dev Parameters, like bus, devfn number, etc
> + */
> +int kvm_assign_pci_device(kvm_context_t kvm,
> +                       struct kvm_assigned_pci_dev *assigned_dev);
> +
> +/*!
> + * \brief Notifies host kernel about changes to an irq assignment
> + *
> + * Used for PCI device assignment, this function notifies the host
> + * kernel about the assigning of the irq for an assigned physical
> + * PCI device.
> + *
> + * \param kvm Pointer to the current kvm_context
> + * \param assigned_irq Parameters, like dev id, host irq, guest irq, etc
> + */
> +int kvm_assign_irq(kvm_context_t kvm,
> +                struct kvm_assigned_irq *assigned_irq);
> +#endif
>  #endif
> diff --git a/qemu/Makefile.target b/qemu/Makefile.target
> index 2332fe3..750ecd5 100644
> --- a/qemu/Makefile.target
> +++ b/qemu/Makefile.target
> @@ -611,6 +611,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
>  OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
>  OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
>  OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
> +OBJS+= device-assignment.o
>  ifeq ($(USE_KVM_PIT), 1)
>  OBJS+= i8254-kvm.o
>  endif
> diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
> new file mode 100644
> index 0000000..8bc4e5c
> --- /dev/null
> +++ b/qemu/hw/device-assignment.c
> @@ -0,0 +1,600 @@
> +/*
> + * Copyright (c) 2007, Neocleus Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + *
> + *
> + *  Assign a PCI device from the host to a guest VM.
> + *
> + *  Adapted for KVM by Qumranet.
> + *
> + *  Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
> + *  Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
> + *  Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED])
> + */
> +#include <stdio.h>
> +#include <pthread.h>
> +#include <sys/io.h>
> +#include <sys/ioctl.h>
> +#include <linux/types.h>
> +
> +/* From linux/ioport.h */
> +#define IORESOURCE_IO                0x00000100      /* Resource type */
> +#define IORESOURCE_MEM               0x00000200
> +#define IORESOURCE_IRQ               0x00000400
> +#define IORESOURCE_DMA               0x00000800
> +#define IORESOURCE_PREFETCH  0x00001000      /* No side effects */
> +
> +#include "device-assignment.h"
> +#include "irq.h"
> +
> +#include "qemu-kvm.h"
> +#include <linux/kvm_para.h>
> +
> +extern FILE *logfile;
> +
> +/* #define DEVICE_ASSIGNMENT_DEBUG */
> +
> +#ifdef DEVICE_ASSIGNMENT_DEBUG
> +#define DEBUG(fmt, args...) fprintf(stderr, "%s: " fmt, __func__ , ## args)
> +#else
> +#define DEBUG(fmt, args...)
> +#endif
> +
> +#define assigned_dev_ioport_write(suffix)                            \
> + static void assigned_dev_ioport_write##suffix(void *opaque, uint32_t addr, \
> +                                            uint32_t value)          \
> + {                                                                   \
> +      assigned_dev_region_t *r_access = (assigned_dev_region_t *)opaque; \
> +      uint32_t r_pio = (unsigned long)r_access->r_virtbase           \
> +              + (addr - r_access->e_physbase);                       \
> +      if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) {           \
> +              fprintf(logfile, "assigned_dev_ioport_write" #suffix   \
> +                      ": r_pio=%08x e_physbase=%08x"                 \
> +                      " r_virtbase=%08lx value=%08x\n",              \
> +                      r_pio, (int)r_access->e_physbase,              \
> +                      (unsigned long)r_access->r_virtbase, value);   \
> +      }                                                              \
> +      iopl(3);                                                       \
> +      out##suffix(value, r_pio);                                     \
> +  }
> +
> +assigned_dev_ioport_write(b)
> +assigned_dev_ioport_write(w)
> +assigned_dev_ioport_write(l)
> +
> +#define assigned_dev_ioport_read(suffix)                             \
> + static uint32_t assigned_dev_ioport_read##suffix(void *opaque, uint32_t 
> addr) \
> + {                                                                   \
> +      assigned_dev_region_t *r_access = (assigned_dev_region_t *)opaque; \
> +      uint32_t r_pio = (addr - r_access->e_physbase)                 \
> +              + (unsigned long)r_access->r_virtbase;                 \

Please add iopl(3) here, the same as in the write case.
I think it was Sheng who mentioned that it was required in his environment.

We need to change this code in the future so that the iopl() calls are not
required, but for now we need to add this one too.

> +      uint32_t value = in##suffix(r_pio);                            \
> +      if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) {           \
> +              fprintf(logfile, "assigned_dev_ioport_read" #suffix    \
> +                      ": r_pio=%08x e_physbase=%08x r_virtbase=%08lx " \
> +                      "value=%08x\n",                                \
> +                      r_pio, (int)r_access->e_physbase,              \
> +                      (unsigned long)r_access->r_virtbase, value);   \
> +      }                                                              \
> +      return value;                                                  \
> + }
> +
> +assigned_dev_ioport_read(b)
> +assigned_dev_ioport_read(w)
> +assigned_dev_ioport_read(l)
> +
> +static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
> +                      uint32_t e_phys, uint32_t e_size, int type)
> +{
> +     assigned_dev_t *r_dev = (assigned_dev_t *) pci_dev;
> +     assigned_dev_region_t *region = &r_dev->v_addrs[region_num];
> +     int first_map = (region->e_size == 0);
> +     int ret = 0;
> +
> +     DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n",
> +           e_phys, r_dev->v_addrs[region_num].r_virtbase, type, e_size,
> +           region_num);
> +
> +     region->e_physbase = e_phys;
> +     region->e_size = e_size;
> +
> +     if (!first_map)
> +             kvm_destroy_phys_mem(kvm_context, e_phys, e_size);
> +     if (e_size > 0)
> +             ret = kvm_register_userspace_phys_mem(kvm_context,
> +                                                   e_phys,
> +                                                   region->r_virtbase,
> +                                                   e_size,
> +                                                   0);
> +     if (ret != 0)
> +             fprintf(logfile, "%s: Error: create new mapping failed\n",
> +                     __func__);
> +}
> +
> +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
> +                                 uint32_t addr, uint32_t size, int type)
> +{
> +     assigned_dev_t *r_dev = (assigned_dev_t *) pci_dev;
> +     int i;
> +     uint32_t ((*rf[])(void *, uint32_t)) =
> +             { assigned_dev_ioport_readb,
> +               assigned_dev_ioport_readw,
> +               assigned_dev_ioport_readl
> +             };
> +     void ((*wf[])(void *, uint32_t, uint32_t)) =
> +             { assigned_dev_ioport_writeb,
> +               assigned_dev_ioport_writew,
> +               assigned_dev_ioport_writel
> +             };
> +
> +     r_dev->v_addrs[region_num].e_physbase = addr;
> +     DEBUG("%s: address=0x%x type=0x%x len=%d region_num=%d \n",
> +           __func__, addr, type, size, region_num);
> +
> +     for (i = 0; i < 3; i++) {
> +             register_ioport_write(addr, size, 1<<i, wf[i],
> +                                   (void *) (r_dev->v_addrs + region_num));
> +             register_ioport_read(addr, size, 1<<i, rf[i],
> +                                  (void *) (r_dev->v_addrs + region_num));
> +     }
> +}
> +
> +static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
> +                                       uint32_t val, int len)
> +{
> +     int fd, r;
> +
> +     DEBUG("%s: (%x.%x): address=%04x val=0x%08x len=%d\n",
> +           __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
> +           (uint16_t) address, val, len);
> +
> +     if (address == 0x4)
> +             pci_default_write_config(d, address, val, len);
> +
> +     if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
> +         address == 0x3c || address == 0x3d) {
> +             /* used for update-mappings (BAR emulation) */
> +             pci_default_write_config(d, address, val, len);
> +             return;
> +     }
> +     DEBUG("%s: NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
> +           __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
> +           (uint16_t) address, val, len);
> +     fd = ((assigned_dev_t *)d)->real_device.config_fd;
> +     lseek(fd, address, SEEK_SET);
> +again:
> +     r = write(fd, &val, len);
> +     if (r < 0) {
> +             if (errno == EINTR || errno == EAGAIN)
> +                     goto again;
> +             fprintf(stderr, "%s: write failed, errno = %d\n",
> +                     __func__, errno);
> +     }
> +}
> +
> +static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
> +                                          int len)
> +{
> +     uint32_t val = 0;
> +     int fd, r;
> +
> +     if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
> +         address == 0x3c || address == 0x3d) {
> +             val = pci_default_read_config(d, address, len);
> +             DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
> +                   (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val,
> +                   len);
> +             return val;
> +     }
> +
> +     /* vga specific, remove later */
> +     if (address == 0xFC)
> +             goto do_log;
> +
> +     fd = ((assigned_dev_t *)d)->real_device.config_fd;
> +     lseek(fd, address, SEEK_SET);
> +again:
> +     r = read(fd, &val, len);
> +     if (r < 0) {
> +             if (errno == EINTR || errno == EAGAIN)
> +                     goto again;
> +             fprintf(stderr, "%s: read failed, errno = %d\n",
> +                     __func__, errno);
> +     }
> +do_log:
> +     DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
> +           (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
> +
> +     /* kill the special capabilities */
> +     if (address == 4 && len == 4)
> +             val &= ~0x100000;
> +     else if (address == 6)
> +             val &= ~0x10;
> +
> +     return val;
> +}
> +
> +static int assigned_dev_register_regions(pci_region_t *io_regions,
> +                                      unsigned long regions_num,
> +                                      assigned_dev_t *pci_dev)
> +{
> +     uint32_t i;
> +     pci_region_t *cur_region = io_regions;
> +
> +     for (i = 0; i < regions_num; i++, cur_region++) {
> +             if (!cur_region->valid)
> +                     continue;
> +#ifdef DEVICE_ASSIGNMENT_DEBUG
> +             pci_dev->v_addrs[i].debug |= DEVICE_ASSIGNMENT_DEBUG_MMIO
> +                                          | DEVICE_ASSIGNMENT_DEBUG_PIO;
> +#endif
> +             pci_dev->v_addrs[i].num = i;
> +
> +             /* handle memory io regions */
> +             if (cur_region->type & IORESOURCE_MEM) {
> +                     int t = cur_region->type & IORESOURCE_PREFETCH
> +                             ? PCI_ADDRESS_SPACE_MEM_PREFETCH
> +                             : PCI_ADDRESS_SPACE_MEM;
> +
> +                     /* map physical memory */
> +                     pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
> +                     pci_dev->v_addrs[i].r_virtbase =
> +                             mmap(NULL,
> +                                  (cur_region->size + 0xFFF) & 0xFFFFF000,
> +                                  PROT_WRITE | PROT_READ, MAP_SHARED,
> +                                  cur_region->resource_fd, (off_t) 0);
> +
> +                     if ((void *) -1 == pci_dev->v_addrs[i].r_virtbase) {
> +                             fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!"
> +                                     "\n", __func__,
> +                                     (uint32_t) (cur_region->base_addr));
> +                             return -1;
> +                     }
> +                     pci_dev->v_addrs[i].r_size = cur_region->size;
> +                     pci_dev->v_addrs[i].e_size = 0;
> +
> +                     /* add offset */
> +                     pci_dev->v_addrs[i].r_virtbase +=
> +                             (cur_region->base_addr & 0xFFF);
> +
> +                     pci_register_io_region((PCIDevice *) pci_dev, i,
> +                                            cur_region->size, t,
> +                                            assigned_dev_iomem_map);
> +                     continue;
> +             }
> +             /* handle port io regions */
> +             pci_register_io_region((PCIDevice *) pci_dev, i,
> +                                    cur_region->size, PCI_ADDRESS_SPACE_IO,
> +                                    assigned_dev_ioport_map);
> +
> +             pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
> +             pci_dev->v_addrs[i].r_virtbase =
> +                     (void *)(long)cur_region->base_addr;
> +             /* not relevant for port io */
> +             pci_dev->v_addrs[i].memory_index = 0;
> +     }
> +
> +     /* success */
> +     return 0;
> +}
> +
> +static int get_real_device(assigned_dev_t *pci_dev, uint8_t r_bus,
> +                        uint8_t r_dev, uint8_t r_func)
> +{
> +     char dir[128], name[128], comp[16];
> +     int fd, r = 0;
> +     FILE *f;
> +     unsigned long long start, end, size, flags;
> +     pci_region_t *rp;
> +     pci_dev_t *dev = &pci_dev->real_device;
> +
> +     dev->region_number = 0;
> +
> +     sprintf(dir, "/sys/bus/pci/devices/0000:%02x:%02x.%x/",
> +             r_bus, r_dev, r_func);
> +     strcpy(name, dir);
> +     strcat(name, "config");
> +     fd = open(name, O_RDWR);
> +     if (fd == -1) {
> +             fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +             return 1;
> +     }
> +     dev->config_fd = fd;
> +again:
> +     r = read(fd, pci_dev->dev.config, sizeof pci_dev->dev.config);
> +     if (r < 0) {
> +             if (errno == EINTR || errno == EAGAIN)
> +                     goto again;
> +             fprintf(stderr, "%s: read failed, errno = %d\n",
> +                     __func__, errno);
> +     }
> +     strcpy(name, dir);
> +     strcat(name, "resource");
> +
> +     f = fopen(name, "r");
> +     if (f == NULL) {
> +             fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +             return 1;
> +     }
> +     for (r = 0; fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) == 3;
> +          r++) {
> +             rp = dev->regions + r;
> +             rp->valid = 0;
> +             size = end - start + 1;
> +             flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
> +             if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0)
> +                     continue;
> +             if (flags & IORESOURCE_MEM) {
> +                     flags &= ~IORESOURCE_IO;
> +                     sprintf(comp, "resource%d", r);
> +                     strcpy(name, dir);
> +                     strcat(name, comp);
> +                     fd = open(name, O_RDWR);
> +                     if (fd == -1)
> +                             continue;               /* probably ROM */
> +                     rp->resource_fd = fd;
> +             } else
> +                     flags &= ~IORESOURCE_PREFETCH;
> +
> +             rp->type = flags;
> +             rp->valid = 1;
> +             rp->base_addr = start;
> +             rp->size = size;
> +             DEBUG("%s: region %d size %d start 0x%x type %d "
> +                   "resource_fd %d\n", __func__, r, rp->size, start,
> +                   rp->type, rp->resource_fd);
> +     }
> +     fclose(f);
> +
> +     dev->region_number = r;
> +     return 0;
> +}
> +
> +static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
> +{
> +     return (uint32_t)bus << 8 | (uint32_t)devfn;
> +}
> +
> +static assigned_dev_t *register_real_device(PCIBus *e_bus,
> +                                         const char *e_dev_name,
> +                                         int e_devfn, uint8_t r_bus,
> +                                         uint8_t r_dev, uint8_t r_func,
> +                                         int flags)
> +{
> +     int rc;
> +     assigned_dev_t *pci_dev;
> +     uint8_t e_device, e_intx;
> +
> +     DEBUG("%s: Registering real physical device %s (devfn=0x%x)\n",
> +           __func__, e_dev_name, e_devfn);
> +
> +     pci_dev = (assigned_dev_t *)
> +             pci_register_device(e_bus, e_dev_name, sizeof(assigned_dev_t),
> +                                 e_devfn, assigned_dev_pci_read_config,
> +                                 assigned_dev_pci_write_config);
> +     if (NULL == pci_dev) {
> +             fprintf(stderr, "%s: Error: Couldn't register real device %s\n",
> +                     __func__, e_dev_name);
> +             return NULL;
> +     }
> +     if (get_real_device(pci_dev, r_bus, r_dev, r_func)) {
> +             fprintf(stderr, "%s: Error: Couldn't get real device (%s)!\n",
> +                     __func__, e_dev_name);
> +             goto out;
> +     }
> +
> +     /* handle real device's MMIO/PIO BARs */
> +     if (assigned_dev_register_regions(pci_dev->real_device.regions,
> +                                       pci_dev->real_device.region_number,
> +                                       pci_dev))
> +             goto out;
> +
> +     /* handle interrupt routing */
> +     e_device = (pci_dev->dev.devfn >> 3) & 0x1f;
> +     e_intx = pci_dev->dev.config[0x3d] - 1;
> +     pci_dev->intpin = e_intx;
> +     pci_dev->run = 0;
> +     pci_dev->girq = 0;
> +     pci_dev->h_busnr = r_bus;
> +     pci_dev->h_devfn = PCI_DEVFN(r_dev, r_func);
> +
> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT
> +     if (kvm_enabled()) {
> +             struct kvm_assigned_pci_dev assigned_dev_data;
> +
> +             memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
> +             assigned_dev_data.assigned_dev_id  =
> +                     calc_assigned_dev_id(pci_dev->h_busnr,
> +                                          (uint32_t)pci_dev->h_devfn);
> +             assigned_dev_data.busnr = pci_dev->h_busnr;
> +             assigned_dev_data.devfn = pci_dev->h_devfn;
> +             assigned_dev_data.flags = flags;
> +             rc = kvm_assign_pci_device(kvm_context,
> +                                        &assigned_dev_data);
> +             if (rc < 0) {
> +                     fprintf(stderr, "Could not notify kernel about "
> +                             "assigned device \"%s\"\n", e_dev_name);
> +                     perror("pt-ioctl");
> +                     goto out;
> +             }
> +     }
> +#endif
> +     fprintf(logfile, "Registered host PCI device %02x:%02x.%1x "
> +             "(\"%s\") as guest device %02x:%02x.%1x\n",
> +             r_bus, r_dev, r_func, e_dev_name,
> +             pci_bus_num(e_bus), e_device, r_func);
> +
> +     return pci_dev;
> +out:
> +     pci_unregister_device(&pci_dev->dev);
> +     return NULL;
> +}
> +
> +#define      MAX_ASSIGNED_DEVS 4
> +struct {
> +     char name[15];
> +     int bus;
> +     int dev;
> +     int func;
> +     int dma;
> +     assigned_dev_t *assigned_dev;
> +} assigned_devices[MAX_ASSIGNED_DEVS];
> +
> +int nr_assigned_devices;
> +extern int get_param_value(char *buf, int buf_size,
> +                        const char *tag, const char *str);
> +extern int piix_get_irq(int);
> +
> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT
> +/* The pci config space got updated. Check if irq numbers have changed
> + * for our devices
> + */
> +void assigned_dev_update_irq(PCIDevice *d)
> +{
> +     int i, irq, r;
> +     assigned_dev_t *assigned_dev;
> +
> +     for (i = 0; i < nr_assigned_devices; i++) {
> +             assigned_dev = assigned_devices[i].assigned_dev;
> +             if (assigned_dev == NULL)
> +                     continue;
> +
> +             irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin);
> +             irq = piix_get_irq(irq);
> +
> +             if (irq != assigned_dev->girq) {
> +                     struct kvm_assigned_irq assigned_irq_data;
> +
> +                     memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
> +                     assigned_irq_data.assigned_dev_id  =
> +                             calc_assigned_dev_id(assigned_dev->h_busnr,
> +                                                  (uint8_t)
> +                                                  assigned_dev->h_devfn);
> +                     assigned_irq_data.guest_irq = irq;
> +                     assigned_irq_data.host_irq =
> +                             assigned_dev->real_device.irq;
> +                     r = kvm_assign_irq(kvm_context, &assigned_irq_data);
> +                     if (r < 0) {
> +                             perror("assigned_dev_update_irq");
> +                             pci_unregister_device(&assigned_dev->dev);
> +                             continue;
> +                     }
> +                     assigned_dev->girq = irq;
> +             }
> +     }
> +}
> +#endif
> +
> +static int init_device_assignment(void)
> +{
> +     /* Do we have any devices to be assigned? */
> +     if (nr_assigned_devices == 0)
> +             return -1;
> +     iopl(3);
> +     return 0;
> +}
> +
> +int init_assigned_device(PCIBus *bus, int *index)
> +{
> +     assigned_dev_t *dev = NULL;
> +     int i, ret = 0;
> +
> +     if (*index == -1) {
> +             if (init_device_assignment() < 0)
> +                     return -1;
> +
> +             *index = nr_assigned_devices - 1;
> +     }
> +     i = *index;
> +     dev = register_real_device(bus, assigned_devices[i].name, -1,
> +                                assigned_devices[i].bus,
> +                                assigned_devices[i].dev,
> +                                assigned_devices[i].func,
> +                                assigned_devices[i].dma);
> +     if (dev == NULL) {
> +             fprintf(stderr, "Error: Couldn't register device \"%s\"\n",
> +                     assigned_devices[i].name);
> +             ret = -1;
> +     }
> +     assigned_devices[i].assigned_dev = dev;
> +
> +     --*index;
> +     return ret;
> +}
> +
> +/*
> + * Syntax to assign device:
> + *
> + * -pcidevice host=bus:dev.func,dma=dma
> + *
> + * Example:
> + * -pcidevice host=00:13.0,dma=pvdma
> + *
> + * dma can currently be 'none' to disable iommu support.
> + */
> +void add_assigned_device(const char *arg)
> +{
> +     char *cp, *cp1;
> +     char device[8];
> +     char dma[6];
> +     int r;
> +
> +     if (nr_assigned_devices >= MAX_ASSIGNED_DEVS) {
> +             fprintf(stderr, "Too many assigned devices (max %d)\n",
> +                     MAX_ASSIGNED_DEVS);
> +             return;
> +     }
> +     memset(&assigned_devices[nr_assigned_devices], 0,
> +            sizeof assigned_devices[nr_assigned_devices]);
> +
> +     r = get_param_value(device, sizeof device, "host", arg);
> +
> +     r = get_param_value(assigned_devices[nr_assigned_devices].name,
> +                         sizeof assigned_devices[nr_assigned_devices].name,
> +                         "name", arg);
> +     if (!r)
> +             strncpy(assigned_devices[nr_assigned_devices].name, device, 8);
> +
> +#ifdef KVM_CAP_IOMMU
> +     r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);

This code is called during parsing of the parameters, and at that time we
do not yet have a valid kvm->fd in libkvm, so it's not going to work.

Regards,
Ben



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to