From: Nir Peleg <[EMAIL PROTECTED]>
From: Or Sagi <[EMAIL PROTECTED]>
When using the --no-kvm-irqchip option, this irqhook module injects
interrupts into the guests for assigned devices.
This module is not well-supported and only exists for debugging and
for legacy / non-x86 support.
Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
---
Makefile | 10 ++-
irqhook/Kbuild | 3 +
irqhook/Makefile | 25 +++++
irqhook/irqhook_main.c | 215 +++++++++++++++++++++++++++++++++++++++++++++
qemu/hw/apic.c | 4 +
qemu/hw/pci-passthrough.c | 171 ++++++++++++++++++++++++++++++++++--
qemu/hw/pci-passthrough.h | 1 +
qemu/vl.c | 4 +-
8 files changed, 421 insertions(+), 12 deletions(-)
create mode 100644 irqhook/Kbuild
create mode 100644 irqhook/Makefile
create mode 100644 irqhook/irqhook_main.c
diff --git a/Makefile b/Makefile
index 48a8dff..d4246fd 100644
--- a/Makefile
+++ b/Makefile
@@ -7,16 +7,16 @@ rpmrelease = devel
sane-arch = $(subst i386,x86,$(subst x86_64,x86,$(ARCH)))
-.PHONY: kernel user libkvm qemu bios vgabios extboot clean libfdt
+.PHONY: kernel irqhook user libkvm qemu bios vgabios extboot clean libfdt
all: libkvm qemu
ifneq '$(filter $(ARCH), x86_64 i386 ia64)' ''
- all: $(if $(WANT_MODULE), kernel) user
+ all: $(if $(WANT_MODULE), kernel irqhook) user
endif
kcmd = $(if $(WANT_MODULE),,@\#)
-qemu kernel user libkvm:
+qemu kernel user irqhook libkvm:
$(MAKE) -C $@
qemu: libkvm
@@ -77,6 +77,7 @@ install-rpm:
install:
$(kcmd)make -C kernel DESTDIR="$(DESTDIR)" install
+ $(kcmd)make -C irqhook DESTDIR="$(DESTDIR)" install
make -C libkvm DESTDIR="$(DESTDIR)" install
make -C qemu DESTDIR="$(DESTDIR)" install
@@ -97,6 +98,7 @@ srpm:
tar czf $(RPMTOPDIR)/SOURCES/user.tar.gz user
tar czf $(RPMTOPDIR)/SOURCES/libkvm.tar.gz libkvm
tar czf $(RPMTOPDIR)/SOURCES/kernel.tar.gz kernel
+ tar czf $(RPMTOPDIR)/SOURCES/irqhook.tar.gz irqhook
tar czf $(RPMTOPDIR)/SOURCES/scripts.tar.gz scripts
tar czf $(RPMTOPDIR)/SOURCES/extboot.tar.gz extboot
cp Makefile configure kvm_stat $(RPMTOPDIR)/SOURCES
@@ -104,7 +106,7 @@ srpm:
$(RM) $(tmpspec)
clean:
- for i in $(if $(WANT_MODULE), kernel) user libkvm qemu libfdt; do \
+ for i in $(if $(WANT_MODULE), kernel irqhook) user libkvm qemu libfdt;
do \
make -C $$i clean; \
done
diff --git a/irqhook/Kbuild b/irqhook/Kbuild
new file mode 100644
index 0000000..9af75a4
--- /dev/null
+++ b/irqhook/Kbuild
@@ -0,0 +1,3 @@
+# kbuild description for the irqhook module: irqhook.o is built as a module
+# from the single object irqhook_main.o.
+EXTRA_CFLAGS := -I$(src)/include
+obj-m := irqhook.o
+irqhook-objs := irqhook_main.o
diff --git a/irqhook/Makefile b/irqhook/Makefile
new file mode 100644
index 0000000..3b1d851
--- /dev/null
+++ b/irqhook/Makefile
@@ -0,0 +1,25 @@
+# Wrapper Makefile that builds, installs and cleans the irqhook module by
+# delegating to the kernel build tree at $(KERNELDIR).
+# NOTE(review): KERNELDIR is presumably provided by ../config.mak - confirm.
+include ../config.mak
+
+# Kernel release string derived from a /lib/modules/<release>/build path.
+KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR))
+
+# Optional staging prefix prepended to the install directory.
+DESTDIR=
+
+# Modules are copied to the "extra" directory that sits next to "build".
+INSTALLDIR = $(patsubst %/build,%/extra,$(KERNELDIR))
+
+rpmrelease = devel
+
+# Only referenced by the commented-out "sync" rule below.
+LINUX = ../linux-2.6
+
+# Build through the kernel tree, with M= pointing at this directory.
+all::
+ $(MAKE) -C $(KERNELDIR) M=`pwd` "$$@"
+
+#sync:
+# rsync --exclude='*.mod.c' "$(LINUX)"/drivers/irqhook/*.[ch] .
+
+# Copy the built module(s) into the install directory, then run depmod.
+install:
+ mkdir -p $(DESTDIR)/$(INSTALLDIR)
+ cp *.ko $(DESTDIR)/$(INSTALLDIR)
+ /sbin/depmod -a
+
+# Forward "clean" to the kernel build system for this directory.
+clean:
+ $(MAKE) -C $(KERNELDIR) M=`pwd` $@
diff --git a/irqhook/irqhook_main.c b/irqhook/irqhook_main.c
new file mode 100644
index 0000000..0f93d17
--- /dev/null
+++ b/irqhook/irqhook_main.c
@@ -0,0 +1,215 @@
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/bitmap.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/miscdevice.h>
+#include <linux/pci.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * Version and naming strings.  irqh_MODULE_NAME is used as the misc device
+ * name and as the name passed to request_irq(); irqh_DRIVER_NAME is the
+ * banner printed when the module loads.
+ */
+#define irqh_VERSION "0.0.1"
+#define irqh_MODULE_NAME "irqhook"
+#define irqh_DRIVER_NAME irqh_MODULE_NAME " HW IRQ hook " irqh_VERSION
+
+// based on earlier proprietary Tutis code; this modified version goes under GPL
+MODULE_AUTHOR("Nir Peleg - Tutis");
+MODULE_DESCRIPTION("IRQ hook driver");
+MODULE_LICENSE("GPL");
+
+//#define irqh_DEBUG /* define to enable copious debugging info */
+
+/*
+ * DPRINTK() prints a message prefixed with the calling function's name when
+ * irqh_DEBUG is defined, and expands to nothing otherwise.
+ * NOTE(review): the literal "<1>" looks like a raw printk log-level prefix -
+ * confirm it is the intended level for debug output.
+ */
+#ifdef irqh_DEBUG
+#define DPRINTK(fmt, args...) printk("<1>" "%s: " fmt, __FUNCTION__ , ## args)
+#else
+#define DPRINTK(fmt, args...)
+#endif
+
+/*
+ * ERROR() is always compiled in and prints with the same prefix as DPRINTK().
+ * fmt is pasted onto the prefix, so it must be a string literal.
+ */
+#define ERROR(fmt, args...) printk("<1>" "%s: " fmt, __FUNCTION__ , ## args)
+
+/* Taken by the interrupt handler and by read() around the pending bitmap. */
+static spinlock_t irqh_lock;
+/* Readers sleep here; the interrupt handler wakes them. */
+static wait_queue_head_t irqh_proc_list;
+
+/* One bit per IRQ: set by the interrupt handler, consumed by read(). */
+static DECLARE_BITMAP(pending, NR_IRQS);
+/* One bit per IRQ: set while the IRQ is bound to this module via write(). */
+static DECLARE_BITMAP(handled, NR_IRQS);
+
+/* Thin wrappers over the kernel bit operations; note the (bitmap, bit) order. */
+#define irqh_on(which, bit) test_bit(bit, which)
+#define irqh_set(which, bit) set_bit(bit, which)
+#define irqh_clear(which, bit) clear_bit(bit, which)
+/* First set bit in the bitmap, or NR_IRQS when the bitmap is empty. */
+#define irqh_ffs(which) find_first_bit(which, NR_IRQS)
+
+/*
+ * Interrupt handler installed for every IRQ bound through the device.
+ *
+ * For an IRQ marked in `handled`, record it in `pending`, wake any sleeping
+ * reader and disable the line (without waiting for running handlers).  IRQs
+ * that are not marked in `handled` are left alone.  IRQ_HANDLED is returned
+ * in both cases.
+ */
+static irqreturn_t
+irqh_interrupt(int irq, void *p)
+{
+	unsigned long irqflags;
+
+	DPRINTK("interrupt: %d\n", irq);
+	if (irqh_on(handled, irq)) {
+		spin_lock_irqsave(&irqh_lock, irqflags);
+		irqh_set(pending, irq);
+		wake_up_interruptible(&irqh_proc_list);
+		spin_unlock_irqrestore(&irqh_lock, irqflags);
+		disable_irq_nosync(irq);
+	}
+	return IRQ_HANDLED;
+}
+
+/*
+ * write() handler for the irqhook device: parses one textual command.
+ *
+ * The target is either an IRQ number or a PCI address written as
+ * "<bus>:<dev>.<fn>", which is resolved to that device's IRQ.  An optional
+ * leading character selects the action:
+ *   '+'   bind: request the IRQ so that it is reported to readers
+ *   '-'   unbind: free the IRQ (and disable the PCI device if one was named)
+ *   none  acknowledge: re-enable an IRQ that the handler disabled
+ *
+ * Returns `size` on success, or a negative errno:
+ *   -EINVAL  not opened for writing, command too long or malformed, IRQ out
+ *            of range, or unbind/acknowledge of an IRQ that is not bound
+ *   -EFAULT  the user buffer could not be read
+ *   -ENOENT  no PCI device matches the given address
+ *   -EBUSY   the IRQ is already bound
+ *   -EIO     request_irq() failed
+ */
+static ssize_t
+irqh_dev_write(struct file *fp, const char *buf, size_t size, loff_t *offp)
+{
+	int n, device, func, devfn;
+	char arg[32], *cp, *cp1;
+	struct pci_dev *pdp = NULL;
+	ssize_t rc = size;
+
+	DPRINTK("ENTER\n");
+	/* ">=" leaves room for the terminating NUL stored below. */
+	if ((fp->f_mode & FMODE_WRITE) == 0 || size >= sizeof arg)
+		return -EINVAL;
+	if (copy_from_user(arg, buf, size))
+		return -EFAULT;
+	arg[size] = 0;
+
+	cp = arg + (arg[0] == '+' || arg[0] == '-');
+	n = simple_strtol(cp, &cp1, 0);
+	if (*cp1 == ':') {
+		device = simple_strtol(cp1 + 1, &cp1, 0);
+		/* Without the '.' the next parse would start past the NUL. */
+		if (*cp1 != '.')
+			return -EINVAL;
+		func = simple_strtol(cp1 + 1, NULL, 0);
+		DPRINTK("PCI dev %d:%d.%d\n", n, device, func);
+		devfn = PCI_DEVFN(device, func);
+		/*
+		 * Leaving the iterator early keeps a reference on pdp; it is
+		 * dropped at "out".
+		 */
+		for_each_pci_dev(pdp) {
+			if (pdp->bus->number == n && pdp->devfn == devfn)
+				break;
+		}
+		if (!pdp) {
+			ERROR("PCI device not found\n");
+			return -ENOENT;
+		}
+		n = pdp->irq;
+	}
+	DPRINTK("IRQ %d\n", n);
+
+	if (arg[0] == '+') {
+		if (pdp) {
+			if (pci_enable_device(pdp))
+				ERROR("device not enabled\n");
+			/* Re-read: the IRQ is only trusted after enabling. */
+			n = pdp->irq;
+		}
+		/* Validate before n is used as a bitmap index. */
+		if ((unsigned)n >= NR_IRQS) {
+			if (pdp)
+				ERROR("device has invalid IRQ set\n");
+			rc = -EINVAL;
+			goto out;
+		}
+		/* Claim the slot atomically so two binders cannot both win. */
+		if (test_and_set_bit(n, handled)) {
+			rc = -EBUSY;
+			goto out;
+		}
+		if (request_irq(n, irqh_interrupt, IRQF_SHARED,
+				irqh_MODULE_NAME, (void *)irqh_interrupt)) {
+			irqh_clear(handled, n);
+			ERROR("request_irq failed\n");
+			rc = -EIO;
+			goto out;
+		}
+		printk("Bound machine irq %d\n", n);
+	} else {
+		if ((unsigned)n >= NR_IRQS) {
+			rc = -EINVAL;
+			goto out;
+		}
+		/* Never free or enable an IRQ this module did not bind. */
+		if (!irqh_on(handled, n)) {
+			rc = -EINVAL;
+			goto out;
+		}
+		if (arg[0] == '-') {
+			if (pdp)
+				pci_disable_device(pdp);
+			free_irq(n, (void *)irqh_interrupt);
+			irqh_clear(handled, n);
+		} else {
+			enable_irq(n);
+		}
+	}
+
+	DPRINTK("DONE\n");
+out:
+	/* pci_dev_put() accepts NULL, so this also covers the plain-IRQ form. */
+	pci_dev_put(pdp);
+	return rc;
+}
+
+/*
+ * read() handler for the irqhook device: reports one pending IRQ.
+ *
+ * Waits (unless O_NONBLOCK is set) until an IRQ is marked pending, then
+ * copies its number to `buf` as a NUL-terminated decimal string, truncated
+ * to `size` bytes.
+ *
+ * Returns the number of bytes copied, 0 for a zero-length read, or a
+ * negative errno:
+ *   -EINVAL       not opened for reading
+ *   -EWOULDBLOCK  nothing pending and O_NONBLOCK is set
+ *   -ERESTARTSYS  interrupted by a signal
+ *   -EFAULT       the user buffer could not be written
+ */
+static ssize_t
+irqh_dev_read(struct file *fp, char *buf, size_t size, loff_t *offp)
+{
+	char b[20];
+	int m = -ERESTARTSYS, n = NR_IRQS;
+
+	DECLARE_WAITQUEUE(wait, current);
+
+	DPRINTK("ENTER\n");
+	if ((fp->f_mode & FMODE_READ) == 0)
+		return -EINVAL;
+	/* A zero-length read must not consume a pending event. */
+	if (size == 0)
+		return 0;
+
+	spin_lock_irq(&irqh_lock);
+	while (!signal_pending(current)) {
+		if ((n = irqh_ffs(pending)) < NR_IRQS) {
+			/* Claim the event now; the copy happens unlocked. */
+			irqh_clear(pending, n);
+			break;
+		}
+		if (fp->f_flags & O_NONBLOCK) {
+			m = -EWOULDBLOCK;
+			break;
+		}
+		add_wait_queue(&irqh_proc_list, &wait);
+		set_current_state(TASK_INTERRUPTIBLE);
+		spin_unlock_irq(&irqh_lock);
+		schedule();
+		spin_lock_irq(&irqh_lock);
+		__set_current_state(TASK_RUNNING);
+		remove_wait_queue(&irqh_proc_list, &wait);
+	}
+	spin_unlock_irq(&irqh_lock);
+
+	/* No event claimed: signal or non-blocking miss. */
+	if (n >= NR_IRQS)
+		return m;
+
+	m = snprintf(b, sizeof b, "%d", n) + 1;
+	if (m > size)
+		m = size;
+	/*
+	 * copy_to_user() may fault and sleep, so it must not run under the
+	 * spinlock with interrupts disabled.
+	 */
+	if (copy_to_user(buf, b, m)) {
+		/* Put the event back so it is not lost. */
+		spin_lock_irq(&irqh_lock);
+		irqh_set(pending, n);
+		wake_up_interruptible(&irqh_proc_list);
+		spin_unlock_irq(&irqh_lock);
+		return -EFAULT;
+	}
+	return m;
+}
+
+/* File operations of the irqhook device: only read and write are provided. */
+static struct file_operations irqh_chrdev_ops = {
+	.owner = THIS_MODULE,
+	.read  = irqh_dev_read,
+	.write = irqh_dev_write,
+};
+
+#define irqh_MISCDEV_MINOR MISC_DYNAMIC_MINOR
+
+/* Misc device descriptor handed to misc_register()/misc_deregister(). */
+static struct miscdevice irqh_miscdev = {
+	.minor = irqh_MISCDEV_MINOR,
+	.name  = irqh_MODULE_NAME,
+	.fops  = &irqh_chrdev_ops,
+};
+
+/*
+ * Module entry point: initialises the lock and wait queue, then registers
+ * the misc device.
+ *
+ * Returns 0 on success or the error code from misc_register().
+ */
+static int __init
+irqh_init(void)
+{
+	int rc;
+
+	DPRINTK("ENTER\n");
+
+	/*
+	 * Initialise before registering: once the device is registered its
+	 * read/write handlers may run and use both objects.
+	 */
+	init_waitqueue_head(&irqh_proc_list);
+	spin_lock_init(&irqh_lock);
+
+	rc = misc_register(&irqh_miscdev);
+	if (rc) {
+		printk(KERN_ERR irqh_MODULE_NAME ": " "cannot register misc device\n");
+		DPRINTK("EXIT, returning %d\n", rc);
+		return rc;
+	}
+
+	printk(KERN_INFO irqh_DRIVER_NAME " loaded\n");
+
+	DPRINTK("EXIT, returning 0\n");
+	return 0;
+}
+
+/*
+ * Module exit: frees every IRQ still marked in `handled`, then deregisters
+ * the misc device.
+ */
+static void __exit
+irqh_cleanup(void)
+{
+	int irq;
+
+	DPRINTK("ENTER\n");
+
+	for (irq = irqh_ffs(handled); irq < NR_IRQS; irq = irqh_ffs(handled)) {
+		irqh_clear(handled, irq);
+		free_irq(irq, (void *)irqh_interrupt);
+	}
+	misc_deregister(&irqh_miscdev);
+
+	DPRINTK("EXIT\n");
+}
+
+module_init (irqh_init);
+module_exit (irqh_cleanup);
diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c
index 4ebf1ff..7d45385 100644
--- a/qemu/hw/apic.c
+++ b/qemu/hw/apic.c
@@ -23,6 +23,8 @@
#include "qemu-kvm.h"
+#include "pci-passthrough.h"
+
//#define DEBUG_APIC
//#define DEBUG_IOAPIC
@@ -389,6 +391,7 @@ static void apic_eoi(APICState *s)
/* XXX: send the EOI packet to the APIC bus to allow the I/O APIC to
set the remote IRR bit for level triggered interrupts. */
apic_update_irq(s);
+ pt_ack_mirq(isrv);
}
static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
@@ -1144,6 +1147,7 @@ static void ioapic_mem_writel(void *opaque,
target_phys_addr_t addr, uint32_t va
} else {
s->ioredtbl[index] &= ~0xffffffffULL;
s->ioredtbl[index] |= val;
+ pt_set_vector(index, (val << 24) >> 24);
}
ioapic_service(s);
}
diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c
index 250d7ef..1cf1d0f 100644
--- a/qemu/hw/pci-passthrough.c
+++ b/qemu/hw/pci-passthrough.c
@@ -398,9 +398,11 @@ again:
return 0;
}
+static int pt_bind_mirq(int bus, int dev, int fn);
+
static pt_dev_t *register_real_device(PCIBus *e_bus, const char *e_dev_name,
int e_devfn, uint8_t r_bus, uint8_t r_dev,
- uint8_t r_func)
+ uint8_t r_func, uint32_t machine_irq)
{
int rc;
pt_dev_t *pci_dev;
@@ -435,10 +437,24 @@ static pt_dev_t *register_real_device(PCIBus *e_bus,
const char *e_dev_name,
e_intx = pci_dev->dev.config[0x3d] - 1;
pci_dev->intpin = e_intx;
pci_dev->run = 0;
+ pci_dev->mirq = machine_irq;
pci_dev->girq = 0;
pci_dev->h_busnr = r_bus;
pci_dev->h_devfn = PCI_DEVFN(r_dev, r_func);
+ /* bind machine_irq to device */
+ if (machine_irq && (!kvm_enabled() || !qemu_kvm_irqchip_in_kernel())) {
+ DEBUG(logfile, "Binding mirq %u to device=0x%x intpin=0x%x\n",
+ machine_irq, e_device, pci_dev->intpin);
+ rc = pt_bind_mirq(r_bus, r_dev, r_func);
+ if (rc) {
+ fprintf(stderr, "pt_bind %d failed rc=%d\n",
+ pci_dev->mirq, rc);
+ return NULL;
+ }
+ sprintf(pci_dev->sirq, "%d", pci_dev->mirq);
+ }
+
#ifdef KVM_CAP_PCI_PASSTHROUGH
if (kvm_enabled()) {
struct kvm_pci_passthrough_dev pci_pt_dev;
@@ -464,9 +480,9 @@ static pt_dev_t *register_real_device(PCIBus *e_bus, const
char *e_dev_name,
}
#endif
- fprintf(logfile, "Registered host PCI device %02x:%02x.%1x "
+ fprintf(logfile, "Registered host PCI device %02x:%02x.%1x-%u "
"as guest device %02x:%02x.%1x\n",
- r_bus, r_dev, r_func,
+ r_bus, r_dev, r_func, machine_irq,
pci_bus_num(e_bus), e_device, r_func);
return pci_dev;
@@ -478,6 +494,7 @@ struct {
int bus;
int dev;
int func;
+ int irq;
pt_dev_t *ptdev;
} ptdevs[MAX_PTDEVS];
@@ -518,6 +535,62 @@ void pci_pt_update_irq(PCIDevice *d)
}
#endif
+/* Bottom half scheduled by the IRQ thread; it raises the guest interrupt. */
+static QEMUBH *ptbh;
+/* File descriptor of IRQHOOK_DEV. */
+static int irqfd;
+/*
+ * Per-vector, NULL-terminated lists of assigned devices, indexed by APIC
+ * vector.  Sized 0xff so that vector 0xfe, the highest one accepted, is a
+ * valid index (0x10 - 0xfe according to intel IOAPIC spec).
+ */
+static pt_dev_t **apicv[0xff];
+#define IRQHOOK_DEV "/dev/irqhook"
+/* Thread that runs pt_irq(). */
+static pthread_t irqthread;
+
+/*
+ * IRQ thread body.
+ *
+ * Blocks all signals in this thread, then loops reading host IRQ numbers
+ * (decimal strings) from irqfd.  For each non-zero IRQ it flags every
+ * assigned device whose mirq matches and schedules the bottom half.
+ *
+ * arg is unused.  Returns NULL when read() fails with anything other than
+ * EINTR; exits the process if irqfd is not a usable descriptor.
+ */
+static void *pt_irq(void *arg)
+{
+    char buf[20];
+    char *end;
+    ssize_t len;
+    int irq;
+    int i;
+    pt_dev_t *dev;
+    sigset_t signals;
+
+    /* sigprocmask() is unspecified in a multithreaded process. */
+    sigfillset(&signals);
+    pthread_sigmask(SIG_BLOCK, &signals, NULL);
+
+    /* 0 means "never opened"; -1 is what a failed open() returns. */
+    if (irqfd <= 0) {
+        fprintf(stderr, "pt_irq: irqfd %d, exiting\n", irqfd);
+        exit(-1);
+    }
+
+    for (;;) {
+        /* Keep one byte free so the buffer can always be terminated. */
+        len = read(irqfd, buf, sizeof(buf) - 1);
+        if (len == -1) {
+            if (errno == EINTR)
+                continue;
+            perror("irq read");
+            break;
+        }
+        buf[len] = '\0';
+
+        irq = (int)strtol(buf, &end, 10);
+        DEBUG("read irq %d\n", irq);
+        if (end == buf || !irq)
+            continue;
+
+        for (i = 0; i < nptdevs; i++)
+            if ((dev = ptdevs[i].ptdev) && dev->mirq == irq)
+                dev->run = 1;
+        qemu_bh_schedule(ptbh);
+    }
+    return NULL;
+}
+
+/*
+ * Bottom-half callback: for every assigned device flagged with `run`, raise
+ * its guest interrupt pin, clear the flag and, if a CPU is currently
+ * executing, request that it exits.  The argument is unused.
+ */
+static void pt_bh(void *p)
+{
+    pt_dev_t *ptdev;
+    int idx;
+
+    for (idx = 0; idx < nptdevs; idx++) {
+        ptdev = ptdevs[idx].ptdev;
+        if (!ptdev || !ptdev->run)
+            continue;
+
+        qemu_set_irq(ptdev->dev.irq[ptdev->intpin], 1);
+        ptdev->run = 0;
+        if (cpu_single_env)
+            cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
+    }
+}
+
int pt_init_system(void)
{
/* Do we have any devices to be assigned? */
@@ -526,6 +599,17 @@ int pt_init_system(void)
iopl(3);
+ if (!kvm_enabled() || !qemu_kvm_irqchip_in_kernel()) {
+ if (!(ptbh = qemu_bh_new(pt_bh, 0))) {
+ fprintf(stderr, "Couldn't register PT callback\n");
+ return -1;
+ }
+ if (!(irqfd = open(IRQHOOK_DEV, O_RDWR))) {
+ fprintf(stderr, "Couldn't open PT irqhook dev, make "
+ "sure the irqhook module is loaded\n");
+ return -1;
+ }
+ }
return 0;
}
@@ -544,7 +628,7 @@ int pt_init_device(PCIBus *bus, int *index)
dev = register_real_device(bus, ptdevs[i].name, -1,
ptdevs[i].bus, ptdevs[i].dev,
- ptdevs[i].func);
+ ptdevs[i].func, ptdevs[i].irq);
if (dev == NULL) {
fprintf(stderr, "Error: Couldn't register device %s\n",
ptdevs[i].name);
@@ -552,13 +636,23 @@ int pt_init_device(PCIBus *bus, int *index)
}
ptdevs[i].ptdev = dev;
+ if (!*index && kvm_enabled() && !qemu_kvm_irqchip_in_kernel()) {
+ if (ptdevs[i].irq == 0) {
+ fprintf(stderr, "Please specify the irq for the
device\n");
+ return -1;
+ }
+ if (pthread_create(&irqthread, 0, pt_irq, dev)) {
+ fprintf(stderr, "Couldn't create IRQ thread\n");
+ return -1;
+ }
+ }
--*index;
return ret;
}
void add_pci_passthrough_device(const char *arg)
{
- /* name/bus:dev.func */
+ /* name/bus:dev.func-intr */
char *cp, *cp1;
if (nptdevs >= MAX_PTDEVS) {
@@ -583,12 +677,75 @@ void add_pci_passthrough_device(const char *arg)
cp = cp1 + 1;
ptdevs[nptdevs].func = strtoul(cp, &cp1, 16);
- if (*cp1 != 0)
+
+ /* In case of irqchip_in_kernel, we don't want the next param */
+ if (*cp1 == 0) {
+ ptdevs[nptdevs].irq = 0;
+ goto skip_irq;
+ }
+ if (*cp1 != '-')
goto bad;
+ cp = cp1 + 1;
+ ptdevs[nptdevs].irq = strtoul(cp, &cp1, 0);
+ if (*cp1 != 0)
+ goto bad;
+skip_irq:
nptdevs++;
return;
bad:
fprintf(stderr, "passthrough arg (%s) not in the form of "
- "name/bus:dev.func\n", arg);
+ "name/bus:dev.func-intr\n", arg);
+}
+
+/*
+ * Called on guest EOI for `vector`.  For every assigned device bound to that
+ * vector, writes the device's host IRQ string to irqfd and lowers the guest
+ * interrupt pin.  Vectors outside the apicv table, or with no bound devices,
+ * are ignored.
+ */
+void pt_ack_mirq(int vector)
+{
+    pt_dev_t **p;
+    size_t len;
+
+    /* The vector comes from the guest-visible APIC; never index blindly. */
+    if (vector < 0 || (size_t)vector >= sizeof(apicv) / sizeof(apicv[0]))
+        return;
+
+    p = apicv[vector];
+    if (!p)
+        return;
+
+    for (; *p; p++) {
+        len = strlen((*p)->sirq);
+        /* Report a failed ack so a stalled host IRQ can be diagnosed. */
+        if (write(irqfd, (*p)->sirq, len) != (ssize_t)len)
+            perror("pt_ack_mirq");
+        qemu_set_irq((*p)->dev.irq[(*p)->intpin], 0);
+    }
+}
+
+/*
+ * Asks the irqhook device to bind the host PCI device bus:dev.fn by writing
+ * "+<bus>:<dev>.<fn>" to irqfd.  Returns 0 on success; if the command is not
+ * written in full, reports the error and exits the process.
+ */
+static int pt_bind_mirq(int bus, int dev, int fn)
+{
+    char cmd[64];
+    size_t cmdlen;
+    ssize_t written;
+
+    sprintf(cmd, "+%d:%d.%d", bus, dev, fn);
+    cmdlen = strlen(cmd);
+
+    written = write(irqfd, cmd, cmdlen);
+    if ((size_t)written != cmdlen) {
+        perror("pt_bind_mirq");
+        fprintf(stderr, "Make sure the irqhook module is loaded\n");
+        exit(-1);
+    }
+    return 0;
+}
+
+int piix3_get_pin(int pic_irq);
+
+/*
+ * Records which assigned devices are delivered through APIC `vector`.
+ *
+ * Every not-yet-bound assigned device whose interrupt pin maps to the same
+ * pin as `irq` is appended to the NULL-terminated list apicv[vector] and
+ * marked bound.  Vectors outside the apicv table are ignored.  If the list
+ * cannot be grown, an error is printed and the remaining devices are left
+ * unbound.
+ */
+void pt_set_vector(int irq, int vector)
+{
+    int i;
+    int pin = piix3_get_pin(irq);
+    size_t count;
+    pt_dev_t *pt, **list;
+
+    DEBUG("irq %d vector %d\n", irq, vector);
+    if (vector < 0 || (size_t)vector >= sizeof(apicv) / sizeof(apicv[0]))
+        return;
+    for (i = 0; i < nptdevs; i++) {
+        pt = ptdevs[i].ptdev;
+        if (!pt || pt->bound)
+            continue;
+        if (pci_map_irq(&pt->dev, pt->intpin) != pin)
+            continue;
+
+        /* Count existing entries, stopping at the NULL terminator. */
+        count = 0;
+        if (apicv[vector])
+            while (apicv[vector][count])
+                count++;
+
+        /* Room for the existing entries, the new one and the terminator. */
+        list = realloc(apicv[vector], (count + 2) * sizeof(*list));
+        if (!list) {
+            fprintf(stderr, "pt_set_vector: out of memory\n");
+            return;
+        }
+        list[count] = pt;
+        list[count + 1] = NULL;
+        apicv[vector] = list;
+        pt->bound = 1;
+    }
+    DEBUG("done\n");
+}
diff --git a/qemu/hw/pci-passthrough.h b/qemu/hw/pci-passthrough.h
index 60df017..cd63482 100644
--- a/qemu/hw/pci-passthrough.h
+++ b/qemu/hw/pci-passthrough.h
@@ -75,6 +75,7 @@ typedef struct pt_dev_s {
pt_region_t v_addrs[PCI_NUM_REGIONS];
pci_dev_t real_device;
int run;
+ int mirq;
int girq;
char sirq[4];
unsigned char h_busnr;
diff --git a/qemu/vl.c b/qemu/vl.c
index 4946e9a..33decf5 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -7788,9 +7788,11 @@ static void help(int exitcode)
"-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n"
"-no-kvm-pit disable KVM kernel mode PIT\n"
#if defined(TARGET_I386) || defined(TARGET_X86_64)
- "-pcidevice name/bus:dev.func\n"
+ "-pcidevice name/bus:dev.func[-intr] \n"
" expose a PCI device to the guest OS.\n"
" 'name' is just used for debug logs.\n"
+ " [-intr] is the interrupt (from the lspci -v
output),\n"
+ " in case you use the irqhook module for interrupt
routing.\n"
#endif
#endif
#ifdef TARGET_I386
--
1.5.4.3
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html