Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=759f89e03c9e5656ff18c02e21b439506f7c0cdc
Commit:     759f89e03c9e5656ff18c02e21b439506f7c0cdc
Parent:     a2cd15586e630b0870bf34783568d83901890743
Author:     David S. Miller <[EMAIL PROTECTED]>
AuthorDate: Thu Oct 11 03:16:13 2007 -0700
Committer:  David S. Miller <[EMAIL PROTECTED]>
CommitDate: Sat Oct 13 21:53:13 2007 -0700

    [SPARC64]: Consolidate MSI support code.
    
    This also makes us use the MSI queues correctly.
    
    Each MSI queue is serviced by a normal sun4u/sun4v INO interrupt
    handler.  This handler runs the MSI queue and dispatches the
    virtual interrupts indicated by arriving MSIs in that MSI queue.
    
    All of the common logic is placed in pci_msi.c, with callbacks to
    handle the PCI controller specific aspects of the operations.
    
    This common infrastructure will make it much easier to add MSG
    support.
    
    Signed-off-by: David S. Miller <[EMAIL PROTECTED]>
---
 arch/sparc64/kernel/Makefile    |    1 +
 arch/sparc64/kernel/irq.c       |  230 ++-------------------
 arch/sparc64/kernel/pci_fire.c  |  380 +++++++++-------------------------
 arch/sparc64/kernel/pci_impl.h  |   31 +++
 arch/sparc64/kernel/pci_msi.c   |  433 +++++++++++++++++++++++++++++++++++++++
 arch/sparc64/kernel/pci_sun4v.c |  400 ++++++++++--------------------------
 include/asm-sparc64/irq.h       |    6 +-
 7 files changed, 704 insertions(+), 777 deletions(-)

diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index 40d2f3a..112c46e 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-$(CONFIG_PCI)       += ebus.o isa.o pci_common.o \
                            pci_psycho.o pci_sabre.o pci_schizo.o \
                            pci_sun4v.o pci_sun4v_asm.o pci_fire.o
+obj-$(CONFIG_PCI_MSI)  += pci_msi.o
 obj-$(CONFIG_SMP)       += smp.o trampoline.o hvtramp.o
 obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o
 obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index 7f5a4c7..045ab27 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -21,7 +21,6 @@
 #include <linux/seq_file.h>
 #include <linux/bootmem.h>
 #include <linux/irq.h>
-#include <linux/msi.h>
 
 #include <asm/ptrace.h>
 #include <asm/processor.h>
@@ -92,39 +91,46 @@ static struct {
        unsigned int dev_handle;
        unsigned int dev_ino;
 } virt_to_real_irq_table[NR_IRQS];
+static DEFINE_SPINLOCK(virt_irq_alloc_lock);
 
-static unsigned char virt_irq_alloc(unsigned int real_irq)
+unsigned char virt_irq_alloc(unsigned int real_irq)
 {
+       unsigned long flags;
        unsigned char ent;
 
        BUILD_BUG_ON(NR_IRQS >= 256);
 
+       spin_lock_irqsave(&virt_irq_alloc_lock, flags);
+
        for (ent = 1; ent < NR_IRQS; ent++) {
                if (!virt_to_real_irq_table[ent].irq)
                        break;
        }
        if (ent >= NR_IRQS) {
                printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
-               return 0;
+               ent = 0;
+       } else {
+               virt_to_real_irq_table[ent].irq = real_irq;
        }
 
-       virt_to_real_irq_table[ent].irq = real_irq;
+       spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
 
        return ent;
 }
 
 #ifdef CONFIG_PCI_MSI
-static void virt_irq_free(unsigned int virt_irq)
+void virt_irq_free(unsigned int virt_irq)
 {
-       unsigned int real_irq;
+       unsigned long flags;
 
        if (virt_irq >= NR_IRQS)
                return;
 
-       real_irq = virt_to_real_irq_table[virt_irq].irq;
+       spin_lock_irqsave(&virt_irq_alloc_lock, flags);
+
        virt_to_real_irq_table[virt_irq].irq = 0;
 
-       __bucket(real_irq)->virt_irq = 0;
+       spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
 }
 #endif
 
@@ -217,27 +223,8 @@ struct irq_handler_data {
        void            (*pre_handler)(unsigned int, void *, void *);
        void            *pre_handler_arg1;
        void            *pre_handler_arg2;
-
-       u32             msi;
 };
 
-void sparc64_set_msi(unsigned int virt_irq, u32 msi)
-{
-       struct irq_handler_data *data = get_irq_chip_data(virt_irq);
-
-       if (data)
-               data->msi = msi;
-}
-
-u32 sparc64_get_msi(unsigned int virt_irq)
-{
-       struct irq_handler_data *data = get_irq_chip_data(virt_irq);
-
-       if (data)
-               return data->msi;
-       return 0xffffffff;
-}
-
 static inline struct ino_bucket *virt_irq_to_bucket(unsigned int virt_irq)
 {
        unsigned int real_irq = virt_to_real_irq(virt_irq);
@@ -405,32 +392,6 @@ static void sun4v_irq_disable(unsigned int virt_irq)
        }
 }
 
-#ifdef CONFIG_PCI_MSI
-static void sun4u_msi_enable(unsigned int virt_irq)
-{
-       sun4u_irq_enable(virt_irq);
-       unmask_msi_irq(virt_irq);
-}
-
-static void sun4u_msi_disable(unsigned int virt_irq)
-{
-       mask_msi_irq(virt_irq);
-       sun4u_irq_disable(virt_irq);
-}
-
-static void sun4v_msi_enable(unsigned int virt_irq)
-{
-       sun4v_irq_enable(virt_irq);
-       unmask_msi_irq(virt_irq);
-}
-
-static void sun4v_msi_disable(unsigned int virt_irq)
-{
-       mask_msi_irq(virt_irq);
-       sun4v_irq_disable(virt_irq);
-}
-#endif
-
 static void sun4v_irq_end(unsigned int virt_irq)
 {
        struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
@@ -585,39 +546,6 @@ static struct irq_chip sun4v_irq = {
        .set_affinity   = sun4v_set_affinity,
 };
 
-static struct irq_chip sun4v_irq_ack = {
-       .typename       = "sun4v+ack",
-       .enable         = sun4v_irq_enable,
-       .disable        = sun4v_irq_disable,
-       .ack            = run_pre_handler,
-       .end            = sun4v_irq_end,
-       .set_affinity   = sun4v_set_affinity,
-};
-
-#ifdef CONFIG_PCI_MSI
-static struct irq_chip sun4u_msi = {
-       .typename       = "sun4u+msi",
-       .mask           = mask_msi_irq,
-       .unmask         = unmask_msi_irq,
-       .enable         = sun4u_msi_enable,
-       .disable        = sun4u_msi_disable,
-       .ack            = run_pre_handler,
-       .end            = sun4u_irq_end,
-       .set_affinity   = sun4u_set_affinity,
-};
-
-static struct irq_chip sun4v_msi = {
-       .typename       = "sun4v+msi",
-       .mask           = mask_msi_irq,
-       .unmask         = unmask_msi_irq,
-       .enable         = sun4v_msi_enable,
-       .disable        = sun4v_msi_disable,
-       .ack            = run_pre_handler,
-       .end            = sun4v_irq_end,
-       .set_affinity   = sun4v_set_affinity,
-};
-#endif
-
 static struct irq_chip sun4v_virq = {
        .typename       = "vsun4v",
        .enable         = sun4v_virq_enable,
@@ -626,42 +554,27 @@ static struct irq_chip sun4v_virq = {
        .set_affinity   = sun4v_virt_set_affinity,
 };
 
-static struct irq_chip sun4v_virq_ack = {
-       .typename       = "vsun4v+ack",
-       .enable         = sun4v_virq_enable,
-       .disable        = sun4v_virq_disable,
-       .ack            = run_pre_handler,
-       .end            = sun4v_virq_end,
-       .set_affinity   = sun4v_virt_set_affinity,
-};
-
 void irq_install_pre_handler(int virt_irq,
                             void (*func)(unsigned int, void *, void *),
                             void *arg1, void *arg2)
 {
        struct irq_handler_data *data = get_irq_chip_data(virt_irq);
-       struct irq_chip *chip;
+       struct irq_chip *chip = get_irq_chip(virt_irq);
+
+       if (WARN_ON(chip == &sun4v_irq || chip == &sun4v_virq)) {
+               printk(KERN_ERR "IRQ: Trying to install pre-handler on "
+                      "sun4v irq %u\n", virt_irq);
+               return;
+       }
 
        data->pre_handler = func;
        data->pre_handler_arg1 = arg1;
        data->pre_handler_arg2 = arg2;
 
-       chip = get_irq_chip(virt_irq);
-       if (chip == &sun4u_irq_ack ||
-           chip == &sun4v_irq_ack ||
-           chip == &sun4v_virq_ack
-#ifdef CONFIG_PCI_MSI
-           || chip == &sun4u_msi
-           || chip == &sun4v_msi
-#endif
-           )
+       if (chip == &sun4u_irq_ack)
                return;
 
-       chip = (chip == &sun4u_irq ?
-               &sun4u_irq_ack :
-               (chip == &sun4v_irq ?
-                &sun4v_irq_ack : &sun4v_virq_ack));
-       set_irq_chip(virt_irq, chip);
+       set_irq_chip(virt_irq, &sun4u_irq_ack);
 }
 
 unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
@@ -765,103 +678,6 @@ unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
        return virq;
 }
 
-#ifdef CONFIG_PCI_MSI
-unsigned int sun4v_build_msi(u32 devhandle, unsigned int *virt_irq_p,
-                            unsigned int msi_start, unsigned int msi_end)
-{
-       struct ino_bucket *bucket;
-       struct irq_handler_data *data;
-       unsigned long sysino;
-       unsigned int devino;
-
-       BUG_ON(tlb_type != hypervisor);
-
-       /* Find a free devino in the given range.  */
-       for (devino = msi_start; devino < msi_end; devino++) {
-               sysino = sun4v_devino_to_sysino(devhandle, devino);
-               bucket = &ivector_table[sysino];
-               if (!bucket->virt_irq)
-                       break;
-       }
-       if (devino >= msi_end)
-               return -ENOSPC;
-
-       sysino = sun4v_devino_to_sysino(devhandle, devino);
-       bucket = &ivector_table[sysino];
-       bucket->virt_irq = virt_irq_alloc(__irq(bucket));
-       *virt_irq_p = bucket->virt_irq;
-       set_irq_chip(bucket->virt_irq, &sun4v_msi);
-
-       data = get_irq_chip_data(bucket->virt_irq);
-       if (unlikely(data))
-               return devino;
-
-       data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
-       if (unlikely(!data)) {
-               virt_irq_free(*virt_irq_p);
-               return -ENOMEM;
-       }
-       set_irq_chip_data(bucket->virt_irq, data);
-
-       data->imap = ~0UL;
-       data->iclr = ~0UL;
-
-       return devino;
-}
-
-void sun4v_destroy_msi(unsigned int virt_irq)
-{
-       virt_irq_free(virt_irq);
-}
-
-unsigned int sun4u_build_msi(u32 portid, unsigned int *virt_irq_p,
-                            unsigned int msi_start, unsigned int msi_end,
-                            unsigned long imap_base, unsigned long iclr_base)
-{
-       struct ino_bucket *bucket;
-       struct irq_handler_data *data;
-       unsigned long sysino;
-       unsigned int devino;
-
-       /* Find a free devino in the given range.  */
-       for (devino = msi_start; devino < msi_end; devino++) {
-               sysino = (portid << 6) | devino;
-               bucket = &ivector_table[sysino];
-               if (!bucket->virt_irq)
-                       break;
-       }
-       if (devino >= msi_end)
-               return -ENOSPC;
-
-       sysino = (portid << 6) | devino;
-       bucket = &ivector_table[sysino];
-       bucket->virt_irq = virt_irq_alloc(__irq(bucket));
-       *virt_irq_p = bucket->virt_irq;
-       set_irq_chip(bucket->virt_irq, &sun4u_msi);
-
-       data = get_irq_chip_data(bucket->virt_irq);
-       if (unlikely(data))
-               return devino;
-
-       data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
-       if (unlikely(!data)) {
-               virt_irq_free(*virt_irq_p);
-               return -ENOMEM;
-       }
-       set_irq_chip_data(bucket->virt_irq, data);
-
-       data->imap = (imap_base + (devino * 0x8UL));
-       data->iclr = (iclr_base + (devino * 0x8UL));
-
-       return devino;
-}
-
-void sun4u_destroy_msi(unsigned int virt_irq)
-{
-       virt_irq_free(virt_irq);
-}
-#endif
-
 void ack_bad_irq(unsigned int virt_irq)
 {
        struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
diff --git a/arch/sparc64/kernel/pci_fire.c b/arch/sparc64/kernel/pci_fire.c
index 090f265..bcf6a5d 100644
--- a/arch/sparc64/kernel/pci_fire.c
+++ b/arch/sparc64/kernel/pci_fire.c
@@ -161,90 +161,92 @@ struct pci_msiq_entry {
 #define MSI_64BIT_ADDR                 0x034008UL
 #define  MSI_64BIT_ADDR_VAL            0xffffffffffff0000UL
 
-/* For now this just runs as a pre-handler for the real interrupt handler.
- * So we just walk through the queue and ACK all the entries, update the
- * head pointer, and return.
- *
- * In the longer term it would be nice to do something more integrated
- * wherein we can pass in some of this MSI info to the drivers.  This
- * would be most useful for PCIe fabric error messages, although we could
- * invoke those directly from the loop here in order to pass the info around.
- */
-static void pci_msi_prehandler(unsigned int ino, void *data1, void *data2)
+static int pci_fire_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+                            unsigned long *head)
+{
+       *head = fire_read(pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid));
+       return 0;
+}
+
+static int pci_fire_dequeue_msi(struct pci_pbm_info *pbm, unsigned long msiqid,
+                               unsigned long *head, unsigned long *msi)
 {
-       unsigned long msiqid, orig_head, head, type_fmt, type;
-       struct pci_pbm_info *pbm = data1;
+       unsigned long type_fmt, type, msi_num;
        struct pci_msiq_entry *base, *ep;
 
-       msiqid = (unsigned long) data2;
+       base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) * 8192));
+       ep = &base[*head];
 
-       head = fire_read(pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid));
+       if ((ep->word0 & MSIQ_WORD0_FMT_TYPE) == 0)
+               return 0;
 
-       orig_head = head;
-       base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) * 8192));
-       ep = &base[head];
-       while ((ep->word0 & MSIQ_WORD0_FMT_TYPE) != 0) {
-               unsigned long msi_num;
-
-               type_fmt = ((ep->word0 & MSIQ_WORD0_FMT_TYPE) >>
-                           MSIQ_WORD0_FMT_TYPE_SHIFT);
-               type = (type_fmt >>3);
-               if (unlikely(type != MSIQ_TYPE_MSI32 &&
-                            type != MSIQ_TYPE_MSI64))
-                       goto bad_type;
-
-               msi_num = ((ep->word0 & MSIQ_WORD0_DATA0) >>
-                          MSIQ_WORD0_DATA0_SHIFT);
-
-               fire_write(pbm->pbm_regs + MSI_CLEAR(msi_num),
-                          MSI_CLEAR_EQWR_N);
-
-               /* Clear the entry.  */
-               ep->word0 &= ~MSIQ_WORD0_FMT_TYPE;
-
-               /* Go to next entry in ring.  */
-               head++;
-               if (head >= pbm->msiq_ent_count)
-                       head = 0;
-               ep = &base[head];
-       }
+       type_fmt = ((ep->word0 & MSIQ_WORD0_FMT_TYPE) >>
+                   MSIQ_WORD0_FMT_TYPE_SHIFT);
+       type = (type_fmt >> 3);
+       if (unlikely(type != MSIQ_TYPE_MSI32 &&
+                    type != MSIQ_TYPE_MSI64))
+               return -EINVAL;
 
-       if (likely(head != orig_head)) {
-               /* ACK entries by updating head pointer.  */
-               fire_write(pbm->pbm_regs +
-                          EVENT_QUEUE_HEAD(msiqid),
-                          head);
-       }
-       return;
+       *msi = msi_num = ((ep->word0 & MSIQ_WORD0_DATA0) >>
+                         MSIQ_WORD0_DATA0_SHIFT);
 
-bad_type:
-       printk(KERN_EMERG "MSI: Entry has bad type %lx\n", type);
-       return;
+       fire_write(pbm->pbm_regs + MSI_CLEAR(msi_num),
+                  MSI_CLEAR_EQWR_N);
+
+       /* Clear the entry.  */
+       ep->word0 &= ~MSIQ_WORD0_FMT_TYPE;
+
+       /* Go to next entry in ring.  */
+       (*head)++;
+       if (*head >= pbm->msiq_ent_count)
+               *head = 0;
+
+       return 1;
 }
 
-static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
+static int pci_fire_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+                            unsigned long head)
 {
-       unsigned long size, bits_per_ulong;
+       fire_write(pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid), head);
+       return 0;
+}
 
-       bits_per_ulong = sizeof(unsigned long) * 8;
-       size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
-       size /= 8;
-       BUG_ON(size % sizeof(unsigned long));
+static int pci_fire_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
+                             unsigned long msi, int is_msi64)
+{
+       u64 val;
 
-       pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
-       if (!pbm->msi_bitmap)
-               return -ENOMEM;
+       val = fire_read(pbm->pbm_regs + MSI_MAP(msi));
+       val &= ~(MSI_MAP_EQNUM);
+       val |= msiqid;
+       fire_write(pbm->pbm_regs + MSI_MAP(msi), val);
+
+       fire_write(pbm->pbm_regs + MSI_CLEAR(msi),
+                  MSI_CLEAR_EQWR_N);
+
+       val = fire_read(pbm->pbm_regs + MSI_MAP(msi));
+       val |= MSI_MAP_VALID;
+       fire_write(pbm->pbm_regs + MSI_MAP(msi), val);
 
        return 0;
 }
 
-static void msi_bitmap_free(struct pci_pbm_info *pbm)
+static int pci_fire_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
 {
-       kfree(pbm->msi_bitmap);
-       pbm->msi_bitmap = NULL;
+       unsigned long msiqid;
+       u64 val;
+
+       val = fire_read(pbm->pbm_regs + MSI_MAP(msi));
+       msiqid = (val & MSI_MAP_EQNUM);
+
+       val &= ~MSI_MAP_VALID;
+
+       fire_write(pbm->pbm_regs + MSI_MAP(msi), val);
+
+       return 0;
 }
 
-static int msi_queue_alloc(struct pci_pbm_info *pbm)
+static int pci_fire_msiq_alloc(struct pci_pbm_info *pbm)
 {
        unsigned long pages, order, i;
 
@@ -279,241 +281,65 @@ static int msi_queue_alloc(struct pci_pbm_info *pbm)
        return 0;
 }
 
-static int alloc_msi(struct pci_pbm_info *pbm)
+static void pci_fire_msiq_free(struct pci_pbm_info *pbm)
 {
-       int i;
+       unsigned long pages, order;
 
-       for (i = 0; i < pbm->msi_num; i++) {
-               if (!test_and_set_bit(i, pbm->msi_bitmap))
-                       return i + pbm->msi_first;
-       }
+       order = get_order(512 * 1024);
+       pages = (unsigned long) pbm->msi_queues;
 
-       return -ENOENT;
-}
+       free_pages(pages, order);
 
-static void free_msi(struct pci_pbm_info *pbm, int msi_num)
-{
-       msi_num -= pbm->msi_first;
-       clear_bit(msi_num, pbm->msi_bitmap);
+       pbm->msi_queues = NULL;
 }
 
-static int pci_setup_msi_irq(unsigned int *virt_irq_p,
-                            struct pci_dev *pdev,
-                            struct msi_desc *entry)
+static int pci_fire_msiq_build_irq(struct pci_pbm_info *pbm,
+                                  unsigned long msiqid,
+                                  unsigned long devino)
 {
-       struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
-       unsigned long devino, msiqid, cregs, imap_off;
-       struct msi_msg msg;
-       int msi_num, err;
+       unsigned long cregs = (unsigned long) pbm->pbm_regs;
+       unsigned long imap_reg, iclr_reg, int_ctrlr;
+       unsigned int virt_irq;
+       int fixup;
        u64 val;
 
-       *virt_irq_p = 0;
-
-       msi_num = alloc_msi(pbm);
-       if (msi_num < 0)
-               return msi_num;
-
-       cregs = (unsigned long) pbm->pbm_regs;
+       imap_reg = cregs + (0x001000UL + (devino * 0x08UL));
+       iclr_reg = cregs + (0x001400UL + (devino * 0x08UL));
 
-       err = sun4u_build_msi(pbm->portid, virt_irq_p,
-                             pbm->msiq_first_devino,
-                             (pbm->msiq_first_devino +
-                              pbm->msiq_num),
-                             cregs + 0x001000UL,
-                             cregs + 0x001400UL);
-       if (err < 0)
-               goto out_err;
-       devino = err;
+       /* XXX iterate amongst the 4 IRQ controllers XXX */
+       int_ctrlr = (1UL << 6);
 
-       imap_off = 0x001000UL + (devino * 0x8UL);
+       val = fire_read(imap_reg);
+       val |= (1UL << 63) | int_ctrlr;
+       fire_write(imap_reg, val);
 
-       val = fire_read(pbm->pbm_regs + imap_off);
-       val |= (1UL << 63) | (1UL << 6);
-       fire_write(pbm->pbm_regs + imap_off, val);
+       fixup = ((pbm->portid << 6) | devino) - int_ctrlr;
 
-       msiqid = ((devino - pbm->msiq_first_devino) +
-                 pbm->msiq_first);
+       virt_irq = build_irq(fixup, iclr_reg, imap_reg);
+       if (!virt_irq)
+               return -ENOMEM;
 
        fire_write(pbm->pbm_regs +
                   EVENT_QUEUE_CONTROL_SET(msiqid),
                   EVENT_QUEUE_CONTROL_SET_EN);
 
-       val = fire_read(pbm->pbm_regs + MSI_MAP(msi_num));
-       val &= ~(MSI_MAP_EQNUM);
-       val |= msiqid;
-       fire_write(pbm->pbm_regs + MSI_MAP(msi_num), val);
-
-       fire_write(pbm->pbm_regs + MSI_CLEAR(msi_num),
-                  MSI_CLEAR_EQWR_N);
-
-       val = fire_read(pbm->pbm_regs + MSI_MAP(msi_num));
-       val |= MSI_MAP_VALID;
-       fire_write(pbm->pbm_regs + MSI_MAP(msi_num), val);
-
-       sparc64_set_msi(*virt_irq_p, msi_num);
-
-       if (entry->msi_attrib.is_64) {
-               msg.address_hi = pbm->msi64_start >> 32;
-               msg.address_lo = pbm->msi64_start & 0xffffffff;
-       } else {
-               msg.address_hi = 0;
-               msg.address_lo = pbm->msi32_start;
-       }
-       msg.data = msi_num;
-
-       set_irq_msi(*virt_irq_p, entry);
-       write_msi_msg(*virt_irq_p, &msg);
-
-       irq_install_pre_handler(*virt_irq_p,
-                               pci_msi_prehandler,
-                               pbm, (void *) msiqid);
-
-       return 0;
-
-out_err:
-       free_msi(pbm, msi_num);
-       return err;
+       return virt_irq;
 }
 
-static void pci_teardown_msi_irq(unsigned int virt_irq,
-                                struct pci_dev *pdev)
-{
-       struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
-       unsigned long msiqid, msi_num;
-       u64 val;
-
-       msi_num = sparc64_get_msi(virt_irq);
-
-       val = fire_read(pbm->pbm_regs + MSI_MAP(msi_num));
-
-       msiqid = (val & MSI_MAP_EQNUM);
-
-       val &= ~MSI_MAP_VALID;
-       fire_write(pbm->pbm_regs + MSI_MAP(msi_num), val);
-
-       fire_write(pbm->pbm_regs + EVENT_QUEUE_CONTROL_CLEAR(msiqid),
-                  EVENT_QUEUE_CONTROL_CLEAR_DIS);
-
-       free_msi(pbm, msi_num);
-
-       /* The sun4u_destroy_msi() will liberate the devino and thus the MSIQ
-        * allocation.
-        */
-       sun4u_destroy_msi(virt_irq);
-}
+static const struct sparc64_msiq_ops pci_fire_msiq_ops = {
+       .get_head       =       pci_fire_get_head,
+       .dequeue_msi    =       pci_fire_dequeue_msi,
+       .set_head       =       pci_fire_set_head,
+       .msi_setup      =       pci_fire_msi_setup,
+       .msi_teardown   =       pci_fire_msi_teardown,
+       .msiq_alloc     =       pci_fire_msiq_alloc,
+       .msiq_free      =       pci_fire_msiq_free,
+       .msiq_build_irq =       pci_fire_msiq_build_irq,
+};
 
 static void pci_fire_msi_init(struct pci_pbm_info *pbm)
 {
-       const u32 *val;
-       int len;
-
-       val = of_get_property(pbm->prom_node, "#msi-eqs", &len);
-       if (!val || len != 4)
-               goto no_msi;
-       pbm->msiq_num = *val;
-       if (pbm->msiq_num) {
-               const struct msiq_prop {
-                       u32 first_msiq;
-                       u32 num_msiq;
-                       u32 first_devino;
-               } *mqp;
-               const struct msi_range_prop {
-                       u32 first_msi;
-                       u32 num_msi;
-               } *mrng;
-               const struct addr_range_prop {
-                       u32 msi32_high;
-                       u32 msi32_low;
-                       u32 msi32_len;
-                       u32 msi64_high;
-                       u32 msi64_low;
-                       u32 msi64_len;
-               } *arng;
-
-               val = of_get_property(pbm->prom_node, "msi-eq-size", &len);
-               if (!val || len != 4)
-                       goto no_msi;
-
-               pbm->msiq_ent_count = *val;
-
-               mqp = of_get_property(pbm->prom_node,
-                                     "msi-eq-to-devino", &len);
-               if (!mqp)
-                       mqp = of_get_property(pbm->prom_node,
-                                             "msi-eq-devino", &len);
-               if (!mqp || len != sizeof(struct msiq_prop))
-                       goto no_msi;
-
-               pbm->msiq_first = mqp->first_msiq;
-               pbm->msiq_first_devino = mqp->first_devino;
-
-               val = of_get_property(pbm->prom_node, "#msi", &len);
-               if (!val || len != 4)
-                       goto no_msi;
-               pbm->msi_num = *val;
-
-               mrng = of_get_property(pbm->prom_node, "msi-ranges", &len);
-               if (!mrng || len != sizeof(struct msi_range_prop))
-                       goto no_msi;
-               pbm->msi_first = mrng->first_msi;
-
-               val = of_get_property(pbm->prom_node, "msi-data-mask", &len);
-               if (!val || len != 4)
-                       goto no_msi;
-               pbm->msi_data_mask = *val;
-
-               val = of_get_property(pbm->prom_node, "msix-data-width", &len);
-               if (!val || len != 4)
-                       goto no_msi;
-               pbm->msix_data_width = *val;
-
-               arng = of_get_property(pbm->prom_node, "msi-address-ranges",
-                                      &len);
-               if (!arng || len != sizeof(struct addr_range_prop))
-                       goto no_msi;
-               pbm->msi32_start = ((u64)arng->msi32_high << 32) |
-                       (u64) arng->msi32_low;
-               pbm->msi64_start = ((u64)arng->msi64_high << 32) |
-                       (u64) arng->msi64_low;
-               pbm->msi32_len = arng->msi32_len;
-               pbm->msi64_len = arng->msi64_len;
-
-               if (msi_bitmap_alloc(pbm))
-                       goto no_msi;
-
-               if (msi_queue_alloc(pbm)) {
-                       msi_bitmap_free(pbm);
-                       goto no_msi;
-               }
-
-               printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
-                      "devino[0x%x]\n",
-                      pbm->name,
-                      pbm->msiq_first, pbm->msiq_num,
-                      pbm->msiq_ent_count,
-                      pbm->msiq_first_devino);
-               printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
-                      "width[%u]\n",
-                      pbm->name,
-                      pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
-                      pbm->msix_data_width);
-               printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
-                      "addr64[0x%lx:0x%x]\n",
-                      pbm->name,
-                      pbm->msi32_start, pbm->msi32_len,
-                      pbm->msi64_start, pbm->msi64_len);
-               printk(KERN_INFO "%s: MSI queues at RA [%016lx]\n",
-                      pbm->name,
-                      __pa(pbm->msi_queues));
-       }
-       pbm->setup_msi_irq = pci_setup_msi_irq;
-       pbm->teardown_msi_irq = pci_teardown_msi_irq;
-
-       return;
-
-no_msi:
-       pbm->msiq_num = 0;
-       printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
+       sparc64_pbm_msi_init(pbm, &pci_fire_msiq_ops);
 }
 #else /* CONFIG_PCI_MSI */
 static void pci_fire_msi_init(struct pci_pbm_info *pbm)
diff --git a/arch/sparc64/kernel/pci_impl.h b/arch/sparc64/kernel/pci_impl.h
index f660c2b..ccbb188 100644
--- a/arch/sparc64/kernel/pci_impl.h
+++ b/arch/sparc64/kernel/pci_impl.h
@@ -29,6 +29,33 @@
 #define PCI_STC_FLUSHFLAG_SET(STC) \
        (*((STC)->strbuf_flushflag) != 0UL)
 
+#ifdef CONFIG_PCI_MSI
+struct pci_pbm_info;
+struct sparc64_msiq_ops {
+       int (*get_head)(struct pci_pbm_info *pbm, unsigned long msiqid,
+                       unsigned long *head);
+       int (*dequeue_msi)(struct pci_pbm_info *pbm, unsigned long msiqid,
+                          unsigned long *head, unsigned long *msi);
+       int (*set_head)(struct pci_pbm_info *pbm, unsigned long msiqid,
+                       unsigned long head);
+       int (*msi_setup)(struct pci_pbm_info *pbm, unsigned long msiqid,
+                        unsigned long msi, int is_msi64);
+       int (*msi_teardown)(struct pci_pbm_info *pbm, unsigned long msi);
+       int (*msiq_alloc)(struct pci_pbm_info *pbm);
+       void (*msiq_free)(struct pci_pbm_info *pbm);
+       int (*msiq_build_irq)(struct pci_pbm_info *pbm, unsigned long msiqid,
+                             unsigned long devino);
+};
+
+extern void sparc64_pbm_msi_init(struct pci_pbm_info *pbm,
+                                const struct sparc64_msiq_ops *ops);
+
+struct sparc64_msiq_cookie {
+       struct pci_pbm_info *pbm;
+       unsigned long msiqid;
+};
+#endif
+
 struct pci_controller_info;
 
 struct pci_pbm_info {
@@ -90,6 +117,8 @@ struct pci_pbm_info {
        u32                             msiq_ent_count;
        u32                             msiq_first;
        u32                             msiq_first_devino;
+       u32                             msiq_rotor;
+       struct sparc64_msiq_cookie      *msiq_irq_cookies;
        u32                             msi_num;
        u32                             msi_first;
        u32                             msi_data_mask;
@@ -100,9 +129,11 @@ struct pci_pbm_info {
        u32                             msi64_len;
        void                            *msi_queues;
        unsigned long                   *msi_bitmap;
+       unsigned int                    *msi_irq_table;
        int (*setup_msi_irq)(unsigned int *virt_irq_p, struct pci_dev *pdev,
                             struct msi_desc *entry);
        void (*teardown_msi_irq)(unsigned int virt_irq, struct pci_dev *pdev);
+       const struct sparc64_msiq_ops   *msi_ops;
 #endif /* !(CONFIG_PCI_MSI) */
 
        /* This PBM's streaming buffer. */
diff --git a/arch/sparc64/kernel/pci_msi.c b/arch/sparc64/kernel/pci_msi.c
new file mode 100644
index 0000000..0fa33b1
--- /dev/null
+++ b/arch/sparc64/kernel/pci_msi.c
@@ -0,0 +1,433 @@
+/* pci_msi.c: Sparc64 MSI support common layer.
+ *
+ * Copyright (C) 2007 David S. Miller ([EMAIL PROTECTED])
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+
+#include "pci_impl.h"
+
+/* Interrupt handler servicing one MSI queue.
+ *
+ * @irq:    the queue's interrupt number (not used here).
+ * @cookie: the struct sparc64_msiq_cookie registered with request_irq().
+ *
+ * Every pending entry is pulled off the queue through ->dequeue_msi() and
+ * the virtual IRQ recorded for that MSI in pbm->msi_irq_table is
+ * dispatched.  If any entries were consumed, the new head is written back
+ * with ->set_head().
+ *
+ * Returns IRQ_HANDLED on success, IRQ_NONE if a queue callback failed.
+ */
+static irqreturn_t sparc64_msiq_interrupt(int irq, void *cookie)
+{
+       struct sparc64_msiq_cookie *msiq_cookie = cookie;
+       struct pci_pbm_info *pbm = msiq_cookie->pbm;
+       unsigned long msiqid = msiq_cookie->msiqid;
+       const struct sparc64_msiq_ops *ops;
+       unsigned long orig_head, head;
+       int err;
+
+       ops = pbm->msi_ops;
+
+       err = ops->get_head(pbm, msiqid, &head);
+       if (unlikely(err < 0))
+               goto err_get_head;
+
+       orig_head = head;
+       for (;;) {
+               unsigned long msi, idx;
+               unsigned int virt_irq;
+
+               err = ops->dequeue_msi(pbm, msiqid, &head, &msi);
+               if (unlikely(err < 0))
+                       goto err_dequeue;
+
+               /* Zero means the queue is drained. */
+               if (err == 0)
+                       break;
+
+               /* The MSI number comes from the queue entry; never let it
+                * index outside msi_irq_table.  An MSI below msi_first
+                * wraps to a huge unsigned value and is rejected as well.
+                */
+               idx = msi - pbm->msi_first;
+               if (unlikely(idx >= pbm->msi_num)) {
+                       if (printk_ratelimit())
+                               printk(KERN_ERR "MSI: Bad MSI %lu on "
+                                      "msiqid[%lu]\n", msi, msiqid);
+                       continue;
+               }
+
+               /* Slots hold 0 until an MSI is set up and ~0U after it is
+                * torn down; neither value is a dispatchable virtual IRQ.
+                */
+               virt_irq = pbm->msi_irq_table[idx];
+               if (likely(virt_irq != 0 && virt_irq != ~0U))
+                       __do_IRQ(virt_irq);
+       }
+       if (likely(head != orig_head)) {
+               err = ops->set_head(pbm, msiqid, head);
+               if (unlikely(err < 0))
+                       goto err_set_head;
+       }
+       return IRQ_HANDLED;
+
+err_get_head:
+       printk(KERN_EMERG "MSI: Get head on msiqid[%lu] gives error %d\n",
+              msiqid, err);
+       goto err_out;
+
+err_dequeue:
+       printk(KERN_EMERG "MSI: Dequeue head[%lu] from msiqid[%lu] "
+              "gives error %d\n",
+              head, msiqid, err);
+       goto err_out;
+
+err_set_head:
+       printk(KERN_EMERG "MSI: Set head[%lu] on msiqid[%lu] "
+              "gives error %d\n",
+              head, msiqid, err);
+       goto err_out;
+
+err_out:
+       return IRQ_NONE;
+}
+
+/* Hand out the PBM's MSI queues in round-robin order.
+ *
+ * Returns the ID of the queue the rotor currently points at and advances
+ * the rotor, wrapping back to zero once it reaches msiq_num.  The rotor
+ * update runs under a function-local spinlock with interrupts disabled.
+ */
+static u32 pick_msiq(struct pci_pbm_info *pbm)
+{
+       static DEFINE_SPINLOCK(rotor_lock);
+       unsigned long flags;
+       u32 cur, next, qid;
+
+       spin_lock_irqsave(&rotor_lock, flags);
+
+       cur = pbm->msiq_rotor;
+       qid = pbm->msiq_first + cur;
+
+       next = cur + 1;
+       if (next >= pbm->msiq_num)
+               next = 0;
+       pbm->msiq_rotor = next;
+
+       spin_unlock_irqrestore(&rotor_lock, flags);
+
+       return qid;
+}
+
+
+/* Claim the lowest free MSI of this PBM.
+ *
+ * Returns the MSI number (bitmap index plus msi_first), or -ENOENT when
+ * every bit in msi_bitmap is already taken.
+ */
+static int alloc_msi(struct pci_pbm_info *pbm)
+{
+       int bit = 0;
+
+       while (bit < pbm->msi_num) {
+               if (test_and_set_bit(bit, pbm->msi_bitmap) == 0)
+                       return bit + pbm->msi_first;
+               bit++;
+       }
+
+       return -ENOENT;
+}
+
+/* Return MSI number @msi_num to the PBM's free pool by clearing its bit
+ * (relative to msi_first) in msi_bitmap.
+ */
+static void free_msi(struct pci_pbm_info *pbm, int msi_num)
+{
+       int bit = msi_num - pbm->msi_first;
+
+       clear_bit(bit, pbm->msi_bitmap);
+}
+
+/* irq_chip installed on every virtual IRQ that backs an MSI.  Enable and
+ * disable are wired to the same routines as unmask and mask.
+ */
+static struct irq_chip msi_irq = {
+       .typename       = "PCI-MSI",
+       .mask           = mask_msi_irq,
+       .unmask         = unmask_msi_irq,
+       .enable         = unmask_msi_irq,
+       .disable        = mask_msi_irq,
+       /* XXX affinity XXX */
+};
+
+/* Allocate and program one MSI for @pdev.
+ *
+ * A virtual IRQ and a free MSI number are allocated, the MSI is bound to
+ * the queue chosen by pick_msiq() through ->msi_setup(), and the MSI
+ * address/data message is written out for @entry.
+ *
+ * On success 0 is returned and *virt_irq_p holds the new virtual IRQ.
+ * On failure a negative errno is returned and *virt_irq_p is zero.
+ */
+int sparc64_setup_msi_irq(unsigned int *virt_irq_p,
+                         struct pci_dev *pdev,
+                         struct msi_desc *entry)
+{
+       struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+       const struct sparc64_msiq_ops *ops = pbm->msi_ops;
+       unsigned int virq;
+       struct msi_msg msg;
+       int msi, err;
+       u32 qid;
+
+       virq = virt_irq_alloc(~0);
+       *virt_irq_p = virq;
+       if (!virq)
+               return -ENOMEM;
+
+       set_irq_chip(virq, &msi_irq);
+
+       msi = alloc_msi(pbm);
+       if (unlikely(msi < 0)) {
+               err = msi;
+               goto out_virt_irq_free;
+       }
+
+       qid = pick_msiq(pbm);
+
+       err = ops->msi_setup(pbm, qid, msi,
+                            (entry->msi_attrib.is_64 ? 1 : 0));
+       if (err)
+               goto out_msi_free;
+
+       /* Lets the queue handler map the MSI back to its virtual IRQ. */
+       pbm->msi_irq_table[msi - pbm->msi_first] = virq;
+
+       msg.data = msi;
+       if (!entry->msi_attrib.is_64) {
+               msg.address_hi = 0;
+               msg.address_lo = pbm->msi32_start;
+       } else {
+               msg.address_hi = pbm->msi64_start >> 32;
+               msg.address_lo = pbm->msi64_start & 0xffffffff;
+       }
+
+       set_irq_msi(virq, entry);
+       write_msi_msg(virq, &msg);
+
+       return 0;
+
+out_msi_free:
+       free_msi(pbm, msi);
+
+out_virt_irq_free:
+       set_irq_chip(virq, NULL);
+       virt_irq_free(virq);
+       *virt_irq_p = 0;
+
+       return err;
+}
+
+/* Undo sparc64_setup_msi_irq() for @virt_irq.
+ *
+ * The MSI bound to @virt_irq is found by scanning msi_irq_table; its slot
+ * is overwritten with ~0U before ->msi_teardown() is called.  If the
+ * callback fails, an error is logged and neither the MSI number nor the
+ * virtual IRQ is released.
+ */
+void sparc64_teardown_msi_irq(unsigned int virt_irq,
+                             struct pci_dev *pdev)
+{
+       struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+       const struct sparc64_msiq_ops *ops = pbm->msi_ops;
+       unsigned int msi_num;
+       int slot = 0;
+       int err;
+
+       while (slot < pbm->msi_num &&
+              pbm->msi_irq_table[slot] != virt_irq)
+               slot++;
+
+       if (slot >= pbm->msi_num) {
+               printk(KERN_ERR "%s: teardown: No MSI for irq %u\n",
+                      pbm->name, virt_irq);
+               return;
+       }
+
+       pbm->msi_irq_table[slot] = ~0U;
+       msi_num = pbm->msi_first + slot;
+
+       err = ops->msi_teardown(pbm, msi_num);
+       if (err) {
+               printk(KERN_ERR "%s: teardown: ops->teardown() on MSI %u, "
+                      "irq %u, gives error %d\n",
+                      pbm->name, msi_num, virt_irq, err);
+               return;
+       }
+
+       free_msi(pbm, msi_num);
+
+       set_irq_chip(virt_irq, NULL);
+       virt_irq_free(virt_irq);
+}
+
+/* Allocate the zeroed MSI allocation bitmap: one bit per MSI, rounded up
+ * to a whole number of unsigned longs.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
+{
+       const unsigned long ulong_bits = 8 * sizeof(unsigned long);
+       unsigned long nr_longs, bytes;
+
+       nr_longs = (pbm->msi_num + ulong_bits - 1) / ulong_bits;
+       bytes = nr_longs * sizeof(unsigned long);
+       BUG_ON(bytes % sizeof(unsigned long));
+
+       pbm->msi_bitmap = kzalloc(bytes, GFP_KERNEL);
+
+       return pbm->msi_bitmap ? 0 : -ENOMEM;
+}
+
+/* Release the MSI allocation bitmap and clear the PBM's pointer to it. */
+static void msi_bitmap_free(struct pci_pbm_info *pbm)
+{
+       unsigned long *bitmap = pbm->msi_bitmap;
+
+       pbm->msi_bitmap = NULL;
+       kfree(bitmap);
+}
+
+/* Allocate the two per-PBM lookup tables:
+ *
+ *   msiq_irq_cookies - one handler cookie per MSI queue, pre-filled with
+ *                      the PBM and the queue ID;
+ *   msi_irq_table    - one virtual IRQ slot per MSI, initially zero.
+ *
+ * Returns 0 on success or -ENOMEM, in which case neither table is left
+ * allocated.
+ */
+static int msi_table_alloc(struct pci_pbm_info *pbm)
+{
+       int bytes, q;
+
+       bytes = pbm->msiq_num * sizeof(struct sparc64_msiq_cookie);
+       pbm->msiq_irq_cookies = kzalloc(bytes, GFP_KERNEL);
+       if (!pbm->msiq_irq_cookies)
+               goto fail;
+
+       for (q = 0; q < pbm->msiq_num; q++) {
+               struct sparc64_msiq_cookie *ck = pbm->msiq_irq_cookies + q;
+
+               ck->pbm = pbm;
+               ck->msiqid = pbm->msiq_first + q;
+       }
+
+       bytes = pbm->msi_num * sizeof(unsigned int);
+       pbm->msi_irq_table = kzalloc(bytes, GFP_KERNEL);
+       if (pbm->msi_irq_table)
+               return 0;
+
+       kfree(pbm->msiq_irq_cookies);
+       pbm->msiq_irq_cookies = NULL;
+
+fail:
+       return -ENOMEM;
+}
+
+/* Free both tables set up by msi_table_alloc() and clear the pointers. */
+static void msi_table_free(struct pci_pbm_info *pbm)
+{
+       struct sparc64_msiq_cookie *cookies = pbm->msiq_irq_cookies;
+       unsigned int *irq_table = pbm->msi_irq_table;
+
+       pbm->msiq_irq_cookies = NULL;
+       kfree(cookies);
+
+       pbm->msi_irq_table = NULL;
+       kfree(irq_table);
+}
+
+/* Build the interrupt for one MSI queue and attach the queue handler.
+ *
+ * The handler cookie is the queue's entry in pbm->msiq_irq_cookies.  On
+ * success the IRQ number that was requested is stored in *irq_p so the
+ * caller can release it again.  Returns 0 or a negative errno.
+ */
+static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
+                                const struct sparc64_msiq_ops *ops,
+                                unsigned long msiqid,
+                                unsigned long devino,
+                                unsigned int *irq_p)
+{
+       int irq = ops->msiq_build_irq(pbm, msiqid, devino);
+       int err;
+
+       if (irq < 0)
+               return irq;
+
+       err = request_irq(irq, sparc64_msiq_interrupt, 0,
+                         "MSIQ",
+                         &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]);
+       if (err)
+               return err;
+
+       *irq_p = irq;
+       return 0;
+}
+
+/* Bring up the interrupt of every MSI queue of @pbm.
+ *
+ * Returns 0 on success or a negative errno.  On failure the handlers
+ * already registered for earlier queues are released again: the caller
+ * frees the cookie array after a failure, so no handler may be left
+ * behind pointing into it.
+ */
+static int sparc64_bringup_msi_queues(struct pci_pbm_info *pbm,
+                                     const struct sparc64_msiq_ops *ops)
+{
+       unsigned int *irqs;
+       int i, err = 0;
+
+       /* Scratch list of the IRQs requested so far, for unwinding only. */
+       irqs = kzalloc(pbm->msiq_num * sizeof(*irqs), GFP_KERNEL);
+       if (!irqs)
+               return -ENOMEM;
+
+       for (i = 0; i < pbm->msiq_num; i++) {
+               unsigned long msiqid = i + pbm->msiq_first;
+               unsigned long devino = i + pbm->msiq_first_devino;
+
+               err = bringup_one_msi_queue(pbm, ops, msiqid, devino,
+                                           &irqs[i]);
+               if (err)
+                       break;
+       }
+
+       if (err) {
+               /* Queue i failed; queues 0 .. i-1 hold a handler. */
+               while (--i >= 0)
+                       free_irq(irqs[i], &pbm->msiq_irq_cookies[i]);
+       }
+
+       kfree(irqs);
+
+       return err;
+}
+
+/* Probe and initialize MSI support for one PBM.
+ *
+ * @pbm: the PCI bus module being set up.
+ * @ops: controller specific MSI queue callbacks.
+ *
+ * The MSI queue/range/address properties are read from the PBM's PROM
+ * node; every property must be present with the expected length.  Then
+ * the MSI bitmap, the lookup tables and the queues themselves are
+ * allocated and the queue interrupts are brought up.
+ *
+ * If "#msi-eqs" is zero the function returns silently without enabling
+ * MSI.  On any other failure everything allocated so far is released,
+ * pbm->msiq_num is reset to zero and "No MSI support." is logged.
+ */
+void sparc64_pbm_msi_init(struct pci_pbm_info *pbm,
+                         const struct sparc64_msiq_ops *ops)
+{
+       const struct msiq_prop {
+               u32 first_msiq;
+               u32 num_msiq;
+               u32 first_devino;
+       } *mqp;
+       const struct msi_range_prop {
+               u32 first_msi;
+               u32 num_msi;
+       } *mrng;
+       const struct addr_range_prop {
+               u32 msi32_high;
+               u32 msi32_low;
+               u32 msi32_len;
+               u32 msi64_high;
+               u32 msi64_low;
+               u32 msi64_len;
+       } *arng;
+       const u32 *val;
+       int len;
+
+       val = of_get_property(pbm->prom_node, "#msi-eqs", &len);
+       if (!val || len != 4)
+               goto no_msi;
+       pbm->msiq_num = *val;
+       if (!pbm->msiq_num)
+               return;
+
+       val = of_get_property(pbm->prom_node, "msi-eq-size", &len);
+       if (!val || len != 4)
+               goto no_msi;
+       pbm->msiq_ent_count = *val;
+
+       /* The queue-to-devino mapping is looked up under two names. */
+       mqp = of_get_property(pbm->prom_node, "msi-eq-to-devino", &len);
+       if (!mqp)
+               mqp = of_get_property(pbm->prom_node, "msi-eq-devino", &len);
+       if (!mqp || len != sizeof(struct msiq_prop))
+               goto no_msi;
+       pbm->msiq_first = mqp->first_msiq;
+       pbm->msiq_first_devino = mqp->first_devino;
+
+       val = of_get_property(pbm->prom_node, "#msi", &len);
+       if (!val || len != 4)
+               goto no_msi;
+       pbm->msi_num = *val;
+
+       mrng = of_get_property(pbm->prom_node, "msi-ranges", &len);
+       if (!mrng || len != sizeof(struct msi_range_prop))
+               goto no_msi;
+       pbm->msi_first = mrng->first_msi;
+
+       val = of_get_property(pbm->prom_node, "msi-data-mask", &len);
+       if (!val || len != 4)
+               goto no_msi;
+       pbm->msi_data_mask = *val;
+
+       val = of_get_property(pbm->prom_node, "msix-data-width", &len);
+       if (!val || len != 4)
+               goto no_msi;
+       pbm->msix_data_width = *val;
+
+       arng = of_get_property(pbm->prom_node, "msi-address-ranges", &len);
+       if (!arng || len != sizeof(struct addr_range_prop))
+               goto no_msi;
+       pbm->msi32_start = ((u64)arng->msi32_high << 32) |
+               (u64) arng->msi32_low;
+       pbm->msi64_start = ((u64)arng->msi64_high << 32) |
+               (u64) arng->msi64_low;
+       pbm->msi32_len = arng->msi32_len;
+       pbm->msi64_len = arng->msi64_len;
+
+       if (msi_bitmap_alloc(pbm))
+               goto no_msi;
+
+       if (msi_table_alloc(pbm))
+               goto out_bitmap_free;
+
+       if (ops->msiq_alloc(pbm))
+               goto out_table_free;
+
+       /* The queue interrupt handler dereferences pbm->msi_ops, so it
+        * has to be valid before any handler gets registered.
+        */
+       pbm->msi_ops = ops;
+
+       if (sparc64_bringup_msi_queues(pbm, ops))
+               goto out_msiq_free;
+
+       printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
+              "devino[0x%x]\n",
+              pbm->name,
+              pbm->msiq_first, pbm->msiq_num,
+              pbm->msiq_ent_count,
+              pbm->msiq_first_devino);
+       printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
+              "width[%u]\n",
+              pbm->name,
+              pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
+              pbm->msix_data_width);
+       printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
+              "addr64[0x%lx:0x%x]\n",
+              pbm->name,
+              pbm->msi32_start, pbm->msi32_len,
+              pbm->msi64_start, pbm->msi64_len);
+       printk(KERN_INFO "%s: MSI queues at RA [%016lx]\n",
+              pbm->name,
+              __pa(pbm->msi_queues));
+
+       pbm->setup_msi_irq = sparc64_setup_msi_irq;
+       pbm->teardown_msi_irq = sparc64_teardown_msi_irq;
+       return;
+
+out_msiq_free:
+       pbm->msi_ops = NULL;
+       ops->msiq_free(pbm);
+
+out_table_free:
+       msi_table_free(pbm);
+
+out_bitmap_free:
+       msi_bitmap_free(pbm);
+
+no_msi:
+       pbm->msiq_num = 0;
+       printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
+}
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
index da724b1..97c45b2 100644
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -748,111 +748,102 @@ struct pci_sun4v_msiq_entry {
        u64             reserved2;
 };
 
-/* For now this just runs as a pre-handler for the real interrupt handler.
- * So we just walk through the queue and ACK all the entries, update the
- * head pointer, and return.
- *
- * In the longer term it would be nice to do something more integrated
- * wherein we can pass in some of this MSI info to the drivers.  This
- * would be most useful for PCIe fabric error messages, although we could
- * invoke those directly from the loop here in order to pass the info around.
- */
-static void pci_sun4v_msi_prehandler(unsigned int ino, void *data1, void *data2)
+static int pci_sun4v_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+                             unsigned long *head)
 {
-       struct pci_pbm_info *pbm = data1;
-       struct pci_sun4v_msiq_entry *base, *ep;
-       unsigned long msiqid, orig_head, head, type, err;
-
-       msiqid = (unsigned long) data2;
+       unsigned long err, limit;
 
-       head = 0xdeadbeef;
-       err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, &head);
+       err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, head);
        if (unlikely(err))
-               goto hv_error_get;
-
-       if (unlikely(head >= (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry))))
-               goto bad_offset;
-
-       head /= sizeof(struct pci_sun4v_msiq_entry);
-       orig_head = head;
-       base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
-                                  (pbm->msiq_ent_count *
-                                   sizeof(struct pci_sun4v_msiq_entry))));
-       ep = &base[head];
-       while ((ep->version_type & MSIQ_TYPE_MASK) != 0) {
-               type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
-               if (unlikely(type != MSIQ_TYPE_MSI32 &&
-                            type != MSIQ_TYPE_MSI64))
-                       goto bad_type;
-
-               pci_sun4v_msi_setstate(pbm->devhandle,
-                                      ep->msi_data /* msi_num */,
-                                      HV_MSISTATE_IDLE);
-
-               /* Clear the entry.  */
-               ep->version_type &= ~MSIQ_TYPE_MASK;
-
-               /* Go to next entry in ring.  */
-               head++;
-               if (head >= pbm->msiq_ent_count)
-                       head = 0;
-               ep = &base[head];
-       }
+               return -ENXIO;
 
-       if (likely(head != orig_head)) {
-               /* ACK entries by updating head pointer.  */
-               head *= sizeof(struct pci_sun4v_msiq_entry);
-               err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
-               if (unlikely(err))
-                       goto hv_error_set;
-       }
-       return;
+       limit = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
+       if (unlikely(*head >= limit))
+               return -EFBIG;
 
-hv_error_set:
-       printk(KERN_EMERG "MSI: Hypervisor set head gives error %lu\n", err);
-       goto hv_error_cont;
+       return 0;
+}
 
-hv_error_get:
-       printk(KERN_EMERG "MSI: Hypervisor get head gives error %lu\n", err);
+static int pci_sun4v_dequeue_msi(struct pci_pbm_info *pbm,
+                                unsigned long msiqid, unsigned long *head,
+                                unsigned long *msi)
+{
+       struct pci_sun4v_msiq_entry *ep;
+       unsigned long err, type;
 
-hv_error_cont:
-       printk(KERN_EMERG "MSI: devhandle[%x] msiqid[%lx] head[%lu]\n",
-              pbm->devhandle, msiqid, head);
-       return;
+       /* Note: void pointer arithmetic, 'head' is a byte offset  */
+       ep = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
+                                (pbm->msiq_ent_count *
+                                 sizeof(struct pci_sun4v_msiq_entry))) +
+             *head);
 
-bad_offset:
-       printk(KERN_EMERG "MSI: Hypervisor gives bad offset %lx max(%lx)\n",
-              head, pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry));
-       return;
+       if ((ep->version_type & MSIQ_TYPE_MASK) == 0)
+               return 0;
 
-bad_type:
-       printk(KERN_EMERG "MSI: Entry has bad type %lx\n", type);
-       return;
+       type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
+       if (unlikely(type != MSIQ_TYPE_MSI32 &&
+                    type != MSIQ_TYPE_MSI64))
+               return -EINVAL;
+
+       *msi = ep->msi_data;
+
+       err = pci_sun4v_msi_setstate(pbm->devhandle,
+                                    ep->msi_data /* msi_num */,
+                                    HV_MSISTATE_IDLE);
+       if (unlikely(err))
+               return -ENXIO;
+
+       /* Clear the entry.  */
+       ep->version_type &= ~MSIQ_TYPE_MASK;
+
+       (*head) += sizeof(struct pci_sun4v_msiq_entry);
+       if (*head >=
+           (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)))
+               *head = 0;
+
+       return 1;
 }
 
-static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
+static int pci_sun4v_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+                             unsigned long head)
 {
-       unsigned long size, bits_per_ulong;
+       unsigned long err;
 
-       bits_per_ulong = sizeof(unsigned long) * 8;
-       size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
-       size /= 8;
-       BUG_ON(size % sizeof(unsigned long));
+       err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
+       if (unlikely(err))
+               return -EINVAL;
 
-       pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
-       if (!pbm->msi_bitmap)
-               return -ENOMEM;
+       return 0;
+}
 
+/* Bind @msi to MSI queue @msiqid as a 64-bit or 32-bit MSI (selected by
+ * @is_msi64), put it into the idle state and mark it valid.
+ *
+ * Returns 0 on success, or -ENXIO as soon as one of the three hypervisor
+ * calls reports failure.
+ */
+static int pci_sun4v_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
+                              unsigned long msi, int is_msi64)
+{
+       if (pci_sun4v_msi_setmsiq(pbm->devhandle, msi, msiqid,
+                                 (is_msi64 ?
+                                  HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
+               return -ENXIO;
+       if (pci_sun4v_msi_setstate(pbm->devhandle, msi, HV_MSISTATE_IDLE))
+               return -ENXIO;
+       if (pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_VALID))
+               return -ENXIO;
        return 0;
 }
 
-static void msi_bitmap_free(struct pci_pbm_info *pbm)
+static int pci_sun4v_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
 {
-       kfree(pbm->msi_bitmap);
-       pbm->msi_bitmap = NULL;
+       unsigned long err, msiqid;
+
+       err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi, &msiqid);
+       if (err)
+               return -ENXIO;
+
+       pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_INVALID);
+
+       return 0;
 }
 
-static int msi_queue_alloc(struct pci_pbm_info *pbm)
+static int pci_sun4v_msiq_alloc(struct pci_pbm_info *pbm)
 {
        unsigned long q_size, alloc_size, pages, order;
        int i;
@@ -906,232 +897,59 @@ h_error:
        return -EINVAL;
 }
 
-
-static int alloc_msi(struct pci_pbm_info *pbm)
+static void pci_sun4v_msiq_free(struct pci_pbm_info *pbm)
 {
+       unsigned long q_size, alloc_size, pages, order;
        int i;
 
-       for (i = 0; i < pbm->msi_num; i++) {
-               if (!test_and_set_bit(i, pbm->msi_bitmap))
-                       return i + pbm->msi_first;
-       }
-
-       return -ENOENT;
-}
-
-static void free_msi(struct pci_pbm_info *pbm, int msi_num)
-{
-       msi_num -= pbm->msi_first;
-       clear_bit(msi_num, pbm->msi_bitmap);
-}
-
-static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p,
-                                  struct pci_dev *pdev,
-                                  struct msi_desc *entry)
-{
-       struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
-       unsigned long devino, msiqid;
-       struct msi_msg msg;
-       int msi_num, err;
-
-       *virt_irq_p = 0;
-
-       msi_num = alloc_msi(pbm);
-       if (msi_num < 0)
-               return msi_num;
-
-       err = sun4v_build_msi(pbm->devhandle, virt_irq_p,
-                             pbm->msiq_first_devino,
-                             (pbm->msiq_first_devino +
-                              pbm->msiq_num));
-       if (err < 0)
-               goto out_err;
-       devino = err;
-
-       msiqid = ((devino - pbm->msiq_first_devino) +
-                 pbm->msiq_first);
-
-       err = -EINVAL;
-       if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
-       if (err)
-               goto out_err;
-
-       if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
-               goto out_err;
-
-       if (pci_sun4v_msi_setmsiq(pbm->devhandle,
-                                 msi_num, msiqid,
-                                 (entry->msi_attrib.is_64 ?
-                                  HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
-               goto out_err;
-
-       if (pci_sun4v_msi_setstate(pbm->devhandle, msi_num, HV_MSISTATE_IDLE))
-               goto out_err;
-
-       if (pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_VALID))
-               goto out_err;
-
-       sparc64_set_msi(*virt_irq_p, msi_num);
+       for (i = 0; i < pbm->msiq_num; i++) {
+               unsigned long msiqid = pbm->msiq_first + i;
 
-       if (entry->msi_attrib.is_64) {
-               msg.address_hi = pbm->msi64_start >> 32;
-               msg.address_lo = pbm->msi64_start & 0xffffffff;
-       } else {
-               msg.address_hi = 0;
-               msg.address_lo = pbm->msi32_start;
+               (void) pci_sun4v_msiq_conf(pbm->devhandle, msiqid, 0UL, 0);
        }
-       msg.data = msi_num;
 
-       set_irq_msi(*virt_irq_p, entry);
-       write_msi_msg(*virt_irq_p, &msg);
-
-       irq_install_pre_handler(*virt_irq_p,
-                               pci_sun4v_msi_prehandler,
-                               pbm, (void *) msiqid);
+       q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
+       alloc_size = (pbm->msiq_num * q_size);
+       order = get_order(alloc_size);
 
-       return 0;
+       pages = (unsigned long) pbm->msi_queues;
 
-out_err:
-       free_msi(pbm, msi_num);
-       return err;
+       free_pages(pages, order);
 
+       pbm->msi_queues = NULL;
 }
 
-static void pci_sun4v_teardown_msi_irq(unsigned int virt_irq,
-                                      struct pci_dev *pdev)
+static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
+                                   unsigned long msiqid,
+                                   unsigned long devino)
 {
-       struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
-       unsigned long msiqid, err;
-       unsigned int msi_num;
-
-       msi_num = sparc64_get_msi(virt_irq);
-       err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi_num, &msiqid);
-       if (err) {
-               printk(KERN_ERR "%s: getmsiq gives error %lu\n",
-                      pbm->name, err);
-               return;
-       }
+       unsigned int virt_irq = sun4v_build_irq(pbm->devhandle, devino);
 
-       pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_INVALID);
-       pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_INVALID);
+       if (!virt_irq)
+               return -ENOMEM;
 
-       free_msi(pbm, msi_num);
+       if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
+               return -EINVAL;
+       if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
+               return -EINVAL;
 
-       /* The sun4v_destroy_msi() will liberate the devino and thus the MSIQ
-        * allocation.
-        */
-       sun4v_destroy_msi(virt_irq);
+       return virt_irq;
 }
 
+/* sun4v implementations of the MSI queue callbacks, handed to
+ * sparc64_pbm_msi_init() by pci_sun4v_msi_init().
+ */
+static const struct sparc64_msiq_ops pci_sun4v_msiq_ops = {
+       .get_head       =       pci_sun4v_get_head,
+       .dequeue_msi    =       pci_sun4v_dequeue_msi,
+       .set_head       =       pci_sun4v_set_head,
+       .msi_setup      =       pci_sun4v_msi_setup,
+       .msi_teardown   =       pci_sun4v_msi_teardown,
+       .msiq_alloc     =       pci_sun4v_msiq_alloc,
+       .msiq_free      =       pci_sun4v_msiq_free,
+       .msiq_build_irq =       pci_sun4v_msiq_build_irq,
+};
+
 static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
 {
-       const u32 *val;
-       int len;
-
-       val = of_get_property(pbm->prom_node, "#msi-eqs", &len);
-       if (!val || len != 4)
-               goto no_msi;
-       pbm->msiq_num = *val;
-       if (pbm->msiq_num) {
-               const struct msiq_prop {
-                       u32 first_msiq;
-                       u32 num_msiq;
-                       u32 first_devino;
-               } *mqp;
-               const struct msi_range_prop {
-                       u32 first_msi;
-                       u32 num_msi;
-               } *mrng;
-               const struct addr_range_prop {
-                       u32 msi32_high;
-                       u32 msi32_low;
-                       u32 msi32_len;
-                       u32 msi64_high;
-                       u32 msi64_low;
-                       u32 msi64_len;
-               } *arng;
-
-               val = of_get_property(pbm->prom_node, "msi-eq-size", &len);
-               if (!val || len != 4)
-                       goto no_msi;
-
-               pbm->msiq_ent_count = *val;
-
-               mqp = of_get_property(pbm->prom_node,
-                                     "msi-eq-to-devino", &len);
-               if (!mqp || len != sizeof(struct msiq_prop))
-                       goto no_msi;
-
-               pbm->msiq_first = mqp->first_msiq;
-               pbm->msiq_first_devino = mqp->first_devino;
-
-               val = of_get_property(pbm->prom_node, "#msi", &len);
-               if (!val || len != 4)
-                       goto no_msi;
-               pbm->msi_num = *val;
-
-               mrng = of_get_property(pbm->prom_node, "msi-ranges", &len);
-               if (!mrng || len != sizeof(struct msi_range_prop))
-                       goto no_msi;
-               pbm->msi_first = mrng->first_msi;
-
-               val = of_get_property(pbm->prom_node, "msi-data-mask", &len);
-               if (!val || len != 4)
-                       goto no_msi;
-               pbm->msi_data_mask = *val;
-
-               val = of_get_property(pbm->prom_node, "msix-data-width", &len);
-               if (!val || len != 4)
-                       goto no_msi;
-               pbm->msix_data_width = *val;
-
-               arng = of_get_property(pbm->prom_node, "msi-address-ranges",
-                                      &len);
-               if (!arng || len != sizeof(struct addr_range_prop))
-                       goto no_msi;
-               pbm->msi32_start = ((u64)arng->msi32_high << 32) |
-                       (u64) arng->msi32_low;
-               pbm->msi64_start = ((u64)arng->msi64_high << 32) |
-                       (u64) arng->msi64_low;
-               pbm->msi32_len = arng->msi32_len;
-               pbm->msi64_len = arng->msi64_len;
-
-               if (msi_bitmap_alloc(pbm))
-                       goto no_msi;
-
-               if (msi_queue_alloc(pbm)) {
-                       msi_bitmap_free(pbm);
-                       goto no_msi;
-               }
-
-               printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
-                      "devino[0x%x]\n",
-                      pbm->name,
-                      pbm->msiq_first, pbm->msiq_num,
-                      pbm->msiq_ent_count,
-                      pbm->msiq_first_devino);
-               printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
-                      "width[%u]\n",
-                      pbm->name,
-                      pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
-                      pbm->msix_data_width);
-               printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
-                      "addr64[0x%lx:0x%x]\n",
-                      pbm->name,
-                      pbm->msi32_start, pbm->msi32_len,
-                      pbm->msi64_start, pbm->msi64_len);
-               printk(KERN_INFO "%s: MSI queues at RA [%p]\n",
-                      pbm->name,
-                      pbm->msi_queues);
-       }
-       pbm->setup_msi_irq = pci_sun4v_setup_msi_irq;
-       pbm->teardown_msi_irq = pci_sun4v_teardown_msi_irq;
-
-       return;
-
-no_msi:
-       pbm->msiq_num = 0;
-       printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
+       sparc64_pbm_msi_init(pbm, &pci_sun4v_msiq_ops);
 }
 #else /* CONFIG_PCI_MSI */
 static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
diff --git a/include/asm-sparc64/irq.h b/include/asm-sparc64/irq.h
index 4de3725..bad3c28 100644
--- a/include/asm-sparc64/irq.h
+++ b/include/asm-sparc64/irq.h
@@ -59,8 +59,10 @@ extern unsigned int sun4u_build_msi(u32 portid, unsigned int *virt_irq_p,
 extern void sun4u_destroy_msi(unsigned int virt_irq);
 extern unsigned int sbus_build_irq(void *sbus, unsigned int ino);
 
-extern void sparc64_set_msi(unsigned int virt_irq, u32 msi);
-extern u32 sparc64_get_msi(unsigned int virt_irq);
+extern unsigned char virt_irq_alloc(unsigned int real_irq);
+#ifdef CONFIG_PCI_MSI
+extern void virt_irq_free(unsigned int virt_irq);
+#endif
 
 extern void fixup_irqs(void);
 
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to