On Thu, Jul 21, 2016 at 08:45:30PM +0300, Michael S. Tsirkin wrote: > On Thu, Jul 14, 2016 at 01:56:22PM +0800, Peter Xu wrote: > > This patch enables interrupt remapping for PCI devices. > > > > To play the trick, one memory region "iommu_ir" is added as child region > > of the original iommu memory region, covering range 0xfeeXXXXX (which is > > the address range for APIC). All the writes to this range will be taken > > as MSI, and translation is carried out only when IR is enabled. > > > > Idea suggested by Paolo Bonzini. > > > > Signed-off-by: Peter Xu <pet...@redhat.com> > > --- > > hw/i386/intel_iommu.c | 241 > > +++++++++++++++++++++++++++++++++++++++++ > > hw/i386/intel_iommu_internal.h | 2 + > > include/hw/i386/intel_iommu.h | 66 +++++++++++ > > 3 files changed, 309 insertions(+) > > > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > > index 6a6cb3b..3d1b15d 100644 > > --- a/hw/i386/intel_iommu.c > > +++ b/hw/i386/intel_iommu.c > > @@ -1982,6 +1982,242 @@ static Property vtd_properties[] = { > > DEFINE_PROP_END_OF_LIST(), > > }; > > > > +/* Read IRTE entry with specific index */ > > +static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, > > + VTD_IRTE *entry) > > +{ > > + dma_addr_t addr = 0x00; > > + > > + addr = iommu->intr_root + index * sizeof(*entry); > > + if (dma_memory_read(&address_space_memory, addr, entry, > > + sizeof(*entry))) { > > + VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64 > > + " + %"PRIu16, iommu->intr_root, index); > > + return -VTD_FR_IR_ROOT_INVAL; > > + } > > + > > + if (!entry->present) { > > + VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE" > > + " entry index %u value 0x%"PRIx64 " 0x%"PRIx64, > > + index, le64_to_cpu(entry->data[1]), > > + le64_to_cpu(entry->data[0])); > > + return -VTD_FR_IR_ENTRY_P; > > + } > > + > > + if (entry->__reserved_0 || entry->__reserved_1 || \ > > + entry->__reserved_2) { > > + VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16 > > + " 
reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64, > > + index, le64_to_cpu(entry->data[1]), > > + le64_to_cpu(entry->data[0])); > > + return -VTD_FR_IR_IRTE_RSVD; > > + } > > + > > + /* > > + * TODO: Check Source-ID corresponds to SVT (Source Validation > > + * Type) bits > > + */ > > + > > + return 0; > > +} > > + > > +/* Fetch IRQ information of specific IR index */ > > +static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, > > VTDIrq *irq) > > +{ > > + VTD_IRTE irte; > > + int ret = 0; > > + > > + bzero(&irte, sizeof(irte)); > > + > > + ret = vtd_irte_get(iommu, index, &irte); > > + if (ret) { > > + return ret; > > + } > > + > > + irq->trigger_mode = irte.trigger_mode; > > + irq->vector = irte.vector; > > + irq->delivery_mode = irte.delivery_mode; > > + /* Not support EIM yet: please refer to vt-d 9.10 DST bits */ > > +#define VTD_IR_APIC_DEST_MASK (0xff00ULL) > > +#define VTD_IR_APIC_DEST_SHIFT (8) > > + irq->dest = (le32_to_cpu(irte.dest_id) & VTD_IR_APIC_DEST_MASK) >> \ > > + VTD_IR_APIC_DEST_SHIFT; > > + irq->dest_mode = irte.dest_mode; > > + irq->redir_hint = irte.redir_hint; > > + > > + VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u," > > + "deliver:%u,dest:%u,dest_mode:%u", index, > > + irq->trigger_mode, irq->vector, irq->delivery_mode, > > + irq->dest, irq->dest_mode); > > + > > + return 0; > > +} > > + > > +/* Generate one MSI message from VTDIrq info */ > > +static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out) > > +{ > > + VTD_MSIMessage msg = {}; > > + > > + /* Generate address bits */ > > + msg.dest_mode = irq->dest_mode; > > + msg.redir_hint = irq->redir_hint; > > + msg.dest = irq->dest; > > + msg.__addr_head = cpu_to_le32(0xfee); > > + /* Keep this from original MSI address bits */ > > + msg.__not_used = irq->msi_addr_last_bits; > > + > > + /* Generate data bits */ > > + msg.vector = irq->vector; > > + msg.delivery_mode = irq->delivery_mode; > > + msg.level = 1; > > + msg.trigger_mode = 
irq->trigger_mode; > > + > > + msg_out->address = msg.msi_addr; > > + msg_out->data = msg.msi_data; > > +} > > + > > +/* Interrupt remapping for MSI/MSI-X entry */ > > +static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, > > + MSIMessage *origin, > > + MSIMessage *translated) > > +{ > > + int ret = 0; > > + VTD_IR_MSIAddress addr; > > + uint16_t index; > > + VTDIrq irq = {0}; > > + > > + assert(origin && translated); > > + > > + if (!iommu || !iommu->intr_enabled) { > > + goto do_not_translate; > > + } > > + > > + if (origin->address & VTD_MSI_ADDR_HI_MASK) { > > + VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero" > > + " during interrupt remapping: 0x%"PRIx32, > > + (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> > > \ > > + VTD_MSI_ADDR_HI_SHIFT)); > > + return -VTD_FR_IR_REQ_RSVD; > > + } > > + > > + addr.data = origin->address & VTD_MSI_ADDR_LO_MASK; > > Below you treat data as LE, but here you use VTD_MSI_ADDR_LO_MASK > which is native endian. This looks wrong to me.
You are right. I see that this is merged in master. Will fix this in a separate patch. Thanks, -- peterx