On Mon, Aug 11, 2014 at 03:04:59PM +0800, Le Tan wrote: > Add support for emulating Intel IOMMU according to the VT-d specification for > the q35 chipset machine. Implement the logics for DMAR (DMA remapping) without > PASID support. The emulation supports register-based invalidation and primary > fault logging. > > Signed-off-by: Le Tan <tamlokv...@gmail.com> > --- > hw/i386/Makefile.objs | 1 + > hw/i386/intel_iommu.c | 1345 > ++++++++++++++++++++++++++++++++++++++++ > hw/i386/intel_iommu_internal.h | 345 +++++++++++ > include/hw/i386/intel_iommu.h | 90 +++ > 4 files changed, 1781 insertions(+) > create mode 100644 hw/i386/intel_iommu.c > create mode 100644 hw/i386/intel_iommu_internal.h > create mode 100644 include/hw/i386/intel_iommu.h > > diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs > index 48014ab..6936111 100644 > --- a/hw/i386/Makefile.objs > +++ b/hw/i386/Makefile.objs > @@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/ > obj-y += multiboot.o smbios.o > obj-y += pc.o pc_piix.o pc_q35.o > obj-y += pc_sysfw.o > +obj-y += intel_iommu.o > obj-$(CONFIG_XEN) += ../xenpv/ xen/ > > obj-y += kvmvapic.o > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > new file mode 100644 > index 0000000..b3a4f78 > --- /dev/null > +++ b/hw/i386/intel_iommu.c > @@ -0,0 +1,1345 @@ > +/* > + * QEMU emulation of an Intel IOMMU (VT-d) > + * (DMA Remapping device) > + * > + * Copyright (C) 2013 Knut Omang, Oracle <knut.om...@oracle.com> > + * Copyright (C) 2014 Le Tan, <tamlokv...@gmail.com> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + > + * You should have received a copy of the GNU General Public License along > + * with this program; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include "hw/sysbus.h" > +#include "exec/address-spaces.h" > +#include "intel_iommu_internal.h" > + > + > +/*#define DEBUG_INTEL_IOMMU*/ > +#ifdef DEBUG_INTEL_IOMMU > +enum { > + DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, > +}; > +#define VTD_DBGBIT(x) (1 << DEBUG_##x) > +static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR) | > + VTD_DBGBIT(FLOG); > + > +#define VTD_DPRINTF(what, fmt, ...) do { \ > + if (vtd_dbgflags & VTD_DBGBIT(what)) { \ > + fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ > + ## __VA_ARGS__); } \ > + } while (0) > +#else > +#define VTD_DPRINTF(what, fmt, ...) do {} while (0) > +#endif > + > +static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, > + uint64_t wmask, uint64_t w1cmask)
Please prefix functions with intel_iommu_ or vtd_; we don't want the build to fail when someone later adds a function with the same name in a global header, and the prefix also serves as a hint to the type of the first parameter. (A sketch of what I mean is at the end of this mail.)

> +{
> +    stq_le_p(&s->csr[addr], val);
> +    stq_le_p(&s->wmask[addr], wmask);
> +    stq_le_p(&s->w1cmask[addr], w1cmask);
> +}
> +
> +static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr,
> +                                  uint64_t mask)
> +{
> +    stq_le_p(&s->womask[addr], mask);
> +}
> +
> +static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val,
> +                               uint32_t wmask, uint32_t w1cmask)
> +{
> +    stl_le_p(&s->csr[addr], val);
> +    stl_le_p(&s->wmask[addr], wmask);
> +    stl_le_p(&s->w1cmask[addr], w1cmask);
> +}
> +
> +static inline void define_long_wo(IntelIOMMUState *s, hwaddr addr,
> +                                  uint32_t mask)
> +{
> +    stl_le_p(&s->womask[addr], mask);
> +}
> +
> +/* "External" get/set operations */
> +static inline void set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
> +{
> +    uint64_t oldval = ldq_le_p(&s->csr[addr]);
> +    uint64_t wmask = ldq_le_p(&s->wmask[addr]);
> +    uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
> +    stq_le_p(&s->csr[addr],
> +             ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
> +}
> +
> +static inline void set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val)
> +{
> +    uint32_t oldval = ldl_le_p(&s->csr[addr]);
> +    uint32_t wmask = ldl_le_p(&s->wmask[addr]);
> +    uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
> +    stl_le_p(&s->csr[addr],
> +             ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
> +}
> +
> +static inline uint64_t get_quad(IntelIOMMUState *s, hwaddr addr)
> +{
> +    uint64_t val = ldq_le_p(&s->csr[addr]);
> +    uint64_t womask = ldq_le_p(&s->womask[addr]);
> +    return val & ~womask;
> +}
> +
> +
> +static inline uint32_t get_long(IntelIOMMUState *s, hwaddr addr)
> +{
> +    uint32_t val = ldl_le_p(&s->csr[addr]);
> +    uint32_t womask = ldl_le_p(&s->womask[addr]);
> +    return val & ~womask;
> +}
> +
> +/* "Internal" get/set operations */
> +static inline uint64_t get_quad_raw(IntelIOMMUState *s, hwaddr addr)
> +{
> +    return ldq_le_p(&s->csr[addr]);
> +}
> +
> +static inline uint32_t get_long_raw(IntelIOMMUState *s, hwaddr addr)
> +{
> +    return ldl_le_p(&s->csr[addr]);
> +}
> +
> +static inline void set_quad_raw(IntelIOMMUState *s, hwaddr addr, uint64_t val)
> +{
> +    stq_le_p(&s->csr[addr], val);
> +}
> +
> +static inline uint32_t set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
> +                                           uint32_t clear, uint32_t mask)
> +{
> +    uint32_t new_val = (ldl_le_p(&s->csr[addr]) & ~clear) | mask;
> +    stl_le_p(&s->csr[addr], new_val);
> +    return new_val;
> +}
> +
> +static inline uint64_t set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
> +                                           uint64_t clear, uint64_t mask)
> +{
> +    uint64_t new_val = (ldq_le_p(&s->csr[addr]) & ~clear) | mask;
> +    stq_le_p(&s->csr[addr], new_val);
> +    return new_val;
> +}
> +
> +/* Given the reg addr of both the message data and address, generate an
> + * interrupt via MSI.
> + */
> +static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
> +                                   hwaddr mesg_data_reg)
> +{
> +    hwaddr addr;
> +    uint32_t data;
> +
> +    assert(mesg_data_reg < DMAR_REG_SIZE);
> +    assert(mesg_addr_reg < DMAR_REG_SIZE);
> +
> +    addr = get_long_raw(s, mesg_addr_reg);
> +    data = get_long_raw(s, mesg_data_reg);
> +
> +    VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32, addr, data);
> +    stl_le_phys(&address_space_memory, addr, data);
> +}
> +
> +/* Generate a fault event to software via MSI if conditions are met.
> + * Notice that the value of FSTS_REG being passed to it should be the one > + * before any update. > + */ > +static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts) > +{ > + /* Check if there are any previously reported interrupt conditions */ > + if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO || > + pre_fsts & VTD_FSTS_IQE) { > + VTD_DPRINTF(FLOG, "there are previous interrupt conditions " > + "to be serviced by software, fault event is not > generated " > + "(FSTS_REG 0x%"PRIx32 ")", pre_fsts); > + return; > + } > + set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP); > + if (get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) { > + /* Interrupt Mask */ > + VTD_DPRINTF(FLOG, "Interrupt Mask set, fault event is not > generated"); > + } else { > + /* generate interrupt */ > + vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG); > + set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); > + } > +} > + > +/* Check if the Fault (F) field of the Fault Recording Register referenced by > + * @index is Set. > + */ > +static inline bool is_frcd_set(IntelIOMMUState *s, uint16_t index) > +{ > + /* Each reg is 128-bit */ > + hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); > + addr += 8; /* Access the high 64-bit half */ > + > + assert(index < DMAR_FRCD_REG_NR); > + > + return get_quad_raw(s, addr) & VTD_FRCD_F; > +} > + > +/* Update the PPF field of Fault Status Register. > + * Should be called whenever change the F field of any fault recording > + * registers. > + */ > +static inline void update_fsts_ppf(IntelIOMMUState *s) > +{ > + uint32_t i; > + uint32_t ppf_mask = 0; > + > + for (i = 0; i < DMAR_FRCD_REG_NR; i++) { > + if (is_frcd_set(s, i)) { > + ppf_mask = VTD_FSTS_PPF; > + break; > + } > + } > + set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask); > + VTD_DPRINTF(FLOG, "set PPF of FSTS_REG to %d", ppf_mask ? 
1 : 0); > +} > + > +static inline void set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t > index) > +{ > + /* Each reg is 128-bit */ > + hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); > + addr += 8; /* Access the high 64-bit half */ > + > + assert(index < DMAR_FRCD_REG_NR); > + > + set_clear_mask_quad(s, addr, 0, VTD_FRCD_F); > + update_fsts_ppf(s); > +} > + > +/* Must not update F field now, should be done later */ > +static void record_frcd(IntelIOMMUState *s, uint16_t index, uint16_t > source_id, > + hwaddr addr, VTDFaultReason fault, bool is_write) > +{ > + uint64_t hi = 0, lo; > + hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); > + > + assert(index < DMAR_FRCD_REG_NR); > + > + lo = VTD_FRCD_FI(addr); > + hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault); > + if (!is_write) { > + hi |= VTD_FRCD_T; > + } > + > + set_quad_raw(s, frcd_reg_addr, lo); > + set_quad_raw(s, frcd_reg_addr + 8, hi); > + VTD_DPRINTF(FLOG, "record to FRCD_REG #%"PRIu16 ": hi 0x%"PRIx64 > + ", lo 0x%"PRIx64, index, hi, lo); > +} > + > +/* Try to collapse multiple pending faults from the same requester */ > +static inline bool try_collapse_fault(IntelIOMMUState *s, uint16_t source_id) > +{ > + uint32_t i; > + uint64_t frcd_reg; > + hwaddr addr = DMAR_FRCD_REG_OFFSET + 8; /* The high 64-bit half */ > + > + for (i = 0; i < DMAR_FRCD_REG_NR; i++) { > + frcd_reg = get_quad_raw(s, addr); > + VTD_DPRINTF(FLOG, "frcd_reg #%d 0x%"PRIx64, i, frcd_reg); > + if ((frcd_reg & VTD_FRCD_F) && > + ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) { > + return true; > + } > + addr += 16; /* 128-bit for each */ > + } > + > + return false; > +} > + > +/* Log and report an DMAR (address translation) fault to software */ > +static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, > + hwaddr addr, VTDFaultReason fault, > + bool is_write) > +{ > + uint32_t fsts_reg = get_long_raw(s, DMAR_FSTS_REG); > + > + assert(fault < VTD_FR_MAX); > + > + if (fault == VTD_FR_RESERVED_ERR) { > + /* This is not a normal fault reason case. Drop it. 
*/ > + return; > + } > + > + VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64 > + ", is_write %d", source_id, fault, addr, is_write); > + > + /* Check PFO field in FSTS_REG */ > + if (fsts_reg & VTD_FSTS_PFO) { > + VTD_DPRINTF(FLOG, "new fault is not recorded due to " > + "Primary Fault Overflow"); > + return; > + } > + > + /* Compression of multiple faults from the same requester */ > + if (try_collapse_fault(s, source_id)) { > + VTD_DPRINTF(FLOG, "new fault is not recorded due to " > + "compression of faults"); > + return; > + } > + > + /* Check next_frcd_reg to see whether it is overflow now */ > + if (is_frcd_set(s, s->next_frcd_reg)) { > + VTD_DPRINTF(FLOG, "Primary Fault Overflow and " > + "new fault is not recorded, set PFO field"); > + set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO); > + return; > + } > + > + record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write); > + > + if (fsts_reg & VTD_FSTS_PPF) { > + /* There are already one or more pending faults */ > + VTD_DPRINTF(FLOG, "there are pending faults already, " > + "fault event is not generated"); > + set_frcd_and_update_ppf(s, s->next_frcd_reg); > + s->next_frcd_reg++; > + if (s->next_frcd_reg == DMAR_FRCD_REG_NR) { > + s->next_frcd_reg = 0; > + } > + } else { > + set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_FRI_MASK, > + VTD_FSTS_FRI(s->next_frcd_reg)); > + set_frcd_and_update_ppf(s, s->next_frcd_reg); /* It will also set > PPF */ > + s->next_frcd_reg++; > + if (s->next_frcd_reg == DMAR_FRCD_REG_NR) { > + s->next_frcd_reg = 0; > + } > + > + /* This case actually cause the PPF to be Set. > + * So generate fault event (interrupt). > + */ > + vtd_generate_fault_event(s, fsts_reg); > + } > +} > + > +static inline bool root_entry_present(VTDRootEntry *root) > +{ > + return root->val & VTD_ROOT_ENTRY_P; > +} > + > +static int get_root_entry(IntelIOMMUState *s, uint32_t index, VTDRootEntry > *re) > +{ > + dma_addr_t addr; > + > + assert(index < VTD_ROOT_ENTRY_NR); > + > + addr = s->root + index * sizeof(*re); > + > + if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) { > + VTD_DPRINTF(GENERAL, "error: fail to access root-entry at 0x%"PRIx64 > + " + %"PRIu32, s->root, index); > + re->val = 0; > + return -VTD_FR_ROOT_TABLE_INV; > + } > + > + re->val = le64_to_cpu(re->val); > + return VTD_FR_RESERVED; > +} > + > +static inline bool context_entry_present(VTDContextEntry *context) > +{ > + return context->lo & VTD_CONTEXT_ENTRY_P; > +} > + > +static int get_context_entry_from_root(VTDRootEntry *root, uint32_t index, > + VTDContextEntry *ce) > +{ > + dma_addr_t addr; > + > + if (!root_entry_present(root)) { > + ce->lo = 0; > + ce->hi = 0; > + VTD_DPRINTF(GENERAL, "error: root-entry is not present"); > + return -VTD_FR_ROOT_ENTRY_P; > + } > + > + assert(index < VTD_CONTEXT_ENTRY_NR); > + > + addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce); > + > + if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) { > + VTD_DPRINTF(GENERAL, "error: fail to access context-entry at > 0x%"PRIx64 > + " + %"PRIu32, > + (uint64_t)(root->val & VTD_ROOT_ENTRY_CTP), index); > + ce->lo = 0; > + ce->hi = 0; > + return -VTD_FR_CONTEXT_TABLE_INV; > + } > + > + ce->lo = le64_to_cpu(ce->lo); > + ce->hi = le64_to_cpu(ce->hi); > + return VTD_FR_RESERVED; > +} > + > +static inline dma_addr_t get_slpt_base_from_context(VTDContextEntry *ce) > +{ > + return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; > +} > + > +/* The shift of an addr for a certain level of paging structure */ > +static inline uint32_t 
slpt_level_shift(uint32_t level) > +{ > + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS; > +} > + > +static inline uint64_t get_slpte_addr(uint64_t slpte) > +{ > + return slpte & VTD_SL_PT_BASE_ADDR_MASK; > +} > + > +/* Whether the pte indicates the address of the page frame */ > +static inline bool is_last_slpte(uint64_t slpte, uint32_t level) > +{ > + return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK); > +} > + > +/* Get the content of a spte located in @base_addr[@index] */ > +static inline uint64_t get_slpte(dma_addr_t base_addr, uint32_t index) > +{ > + uint64_t slpte; > + > + assert(index < VTD_SL_PT_ENTRY_NR); > + > + if (dma_memory_read(&address_space_memory, > + base_addr + index * sizeof(slpte), &slpte, > + sizeof(slpte))) { > + slpte = (uint64_t)-1; > + return slpte; > + } > + > + slpte = le64_to_cpu(slpte); > + return slpte; > +} > + > +/* Given a gpa and the level of paging structure, return the offset of > current > + * level. > + */ > +static inline uint32_t gpa_level_offset(uint64_t gpa, uint32_t level) > +{ > + return (gpa >> slpt_level_shift(level)) & ((1ULL << VTD_SL_LEVEL_BITS) - > 1); > +} > + > +/* Check Capability Register to see if the @level of page-table is supported > */ > +static inline bool is_level_supported(IntelIOMMUState *s, uint32_t level) > +{ > + return VTD_CAP_SAGAW_MASK & s->cap & > + (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT)); > +} > + > +/* Get the page-table level that hardware should use for the second-level > + * page-table walk from the Address Width field of context-entry. > + */ > +static inline uint32_t get_level_from_context_entry(VTDContextEntry *ce) > +{ > + return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW); > +} > + > +static inline uint32_t get_agaw_from_context_entry(VTDContextEntry *ce) > +{ > + return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9; > +} > + > +static const uint64_t paging_entry_rsvd_field[] = { > + [0] = ~0ULL, > + /* For not large page */ > + [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), > + [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), > + [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), > + [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), > + /* For large page */ > + [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), > + [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), > + [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), > + [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), > +}; > + > +static inline bool slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) > +{ > + if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) { > + /* Maybe large page */ > + return slpte & paging_entry_rsvd_field[level + 4]; > + } else { > + return slpte & paging_entry_rsvd_field[level]; > + } > +} > + > +/* Given the @gpa, get relevant @slptep. @slpte_level will be the last level > + * of the translation, can be used for deciding the size of large page. > + * @slptep and @slpte_level will not be touched if error happens. > + */ > +static int gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write, > + uint64_t *slptep, uint32_t *slpte_level) > +{ > + dma_addr_t addr = get_slpt_base_from_context(ce); > + uint32_t level = get_level_from_context_entry(ce); > + uint32_t offset; > + uint64_t slpte; > + uint32_t ce_agaw = get_agaw_from_context_entry(ce); > + uint64_t access_right_check; > + > + /* Check if @gpa is above 2^X-1, where X is the minimum of MGAW in > CAP_REG > + * and AW in context-entry. 
> + */ > + if (gpa & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) { > + VTD_DPRINTF(GENERAL, "error: gpa 0x%"PRIx64 " exceeds limits", gpa); > + return -VTD_FR_ADDR_BEYOND_MGAW; > + } > + > + /* FIXME: what is the Atomics request here? */ > + access_right_check = is_write ? VTD_SL_W : VTD_SL_R; > + > + while (true) { > + offset = gpa_level_offset(gpa, level); > + slpte = get_slpte(addr, offset); > + > + if (slpte == (uint64_t)-1) { > + VTD_DPRINTF(GENERAL, "error: fail to access second-level paging " > + "entry at level %"PRIu32 " for gpa 0x%"PRIx64, > + level, gpa); > + if (level == get_level_from_context_entry(ce)) { > + /* Invalid programming of context-entry */ > + return -VTD_FR_CONTEXT_ENTRY_INV; > + } else { > + return -VTD_FR_PAGING_ENTRY_INV; > + } > + } > + if (!(slpte & access_right_check)) { > + VTD_DPRINTF(GENERAL, "error: lack of %s permission for " > + "gpa 0x%"PRIx64 " slpte 0x%"PRIx64, > + (is_write ? "write" : "read"), gpa, slpte); > + return is_write ? -VTD_FR_WRITE : -VTD_FR_READ; > + } > + if (slpte_nonzero_rsvd(slpte, level)) { > + VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second " > + "level paging entry level %"PRIu32 " slpte > 0x%"PRIx64, > + level, slpte); > + return -VTD_FR_PAGING_ENTRY_RSVD; > + } > + > + if (is_last_slpte(slpte, level)) { > + *slptep = slpte; > + *slpte_level = level; > + return VTD_FR_RESERVED; > + } > + addr = get_slpte_addr(slpte); > + level--; > + } > +} > + > +/* Map a device to its corresponding domain (context-entry). @ce will be set > + * to Zero if error happens while accessing the context-entry. > + */ > +static inline int dev_to_context_entry(IntelIOMMUState *s, int bus_num, > + int devfn, VTDContextEntry *ce) > +{ > + VTDRootEntry re; > + int ret_fr; > + > + assert(0 <= bus_num && bus_num < VTD_PCI_BUS_MAX); > + assert(0 <= devfn && devfn < VTD_PCI_SLOT_MAX * VTD_PCI_FUNC_MAX); > + > + ret_fr = get_root_entry(s, bus_num, &re); > + if (ret_fr) { > + ce->hi = 0; > + ce->lo = 0; > + return ret_fr; > + } > + > + if (!root_entry_present(&re)) { > + VTD_DPRINTF(GENERAL, "error: root-entry #%d is not present", > bus_num); > + ce->hi = 0; > + ce->lo = 0; > + return -VTD_FR_ROOT_ENTRY_P; > + } else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { > + VTD_DPRINTF(GENERAL, "error: non-zero reserved field in root-entry " > + "hi 0x%"PRIx64 " lo 0x%"PRIx64, re.rsvd, re.val); > + ce->hi = 0; > + ce->lo = 0; > + return -VTD_FR_ROOT_ENTRY_RSVD; > + } > + > + ret_fr = get_context_entry_from_root(&re, devfn, ce); > + if (ret_fr) { > + return ret_fr; > + } > + > + if (!context_entry_present(ce)) { > + VTD_DPRINTF(GENERAL, > + "error: context-entry #%d(bus #%d) is not present", > devfn, > + bus_num); > + return -VTD_FR_CONTEXT_ENTRY_P; > + } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || > + (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { > + VTD_DPRINTF(GENERAL, > + "error: non-zero reserved field in context-entry " > + "hi 0x%"PRIx64 " lo 0x%"PRIx64, ce->hi, ce->lo); > + return -VTD_FR_CONTEXT_ENTRY_RSVD; > + } > + > + /* Check if the programming of context-entry is valid */ > + if (!is_level_supported(s, get_level_from_context_entry(ce))) { > + VTD_DPRINTF(GENERAL, "error: unsupported Address Width value in " > + "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64, > + ce->hi, ce->lo); > + return -VTD_FR_CONTEXT_ENTRY_INV; > + } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) { > + VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in " > + "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64, > + ce->hi, ce->lo); > + return -VTD_FR_CONTEXT_ENTRY_INV; > + } > + 
> + return VTD_FR_RESERVED; > +} > + > +static inline uint16_t make_source_id(int bus_num, int devfn) > +{ > + return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL); > +} > + > +static const bool qualified_faults[] = { > + [VTD_FR_RESERVED] = false, > + [VTD_FR_ROOT_ENTRY_P] = false, > + [VTD_FR_CONTEXT_ENTRY_P] = true, > + [VTD_FR_CONTEXT_ENTRY_INV] = true, > + [VTD_FR_ADDR_BEYOND_MGAW] = true, > + [VTD_FR_WRITE] = true, > + [VTD_FR_READ] = true, > + [VTD_FR_PAGING_ENTRY_INV] = true, > + [VTD_FR_ROOT_TABLE_INV] = false, > + [VTD_FR_CONTEXT_TABLE_INV] = false, > + [VTD_FR_ROOT_ENTRY_RSVD] = false, > + [VTD_FR_PAGING_ENTRY_RSVD] = true, > + [VTD_FR_CONTEXT_ENTRY_TT] = true, > + [VTD_FR_RESERVED_ERR] = false, > + [VTD_FR_MAX] = false, > +}; > + > +/* To see if a fault condition is "qualified", which is reported to software > + * only if the FPD field in the context-entry used to process the faulting > + * request is 0. > + */ > +static inline bool is_qualified_fault(VTDFaultReason fault) > +{ > + return qualified_faults[fault]; > +} > + > +static inline bool is_interrupt_addr(hwaddr addr) > +{ > + return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= > VTD_INTERRUPT_ADDR_LAST; > +} > + > +/* Map dev to context-entry then do a paging-structures walk to do a iommu > + * translation. > + * @bus_num: The bus number > + * @devfn: The devfn, which is the combined of device and function number > + * @is_write: The access is a write operation > + * @entry: IOMMUTLBEntry that contain the addr to be translated and result > + */ > +static void iommu_translate(IntelIOMMUState *s, int bus_num, int devfn, > + hwaddr addr, bool is_write, IOMMUTLBEntry *entry) > +{ > + VTDContextEntry ce; > + uint64_t slpte; > + uint32_t level; > + uint64_t page_mask; > + uint16_t source_id = make_source_id(bus_num, devfn); > + int ret_fr; > + bool is_fpd_set = false; > + > + /* Check if the request is in interrupt address range */ > + if (is_interrupt_addr(addr)) { > + if (is_write) { > + /* FIXME: since we don't know the length of the access here, we > + * treat Non-DWORD length write requests without PASID as > + * interrupt requests, too. Withoud interrupt remapping support, > + * we just use 1:1 mapping. 
> + */ > + VTD_DPRINTF(MMU, "write request to interrupt address " > + "gpa 0x%"PRIx64, addr); > + entry->iova = addr & VTD_PAGE_MASK_4K; > + entry->translated_addr = addr & VTD_PAGE_MASK_4K; > + entry->addr_mask = ~VTD_PAGE_MASK_4K; > + entry->perm = IOMMU_WO; > + return; > + } else { > + VTD_DPRINTF(GENERAL, "error: read request from interrupt address > " > + "gpa 0x%"PRIx64, addr); > + vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_write); > + return; > + } > + } > + > + ret_fr = dev_to_context_entry(s, bus_num, devfn, &ce); > + is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; > + if (ret_fr) { > + ret_fr = -ret_fr; > + if (is_fpd_set && is_qualified_fault(ret_fr)) { > + VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests > " > + "through this context-entry (with FPD Set)"); > + } else { > + vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); > + } > + return; > + } > + > + ret_fr = gpa_to_slpte(&ce, addr, is_write, &slpte, &level); > + if (ret_fr) { > + ret_fr = -ret_fr; > + if (is_fpd_set && is_qualified_fault(ret_fr)) { > + VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests > " > + "through this context-entry (with FPD Set)"); > + } else { > + vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); > + } > + return; > + } > + > + if (level == VTD_SL_PT_LEVEL) { > + /* 4-KB page */ > + page_mask = VTD_PAGE_MASK_4K; > + } else if (level == VTD_SL_PDP_LEVEL) { > + /* 1-GB page */ > + page_mask = VTD_PAGE_MASK_1G; > + } else { > + /* 2-MB page */ > + page_mask = VTD_PAGE_MASK_2M; > + } > + > + entry->iova = addr & page_mask; > + entry->translated_addr = get_slpte_addr(slpte) & page_mask; > + entry->addr_mask = ~page_mask; > + entry->perm = slpte & VTD_SL_RW_MASK; > +} > + > +static void vtd_root_table_setup(IntelIOMMUState *s) > +{ > + s->root = get_quad_raw(s, DMAR_RTADDR_REG); > + s->root_extended = s->root & VTD_RTADDR_RTT; > + s->root &= VTD_RTADDR_ADDR_MASK; > + > + VTD_DPRINTF(CSR, "root_table addr 0x%"PRIx64 " %s", s->root, > + (s->root_extended ? "(extended)" : "")); > +} > + > +/* Context-cache invalidation > + * Returns the Context Actual Invalidation Granularity. > + * @val: the content of the CCMD_REG > + */ > +static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t > val) > +{ > + uint64_t caig; > + uint64_t type = val & VTD_CCMD_CIRG_MASK; > + > + switch (type) { > + case VTD_CCMD_GLOBAL_INVL: > + VTD_DPRINTF(INV, "Global invalidation request"); > + caig = VTD_CCMD_GLOBAL_INVL_A; > + break; > + > + case VTD_CCMD_DOMAIN_INVL: > + VTD_DPRINTF(INV, "Domain-selective invalidation request"); > + caig = VTD_CCMD_DOMAIN_INVL_A; > + break; > + > + case VTD_CCMD_DEVICE_INVL: > + VTD_DPRINTF(INV, "Domain-selective invalidation request"); > + caig = VTD_CCMD_DEVICE_INVL_A; > + break; > + > + default: > + VTD_DPRINTF(GENERAL, > + "error: wrong context-cache invalidation granularity"); > + caig = 0; > + } > + > + return caig; > +} > + > +/* Flush IOTLB > + * Returns the IOTLB Actual Invalidation Granularity. 
> + * @val: the content of the IOTLB_REG > + */ > +static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val) > +{ > + uint64_t iaig; > + uint64_t type = val & VTD_TLB_FLUSH_GRANU_MASK; > + > + switch (type) { > + case VTD_TLB_GLOBAL_FLUSH: > + VTD_DPRINTF(INV, "Global IOTLB flush"); > + iaig = VTD_TLB_GLOBAL_FLUSH_A; > + break; > + > + case VTD_TLB_DSI_FLUSH: > + VTD_DPRINTF(INV, "Domain-selective IOTLB flush"); > + iaig = VTD_TLB_DSI_FLUSH_A; > + break; > + > + case VTD_TLB_PSI_FLUSH: > + VTD_DPRINTF(INV, "Page-selective-within-domain IOTLB flush"); > + iaig = VTD_TLB_PSI_FLUSH_A; > + break; > + > + default: > + VTD_DPRINTF(GENERAL, "error: wrong iotlb flush granularity"); > + iaig = 0; > + } > + > + return iaig; > +} > + > +/* Set Root Table Pointer */ > +static void handle_gcmd_srtp(IntelIOMMUState *s) > +{ > + VTD_DPRINTF(CSR, "set Root Table Pointer"); > + > + vtd_root_table_setup(s); > + /* Ok - report back to driver */ > + set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS); > +} > + > +/* Handle Translation Enable/Disable */ > +static void handle_gcmd_te(IntelIOMMUState *s, bool en) > +{ > + VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off")); > + > + if (en) { > + s->dmar_enabled = true; > + /* Ok - report back to driver */ > + set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_TES); > + } else { > + s->dmar_enabled = false; > + > + /* Clear the index of Fault Recording Register */ > + s->next_frcd_reg = 0; > + /* Ok - report back to driver */ > + set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0); > + } > +} > + > +/* Handle write to Global Command Register */ > +static void handle_gcmd_write(IntelIOMMUState *s) > +{ > + uint32_t status = get_long_raw(s, DMAR_GSTS_REG); > + uint32_t val = get_long_raw(s, DMAR_GCMD_REG); > + uint32_t changed = status ^ val; > + > + VTD_DPRINTF(CSR, "value 0x%"PRIx32 " status 0x%"PRIx32, val, status); > + if (changed & VTD_GCMD_TE) { > + /* Translation enable/disable */ > + handle_gcmd_te(s, val & VTD_GCMD_TE); > + } > + if (val & VTD_GCMD_SRTP) { > + /* Set/update the root-table pointer */ > + handle_gcmd_srtp(s); > + } > +} > + > +/* Handle write to Context Command Register */ > +static void handle_ccmd_write(IntelIOMMUState *s) > +{ > + uint64_t ret; > + uint64_t val = get_quad_raw(s, DMAR_CCMD_REG); > + > + /* Context-cache invalidation request */ > + if (val & VTD_CCMD_ICC) { > + ret = vtd_context_cache_invalidate(s, val); > + > + /* Invalidation completed. Change something to show */ > + set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL); > + ret = set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK, ret); > + VTD_DPRINTF(INV, "CCMD_REG write-back val: 0x%"PRIx64, ret); > + } > +} > + > +/* Handle write to IOTLB Invalidation Register */ > +static void handle_iotlb_write(IntelIOMMUState *s) > +{ > + uint64_t ret; > + uint64_t val = get_quad_raw(s, DMAR_IOTLB_REG); > + > + /* IOTLB invalidation request */ > + if (val & VTD_TLB_IVT) { > + ret = vtd_iotlb_flush(s, val); > + > + /* Invalidation completed. 
Change something to show */ > + set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL); > + ret = set_clear_mask_quad(s, DMAR_IOTLB_REG, > + VTD_TLB_FLUSH_GRANU_MASK_A, ret); > + VTD_DPRINTF(INV, "IOTLB_REG write-back val: 0x%"PRIx64, ret); > + } > +} > + > +static inline void handle_fsts_write(IntelIOMMUState *s) > +{ > + uint32_t fsts_reg = get_long_raw(s, DMAR_FSTS_REG); > + uint32_t fectl_reg = get_long_raw(s, DMAR_FECTL_REG); > + uint32_t status_fields = VTD_FSTS_PFO | VTD_FSTS_PPF | VTD_FSTS_IQE; > + > + if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) { > + set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); > + VTD_DPRINTF(FLOG, "all pending interrupt conditions serviced, clear " > + "IP field of FECTL_REG"); > + } > +} > + > +static inline void handle_fectl_write(IntelIOMMUState *s) > +{ > + uint32_t fectl_reg; > + /* When software clears the IM field, check the IP field. But do we > + * need to compare the old value and the new value to conclude that > + * software clears the IM field? Or just check if the IM field is zero? > + */ > + fectl_reg = get_long_raw(s, DMAR_FECTL_REG); > + if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) { > + vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG); > + set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); > + VTD_DPRINTF(FLOG, "IM field is cleared, generate " > + "fault event interrupt"); > + } > +} > + > +static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size) > +{ > + IntelIOMMUState *s = opaque; > + uint64_t val; > + > + if (addr + size > DMAR_REG_SIZE) { > + VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 > + ", got 0x%"PRIx64 " %d", > + (uint64_t)DMAR_REG_SIZE, addr, size); > + return (uint64_t)-1; > + } > + > + assert(size == 4 || size == 8); > + > + switch (addr) { > + /* Root Table Address Register, 64-bit */ > + case DMAR_RTADDR_REG: > + if (size == 4) { > + val = s->root & ((1ULL << 32) - 1); > + } else { > + val = s->root; > + } > + break; > + > + case DMAR_RTADDR_REG_HI: > + assert(size == 4); > + val = s->root >> 32; > + break; > + > + default: > + if (size == 4) { > + val = get_long(s, addr); > + } else { > + val = get_quad(s, addr); > + } > + } > + > + VTD_DPRINTF(CSR, "addr 0x%"PRIx64 " size %d val 0x%"PRIx64, > + addr, size, val); > + return val; > +} > + > +static void vtd_mem_write(void *opaque, hwaddr addr, > + uint64_t val, unsigned size) > +{ > + IntelIOMMUState *s = opaque; > + > + if (addr + size > DMAR_REG_SIZE) { > + VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 > + ", got 0x%"PRIx64 " %d", > + (uint64_t)DMAR_REG_SIZE, addr, size); > + return; > + } > + > + assert(size == 4 || size == 8); > + > + switch (addr) { > + /* Global Command Register, 32-bit */ > + case DMAR_GCMD_REG: > + VTD_DPRINTF(CSR, "DMAR_GCMD_REG write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + set_long(s, addr, val); > + handle_gcmd_write(s); > + break; > + > + /* Context Command Register, 64-bit */ > + case DMAR_CCMD_REG: > + VTD_DPRINTF(CSR, "DMAR_CCMD_REG write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + if (size == 4) { > + set_long(s, addr, val); > + } else { > + set_quad(s, addr, val); > + handle_ccmd_write(s); > + } > + break; > + > + case DMAR_CCMD_REG_HI: > + VTD_DPRINTF(CSR, "DMAR_CCMD_REG_HI write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + assert(size == 4); > + set_long(s, addr, val); > + handle_ccmd_write(s); > + break; > + > + > + /* IOTLB Invalidation 
Register, 64-bit */ > + case DMAR_IOTLB_REG: > + VTD_DPRINTF(INV, "DMAR_IOTLB_REG write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + if (size == 4) { > + set_long(s, addr, val); > + } else { > + set_quad(s, addr, val); > + handle_iotlb_write(s); > + } > + break; > + > + case DMAR_IOTLB_REG_HI: > + VTD_DPRINTF(INV, "DMAR_IOTLB_REG_HI write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + assert(size == 4); > + set_long(s, addr, val); > + handle_iotlb_write(s); > + break; > + > + /* Fault Status Register, 32-bit */ > + case DMAR_FSTS_REG: > + VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + assert(size == 4); > + set_long(s, addr, val); > + handle_fsts_write(s); > + break; > + > + /* Fault Event Control Register, 32-bit */ > + case DMAR_FECTL_REG: > + VTD_DPRINTF(FLOG, "DMAR_FECTL_REG write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + assert(size == 4); > + set_long(s, addr, val); > + handle_fectl_write(s); > + break; > + > + /* Fault Event Data Register, 32-bit */ > + case DMAR_FEDATA_REG: > + VTD_DPRINTF(FLOG, "DMAR_FEDATA_REG write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + assert(size == 4); > + set_long(s, addr, val); > + break; > + > + /* Fault Event Address Register, 32-bit */ > + case DMAR_FEADDR_REG: > + VTD_DPRINTF(FLOG, "DMAR_FEADDR_REG write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + assert(size == 4); > + set_long(s, addr, val); > + break; > + > + /* Fault Event Upper Address Register, 32-bit */ > + case DMAR_FEUADDR_REG: > + VTD_DPRINTF(FLOG, "DMAR_FEUADDR_REG write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + assert(size == 4); > + set_long(s, addr, val); > + break; > + > + /* Protected Memory Enable Register, 32-bit */ > + case DMAR_PMEN_REG: > + VTD_DPRINTF(CSR, "DMAR_PMEN_REG write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + assert(size == 4); > + set_long(s, addr, val); > + break; > + > + > + /* Root Table Address Register, 64-bit */ > + case DMAR_RTADDR_REG: > + VTD_DPRINTF(CSR, "DMAR_RTADDR_REG write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + if (size == 4) { > + set_long(s, addr, val); > + } else { > + set_quad(s, addr, val); > + } > + break; > + > + case DMAR_RTADDR_REG_HI: > + VTD_DPRINTF(CSR, "DMAR_RTADDR_REG_HI write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + assert(size == 4); > + set_long(s, addr, val); > + break; > + > + /* Fault Recording Registers, 128-bit */ > + case DMAR_FRCD_REG_0_0: > + VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_0 write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + if (size == 4) { > + set_long(s, addr, val); > + } else { > + set_quad(s, addr, val); > + } > + break; > + > + case DMAR_FRCD_REG_0_1: > + VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_1 write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + assert(size == 4); > + set_long(s, addr, val); > + break; > + > + case DMAR_FRCD_REG_0_2: > + VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_2 write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + if (size == 4) { > + set_long(s, addr, val); > + } else { > + set_quad(s, addr, val); > + /* May clear bit 127 (Fault), update PPF */ > + update_fsts_ppf(s); > + } > + break; > + > + case DMAR_FRCD_REG_0_3: > + VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_3 write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + 
assert(size == 4); > + set_long(s, addr, val); > + /* May clear bit 127 (Fault), update PPF */ > + update_fsts_ppf(s); > + break; > + > + default: > + VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64 > + ", size %d, val 0x%"PRIx64, addr, size, val); > + if (size == 4) { > + set_long(s, addr, val); > + } else { > + set_quad(s, addr, val); > + } > + } > + > +} > + > +static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, > + bool is_write) > +{ > + VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); > + IntelIOMMUState *s = vtd_as->iommu_state; > + int bus_num = vtd_as->bus_num; > + int devfn = vtd_as->devfn; > + IOMMUTLBEntry ret = { > + .target_as = &address_space_memory, > + .iova = addr, > + .translated_addr = 0, > + .addr_mask = ~(hwaddr)0, > + .perm = IOMMU_NONE, > + }; > + > + if (!s->dmar_enabled) { > + /* DMAR disabled, passthrough, use 4k-page*/ > + ret.iova = addr & VTD_PAGE_MASK_4K; > + ret.translated_addr = addr & VTD_PAGE_MASK_4K; > + ret.addr_mask = ~VTD_PAGE_MASK_4K; > + ret.perm = IOMMU_RW; > + return ret; > + } > + > + iommu_translate(s, bus_num, devfn, addr, is_write, &ret); > + > + VTD_DPRINTF(MMU, > + "bus %d slot %d func %d devfn %d gpa %"PRIx64 " hpa %"PRIx64, > + bus_num, VTD_PCI_SLOT(devfn), VTD_PCI_FUNC(devfn), devfn, > addr, > + ret.translated_addr); > + return ret; > +} > + > +static const VMStateDescription vtd_vmstate = { > + .name = "iommu_intel", > + .version_id = 1, > + .minimum_version_id = 1, > + .minimum_version_id_old = 1, > + .fields = (VMStateField[]) { > + VMSTATE_UINT8_ARRAY(csr, IntelIOMMUState, DMAR_REG_SIZE), > + VMSTATE_END_OF_LIST() > + } > +}; > + > +static const MemoryRegionOps vtd_mem_ops = { > + .read = vtd_mem_read, > + .write = vtd_mem_write, > + .endianness = DEVICE_LITTLE_ENDIAN, > + .impl = { > + .min_access_size = 4, > + .max_access_size = 8, > + }, > + .valid = { > + .min_access_size = 4, > + .max_access_size = 8, > + }, > +}; > + > +static Property iommu_properties[] = { > + DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0), > + DEFINE_PROP_END_OF_LIST(), > +}; > + > +/* Do the real initialization. It will also be called when reset, so pay > + * attention when adding new initialization stuff. 
> + */ > +static void do_vtd_init(IntelIOMMUState *s) > +{ > + memset(s->csr, 0, DMAR_REG_SIZE); > + memset(s->wmask, 0, DMAR_REG_SIZE); > + memset(s->w1cmask, 0, DMAR_REG_SIZE); > + memset(s->womask, 0, DMAR_REG_SIZE); > + > + s->iommu_ops.translate = vtd_iommu_translate; > + s->root = 0; > + s->root_extended = false; > + s->dmar_enabled = false; > + s->iq_head = 0; > + s->iq_tail = 0; > + s->iq = 0; > + s->iq_size = 0; > + s->qi_enabled = false; > + s->iq_last_desc_type = VTD_INV_DESC_NONE; > + s->next_frcd_reg = 0; > + > + /* b.0:2 = 6: Number of domains supported: 64K using 16 bit ids > + * b.3 = 0: Advanced fault logging not supported > + * b.4 = 0: Required write buffer flushing not supported > + * b.5 = 0: Protected low memory region not supported > + * b.6 = 0: Protected high memory region not supported > + * b.8:12 = 2: SAGAW(Supported Adjusted Guest Address Widths), 39-bit, > + * 3-level page-table > + * b.16:21 = 38: MGAW(Maximum Guest Address Width) = 39 > + * b.22 = 0: ZLR(Zero Length Read) zero length DMA read requests > + * to write-only pages not supported > + * b.24:33 = 34: FRO(Fault-recording Register offset) > + * b.54 = 0: DWD(Write Draining), draining of write requests not > supported > + * b.55 = 0: DRD(Read Draining), draining of read requests not supported > + */ > + s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW | > + VTD_CAP_SAGAW; > + > + /* b.1 = 0: QI(Queued Invalidation support) not supported > + * b.2 = 0: DT(Device-TLB support) not supported > + * b.3 = 0: IR(Interrupt Remapping support) not supported > + * b.4 = 0: EIM(Extended Interrupt Mode) not supported > + * b.8:17 = 15: IRO(IOTLB Register Offset) > + * b.20:23 = 0: MHMV(Maximum Handle Mask Value) not valid > + */ > + s->ecap = VTD_ECAP_IRO; > + > + /* Define registers with default values and bit semantics */ > + define_long(s, DMAR_VER_REG, 0x10UL, 0, 0); /* set MAX = 1, RO */ > + define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); > + define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); > + define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0); > + define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL); > + define_long(s, DMAR_GSTS_REG, 0, 0, 0); /* All bits RO, default 0 */ > + define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffff000ULL, 0); > + define_quad(s, DMAR_CCMD_REG, 0, 0xe0000003ffffffffULL, 0); > + define_quad_wo(s, DMAR_CCMD_REG, 0x3ffff0000ULL); > + > + /* Advanced Fault Logging not supported */ > + define_long(s, DMAR_FSTS_REG, 0, 0, 0x11UL); > + define_long(s, DMAR_FECTL_REG, 0x80000000UL, 0x80000000UL, 0); > + define_long(s, DMAR_FEDATA_REG, 0, 0x0000ffffUL, 0); /* 15:0 RW */ > + define_long(s, DMAR_FEADDR_REG, 0, 0xfffffffcUL, 0); /* 31:2 RW */ > + > + /* Treated as RsvdZ when EIM in ECAP_REG is not supported > + * define_long(s, DMAR_FEUADDR_REG, 0, 0xffffffffUL, 0); > + */ > + define_long(s, DMAR_FEUADDR_REG, 0, 0, 0); > + > + /* Treated as RO for implementations that PLMR and PHMR fields reported > + * as Clear in the CAP_REG. 
> + * define_long(s, DMAR_PMEN_REG, 0, 0x80000000UL, 0); > + */ > + define_long(s, DMAR_PMEN_REG, 0, 0, 0); > + > + /* IOTLB registers */ > + define_quad(s, DMAR_IOTLB_REG, 0, 0Xb003ffff00000000ULL, 0); > + define_quad(s, DMAR_IVA_REG, 0, 0xfffffffffffff07fULL, 0); > + define_quad_wo(s, DMAR_IVA_REG, 0xfffffffffffff07fULL); > + > + /* Fault Recording Registers, 128-bit */ > + define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0); > + define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL); > +} > + > +/* Reset function of QOM > + * Should not reset address_spaces when reset > + */ > +static void vtd_reset(DeviceState *dev) > +{ > + IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); > + > + VTD_DPRINTF(GENERAL, ""); > + do_vtd_init(s); > +} > + > +/* Initialization function of QOM */ > +static void vtd_realize(DeviceState *dev, Error **errp) > +{ > + IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); > + > + VTD_DPRINTF(GENERAL, ""); > + memset(s->address_spaces, 0, sizeof(s->address_spaces)); > + memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, > + "intel_iommu", DMAR_REG_SIZE); > + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem); > + do_vtd_init(s); > +} > + > +static void vtd_class_init(ObjectClass *klass, void *data) > +{ > + DeviceClass *dc = DEVICE_CLASS(klass); > + > + dc->reset = vtd_reset; > + dc->realize = vtd_realize; > + dc->vmsd = &vtd_vmstate; > + dc->props = iommu_properties; > +} > + > +static const TypeInfo vtd_info = { > + .name = TYPE_INTEL_IOMMU_DEVICE, > + .parent = TYPE_SYS_BUS_DEVICE, > + .instance_size = sizeof(IntelIOMMUState), > + .class_init = vtd_class_init, > +}; > + > +static void vtd_register_types(void) > +{ > + VTD_DPRINTF(GENERAL, ""); > + type_register_static(&vtd_info); > +} > + > +type_init(vtd_register_types) > diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h > new file mode 100644 > index 0000000..7bc679a > --- /dev/null > +++ b/hw/i386/intel_iommu_internal.h > @@ -0,0 +1,345 @@ > +/* > + * QEMU emulation of an Intel IOMMU (VT-d) > + * (DMA Remapping device) > + * > + * Copyright (C) 2013 Knut Omang, Oracle <knut.om...@oracle.com> > + * Copyright (C) 2014 Le Tan, <tamlokv...@gmail.com> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + > + * You should have received a copy of the GNU General Public License along > + * with this program; if not, see <http://www.gnu.org/licenses/>. 
> + * > + * Lots of defines copied from kernel/include/linux/intel-iommu.h: > + * Copyright (C) 2006-2008 Intel Corporation > + * Author: Ashok Raj <ashok....@intel.com> > + * Author: Anil S Keshavamurthy <anil.s.keshavamur...@intel.com> > + * > + */ > + > +#ifndef HW_I386_INTEL_IOMMU_INTERNAL_H > +#define HW_I386_INTEL_IOMMU_INTERNAL_H > +#include "hw/i386/intel_iommu.h" > + > +/* > + * Intel IOMMU register specification > + */ > +#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ > +#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ > +#define DMAR_CAP_REG_HI 0xc /* High 32-bit of DMAR_CAP_REG */ > +#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ > +#define DMAR_ECAP_REG_HI 0X14 > +#define DMAR_GCMD_REG 0x18 /* Global command register */ > +#define DMAR_GSTS_REG 0x1c /* Global status register */ > +#define DMAR_RTADDR_REG 0x20 /* Root entry table */ > +#define DMAR_RTADDR_REG_HI 0X24 > +#define DMAR_CCMD_REG 0x28 /* Context command reg */ > +#define DMAR_CCMD_REG_HI 0x2c > +#define DMAR_FSTS_REG 0x34 /* Fault Status register */ > +#define DMAR_FECTL_REG 0x38 /* Fault control register */ > +#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ > +#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ > +#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ > +#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ > +#define DMAR_AFLOG_REG_HI 0X5c > +#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ > +#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ > +#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ > +#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ > +#define DMAR_PHMBASE_REG_HI 0X74 > +#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ > +#define DMAR_PHMLIMIT_REG_HI 0x7c > +#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */ > +#define DMAR_IQH_REG_HI 0X84 > +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ > +#define DMAR_IQT_REG_HI 0X8c > +#define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ > +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ > +#define DMAR_IQA_REG_HI 0x94 > +#define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ > +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ > +#define DMAR_IRTA_REG_HI 0xbc > + > +#define DMAR_IECTL_REG 0xa0 /* Invalidation event control register */ > +#define DMAR_IEDATA_REG 0xa4 /* Invalidation event data register */ > +#define DMAR_IEADDR_REG 0xa8 /* Invalidation event address register */ > +#define DMAR_IEUADDR_REG 0xac /* Invalidation event address register */ > +#define DMAR_PQH_REG 0xc0 /* Page request queue head register */ > +#define DMAR_PQH_REG_HI 0xc4 > +#define DMAR_PQT_REG 0xc8 /* Page request queue tail register*/ > +#define DMAR_PQT_REG_HI 0xcc > +#define DMAR_PQA_REG 0xd0 /* Page request queue address register */ > +#define DMAR_PQA_REG_HI 0xd4 > +#define DMAR_PRS_REG 0xdc /* Page request status register */ > +#define DMAR_PECTL_REG 0xe0 /* Page request event control register */ > +#define DMAR_PEDATA_REG 0xe4 /* Page request event data register */ > +#define DMAR_PEADDR_REG 0xe8 /* Page request event address register */ > +#define DMAR_PEUADDR_REG 0xec /* Page event upper address register */ > +#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability register */ > +#define DMAR_MTRRCAP_REG_HI 0x104 > +#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type register */ > +#define DMAR_MTRRDEF_REG_HI 0x10c > + > +/* IOTLB */ > 
+#define DMAR_IOTLB_REG_OFFSET 0xf0 /* Offset to the IOTLB registers */ > +#define DMAR_IVA_REG DMAR_IOTLB_REG_OFFSET /* Invalidate Address Register */ > +#define DMAR_IVA_REG_HI (DMAR_IVA_REG + 4) > +/* IOTLB Invalidate Register */ > +#define DMAR_IOTLB_REG (DMAR_IOTLB_REG_OFFSET + 0x8) > +#define DMAR_IOTLB_REG_HI (DMAR_IOTLB_REG + 4) > + > +/* FRCD */ > +#define DMAR_FRCD_REG_OFFSET 0x220 /* Offset to the Fault Recording > Registers */ > +/* NOTICE: If you change the DMAR_FRCD_REG_NR, please remember to change the > + * DMAR_REG_SIZE in include/hw/i386/intel_iommu.h. > + * #define DMAR_REG_SIZE (DMAR_FRCD_REG_OFFSET + 16 * DMAR_FRCD_REG_NR) > + */ > +#define DMAR_FRCD_REG_NR 1ULL /* Num of Fault Recording Registers */ > + > +#define DMAR_FRCD_REG_0_0 0x220 /* The 0th Fault Recording Register */ > +#define DMAR_FRCD_REG_0_1 0x224 > +#define DMAR_FRCD_REG_0_2 0x228 > +#define DMAR_FRCD_REG_0_3 0x22c > + > +/* Interrupt Address Range */ > +#define VTD_INTERRUPT_ADDR_FIRST 0xfee00000ULL > +#define VTD_INTERRUPT_ADDR_LAST 0xfeefffffULL > + > +/* IOTLB_REG */ > +#define VTD_TLB_GLOBAL_FLUSH (1ULL << 60) /* Global invalidation */ > +#define VTD_TLB_DSI_FLUSH (2ULL << 60) /* Domain-selective invalidation */ > +#define VTD_TLB_PSI_FLUSH (3ULL << 60) /* Page-selective invalidation */ > +#define VTD_TLB_FLUSH_GRANU_MASK (3ULL << 60) > +#define VTD_TLB_GLOBAL_FLUSH_A (1ULL << 57) > +#define VTD_TLB_DSI_FLUSH_A (2ULL << 57) > +#define VTD_TLB_PSI_FLUSH_A (3ULL << 57) > +#define VTD_TLB_FLUSH_GRANU_MASK_A (3ULL << 57) > +#define VTD_TLB_IVT (1ULL << 63) > + > +/* GCMD_REG */ > +#define VTD_GCMD_TE (1UL << 31) > +#define VTD_GCMD_SRTP (1UL << 30) > +#define VTD_GCMD_SFL (1UL << 29) > +#define VTD_GCMD_EAFL (1UL << 28) > +#define VTD_GCMD_WBF (1UL << 27) > +#define VTD_GCMD_QIE (1UL << 26) > +#define VTD_GCMD_IRE (1UL << 25) > +#define VTD_GCMD_SIRTP (1UL << 24) > +#define VTD_GCMD_CFI (1UL << 23) > + > +/* GSTS_REG */ > +#define VTD_GSTS_TES (1UL << 31) > +#define VTD_GSTS_RTPS (1UL << 30) > +#define VTD_GSTS_FLS (1UL << 29) > +#define VTD_GSTS_AFLS (1UL << 28) > +#define VTD_GSTS_WBFS (1UL << 27) > +#define VTD_GSTS_QIES (1UL << 26) > +#define VTD_GSTS_IRES (1UL << 25) > +#define VTD_GSTS_IRTPS (1UL << 24) > +#define VTD_GSTS_CFIS (1UL << 23) > + > +/* CCMD_REG */ > +#define VTD_CCMD_ICC (1ULL << 63) > +#define VTD_CCMD_GLOBAL_INVL (1ULL << 61) > +#define VTD_CCMD_DOMAIN_INVL (2ULL << 61) > +#define VTD_CCMD_DEVICE_INVL (3ULL << 61) > +#define VTD_CCMD_CIRG_MASK (3ULL << 61) > +#define VTD_CCMD_GLOBAL_INVL_A (1ULL << 59) > +#define VTD_CCMD_DOMAIN_INVL_A (2ULL << 59) > +#define VTD_CCMD_DEVICE_INVL_A (3ULL << 59) > +#define VTD_CCMD_CAIG_MASK (3ULL << 59) > + > +/* RTADDR_REG */ > +#define VTD_RTADDR_RTT (1ULL << 11) > +#define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) > + > +/* ECAP_REG */ > +#define VTD_ECAP_IRO (DMAR_IOTLB_REG_OFFSET << 4) /* (offset >> 4) << 8 */ > +#define VTD_ECAP_QI (1ULL << 1) > + > +/* CAP_REG */ > +#define VTD_CAP_FRO (DMAR_FRCD_REG_OFFSET << 20) /* (offset >> 4) << 24 */ > +#define VTD_CAP_NFR ((DMAR_FRCD_REG_NR - 1) << 40) > +#define VTD_DOMAIN_ID_SHIFT 16 /* 16-bit domain id for 64K domains */ > +#define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL) > +#define VTD_MGAW 39 /* Maximum Guest Address Width */ > +#define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16) > + > +/* Supported Adjusted Guest Address Widths */ > +#define VTD_CAP_SAGAW_SHIFT (8) > +#define VTD_CAP_SAGAW_MASK (0x1fULL << VTD_CAP_SAGAW_SHIFT) > + /* 39-bit AGAW, 3-level page-table */ > 
+#define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT) > + /* 48-bit AGAW, 4-level page-table */ > +#define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT) > +#define VTD_CAP_SAGAW VTD_CAP_SAGAW_39bit > + > +/* IQT_REG */ > +#define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL) > + > +/* IQA_REG */ > +#define VTD_IQA_IQA_MASK (VTD_HAW_MASK ^ 0xfffULL) > +#define VTD_IQA_QS (0x7ULL) > + > +/* IQH_REG */ > +#define VTD_IQH_QH_SHIFT (4) > +#define VTD_IQH_QH_MASK (0x7fff0ULL) > + > +/* ICS_REG */ > +#define VTD_ICS_IWC (1UL) > + > +/* IECTL_REG */ > +#define VTD_IECTL_IM (1UL << 31) > +#define VTD_IECTL_IP (1UL << 30) > + > +/* FSTS_REG */ > +#define VTD_FSTS_FRI_MASK (0xff00) > +#define VTD_FSTS_FRI(val) ((((uint32_t)(val)) << 8) & VTD_FSTS_FRI_MASK) > +#define VTD_FSTS_IQE (1UL << 4) > +#define VTD_FSTS_PPF (1UL << 1) > +#define VTD_FSTS_PFO (1UL) > + > +/* FECTL_REG */ > +#define VTD_FECTL_IM (1UL << 31) > +#define VTD_FECTL_IP (1UL << 30) > + > +/* Fault Recording Register */ > +/* For the high 64-bit of 128-bit */ > +#define VTD_FRCD_F (1ULL << 63) > +#define VTD_FRCD_T (1ULL << 62) > +#define VTD_FRCD_FR(val) (((val) & 0xffULL) << 32) > +#define VTD_FRCD_SID_MASK 0xffffULL > +#define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK) > +/* For the low 64-bit of 128-bit */ > +#define VTD_FRCD_FI(val) ((val) & (((1ULL << VTD_MGAW) - 1) ^ 0xfffULL)) > + > +/* DMA Remapping Fault Conditions */ > +typedef enum VTDFaultReason { > + /* Reserved for Advanced Fault logging. We use this to represent the case > + * with no fault event. > + */ > + VTD_FR_RESERVED = 0, > + VTD_FR_ROOT_ENTRY_P = 1, /* The Present(P) field of root-entry is 0 */ > + VTD_FR_CONTEXT_ENTRY_P, /* The Present(P) field of context-entry is 0 */ > + VTD_FR_CONTEXT_ENTRY_INV, /* Invalid programming of a context-entry */ > + VTD_FR_ADDR_BEYOND_MGAW, /* Input-address above (2^x-1) */ > + VTD_FR_WRITE, /* No write permission */ > + VTD_FR_READ, /* No read permission */ > + /* Fail to access a second-level paging entry (not SL_PML4E) */ > + VTD_FR_PAGING_ENTRY_INV, > + VTD_FR_ROOT_TABLE_INV, /* Fail to access a root-entry */ > + VTD_FR_CONTEXT_TABLE_INV, /* Fail to access a context-entry */ > + /* Non-zero reserved field in a present root-entry */ > + VTD_FR_ROOT_ENTRY_RSVD, > + /* Non-zero reserved field in a present context-entry */ > + VTD_FR_CONTEXT_ENTRY_RSVD, > + /* Non-zero reserved field in a second-level paging entry with at lease > one > + * Read(R) and Write(W) or Execute(E) field is Set. > + */ > + VTD_FR_PAGING_ENTRY_RSVD, > + /* Translation request or translated request explicitly blocked dut to > the > + * programming of the Translation Type (T) field in the present > + * context-entry. > + */ > + VTD_FR_CONTEXT_ENTRY_TT, > + /* This is not a normal fault reason. We use this to indicate some faults > + * that are not referenced by the VT-d specification. > + * Fault event with such reason should not be recorded. 
> + */ > + VTD_FR_RESERVED_ERR, > + /* Guard */ > + VTD_FR_MAX, > +} VTDFaultReason; > + > + > +/* Masks for Queued Invalidation Descriptor */ > +#define VTD_INV_DESC_TYPE (0xf) > +#define VTD_INV_DESC_CC (0x1) /* Context-cache Invalidate Descriptor */ > +#define VTD_INV_DESC_IOTLB (0x2) > +#define VTD_INV_DESC_WAIT (0x5) /* Invalidation Wait Descriptor */ > +#define VTD_INV_DESC_NONE (0) /* Not an Invalidate Descriptor */ > + > + > +/* Pagesize of VTD paging structures, including root and context tables */ > +#define VTD_PAGE_SHIFT (12) > +#define VTD_PAGE_SIZE (1ULL << VTD_PAGE_SHIFT) > + > +#define VTD_PAGE_SHIFT_4K (12) > +#define VTD_PAGE_MASK_4K (~((1ULL << VTD_PAGE_SHIFT_4K) - 1)) > +#define VTD_PAGE_SHIFT_2M (21) > +#define VTD_PAGE_MASK_2M (~((1ULL << VTD_PAGE_SHIFT_2M) - 1)) > +#define VTD_PAGE_SHIFT_1G (30) > +#define VTD_PAGE_MASK_1G (~((1ULL << VTD_PAGE_SHIFT_1G) - 1)) > + > +/* Root-Entry > + * 0: Present > + * 1-11: Reserved > + * 12-63: Context-table Pointer > + * 64-127: Reserved > + */ > +struct VTDRootEntry { > + uint64_t val; > + uint64_t rsvd; > +}; > +typedef struct VTDRootEntry VTDRootEntry; > + > +/* Masks for struct VTDRootEntry */ > +#define VTD_ROOT_ENTRY_P (1ULL << 0) > +#define VTD_ROOT_ENTRY_CTP (~0xfffULL) > + > +#define VTD_ROOT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDRootEntry)) > +#define VTD_ROOT_ENTRY_RSVD (0xffeULL | ~VTD_HAW_MASK) > + > +/* Context-Entry */ > +struct VTDContextEntry { > + uint64_t lo; > + uint64_t hi; > +}; > +typedef struct VTDContextEntry VTDContextEntry; > + > +/* Masks for struct VTDContextEntry */ > +/* lo */ > +#define VTD_CONTEXT_ENTRY_P (1ULL << 0) > +#define VTD_CONTEXT_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */ > +#define VTD_CONTEXT_ENTRY_TT (3ULL << 2) /* Translation Type */ > +#define VTD_CONTEXT_TT_MULTI_LEVEL (0) > +#define VTD_CONTEXT_TT_DEV_IOTLB (1) > +#define VTD_CONTEXT_TT_PASS_THROUGH (2) > +/* Second Level Page Translation Pointer*/ > +#define VTD_CONTEXT_ENTRY_SLPTPTR (~0xfffULL) > +#define VTD_CONTEXT_ENTRY_RSVD_LO (0xff0ULL | ~VTD_HAW_MASK) > +/* hi */ > +#define VTD_CONTEXT_ENTRY_AW (7ULL) /* Adjusted guest-address-width */ > +#define VTD_CONTEXT_ENTRY_DID (0xffffULL << 8) /* Domain Identifier */ > +#define VTD_CONTEXT_ENTRY_RSVD_HI (0xffffffffff000080ULL) > + > +#define VTD_CONTEXT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDContextEntry)) > + > + > +/* Paging Structure common */ > +#define VTD_SL_PT_PAGE_SIZE_MASK (1ULL << 7) > +#define VTD_SL_LEVEL_BITS 9 /* Bits to decide the offset for each level > */ > + > +/* Second Level Paging Structure */ > +#define VTD_SL_PML4_LEVEL 4 > +#define VTD_SL_PDP_LEVEL 3 > +#define VTD_SL_PD_LEVEL 2 > +#define VTD_SL_PT_LEVEL 1 > +#define VTD_SL_PT_ENTRY_NR 512 > + > +/* Masks for Second Level Paging Entry */ > +#define VTD_SL_RW_MASK (3ULL) > +#define VTD_SL_R (1ULL) > +#define VTD_SL_W (1ULL << 1) > +#define VTD_SL_PT_BASE_ADDR_MASK (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK) > +#define VTD_SL_IGN_COM (0xbff0000000000000ULL) > + > +#endif > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h > new file mode 100644 > index 0000000..6601e62 > --- /dev/null > +++ b/include/hw/i386/intel_iommu.h > @@ -0,0 +1,90 @@ > +/* > + * QEMU emulation of an Intel IOMMU (VT-d) > + * (DMA Remapping device) > + * > + * Copyright (C) 2013 Knut Omang, Oracle <knut.om...@oracle.com> > + * Copyright (C) 2014 Le Tan, <tamlokv...@gmail.com> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License 
as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + > + * You should have received a copy of the GNU General Public License along > + * with this program; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +#ifndef INTEL_IOMMU_H > +#define INTEL_IOMMU_H > +#include "hw/qdev.h" > +#include "sysemu/dma.h" > + > +#define TYPE_INTEL_IOMMU_DEVICE "intel-iommu" > +#define INTEL_IOMMU_DEVICE(obj) \ > + OBJECT_CHECK(IntelIOMMUState, (obj), TYPE_INTEL_IOMMU_DEVICE) > + > +/* DMAR Hardware Unit Definition address (IOMMU unit) */ > +#define Q35_HOST_BRIDGE_IOMMU_ADDR 0xfed90000ULL > + > +#define VTD_PCI_BUS_MAX 256 > +#define VTD_PCI_SLOT_MAX 32 > +#define VTD_PCI_FUNC_MAX 8 > +#define VTD_PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) > +#define VTD_PCI_FUNC(devfn) ((devfn) & 0x07) > + > +#define DMAR_REG_SIZE 0x230 > + > +/* FIXME: do not know how to decide the haw */ > +#define VTD_HOST_ADDRESS_WIDTH 39 > +#define VTD_HAW_MASK ((1ULL << VTD_HOST_ADDRESS_WIDTH) - 1) > + > +typedef struct IntelIOMMUState IntelIOMMUState; > +typedef struct VTDAddressSpace VTDAddressSpace; > + > +struct VTDAddressSpace { > + int bus_num; > + int devfn; > + AddressSpace as; > + MemoryRegion iommu; > + IntelIOMMUState *iommu_state; > +}; > + > +/* The iommu (DMAR) device state struct */ > +struct IntelIOMMUState { > + SysBusDevice busdev; > + MemoryRegion csrmem; > + uint8_t csr[DMAR_REG_SIZE]; /* register values */ > + uint8_t wmask[DMAR_REG_SIZE]; /* R/W bytes */ > + uint8_t w1cmask[DMAR_REG_SIZE]; /* RW1C(Write 1 to Clear) bytes */ > + uint8_t womask[DMAR_REG_SIZE]; /* WO (write only - read returns 0) */ > + uint32_t version; > + > + dma_addr_t root; /* Current root table pointer */ > + bool root_extended; /* Type of root table (extended or not) */ > + bool dmar_enabled; /* Set if DMA remapping is enabled */ > + > + uint16_t iq_head; /* Current invalidation queue head */ > + uint16_t iq_tail; /* Current invalidation queue tail */ > + dma_addr_t iq; /* Current invalidation queue (IQ) pointer */ > + uint16_t iq_size; /* IQ Size in number of entries */ > + bool qi_enabled; /* Set if the QI is enabled */ > + uint8_t iq_last_desc_type; /* The type of last completed descriptor */ > + > + /* The index of the Fault Recording Register to be used next. > + * Wraps around from N-1 to 0, where N is the number of FRCD_REG. > + */ > + uint16_t next_frcd_reg; > + > + uint64_t cap; /* The value of Capability Register */ > + uint64_t ecap; /* The value of Extended Capability Register */ > + > + MemoryRegionIOMMUOps iommu_ops; > + VTDAddressSpace **address_spaces[VTD_PCI_BUS_MAX]; > +}; > + > +#endif > -- > 1.9.1
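
To make the naming comment at the top concrete, a minimal sketch of the first helper with a vtd_ prefix (the name is only a suggestion, the body is unchanged from the patch):

    static inline void vtd_define_quad(IntelIOMMUState *s, hwaddr addr,
                                       uint64_t val, uint64_t wmask,
                                       uint64_t w1cmask)
    {
        stq_le_p(&s->csr[addr], val);
        stq_le_p(&s->wmask[addr], wmask);
        stq_le_p(&s->w1cmask[addr], w1cmask);
    }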
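
Also, a small copy-paste slip in vtd_context_cache_invalidate(): the VTD_CCMD_DEVICE_INVL case prints "Domain-selective invalidation request", same as the domain case. I assume the intent was:

    case VTD_CCMD_DEVICE_INVL:
        VTD_DPRINTF(INV, "Device-selective invalidation request");
        caig = VTD_CCMD_DEVICE_INVL_A;
        break;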
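
A note for future readers on the set_long()/set_quad() write path: the masking expression composes plain RW bits with RW1C (write-1-to-clear) bits in a single step. A self-contained sketch, with hypothetical mask values made up purely for illustration:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same masking expression as set_long() in the patch: RW bits take
     * the written value, RW1C bits clear when 1 is written, all other
     * bits keep their old value. */
    static uint32_t csr_write(uint32_t old, uint32_t val,
                              uint32_t wmask, uint32_t w1cmask)
    {
        return ((old & ~wmask) | (val & wmask)) & ~(w1cmask & val);
    }

    int main(void)
    {
        /* hypothetical register: bit 0 is RW, bit 1 is RW1C and set */
        uint32_t old = 0x2;
        /* guest writes 0x3: sets bit 0, clears bit 1 -> prints 0x1 */
        printf("0x%"PRIx32"\n", csr_write(old, 0x3, 0x1, 0x2));
        return 0;
    }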
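
One subtlety in the fault logging path that may deserve a comment in the code: record_frcd() deliberately writes both halves of the record with the F bit clear, and set_frcd_and_update_ppf() sets F (and recomputes PPF) afterwards, so, as I read it, software can never observe a partially written record already marked valid. A sketch of how the high half is assembled (masks copied from intel_iommu_internal.h in this patch):

    #include <stdint.h>

    #define VTD_FRCD_F        (1ULL << 63)
    #define VTD_FRCD_T        (1ULL << 62)
    #define VTD_FRCD_FR(val)  (((val) & 0xffULL) << 32)
    #define VTD_FRCD_SID(val) ((val) & 0xffffULL)

    /* High 64-bit half of a 128-bit fault recording register, with F
     * left clear as in record_frcd(); the caller sets F in a second
     * step via set_frcd_and_update_ppf(). */
    static uint64_t frcd_hi(uint16_t source_id, uint8_t fault, int is_write)
    {
        uint64_t hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
        if (!is_write) {
            hi |= VTD_FRCD_T; /* Type = 1 marks a read request */
        }
        return hi;
    }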
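
For the page-walk math in gpa_to_slpte(): with 4-KB pages and 9 bits per level, the 39-bit 3-level SAGAW this patch advertises splits a GPA into three 9-bit table indexes above the 12-bit page offset. A standalone sketch mirroring slpt_level_shift()/gpa_level_offset() (the example address is arbitrary):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define VTD_PAGE_SHIFT_4K 12
    #define VTD_SL_LEVEL_BITS 9

    /* Same math as slpt_level_shift()/gpa_level_offset() in the patch */
    static uint32_t slpt_level_shift(uint32_t level)
    {
        return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
    }

    static uint32_t gpa_level_offset(uint64_t gpa, uint32_t level)
    {
        return (gpa >> slpt_level_shift(level)) &
               ((1ULL << VTD_SL_LEVEL_BITS) - 1);
    }

    int main(void)
    {
        uint64_t gpa = 0x123456789ULL; /* arbitrary example address */
        uint32_t level;

        /* walk from the top (level 3, shift 30) down to the PT (shift 12) */
        for (level = 3; level >= 1; level--) {
            printf("level %" PRIu32 ": index %" PRIu32 "\n",
                   level, gpa_level_offset(gpa, level));
        }
        return 0;
    }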
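
The encoded register offsets look right to me: CAP_REG.FRO holds offset >> 4 at bits 33:24 and ECAP_REG.IRO holds offset >> 4 at bits 17:8, so with both offsets 16-byte aligned the shortcut shifts in VTD_CAP_FRO/VTD_ECAP_IRO are equivalent to the spec encoding. A compile-time check I used to convince myself (C11 static_assert, purely illustrative):

    #include <assert.h>
    #include <stdint.h>

    #define DMAR_FRCD_REG_OFFSET  0x220
    #define DMAR_IOTLB_REG_OFFSET 0xf0

    /* (offset >> 4) << 24 == offset << 20, given offset % 16 == 0 */
    static_assert(((uint64_t)DMAR_FRCD_REG_OFFSET << 20) ==
                  (((uint64_t)DMAR_FRCD_REG_OFFSET >> 4) << 24),
                  "CAP_REG FRO encoding");
    /* (offset >> 4) << 8 == offset << 4, given offset % 16 == 0 */
    static_assert(((uint64_t)DMAR_IOTLB_REG_OFFSET << 4) ==
                  (((uint64_t)DMAR_IOTLB_REG_OFFSET >> 4) << 8),
                  "ECAP_REG IRO encoding");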