Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-26 Thread Aviv B.D.
On Wed, Mar 23, 2016 at 5:33 PM, Michael S. Tsirkin  wrote:

> On Sat, Mar 12, 2016 at 06:13:17PM +0200, Aviv B.D. wrote:
> > From: "Aviv B.D." 
> >
> >  * Fix bug that prevents qemu from starting up when vIOMMU and VFIO
> >    device are present.
> >  * Advertise Cache Mode capability in iommu cap register.
> >  * Register every VFIO device with IOMMU state.
> >  * On page cache invalidation in vIOMMU, check if the domain belongs to
> >    a VFIO device and mirror the guest requests to host.
> >
> > Not working (Yet!):
> >  * Tested only with network interface card (ixgbevf) and
> >    intel_iommu=strict in guest's kernel command line.
> >  * Lock up under high load.
> >  * Errors on guest poweroff.
> >  * High relative latency compared to VFIO without IOMMU.
> >
> > Signed-off-by: Aviv B.D. 
> > ---
> >  hw/i386/intel_iommu.c          | 76 ++
> >  hw/i386/intel_iommu_internal.h |  1 +
> >  hw/vfio/common.c               | 12 +--
> >  include/hw/i386/intel_iommu.h  |  4 +++
> >  include/hw/vfio/vfio-common.h  |  1 +
> >  5 files changed, 85 insertions(+), 9 deletions(-)
> >
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index 347718f..046688f 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
> >  #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
> >  #endif
> >
> > +static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> > +                                    uint8_t devfn, VTDContextEntry *ce);
> > +
> >  static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
> >                              uint64_t wmask, uint64_t w1cmask)
> >  {
> > @@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
> >      return new_val;
> >  }
> >
> > +static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn)
> > +{
> > +    VTDContextEntry ce;
> > +    int ret_fr;
> > +
> > +    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
> > +    if (ret_fr){
> > +        return -1;
> > +    }
> > +
> > +    return VTD_CONTEXT_ENTRY_DID(ce.hi);
> > +}
> > +
> >  static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
> >                                          uint64_t clear, uint64_t mask)
> >  {
> > @@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> >      }
> >
> >      if (!vtd_context_entry_present(ce)) {
> > -        VTD_DPRINTF(GENERAL,
> > +        /*VTD_DPRINTF(GENERAL,
> >                      "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
> > -                    "is not present", devfn, bus_num);
> > +                    "is not present", devfn, bus_num);*/
> >          return -VTD_FR_CONTEXT_ENTRY_P;
> >      } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
> >                 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
> > @@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
> >                                        hwaddr addr, uint8_t am)
> >  {
> >      VTDIOTLBPageInvInfo info;
> > +    VFIOGuestIOMMU *giommu;
> > +    bool flag = false;
> >
> >      assert(am <= VTD_MAMV);
> >      info.domain_id = domain_id;
> >      info.addr = addr;
> >      info.mask = ~((1 << am) - 1);
> > +
> > +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> > +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> > +        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus), vtd_as->devfn);
> > +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> > +        if (vfio_domain_id != (uint16_t)-1 &&
> > +                domain_id == vfio_domain_id){
> > +            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
> > +            if (iotlb_entry != NULL){
> > +                IOMMUTLBEntry entry;
> > +                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
> > +                entry.iova = addr & VTD_PAGE_MASK_4K;
> > +                entry.translated_addr = vtd_get_slpte_addr(iotlb_entry->slpte) & VTD_PAGE_MASK_4K;
> > +                entry.addr_mask = ~VTD_PAGE_MASK_4K;
> > +                entry.perm = IOMMU_NONE;
> > +                memory_region_notify_iommu(giommu->iommu, entry);
> > +                flag = true;
> > +
> > +            }
> > +        }
> > +
> > +    }
> > +
> >      g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> > -}
> >
> > +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> > +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> > +        uint16_t vfio_domain_id =

Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-23 Thread Michael S. Tsirkin
On Thu, Mar 17, 2016 at 01:58:13PM +0200, Aviv B.D. wrote:
> 
> 
> On Tue, Mar 15, 2016 at 12:53 PM, Michael S. Tsirkin  wrote:
> 
> On Sat, Mar 12, 2016 at 06:13:17PM +0200, Aviv B.D. wrote:
> > From: "Aviv B.D." 
> >
> >  * Fix bug that prevents qemu from starting up when vIOMMU and VFIO
> >    device are present.
> >  * Advertise Cache Mode capability in iommu cap register.
> >  * Register every VFIO device with IOMMU state.
> >  * On page cache invalidation in vIOMMU, check if the domain belongs to
> >    a VFIO device and mirror the guest requests to host.
> >
> > Not working (Yet!):
> >  * Tested only with network interface card (ixgbevf) and
> >    intel_iommu=strict in guest's kernel command line.
> >  * Lock up under high load.
> >  * Errors on guest poweroff.
> >  * High relative latency compared to VFIO without IOMMU.
> >
> > Signed-off-by: Aviv B.D. 
> 
> Thanks, this is very interesting.
> So this needs some cleanup, and there are some issues that will have to be addressed.
> See below.
> Thanks!
> 
> 
> Thanks! 
> 
> 
> > ---
> >  hw/i386/intel_iommu.c          | 76 ++
> >  hw/i386/intel_iommu_internal.h |  1 +
> >  hw/vfio/common.c               | 12 +--
> >  include/hw/i386/intel_iommu.h  |  4 +++
> >  include/hw/vfio/vfio-common.h  |  1 +
> >  5 files changed, 85 insertions(+), 9 deletions(-)
> >
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index 347718f..046688f 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
> >  #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
> >  #endif
> >
> > +static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> > +                                    uint8_t devfn, VTDContextEntry *ce);
> > +
> >  static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
> >                              uint64_t wmask, uint64_t w1cmask)
> >  {
> > @@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
> >      return new_val;
> >  }
> >
> > +static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn)
> > +{
> > +    VTDContextEntry ce;
> > +    int ret_fr;
> > +
> > +    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
> > +    if (ret_fr){
> > +        return -1;
> > +    }
> > +
> > +    return VTD_CONTEXT_ENTRY_DID(ce.hi);
> > +}
> > +
> >  static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
> >                                          uint64_t clear, uint64_t mask)
> >  {
> > @@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> >      }
> >
> >      if (!vtd_context_entry_present(ce)) {
> > -        VTD_DPRINTF(GENERAL,
> > +        /*VTD_DPRINTF(GENERAL,
> >                      "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
> > -                    "is not present", devfn, bus_num);
> > +                    "is not present", devfn, bus_num);*/
> >          return -VTD_FR_CONTEXT_ENTRY_P;
> >      } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
> >                 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
> > @@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
> >                                        hwaddr addr, uint8_t am)
> >  {
> >      VTDIOTLBPageInvInfo info;
> > +    VFIOGuestIOMMU *giommu;
> > +    bool flag = false;
> >
> >      assert(am <= VTD_MAMV);
> >      info.domain_id = domain_id;
> >      info.addr = addr;
> >      info.mask = ~((1 << am) - 1);
> > +
> > +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> > +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> > +        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus), vtd_as->devfn);
> > +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> > +        if (vfio_domain_id != (uint16_t)-1 &&
> > +                domain_id == vfio_domain_id){
> > +            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
> > +            if (iotlb_entry != NULL){
> > +                IOMMUTLBEntry entry;
> > +                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr,

Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-23 Thread Michael S. Tsirkin
On Sat, Mar 12, 2016 at 06:13:17PM +0200, Aviv B.D. wrote:
> From: "Aviv B.D." 
> 
>  * Fix bug that prevents qemu from starting up when vIOMMU and VFIO
>    device are present.
>  * Advertise Cache Mode capability in iommu cap register.
>  * Register every VFIO device with IOMMU state.
>  * On page cache invalidation in vIOMMU, check if the domain belongs to
>    a VFIO device and mirror the guest requests to host.
> 
> Not working (Yet!):
>  * Tested only with network interface card (ixgbevf) and
>    intel_iommu=strict in guest's kernel command line.
>  * Lock up under high load.
>  * Errors on guest poweroff.
>  * High relative latency compared to VFIO without IOMMU.
> 
> Signed-off-by: Aviv B.D. 
> ---
>  hw/i386/intel_iommu.c          | 76 ++
>  hw/i386/intel_iommu_internal.h |  1 +
>  hw/vfio/common.c               | 12 +--
>  include/hw/i386/intel_iommu.h  |  4 +++
>  include/hw/vfio/vfio-common.h  |  1 +
>  5 files changed, 85 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 347718f..046688f 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
>  #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
>  #endif
>  
> +static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> +                                    uint8_t devfn, VTDContextEntry *ce);
> +
>  static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
>                              uint64_t wmask, uint64_t w1cmask)
>  {
> @@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
>      return new_val;
>  }
>  
> +static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn)
> +{
> +    VTDContextEntry ce;
> +    int ret_fr;
> +
> +    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
> +    if (ret_fr){
> +        return -1;
> +    }
> +
> +    return VTD_CONTEXT_ENTRY_DID(ce.hi);
> +}
> +
>  static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
>                                          uint64_t clear, uint64_t mask)
>  {
> @@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
>      }
>  
>      if (!vtd_context_entry_present(ce)) {
> -        VTD_DPRINTF(GENERAL,
> +        /*VTD_DPRINTF(GENERAL,
>                      "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
> -                    "is not present", devfn, bus_num);
> +                    "is not present", devfn, bus_num);*/
>          return -VTD_FR_CONTEXT_ENTRY_P;
>      } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
>                 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
> @@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
>                                        hwaddr addr, uint8_t am)
>  {
>      VTDIOTLBPageInvInfo info;
> +    VFIOGuestIOMMU *giommu;
> +    bool flag = false;
>  
>      assert(am <= VTD_MAMV);
>      info.domain_id = domain_id;
>      info.addr = addr;
>      info.mask = ~((1 << am) - 1);
> +
> +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> +        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus), vtd_as->devfn);
> +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> +        if (vfio_domain_id != (uint16_t)-1 &&
> +                domain_id == vfio_domain_id){
> +            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
> +            if (iotlb_entry != NULL){
> +                IOMMUTLBEntry entry;
> +                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
> +                entry.iova = addr & VTD_PAGE_MASK_4K;
> +                entry.translated_addr = vtd_get_slpte_addr(iotlb_entry->slpte) & VTD_PAGE_MASK_4K;
> +                entry.addr_mask = ~VTD_PAGE_MASK_4K;
> +                entry.perm = IOMMU_NONE;
> +                memory_region_notify_iommu(giommu->iommu, entry);
> +                flag = true;
> +
> +            }
> +        }
> +
> +    }
> +
>      g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> -}
>  
> +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> +        if (vfio_domain_id != (uint16_t)-1 &&
> +                domain_id == vfio_domain_id && !flag){
> +            /* do vfio map */

So what happens here if the address is changed while the entry stays valid?
I think vfio in the kernel will fail to map the address,
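
(For context, the notification path both questions revolve around: memory_region_notify_iommu() walks the region's notifier list, and VFIO's handler replays each IOMMUTLBEntry against the host. Abridged from hw/vfio/common.c of that era, with error handling trimmed, so a sketch rather than the exact code:)

static void vfio_iommu_map_notify(Notifier *n, void *data)
{
    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
    VFIOContainer *container = giommu->container;
    IOMMUTLBEntry *iotlb = data;
    hwaddr len = iotlb->addr_mask + 1;
    MemoryRegion *mr;
    hwaddr xlat;
    void *vaddr;

    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
        /* map request: resolve the translated address to a host pointer
         * and pin it into the host IOMMU */
        mr = address_space_translate(&address_space_memory,
                                     iotlb->translated_addr,
                                     &xlat, &len, iotlb->perm & IOMMU_WO);
        vaddr = memory_region_get_ram_ptr(mr) + xlat;
        vfio_dma_map(container, iotlb->iova, iotlb->addr_mask + 1,
                     vaddr, !(iotlb->perm & IOMMU_WO));
    } else {
        /* unmap request: drop the host mapping for this range */
        vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
    }
}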

Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-22 Thread Aviv B.D.
On Mon, Mar 21, 2016 at 4:30 AM, Peter Xu  wrote:

> On Sat, Mar 19, 2016 at 11:40:04AM +0200, Aviv B.D. wrote:
> [...]
> > As far as I understand the code, currently there is no way to turn off
> > the IOTLB. Furthermore, the IOTLB is not implemented as an LRU, and it
> > actually caches (indefinitely) any accessed address, without any size
> > constraints. I use those assumptions to know whether the current
> > invalidation is for an unmap operation or a map operation.
>
> Please have a look at VTD_IOTLB_MAX_SIZE. It seems to be the size of
> the hash.
>
> Btw, I guess it's a much bigger problem if IOTLB has unlimited cache
> size...
>
> Thanks.
>
> -- peterx

You are correct, VTD_IOTLB_MAX_SIZE limits the cache size (and resets the
whole cache if this threshold is exceeded...). I will think of another
mechanism to identify the correct action for each invalidation.

Thanks,
Aviv.
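
(For reference, the cap Peter points at sits in vtd_update_iotlb() in hw/i386/intel_iommu.c; roughly, and trimmed to the relevant lines:)

/* vtd_update_iotlb(), abridged: the cache is not an LRU. Once it holds
 * VTD_IOTLB_MAX_SIZE entries it is dropped wholesale, so an IOTLB miss
 * can also mean "evicted by a reset", not only "never mapped". */
if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
    VTD_DPRINTF(CACHE, "iotlb exceeds size limit, forced to reset");
    vtd_reset_iotlb(s);
}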


Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-20 Thread Peter Xu
On Sat, Mar 19, 2016 at 11:40:04AM +0200, Aviv B.D. wrote:
[...]
> As far as I understand the code, currently there is no way to turn off the
> IOTLB. Furthermore, the IOTLB is not implemented as an LRU, and it actually
> caches (indefinitely) any accessed address, without any size constraints. I
> use those assumptions to know whether the current invalidation is for an
> unmap operation or a map operation.

Please have a look at VTD_IOTLB_MAX_SIZE. It seems to be the size of
the hash.

Btw, I guess it's a much bigger problem if IOTLB has unlimited cache
size...

Thanks.

-- peterx



Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-19 Thread Aviv B.D.
On Fri, Mar 18, 2016 at 5:06 AM, Peter Xu  wrote:

> On Thu, Mar 17, 2016 at 01:17:30PM +0200, Aviv B.D. wrote:
> [...]
> > vtd_get_did_dev returns -1 if the device is not mapped to any domain
> > (generally, the CE is not present). Probably a better interface is to
> > return whether the device has a domain or not and return the domain_id
> > via the pointer argument.
>
> Possibly, as long as guest kernel might be using (uint16_t)-1 as
> domain ID. ;)
>
> >
> >
> > >
> > > > >+                domain_id == vfio_domain_id){
> > > > >+            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
> > > > >+            if (iotlb_entry != NULL){
> > >
> > > Here, shall we notify VFIO even if the address is not cached in
> > > IOTLB? Anyway, we need to do the unmap() of the address, am I
> > > correct?
> > >
> > With this code I do an unmap operation if the address was cached in the
> > IOTLB; if not, I'm assuming that the current invalidation invalidates a
> > (previously) non-present address and I should do a map operation (during
> > the map operation I'm calling s->iommu_ops.translate, which caches the
> > address).
>
> I am not 100% sure of this, but... is this related to IOTLB at all?
> What I see is that, IOTLB is only a cache layer of IOMMU, and it is
> possible that we mapped some areas which are not in the IOTLB at
> all.
>
> Or, let's make an assumption here: what if I turn IOTLB off (or say,
> set hash size to zero)? IOMMU should still work, though slower,
> right?  However, due to the above checking, we'll never do unmap() in
> this case (while IMHO we should).
>
> Thanks.
>
> -- peterx


Hi,
As far as I understand the code, currently there is no way to turn off the
IOTLB. Furthermore, the IOTLB is not implemented as an LRU, and it actually
caches (indefinitely) any accessed address, without any size constraints. I
use those assumptions to know whether the current invalidation is for an
unmap operation or a map operation.

But I need to check whether it is possible (for the guest kernel) to squeeze
together unmap and map operations and issue only one invalidation for both
(probably the answer is yes, and it may explain one of my bugs).

Aviv.


Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-19 Thread Aviv B.D.
On Tue, Mar 15, 2016 at 10:52 AM, Peter Xu  wrote:

> On Mon, Mar 14, 2016 at 08:52:33PM +0200, Marcel Apfelbaum wrote:
> > On 03/12/2016 06:13 PM, Aviv B.D. wrote:
> > Adding (possibly) interested developers to the thread.
>
> Thanks CC.
>
> Hi, Aviv, several questions inline.
>
> [...]
>
> > >@@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
> > >                                      hwaddr addr, uint8_t am)
> > >  {
> > >      VTDIOTLBPageInvInfo info;
> > >+    VFIOGuestIOMMU *giommu;
> > >+    bool flag = false;
> > >      assert(am <= VTD_MAMV);
> > >      info.domain_id = domain_id;
> > >      info.addr = addr;
> > >      info.mask = ~((1 << am) - 1);
> > >+
> > >+    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> > >+        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> > >+        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus), vtd_as->devfn);
> > >+        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> > >+        if (vfio_domain_id != (uint16_t)-1 &&
>
> Could you (or anyone) help explain what vfio_domain_id != -1
> means?


vtd_get_did_dev returns -1 if the device is not mapped to any domain
(generally, the CE is not present). Probably a better interface is to return
whether the device has a domain or not and return the domain_id via the
pointer argument.
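
A minimal sketch of that alternative (the helper name is hypothetical, not from the patch), which also sidesteps the concern raised elsewhere in the thread about a guest legitimately using 0xffff as a domain id:

/* Hypothetical variant of vtd_get_did_dev(): report presence via the
 * return value and pass the domain id through an out-parameter, so a
 * guest-assigned domain id of 0xffff cannot collide with the "no
 * context entry" sentinel. */
static bool vtd_dev_get_domain_id(IntelIOMMUState *s, uint8_t bus_num,
                                  uint8_t devfn, uint16_t *domain_id)
{
    VTDContextEntry ce;

    if (vtd_dev_to_context_entry(s, bus_num, devfn, &ce)) {
        return false;               /* CE not present (or malformed) */
    }
    *domain_id = VTD_CONTEXT_ENTRY_DID(ce.hi);
    return true;
}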


>
> > > >+                domain_id == vfio_domain_id){
> > > >+            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
> > > >+            if (iotlb_entry != NULL){
>
> Here, shall we notify VFIO even if the address is not cached in
> IOTLB? Anyway, we need to do the unmap() of the address, am I
> correct?
>
With this code I do an unmap operation if the address was cached in the
IOTLB; if not, I'm assuming that the current invalidation invalidates a
(previously) non-present address and I should do a map operation (during
the map operation I'm calling s->iommu_ops.translate, which caches the
address).
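
Condensed, that heuristic is (names taken from the patch; a sketch of its two loops, not new logic):

/* IOTLB hit  => the guest had this mapped, treat as unmap.
 * IOTLB miss => assume the guest just created the mapping, treat as map. */
VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
IOMMUTLBEntry entry;

if (iotlb_entry != NULL) {
    entry.iova = addr & VTD_PAGE_MASK_4K;
    entry.addr_mask = ~VTD_PAGE_MASK_4K;
    entry.perm = IOMMU_NONE;                       /* mirror as unmap */
} else {
    /* translate() walks the guest page table and refills the IOTLB */
    entry = s->iommu_ops.translate(giommu->iommu, addr, 0);
    entry.perm = IOMMU_RW;                         /* mirror as map */
}
memory_region_notify_iommu(giommu->iommu, entry);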

>
> > >+                IOMMUTLBEntry entry;
> > >+                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
> > >+                entry.iova = addr & VTD_PAGE_MASK_4K;
> > >+                entry.translated_addr = vtd_get_slpte_addr(iotlb_entry->slpte) & VTD_PAGE_MASK_4K;
> > >+                entry.addr_mask = ~VTD_PAGE_MASK_4K;
> > >+                entry.perm = IOMMU_NONE;
> > >+                memory_region_notify_iommu(giommu->iommu, entry);
> > >+                flag = true;
> > >+
> > >+            }
> > >+        }
> > >+
> > >+    }
> > >+
> > >      g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> > >-}
> > >+    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> > >+        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> > >+        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> > >+        if (vfio_domain_id != (uint16_t)-1 &&
> > >+                domain_id == vfio_domain_id && !flag){
> > >+            /* do vfio map */
> > >+            VTD_DPRINTF(GENERAL, "add addr 0x%"PRIx64 " mask %d", addr, am);
> > >+            /* call to vtd_iommu_translate */
> > >+            IOMMUTLBEntry entry = s->iommu_ops.translate(giommu->iommu, addr, 0);
> > >+            entry.perm = IOMMU_RW;
> > >+            memory_region_notify_iommu(giommu->iommu, entry);
> > >+            //g_vfio_iommu->n.notify(&g_vfio_iommu->n, &entry);
> > >+        }
> > >+    }
> > >+}
>
> I see that we handled all the page invalidations. Would it be
> possible that the guest kernel sends domain/global invalidations? Should
> we handle them too?
>

You are correct; currently this code is pretty much at POC level, and I
support only page invalidation because this is what Linux uses. The
final version should also support those invalidation ops.
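
A sketch of such a dispatch, following the granularity decoding that vtd_iotlb_flush() already does for the emulated IOTLB (the vtd_mirror_* helpers are hypothetical):

/* Hypothetical: mirror every IOTLB invalidation granularity to the
 * host, not only the page-selective one the patch handles. */
switch (val & VTD_TLB_FLUSH_GRANU_MASK) {
case VTD_TLB_GLOBAL_FLUSH:
    vtd_mirror_all(s);                         /* every domain, every page */
    break;
case VTD_TLB_DSI_FLUSH:
    vtd_mirror_domain(s, domain_id);           /* one domain */
    break;
case VTD_TLB_PSI_FLUSH:
    vtd_mirror_pages(s, domain_id, addr, am);  /* what the patch does now */
    break;
}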

>
> [...]
>
> > >  static void vfio_listener_region_add(MemoryListener *listener,
> > >                                       MemoryRegionSection *section)
> > >  {
> > >@@ -344,6 +347,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
> > >      iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> > >      llend = int128_make64(section->offset_within_address_space);
> > >      llend = int128_add(llend, section->size);
> > >+    llend = int128_add(llend, int128_exts64(-1));
>
> Here, -1 should fix the assertion core dump. However, shall we also
> handle all the rest of the places that use "llend" (possibly with the
> variable "end") too? For example, at the end of the current function,
> when we map dma regions:
>
> ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);
>
> To:
>
> ret = vfio_dma_map(container, iova, end + 1 - iova, vaddr, section->readonly);
>
> Thanks.

I will add this to the next version of the patch, thanks!

Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-19 Thread Peter Xu
On Thu, Mar 17, 2016 at 01:17:30PM +0200, Aviv B.D. wrote:
[...]
> vtd_get_did_dev returns -1 if the device is not mapped to any domain
> (generally, the CE is not present). Probably a better interface is to
> return whether the device has a domain or not and return the domain_id
> via the pointer argument.

Possibly, as long as guest kernel might be using (uint16_t)-1 as
domain ID. ;)

> 
> 
> >
> > > >+                domain_id == vfio_domain_id){
> > > >+            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
> > > >+            if (iotlb_entry != NULL){
> >
> > Here, shall we notify VFIO even if the address is not cached in
> > IOTLB? Anyway, we need to do the unmap() of the address, am I
> > correct?
> >
> With this code I do an unmap operation if the address was cached in the
> IOTLB; if not, I'm assuming that the current invalidation invalidates a
> (previously) non-present address and I should do a map operation (during
> the map operation I'm calling s->iommu_ops.translate, which caches the
> address).

I am not 100% sure of this, but... is this related to IOTLB at all?
What I see is that, IOTLB is only a cache layer of IOMMU, and it is
possible that we mapped some areas which are not in the IOTLB at
all.

Or, let's make an assumption here: what if I turn IOTLB off (or say,
set hash size to zero)? IOMMU should still work, though slower,
right?  However, due to the above checking, we'll never do unmap() in
this case (while IMHO we should).

Thanks.

-- peterx
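
Distilled, the variant Peter is arguing for would mirror unconditionally, something like this (a sketch, assuming translate() reports a non-present guest mapping as IOMMU_NONE):

/* Sketch: always forward the unmap, then re-translate; no IOTLB lookup.
 * If the guest tore the mapping down, only the unmap reaches the host;
 * if it replaced the mapping, the fresh translation follows it. The
 * IOTLB stays what it is - a cache, not ground truth. */
IOMMUTLBEntry entry;

entry.iova = addr & VTD_PAGE_MASK_4K;
entry.addr_mask = ~VTD_PAGE_MASK_4K;
entry.perm = IOMMU_NONE;
memory_region_notify_iommu(giommu->iommu, entry);      /* unconditional unmap */

entry = s->iommu_ops.translate(giommu->iommu, addr, 0);
if (entry.perm != IOMMU_NONE) {
    memory_region_notify_iommu(giommu->iommu, entry);  /* re-map if still present */
}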



Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-18 Thread Aviv B.D.
On Tue, Mar 15, 2016 at 12:53 PM, Michael S. Tsirkin  wrote:

> On Sat, Mar 12, 2016 at 06:13:17PM +0200, Aviv B.D. wrote:
> > From: "Aviv B.D." 
> >
> >  * Fix bug that prevents qemu from starting up when vIOMMU and VFIO
> >    device are present.
> >  * Advertise Cache Mode capability in iommu cap register.
> >  * Register every VFIO device with IOMMU state.
> >  * On page cache invalidation in vIOMMU, check if the domain belongs to
> >    a VFIO device and mirror the guest requests to host.
> >
> > Not working (Yet!):
> >  * Tested only with network interface card (ixgbevf) and
> >    intel_iommu=strict in guest's kernel command line.
> >  * Lock up under high load.
> >  * Errors on guest poweroff.
> >  * High relative latency compared to VFIO without IOMMU.
> >
> > Signed-off-by: Aviv B.D. 
>
> Thanks, this is very interesting.
> So this needs some cleanup, and there are some issues that will have to be addressed.
> See below.
> Thanks!
>
Thanks!

>
> > ---
> >  hw/i386/intel_iommu.c          | 76 ++
> >  hw/i386/intel_iommu_internal.h |  1 +
> >  hw/vfio/common.c               | 12 +--
> >  include/hw/i386/intel_iommu.h  |  4 +++
> >  include/hw/vfio/vfio-common.h  |  1 +
> >  5 files changed, 85 insertions(+), 9 deletions(-)
> >
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index 347718f..046688f 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
> >  #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
> >  #endif
> >
> > +static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> > +                                    uint8_t devfn, VTDContextEntry *ce);
> > +
> >  static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
> >                              uint64_t wmask, uint64_t w1cmask)
> >  {
> > @@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
> >      return new_val;
> >  }
> >
> > +static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn)
> > +{
> > +    VTDContextEntry ce;
> > +    int ret_fr;
> > +
> > +    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
> > +    if (ret_fr){
> > +        return -1;
> > +    }
> > +
> > +    return VTD_CONTEXT_ENTRY_DID(ce.hi);
> > +}
> > +
> >  static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
> >                                          uint64_t clear, uint64_t mask)
> >  {
> > @@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> >      }
> >
> >      if (!vtd_context_entry_present(ce)) {
> > -        VTD_DPRINTF(GENERAL,
> > +        /*VTD_DPRINTF(GENERAL,
> >                      "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
> > -                    "is not present", devfn, bus_num);
> > +                    "is not present", devfn, bus_num);*/
> >          return -VTD_FR_CONTEXT_ENTRY_P;
> >      } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
> >                 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
> > @@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
> >                                        hwaddr addr, uint8_t am)
> >  {
> >      VTDIOTLBPageInvInfo info;
> > +    VFIOGuestIOMMU *giommu;
> > +    bool flag = false;
> >
> >      assert(am <= VTD_MAMV);
> >      info.domain_id = domain_id;
> >      info.addr = addr;
> >      info.mask = ~((1 << am) - 1);
> > +
> > +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> > +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> > +        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus), vtd_as->devfn);
> > +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> > +        if (vfio_domain_id != (uint16_t)-1 &&
> > +                domain_id == vfio_domain_id){
> > +            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
> > +            if (iotlb_entry != NULL){
> > +                IOMMUTLBEntry entry;
> > +                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
> > +                entry.iova = addr & VTD_PAGE_MASK_4K;
> > +                entry.translated_addr = vtd_get_slpte_addr(iotlb_entry->slpte) & VTD_PAGE_MASK_4K;
> > +                entry.addr_mask = ~VTD_PAGE_MASK_4K;
> > +                entry.perm = IOMMU_NONE;
> > +                memory_region_notify_iommu(giommu->iommu, entry);
> > +                flag = true;
> > +
> > +            }
> > +        }
> > +
> > +    }
> > +
> >      g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> > -}
> >
> > +    QLIST_FOREACH(giommu, &(s->giommu_list),

Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-15 Thread Michael S. Tsirkin
On Sat, Mar 12, 2016 at 06:13:17PM +0200, Aviv B.D. wrote:
> From: "Aviv B.D." 
> 
>  * Fix bug that prevents qemu from starting up when vIOMMU and VFIO
>    device are present.
>  * Advertise Cache Mode capability in iommu cap register.
>  * Register every VFIO device with IOMMU state.
>  * On page cache invalidation in vIOMMU, check if the domain belongs to
>    a VFIO device and mirror the guest requests to host.
> 
> Not working (Yet!):
>  * Tested only with network interface card (ixgbevf) and
>    intel_iommu=strict in guest's kernel command line.
>  * Lock up under high load.
>  * Errors on guest poweroff.
>  * High relative latency compared to VFIO without IOMMU.
> 
> Signed-off-by: Aviv B.D. 

Thanks, this is very interesting.
So this needs some cleanup, and there are some issues that will have to be addressed.
See below.
Thanks!


> ---
>  hw/i386/intel_iommu.c          | 76 ++
>  hw/i386/intel_iommu_internal.h |  1 +
>  hw/vfio/common.c               | 12 +--
>  include/hw/i386/intel_iommu.h  |  4 +++
>  include/hw/vfio/vfio-common.h  |  1 +
>  5 files changed, 85 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 347718f..046688f 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
>  #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
>  #endif
>  
> +static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> +                                    uint8_t devfn, VTDContextEntry *ce);
> +
>  static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
>                              uint64_t wmask, uint64_t w1cmask)
>  {
> @@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
>      return new_val;
>  }
>  
> +static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn)
> +{
> +    VTDContextEntry ce;
> +    int ret_fr;
> +
> +    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
> +    if (ret_fr){
> +        return -1;
> +    }
> +
> +    return VTD_CONTEXT_ENTRY_DID(ce.hi);
> +}
> +
>  static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
>                                          uint64_t clear, uint64_t mask)
>  {
> @@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
>      }
>  
>      if (!vtd_context_entry_present(ce)) {
> -        VTD_DPRINTF(GENERAL,
> +        /*VTD_DPRINTF(GENERAL,
>                      "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
> -                    "is not present", devfn, bus_num);
> +                    "is not present", devfn, bus_num);*/
>          return -VTD_FR_CONTEXT_ENTRY_P;
>      } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
>                 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
> @@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
>                                        hwaddr addr, uint8_t am)
>  {
>      VTDIOTLBPageInvInfo info;
> +    VFIOGuestIOMMU *giommu;
> +    bool flag = false;
>  
>      assert(am <= VTD_MAMV);
>      info.domain_id = domain_id;
>      info.addr = addr;
>      info.mask = ~((1 << am) - 1);
> +
> +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> +        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus), vtd_as->devfn);
> +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> +        if (vfio_domain_id != (uint16_t)-1 &&
> +                domain_id == vfio_domain_id){
> +            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
> +            if (iotlb_entry != NULL){
> +                IOMMUTLBEntry entry;
> +                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
> +                entry.iova = addr & VTD_PAGE_MASK_4K;
> +                entry.translated_addr = vtd_get_slpte_addr(iotlb_entry->slpte) & VTD_PAGE_MASK_4K;
> +                entry.addr_mask = ~VTD_PAGE_MASK_4K;
> +                entry.perm = IOMMU_NONE;
> +                memory_region_notify_iommu(giommu->iommu, entry);
> +                flag = true;
> +
> +            }
> +        }
> +
> +    }
> +
>      g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> -}
>  
> +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> +        if (vfio_domain_id != (uint16_t)-1 &&
> +                domain_id == vfio_domain_id && !flag){
> +            /* do

Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-15 Thread Peter Xu
On Mon, Mar 14, 2016 at 08:52:33PM +0200, Marcel Apfelbaum wrote:
> On 03/12/2016 06:13 PM, Aviv B.D. wrote:
> Adding (possibly) interested developers to the thread.

Thanks CC.

Hi, Aviv, several questions inline.

[...]

> >@@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
> >                                      hwaddr addr, uint8_t am)
> >  {
> >      VTDIOTLBPageInvInfo info;
> >+    VFIOGuestIOMMU *giommu;
> >+    bool flag = false;
> >      assert(am <= VTD_MAMV);
> >      info.domain_id = domain_id;
> >      info.addr = addr;
> >      info.mask = ~((1 << am) - 1);
> >+
> >+    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> >+        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> >+        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus), vtd_as->devfn);
> >+        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> >+        if (vfio_domain_id != (uint16_t)-1 &&

Could you (or anyone) help explain what vfio_domain_id != -1
means?

> >+                domain_id == vfio_domain_id){
> >+            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
> >+            if (iotlb_entry != NULL){

Here, shall we notify VFIO even if the address is not cached in
IOTLB? Anyway, we need to do the unmap() of the address, am I
correct?

> >+                IOMMUTLBEntry entry;
> >+                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
> >+                entry.iova = addr & VTD_PAGE_MASK_4K;
> >+                entry.translated_addr = vtd_get_slpte_addr(iotlb_entry->slpte) & VTD_PAGE_MASK_4K;
> >+                entry.addr_mask = ~VTD_PAGE_MASK_4K;
> >+                entry.perm = IOMMU_NONE;
> >+                memory_region_notify_iommu(giommu->iommu, entry);
> >+                flag = true;
> >+
> >+            }
> >+        }
> >+
> >+    }
> >+
> >      g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> >-}
> >+    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> >+        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> >+        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> >+        if (vfio_domain_id != (uint16_t)-1 &&
> >+                domain_id == vfio_domain_id && !flag){
> >+            /* do vfio map */
> >+            VTD_DPRINTF(GENERAL, "add addr 0x%"PRIx64 " mask %d", addr, am);
> >+            /* call to vtd_iommu_translate */
> >+            IOMMUTLBEntry entry = s->iommu_ops.translate(giommu->iommu, addr, 0);
> >+            entry.perm = IOMMU_RW;
> >+            memory_region_notify_iommu(giommu->iommu, entry);
> >+            //g_vfio_iommu->n.notify(&g_vfio_iommu->n, &entry);
> >+        }
> >+    }
> >+}

I see that we handled all the page invalidations. Would it be
possible that the guest kernel sends domain/global invalidations? Should
we handle them too?

[...]

> >  static void vfio_listener_region_add(MemoryListener *listener,
> >                                       MemoryRegionSection *section)
> >  {
> >@@ -344,6 +347,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
> >      iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> >      llend = int128_make64(section->offset_within_address_space);
> >      llend = int128_add(llend, section->size);
> >+    llend = int128_add(llend, int128_exts64(-1));

Here, -1 should fix the assertion core dump. However, shall we also
handle all the rest of the places that use "llend" (possibly with the
variable "end") too? For example, at the end of the current function,
when we map dma regions:

ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);

To:

ret = vfio_dma_map(container, iova, end + 1 - iova, vaddr, section->readonly);

Thanks.
Peter
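
The arithmetic in isolation (a standalone sanity check of the off-by-one; Int128 replaced by uint64_t for brevity):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t start = 0xfebf0000u;     /* section->offset_within_address_space */
    uint64_t size  = 0x10000u;        /* section->size */
    uint64_t iova  = start;           /* assume already page aligned */

    /* llend = start + size - 1: the last byte covered. Keeping the end
     * inclusive avoids the start + size == 2^64 overflow that tripped
     * the assertion; the price is the +1 at every length computation. */
    uint64_t end = start + size - 1;

    assert(end + 1 - iova == size);   /* end - iova alone is one byte short */
    return 0;
}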



Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-15 Thread Michael S. Tsirkin
On Mon, Mar 14, 2016 at 07:58:23PM +0100, Jan Kiszka wrote:
> On 2016-03-14 19:52, Marcel Apfelbaum wrote:
> > On 03/12/2016 06:13 PM, Aviv B.D. wrote:
> >> From: "Aviv B.D." >
> >>
> >>   * Fix bug that prevents qemu from starting up when vIOMMU and VFIO
> >>     device are present.
> >>   * Advertise Cache Mode capability in iommu cap register.
> 
> For the final version: Please keep that feature optional, for the sake
> of emulation accuracy (no modern hw exposes it any more). Maybe turn it
> on once a vfio device is in the scope of the IOMMU?

That would be hard to implement: VFIO supports hotplug and there's no way to
change this on the fly.

I would say
- make the feature an optional flag
- deny adding a VFIO device if the flag is not set
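
Wired up, that could look roughly like this (a sketch; the "caching-mode" property name, the caching_mode field, the VFIO-side check, and the VTD_CAP_CM definition — bit 7 of the VT-d capability register — are assumptions, not code from this thread):

/* Hypothetical opt-in switch on the intel-iommu device. */
static Property vtd_properties[] = {
    DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, false),
    DEFINE_PROP_END_OF_LIST(),
};

/* vtd_init(): advertise CM only when the user asked for it. */
if (s->caching_mode) {
    s->cap |= VTD_CAP_CM;
}

/* VFIO realize path: refuse the (hot-)add instead of breaking later.
 * vtd_present/vtd_caching_mode stand for however the VFIO code would
 * learn about the vIOMMU's state. */
if (vtd_present && !vtd_caching_mode) {
    error_setg(errp, "device assignment with vIOMMU requires caching-mode=on");
    return;
}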


> >>   * Register every VFIO device with IOMMU state.
> >>   * On page cache invalidation in vIOMMU, check if the domain belongs to
> >>     a VFIO device and mirror the guest requests to host.
> >>
> >> Not working (Yet!):
> >>   * Tested only with network interface card (ixgbevf) and
> >>     intel_iommu=strict in guest's kernel command line.
> >>   * Lock up under high load.
> >>   * Errors on guest poweroff.
> >>   * High relative latency compared to VFIO without IOMMU.
> > 
> > Adding (possibly) interested developers to the thread.
> 
> Thanks,
> Jan
> 
> -- 
> Siemens AG, Corporate Technology, CT RDA ITP SES-DE
> Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-14 Thread Jan Kiszka
On 2016-03-14 19:52, Marcel Apfelbaum wrote:
> On 03/12/2016 06:13 PM, Aviv B.D. wrote:
>> From: "Aviv B.D." >
>>
>>   * Fix bug that prevents qemu from starting up when vIOMMU and VFIO
>>     device are present.
>>   * Advertise Cache Mode capability in iommu cap register.

For the final version: Please keep that feature optional, for the sake
of emulation accuracy (no modern hw exposes it any more). Maybe turn it
on once a vfio device is in the scope of the IOMMU?

>>   * Register every VFIO device with IOMMU state.
>>   * On page cache invalidation in vIOMMU, check if the domain belongs to
>>     a VFIO device and mirror the guest requests to host.
>>
>> Not working (Yet!):
>>   * Tested only with network interface card (ixgbevf) and
>>     intel_iommu=strict in guest's kernel command line.
>>   * Lock up under high load.
>>   * Errors on guest poweroff.
>>   * High relative latency compared to VFIO without IOMMU.
> 
> Adding (possibly) interested developers to the thread.

Thanks,
Jan

-- 
Siemens AG, Corporate Technology, CT RDA ITP SES-DE
Corporate Competence Center Embedded Linux



Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-14 Thread Marcel Apfelbaum

On 03/12/2016 06:13 PM, Aviv B.D. wrote:

From: "Aviv B.D." >

  * Fix bug that prevents qemu from starting up when vIOMMU and VFIO
    device are present.
  * Advertise Cache Mode capability in iommu cap register.
  * Register every VFIO device with IOMMU state.
  * On page cache invalidation in vIOMMU, check if the domain belongs to
    a VFIO device and mirror the guest requests to host.

Not working (Yet!):
  * Tested only with network interface card (ixgbevf) and
    intel_iommu=strict in guest's kernel command line.
  * Lock up under high load.
  * Errors on guest poweroff.
  * High relative latency compared to VFIO without IOMMU.


Adding (possibly) interested developers to the thread.

Thanks,
Marcel



Signed-off-by: Aviv B.D. >
---
  hw/i386/intel_iommu.c          | 76 ++
  hw/i386/intel_iommu_internal.h |  1 +
  hw/vfio/common.c               | 12 +--
  include/hw/i386/intel_iommu.h  |  4 +++
  include/hw/vfio/vfio-common.h  |  1 +
  5 files changed, 85 insertions(+), 9 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 347718f..046688f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
 #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
 #endif
 
+static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
+                                    uint8_t devfn, VTDContextEntry *ce);
+
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                             uint64_t wmask, uint64_t w1cmask)
 {
@@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
     return new_val;
 }
 
+static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn)
+{
+    VTDContextEntry ce;
+    int ret_fr;
+
+    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
+    if (ret_fr){
+        return -1;
+    }
+
+    return VTD_CONTEXT_ENTRY_DID(ce.hi);
+}
+
 static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
                                         uint64_t clear, uint64_t mask)
 {
@@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
     }
 
     if (!vtd_context_entry_present(ce)) {
-        VTD_DPRINTF(GENERAL,
+        /*VTD_DPRINTF(GENERAL,
                     "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
-                    "is not present", devfn, bus_num);
+                    "is not present", devfn, bus_num);*/
         return -VTD_FR_CONTEXT_ENTRY_P;
     } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
                (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
@@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
                                       hwaddr addr, uint8_t am)
 {
     VTDIOTLBPageInvInfo info;
+    VFIOGuestIOMMU *giommu;
+    bool flag = false;
 
     assert(am <= VTD_MAMV);
     info.domain_id = domain_id;
     info.addr = addr;
     info.mask = ~((1 << am) - 1);
+
+    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
+        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
+        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus), vtd_as->devfn);
+        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
+        if (vfio_domain_id != (uint16_t)-1 &&
+                domain_id == vfio_domain_id){
+            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
+            if (iotlb_entry != NULL){
+                IOMMUTLBEntry entry;
+                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
+                entry.iova = addr & VTD_PAGE_MASK_4K;
+                entry.translated_addr = vtd_get_slpte_addr(iotlb_entry->slpte) & VTD_PAGE_MASK_4K;
+                entry.addr_mask = ~VTD_PAGE_MASK_4K;
+                entry.perm = IOMMU_NONE;
+                memory_region_notify_iommu(giommu->iommu, entry);
+                flag = true;
+
+            }
+        }
+
+    }
+
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
-}
+    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
+        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
+        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
+        if (vfio_domain_id != (uint16_t)-1 &&
+                domain_id == vfio_domain_id && !flag){
+            /* do vfio map */
+            VTD_DPRINTF(GENERAL, "add addr 0x%"PRIx64 " mask %d", addr, am);
+            /* call to vtd_iommu_translate */
+            IOMMUTLBEntry entry = s->iommu_ops.translate(giommu->iommu, addr, 0);
+            entry.perm = IOMMU_RW;
+

[Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present

2016-03-12 Thread Aviv B.D.
From: "Aviv B.D." 

 * Fix bug that prevents qemu from starting up when vIOMMU and VFIO
   device are present.
 * Advertise Cache Mode capability in iommu cap register.
 * Register every VFIO device with IOMMU state.
 * On page cache invalidation in vIOMMU, check if the domain belongs to
   a VFIO device and mirror the guest requests to host.

Not working (Yet!):
 * Tested only with network interface card (ixgbevf) and
   intel_iommu=strict in guest's kernel command line.
 * Lock up under high load.
 * Errors on guest poweroff.
 * High relative latency compared to VFIO without IOMMU.

Signed-off-by: Aviv B.D. 
---
 hw/i386/intel_iommu.c          | 76 ++
 hw/i386/intel_iommu_internal.h |  1 +
 hw/vfio/common.c               | 12 +--
 include/hw/i386/intel_iommu.h  |  4 +++
 include/hw/vfio/vfio-common.h  |  1 +
 5 files changed, 85 insertions(+), 9 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 347718f..046688f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
 #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
 #endif
 
+static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
+                                    uint8_t devfn, VTDContextEntry *ce);
+
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                             uint64_t wmask, uint64_t w1cmask)
 {
@@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
     return new_val;
 }
 
+static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn)
+{
+    VTDContextEntry ce;
+    int ret_fr;
+
+    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
+    if (ret_fr){
+        return -1;
+    }
+
+    return VTD_CONTEXT_ENTRY_DID(ce.hi);
+}
+
 static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
                                         uint64_t clear, uint64_t mask)
 {
@@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
     }
 
     if (!vtd_context_entry_present(ce)) {
-        VTD_DPRINTF(GENERAL,
+        /*VTD_DPRINTF(GENERAL,
                     "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
-                    "is not present", devfn, bus_num);
+                    "is not present", devfn, bus_num);*/
         return -VTD_FR_CONTEXT_ENTRY_P;
     } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
                (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
@@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
                                       hwaddr addr, uint8_t am)
 {
     VTDIOTLBPageInvInfo info;
+    VFIOGuestIOMMU *giommu;
+    bool flag = false;
 
     assert(am <= VTD_MAMV);
     info.domain_id = domain_id;
     info.addr = addr;
     info.mask = ~((1 << am) - 1);
+
+    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
+        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
+        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus), vtd_as->devfn);
+        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
+        if (vfio_domain_id != (uint16_t)-1 &&
+                domain_id == vfio_domain_id){
+            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
+            if (iotlb_entry != NULL){
+                IOMMUTLBEntry entry;
+                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
+                entry.iova = addr & VTD_PAGE_MASK_4K;
+                entry.translated_addr = vtd_get_slpte_addr(iotlb_entry->slpte) & VTD_PAGE_MASK_4K;
+                entry.addr_mask = ~VTD_PAGE_MASK_4K;
+                entry.perm = IOMMU_NONE;
+                memory_region_notify_iommu(giommu->iommu, entry);
+                flag = true;
+
+            }
+        }
+
+    }
+
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
-}
 
+    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
+        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
+        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
+        if (vfio_domain_id != (uint16_t)-1 &&
+                domain_id == vfio_domain_id && !flag){
+            /* do vfio map */
+            VTD_DPRINTF(GENERAL, "add addr 0x%"PRIx64 " mask %d", addr, am);
+            /* call to vtd_iommu_translate */
+            IOMMUTLBEntry entry = s->iommu_ops.translate(giommu->iommu, addr, 0);
+            entry.perm = IOMMU_RW;
+            memory_region_notify_iommu(giommu->iommu, entry);
+            //g_vfio_iommu->n.notify(&g_vfio_iommu->n, &entry);
+        }
+    }
+}
 /* Flush IOTLB
  * Returns the IOTLB Actual Invalidation Granularity.
  * @val: