Re: [Xen-devel] [PATCH v2 3/4] iommu: elide flushing for higher order map/unmap operations

2018-12-04 Thread Jan Beulich
>>> On 04.12.18 at 17:53,  wrote:
>> From: Xen-devel [mailto:xen-devel-boun...@lists.xenproject.org] On Behalf
>> Of Jan Beulich
>> Sent: 04 December 2018 16:02
>> 
>> >>> On 04.12.18 at 16:36,  wrote:
>> >> From: Jan Beulich [mailto:jbeul...@suse.com]
>> >> Sent: 04 December 2018 15:17
>> >>
>> >> >>> On 03.12.18 at 18:40,  wrote:
>> >> > --- a/xen/drivers/passthrough/vtd/iommu.c
>> >> > +++ b/xen/drivers/passthrough/vtd/iommu.c
>> >> > @@ -633,11 +633,14 @@ static int __must_check
>> iommu_flush_iotlb(struct
>> >> domain *d, dfn_t dfn,
>> >> >
>> >> >  static int __must_check iommu_flush_iotlb_pages(struct domain *d,
>> >> >  dfn_t dfn,
>> >> > -unsigned int
>> >> page_count)
>> >> > +unsigned int
>> >> page_count,
>> >> > +unsigned int
>> >> flush_flags)
>> >> >  {
>> >> >  ASSERT(page_count && !dfn_eq(dfn, INVALID_DFN));
>> >> > +ASSERT(flush_flags);
>> >> >
>> >> > -return iommu_flush_iotlb(d, dfn, 1, page_count);
>> >> > +return iommu_flush_iotlb(d, dfn, flush_flags &
>> >> IOMMU_FLUSHF_modified,
>> >> > + page_count);
>> >>
>> >> Why the restriction to "modified"?
>> >
>> > The parameter is a bool which should be true if an existing PTE was
>> modified
>> > or false otherwise. I can make this !!(flush_flags &
>> IOMMU_FLUSHF_modified) if
>> > you prefer.
>> 
>> No, that wasn't my point. The question is why this isn't just
>> "flush_flags", without any masking. Iirc there are precautions
>> in the VT-d code to deal with hardware which may cache
>> non-present entries. In that case "added" requires flushing too.
>> 
> 
> I don't understand. iommu_flush_iotlb()'s third argument is 
> 'dma_old_pte_present' so that should be true iff IOMMU_FLUSHF_modified is 
> set. IOMMU_FLUSHF_added is irrelevant to the implementation.

Oh, you're right. I'm sorry for the noise then.

Jan



___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v2 3/4] iommu: elide flushing for higher order map/unmap operations

2018-12-04 Thread Paul Durrant
> -Original Message-
> From: Xen-devel [mailto:xen-devel-boun...@lists.xenproject.org] On Behalf
> Of Jan Beulich
> Sent: 04 December 2018 16:02
> To: Paul Durrant 
> Cc: Kevin Tian ; Stefano Stabellini
> ; Wei Liu ; Konrad Rzeszutek
> Wilk ; Andrew Cooper ;
> Tim (Xen.org) ; George Dunlap ;
> Julien Grall ; Suravee Suthikulpanit
> ; xen-devel  de...@lists.xenproject.org>; Ian Jackson ; Brian
> Woods ; Roger Pau Monne 
> Subject: Re: [Xen-devel] [PATCH v2 3/4] iommu: elide flushing for higher
> order map/unmap operations
> 
> >>> On 04.12.18 at 16:36,  wrote:
> >> From: Jan Beulich [mailto:jbeul...@suse.com]
> >> Sent: 04 December 2018 15:17
> >>
> >> >>> On 03.12.18 at 18:40,  wrote:
> >> > --- a/xen/arch/arm/p2m.c
> >> > +++ b/xen/arch/arm/p2m.c
> >> > @@ -971,8 +971,17 @@ static int __p2m_set_entry(struct p2m_domain
> *p2m,
> >> >
> >> >  if ( need_iommu_pt_sync(p2m->domain) &&
> >> >   (lpae_is_valid(orig_pte) || lpae_is_valid(*entry)) )
> >> > +{
> >> > +unsigned int flush_flags = 0;
> >> > +
> >> > +if ( lpae_is_valid(orig_pte) )
> >> > +flush_flags |= IOMMU_FLUSHF_modified;
> >> > +if ( lpae_is_valid(*entry) )
> >> > +flush_flags |= IOMMU_FLUSHF_added;
> >>
> >> Shouldn't this be "else if" with the meaning assigned to both
> >> types? From an abstract perspective I'd also expect that for
> >> a single mapping no more than one of the flags can come
> >> back set (through the iommu_ops interface).
> >
> > That's not how I see it. My rationale is:
> >
> > - present PTE made non-present, or modified -> IOMMU_FLUSHF_modified
> > - new PTE value is present -> IOMMU_FLUSHF_added
> >
> > So, a single op can set any combination of bits and thus the above code
> does
> > not use 'else if'.
> 
> I can't fit this with the code comments:
> 
> enum
> {
> _IOMMU_FLUSHF_added, /* no modified entries, just additional entries
> */
> _IOMMU_FLUSHF_modified, /* modified entries */
> };
> 
> ..., in particular the "no modified entries" part.

That was supposed to emphasize that the other flag is needed in the case 
of a mapping that modifies an existing entry... I'll re-state using a block 
comment.

> 
> >> > @@ -84,7 +86,7 @@ static bool set_iommu_pde_present(uint32_t *pde,
> >> unsigned long next_mfn,
> >> >
> >> >  if ( maddr_old != maddr_next || iw != old_w || ir != old_r
> ||
> >> >   old_level != next_level )
> >> > -need_flush = true;
> >> > +flush_flags = IOMMU_FLUSHF_modified;
> >>
> >> Why uniformly "modified"?
> >
> > Because the AMD IOMMU does require flushing for a non-present -> present
> > transition AFAICT. The old code certainly implies this.
> 
> It is one thing what the flush function does with the value, but
> another whether the modifying function "lies". I'm not opposed
> to simplification, but then a comment needs to explain this.
> 

Ok. Maybe it is simpler not to omit requesting the 'added' flush here and then 
just ignore it in the flush method.

> >> > @@ -235,6 +236,9 @@ void __hwdom_init iommu_hwdom_init(struct domain
> *d)
> >> >  process_pending_softirqs();
> >> >  }
> >> >
> >> > +while ( !flush_flags && iommu_flush_all(d) )
> >> > +break;
> >>
> >> Is there a comment missing here explaining the seemingly odd
> >> loop?
> >
> > I'm merely using the construct you suggested, but I can add a comment.
> 
> And I'm fine with the construct, but in the other place (for which
> we did discuss this for the earlier version) there is a comment.
>

Ok. I'll add a similar comment.

> >> > --- a/xen/drivers/passthrough/vtd/iommu.c
> >> > +++ b/xen/drivers/passthrough/vtd/iommu.c
> >> > @@ -633,11 +633,14 @@ static int __must_check
> iommu_flush_iotlb(struct
> >> domain *d, dfn_t dfn,
> >> >
> >> >  static int __must_check iommu_flush_iotlb_pages(struct domain *d,
> >> >  dfn_t dfn,
> >> > -unsigned int
> >> page_count)
> >> > +unsigned int
> >> 

Re: [Xen-devel] [PATCH v2 3/4] iommu: elide flushing for higher order map/unmap operations

2018-12-04 Thread Jan Beulich
>>> On 04.12.18 at 16:36,  wrote:
>> From: Jan Beulich [mailto:jbeul...@suse.com]
>> Sent: 04 December 2018 15:17
>> 
>> >>> On 03.12.18 at 18:40,  wrote:
>> > --- a/xen/arch/arm/p2m.c
>> > +++ b/xen/arch/arm/p2m.c
>> > @@ -971,8 +971,17 @@ static int __p2m_set_entry(struct p2m_domain *p2m,
>> >
>> >  if ( need_iommu_pt_sync(p2m->domain) &&
>> >   (lpae_is_valid(orig_pte) || lpae_is_valid(*entry)) )
>> > +{
>> > +unsigned int flush_flags = 0;
>> > +
>> > +if ( lpae_is_valid(orig_pte) )
>> > +flush_flags |= IOMMU_FLUSHF_modified;
>> > +if ( lpae_is_valid(*entry) )
>> > +flush_flags |= IOMMU_FLUSHF_added;
>> 
>> Shouldn't this be "else if" with the meaning assigned to both
>> types? From an abstract perspective I'd also expect that for
>> a single mapping no more than one of the flags can come
>> back set (through the iommu_ops interface).
> 
> That's not how I see it. My rationale is:
> 
> - present PTE made non-present, or modified -> IOMMU_FLUSHF_modified
> - new PTE value is present -> IOMMU_FLUSHF_added
> 
> So, a single op can set any combination of bits and thus the above code does 
> not use 'else if'.

I can't fit this with the code comments:

enum
{
_IOMMU_FLUSHF_added, /* no modified entries, just additional entries */
_IOMMU_FLUSHF_modified, /* modified entries */
};

..., in particular the "no modified entries" part.

>> > @@ -84,7 +86,7 @@ static bool set_iommu_pde_present(uint32_t *pde,
>> unsigned long next_mfn,
>> >
>> >  if ( maddr_old != maddr_next || iw != old_w || ir != old_r ||
>> >   old_level != next_level )
>> > -need_flush = true;
>> > +flush_flags = IOMMU_FLUSHF_modified;
>> 
>> Why uniformly "modified"?
> 
> Because the AMD IOMMU does require flushing for a non-present -> present 
> transition AFAICT. The old code certainly implies this.

It is one thing what the flush function does with the value, but
another whether the modifying function "lies". I'm not opposed
to simplification, but then a comment needs to explain this.

>> > @@ -235,6 +236,9 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
>> >  process_pending_softirqs();
>> >  }
>> >
>> > +while ( !flush_flags && iommu_flush_all(d) )
>> > +break;
>> 
>> Is there a comment missing here explaining the seemingly odd
>> loop?
> 
> I'm merely using the construct you suggested, but I can add a comment.

And I'm fine with the construct, but in the other place (for which
we did discuss this for the earlier version) there is a comment.

>> > --- a/xen/drivers/passthrough/vtd/iommu.c
>> > +++ b/xen/drivers/passthrough/vtd/iommu.c
>> > @@ -633,11 +633,14 @@ static int __must_check iommu_flush_iotlb(struct
>> domain *d, dfn_t dfn,
>> >
>> >  static int __must_check iommu_flush_iotlb_pages(struct domain *d,
>> >  dfn_t dfn,
>> > -unsigned int
>> page_count)
>> > +unsigned int
>> page_count,
>> > +unsigned int
>> flush_flags)
>> >  {
>> >  ASSERT(page_count && !dfn_eq(dfn, INVALID_DFN));
>> > +ASSERT(flush_flags);
>> >
>> > -return iommu_flush_iotlb(d, dfn, 1, page_count);
>> > +return iommu_flush_iotlb(d, dfn, flush_flags &
>> IOMMU_FLUSHF_modified,
>> > + page_count);
>> 
>> Why the restriction to "modified"?
> 
> The parameter is a bool which should be true if an existing PTE was modified 
> or false otherwise. I can make this !!(flush_flags & IOMMU_FLUSHF_modified) 
> if 
> you prefer.

No, that wasn't my point. The question is why this isn't just
"flush_flags", without any masking. Iirc there are precautions
in the VT-d code to deal with hardware which may cache
non-present entries. In that case "added" requires flushing too.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v2 3/4] iommu: elide flushing for higher order map/unmap operations

2018-12-04 Thread Paul Durrant
> -Original Message-
> From: Jan Beulich [mailto:jbeul...@suse.com]
> Sent: 04 December 2018 15:17
> To: Paul Durrant 
> Cc: Brian Woods ; Suravee Suthikulpanit
> ; Julien Grall ;
> Andrew Cooper ; Roger Pau Monne
> ; Wei Liu ; George Dunlap
> ; Ian Jackson ; Kevin
> Tian ; Stefano Stabellini ;
> xen-devel ; Konrad Rzeszutek Wilk
> ; Tim (Xen.org) 
> Subject: Re: [PATCH v2 3/4] iommu: elide flushing for higher order
> map/unmap operations
> 
> >>> On 03.12.18 at 18:40,  wrote:
> > This patch removes any implicit flushing that occurs in the
> implementation
> > of map and unmap operations and adds new iommu_map/unmap() wrapper
> > functions. To maintain semantics of the iommu_legacy_map/unmap() wrapper
> > functions, these are modified to call the new wrapper functions and then
> > perform an explicit flush operation.
> >
> > Because VT-d currently performs two different types of flush dependent
> upon
> > whether a PTE is being modified versus merely added (i.e. replacing a
> non-
> > present PTE) 'iommu flush flags' are defined by this patch and the
> > iommu_ops map_page() and unmap_page() methods are modified to OR the
> type
> > of flush necessary for the PTE that has been populated or depopulated
> into
> > an accumulated flags value. The accumulated value can then be passed
> into
> > the explicit flush operation.
> >
> > The ARM SMMU implementations of map_page() and unmap_page() currently
> > perform no implicit flushing and therefore the modified methods do not
> > adjust the flush flags.
> 
> Which, however, likely is wrong. If we mean the flushing to be initiated
> by the arch- and vendor-independent wrappers, then all map/unmap
> backends should indicate the needed kind of flush. Granted this can be
> done later, if things are otherwise correct on Arm right now.
> 
> > --- a/xen/arch/arm/p2m.c
> > +++ b/xen/arch/arm/p2m.c
> > @@ -971,8 +971,17 @@ static int __p2m_set_entry(struct p2m_domain *p2m,
> >
> >  if ( need_iommu_pt_sync(p2m->domain) &&
> >   (lpae_is_valid(orig_pte) || lpae_is_valid(*entry)) )
> > +{
> > +unsigned int flush_flags = 0;
> > +
> > +if ( lpae_is_valid(orig_pte) )
> > +flush_flags |= IOMMU_FLUSHF_modified;
> > +if ( lpae_is_valid(*entry) )
> > +flush_flags |= IOMMU_FLUSHF_added;
> 
> Shouldn't this be "else if" with the meaning assigned to both
> types? From an abstract perspective I'd also expect that for
> a single mapping no more than one of the flags can come
> back set (through the iommu_ops interface).

That's not how I see it. My rationale is:

- present PTE made non-present, or modified -> IOMMU_FLUSHF_modified
- new PTE value is present -> IOMMU_FLUSHF_added

So, a single op can set any combination of bits and thus the above code does 
not use 'else if'.

> 
> > @@ -84,7 +86,7 @@ static bool set_iommu_pde_present(uint32_t *pde,
> unsigned long next_mfn,
> >
> >  if ( maddr_old != maddr_next || iw != old_w || ir != old_r ||
> >   old_level != next_level )
> > -need_flush = true;
> > +flush_flags = IOMMU_FLUSHF_modified;
> 
> Why uniformly "modified"?

Because the AMD IOMMU does require flushing for a non-present -> present 
transition AFAICT. The old code certainly implies this.

> 
> > @@ -645,11 +648,13 @@ static unsigned long flush_count(unsigned long
> dfn, unsigned int page_count,
> >  }
> >
> >  int amd_iommu_flush_iotlb_pages(struct domain *d, dfn_t dfn,
> > -unsigned int page_count)
> > +unsigned int page_count,
> > +unsigned int flush_flags)
> >  {
> >  unsigned long dfn_l = dfn_x(dfn);
> >
> >  ASSERT(page_count && !dfn_eq(dfn, INVALID_DFN));
> > +ASSERT(flush_flags & IOMMU_FLUSHF_modified);
> 
> Is this valid? What if a map operation solely re-established what
> was already there? Aiui in that case set_iommu_pde_present()
> would always return zero. Or take this (seeing that the generic
> wrapper has a zero check for the flush flags):

Yes, the ASSERT is there because this should never be called unless flush_flags 
!= 0 (ensured by the wrapper) and the map code should only ever set 
IOMMU_FLUSHF_modified.

> 
> > @@ -692,6 +697,7 @@ int amd_iommu_reserve_domain_unity_map(struct domain
> *domain,
> >  unsigned long npages, i;
> >  unsigned long gfn;
> >  unsigned int flags = !!ir;
> > +unsigned int flush_flags = 0;
> >  int rt = 0;
> >
> >  if ( iw )
> > @@ -703,11 +709,21 @@ int amd_iommu_reserve_domain_unity_map(struct
> domain *domain,
> >  {
> >  unsigned long frame = gfn + i;
> >
> > -rt = amd_iommu_map_page(domain, _dfn(frame), _mfn(frame),
> flags);
> > +rt = amd_iommu_map_page(domain, _dfn(frame), _mfn(frame),
> flags,
> > +_flags);
> >  if ( rt != 0 )
> > -return rt;
> > +break;
> >  }
> > -return 0;
> > +

Re: [Xen-devel] [PATCH v2 3/4] iommu: elide flushing for higher order map/unmap operations

2018-12-04 Thread Jan Beulich
>>> On 03.12.18 at 18:40,  wrote:
> This patch removes any implicit flushing that occurs in the implementation
> of map and unmap operations and adds new iommu_map/unmap() wrapper
> functions. To maintain semantics of the iommu_legacy_map/unmap() wrapper
> functions, these are modified to call the new wrapper functions and then
> perform an explicit flush operation.
> 
> Because VT-d currently performs two different types of flush dependent upon
> whether a PTE is being modified versus merely added (i.e. replacing a non-
> present PTE) 'iommu flush flags' are defined by this patch and the
> iommu_ops map_page() and unmap_page() methods are modified to OR the type
> of flush necessary for the PTE that has been populated or depopulated into
> an accumulated flags value. The accumulated value can then be passed into
> the explicit flush operation.
> 
> The ARM SMMU implementations of map_page() and unmap_page() currently
> perform no implicit flushing and therefore the modified methods do not
> adjust the flush flags.

Which, however, likely is wrong. If we mean the flushing to be initiated
by the arch- and vendor-independent wrappers, then all map/unmap
backends should indicate the needed kind of flush. Granted this can be
done later, if things are otherwise correct on Arm right now.

> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -971,8 +971,17 @@ static int __p2m_set_entry(struct p2m_domain *p2m,
>  
>  if ( need_iommu_pt_sync(p2m->domain) &&
>   (lpae_is_valid(orig_pte) || lpae_is_valid(*entry)) )
> +{
> +unsigned int flush_flags = 0;
> +
> +if ( lpae_is_valid(orig_pte) )
> +flush_flags |= IOMMU_FLUSHF_modified;
> +if ( lpae_is_valid(*entry) )
> +flush_flags |= IOMMU_FLUSHF_added;

Shouldn't this be "else if" with the meaning assigned to both
types? From an abstract perspective I'd also expect that for
a single mapping no more than one of the flags can come
back set (through the iommu_ops interface).

> @@ -84,7 +86,7 @@ static bool set_iommu_pde_present(uint32_t *pde, unsigned 
> long next_mfn,
>  
>  if ( maddr_old != maddr_next || iw != old_w || ir != old_r ||
>   old_level != next_level )
> -need_flush = true;
> +flush_flags = IOMMU_FLUSHF_modified;

Why uniformly "modified"?

> @@ -645,11 +648,13 @@ static unsigned long flush_count(unsigned long dfn, 
> unsigned int page_count,
>  }
>  
>  int amd_iommu_flush_iotlb_pages(struct domain *d, dfn_t dfn,
> -unsigned int page_count)
> +unsigned int page_count,
> +unsigned int flush_flags)
>  {
>  unsigned long dfn_l = dfn_x(dfn);
>  
>  ASSERT(page_count && !dfn_eq(dfn, INVALID_DFN));
> +ASSERT(flush_flags & IOMMU_FLUSHF_modified);

Is this valid? What if a map operation solely re-established what
was already there? Aiui in that case set_iommu_pde_present()
would always return zero. Or take this (seeing that the generic
wrapper has a zero check for the flush flags):

> @@ -692,6 +697,7 @@ int amd_iommu_reserve_domain_unity_map(struct domain 
> *domain,
>  unsigned long npages, i;
>  unsigned long gfn;
>  unsigned int flags = !!ir;
> +unsigned int flush_flags = 0;
>  int rt = 0;
>  
>  if ( iw )
> @@ -703,11 +709,21 @@ int amd_iommu_reserve_domain_unity_map(struct domain 
> *domain,
>  {
>  unsigned long frame = gfn + i;
>  
> -rt = amd_iommu_map_page(domain, _dfn(frame), _mfn(frame), flags);
> +rt = amd_iommu_map_page(domain, _dfn(frame), _mfn(frame), flags,
> +_flags);
>  if ( rt != 0 )
> -return rt;
> +break;
>  }
> -return 0;
> +
> +/*
> + * The underlying implementation is void so the return value is
> + * meaningless and can hence be ignored.
> + */
> +while ( amd_iommu_flush_iotlb_pages(domain, _dfn(gfn), npages,
> +flush_flags) )
> +break;

Nothing here guarantees flush_flags to be non-zero.

> @@ -235,6 +236,9 @@ void __hwdom_init iommu_hwdom_init(struct domain *d)
>  process_pending_softirqs();
>  }
>  
> +while ( !flush_flags && iommu_flush_all(d) )
> +break;

Is there a comment missing here explaining the seemingly odd
loop?

> @@ -381,6 +402,17 @@ int iommu_legacy_unmap(struct domain *d, dfn_t dfn, 
> unsigned int page_order)
>  return rc;
>  }
>  
> +int iommu_legacy_unmap(struct domain *d, dfn_t dfn, unsigned int page_order)
> +{
> +unsigned int flush_flags = 0;
> +int rc = iommu_unmap(d, dfn, page_order, _flags);
> +
> +if ( !rc )
> +rc = iommu_flush(d, dfn, (1u << page_order), flush_flags);

No iommu_dont_flush_iotlb check needed here?

> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -633,11 +633,14 @@ static int 

[Xen-devel] [PATCH v2 3/4] iommu: elide flushing for higher order map/unmap operations

2018-12-03 Thread Paul Durrant
This patch removes any implicit flushing that occurs in the implementation
of map and unmap operations and adds new iommu_map/unmap() wrapper
functions. To maintain semantics of the iommu_legacy_map/unmap() wrapper
functions, these are modified to call the new wrapper functions and then
perform an explicit flush operation.

Because VT-d currently performs two different types of flush dependent upon
whether a PTE is being modified versus merely added (i.e. replacing a non-
present PTE) 'iommu flush flags' are defined by this patch and the
iommu_ops map_page() and unmap_page() methods are modified to OR the type
of flush necessary for the PTE that has been populated or depopulated into
an accumulated flags value. The accumulated value can then be passed into
the explicit flush operation.

The ARM SMMU implementations of map_page() and unmap_page() currently
perform no implicit flushing and therefore the modified methods do not
adjust the flush flags.

NOTE: The per-cpu 'iommu_dont_flush_iotlb' is respected by the
  iommu_legacy_map() wrapper function and therefore this now applies to
  all IOMMU implementations rather than just VT-d.

Signed-off-by: Paul Durrant 
---
Cc: Stefano Stabellini 
Cc: Julien Grall 
Cc: Andrew Cooper 
Cc: George Dunlap 
Cc: Ian Jackson 
Cc: Jan Beulich 
Cc: Konrad Rzeszutek Wilk 
Cc: Tim Deegan 
Cc: Wei Liu 
Cc: Suravee Suthikulpanit 
Cc: Brian Woods 
Cc: Kevin Tian 
Cc: "Roger Pau Monné" 

v2:
 - Add the new iommu_map/unmap() and don't proliferate use of
   iommu_dont_flush_iotlb.
 - Use 'flush flags' instead of a 'iommu_flush_type'
 - Add a 'flush_flags' argument to iommu_flush() and modify the call-sites.

This code has only been compile tested for ARM.
---
 xen/arch/arm/p2m.c| 11 -
 xen/common/memory.c   |  6 ++-
 xen/drivers/passthrough/amd/iommu_map.c   | 68 +--
 xen/drivers/passthrough/arm/smmu.c| 15 --
 xen/drivers/passthrough/iommu.c   | 57 +-
 xen/drivers/passthrough/vtd/iommu.c   | 33 -
 xen/drivers/passthrough/x86/iommu.c   | 19 +---
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |  9 ++--
 xen/include/xen/iommu.h   | 28 +--
 9 files changed, 173 insertions(+), 73 deletions(-)

diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index e8b7624492..74264711ce 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -971,8 +971,17 @@ static int __p2m_set_entry(struct p2m_domain *p2m,
 
 if ( need_iommu_pt_sync(p2m->domain) &&
  (lpae_is_valid(orig_pte) || lpae_is_valid(*entry)) )
+{
+unsigned int flush_flags = 0;
+
+if ( lpae_is_valid(orig_pte) )
+flush_flags |= IOMMU_FLUSHF_modified;
+if ( lpae_is_valid(*entry) )
+flush_flags |= IOMMU_FLUSHF_added;
+
 rc = iommu_flush(p2m->domain, _dfn(gfn_x(sgfn)),
- 1UL << page_order);
+ 1UL << page_order, flush_flags);
+}
 else
 rc = 0;
 
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 7b668077d8..f6ba852136 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -865,11 +865,13 @@ int xenmem_add_to_physmap(struct domain *d, struct 
xen_add_to_physmap *xatp,
 
 this_cpu(iommu_dont_flush_iotlb) = 0;
 
-ret = iommu_flush(d, _dfn(xatp->idx - done), done);
+ret = iommu_flush(d, _dfn(xatp->idx - done), done,
+  IOMMU_FLUSHF_added | IOMMU_FLUSHF_modified);
 if ( unlikely(ret) && rc >= 0 )
 rc = ret;
 
-ret = iommu_flush(d, _dfn(xatp->gpfn - done), done);
+ret = iommu_flush(d, _dfn(xatp->gpfn - done), done,
+  IOMMU_FLUSHF_added | IOMMU_FLUSHF_modified);
 if ( unlikely(ret) && rc >= 0 )
 rc = ret;
 }
diff --git a/xen/drivers/passthrough/amd/iommu_map.c 
b/xen/drivers/passthrough/amd/iommu_map.c
index b6ddf211c6..de6ecb9f04 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -45,13 +45,15 @@ static void clear_iommu_pte_present(unsigned long l1_mfn, 
unsigned long dfn)
 unmap_domain_page(table);
 }
 
-static bool set_iommu_pde_present(uint32_t *pde, unsigned long next_mfn,
-  unsigned int next_level,
-  bool iw, bool ir)
+static unsigned int set_iommu_pde_present(uint32_t *pde,
+  unsigned long next_mfn,
+  unsigned int next_level, bool iw,
+  bool ir)
 {
 uint64_t maddr_next;
 uint32_t addr_lo, addr_hi, entry;
-bool need_flush = false, old_present;
+bool old_present;
+unsigned int flush_flags = 0;
 
 maddr_next = __pfn_to_paddr(next_mfn);
 
@@ -84,7 +86,7 @@ static bool set_iommu_pde_present(uint32_t *pde,