Re: [Xen-devel] [PATCH v6] x86/p2m: use large pages for MMIO mappings

2016-02-02 Thread Andrew Cooper
On 02/02/16 13:24, Jan Beulich wrote:
 On 01.02.16 at 16:00,  wrote:
>> On 01/02/16 09:14, Jan Beulich wrote:
>>> --- a/xen/arch/x86/mm/p2m.c
>>> +++ b/xen/arch/x86/mm/p2m.c
>>> @@ -899,48 +899,64 @@ void p2m_change_type_range(struct domain
>>>  p2m_unlock(p2m);
>>>  }
>>>  
>>> -/* Returns: 0 for success, -errno for failure */
>>> +/*
>>> + * Returns:
>>> + *0  for success
>>> + *-errno for failure
>>> + *1 + new order  for caller to retry with smaller order (guaranteed
>>> + *   to be smaller than order passed in)
>>> + */
>>>  static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t 
>> mfn,
>>> -   p2m_type_t gfn_p2mt, p2m_access_t access)
>>> +   unsigned int order, p2m_type_t gfn_p2mt,
>>> +   p2m_access_t access)
>>>  {
>>>  int rc = 0;
>>>  p2m_access_t a;
>>>  p2m_type_t ot;
>>>  mfn_t omfn;
>>> +unsigned int cur_order = 0;
>>>  struct p2m_domain *p2m = p2m_get_hostp2m(d);
>>>  
>>>  if ( !paging_mode_translate(d) )
>>>  return -EIO;
>>>  
>>> -gfn_lock(p2m, gfn, 0);
>>> -omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
>>> +gfn_lock(p2m, gfn, order);
>>> +omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
>>> +if ( cur_order < order )
>>> +{
>>> +gfn_unlock(p2m, gfn, order);
>>> +return cur_order + 1;
>>> +}
>>>  if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
>>>  {
>>> -gfn_unlock(p2m, gfn, 0);
>>> +gfn_unlock(p2m, gfn, order);
>>>  domain_crash(d);
>>>  return -ENOENT;
>>>  }
>>>  else if ( p2m_is_ram(ot) )
>>>  {
>>> -ASSERT(mfn_valid(omfn));
>>> -set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
>>> +unsigned long i;
>>> +
>>> +for ( i = 0; i < (1UL << order); ++i )
>>> +{
>>> +ASSERT(mfn_valid(_mfn(mfn_x(omfn) + i)));
>>> +set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
>> On further consideration, shouldn't we have a preemption check here? 
>> Removing a 1GB superpage's worth of RAM mappings is going to execute for
>> an unreasonably long time.
> Maybe. We have 256k iteration loops elsewhere, so I'm not that
> concerned. The thing probably needing adjustment would then be
> map_mmio_regions(), to avoid multiplying the 256k here by the up
> to 64 iterations done there. Preempting here is not really
> possible, as we're holding the p2m lock.

Why is this problematic?  All that needs to happen is to -ERESTART out
to a point where the p2m lock is dropped.
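
Concretely, the continuation shape being suggested can be modelled in isolation. The sketch below is not Xen code (the ERESTART value, names and the work budget are invented for illustration); it only shows the pattern: do a bounded slice of work under the lock, keep progress in state that survives the restart, and return an ERESTART-style value so the caller re-enters.

/*
 * Standalone model of the continuation pattern suggested above: do a
 * bounded slice of work while "locked", record progress in state that
 * survives the restart, and return -ERESTART so the caller re-enters.
 * All names and values here are invented for illustration.
 */
#include <stdio.h>

#define MODEL_ERESTART  85      /* stand-in value, not Xen's definition */
#define WORK_PER_PASS   4096    /* budget before voluntarily yielding */

struct op_state {
    unsigned long done;         /* progress preserved across restarts */
    unsigned long total;
};

static void lock(void)   { }    /* marks the would-be p2m critical section */
static void unlock(void) { }

static int process_some(struct op_state *s)
{
    unsigned long budget = WORK_PER_PASS;

    lock();
    while ( s->done < s->total && budget-- )
        ++s->done;              /* stands in for one 4k M2P update */
    unlock();

    return s->done < s->total ? -MODEL_ERESTART : 0;
}

int main(void)
{
    struct op_state s = { .done = 0, .total = 1UL << 18 };  /* 1G / 4k */
    int rc, passes = 0;

    do {                        /* the "hypercall" simply gets re-issued */
        rc = process_some(&s);
        ++passes;
    } while ( rc == -MODEL_ERESTART );

    printf("%lu updates in %d passes, rc=%d\n", s.done, passes, rc);
    return 0;
}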

>
> The only other alternative I see would be to disallow 1G mappings
> and only support 2M ones.
>
> Thoughts?

For now, restricting to 2M mappings at least limits the potential damage
while gaining some benefits of large MMIO mappings.
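
To make that concrete, an order-selection helper under such a cap might look roughly like the sketch below. The helper name, the IOMMU parameter and the example values are assumptions for illustration; this is not the patch's actual mmio_order().

/*
 * Sketch of an order-selection helper capped at 2M, along the lines of
 * the restriction discussed above.
 */
#include <stdio.h>

#define PAGE_ORDER_4K  0
#define PAGE_ORDER_2M  9

static unsigned int pick_mmio_order(unsigned long gfn, unsigned long mfn,
                                    unsigned long nr, int iommu_pt_shared)
{
    unsigned long mask = (1UL << PAGE_ORDER_2M) - 1;

    /* Without shared IOMMU page tables, fall back to 4k mappings. */
    if ( !iommu_pt_shared )
        return PAGE_ORDER_4K;

    /* Cap at 2M: both addresses order-aligned and enough frames left. */
    if ( !(gfn & mask) && !(mfn & mask) && nr >= (1UL << PAGE_ORDER_2M) )
        return PAGE_ORDER_2M;

    return PAGE_ORDER_4K;
}

int main(void)
{
    /* Even a 1G-aligned, 1G-sized request is mapped in 2M chunks. */
    printf("%u\n", pick_mmio_order(0x40000, 0xf0000, 1UL << 18, 1)); /* 9 */
    printf("%u\n", pick_mmio_order(0x40001, 0xf0001, 1UL << 18, 1)); /* 0 */
    return 0;
}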

~Andrew



Re: [Xen-devel] [PATCH v6] x86/p2m: use large pages for MMIO mappings

2016-02-02 Thread Jan Beulich
>>> On 02.02.16 at 15:33,  wrote:
> On 02/02/16 13:24, Jan Beulich wrote:
> On 01.02.16 at 16:00,  wrote:
>>> On 01/02/16 09:14, Jan Beulich wrote:
 --- a/xen/arch/x86/mm/p2m.c
 +++ b/xen/arch/x86/mm/p2m.c
 @@ -899,48 +899,64 @@ void p2m_change_type_range(struct domain
  p2m_unlock(p2m);
  }
  
 -/* Returns: 0 for success, -errno for failure */
 +/*
 + * Returns:
 + *0  for success
 + *-errno for failure
 + *1 + new order  for caller to retry with smaller order (guaranteed
 + *   to be smaller than order passed in)
 + */
  static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t 
>>> mfn,
 -   p2m_type_t gfn_p2mt, p2m_access_t access)
 +   unsigned int order, p2m_type_t gfn_p2mt,
 +   p2m_access_t access)
  {
  int rc = 0;
  p2m_access_t a;
  p2m_type_t ot;
  mfn_t omfn;
 +unsigned int cur_order = 0;
  struct p2m_domain *p2m = p2m_get_hostp2m(d);
  
  if ( !paging_mode_translate(d) )
  return -EIO;
  
 -gfn_lock(p2m, gfn, 0);
 -omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
 +gfn_lock(p2m, gfn, order);
 +omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
 +if ( cur_order < order )
 +{
 +gfn_unlock(p2m, gfn, order);
 +return cur_order + 1;
 +}
  if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
  {
 -gfn_unlock(p2m, gfn, 0);
 +gfn_unlock(p2m, gfn, order);
  domain_crash(d);
  return -ENOENT;
  }
  else if ( p2m_is_ram(ot) )
  {
 -ASSERT(mfn_valid(omfn));
 -set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
 +unsigned long i;
 +
 +for ( i = 0; i < (1UL << order); ++i )
 +{
 +ASSERT(mfn_valid(_mfn(mfn_x(omfn) + i)));
 +set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
>>> On further consideration, shouldn't we have a preemption check here? 
>>> Removing a 1GB superpage's worth of RAM mappings is going to execute for
>>> an unreasonably long time.
>> Maybe. We have 256k iteration loops elsewhere, so I'm not that
>> concerned. The thing probably needing adjustment would then be
>> map_mmio_regions(), to avoid multiplying the 256k here by the up
>> to 64 iterations done there. Preempting here is not really
>> possible, as we're holding the p2m lock.
> 
> Why is this problematic?  All that needs to happen is to -ERESTART out
> to a point where the p2m lock is dropped.

Because with the p2m lock dropped the region we're just trying to
populate may get changed another way? And because we'd need
to track where we left off?

>> The only other alternative I see would be to disallow 1G mappings
>> and only support 2M ones.
>>
>> Thoughts?
> 
> For now, restricting to 2M mappings at least limits the potential damage
> while gaining some benefits of large MMIO mappings.

Okay, will do it that way then.

Jan




Re: [Xen-devel] [PATCH v6] x86/p2m: use large pages for MMIO mappings

2016-02-02 Thread Jan Beulich
>>> On 01.02.16 at 16:00,  wrote:
> On 01/02/16 09:14, Jan Beulich wrote:
>> --- a/xen/arch/x86/mm/p2m.c
>> +++ b/xen/arch/x86/mm/p2m.c
>> @@ -899,48 +899,64 @@ void p2m_change_type_range(struct domain
>>  p2m_unlock(p2m);
>>  }
>>  
>> -/* Returns: 0 for success, -errno for failure */
>> +/*
>> + * Returns:
>> + *0  for success
>> + *-errno for failure
>> + *1 + new order  for caller to retry with smaller order (guaranteed
>> + *   to be smaller than order passed in)
>> + */
>>  static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t 
> mfn,
>> -   p2m_type_t gfn_p2mt, p2m_access_t access)
>> +   unsigned int order, p2m_type_t gfn_p2mt,
>> +   p2m_access_t access)
>>  {
>>  int rc = 0;
>>  p2m_access_t a;
>>  p2m_type_t ot;
>>  mfn_t omfn;
>> +unsigned int cur_order = 0;
>>  struct p2m_domain *p2m = p2m_get_hostp2m(d);
>>  
>>  if ( !paging_mode_translate(d) )
>>  return -EIO;
>>  
>> -gfn_lock(p2m, gfn, 0);
>> -omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
>> +gfn_lock(p2m, gfn, order);
>> +omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
>> +if ( cur_order < order )
>> +{
>> +gfn_unlock(p2m, gfn, order);
>> +return cur_order + 1;
>> +}
>>  if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
>>  {
>> -gfn_unlock(p2m, gfn, 0);
>> +gfn_unlock(p2m, gfn, order);
>>  domain_crash(d);
>>  return -ENOENT;
>>  }
>>  else if ( p2m_is_ram(ot) )
>>  {
>> -ASSERT(mfn_valid(omfn));
>> -set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
>> +unsigned long i;
>> +
>> +for ( i = 0; i < (1UL << order); ++i )
>> +{
>> +ASSERT(mfn_valid(_mfn(mfn_x(omfn) + i)));
>> +set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
> 
> On further consideration, shouldn't we have a preemption check here? 
> Removing a 1GB superpage's worth of RAM mappings is going to execute for
> an unreasonably long time.

Maybe. We have 256k iteration loops elsewhere, so I'm not that
concerned. The thing probably needing adjustment would then be
map_mmio_regions(), to avoid multiplying the 256k here by the up
to 64 iterations done there. Preempting here is not really
possible, as we're holding the p2m lock.
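
For reference, the counts being traded off work out as below (a quick back-of-the-envelope check; the factor of 64 is the map_mmio_regions() iteration limit mentioned above):

/* Back-of-the-envelope check of the loop counts mentioned above. */
#include <stdio.h>

int main(void)
{
    unsigned long per_1g = 1UL << (30 - 12);  /* 4k pages per 1G mapping */
    unsigned long worst  = 64 * per_1g;       /* times the 64 iterations */

    printf("M2P updates per 1G mapping: %lu\n", per_1g);   /* 262144 */
    printf("worst case per invocation:  %lu\n", worst);    /* 16777216 */
    return 0;
}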

The only other alternative I see would be to disallow 1G mappings
and only support 2M ones.

Thoughts?

Jan




[Xen-devel] [PATCH v6] x86/p2m: use large pages for MMIO mappings

2016-02-01 Thread Jan Beulich
When mapping large BARs (e.g. the frame buffer of a graphics card) the
overhead of establishing such mappings using only 4k pages has,
particularly after the XSA-125 fix, become unacceptable. Alter the
XEN_DOMCTL_memory_mapping semantics once again, so that there's no
longer a fixed amount of guest frames that represents the upper limit
of what a single invocation can map. Instead bound execution time by
limiting the number of iterations (regardless of page size).

Signed-off-by: Jan Beulich 
Acked-by: Ian Campbell 
Acked-by: Kevin Tian 
---
Open issues (perhaps for subsequent changes):
- ARM side unimplemented (and hence libxc is for now made to cope with both
  models), the main issue (besides my inability to test any change
  there) being the many internal uses of map_mmio_regions()
- iommu_{,un}map_page() interfaces don't support "order" (hence
  mmio_order() for now returns zero when !iommu_hap_pt_share, which in
  particular means the AMD side isn't being taken care of just yet, but
  note that this also has the intended effect of suppressing non-zero
  order mappings in the shadow mode case)
---
v6: Move an mfn_valid() assertion to cover the full MFN range. Use
PAGE_ORDER_4K in mmio_order(). Improve the return value description
of set_typed_p2m_entry().
v5: Refine comment in domctl.h.
v4: Move cleanup duty entirely to the caller of the hypercall. Move
return value description from commit message to domctl.h.
v3: Re-base on top of "x86/hvm: fold opt_hap_{2mb,1gb} into
hap_capabilities". Extend description to spell out new return value
meaning. Add a couple of code comments. Use PAGE_ORDER_4K instead
of literal 0. Take into consideration r/o MMIO pages.
v2: Produce valid entries for large p2m_mmio_direct mappings in
p2m_pt_set_entry(). Don't open code iommu_use_hap_pt() in
mmio_order(). Update function comment of set_typed_p2m_entry() and
clear_mmio_p2m_entry(). Use PRI_mfn. Add ASSERT()s to
{,un}map_mmio_regions() to detect otherwise endless loops.

--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -2229,7 +2229,7 @@ int xc_domain_memory_mapping(
 {
 DECLARE_DOMCTL;
 xc_dominfo_t info;
-int ret = 0, err;
+int ret = 0, rc;
 unsigned long done = 0, nr, max_batch_sz;
 
 if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 ||
@@ -2254,19 +2254,24 @@ int xc_domain_memory_mapping(
 domctl.u.memory_mapping.nr_mfns = nr;
 domctl.u.memory_mapping.first_gfn = first_gfn + done;
 domctl.u.memory_mapping.first_mfn = first_mfn + done;
-err = do_domctl(xch, &domctl);
-if ( err && errno == E2BIG )
+rc = do_domctl(xch, &domctl);
+if ( rc < 0 && errno == E2BIG )
 {
 if ( max_batch_sz <= 1 )
 break;
 max_batch_sz >>= 1;
 continue;
 }
+if ( rc > 0 )
+{
+done += rc;
+continue;
+}
 /* Save the first error... */
 if ( !ret )
-ret = err;
+ret = rc;
 /* .. and ignore the rest of them when removing. */
-if ( err && add_mapping != DPCI_REMOVE_MAPPING )
+if ( rc && add_mapping != DPCI_REMOVE_MAPPING )
 break;
 
 done += nr;
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -436,7 +436,8 @@ static __init void pvh_add_mem_mapping(s
 else
 a = p2m_access_rw;
 
-if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i), a)) )
+if ( (rc = set_mmio_p2m_entry(d, gfn + i, _mfn(mfn + i),
+  PAGE_ORDER_4K, a)) )
 panic("pvh_add_mem_mapping: gfn:%lx mfn:%lx i:%ld rc:%d\n",
   gfn, mfn, i, rc);
 if ( !(i & 0xf) )
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2491,7 +2491,7 @@ static int vmx_alloc_vlapic_mapping(stru
 share_xen_page_with_guest(pg, d, XENSHARE_writable);
 d->arch.hvm_domain.vmx.apic_access_mfn = mfn;
 set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), _mfn(mfn),
-   p2m_get_hostp2m(d)->default_access);
+   PAGE_ORDER_4K, p2m_get_hostp2m(d)->default_access);
 
 return 0;
 }
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -899,48 +899,64 @@ void p2m_change_type_range(struct domain
 p2m_unlock(p2m);
 }
 
-/* Returns: 0 for success, -errno for failure */
+/*
+ * Returns:
+ *0  for success
+ *-errno for failure
+ *1 + new order  for caller to retry with smaller order (guaranteed
+ *   to be smaller than order passed in)
+ */
 static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
-   p2m_type_t gfn_p2mt, p2m_access_t access)
+   unsigned int order, p2m_type_t gfn_p2mt,
+   p2m_access_t access)
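
The return convention documented above implies a caller loop of roughly the following shape. fake_set_entry() and pick_order() are invented stand-ins; only the "1 + new order" retry protocol itself is the point of this sketch, not the patch's actual map_mmio_regions() code.

/*
 * Schematic caller for the "1 + new order" convention documented above.
 */
#include <stdio.h>

/* Pretend a smaller entry already exists at one particular gfn. */
static int fake_set_entry(unsigned long gfn, unsigned long mfn,
                          unsigned int order)
{
    (void)mfn;
    if ( order > 0 && gfn == 0x200 )
        return 1 + 0;                     /* "retry with order 0" */
    return 0;                             /* success */
}

static unsigned int pick_order(unsigned long gfn, unsigned long mfn,
                               unsigned long nr)
{
    unsigned long mask = (1UL << 9) - 1;  /* consider 2M (order 9) only */

    return (!(gfn & mask) && !(mfn & mask) && nr >= 512) ? 9 : 0;
}

int main(void)
{
    unsigned long gfn = 0x200, mfn = 0x1000, nr = 1024;
    unsigned long maps_4k = 0, maps_2m = 0;

    while ( nr )
    {
        unsigned int order = pick_order(gfn, mfn, nr);
        int rc = 0;

        /* Retry with progressively smaller orders while asked to. */
        for ( ; ; order = rc - 1 )
        {
            rc = fake_set_entry(gfn, mfn, order);
            if ( rc <= 0 )
                break;                    /* success (0) or -errno */
        }
        if ( rc < 0 )
            return 1;

        if ( order )
            ++maps_2m;
        else
            ++maps_4k;
        gfn += 1UL << order;
        mfn += 1UL << order;
        nr  -= 1UL << order;
    }

    printf("%lu 2M mappings, %lu 4k mappings\n", maps_2m, maps_4k);
    return 0;
}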

Re: [Xen-devel] [PATCH v6] x86/p2m: use large pages for MMIO mappings

2016-02-01 Thread Andrew Cooper
On 01/02/16 09:14, Jan Beulich wrote:
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -899,48 +899,64 @@ void p2m_change_type_range(struct domain
>  p2m_unlock(p2m);
>  }
>  
> -/* Returns: 0 for success, -errno for failure */
> +/*
> + * Returns:
> + *0  for success
> + *-errno for failure
> + *1 + new order  for caller to retry with smaller order (guaranteed
> + *   to be smaller than order passed in)
> + */
>  static int set_typed_p2m_entry(struct domain *d, unsigned long gfn, mfn_t 
> mfn,
> -   p2m_type_t gfn_p2mt, p2m_access_t access)
> +   unsigned int order, p2m_type_t gfn_p2mt,
> +   p2m_access_t access)
>  {
>  int rc = 0;
>  p2m_access_t a;
>  p2m_type_t ot;
>  mfn_t omfn;
> +unsigned int cur_order = 0;
>  struct p2m_domain *p2m = p2m_get_hostp2m(d);
>  
>  if ( !paging_mode_translate(d) )
>  return -EIO;
>  
> -gfn_lock(p2m, gfn, 0);
> -omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
> +gfn_lock(p2m, gfn, order);
> +omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
> +if ( cur_order < order )
> +{
> +gfn_unlock(p2m, gfn, order);
> +return cur_order + 1;
> +}
>  if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
>  {
> -gfn_unlock(p2m, gfn, 0);
> +gfn_unlock(p2m, gfn, order);
>  domain_crash(d);
>  return -ENOENT;
>  }
>  else if ( p2m_is_ram(ot) )
>  {
> -ASSERT(mfn_valid(omfn));
> -set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
> +unsigned long i;
> +
> +for ( i = 0; i < (1UL << order); ++i )
> +{
> +ASSERT(mfn_valid(_mfn(mfn_x(omfn) + i)));
> +set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);

On further consideration, shouldn't we have a preemption check here? 
Removing a 1GB superpage's worth of RAM mappings is going to execute for
an unreasonably long time.

~Andrew
