Re: [Xen-devel] [PATCH v4] boot allocator: Use arch helper for virt_to_mfn on DIRECTMAP

2017-04-03 Thread Vijay Kilari
Hi Julien,

On Mon, Apr 3, 2017 at 3:31 PM, Julien Grall  wrote:
> Hi Vijay,
>
> On 28/03/17 13:35, vijay.kil...@gmail.com wrote:
>>
>> From: Vijaya Kumar K 
>>
>> On ARM64, virt_to_mfn uses the hardware for address
>> translation. So if the virtual address is not mapped translation
>> fault is raised. On ARM64, DIRECTMAP_VIRT region is direct mapped.
>
>
> You are stating obvious things: a DIRECTMAP_VIRT region is, as the name says,
> direct mapped. What matters is that all the RAM is mapped in Xen on ARM64.
>
>>
>> On ARM platforms with NUMA, While initializing second memory node,
>
>
> s/While/while/
>
>> panic is triggered from init_node_heap() when virt_to_mfn()
>> is called for DIRECTMAP_VIRT region address.
>> Here the check is made to ensure that MFN less than max MFN mapped.
>
>
> "The check is here to know whether the MFN is part of the direct mapping".
>
>> The max MFN is found by calling virt_to_mfn of DIRECTMAP_VIRT_END
>> region.
>
>
> DIRECTMAP_VIRT_END is the end of the region not a region.
>
>> Since DIRECMAP_VIRT region is not mapped to any virtual address
>
>
> s/DIRECMAP_VIRT/DIRECTMAP_VIRT/
>
>> on ARM, it fails.
>>
>> In this patch, instead of calling virt_to_mfn(), arch helper
>> arch_mfn_in_directmap() is introduced. On ARM64 this arch helper
>> will return true, whereas on ARM DIRECTMAP_VIRT region is not directly
>> mapped
>> only xenheap region is directly mapped.
>
>
> As said before, there is no DIRECTMAP_VIRT region on ARM. Not all the RAM is
> mapped in Xen, only the xenheap.
>
>> So on ARM return false always.
>
>
> I am OK if you always return false on ARM. But you need to explain why you
> do not return is_xen_heap_mfn(...);
>
>> For x86 this helper does virt_to_mfn.
>>
>> Signed-off-by: Vijaya Kumar K 
>> ---
>>  xen/common/page_alloc.c|  7 ++-
>>  xen/include/asm-arm/arm32/mm.h | 20 
>>  xen/include/asm-arm/arm64/mm.h | 20 
>>  xen/include/asm-arm/mm.h   |  8 
>>  xen/include/asm-x86/mm.h   | 11 +++
>>  5 files changed, 61 insertions(+), 5 deletions(-)
>>
>> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
>> index 42c20cb..c4ffb31 100644
>> --- a/xen/common/page_alloc.c
>> +++ b/xen/common/page_alloc.c
>> @@ -520,9 +520,6 @@ static unsigned long init_node_heap(int node, unsigned
>> long mfn,
>>  unsigned long needed = (sizeof(**_heap) +
>>  sizeof(**avail) * NR_ZONES +
>>  PAGE_SIZE - 1) >> PAGE_SHIFT;
>> -#ifdef DIRECTMAP_VIRT_END
>> -unsigned long eva = min(DIRECTMAP_VIRT_END, HYPERVISOR_VIRT_END);
>> -#endif
>>  int i, j;
>>
>>  if ( !first_node_initialised )
>> @@ -534,7 +531,7 @@ static unsigned long init_node_heap(int node, unsigned
>> long mfn,
>>  }
>>  #ifdef DIRECTMAP_VIRT_END
>
>
> Sorry I didn't spot that before. Why do we keep the #ifdef here given that
> the check is arch specific now?
>
>>  else if ( *use_tail && nr >= needed &&
>> -  (mfn + nr) <= (virt_to_mfn(eva - 1) + 1) &&
>> +  arch_mfn_in_directmap(mfn + nr) &&
>>(!xenheap_bits ||
>> !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT))) )
>>  {
>> @@ -543,7 +540,7 @@ static unsigned long init_node_heap(int node, unsigned
>> long mfn,
>>PAGE_SIZE - sizeof(**avail) * NR_ZONES;
>>  }
>>  else if ( nr >= needed &&
>> -  (mfn + needed) <= (virt_to_mfn(eva - 1) + 1) &&
>> +  arch_mfn_in_directmap(mfn + needed) &&
>>(!xenheap_bits ||
>> !((mfn + needed - 1) >> (xenheap_bits - PAGE_SHIFT))) )
>>  {
>> diff --git a/xen/include/asm-arm/arm32/mm.h
>> b/xen/include/asm-arm/arm32/mm.h
>> new file mode 100644
>> index 000..e93d9df
>> --- /dev/null
>> +++ b/xen/include/asm-arm/arm32/mm.h
>> @@ -0,0 +1,20 @@
>> +#ifndef __ARM_ARM32_MM_H__
>> +#define __ARM_ARM32_MM_H__
>> +
>> +/* On ARM only xenheap memory is directly mapped. Hence return false. */
>
>
> By reading this comment some people will wonder why you don't check whether
> the mfn is in xenheap then. As mentioned above, I am ok if you always return
> false here. But you need to explain why.

Is this ok?

"On ARM32, not all the RAM is mapped by Xen; only the xenheap is mapped.
So the DIRECTMAP_VIRT region is not mapped.
Hence we always return false when an mfn is checked against the DIRECTMAP_VIRT region."

>
>
>> +static inline bool arch_mfn_in_directmap(unsigned long mfn)
>> +{
>> +return false;
>> +}
>> +
>> +#endif /* __ARM_ARM32_MM_H__ */
>> +
>> +/*
>> + * Local variables:
>> + * mode: C
>> + * c-file-style: "BSD"
>> + * c-basic-offset: 4
>> + * tab-width: 4
>> + * indent-tabs-mode: nil
>> + * End:
>> + */
>> diff --git a/xen/include/asm-arm/arm64/mm.h
>> b/xen/include/asm-arm/arm64/mm.h
>> new file mode 100644
>> index 000..36ee9c8
>> --- /dev/null
>> +++ 

[Xen-devel] [xen-unstable test] 107160: tolerable FAIL - PUSHED

2017-04-03 Thread osstest service owner
flight 107160 xen-unstable real [real]
http://logs.test-lab.xenproject.org/osstest/logs/107160/

Failures :-/ but no regressions.

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-libvirt-xsm 13 saverestore-support-check fail  like 107138
 test-armhf-armhf-libvirt 13 saverestore-support-check fail  like 107138
 test-amd64-i386-rumprun-i386 16 rumprun-demo-xenstorels/xenstorels.repeat fail 
like 107138
 test-amd64-amd64-xl-qemut-win7-amd64 16 guest-stop fail like 107138
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stop fail like 107138
 test-amd64-i386-xl-qemut-win7-amd64 16 guest-stop fail like 107138
 test-amd64-amd64-xl-qemuu-win7-amd64 16 guest-stop fail like 107138
 test-armhf-armhf-libvirt-raw 12 saverestore-support-check fail  like 107138
 test-amd64-amd64-xl-rtds  9 debian-install   fail  like 107138

Tests which did not succeed, but are not blocking:
 test-arm64-arm64-libvirt-xsm  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl   1 build-check(1)   blocked  n/a
 test-arm64-arm64-libvirt-qcow2  1 build-check(1)   blocked  n/a
 test-arm64-arm64-libvirt  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-credit2   1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-rtds  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-multivcpu  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-xsm   1 build-check(1)   blocked  n/a
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 build-arm64-pvops 5 kernel-build fail   never pass
 test-armhf-armhf-xl-arndale  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  13 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-vhd 11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 13 saverestore-support-checkfail  never pass
 test-amd64-amd64-qemuu-nested-amd 16 debian-hvm-install/l1/l2  fail never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 12 migrate-support-checkfail never pass
 test-armhf-armhf-xl-cubietruck 13 saverestore-support-checkfail never pass
 test-armhf-armhf-xl-rtds 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  12 saverestore-support-checkfail   never pass

version targeted for testing:
 xen  4d0240e03349fd0715332eae65372e0a47b5a43b
baseline version:
 xen  41630eb1b615158af42f4468236457fd3f8a6819

Last test of basis   107138  2017-04-03 01:56:19 Z    1 days
Testing same since   107160  2017-04-03 16:15:19 Z    0 days    1 attempts


People who touched revisions under test:
  Ian Jackson 
  Juergen Gross 
  Wei Liu 

jobs:
 build-amd64-xsm  pass
 build-arm64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64-xtf  pass
 build-amd64  pass
 build-arm64  pass
 build-armhf  pass
 build-i386

[Xen-devel] [xen-4.6-testing baseline-only test] 71144: tolerable trouble: broken/fail/pass

2017-04-03 Thread Platform Team regression test user
This run is configured for baseline tests only.

flight 71144 xen-4.6-testing real [real]
http://osstest.xs.citrite.net/~osstest/testlogs/logs/71144/

Failures :-/ but no regressions.

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-libvirt 19 capture-logs(19)   broken blocked in 71077
 test-amd64-i386-xl-qemut-win7-amd64 16 guest-stop fail REGR. vs. 71077
 test-amd64-amd64-xl-qemuu-win7-amd64 16 guest-stop fail REGR. vs. 71077
 test-armhf-armhf-libvirt-xsm 13 saverestore-support-check fail blocked in 71077
 test-armhf-armhf-libvirt 13 saverestore-support-check fail blocked in 71077
 test-armhf-armhf-libvirt-raw 12 saverestore-support-check fail blocked in 71077
 test-armhf-armhf-xl-rtds 15 guest-start/debian.repeat fail blocked in 71077
 test-xtf-amd64-amd64-2   20 xtf/test-hvm32-invlpg~shadow fail   like 71077
 test-xtf-amd64-amd64-2  33 xtf/test-hvm32pae-invlpg~shadow fail like 71077
 test-xtf-amd64-amd64-2   44 xtf/test-hvm64-invlpg~shadow fail   like 71077
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stop  fail like 71077
 test-amd64-amd64-xl-qemut-winxpsp3  9 windows-install  fail like 71077

Tests which did not succeed, but are not blocking:
 test-xtf-amd64-amd64-1   64 xtf/test-pv32pae-xsa-194 fail   never pass
 test-xtf-amd64-amd64-4   64 xtf/test-pv32pae-xsa-194 fail   never pass
 test-armhf-armhf-xl-credit2  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 saverestore-support-checkfail   never pass
 test-xtf-amd64-amd64-5   64 xtf/test-pv32pae-xsa-194 fail   never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 13 saverestore-support-checkfail  never pass
 test-armhf-armhf-xl-midway   12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  13 saverestore-support-checkfail   never pass
 test-xtf-amd64-amd64-3   64 xtf/test-pv32pae-xsa-194 fail   never pass
 test-armhf-armhf-xl-midway   13 saverestore-support-checkfail   never pass
 test-xtf-amd64-amd64-2   64 xtf/test-pv32pae-xsa-194 fail   never pass
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-amd  11 guest-start  fail   never pass
 test-armhf-armhf-xl-rtds 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 13 saverestore-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-intel 11 guest-start  fail  never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  12 saverestore-support-checkfail   never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-amd64-qemuu-nested-amd 16 debian-hvm-install/l1/l2  fail never pass
 test-amd64-amd64-libvirt-vhd 11 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-qemut-win7-amd64 16 guest-stop fail never pass

version targeted for testing:
 xen  f96efeb0c6b4f499194571ef6d767534ba851c6a
baseline version:
 xen  ac4c5d4ddf89051365da2acba5c6c306a10e0bbe

Last test of basis    71077  2017-03-22 21:53:24 Z   12 days
Testing same since    71144  2017-04-03 20:14:40 Z    0 days    1 attempts


People who touched revisions under test:
  Daniel De Graaf 
  Dario Faggioli 
  Jan Beulich 
  Roger Pau Monné 

jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64-xtf  pass
 build-amd64  pass
 build-armhf  pass
 build-i386   

[Xen-devel] Xen Code Review Dashboard

2017-04-03 Thread Todd Hendricks
Team,

I am interested in the project and would like to begin contributing. It
would be good to speak with a team lead about the project at a high level,
and to identify areas where I can provide quick wins. In addition to
CSS/HTML, my languages are JS, PHP, and SQL.

Thank you for your consideration.

Regards,

Todd Hendricks
281-702-1156
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [linux-arm-xen test] 107164: regressions - FAIL

2017-04-03 Thread osstest service owner
flight 107164 linux-arm-xen real [real]
http://logs.test-lab.xenproject.org/osstest/logs/107164/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-armhf-armhf-libvirt-xsm 13 saverestore-support-check fail REGR. vs. 62674
 test-armhf-armhf-libvirt 13 saverestore-support-check fail REGR. vs. 62674

Tests which did not succeed, but are not blocking:
 test-arm64-arm64-libvirt-xsm  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl   1 build-check(1)   blocked  n/a
 build-arm64-libvirt   1 build-check(1)   blocked  n/a
 test-arm64-arm64-libvirt-qcow2  1 build-check(1)   blocked  n/a
 test-arm64-arm64-libvirt  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-credit2   1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-rtds  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-multivcpu  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-xsm   1 build-check(1)   blocked  n/a
 build-arm64-xsm   5 xen-buildfail   never pass
 build-arm64-pvops 5 kernel-build fail   never pass
 build-arm64   5 xen-buildfail   never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 13 saverestore-support-checkfail  never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 11 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 12 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 12 migrate-support-checkfail never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 13 saverestore-support-checkfail never pass
 test-armhf-armhf-xl-xsm  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  12 saverestore-support-checkfail   never pass

version targeted for testing:
 linux                6878b2fa7229c9208a02d45f280c71389cba0617
baseline version:
 linux                9550fff2bd1f88405dd61d86e90807046a580d6c

Last test of basis    62674  2015-10-05 12:24:43 Z  546 days
Testing same since   107164  2017-04-03 20:34:31 Z    0 days    1 attempts


People who touched revisions under test:
  Stefano Stabellini 

jobs:
 build-arm64-xsm  fail
 build-armhf-xsm  pass
 build-arm64  fail
 build-armhf  pass
 build-arm64-libvirt  blocked 
 build-armhf-libvirt  pass
 build-arm64-pvops  fail
 build-armhf-pvops  pass
 test-arm64-arm64-xl  blocked 
 test-armhf-armhf-xl  pass
 test-arm64-arm64-libvirt-xsm blocked 
 test-armhf-armhf-libvirt-xsm pass
 test-arm64-arm64-xl-xsm  blocked 
 test-armhf-armhf-xl-xsm  pass
 test-armhf-armhf-xl-arndale  pass
 test-arm64-arm64-xl-credit2  blocked 
 test-armhf-armhf-xl-credit2  pass
 test-armhf-armhf-xl-cubietruck   pass
 test-arm64-arm64-libvirt blocked 
 test-armhf-armhf-libvirt pass
 test-arm64-arm64-xl-multivcpu  blocked 
 test-armhf-armhf-xl-multivcpu  pass
 test-arm64-arm64-libvirt-qcow2   

Re: [Xen-devel] Can't ./configure latest git

2017-04-03 Thread Duncan X. Simpson
In addition, I had to grep -r -l '$(PYTHON)' * | xargs sed -i
's!$(PYTHON)!/usr/bin/python2!g' because I couldn't figure out where
$(PYTHON) was set. At first I thought it was tools/get-fields.sh but
setting it there didn't fix it.

On Mon, Apr 3, 2017 at 7:25 PM Duncan X. Simpson 
wrote:

> Worked around with the following:
>
>  ~git/xen   master ± git diff
> diff --git a/tools/configure b/tools/configure
> index 7a57e6562d..874498ad80 100755
> --- a/tools/configure
> +++ b/tools/configure
> @@ -6859,7 +6859,7 @@ if echo "$PYTHON" | grep -q "^/"; then :
>  PYTHON=`basename $PYTHONPATH`
>
>  elif test -z "$PYTHON"; then :
> -  PYTHON="python"
> +  PYTHON="python2"
>  else
>as_fn_error $? "PYTHON specified, but is not an absolute path"
> "$LINENO" 5
>  fi
>
> Not sure if this is ideal.
>
> On Mon, Apr 3, 2017 at 7:18 PM Duncan X. Simpson 
> wrote:
>
> I just cloned Xen from git, but it won't configure. I have both versions
> of Python installed, but it tries to use 3 to run 2 code:
>
> checking for inttypes.h... yes
> checking for stdint.h... yes
> checking for unistd.h... yes
>   File "", line 1
> import distutils.sysconfig; print
> distutils.sysconfig.get_config_var("VERSION")
>   ^
> SyntaxError: invalid syntax
> checking for python-config... /usr/bin/python-config
> checking Python.h usability... yes
> checking Python.h presence... yes
> checking for Python.h... yes
>
> How do I fix this?
> --
>
> Duncan X. Simpson, K7DXS
>
> --
>
> Duncan X. Simpson, K7DXS
>
-- 

Duncan X. Simpson, K7DXS
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Can't ./configure latest git

2017-04-03 Thread Duncan X. Simpson
Worked around with the following:

 ~git/xen   master ± git diff
diff --git a/tools/configure b/tools/configure
index 7a57e6562d..874498ad80 100755
--- a/tools/configure
+++ b/tools/configure
@@ -6859,7 +6859,7 @@ if echo "$PYTHON" | grep -q "^/"; then :
 PYTHON=`basename $PYTHONPATH`

 elif test -z "$PYTHON"; then :
-  PYTHON="python"
+  PYTHON="python2"
 else
   as_fn_error $? "PYTHON specified, but is not an absolute path" "$LINENO"
5
 fi

Not sure if this is ideal.

On Mon, Apr 3, 2017 at 7:18 PM Duncan X. Simpson 
wrote:

I just cloned Xen from git, but it won't configure. I have both versions of
Python installed, but it tries to use 3 to run 2 code:

checking for inttypes.h... yes
checking for stdint.h... yes
checking for unistd.h... yes
  File "", line 1
import distutils.sysconfig; print
distutils.sysconfig.get_config_var("VERSION")
  ^
SyntaxError: invalid syntax
checking for python-config... /usr/bin/python-config
checking Python.h usability... yes
checking Python.h presence... yes
checking for Python.h... yes

How do I fix this?
-- 

Duncan X. Simpson, K7DXS

-- 

Duncan X. Simpson, K7DXS
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] Can't ./configure latest git

2017-04-03 Thread Duncan X. Simpson
I just cloned Xen from git, but it won't configure. I have both versions of
Python installed, but it tries to use 3 to run 2 code:

checking for inttypes.h... yes
checking for stdint.h... yes
checking for unistd.h... yes
  File "", line 1
import distutils.sysconfig; print
distutils.sysconfig.get_config_var("VERSION")
  ^
SyntaxError: invalid syntax
checking for python-config... /usr/bin/python-config
checking Python.h usability... yes
checking Python.h presence... yes
checking for Python.h... yes

How do I fix this?
-- 

Duncan X. Simpson, K7DXS
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Can't read bios file

2017-04-03 Thread Duncan X. Simpson
I apologize, I should probably include this information:
OS: Arch Linux
xl info:
host   : k7dxs-laptop-r500
release: 4.10.6-1-ARCH
version: #1 SMP PREEMPT Mon Mar 27 08:28:22 CEST 2017
machine: x86_64
nr_cpus: 2
max_cpu_id : 3
nr_nodes   : 1
cores_per_socket   : 2
threads_per_core   : 1
cpu_mhz: 2394
hw_caps:
b7ebfbff:0408e3fd:20100800:0001::::
virt_caps  : hvm
total_memory   : 1944
free_memory: 517
sharing_freed_memory   : 0
sharing_used_memory: 0
outstanding_claims : 0
free_cpus  : 0
xen_major  : 4
xen_minor  : 8
xen_extra  : .0
xen_version: 4.8.0
xen_caps   : xen-3.0-x86_64 xen-3.0-x86_32p hvm-3.0-x86_32
hvm-3.0-x86_32p hvm-3.0-x86_64
xen_scheduler  : credit
xen_pagesize   : 4096
platform_params: virt_start=0x8000
xen_changeset  :
xen_commandline: /boot/xen-4.8.0.gz xsave=1
cc_compiler: gcc (GCC) 6.3.1 20170306
cc_compile_by  : duncan
cc_compile_domain  :
cc_compile_date: Tue Mar 28 15:11:08 MST 2017
build_id   : 8b7628e151ee56a26b2f83b21160ee1168263b64
xend_config_format : 4


On Mon, Apr 3, 2017 at 6:25 PM Duncan X. Simpson 
wrote:

I'm trying to set up an HVM CentOS guest, and I've run into a problem. I
originally tried posting it on Stack Exchange (
https://superuser.com/questions/1193771/failed-to-read-bios-file-no-such-file-or-directory),
but it is not documented anywhere on the Internet that Google can see and I
have gotten no response. I regained interest in it today and used strace to
determine what bios file it was looking for, and found it was looking for a
file called 'yes' in the current directory. This is what made me decide it
was most likely a bug and post it here rather than in xen-users. My
configuration is as listed on the Stack Exchange question and does not
contain the word yes anywhere in it. What should I do next?
-- 

Duncan X. Simpson, K7DXS

-- 

Duncan X. Simpson, K7DXS
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [linux-linus test] 107159: regressions - FAIL

2017-04-03 Thread osstest service owner
flight 107159 linux-linus real [real]
http://logs.test-lab.xenproject.org/osstest/logs/107159/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-armhf-armhf-xl-credit2  11 guest-start   fail REGR. vs. 59254
 test-armhf-armhf-xl-arndale  11 guest-start   fail REGR. vs. 59254
 test-armhf-armhf-xl-cubietruck 11 guest-start fail REGR. vs. 59254
 test-armhf-armhf-libvirt 11 guest-start   fail REGR. vs. 59254
 test-armhf-armhf-libvirt-xsm 11 guest-start   fail REGR. vs. 59254
 test-armhf-armhf-xl  11 guest-start   fail REGR. vs. 59254
 test-armhf-armhf-xl-xsm  11 guest-start   fail REGR. vs. 59254
 test-armhf-armhf-xl-multivcpu 11 guest-start  fail REGR. vs. 59254

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-xl-rtds 11 guest-start   fail REGR. vs. 59254
 test-amd64-amd64-xl-rtds  9 debian-installfail REGR. vs. 59254
 test-armhf-armhf-xl-vhd   9 debian-di-install   fail baseline untested
 test-armhf-armhf-libvirt-raw  9 debian-di-install   fail baseline untested
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stop  fail like 59254
 test-amd64-amd64-xl-qemut-win7-amd64 16 guest-stop fail like 59254
 test-amd64-i386-xl-qemut-win7-amd64 16 guest-stop  fail like 59254
 test-amd64-amd64-xl-qemuu-win7-amd64 16 guest-stop fail like 59254

Tests which did not succeed, but are not blocking:
 test-arm64-arm64-libvirt-xsm  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl   1 build-check(1)   blocked  n/a
 build-arm64-libvirt   1 build-check(1)   blocked  n/a
 test-arm64-arm64-libvirt-qcow2  1 build-check(1)   blocked  n/a
 test-arm64-arm64-libvirt  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-credit2   1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-rtds  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-multivcpu  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-xsm   1 build-check(1)   blocked  n/a
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 build-arm64-xsm   5 xen-buildfail   never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-amd64-libvirt-vhd 11 migrate-support-checkfail   never pass
 test-amd64-amd64-qemuu-nested-amd 16 debian-hvm-install/l1/l2  fail never pass
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 build-arm64   5 xen-buildfail   never pass

version targeted for testing:
 linux                a71c9a1c779f2499fb2afc0553e543f18aff6edf
baseline version:
 linux                45820c294fe1b1a9df495d57f40585ef2d069a39

Last test of basis    59254  2015-07-09 04:20:48 Z  634 days
Failing since         59348  2015-07-10 04:24:05 Z  633 days  374 attempts
Testing same since   107142  2017-04-03 03:52:01 Z    0 days    2 attempts


8129 people touched revisions under test,
not listing them all

jobs:
 build-amd64-xsm  pass
 build-arm64-xsm  fail
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-arm64  fail
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-arm64-libvirt  blocked 
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvops  pass
 build-arm64-pvops  pass
 build-armhf-pvops  pass
 build-i386-pvops pass
 build-amd64-rumprun  pass
 build-i386-rumprun   pass
 test-amd64-amd64-xl  pass
 test-arm64-arm64-xl  

[Xen-devel] Can't read bios file

2017-04-03 Thread Duncan X. Simpson
I'm trying to set up an HVM CentOS guest, and I've run into a problem. I
originally tried posting it on Stack Exchange (
https://superuser.com/questions/1193771/failed-to-read-bios-file-no-such-file-or-directory),
but it is not documented anywhere on the Internet that Google can see and I
have gotten no response. I regained interest in it today and used strace to
determine what bios file it was looking for, and found it was looking for a
file called 'yes' in the current directory. This is what made me decide it
was most likely a bug and post it here rather than in xen-users. My
configuration is as listed on the Stack Exchange question and does not
contain the word yes anywhere in it. What should I do next?
-- 

Duncan X. Simpson, K7DXS
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [xen-unstable-smoke test] 107167: tolerable trouble: broken/fail/pass - PUSHED

2017-04-03 Thread osstest service owner
flight 107167 xen-unstable-smoke real [real]
http://logs.test-lab.xenproject.org/osstest/logs/107167/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-arm64-arm64-xl-xsm   1 build-check(1)   blocked  n/a
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 build-arm64-pvops 5 kernel-build fail   never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 saverestore-support-checkfail   never pass

version targeted for testing:
 xen  b32d442abd92cdd4d8f2a2e7794cfee9dba7fe22
baseline version:
 xen  80f9c316708400cea4417e36337267d3b26591db

Last test of basis   107163  2017-04-03 20:02:55 Z    0 days
Testing same since   107167  2017-04-03 23:02:57 Z    0 days    1 attempts


People who touched revisions under test:
  Stefano Stabellini 

jobs:
 build-amd64  pass
 build-armhf  pass
 build-amd64-libvirt  pass
 build-arm64-pvops  fail
 test-armhf-armhf-xl  pass
 test-arm64-arm64-xl-xsm  broken  
 test-amd64-amd64-xl-qemuu-debianhvm-i386 pass
 test-amd64-amd64-libvirt pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

+ branch=xen-unstable-smoke
+ revision=b32d442abd92cdd4d8f2a2e7794cfee9dba7fe22
+ . ./cri-lock-repos
++ . ./cri-common
+++ . ./cri-getconfig
+++ umask 002
+++ getrepos
 getconfig Repos
 perl -e '
use Osstest;
readglobalconfig();
print $c{"Repos"} or die $!;
'
+++ local repos=/home/osstest/repos
+++ '[' -z /home/osstest/repos ']'
+++ '[' '!' -d /home/osstest/repos ']'
+++ echo /home/osstest/repos
++ repos=/home/osstest/repos
++ repos_lock=/home/osstest/repos/lock
++ '[' x '!=' x/home/osstest/repos/lock ']'
++ OSSTEST_REPOS_LOCK_LOCKED=/home/osstest/repos/lock
++ exec with-lock-ex -w /home/osstest/repos/lock ./ap-push xen-unstable-smoke 
b32d442abd92cdd4d8f2a2e7794cfee9dba7fe22
+ branch=xen-unstable-smoke
+ revision=b32d442abd92cdd4d8f2a2e7794cfee9dba7fe22
+ . ./cri-lock-repos
++ . ./cri-common
+++ . ./cri-getconfig
+++ umask 002
+++ getrepos
 getconfig Repos
 perl -e '
use Osstest;
readglobalconfig();
print $c{"Repos"} or die $!;
'
+++ local repos=/home/osstest/repos
+++ '[' -z /home/osstest/repos ']'
+++ '[' '!' -d /home/osstest/repos ']'
+++ echo /home/osstest/repos
++ repos=/home/osstest/repos
++ repos_lock=/home/osstest/repos/lock
++ '[' x/home/osstest/repos/lock '!=' x/home/osstest/repos/lock ']'
+ . ./cri-common
++ . ./cri-getconfig
++ umask 002
+ select_xenbranch
+ case "$branch" in
+ tree=xen
+ xenbranch=xen-unstable-smoke
+ qemuubranch=qemu-upstream-unstable
+ '[' xxen = xlinux ']'
+ linuxbranch=
+ '[' xqemu-upstream-unstable = x ']'
+ select_prevxenbranch
++ ./cri-getprevxenbranch xen-unstable-smoke
+ prevxenbranch=xen-4.8-testing
+ '[' xb32d442abd92cdd4d8f2a2e7794cfee9dba7fe22 = x ']'
+ : tested/2.6.39.x
+ . ./ap-common
++ : osst...@xenbits.xen.org
+++ getconfig OsstestUpstream
+++ perl -e '
use Osstest;
readglobalconfig();
print $c{"OsstestUpstream"} or die $!;
'
++ :
++ : git://xenbits.xen.org/xen.git
++ : osst...@xenbits.xen.org:/home/xen/git/xen.git
++ : git://xenbits.xen.org/qemu-xen-traditional.git
++ : git://git.kernel.org
++ : git://git.kernel.org/pub/scm/linux/kernel/git
++ : git
++ : git://xenbits.xen.org/xtf.git
++ : osst...@xenbits.xen.org:/home/xen/git/xtf.git
++ : git://xenbits.xen.org/xtf.git
++ : git://xenbits.xen.org/libvirt.git
++ : osst...@xenbits.xen.org:/home/xen/git/libvirt.git
++ : git://xenbits.xen.org/libvirt.git
++ : git://xenbits.xen.org/osstest/rumprun.git
++ : git
++ : git://xenbits.xen.org/osstest/rumprun.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/rumprun.git
++ : git://git.seabios.org/seabios.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/seabios.git
++ : 

[Xen-devel] [PATCH v6 1/2] arm: remove irq from inflight, then change physical affinity

2017-04-03 Thread Stefano Stabellini
This patch fixes a potential race that could happen when
gic_update_one_lr and vgic_vcpu_inject_irq run simultaneously.

When GIC_IRQ_GUEST_MIGRATING is set, we must make sure that the irq has
been removed from inflight before changing physical affinity, to avoid
concurrent accesses to p->inflight, as vgic_vcpu_inject_irq will take a
different vcpu lock.

Signed-off-by: Stefano Stabellini 

---
 xen/arch/arm/gic.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 9522c6c..de996d9 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -503,6 +503,11 @@ static void gic_update_one_lr(struct vcpu *v, int i)
 gic_raise_guest_irq(v, irq, p->priority);
 else {
 list_del_init(&p->inflight);
+/* Remove from inflight, then change physical affinity. It
+ * makes sure that when a new interrupt is received on the
+ * next pcpu, inflight is already cleared. No concurrent
+ * accesses to inflight. */
+smp_wmb();
 if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
 {
 struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v6 2/2] vgic: refuse irq migration when one is already in progress

2017-04-03 Thread Stefano Stabellini
When an irq migration is already in progress, but not yet completed
(GIC_IRQ_GUEST_MIGRATING is set), refuse any other irq migration
requests for the same irq.

This patch implements this approach by returning success or failure from
vgic_migrate_irq, and avoiding irq target changes on failure. It prints
a warning in case the irq migration fails.

It also moves the clear_bit of GIC_IRQ_GUEST_MIGRATING to after the
physical irq affinity has been changed so that all operations regarding
irq migration are completed.

Signed-off-by: Stefano Stabellini 
---
 xen/arch/arm/gic.c |  3 ++-
 xen/arch/arm/vgic-v2.c |  7 +++
 xen/arch/arm/vgic-v3.c |  7 ---
 xen/arch/arm/vgic.c| 14 +-
 xen/include/asm-arm/vgic.h |  2 +-
 5 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index de996d9..dc07df1 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -508,10 +508,11 @@ static void gic_update_one_lr(struct vcpu *v, int i)
  * next pcpu, inflight is already cleared. No concurrent
  * accesses to inflight. */
 smp_wmb();
-if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
+if ( test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
 {
 struct vcpu *v_target = vgic_get_target_vcpu(v, irq);
 irq_set_affinity(p->desc, cpumask_of(v_target->processor));
+clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status);
 }
 }
 }
diff --git a/xen/arch/arm/vgic-v2.c b/xen/arch/arm/vgic-v2.c
index 0674f7b..dc9f95b 100644
--- a/xen/arch/arm/vgic-v2.c
+++ b/xen/arch/arm/vgic-v2.c
@@ -156,12 +156,11 @@ static void vgic_store_itargetsr(struct domain *d, struct 
vgic_irq_rank *rank,
 /* Only migrate the vIRQ if the target vCPU has changed */
 if ( new_target != old_target )
 {
-vgic_migrate_irq(d->vcpu[old_target],
+if ( vgic_migrate_irq(d->vcpu[old_target],
  d->vcpu[new_target],
- virq);
+ virq) )
+write_atomic(&rank->vcpu[offset], new_target);
 }
-
-write_atomic(&rank->vcpu[offset], new_target);
 }
 }
 
diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index 0679e76..1e9890b 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -151,9 +151,10 @@ static void vgic_store_irouter(struct domain *d, struct 
vgic_irq_rank *rank,
 
 /* Only migrate the IRQ if the target vCPU has changed */
 if ( new_vcpu != old_vcpu )
-vgic_migrate_irq(old_vcpu, new_vcpu, virq);
-
-write_atomic(&rank->vcpu[offset], new_vcpu->vcpu_id);
+{
+if ( vgic_migrate_irq(old_vcpu, new_vcpu, virq) )
+write_atomic(&rank->vcpu[offset], new_vcpu->vcpu_id);
+}
 }
 
 static inline bool vgic_reg64_check_access(struct hsr_dabt dabt)
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 67d75a6..5eef359 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -237,18 +237,21 @@ static int vgic_get_virq_priority(struct vcpu *v, 
unsigned int virq)
 return priority;
 }
 
-void vgic_migrate_irq(struct vcpu *old, struct vcpu *new, unsigned int irq)
+bool vgic_migrate_irq(struct vcpu *old, struct vcpu *new, unsigned int irq)
 {
 unsigned long flags;
 struct pending_irq *p = irq_to_pending(old, irq);
 
 /* nothing to do for virtual interrupts */
 if ( p->desc == NULL )
-return;
+return true;
 
 /* migration already in progress, no need to do anything */
 if ( test_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
-return;
+{
+gdprintk(XENLOG_WARNING, "irq %d migration failed: requested while in 
progress\n", irq);
+return false;
+}
 
 perfc_incr(vgic_irq_migrates);
 
@@ -258,7 +261,7 @@ void vgic_migrate_irq(struct vcpu *old, struct vcpu *new, 
unsigned int irq)
 {
 irq_set_affinity(p->desc, cpumask_of(new->processor));
 spin_unlock_irqrestore(&old->arch.vgic.lock, flags);
-return;
+return true;
 }
 /* If the IRQ is still lr_pending, re-inject it to the new vcpu */
 if ( !list_empty(&p->lr_queue) )
@@ -269,7 +272,7 @@ void vgic_migrate_irq(struct vcpu *old, struct vcpu *new, 
unsigned int irq)
 irq_set_affinity(p->desc, cpumask_of(new->processor));
 spin_unlock_irqrestore(&old->arch.vgic.lock, flags);
 vgic_vcpu_inject_irq(new, irq);
-return;
+return true;
 }
 /* if the IRQ is in a GICH_LR register, set GIC_IRQ_GUEST_MIGRATING
  * and wait for the EOI */
@@ -277,6 +280,7 @@ void vgic_migrate_irq(struct vcpu *old, struct vcpu *new, 
unsigned int irq)
 set_bit(GIC_IRQ_GUEST_MIGRATING, &p->status);
 
 spin_unlock_irqrestore(&old->arch.vgic.lock, flags);
+return true;
 }
 
 void arch_move_irqs(struct vcpu *v)
diff --git a/xen/include/asm-arm/vgic.h 

[Xen-devel] [PATCH v6 0/2] xen/arm: remove race conditions in irq migration

2017-04-03 Thread Stefano Stabellini
Hi all,

this patch series removes three race conditions affecting the current
code base.

The first race condition is between gic_update_one_lr and
vgic_vcpu_inject_irq: as soon as gic_update_one_lr calls
irq_set_affinity a new interrupt could be injected in the new pcpu,
eventually vgic_vcpu_inject_irq is called which manipulates the inflight
list. The first patch solves this race by adding a barrier in
gic_update_one_lr. This patch was suggested by Julien.

The second race condition happens when gic_update_one_lr runs
simultaneously with vgic_store_itargetsr and vgic_migrate_irq. Setting
the new target is done after calling vgic_migrate_irq, which means that
gic_update_one_lr could end up setting the physical affinity to the one
of the old pcpu.

The third race condition happens again between gic_update_one_lr and
vgic_migrate_irq: when GIC_IRQ_GUEST_MIGRATING is already set and
vgic_migrate_irq is called again, it will take a different vgic lock
from the one that gic_update_one_lr is taking.

The second patch addresses the last two issues by refusing any irq
migration requests while one request is already in-progress and not yet
completed.


For your reference, it is not possible to take the p->desc lock from
gic_update_one_lr, because the correct lock ordering is p->desc lock,
then vgic lock.


Changes in v6:
- smp_mb/smp_wmb
- refuse nested irq migration requests instead of trying to handle them


Stefano Stabellini (2):
  arm: remove irq from inflight, then change physical affinity
  vgic: refuse irq migration when one is already in progress

 xen/arch/arm/gic.c |  8 +++-
 xen/arch/arm/vgic-v2.c |  7 +++
 xen/arch/arm/vgic-v3.c |  7 ---
 xen/arch/arm/vgic.c| 14 +-
 xen/include/asm-arm/vgic.h |  2 +-
 5 files changed, 24 insertions(+), 14 deletions(-)

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [Outreachy] Interested to Work on Xen Code Review Dashboard

2017-04-03 Thread Jesus M. Gonzalez-Barahona
On Tue, 2017-04-04 at 05:07 +0530, Pooja wrote:
> Hello mentors,
> 
> I'd like to work on the new Xen Code Dashboard extensions project as
> part of applying to Outreachy Rd 14, 2017.
> 
> I've been working in front end development since 2013, and am quite
> comfortable in JavaScript, jQuery, JSON, CSS frameworks, SQL besides
> HTML5/CSS3.
> 
> I'd really appreciate if you could guide me about beginning the
> project with bite-sized tasks.

Hi, Pooja,

First of all, thanks for your interest.

And now, a warning notice: this project will require mainly Python and
noSQL (ElasticSearch, in particular) knowledge. If you're not familiar
with them, that could be a big problem.

[Lars, as I just commented in another message, I now notice this is
wrong in the project description at
https://wiki.xenproject.org/wiki/Outreach_Program_Projects
sorry about that. Could we change it?]

If you're still interested, I guess it would be good to have a quick
IRC chat, and discuss about next steps.

Meanwhile, you can start having a look at:

http://markmail.org/message/7adkmords3imkswd

Saludos,

Jesus.

> Thanks and Regards,
> Pooja G
> ___
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> https://lists.xen.org/xen-devel
-- 
Bitergia: http://bitergia.com
/me at Twitter: https://twitter.com/jgbarah


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [Outreachy] Interested to Work on Xen Code Review Dashboard

2017-04-03 Thread Pooja
Hello mentors,

I'd like to work on the new Xen Code Dashboard extensions project as part
of applying to Outreachy Rd 14, 2017.

I've been working in front end development since 2013, and am quite
comfortable in JavaScript, jQuery, JSON, CSS frameworks, SQL besides
HTML5/CSS3.

I'd really appreciate if you could guide me about beginning the project
with bite-sized tasks.

Thanks and Regards,
Pooja G
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [Outreachy] Interested to Work on Xen Code Review Dashboard

2017-04-03 Thread Pooja
Hello mentors,

I'd like to work on the new Xen Code Dashboard extensions project as part
of applying to Outreachy Rd 14, 2017.

I've been working in front end development since 2013, and am quite
comfortable in JavaScript, jQuery, JSON, CSS frameworks, SQL besides
HTML5/CSS3.

I'd really appreciate if you could guide me about beginning the project
with bite-sized tasks.

Thanks and Regards,
Pooja G
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [Outreachy] Interested to Work on Xen Code Review Dashboard

2017-04-03 Thread Pooja
Hello mentors,

I'd like to work on the new Xen Code Dashboard extensions project as part
of applying to Outreachy Rd 14, 2017.

I've been working in front end development since 2013, and am quite
comfortable in JavaScript, jQuery, JSON, CSS frameworks, SQL besides
HTML5/CSS3.

I'd really appreciate if you could guide me about beginning the project
with bite-sized tasks.

Thanks and Regards,
Pooja G
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 07/27] ARM: GICv3 ITS: introduce host LPI array

2017-04-03 Thread Julien Grall

Hi Andre,

On 04/03/2017 09:28 PM, Andre Przywara wrote:

 /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
 void gicv3_its_dt_init(const struct dt_device_node *node)
 {
diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
index 8b3660a..d3ee141 100644
--- a/xen/arch/arm/gic-v3-lpi.c
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -20,25 +20,59 @@


[...]


+/*
+ * There could be a lot of LPIs on the host side, and they always go to
+ * a guest. So having a struct irq_desc for each of them would be wasteful
+ * and useless.
+ * Instead just store enough information to find the right VCPU to inject
+ * those LPIs into, which just requires the virtual LPI number.
+ * To avoid a global lock on this data structure, this is using a lockless
+ * approach relying on the architectural atomicty of native data types:


s/atomicty/atomicity/

[...]


+/*
+ * Allocate a block of 32 LPIs on the given host ITS for device "devid",
+ * starting with "eventid". Put them into the respective ITT by issuing a
+ * MAPTI command for each of them.
+ */
+int gicv3_allocate_host_lpi_block(struct domain *d, uint32_t *first_lpi)
+{


[...]


+/* If we hit an unallocated chunk, we initialize it and use entry 0. */
+if ( !lpi_data.host_lpis[chunk] )
+{
+union host_lpi *new_chunk;
+
+/* TODO: NUMA locality for quicker IRQ path? */
+new_chunk = xmalloc_bytes(PAGE_SIZE);


NIT: As suggested on v2, this could be xenheap_alloc_page(0);


+if ( !new_chunk )
+{
+spin_unlock(&lpi_data.host_lpis_lock);
+return -ENOMEM;
+}
+
+for ( i = 0; i < HOST_LPIS_PER_PAGE; i += LPI_BLOCK )
+new_chunk[i].dom_id = DOMID_INVALID;


As host_lpis could be read without lock, I would add a barrier here to 
make sure new_chunk[*].dom_id have been written before setting up the page.


A similar barrier would be needed in the do_lpi(...) path.

[...]


diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
index 836a103..d04bd04 100644
--- a/xen/include/asm-arm/gic.h
+++ b/xen/include/asm-arm/gic.h
@@ -220,6 +220,8 @@ enum gic_version {
 GIC_V3,
 };

+#define INVALID_LPI 0


Again, I think INVALID_LPI should be moved in irq.h to stay with the 
definition of LPI_OFFSET. It was supposed to be fixed in v3...


Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 06/27] ARM: GICv3 ITS: introduce ITS command handling

2017-04-03 Thread Julien Grall

Hi Andre,

On 04/03/2017 09:28 PM, Andre Przywara wrote:

+#define BUFPTR_MASK GENMASK_ULL(19, 5)
+static int its_send_command(struct host_its *hw_its, const void *its_cmd)
+{
+/* Some small grace period in case the command queue is congested. */


This comment is a nice improvement. But as mentioned on the previous 
version, you should make it clear that it is a guess. People will likely ask 
why you chose 1ms whilst Linux is using 1s.


[...]


+/* Wait for an ITS to finish processing all commands. */
+static int gicv3_its_wait_commands(struct host_its *hw_its)
+{
+/* Define an upper limit for our wait time. */


See my remark on the previous timeout comment.

[...]


+static int gicv3_disable_its(struct host_its *hw_its)
+{
+uint32_t reg;
+/* A similar generous wait limit as we use for the command queue wait. */


See my above comments about the timeout.


+s_time_t deadline = NOW() + MILLISECS(100);
+
+reg = readl_relaxed(hw_its->its_base + GITS_CTLR);
+if ( !(reg & GITS_CTLR_ENABLE) && (reg & GITS_CTLR_QUIESCENT) )
+return 0;
+
+writel_relaxed(reg & ~GITS_CTLR_ENABLE, hw_its->its_base + GITS_CTLR);
+
+do {
+reg = readl_relaxed(hw_its->its_base + GITS_CTLR);
+if ( reg & GITS_CTLR_QUIESCENT )
+return 0;
+
+cpu_relax();
+udelay(1);
+} while ( NOW() <= deadline );
+
+dprintk(XENLOG_ERR, "ITS not quiescent.\n");


dprintk will disappear on non-debug build. But this looks quite useful. 
So I would use printk.


[...]


diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index 54d2235..a559e5e 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -665,8 +665,25 @@ static int __init gicv3_populate_rdist(void)

 if ( typer & GICR_TYPER_PLPIS )
 {
+paddr_t rdist_addr;
+unsigned int procnum;
 int ret;

+/*
+ * The ITS refers to redistributors either by their 
physical
+ * address or by their ID. Which one to use is an ITS
+ * choice. So determine those two values here (which we
+ * can do only here in GICv3 code) and tell the
+ * ITS code about it, so it can use them later to be able
+ * to address those redistributors accordingly.
+ */


I said it on v2 this morning and will repeat it for the record. This comment 
is not useful in itself here because redist_address could be used by 
other code. It would be more useful on top of the call to initialize the ITS, 
as it would explain why it is done there and not before.


[...]


diff --git a/xen/include/asm-arm/gic_v3_defs.h 
b/xen/include/asm-arm/gic_v3_defs.h
index 7cdebc5..b01b6ed 100644
--- a/xen/include/asm-arm/gic_v3_defs.h
+++ b/xen/include/asm-arm/gic_v3_defs.h
@@ -103,6 +103,8 @@
 #define GICR_TYPER_PLPIS (1U << 0)
 #define GICR_TYPER_VLPIS (1U << 1)
 #define GICR_TYPER_LAST  (1U << 4)
+#define GICR_TYPER_PROC_NUM_SHIFT8
+#define GICR_TYPER_PROC_NUM_MASK (0xffff << GICR_TYPER_PROC_NUM_SHIFT)

 /* For specifying the inner cacheability type only */
 #define GIC_BASER_CACHE_nCnB 0ULL
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
index 3500b042..f4f3c9b 100644
--- a/xen/include/asm-arm/gic_v3_its.h
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -42,11 +42,11 @@
 #define GITS_CTLR_QUIESCENT BIT(31)
 #define GITS_CTLR_ENABLEBIT(0)

+#define GITS_TYPER_PTA  BIT_ULL(19)
 #define GITS_TYPER_DEVIDS_SHIFT 13
 #define GITS_TYPER_DEVIDS_MASK  (0x1fUL << GITS_TYPER_DEVIDS_SHIFT)
 #define GITS_TYPER_DEVICE_ID_BITS(r)(((r & GITS_TYPER_DEVIDS_MASK) >> \
GITS_TYPER_DEVIDS_SHIFT) + 1)
-


Spurious change.

Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v5] altp2m: Allow specifying external-only use-case

2017-04-03 Thread Tamas K Lengyel
On Mon, Apr 3, 2017 at 3:24 PM, Tamas K Lengyel
 wrote:
> On Tue, Mar 28, 2017 at 12:59 PM, Daniel De Graaf  
> wrote:
>> On 03/22/2017 02:07 PM, Tamas K Lengyel wrote:
>>>
>>> Currently setting altp2mhvm=1 in the domain configuration allows access to
>>> the
>>> altp2m interface for both in-guest and external privileged tools. This
>>> poses
>>> a problem for use-cases where only external access should be allowed,
>>> requiring
>>> the user to compile Xen with XSM enabled to be able to appropriately
>>> restrict
>>> access.
>>>
>>> In this patch we deprecate the altp2mhvm domain configuration option and
>>> introduce the altp2m option, which allows specifying if by default the
>>> altp2m
>>> interface should be external-only. The information is stored in
>>> HVM_PARAM_ALTP2M which we now define with specific XEN_ALTP2M_* modes.
>>> If external mode is selected, the XSM check is shifted to use XSM_DM_PRIV
>>> type check, thus restricting access to the interface by the guest itself.
>>> Note
>>> that we keep the default XSM policy untouched. Users of XSM who wish to
>>> enforce
>>> external mode for altp2m can do so by adjusting their XSM policy directly,
>>> as this domain config option does not override an active XSM policy.
>>>
>>> Also, as part of this patch we adjust the hvmop handler to require
>>> HVM_PARAM_ALTP2M to be of a type other than disabled for all ops. This has
>>> been
>>> previously only required for get/set altp2m domain state, all other
>>> options
>>> were gated on altp2m_enabled. Since altp2m_enabled only gets set during
>>> set
>>> altp2m domain state, this change introduces no new requirements to the
>>> other
>>> ops but makes it more clear that it is required for all ops.
>>>
>>> Signed-off-by: Tamas K Lengyel 
>>> Signed-off-by: Sergej Proskurin 
>>
>>
>> I think the XSM-enabled case using the default types should have the same
>> flexibility as the XSM-disabled case.  I agree that it is useful to be able
>> to restrict the p2m features based on policy, and I don't think that it's
>> useful to expand the number of XSM permissions here.  In that case, the best
>> way to proceed would be to require that both the domain configuration and
>> XSM policy must allow the action (similar to how SELinux file controls and
>> UNIX permissions interact).  Currently, enabling XSM effectively forces the
>> value of this setting to "mixed", and "limited" is impossible to use with
>> XSM.
>
> I agree, however unfortunately due to the development effort to do
> that I will have to drop this patch. An earlier version only lacked
> the toolside ack so I thought it was about ready to go in. Hopefully
> one day in the future we will have XSM enabled by default and then we
> won't have to do things like this patch.
>

Daniel pointed out off-list that this can actually be achieved with a
minor adjustment in the flask function. I'll send v6 shortly.

Tamas

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [xen-unstable-smoke test] 107163: tolerable trouble: broken/fail/pass - PUSHED

2017-04-03 Thread osstest service owner
flight 107163 xen-unstable-smoke real [real]
http://logs.test-lab.xenproject.org/osstest/logs/107163/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-arm64-arm64-xl-xsm   1 build-check(1)   blocked  n/a
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 build-arm64-pvops 5 kernel-build fail   never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 saverestore-support-checkfail   never pass

version targeted for testing:
 xen  80f9c316708400cea4417e36337267d3b26591db
baseline version:
 xen  4d0240e03349fd0715332eae65372e0a47b5a43b

Last test of basis   107157  2017-04-03 14:05:53 Z0 days
Testing same since   107163  2017-04-03 20:02:55 Z0 days1 attempts


People who touched revisions under test:
  Julien Grall 
  Shanker Donthineni 

jobs:
 build-amd64  pass
 build-armhf  pass
 build-amd64-libvirt  pass
 build-arm64-pvopsfail
 test-armhf-armhf-xl  pass
 test-arm64-arm64-xl-xsm  broken  
 test-amd64-amd64-xl-qemuu-debianhvm-i386 pass
 test-amd64-amd64-libvirt pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

+ branch=xen-unstable-smoke
+ revision=80f9c316708400cea4417e36337267d3b26591db
+ . ./cri-lock-repos
++ . ./cri-common
+++ . ./cri-getconfig
+++ umask 002
+++ getrepos
 getconfig Repos
 perl -e '
use Osstest;
readglobalconfig();
print $c{"Repos"} or die $!;
'
+++ local repos=/home/osstest/repos
+++ '[' -z /home/osstest/repos ']'
+++ '[' '!' -d /home/osstest/repos ']'
+++ echo /home/osstest/repos
++ repos=/home/osstest/repos
++ repos_lock=/home/osstest/repos/lock
++ '[' x '!=' x/home/osstest/repos/lock ']'
++ OSSTEST_REPOS_LOCK_LOCKED=/home/osstest/repos/lock
++ exec with-lock-ex -w /home/osstest/repos/lock ./ap-push xen-unstable-smoke 
80f9c316708400cea4417e36337267d3b26591db
+ branch=xen-unstable-smoke
+ revision=80f9c316708400cea4417e36337267d3b26591db
+ . ./cri-lock-repos
++ . ./cri-common
+++ . ./cri-getconfig
+++ umask 002
+++ getrepos
 getconfig Repos
 perl -e '
use Osstest;
readglobalconfig();
print $c{"Repos"} or die $!;
'
+++ local repos=/home/osstest/repos
+++ '[' -z /home/osstest/repos ']'
+++ '[' '!' -d /home/osstest/repos ']'
+++ echo /home/osstest/repos
++ repos=/home/osstest/repos
++ repos_lock=/home/osstest/repos/lock
++ '[' x/home/osstest/repos/lock '!=' x/home/osstest/repos/lock ']'
+ . ./cri-common
++ . ./cri-getconfig
++ umask 002
+ select_xenbranch
+ case "$branch" in
+ tree=xen
+ xenbranch=xen-unstable-smoke
+ qemuubranch=qemu-upstream-unstable
+ '[' xxen = xlinux ']'
+ linuxbranch=
+ '[' xqemu-upstream-unstable = x ']'
+ select_prevxenbranch
++ ./cri-getprevxenbranch xen-unstable-smoke
+ prevxenbranch=xen-4.8-testing
+ '[' x80f9c316708400cea4417e36337267d3b26591db = x ']'
+ : tested/2.6.39.x
+ . ./ap-common
++ : osst...@xenbits.xen.org
+++ getconfig OsstestUpstream
+++ perl -e '
use Osstest;
readglobalconfig();
print $c{"OsstestUpstream"} or die $!;
'
++ :
++ : git://xenbits.xen.org/xen.git
++ : osst...@xenbits.xen.org:/home/xen/git/xen.git
++ : git://xenbits.xen.org/qemu-xen-traditional.git
++ : git://git.kernel.org
++ : git://git.kernel.org/pub/scm/linux/kernel/git
++ : git
++ : git://xenbits.xen.org/xtf.git
++ : osst...@xenbits.xen.org:/home/xen/git/xtf.git
++ : git://xenbits.xen.org/xtf.git
++ : git://xenbits.xen.org/libvirt.git
++ : osst...@xenbits.xen.org:/home/xen/git/libvirt.git
++ : git://xenbits.xen.org/libvirt.git
++ : git://xenbits.xen.org/osstest/rumprun.git
++ : git
++ : git://xenbits.xen.org/osstest/rumprun.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/rumprun.git
++ : git://git.seabios.org/seabios.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/seabios.git
++ : 

Re: [Xen-devel] [PATCH v4 05/27] ARM: GICv3 ITS: map ITS command buffer

2017-04-03 Thread Julien Grall

Hi Andre,

On 04/03/2017 09:28 PM, Andre Przywara wrote:

Instead of directly manipulating the tables in memory, an ITS driver
sends commands via a ring buffer in normal system memory to the ITS h/w
to create or alter the LPI mappings.
Allocate memory for that buffer and tell the ITS about it to be able
to send ITS commands.

Signed-off-by: Andre Przywara 


I am not sure why Stefano and my reviewed-by were not carried from v3. 
Please start collecting them so we don't have to re-review already-reviewed patches.


Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 03/27] ARM: GICv3: allocate LPI pending and property table

2017-04-03 Thread Julien Grall

Hi Andre,

Yeah... another round repeating the same things.

On 04/03/2017 09:28 PM, Andre Przywara wrote:

diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
new file mode 100644
index 000..a003a72
--- /dev/null
+++ b/xen/arch/arm/gic-v3-lpi.c


[...]


+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define LPI_PROPTABLE_NEEDS_FLUSHING(1U << 0)


Newline here.


+/* Global state */
+static struct {
+/* The global LPI property table, shared by all redistributors. */
+uint8_t *lpi_property;
+/*
+ * Number of physical LPIs the host supports. This is a property of
+ * the GIC hardware. We depart from the habit of naming these things
+ * "physical" in Xen, as the GICv3/4 spec uses the term "physical LPI"
+ * in a different context to differentiate them from "virtual LPIs".
+ */
+unsigned long int nr_host_lpis;


On v2, you said you will rename this variable to max_host_lpi_ids and ...


+unsigned int flags;
+} lpi_data;
+
+struct lpi_redist_data {
+void*pending_table;
+};
+
+static DEFINE_PER_CPU(struct lpi_redist_data, lpi_redist);
+
+#define MAX_PHYS_LPIS   (lpi_data.nr_host_lpis - LPI_OFFSET)


... this one to MAX_NR_PHYS_LPIS or even MAX_NR_HOST_LPIS to stay 
consistent.


So please do it.


+
+static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
+{
+uint64_t val;
+void *pendtable;
+
+if ( this_cpu(lpi_redist).pending_table )
+return -EBUSY;
+
+val  = GIC_BASER_CACHE_RaWaWb << GICR_PENDBASER_INNER_CACHEABILITY_SHIFT;
+val |= GIC_BASER_CACHE_SameAsInner << 
GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT;
+val |= GIC_BASER_InnerShareable << GICR_PENDBASER_SHAREABILITY_SHIFT;
+
+/*
+ * The pending table holds one bit per LPI and even covers bits for
+ * interrupt IDs below 8192, so we allocate the full range.
+ * The GICv3 imposes a 64KB alignment requirement, also requires
+ * physically contiguous memory.
+ */
+pendtable = _xzalloc(lpi_data.nr_host_lpis / 8, SZ_64K);
+if ( !pendtable )
+return -ENOMEM;
+
+/* Make sure the physical address can be encoded in the register. */
+if ( (virt_to_maddr(pendtable) & ~GENMASK_ULL(51, 16)) )


The middle ( ... ) are not necessary.

[...]


+/*
+ * Tell a redistributor about the (shared) property table, allocating one
+ * if not already done.
+ */
+static int gicv3_lpi_set_proptable(void __iomem * rdist_base)
+{


[...]


+/* Encode the number of bits needed, minus one */
+reg |= (fls(lpi_data.nr_host_lpis - 1) - 1);


The outer ( ... ) are not necessary.

[...]


+int gicv3_lpi_init_rdist(void __iomem * rdist_base)
+{
+uint32_t reg;
+uint64_t table_reg;
+int ret;
+
+/* We don't support LPIs without an ITS. */
+if ( !gicv3_its_host_has_its() )
+return -ENODEV;
+
+/* Make sure LPIs are disabled before setting up the tables. */
+reg = readl_relaxed(rdist_base + GICR_CTLR);
+if ( reg & GICR_CTLR_ENABLE_LPIS )
+return -EBUSY;
+
+ret = gicv3_lpi_allocate_pendtable(&table_reg);
+if (ret)


Coding style:

if ( ... )


+return ret;
+writeq_relaxed(table_reg, rdist_base + GICR_PENDBASER);
+table_reg = readq_relaxed(rdist_base + GICR_PENDBASER);
+
+/* If the hardware reports non-shareable, drop cacheability as well. */
+if ( !(table_reg & GICR_PENDBASER_SHAREABILITY_MASK) )
+{
+table_reg &= GICR_PENDBASER_SHAREABILITY_MASK;
+table_reg &= GICR_PENDBASER_INNER_CACHEABILITY_MASK;
+table_reg |= GIC_BASER_CACHE_nC << 
GICR_PENDBASER_INNER_CACHEABILITY_SHIFT;
+
+writeq_relaxed(table_reg, rdist_base + GICR_PENDBASER);
+}
+
+return gicv3_lpi_set_proptable(rdist_base);
+}
+
+static unsigned int max_lpi_bits = 20;
+integer_param("max_lpi_bits", max_lpi_bits);
+
+int gicv3_lpi_init_host_lpis(unsigned int hw_lpi_bits)
+{


Again, this should be sanitized. A user could pass max_lpi_bits=10, and I 
don't think this code will behave well.



+lpi_data.nr_host_lpis = BIT_ULL(min(hw_lpi_bits, max_lpi_bits));


Again, nr_host_lpis is "unsigned long" so why are you using BIT_ULL? 
Looking at the introduction of GENMASK_ULL, it likely means nr_host_lpis 
should be unsigned long long.




+
+if ( lpi_data.nr_host_lpis > 16 * 1024 * 1024 )


Hmmm? 16 * 1024 * 1024? Where does it come from? Please add a comment 
and explain in the commit message.


Also, you could make the code more readable and using "16 << 20".


+printk(XENLOG_WARNING "Allocating %lu host LPIs, please limit with 
--max_lpi_bits\n",


Command line options in Xen do not start with "--". Also the user 
may have purposefully chosen a value higher than 16 << 20. So this 
message seems a bit weird. How about:


"%lu host LPIs will be allocated, to limit memory usage please restrict it 
with max_lpi_bits.\n".



+lpi_data.nr_host_lpis);


Please use warning_add, it will gather at 

Re: [Xen-devel] [GSoc] GSoc Introduction : Xen on ARM: create multiple guests from device tree

2017-04-03 Thread Julien Grall

Hi,

On 04/03/2017 10:19 PM, Stefano Stabellini wrote:

On Mon, 3 Apr 2017, Methuku Karthik wrote:
What we would like is to be able to extract the .config from a xen
binary manually, for example using the "strings" command. Let's suppose
that a user is running Xen and finds a bug. We asked her to provide us
with her Xen binary. We get the binary and from it we extract the
.config.


You might want to have a look at what Linux does to embed and extract 
.config, especially the script scripts/ikconfig under the Linux repo.


Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [qemu-mainline test] 107152: regressions - FAIL

2017-04-03 Thread osstest service owner
flight 107152 qemu-mainline real [real]
http://logs.test-lab.xenproject.org/osstest/logs/107152/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-armhf-armhf-xl-credit2 15 guest-start/debian.repeat fail REGR. vs. 107055

Regressions which are regarded as allowable (not blocking):
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stop fail like 107055
 test-armhf-armhf-libvirt-raw 12 saverestore-support-checkfail  like 107055
 test-amd64-amd64-xl-qemuu-win7-amd64 16 guest-stopfail like 107055
 test-armhf-armhf-libvirt-xsm 13 saverestore-support-checkfail  like 107055
 test-amd64-amd64-xl-rtds  9 debian-install   fail  like 107055
 test-armhf-armhf-libvirt 13 saverestore-support-checkfail  like 107055

Tests which did not succeed, but are not blocking:
 test-arm64-arm64-libvirt-xsm  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl   1 build-check(1)   blocked  n/a
 build-arm64-libvirt   1 build-check(1)   blocked  n/a
 test-arm64-arm64-libvirt-qcow2  1 build-check(1)   blocked  n/a
 test-arm64-arm64-libvirt  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-credit2   1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-rtds  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-multivcpu  1 build-check(1)   blocked  n/a
 test-arm64-arm64-xl-xsm   1 build-check(1)   blocked  n/a
 build-arm64-xsm   5 xen-buildfail   never pass
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 build-arm64   5 xen-buildfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 build-arm64-pvops 5 kernel-build fail   never pass
 test-amd64-amd64-libvirt-vhd 11 migrate-support-checkfail   never pass
 test-amd64-amd64-qemuu-nested-amd 16 debian-hvm-install/l1/l2  fail never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 13 saverestore-support-checkfail  never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 12 migrate-support-checkfail never pass
 test-armhf-armhf-xl-cubietruck 13 saverestore-support-checkfail never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  12 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass

version targeted for testing:
 qemuu6954cdc070b4209cc468263455670d8b02765a95
baseline version:
 qemuu95b31d709ba343ad237c3630047ee7438bac4065

Last test of basis   107055  2017-04-01 04:15:35 Z    2 days
Testing same since   107152  2017-04-03 10:42:24 Z    0 days    1 attempts


People who touched revisions under test:
  Eric Blake 
  Jeff Cody 
  Max Reitz 
  Peter Maydell 

jobs:
 build-amd64-xsm  pass
 build-arm64-xsm  fail
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-arm64  fail
 build-armhf 

Re: [Xen-devel] [PATCH v5 3/3] xen/arm: vgic_migrate_irq: do not race against GIC_IRQ_GUEST_MIGRATING

2017-04-03 Thread Stefano Stabellini
On Mon, 3 Apr 2017, Julien Grall wrote:
> Hi Stefano,
> 
> On 31/03/17 21:24, Stefano Stabellini wrote:
> > On Fri, 31 Mar 2017, Julien Grall wrote:
> > > On 30/03/17 00:47, Stefano Stabellini wrote:
> > > > On Fri, 3 Mar 2017, Julien Grall wrote:
> > > What you described is not a data corruption to me.
> > 
> > No, it is not, thanks to the previous two patches. The commit
> > description needs an update.
> > 
> > 
> > > The host IRQ will be routed
> > > to the wrong pCPU and then what? The IRQ will still trigger, ok on the
> > > wrong
> > > pCPU, it will be slower but we are capable to handle that.
> > > 
> > > The use case you describe would only happen if a guest is trying to change
> > > the
> > > routing multiple times while an interrupt is pending. So to be honest, a
> > > sane
> > > guest would not do that. But this would only affect stupid guest.
> > > 
> > > So I don't think this is worth to support considering how this patch will
> > > increase the code complexity in a component that is already a nightmare to
> > > handle.
> > 
> > I think we have to fix this because it is not predictable. Latency could
> > be much higher, depending on who wins the race. It also uses more Xen
> > resources -- the time that Xen spends to send and to handle SGIs could
> > be used for something  else.  I think it is more important to be
> > predictable than correct. Especially given that a sane guest shouldn't
> > do this, I prefer to refuse a "nested" migration we cannot handle (even
> > though it is a mistake) than provide unreliable latency.
> 
> Good point. We already have a couple of places in the vGIC we don't handle and
> print a message instead (see ACTIVER, I*PENDR registers).
> 
> I would prefer to refuse "nested" migration and warn the guest. If someone
> complains, then we can think about it.

That's fine by me.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v5] altp2m: Allow specifying external-only use-case

2017-04-03 Thread Tamas K Lengyel
On Tue, Mar 28, 2017 at 12:59 PM, Daniel De Graaf  wrote:
> On 03/22/2017 02:07 PM, Tamas K Lengyel wrote:
>>
>> Currently setting altp2mhvm=1 in the domain configuration allows access to
>> the
>> altp2m interface for both in-guest and external privileged tools. This
>> poses
>> a problem for use-cases where only external access should be allowed,
>> requiring
>> the user to compile Xen with XSM enabled to be able to appropriately
>> restrict
>> access.
>>
>> In this patch we deprecate the altp2mhvm domain configuration option and
>> introduce the altp2m option, which allows specifying if by default the
>> altp2m
>> interface should be external-only. The information is stored in
>> HVM_PARAM_ALTP2M which we now define with specific XEN_ALTP2M_* modes.
>> If external mode is selected, the XSM check is shifted to use XSM_DM_PRIV
>> type check, thus restricting access to the interface by the guest itself.
>> Note
>> that we keep the default XSM policy untouched. Users of XSM who wish to
>> enforce
>> external mode for altp2m can do so by adjusting their XSM policy directly,
>> as this domain config option does not override an active XSM policy.
>>
>> Also, as part of this patch we adjust the hvmop handler to require
>> HVM_PARAM_ALTP2M to be of a type other than disabled for all ops. This has
>> been
>> previously only required for get/set altp2m domain state, all other
>> options
>> were gated on altp2m_enabled. Since altp2m_enabled only gets set during
>> set
>> altp2m domain state, this change introduces no new requirements to the
>> other
>> ops but makes it more clear that it is required for all ops.
>>
>> Signed-off-by: Tamas K Lengyel 
>> Signed-off-by: Sergej Proskurin 
>
>
> I think the XSM-enabled case using the default types should have the same
> flexibility as the XSM-disabled case.  I agree that it is useful to be able
> to restrict the p2m features based on policy, and I don't think that it's
> useful to expand the number of XSM permissions here.  In that case, the best
> way to proceed would be to require that both the domain configuration and
> XSM policy must allow the action (similar to how SELinux file controls and
> UNIX permissions interact).  Currently, enabling XSM effectively forces the
> value of this setting to "mixed", and "limited" is impossible to use with
> XSM.

I agree, however unfortunately due to the development effort to do
that I will have to drop this patch. An earlier version only lacked
the toolside ack so I thought it was about ready to go in. Hopefully
one day in the future we will have XSM enabled by default and then we
won't have to do things like this patch.

Tamas

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [GSoc] GSoc Introduction : Xen on ARM: create multiple guests from device tree

2017-04-03 Thread Stefano Stabellini
On Mon, 3 Apr 2017, Methuku Karthik wrote:
> Hi Stefano,
> 
> I have asked questions inline. Clarification on the questions below would really 
> help me in contributing. Please look into the questions. I am highlighting 
> them in this mail.

Hi Methuku,

please do not use HTML in emails.


>  For example, Dom1 should be able to share a page with Dom2 and a
>   different page with Dom3. It needs to be clear which page is shared with
> which VM from the VM config files.
> 
>  
> when we create vms using xl create , for example if i am planning create 
> three VMs,
> 
> Dom1, Dom2 and Dom3, because of the page sharing are we imposing any order of
>   creating VMs.
> 
>   I am asking this question to clarify this point, while creation of Dom1 if 
> its
>   sharing pages with Dom 2 and Dom 3 , should Xen already be aware of Dom2 
> and Dom3?

I don't think so. The user should be able to share the same page even
with Dom4 when it comes along.



>   I am referring to following links to understand about mem sharing.
> 
>   
> http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=tools/tests/mem-sharing/memshrtoo
>   l.c;h=8e5e22b9e95d91f1441d8eb226b64852eca075d5;hb=HEAD
>   http://xenbits.xen.org/docs/unstable/misc/grant-tables.txt
> 
>   I also want to figure out how domains are created and how xl tool parses 
> the file
>   and passes on the information to domain creation . Let me know if i am 
> thinking in
>   right direction.

I think we won't be able to use the grant table for this. The toolstack
will have to manually map a given page of one guest to another guest.


>   suggest any resource or work which would help with designing config file 
> options.

Look at the existing xl config file options which are described on the
man page.


>  I will start with Xen-38 that would help me in exploring init code. Correct 
> me if i
>   am wrong.

XEN-38 is a good start but it's actually only a build change.


>   I have a few questions and clarifications before proceeding further. I have 
> checked
>   how config.gz file is generated in linux kernel source.
>   In linux kernel sources, if CONFIG_IKCONFIG_PROC option is set, .config 
> file which
>   is generated after choosing options with lets say from make menuconfig  is 
> read into
>   a variable, this way its part of build.
> 
>   during init time proc_create service is used to create this file config.gz.
>   http://lxr.free-electrons.com/source/kernel/configs.c
> 
> 
>   I guess i have to do something similar.
> 
>   Questions :
> 
>   1. When Xen is build using the make command, we effectively set 
> XEN_COMPILE_ARCH,
>   XEN_OS, XEN_TARGET which allow using corresponding .mk file from config 
> folder.
>   These variable in turn decide what are the config options. I wasnt able to 
> find any
>   .config. Please direct me to find the file or if i am missing something. 

Give a look at xen/.config, that is the file we would like to embed into
the xen binary. Probably it would need to be included in an header file
and become a very long char[].


>   2. Where and how this config file should be accessible to  User once in 
> Dom0. Is the
>   xen folder created to keep the information about guest domains like proc 
> for process
>   in linux kernel ? Will that be suitable location to have config file.

One option is to export it as a new hypercall. That would be great, but
I think that is beyond the scope of a small initial code contribution.

What we would like is to be able to extract the .config from a xen
binary manually, for example using the "strings" command. Let's suppose
that a user is running Xen and finds a bug. We asked her to provide us
with her Xen binary. We get the binary and from it we extract the
.config.


>   3. if i assume that i will approach similarly, i have to add services to be 
> called
>   during init stage. As am not acquainted with code base, i could just grep 
> with
>   _start or _init or similar strings to find out initialization code. Any
>   input(function name or filename) to look for will be of great help.
>
> On Mon, Apr 3, 2017 at 3:35 PM, Stefano Stabellini  
> wrote:
>   Thank you! I am looking forward to your contribution on the list! If you
>   encounter any issues, please let us know.
> 
>   The code contribution is more important, but if you find the time in the
>   next few days, it would be nice to add more details to the
>   implementation plan, such as where the memory gets allocated, whether it
>   is taken from a VM, and if so, which one. Also what kind of "token"
>   could be used in the config option and how the toolstack could keep
>   track of the token - memory page references.
> 
>   Thanks,
> 
>   Stefano
> 
>   On Mon, 3 Apr 2017, Methuku Karthik wrote:
>   > Hi Stefano,
>   >
>   > Thanks for Input. I was not able to spend enough time last couple of 
> weeks due to
>   > projects. I have received mail from Lars Kurt 

Re: [Xen-devel] [PATCH v4 02/27] ARM: GICv3 ITS: initialize host ITS

2017-04-03 Thread Julien Grall

Hi Andre,

On 04/03/2017 09:28 PM, Andre Przywara wrote:

Map the registers frame for each host ITS and populate the host ITS
structure with some parameters describing the size of certain properties
like the number of bits for device IDs.
Introduce a command line parameter to limit the number of devices Xen
should handle. This defaults to the value advertised by hardware.

Signed-off-by: Andre Przywara 
---
 docs/misc/xen-command-line.markdown |  9 
 xen/arch/arm/gic-v3-its.c   | 38 
 xen/arch/arm/gic-v3.c   |  5 +
 xen/include/asm-arm/gic_v3_its.h| 44 +
 4 files changed, 96 insertions(+)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 9eb85d6..5a90625 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1172,6 +1172,15 @@ based interrupts. Any higher IRQs will be available for 
use via PCI MSI.
 ### maxcpus
 > `= <integer>`

+### max\_its\_device\_bits
+> `= <integer>`
+
+Specifies the maximum number of devices using MSIs on the ARM GICv3 ITS
+controller to allocate table entries for. Each table entry uses a hardware
+specific size, typically 8 or 16 bytes. This value is given as the number
+of bits required to hold one device ID.
+Defaults to the machine provided value, which is at most 32 bits.


I will re-explain my point here for convenience. I am not in favor of 
this option because an average user will not be able to know the correct 
value without the datasheet or help from the vendor.


If a platform is advertising too many device bits and is not able to cope, 
then it should be fixed per-platform.


The rest of this patch looks good to me.

Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 03/26] ARM: GICv3 ITS: allocate device and collection table

2017-04-03 Thread Julien Grall

On 04/03/2017 08:39 PM, Andre Przywara wrote:

Hi,

On 03/04/17 18:22, Julien Grall wrote:

Hi Andre,

On 03/04/17 16:38, Julien Grall wrote:

On 31/03/17 19:05, Andre Przywara wrote:

Each ITS maps a pair of a DeviceID (for instance derived from a PCI
b/d/f triplet) and an EventID (the MSI payload or interrupt ID) to a
pair of LPI number and collection ID, which points to the target CPU.
This mapping is stored in the device and collection tables, which
software
has to provide for the ITS to use.
Allocate the required memory and hand it to the ITS.
The maximum number of devices is limited to a compile-time constant
exposed in Kconfig.

Signed-off-by: Andre Przywara 


Reviewed-by: Julien Grall 


Actually I will withdraw my reviewed-by. I didn't spot you keep the
command line around which I clearly say no and gave some reasons why.
Sorry for the mess.

`
I thought we were talking about the Kconfig option to drop here (which
the commit msg wrongly states as still being around)?

For implementations that don't support indirect tables, but still
advertise high numbers, I'd find it useful to have the possibility to
limit this to avoid memory waste.


Again, how will the user know the magic numbers? If the platform 
advertises a high device number, then it is none of our business. If the 
number needs to be reduced, this should be done in platform-specific code.


Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2] setup vwfi correctly on cpu0

2017-04-03 Thread Stefano Stabellini
On Mon, 3 Apr 2017, Julien Grall wrote:
> Hi Stefano,
> 
> On 03/31/2017 11:37 PM, Stefano Stabellini wrote:
> > parse_vwfi runs after init_traps on cpu0, potentially resulting in the
> > wrong HCR_EL2 for it. Secondary cpus boot after parse_vwfi, so in their
> > case init_traps will write the correct set of flags to HCR_EL2.
> > 
> > For cpu0, fix the issue by changing HCR_EL2 setting from a new
> > presmp_initcall.
> > 
> > Signed-off-by: Stefano Stabellini 
> > 
> > ---
> > This patch should be applied to 4.8, 4.7, 4.6, not to unstable (it will be
> > fixed differently there).
> > ---
> > 
> > diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> > index 614501f..65b5397 100644
> > --- a/xen/arch/arm/traps.c
> > +++ b/xen/arch/arm/traps.c
> > @@ -115,6 +115,22 @@ static void __init parse_vwfi(const char *s)
> >  }
> >  custom_param("vwfi", parse_vwfi);
> > 
> > +static int __init vwfi_init(void)
> > +{
> > +/*
> > + * HCR_EL2 has already been set on cpu0, change the setting here, if
> > + * needed. Other cpus haven't booted yet, init_traps will setup
> > + * HCR_EL2 correctly.
> > + */
> > +if ( vwfi == NATIVE ) {
> 
> Coding style:
> 
> if ( ... )
> {
> 
> > +register_t hcr;
> 
> NIT: newline here please.
> 
> 
> > +hcr = READ_SYSREG(HCR_EL2);
> > +WRITE_SYSREG(hcr & ~(HCR_TWI|HCR_TWE), HCR_EL2);
> > +}
> 
> Ditto.
> 
> > +return 0;
> > +}
> > +presmp_initcall(vwfi_init);
> > +
> >  void init_traps(void)
> >  {
> >  /* Setup Hyp vector base */
> > 
> 
> With that:
> 
> Reviewed-by: Julien Grall 

I committed the patch with your comments. However, because of the
backporting rules (only pushed-gated commits should be backported), I
only pushed it to staging for now.

I expect Wei Chen (CC'ed) to revert this patch completely or partially
as part of his series. Wei, to be clear, I committed this patch to fix a
bug on the stable trees where the vwfi option is not set correctly on
cpu0. With your series, this bug will be fixed in a much nicer way.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 06/27] ARM: GICv3 ITS: introduce device mapping

2017-04-03 Thread Julien Grall



On 04/03/2017 09:08 PM, Andre Przywara wrote:

Hi,


Hi Andre,


On 22/03/17 17:29, Julien Grall wrote:

+int gicv3_its_map_guest_device(struct domain *d,
+   paddr_t host_doorbell, uint32_t
host_devid,
+   paddr_t guest_doorbell, uint32_t
guest_devid,
+   uint32_t nr_events, bool valid)
+{
+void *itt_addr = NULL;
+struct host_its *hw_its;
+struct its_devices *dev = NULL, *temp;
+struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent
= NULL;
+int ret = -ENOENT;
+
+hw_its = gicv3_its_find_by_doorbell(host_doorbell);
+if ( !hw_its )
+return ret;
+
+/* check for already existing mappings */
+spin_lock(&d->arch.vgic.its_devices_lock);
+while ( *new )
+{
+temp = rb_entry(*new, struct its_devices, rbnode);
+
+parent = *new;
+if ( !compare_its_guest_devices(temp, guest_doorbell,
guest_devid) )
+{
+if ( !valid )
+rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
+
+spin_unlock(&d->arch.vgic.its_devices_lock);
+
+if ( valid )


Again, a printk(XENLOG_GUEST...) here would be useful to know which host
DeviceID was associated to the guest DeviceID.


I added a gprintk(XENLOG_DEBUG, ), which I think is more appropriate (as
it may spam the console when some stupid guest is running). Let me know
if you want to have a different loglevel.


I don't think this is more appropriate. gprintk will print the domain ID 
of the current domain, whilst this function will be called by the 
toolstack in the future.


Furthermore, if you look at the implementation of gprintk you will notice 
that it is basically turning into printk(XENLOG_GUEST...) and adding 
information of the current vCPU.


What matters for ratelimiting is XENLOG_GUEST.




+return -EBUSY;
+
+return remove_mapped_guest_device(temp);


Just above you removed the device from the RB-tree but this function may
fail and never free the memory. This means that memory will be leaked
leading to a potential denial of service.


So I fixed this case in v4, though there is still a tiny chance of a
memleak: if the MAPD(V=0) command fails. We can't free the ITT table
then, really, because it still belongs to the ITS. I don't think we can
do much about it, though.


This is a leak and even tiny is quite worrying. How do you plan to 
address this in the future? What is the best thing to do?



I free the other allocations of the memory now, anyway.


+}
+
+if ( compare_its_guest_devices(temp, guest_doorbell,
guest_devid) > 0 )
+new = &((*new)->rb_left);
+else
+new = &((*new)->rb_right);
+}
+
+if ( !valid )
+goto out_unlock;
+
+ret = -ENOMEM;
+
+/* An Interrupt Translation Table needs to be 256-byte aligned. */
+itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);
+if ( !itt_addr )
+goto out_unlock;
+
+dev = xzalloc(struct its_devices);
+if ( !dev )
+goto out_unlock;
+
+ret = its_send_cmd_mapd(hw_its, host_devid, fls(nr_events - 1) - 1,


I don't understand why nr_events - 1. Can you explain?


Xen lacks an ilog2, so "fls" is the closest I could find. "fls" has this
slightly weird semantic (from the Linux source):
"Note fls(0) = 0, fls(1) = 1, fls(0x8000) = 32."
I think this translates into: "How many bits do I need to express this
number?". For our case the highest event number we need to encode is
"nr_events - 1", hence the subtraction.
So is it worth to introduce a:
static inline int ilog2_64(uint64_t n) ...
in xen/include/asm-arm/bitops.h to document this?


This might make the code easier to read.



[...]


+/* Removing any connections a domain had to any ITS in the system. */
+void gicv3_its_unmap_all_devices(struct domain *d)
+{
+struct rb_node *victim;
+struct its_devices *dev;
+
+/*
+ * This is an easily readable, yet inefficient implementation.
+ * It uses the provided iteration wrapper and erases each node,
which
+ * possibly triggers rebalancing.
+ * This seems overkill since we are going to abolish the whole
tree, but
+ * avoids an open-coded re-implementation of the traversal
functions with
+ * some recursive function calls.
+ * Performance does not matter here, since we are destroying a
domain.


Again, this is slightly untrue. Performance matters when destroying a
domain as Xen cannot be preempted. So if it takes too long, you will
have an impact on the overall system.


I reworded this sentence in v3, since you apparently misunderstood me.
By inefficient I meant sub-optimal, but this is not a _critical_ path,
so we don't care too much. The execution time is clearly bounded by the
number of devices. We simply shouldn't allow gazillion of devices on a
DomU if we care about those things.


This is a very naive way of thinking how domain destruction is working 
on 

Re: [Xen-devel] [PATCH v6 0/7] Xen transport for 9pfs frontend driver

2017-04-03 Thread Stefano Stabellini
I'll ask the other Xen maintainers to queue this up on the Xen tree for
the appropriate merge window. If you are not OK with that, please shout.

On Thu, 23 Mar 2017, Stefano Stabellini wrote:
> 9pfs maintainers,
> 
> The patch series is fully acked, except for the header import from Xen
> (http://marc.info/?l=linux-kernel&m=149020945130417&w=2) which needs to
> be acked in Xen first (this is the corresponding Xen series:
> http://marc.info/?l=xen-devel&m=149013482308654).
> 
> Once that happens, the series is ready to go in. Are you OK with that?
> 
> If so, would you rather have the code go in via your tree or the Xen
> tree?
> 
> Cheers,
> 
> Stefano
> 
> 
> On Wed, 22 Mar 2017, Stefano Stabellini wrote:
> > Hi all,
> > 
> > This patch series implements a new transport for 9pfs, aimed at Xen
> > systems.
> > 
> > The transport is based on a traditional Xen frontend and backend drivers
> > pair. This patch series implements the frontend, which typically runs in
> > a regular unprivileged guest.
> > 
> > I also sent a series that implements the backend in userspace in QEMU,
> > which typically runs in Dom0 (but could also run in another guest).
> > 
> > The frontend complies to the Xen transport for 9pfs specification
> > version 1, available here:
> > 
> > https://xenbits.xen.org/docs/unstable/misc/9pfs.html
> > 
> > 
> > Changes in v6:
> > - add reviewed-bys
> > - fix error paths
> > - make p9_xen_write_todo return bool
> > 
> > Changes in v5:
> > - test priv->tag instead of ret
> > - run checkpatch.pl against the whole series, fix all issues
> > - set intf->ring_order appropriately
> > - use shorter link to 9pfs spec
> > 
> > Changes in v4:
> > - code style improvements
> > - use xenbus_read_unsigned when possible
> > - do not leak "versions"
> > - introduce BUILD_BUG_ON
> > - introduce rwlock to protect the xen_9pfs_devs list
> > - add review-by
> > 
> > Changes in v3:
> > - add full copyright header to trans_xen.c
> > - rename ring->ring to ring->data
> > - handle gnttab_grant_foreign_access errors
> > - remove ring->bytes
> > - wrap long lines
> > - add reviewed-by
> > 
> > Changes in v2:
> > - use XEN_PAGE_SHIFT instead of PAGE_SHIFT
> > - remove unnecessary initializations
> > - fix error paths
> > - fix memory allocations for 64K kernels
> > - simplify p9_xen_create and p9_xen_close
> > - use virt_XXX barriers
> > - set status = REQ_STATUS_ERROR inside the p9_xen_response loop
> > - add in-code comments
> > 
> > 
> > Stefano Stabellini (7):
> >   xen: import new ring macros in ring.h
> >   xen: introduce the header file for the Xen 9pfs transport protocol
> >   xen/9pfs: introduce Xen 9pfs transport driver
> >   xen/9pfs: connect to the backend
> >   xen/9pfs: send requests to the backend
> >   xen/9pfs: receive responses
> >   xen/9pfs: build 9pfs Xen transport driver
> > 
> >  include/xen/interface/io/9pfs.h |  42 
> >  include/xen/interface/io/ring.h | 133 ++
> >  net/9p/Kconfig  |   8 +
> >  net/9p/Makefile |   4 +
> >  net/9p/trans_xen.c  | 539 
> > 
> >  5 files changed, 726 insertions(+)
> >  create mode 100644 include/xen/interface/io/9pfs.h
> >  create mode 100644 net/9p/trans_xen.c
> > 
> > 
> > Cheers,
> > 
> > Stefano
> > 
> 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Shattering superpages impact on IOMMU in Xen

2017-04-03 Thread Stefano Stabellini
On Mon, 3 Apr 2017, Oleksandr Tyshchenko wrote:
> On Mon, Apr 3, 2017 at 9:06 PM, Julien Grall  wrote:
> > Hi Andrew,
> >
> >
> > On 03/04/17 18:16, Andrew Cooper wrote:
> >>
> >> On 03/04/17 18:02, Julien Grall wrote:
> >>>
> >>> Hi Andrew,
> >>>
> >>> On 03/04/17 17:42, Andrew Cooper wrote:
> 
>  On 03/04/17 17:24, Oleksandr Tyshchenko wrote:
> >
> > Hi, all.
> >
> > Playing with non-shared IOMMU in Xen on ARM I faced one interesting
> > thing. I found out that the superpages were shattered during domain
> > life cycle.
> > This is the result of mapping of foreign pages, ballooning memory,
> > even if domain maps Xen shared pages, etc.
> > I don't bother with the memory fragmentation at the moment. But,
> > shattering bothers me from the IOMMU point of view.
> > As Xen owns the IOMMU it might manipulate IOMMU page tables when
> > a passthrough/protected device is doing DMA in Linux. It is hard to detect
> > when the DMA transaction isn't in progress
> > in order to prevent this race. So, if we have inflight transaction
> > from a device when changing IOMMU mapping we might get into trouble.
> > Unfortunately, not in all the cases the
> > faulting transaction can be restarted. The chance to hit the problem
> > increases during shattering.
> >
> > I did next test:
> > The dom0 on my setup contains ethernet IP that are protected by IOMMU.
> > What is more, as the IOMMU I am playing with supports superpages (2M,
> > 1G) the IOMMU driver
> > takes into account these capabilities when building page tables. As I
> > gave 256 MB for dom0, the IOMMU mapping was built by 2M memory blocks
> > only. As I am using NFS for both dom0 and domU the ethernet IP
> > performs DMA transactions almost all the time.
> > Sometimes, I see IOMMU page faults while creating a guest domain. I
> > think it happens while Xen is shattering 2M mappings into 4K mappings (it
> > unmaps dom0 pages by one 4K page at a time, then maps domU pages there
> > for copying domU images).
> > But, I don't see any page faults when the IOMMU page table was built
> > by 4K pages only.
> >
> > I had a talk with Julien on IRC and we came to the conclusion that the
> > safest way would be to use 4K pages to prevent shattering, so the
> > IOMMU shouldn't report superpage capability.
> > On the other hand, if we build IOMMU from 4K pages we will have
> > performance drop (during building, walking page tables), TLB pressure,
> > etc.
> > Another possible solution Julien was suggesting is to always
> > ballooning with 2M, 1G, and not using 4K. That would help us to
> > prevent shattering effect.
> > The discussion was moved to the ML since it seems to be a generic
> > issue and the right solution should be thought of.
> >
> > What do you think is the right way to follow? Use 4K pages and don't
> > bother with shattering or try to optimize? And if the idea to make
> > balloon mechanism smarter makes sense how to teach balloon to do so?
> > Thank you.
> 
> 
>  Ballooning and foreign mappings are terrible for trying to retain
>  superpage mappings.  No OS, not even Linux, can sensibly provide victim
>  pages in a useful way to avoid shattering.
> 
>  If you care about performance, don't ever balloon.  Foreign mappings in
>  translated guests should start from the top of RAM, and work upwards.
> >>>
> >>>
> >>> I am not sure I understand this. Can you expand?
> >>
> >>
> >> I am not sure what is unclear.  Handing random frames of RAM back to the
> >> hypervisor is what exacerbates host superpage fragmentation, and all
> >> balloon drivers currently do it.
> >>
> >> If you want to avoid host superpage fragmentation, don't use a
> >> scattergun approach of handing frames back to Xen.  However, because
> >> even Linux doesn't provide enough hooks into the physical memory
> >> management logic, the only solution is to not balloon at all, and to use
> >> already-unoccupied frames for foreign mappings.
> >
> >
> > Do you have any pointer in the Linux code?
> >
> >
> >>
> >>>
> 
> 
>  As for the IOMMU specifically, things are rather easier.  It is the
>  guests responsibility to ensure that frames offered up for ballooning or
>  foreign mappings are unused.  Therefore, if anything cares about the
>  specific 4K region becoming non-present in the IOMMU mappings, it is the
>  guest kernels fault for offering up a frame already in use.
> 
>  For the shattering however, It is Xen's responsibility to ensure that
>  all other mappings stay valid at all points.  The correct way to do this
>  is to construct a new L1 table, mirroring the L2 superpage but lacking
>  the specific 4K mapping in question, then atomically replace the L2
>  superpage entry with the new L1 table, 

[Xen-devel] [PATCH v4 01/27] ARM: GICv3 ITS: parse and store ITS subnodes from hardware DT

2017-04-03 Thread Andre Przywara
Parse the DT GIC subnodes to find every ITS MSI controller the hardware
offers. Store that information in a list to both propagate all of them
later to Dom0, but also to be able to iterate over all ITSes.
This introduces an ITS Kconfig option.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/Kconfig |  5 +++
 xen/arch/arm/Makefile|  1 +
 xen/arch/arm/gic-v3-its.c| 77 
 xen/arch/arm/gic-v3.c| 10 +++---
 xen/include/asm-arm/gic_v3_its.h | 67 ++
 5 files changed, 156 insertions(+), 4 deletions(-)
 create mode 100644 xen/arch/arm/gic-v3-its.c
 create mode 100644 xen/include/asm-arm/gic_v3_its.h

diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index 43123e6..d46b98c 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -45,6 +45,11 @@ config ACPI
 config HAS_GICV3
bool
 
+config HAS_ITS
+bool
+prompt "GICv3 ITS MSI controller support" if EXPERT = "y"
+depends on HAS_GICV3
+
 endmenu
 
 menu "ARM errata workaround via the alternative framework"
diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
index 0ce94a8..39c0a03 100644
--- a/xen/arch/arm/Makefile
+++ b/xen/arch/arm/Makefile
@@ -18,6 +18,7 @@ obj-$(EARLY_PRINTK) += early_printk.o
 obj-y += gic.o
 obj-y += gic-v2.o
 obj-$(CONFIG_HAS_GICV3) += gic-v3.o
+obj-$(CONFIG_HAS_ITS) += gic-v3-its.o
 obj-y += guestcopy.o
 obj-y += hvm.o
 obj-y += io.o
diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
new file mode 100644
index 000..6b02349
--- /dev/null
+++ b/xen/arch/arm/gic-v3-its.c
@@ -0,0 +1,77 @@
+/*
+ * xen/arch/arm/gic-v3-its.c
+ *
+ * ARM GICv3 Interrupt Translation Service (ITS) support
+ *
+ * Copyright (C) 2016,2017 - ARM Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+#include 
+#include 
+
+/*
+ * No lock here, as this list gets only populated upon boot while scanning
+ * firmware tables for all host ITSes, and only gets iterated afterwards.
+ */
+LIST_HEAD(host_its_list);
+
+bool gicv3_its_host_has_its(void)
+{
+return !list_empty(&host_its_list);
+}
+
+/* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
+void gicv3_its_dt_init(const struct dt_device_node *node)
+{
+const struct dt_device_node *its = NULL;
+struct host_its *its_data;
+
+/*
+ * Check for ITS MSI subnodes. If any, add the ITS register
+ * frames to the ITS list.
+ */
+dt_for_each_child_node(node, its)
+{
+uint64_t addr, size;
+
+if ( !dt_device_is_compatible(its, "arm,gic-v3-its") )
+continue;
+
+if ( dt_device_get_address(its, 0, &addr, &size) )
+panic("GICv3: Cannot find a valid ITS frame address");
+
+its_data = xzalloc(struct host_its);
+if ( !its_data )
+panic("GICv3: Cannot allocate memory for ITS frame");
+
+its_data->addr = addr;
+its_data->size = size;
+its_data->dt_node = its;
+
+printk("GICv3: Found ITS @0x%lx\n", addr);
+
+list_add_tail(&its_data->entry, &host_its_list);
+}
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index 695f01f..b626298 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -1227,11 +1228,12 @@ static void __init gicv3_dt_init(void)
  */
 res = dt_device_get_address(node, 1 + gicv3.rdist_count,
 , );
-if ( res )
-return;
+if ( !res )
+dt_device_get_address(node, 1 + gicv3.rdist_count + 2,
+  , );
 
-dt_device_get_address(node, 1 + gicv3.rdist_count + 2,
-  , );
+/* Check for ITS child nodes and build the host ITS list accordingly. */
+gicv3_its_dt_init(node);
 }
 
 static int gicv3_iomem_deny_access(const struct domain *d)
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
new file mode 100644
index 000..765a655
--- /dev/null
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -0,0 +1,67 @@
+/*
+ * ARM GICv3 ITS support
+ *
+ * Andre Przywara 
+ * Copyright (c) 2016,2017 ARM Ltd.
+ *
+ * This 

[Xen-devel] [PATCH v4 06/27] ARM: GICv3 ITS: introduce ITS command handling

2017-04-03 Thread Andre Przywara
To be able to easily send commands to the ITS, create the respective
wrapper functions, which take care of the ring buffer.
The first two commands we implement provide methods to map a collection
to a redistributor (aka host core) and to flush the command queue (SYNC).
Start using these commands for mapping one collection to each host CPU.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic-v3-its.c | 181 ++
 xen/arch/arm/gic-v3-lpi.c |  28 ++
 xen/arch/arm/gic-v3.c |  26 +-
 xen/include/asm-arm/gic_v3_defs.h |   2 +
 xen/include/asm-arm/gic_v3_its.h  |  38 +++-
 5 files changed, 273 insertions(+), 2 deletions(-)

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index 51b1b60..8cc0ad8 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -19,11 +19,14 @@
  */
 
 #include 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 
 #define ITS_CMD_QUEUE_SZ                SZ_1M
 
@@ -38,6 +41,145 @@ bool gicv3_its_host_has_its(void)
 return !list_empty(&host_its_list);
 }
 
+#define BUFPTR_MASK GENMASK_ULL(19, 5)
+static int its_send_command(struct host_its *hw_its, const void *its_cmd)
+{
+/* Some small grace period in case the command queue is congested. */
+s_time_t deadline = NOW() + MILLISECS(1);
+uint64_t readp, writep;
+int ret = -EBUSY;
+
+/* No ITS commands from an interrupt handler (at the moment). */
+ASSERT(!in_irq());
+
+spin_lock(&hw_its->cmd_lock);
+
+do {
+readp = readq_relaxed(hw_its->its_base + GITS_CREADR) & BUFPTR_MASK;
+writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) & BUFPTR_MASK;
+
+if ( ((writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ) != readp )
+{
+ret = 0;
+break;
+}
+
+/*
+ * If the command queue is full, wait for a bit in the hope it drains
+ * before giving up.
+ */
+spin_unlock(&hw_its->cmd_lock);
+cpu_relax();
+udelay(1);
+spin_lock(&hw_its->cmd_lock);
+} while ( NOW() <= deadline );
+
+if ( ret )
+{
+spin_unlock(&hw_its->cmd_lock);
+if ( printk_ratelimit() )
+printk(XENLOG_WARNING "host ITS: command queue full.\n");
+return ret;
+}
+
+memcpy(hw_its->cmd_buf + writep, its_cmd, ITS_CMD_SIZE);
+if ( hw_its->flags & HOST_ITS_FLUSH_CMD_QUEUE )
+clean_and_invalidate_dcache_va_range(hw_its->cmd_buf + writep,
+ ITS_CMD_SIZE);
+else
+dsb(ishst);
+
+writep = (writep + ITS_CMD_SIZE) % ITS_CMD_QUEUE_SZ;
+writeq_relaxed(writep & BUFPTR_MASK, hw_its->its_base + GITS_CWRITER);
+
+spin_unlock(&hw_its->cmd_lock);
+
+return 0;
+}
+
+/* Wait for an ITS to finish processing all commands. */
+static int gicv3_its_wait_commands(struct host_its *hw_its)
+{
+/* Define an upper limit for our wait time. */
+s_time_t deadline = NOW() + MILLISECS(100);
+uint64_t readp, writep;
+
+do {
+spin_lock(&hw_its->cmd_lock);
+readp = readq_relaxed(hw_its->its_base + GITS_CREADR) & BUFPTR_MASK;
+writep = readq_relaxed(hw_its->its_base + GITS_CWRITER) & BUFPTR_MASK;
+spin_unlock(&hw_its->cmd_lock);
+
+if ( readp == writep )
+return 0;
+
+cpu_relax();
+udelay(1);
+} while ( NOW() <= deadline );
+
+return -ETIMEDOUT;
+}
+
+static uint64_t encode_rdbase(struct host_its *hw_its, unsigned int cpu,
+  uint64_t reg)
+{
+reg &= ~GENMASK_ULL(51, 16);
+
+reg |= gicv3_get_redist_address(cpu, hw_its->flags & HOST_ITS_USES_PTA);
+
+return reg;
+}
+
+static int its_send_cmd_sync(struct host_its *its, unsigned int cpu)
+{
+uint64_t cmd[4];
+
+cmd[0] = GITS_CMD_SYNC;
+cmd[1] = 0x00;
+cmd[2] = encode_rdbase(its, cpu, 0x0);
+cmd[3] = 0x00;
+
+return its_send_command(its, cmd);
+}
+
+static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
+ unsigned int cpu)
+{
+uint64_t cmd[4];
+
+cmd[0] = GITS_CMD_MAPC;
+cmd[1] = 0x00;
+cmd[2] = encode_rdbase(its, cpu, collection_id);
+cmd[2] |= GITS_VALID_BIT;
+cmd[3] = 0x00;
+
+return its_send_command(its, cmd);
+}
+
+/* Set up the (1:1) collection mapping for the given host CPU. */
+int gicv3_its_setup_collection(unsigned int cpu)
+{
+struct host_its *its;
+int ret;
+
+list_for_each_entry(its, &host_its_list, entry)
+{
+ret = its_send_cmd_mapc(its, cpu, cpu);
+if ( ret )
+return ret;
+
+ret = its_send_cmd_sync(its, cpu);
+if ( ret )
+return ret;
+
+ret = gicv3_its_wait_commands(its);
+if ( ret )
+return ret;
+}
+
+return 0;
+}
+
 #define BASER_ATTR_MASK   \

[Xen-devel] [PATCH v4 25/27] ARM: vITS: create and initialize virtual ITSes for Dom0

2017-04-03 Thread Andre Przywara
For each hardware ITS create and initialize a virtual ITS for Dom0.
We use the same memory mapped address to keep the doorbell working.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/vgic-v3-its.c   | 32 
 xen/arch/arm/vgic-v3.c   | 17 +
 xen/include/asm-arm/domain.h |  1 +
 xen/include/asm-arm/gic_v3_its.h | 16 
 4 files changed, 66 insertions(+)

diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index 35a0730..dfb6eb3 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -1080,6 +1080,38 @@ static const struct mmio_handler_ops 
vgic_its_mmio_handler = {
 .write = vgic_v3_its_mmio_write,
 };
 
+int vgic_v3_its_init_virtual(struct domain *d, paddr_t guest_addr,
+ unsigned int devid_bits, unsigned int intid_bits)
+{
+struct virt_its *its;
+uint64_t base_attr;
+
+its = xzalloc(struct virt_its);
+if ( ! its )
+return -ENOMEM;
+
+base_attr  = GIC_BASER_InnerShareable << GITS_BASER_SHAREABILITY_SHIFT;
+base_attr |= GIC_BASER_CACHE_SameAsInner << 
GITS_BASER_OUTER_CACHEABILITY_SHIFT;
+base_attr |= GIC_BASER_CACHE_RaWaWb << GITS_BASER_INNER_CACHEABILITY_SHIFT;
+
+its->cbaser  = base_attr;
+base_attr |= 0ULL << GITS_BASER_PAGE_SIZE_SHIFT;
+its->baser_dev  = GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT;
+its->baser_dev |= (7ULL << GITS_BASER_ENTRY_SIZE_SHIFT) | base_attr;
+its->baser_coll  = GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT;
+its->baser_coll |= (1ULL << GITS_BASER_ENTRY_SIZE_SHIFT) | base_attr;
+its->d = d;
+its->doorbell_address = guest_addr + ITS_DOORBELL_OFFSET;
+its->devid_bits = devid_bits;
+its->intid_bits = intid_bits;
+spin_lock_init(&its->vcmd_lock);
+spin_lock_init(&its->its_lock);
+
+register_mmio_handler(d, &vgic_its_mmio_handler, guest_addr, SZ_64K, its);
+
+return 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index ebcfc16..3fc309e 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1582,6 +1583,7 @@ static int vgic_v3_domain_init(struct domain *d)
  */
 if ( is_hardware_domain(d) )
 {
+struct host_its *hw_its;
 unsigned int first_cpu = 0;
 
 d->arch.vgic.dbase = vgic_v3_hw.dbase;
@@ -1607,6 +1609,21 @@ static int vgic_v3_domain_init(struct domain *d)
 
 first_cpu += size / d->arch.vgic.rdist_stride;
 }
+d->arch.vgic.nr_regions = vgic_v3_hw.nr_rdist_regions;
+
+list_for_each_entry(hw_its, &host_its_list, entry)
+{
+/*
+* For each host ITS create a virtual ITS using the same
+* base and thus doorbell address.
+* Use the same number of device ID bits as the host, and
+* allow 20 bits for the interrupt ID.
+*/
+vgic_v3_its_init_virtual(d, hw_its->addr, hw_its->devid_bits, 20);
+
+d->arch.vgic.has_its = true;
+}
+
 }
 else
 {
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index f460457..6a60630 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -115,6 +115,7 @@ struct arch_domain
 spinlock_t its_devices_lock;/* Protects the its_devices tree */
 struct radix_tree_root pend_lpi_tree; /* Stores struct pending_irq's */
 rwlock_t pend_lpi_tree_lock;/* Protects the pend_lpi_tree */
+bool has_its;
 #endif
 } vgic;
 
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
index 3b5f898..fb05311 100644
--- a/xen/include/asm-arm/gic_v3_its.h
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -154,6 +154,14 @@ uint64_t gicv3_get_redist_address(unsigned int cpu, bool 
use_pta);
 int gicv3_its_setup_collection(unsigned int cpu);
 
 /*
+ * Create and register a virtual ITS at the given guest address.
+ * If a host ITS is specified, a hardware domain can reach out to that host
+ * ITS to deal with devices and LPI mappings and can enable/disable LPIs.
+ */
+int vgic_v3_its_init_virtual(struct domain *d, paddr_t guest_addr,
+unsigned int devid_bits, unsigned int intid_bits);
+
+/*
  * Map a device on the host by allocating an ITT on the host (ITS).
  * "nr_event" specifies how many events (interrupts) this device will need.
  * Setting "valid" to false deallocates the device.
@@ -219,6 +227,14 @@ static inline void gicv3_its_unmap_all_devices(struct 
domain *d)
 {
 }
 
+static inline int vgic_v3_its_init_virtual(struct domain *d,
+   paddr_t guest_addr,
+   unsigned int devid_bits,
+   unsigned int intid_bits)
+{
+return 

[Xen-devel] [PATCH v4 23/27] ARM: vITS: handle INV command

2017-04-03 Thread Andre Przywara
The INV command instructs the ITS to update the configuration data for
a given LPI by re-reading its entry from the property table.
We don't need to care so much about the priority value, but enabling
or disabling an LPI has some effect: We remove or push virtual LPIs
to their VCPUs, also check the virtual pending bit if an LPI gets enabled.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/vgic-v3-its.c | 62 ++
 1 file changed, 62 insertions(+)

diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index 2f024b1..920c437 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -366,6 +366,65 @@ static int its_handle_int(struct virt_its *its, uint64_t 
*cmdptr)
 return 0;
 }
 
+/*
+ * For a given virtual LPI read the enabled bit and priority from the virtual
+ * property table and update the virtual IRQ's state.
+ * This takes care of removing or pushing of virtual LPIs to their VCPUs.
+ */
+static void update_lpi_enabled_status(struct virt_its* its,
+  struct vcpu *vcpu, uint32_t vlpi)
+{
+struct pending_irq *p = lpi_to_pending(its->d, vlpi);
+paddr_t proptable_addr;
+uint8_t *property;
+
+if ( !p )
+return;
+
+proptable_addr = its->d->arch.vgic.rdist_propbase & GENMASK_ULL(51, 12);
+property = map_one_guest_page(its->d, proptable_addr + vlpi - LPI_OFFSET);
+
+p->lpi_priority = *property & LPI_PROP_PRIO_MASK;
+
+if ( *property & LPI_PROP_ENABLED )
+{
+unsigned long flags;
+
+set_bit(GIC_IRQ_GUEST_ENABLED, &p->status);
+spin_lock_irqsave(&vcpu->arch.vgic.lock, flags);
+if ( !list_empty(&p->inflight) &&
+ !test_bit(GIC_IRQ_GUEST_VISIBLE, &p->status) )
+gic_raise_guest_irq(vcpu, vlpi, p->lpi_priority);
+spin_unlock_irqrestore(&vcpu->arch.vgic.lock, flags);
+
+/* Check whether the LPI has fired while the guest had it disabled. */
+if ( test_and_clear_bit(GIC_IRQ_GUEST_LPI_PENDING, &p->status) )
+vgic_vcpu_inject_irq(vcpu, vlpi);
+}
+else
+{
+clear_bit(GIC_IRQ_GUEST_ENABLED, &p->status);
+gic_remove_from_queues(vcpu, vlpi);
+}
+
+unmap_one_guest_page(property);
+}
+
+static int its_handle_inv(struct virt_its *its, uint64_t *cmdptr)
+{
+uint32_t devid = its_cmd_get_deviceid(cmdptr);
+uint32_t eventid = its_cmd_get_id(cmdptr);
+struct vcpu *vcpu;
+uint32_t vlpi;
+
+if ( !read_itte(its, devid, eventid, &vcpu, &vlpi) )
+return -1;
+
+update_lpi_enabled_status(its, vcpu, vlpi);
+
+return 0;
+}
+
 static int its_handle_mapc(struct virt_its *its, uint64_t *cmdptr)
 {
 uint32_t collid = its_cmd_get_collection(cmdptr);
@@ -546,6 +605,9 @@ static int vgic_its_handle_cmds(struct domain *d, struct 
virt_its *its,
 case GITS_CMD_INT:
 ret = its_handle_int(its, cmdptr);
 break;
+case GITS_CMD_INV:
+ret = its_handle_inv(its, cmdptr);
+   break;
 case GITS_CMD_MAPC:
 ret = its_handle_mapc(its, cmdptr);
 break;
-- 
2.9.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 21/27] ARM: vITS: handle MOVI command

2017-04-03 Thread Andre Przywara
The MOVI command moves the interrupt affinity from one redistributor
(read: VCPU) to another.
For now migration of "live" LPIs is not yet implemented, but we store
the changed affinity in the host LPI structure and in our virtual ITTE.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic-v3-its.c| 24 
 xen/arch/arm/gic-v3-lpi.c| 13 +
 xen/arch/arm/vgic-v3-its.c   | 24 
 xen/include/asm-arm/gic_v3_its.h |  4 
 4 files changed, 65 insertions(+)

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index 3bc1e58..f611e2f 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -835,6 +835,30 @@ struct pending_irq *gicv3_assign_guest_event(struct domain 
*d,
 return pirq;
 }
 
+/* Changes the target VCPU for a given host LPI assigned to a domain. */
+int gicv3_lpi_change_vcpu(struct domain *d, paddr_t doorbell,
+  uint32_t vdevid, uint32_t veventid,
+  unsigned int vcpu_id)
+{
+uint32_t host_lpi;
+struct its_devices *dev;
+
+spin_lock(&d->arch.vgic.its_devices_lock);
+dev = get_its_device(d, doorbell, vdevid);
+if ( dev )
+host_lpi = get_host_lpi(dev, veventid);
+else
+host_lpi = 0;
+spin_unlock(&d->arch.vgic.its_devices_lock);
+
+if ( !host_lpi )
+return -ENOENT;
+
+gicv3_lpi_update_host_vcpuid(host_lpi, vcpu_id);
+
+return 0;
+}
+
 /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
 void gicv3_its_dt_init(const struct dt_device_node *node)
 {
diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
index 067ccb2..94c3646 100644
--- a/xen/arch/arm/gic-v3-lpi.c
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -206,6 +206,19 @@ void gicv3_lpi_update_host_entry(uint32_t host_lpi, int 
domain_id,
 write_u64_atomic(&hlpip->data, hlpi.data);
 }
 
+int gicv3_lpi_update_host_vcpuid(uint32_t host_lpi, unsigned int vcpu_id)
+{
+union host_lpi *hlpip;
+
+host_lpi -= LPI_OFFSET;
+
+hlpip = &lpi_data.host_lpis[host_lpi / HOST_LPIS_PER_PAGE][host_lpi % 
HOST_LPIS_PER_PAGE];
+
+write_u16_atomic(&hlpip->vcpu_id, vcpu_id);
+
+return 0;
+}
+
 static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
 {
 uint64_t val;
diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index e9a309d..a2758cd 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -455,6 +455,24 @@ static int its_handle_mapti(struct virt_its *its, uint64_t 
*cmdptr)
 return 0;
 }
 
+static int its_handle_movi(struct virt_its *its, uint64_t *cmdptr)
+{
+uint32_t devid = its_cmd_get_deviceid(cmdptr);
+uint32_t eventid = its_cmd_get_id(cmdptr);
+int collid = its_cmd_get_collection(cmdptr);
+struct vcpu *vcpu;
+
+if ( !write_itte(its, devid, eventid, collid, SKIP_LPI_UPDATE, &vcpu) )
+return -1;
+
+/* TODO: lookup currently-in-guest virtual IRQs and migrate them */
+
+gicv3_lpi_change_vcpu(its->d,
+  its->doorbell_address, devid, eventid, 
vcpu->vcpu_id);
+
+return 0;
+}
+
 #define ITS_CMD_BUFFER_SIZE(baser)      ((((baser) & 0xff) + 1) << 12)
 
 static int vgic_its_handle_cmds(struct domain *d, struct virt_its *its,
@@ -508,6 +526,12 @@ static int vgic_its_handle_cmds(struct domain *d, struct 
virt_its *its,
 case GITS_CMD_MAPTI:
 ret = its_handle_mapti(its, cmdptr);
 break;
+case GITS_CMD_MOVALL:
+gdprintk(XENLOG_G_INFO, "ITS: ignoring MOVALL command\n");
+break;
+case GITS_CMD_MOVI:
+ret = its_handle_movi(its, cmdptr);
+break;
 case GITS_CMD_SYNC:
 /* We handle ITS commands synchronously, so we ignore SYNC. */
break;
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
index eeffd58..3b5f898 100644
--- a/xen/include/asm-arm/gic_v3_its.h
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -169,8 +169,12 @@ void gicv3_free_host_lpi_block(uint32_t first_lpi);
 struct pending_irq *gicv3_assign_guest_event(struct domain *d, paddr_t 
doorbell,
  uint32_t devid, uint32_t eventid,
  struct vcpu *v, uint32_t 
virt_lpi);
+int gicv3_lpi_change_vcpu(struct domain *d, paddr_t doorbell,
+  uint32_t devid, uint32_t eventid,
+  unsigned int vcpu_id);
 void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
  unsigned int vcpu_id, uint32_t virt_lpi);
+int gicv3_lpi_update_host_vcpuid(uint32_t host_lpi, unsigned int vcpu_id);
 
 #else
 
-- 
2.9.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 13/27] ARM: vGICv3: Handle disabled LPIs

2017-04-03 Thread Andre Przywara
If a guest disables an LPI, we do not forward this to the associated
host LPI to avoid queueing commands to the host ITS command queue.
So it may happen that an LPI fires nevertheless on the host. In this
case we can bail out early, but have to save the pending state on the
virtual side.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic-v3-lpi.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
index ad89863..a86fcf5 100644
--- a/xen/arch/arm/gic-v3-lpi.c
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -125,6 +125,21 @@ uint64_t gicv3_get_redist_address(unsigned int cpu, bool 
use_pta)
 return per_cpu(lpi_redist, cpu).redist_id << 16;
 }
 
+static bool vgic_can_inject_lpi(struct vcpu *vcpu, uint32_t vlpi)
+{
+struct pending_irq *p = lpi_to_pending(vcpu->domain, vlpi);
+
+if ( !p )
+return false;
+
+if ( test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) )
+return true;
+
+set_bit(GIC_IRQ_GUEST_LPI_PENDING, &p->status);
+
+return false;
+}
+
 /*
  * Handle incoming LPIs, which are a bit special, because they are potentially
  * numerous and also only get injected into guests. Treat them specially here,
@@ -162,7 +177,13 @@ void do_LPI(unsigned int lpi)
 
 vcpu = d->vcpu[hlpi.vcpu_id];
 
-vgic_vcpu_inject_irq(vcpu, hlpi.virt_lpi);
+/*
+ * We keep all host LPIs enabled, so check if it's disabled on the guest
+ * side and just record this LPI in the virtual pending table in this case.
+ * The guest picks it up once it gets enabled again.
+ */
+if ( vgic_can_inject_lpi(vcpu, hlpi.virt_lpi) )
+vgic_vcpu_inject_irq(vcpu, hlpi.virt_lpi);
 
 rcu_unlock_domain(d);
 }
-- 
2.9.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 24/27] ARM: vITS: handle INVALL command

2017-04-03 Thread Andre Przywara
The INVALL command instructs an ITS to invalidate the configuration
data for all LPIs associated with a given redistributor (read: VCPU).
This is nasty to emulate exactly with our architecture, so we just scan
the pending table and inject _every_ LPI found there that got enabled.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/vgic-v3-its.c | 46 ++
 1 file changed, 46 insertions(+)

diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index 920c437..35a0730 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -425,6 +425,49 @@ static int its_handle_inv(struct virt_its *its, uint64_t 
*cmdptr)
 return 0;
 }
 
+/*
+ * INVALL updates the per-LPI configuration status for every LPI mapped to
+ * a particular redistributor.
+ * We iterate over all mapped LPIs in our radix tree and update those.
+ */
+static int its_handle_invall(struct virt_its *its, uint64_t *cmdptr)
+{
+uint32_t collid = its_cmd_get_collection(cmdptr);
+struct vcpu *vcpu;
+struct pending_irq *pirqs[16];
+uint32_t vlpi = 0;
+int nr_lpis, i;
+
+/* We may want to revisit this implementation for DomUs. */
+ASSERT(is_hardware_domain(its->d));
+
+spin_lock(&its->its_lock);
+vcpu = get_vcpu_from_collection(its, collid);
+spin_unlock(&its->its_lock);
+
+read_lock(&its->d->arch.vgic.pend_lpi_tree_lock);
+
+do {
+nr_lpis = radix_tree_gang_lookup(&its->d->arch.vgic.pend_lpi_tree,
+ (void **)pirqs, vlpi,
+ARRAY_SIZE(pirqs));
+
+for ( i = 0; i < nr_lpis; i++ )
+{
+vlpi = pirqs[i]->irq;
+update_lpi_enabled_status(its, vcpu, vlpi);
+}
+
+/* Protect from overflow when incrementing 0xffffffff */
+if ( vlpi == ~0 || ++vlpi < its->d->arch.vgic.nr_lpis )
+break;
+} while ( nr_lpis == ARRAY_SIZE(pirqs));
+
+read_unlock(&its->d->arch.vgic.pend_lpi_tree_lock);
+
+return 0;
+}
+
 static int its_handle_mapc(struct virt_its *its, uint64_t *cmdptr)
 {
 uint32_t collid = its_cmd_get_collection(cmdptr);
@@ -608,6 +651,9 @@ static int vgic_its_handle_cmds(struct domain *d, struct 
virt_its *its,
 case GITS_CMD_INV:
 ret = its_handle_inv(its, cmdptr);
break;
+case GITS_CMD_INVALL:
+ret = its_handle_invall(its, cmdptr);
+   break;
 case GITS_CMD_MAPC:
 ret = its_handle_mapc(its, cmdptr);
 break;
-- 
2.9.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 00/27] arm64: Dom0 ITS emulation

2017-04-03 Thread Andre Przywara
Hi,

I managed to go over the remaining emails I couldn't finish on Friday.
This series is the result of this and has about 30 smaller fixes, see
the changelog below. This should address all comments Stefano had.
There are a few things my brain cannot cope with today anymore, so I
will address them with Julien face-to-face tomorrow:
- Move lpi_get_priority() and do_LPI() into gic_ops
- check MOVI behavior in our special case
- check LPI state changes if already in an LR
- implement indirect table
- move GENMASK_ULL and other helpers into separate patches
- re-check issue reported by Cavium
- agree on having a command line for the devices or not
- rebasing artifacts
- anything not mentioned here ;-)

Cheers,
Andre

--
This series adds support for emulation of an ARM GICv3 ITS interrupt
controller. For hardware which relies on the ITS to provide interrupts for
its peripherals this code is needed to get a machine booted into Dom0 at
all. ITS emulation for DomUs is only really useful with PCI passthrough,
which is not yet available for ARM. It is expected that this feature
will be co-developed with the ITS DomU code. However this code drop here
considers DomU emulation already, to keep later architectural changes
to a minimum.

Some generic design principles:

* The current GIC code statically allocates structures for each supported
IRQ (both for the host and the guest), which due to the potentially
millions of LPI interrupts is not feasible to copy for the ITS.
So we refrain from introducing the ITS as a first class Xen interrupt
controller, also we don't hold struct irq_desc's or struct pending_irq's
for each possible LPI.
Fortunately LPIs are only interesting to guests, so we get away with
storing only the virtual IRQ number and the guest VCPU for each allocated
host LPI, which can be stashed into one uint64_t. This data is stored in
a two-level table, which is both memory efficient and quick to access.
We hook into the existing IRQ handling and VGIC code to avoid accessing
the normal structures, providing alternative methods for getting the
needed information (priority, is enabled?) for LPIs.
For interrupts which are queued to or are actually in a guest we
allocate struct pending_irq's on demand. As it is expected that only a
very small number of interrupts is ever on a VCPU at the same time, this
seems like the best approach. For now allocated structs are re-used and
held in a linked list. Should it emerge that traversing a linked list
is a performance issue, this can be changed to use a hash table.

* On the guest side we (later will) have to deal with malicious guests
trying to hog Xen with mapping requests for a lot of LPIs, for instance.
As the ITS actually uses system memory for storing status information,
we use this memory (which the guest has to provide) to naturally limit
a guest. For those tables which are page sized (devices, collections (CPUs),
LPI properties) we map those pages into Xen, so we can easily access
them from the virtual GIC code.
Unfortunately the actual interrupt mapping tables are not necessarily
page aligned, also can be much smaller than a page, so mapping all of
them permanently is fiddly. As ITS commands which need to iterate those
tables are pretty rare after all, we for now map them on demand upon
emulating a virtual ITS command. This is acceptable because "mapping"
them is actually very cheap on arm64. Also as we can't properly protect
those areas due to their sub-page-size property, we validate the data
in there before actually using it. The vITS code basically just stores
the data in there which the guest has actually transferred via the
virtual ITS command queue before, so there is no secret revealed nor
does it create an attack vector for a malicious guest.

* An obvious approach to handling some guest ITS commands would be to
propagate them to the host, for instance to map devices and LPIs and
to enable or disable LPIs.
However this (later with DomU support) will create an attack vector, as
a malicious guest could try to fill the host command queue with
propagated commands.
So we try to avoid this situation: Dom0 sending a device mapping (MAPD)
command is the only time we allow queuing commands to the host ITS command
queue, as this seems to be the only reliable way of getting the
required information at the moment. However at the same time we map all
events to LPIs already, also enable them. This avoids sending commands
later at runtime, as we can deal with mappings and LPI enabling/disabling
internally.

As it is expected that the ITS support will become a tech preview in the
first release, there is a Kconfig option to enable it. Also it is
supported on arm64 only, which will most likely not change in the future.
This leads to some hideous constructs like an #ifdef'ed header file with
empty function stubs, I have some hope we can still clean this up.
Also some parameters are config options which can be overridden on the
Xen 

[Xen-devel] [PATCH v4 20/27] ARM: vITS: handle MAPTI command

2017-04-03 Thread Andre Przywara
The MAPTI command associates a DeviceID/EventID pair with an LPI/CPU
pair and actually instantiates LPI interrupts.
We connect the already allocated host LPI to this virtual LPI, so that
any triggering IRQ on the host can be quickly forwarded to a guest.
Besides entering the VCPU and the virtual LPI number in the respective
host LPI entry, we also initialize and add the already allocated
struct pending_irq to our radix tree, so that we can now easily find it
by its virtual LPI number.
This exports the vgic_init_pending_irq() function for that purpose.
As write_itte() is now eventually used, we can now add the static tag.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic-v3-its.c| 70 
 xen/arch/arm/gic-v3-lpi.c| 18 +++
 xen/arch/arm/vgic-v3-its.c   | 36 +++--
 xen/arch/arm/vgic.c  |  2 +-
 xen/include/asm-arm/gic_v3_its.h |  6 
 xen/include/asm-arm/vgic.h   |  1 +
 6 files changed, 130 insertions(+), 3 deletions(-)

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index 880c7fc..3bc1e58 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -765,6 +765,76 @@ void gicv3_its_unmap_all_devices(struct domain *d)
 spin_unlock(&d->arch.vgic.its_devices_lock);
 }
 
+/* Must be called with the its_device_lock held. */
+static struct its_devices *get_its_device(struct domain *d, paddr_t doorbell,
+  uint32_t devid)
+{
+struct rb_node *node = d->arch.vgic.its_devices.rb_node;
+struct its_devices *dev;
+
+while (node)
+{
+int cmp;
+
+dev = rb_entry(node, struct its_devices, rbnode);
+cmp = compare_its_guest_devices(dev, doorbell, devid);
+
+if ( !cmp )
+return dev;
+
+if ( cmp > 0 )
+node = node->rb_left;
+else
+node = node->rb_right;
+}
+
+return NULL;
+}
+
+static uint32_t get_host_lpi(struct its_devices *dev, uint32_t eventid)
+{
+uint32_t host_lpi = 0;
+
+if ( dev && (eventid < dev->eventids) )
+host_lpi = dev->host_lpi_blocks[eventid / LPI_BLOCK] +
+   (eventid % LPI_BLOCK);
+
+return host_lpi;
+}
+
+/*
+ * Connects the event ID for an already assigned device to the given VCPU/vLPI
+ * pair. The corresponding physical LPI is already mapped on the host side
+ * (when assigning the physical device to the guest), so we just connect the
+ * target VCPU/vLPI pair to that interrupt to inject it properly if it fires.
+ */
+struct pending_irq *gicv3_assign_guest_event(struct domain *d,
+ paddr_t doorbell_address,
+ uint32_t vdevid, uint32_t 
veventid,
+ struct vcpu *v, uint32_t virt_lpi)
+{
+struct its_devices *dev;
+struct pending_irq *pirq = NULL;
+uint32_t host_lpi = 0;
+
+spin_lock(&d->arch.vgic.its_devices_lock);
+dev = get_its_device(d, doorbell_address, vdevid);
+if ( dev )
+{
+host_lpi = get_host_lpi(dev, veventid);
+pirq = &dev->pend_irqs[veventid];
+}
+spin_unlock(&d->arch.vgic.its_devices_lock);
+
+if ( !host_lpi || !pirq )
+return NULL;
+
+gicv3_lpi_update_host_entry(host_lpi, d->domain_id,
+v ? v->vcpu_id : -1, virt_lpi);
+
+return pirq;
+}
+
 /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
 void gicv3_its_dt_init(const struct dt_device_node *node)
 {
diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
index a86fcf5..067ccb2 100644
--- a/xen/arch/arm/gic-v3-lpi.c
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -188,6 +188,24 @@ void do_LPI(unsigned int lpi)
 rcu_unlock_domain(d);
 }
 
+void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
+ unsigned int vcpu_id, uint32_t virt_lpi)
+{ /* Store the domain/VCPU/vLPI triple in the table entry of a host LPI. */
+union host_lpi *hlpip, hlpi;
+
+ASSERT(host_lpi >= LPI_OFFSET);
+
+host_lpi -= LPI_OFFSET; /* From here on host_lpi is a table index. */
+
+hlpip = _data.host_lpis[host_lpi / HOST_LPIS_PER_PAGE][host_lpi % 
HOST_LPIS_PER_PAGE];
+
+hlpi.virt_lpi = virt_lpi;
+hlpi.dom_id = domain_id;
+hlpi.vcpu_id = vcpu_id;
+
+write_u64_atomic(>data, hlpi.data); /* Update the entry in one store. */
+}
+
 static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
 {
 uint64_t val;
diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index 0e636de..e9a309d 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -264,8 +264,8 @@ static bool read_itte(struct virt_its *its, uint32_t devid, 
uint32_t evid,
  * This function takes care of the locking by taking the its_lock itself, so
  * a caller shall not hold this. Upon returning, the lock is dropped again.
  */
-bool write_itte(struct virt_its *its, uint32_t devid, uint32_t evid,
-uint32_t collid, uint32_t vlpi, struct vcpu 

[Xen-devel] [PATCH v4 14/27] ARM: vGICv3: introduce basic ITS emulation bits

2017-04-03 Thread Andre Przywara
Create a new file to hold the emulation code for the ITS widget.
For now we emulate the memory mapped ITS registers and provide a stub
to introduce the ITS command handling framework (but without actually
emulating any commands at this time).

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/Makefile |   1 +
 xen/arch/arm/vgic-v3-its.c| 547 ++
 xen/arch/arm/vgic-v3.c|   9 -
 xen/include/asm-arm/gic_v3_defs.h |  19 ++
 xen/include/asm-arm/gic_v3_its.h  |   2 +
 5 files changed, 569 insertions(+), 9 deletions(-)
 create mode 100644 xen/arch/arm/vgic-v3-its.c

diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
index 6be85ab..49e1fb2 100644
--- a/xen/arch/arm/Makefile
+++ b/xen/arch/arm/Makefile
@@ -47,6 +47,7 @@ obj-y += traps.o
 obj-y += vgic.o
 obj-y += vgic-v2.o
 obj-$(CONFIG_HAS_GICV3) += vgic-v3.o
+obj-$(CONFIG_HAS_ITS) += vgic-v3-its.o
 obj-y += vm_event.o
 obj-y += vtimer.o
 obj-y += vpsci.o
diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
new file mode 100644
index 000..fd3b9a1
--- /dev/null
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -0,0 +1,547 @@
+/*
+ * xen/arch/arm/vgic-v3-its.c
+ *
+ * ARM Interrupt Translation Service (ITS) emulation
+ *
+ * Andre Przywara 
+ * Copyright (c) 2016,2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Data structure to describe a virtual ITS */
+#define VIRT_ITS_ENABLED0
+#define VIRT_ITS_COLL_VALID 1
+#define VIRT_ITS_DEV_VALID  2
+#define VIRT_ITS_CMDBUF_VALID   3
+struct virt_its {
+struct domain *d;
+spinlock_t vcmd_lock;   /* Protects the virtual command buffer. */
+uint64_t cbaser;
+uint64_t cwriter;
+uint64_t creadr;
+spinlock_t its_lock;/* Protects the collection and device tables. 
*/
+uint64_t baser_dev, baser_coll;
+unsigned int max_collections;
+unsigned int max_devices;
+unsigned int devid_bits;
+unsigned int intid_bits;
+unsigned long flags;
+};
+
+/*
+ * An Interrupt Translation Table Entry: this is indexed by a
+ * DeviceID/EventID pair and is located in guest memory.
+ */
+struct vits_itte
+{
+uint32_t vlpi;
+uint16_t collection;
+uint16_t pad;
+};
+
+static bool its_is_enabled(struct virt_its *its)
+{
+return test_bit(VIRT_ITS_ENABLED, >flags);
+}
+
+/**
+ * Functions that handle ITS commands *
+ **/
+
+static uint64_t its_cmd_mask_field(uint64_t *its_cmd, unsigned int word,
+   unsigned int shift, unsigned int size)
+{
+return (le64_to_cpu(its_cmd[word]) >> shift) & (BIT(size) - 1);
+}
+
+#define its_cmd_get_command(cmd)its_cmd_mask_field(cmd, 0,  0,  8)
+#define its_cmd_get_deviceid(cmd)   its_cmd_mask_field(cmd, 0, 32, 32)
+#define its_cmd_get_size(cmd)   its_cmd_mask_field(cmd, 1,  0,  5)
+#define its_cmd_get_id(cmd) its_cmd_mask_field(cmd, 1,  0, 32)
+#define its_cmd_get_physical_id(cmd)its_cmd_mask_field(cmd, 1, 32, 32)
+#define its_cmd_get_collection(cmd) its_cmd_mask_field(cmd, 2,  0, 16)
+#define its_cmd_get_target_addr(cmd)its_cmd_mask_field(cmd, 2, 16, 32)
+#define its_cmd_get_validbit(cmd)   its_cmd_mask_field(cmd, 2, 63,  1)
+
+/* Command buffer size in bytes: (low 8 bits of CBASER + 1) pages of 4KB. */
+#define ITS_CMD_BUFFER_SIZE(baser)  ((((baser) & 0xff) + 1) << 12)
+
+static int vgic_its_handle_cmds(struct domain *d, struct virt_its *its,
+uint32_t writer)
+{
+paddr_t cmdbuf_addr = its->cbaser & GENMASK_ULL(51, 12);
+void *cmdbuf = NULL;
+uint64_t *cmdptr;
+
+if ( writer >= ITS_CMD_BUFFER_SIZE(its->cbaser) )
+return -1;
+
+spin_lock(>vcmd_lock);
+
+while ( its->creadr != writer )
+{
+int ret;
+
+ret = 0;
+
+/*
+ * If this is the first command we handle or we cross a page boundary,
+ * we need to (re)map the command buffer.
+ */
+if ( !cmdbuf || (its->creadr & ~PAGE_MASK) == 0 )
+{
+if ( cmdbuf )
+unmap_one_guest_page(cmdbuf);
+cmdbuf = map_one_guest_page(d,
+   (cmdbuf_addr 

[Xen-devel] [PATCH v4 22/27] ARM: vITS: handle DISCARD command

2017-04-03 Thread Andre Przywara
The DISCARD command drops the connection between a DeviceID/EventID
and an LPI/collection pair.
We mark the respective structure entries as not allocated and make
sure that any queued IRQs are removed.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/vgic-v3-its.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index a2758cd..2f024b1 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -473,6 +473,33 @@ static int its_handle_movi(struct virt_its *its, uint64_t 
*cmdptr)
 return 0;
 }
 
+static int its_handle_discard(struct virt_its *its, uint64_t *cmdptr)
+{ /* DISCARD: unmap a DeviceID/EventID pair and dequeue its virtual LPI. */
+uint32_t devid = its_cmd_get_deviceid(cmdptr);
+uint32_t eventid = its_cmd_get_id(cmdptr);
+struct pending_irq *pirq;
+struct vcpu *vcpu;
+uint32_t vlpi;
+
+if ( !read_itte(its, devid, eventid, , ) )
+return -1;
+
+pirq = lpi_to_pending(its->d, vlpi); /* NULL if the vLPI is not mapped. */
+if ( pirq )
+{
+clear_bit(GIC_IRQ_GUEST_QUEUED, >status);
+gic_remove_from_queues(vcpu, vlpi);
+}
+
+if ( !write_itte(its, devid, eventid, UNMAPPED_COLLECTION, INVALID_LPI, 
NULL) )
+return -1;
+
+gicv3_assign_guest_event(its->d, its->doorbell_address,
+ devid, eventid, NULL, 0); /* Detach the event from any VCPU/vLPI. */
+
+return 0;
+}
+
 #define ITS_CMD_BUFFER_SIZE(baser)  baser) & 0xff) + 1) << 12)
 
 static int vgic_its_handle_cmds(struct domain *d, struct virt_its *its,
@@ -513,6 +540,9 @@ static int vgic_its_handle_cmds(struct domain *d, struct 
virt_its *its,
 case GITS_CMD_CLEAR:
 ret = its_handle_clear(its, cmdptr);
 break;
+case GITS_CMD_DISCARD:
+ret = its_handle_discard(its, cmdptr);
+break;
 case GITS_CMD_INT:
 ret = its_handle_int(its, cmdptr);
 break;
-- 
2.9.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 10/27] ARM: GICv3: forward pending LPIs to guests

2017-04-03 Thread Andre Przywara
Upon receiving an LPI, we need to find the right VCPU and virtual IRQ
number to get this IRQ injected.
Iterate our two-level LPI table to find this information quickly when
the host takes an LPI. Call the existing injection function to let the
GIC emulation deal with this interrupt.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic-v3-lpi.c  | 42 ++
 xen/arch/arm/gic.c |  8 +++-
 xen/arch/arm/vgic-v3.c | 11 +++
 xen/arch/arm/vgic.c|  9 -
 xen/include/asm-arm/irq.h  |  2 ++
 xen/include/asm-arm/vgic.h |  2 ++
 6 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
index d3ee141..ad89863 100644
--- a/xen/arch/arm/gic-v3-lpi.c
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -125,6 +125,48 @@ uint64_t gicv3_get_redist_address(unsigned int cpu, bool 
use_pta)
 return per_cpu(lpi_redist, cpu).redist_id << 16;
 }
 
+/*
+ * Handle incoming LPIs, which are a bit special, because they are potentially
+ * numerous and also only get injected into guests. Treat them specially here,
+ * by just looking up their target vCPU and virtual LPI number and hand it
+ * over to the injection function.
+ *
+ * The LPI number is written to ICC_EOIR1_EL1 before the lookup. The host LPI
+ * entry is read with read_u64_atomic() into a local copy, and only that copy
+ * is inspected afterwards. The LPI is silently dropped if there is no host
+ * LPI entry, if the entry carries INVALID_LPI, if the owning domain cannot be
+ * found, or if the stored VCPU ID is not below the domain's max_vcpus.
+ */
+void do_LPI(unsigned int lpi)
+{
+struct domain *d;
+union host_lpi *hlpip, hlpi;
+struct vcpu *vcpu;
+
+WRITE_SYSREG32(lpi, ICC_EOIR1_EL1);
+
+hlpip = gic_get_host_lpi(lpi);
+if ( !hlpip )
+return;
+
+hlpi.data = read_u64_atomic(>data);
+
+/* Unmapped events are marked with an invalid LPI ID. */
+if ( hlpi.virt_lpi == INVALID_LPI )
+return;
+
+d = rcu_lock_domain_by_id(hlpi.dom_id);
+if ( !d )
+return;
+
+/* Make sure we don't step beyond the vcpu array. */
+if ( hlpi.vcpu_id >= d->max_vcpus )
+{
+rcu_unlock_domain(d);
+return;
+}
+
+vcpu = d->vcpu[hlpi.vcpu_id];
+
+vgic_vcpu_inject_irq(vcpu, hlpi.virt_lpi);
+
+rcu_unlock_domain(d);
+}
+
 static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
 {
 uint64_t val;
diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 3ed6f81..a6037d4 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -709,7 +709,13 @@ void gic_interrupt(struct cpu_user_regs *regs, int is_fiq)
 do_IRQ(regs, irq, is_fiq);
 local_irq_disable();
 }
-else if (unlikely(irq < 16))
+#ifdef CONFIG_HAS_ITS
+else if ( is_lpi(irq) )
+{
+do_LPI(irq);
+}
+#endif
+else if ( unlikely(irq < 16) )
 {
 do_sgi(regs, irq);
 }
diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index 95fa0ba..797fd86 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -347,6 +347,17 @@ struct pending_irq *lpi_to_pending(struct domain *d, 
unsigned int lpi)
 return pirq;
 }
 
+/*
+ * Return the priority stored in the struct pending_irq of a virtual LPI.
+ * Falls back to GIC_PRI_IRQ when no pending_irq exists for that LPI.
+ */
+int vgic_lpi_get_priority(struct domain *d, uint32_t vlpi)
+{
+    struct pending_irq *pirq = lpi_to_pending(d, vlpi);
+
+    return pirq ? pirq->lpi_priority : GIC_PRI_IRQ;
+}
+
 static int __vgic_v3_rdistr_rd_mmio_write(struct vcpu *v, mmio_info_t *info,
   uint32_t gicr_reg,
   register_t r)
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 28f6f66..4720f46 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -226,10 +226,17 @@ struct vcpu *vgic_get_target_vcpu(struct vcpu *v, 
unsigned int virq)
 
 static int vgic_get_virq_priority(struct vcpu *v, unsigned int virq)
 {
-struct vgic_irq_rank *rank = vgic_rank_irq(v, virq);
+struct vgic_irq_rank *rank;
 unsigned long flags;
 int priority;
 
+#ifdef CONFIG_HAS_ITS
+/* LPIs don't have a rank, also store their priority separately. */
+if ( is_lpi(virq) )
+return vgic_lpi_get_priority(v->domain, virq);
+#endif
+
+rank = vgic_rank_irq(v, virq);
 vgic_lock_rank(v, rank, flags);
 priority = rank->priority[virq & INTERRUPT_RANK_MASK];
 vgic_unlock_rank(v, rank, flags);
diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
index 2f7ee8a..0cd0117 100644
--- a/xen/include/asm-arm/irq.h
+++ b/xen/include/asm-arm/irq.h
@@ -46,6 +46,8 @@ static inline bool is_lpi(unsigned int irq)
 return irq >= LPI_OFFSET;
 }
 
+void do_LPI(unsigned int irq);
+
 #define domain_pirq_to_irq(d, pirq) (pirq)
 
 bool_t is_assignable_irq(unsigned int irq);
diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
index 69ef160..a24a971 100644
--- a/xen/include/asm-arm/vgic.h
+++ b/xen/include/asm-arm/vgic.h
@@ -66,12 +66,14 @@ struct pending_irq
 #define GIC_IRQ_GUEST_VISIBLE  2
 #define GIC_IRQ_GUEST_ENABLED  3
 #define GIC_IRQ_GUEST_MIGRATING   4
+#define GIC_IRQ_GUEST_LPI_PENDING 5
 unsigned long status;
 struct irq_desc *desc; /* only set it the irq corresponds 

[Xen-devel] [PATCH v4 05/27] ARM: GICv3 ITS: map ITS command buffer

2017-04-03 Thread Andre Przywara
Instead of directly manipulating the tables in memory, an ITS driver
sends commands via a ring buffer in normal system memory to the ITS h/w
to create or alter the LPI mappings.
Allocate memory for that buffer and tell the ITS about it to be able
to send ITS commands.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic-v3-its.c| 53 
 xen/include/asm-arm/gic_v3_its.h |  6 +
 2 files changed, 59 insertions(+)

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index 00a1f7b..51b1b60 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -20,10 +20,13 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 
+#define ITS_CMD_QUEUE_SZSZ_1M
+
 /*
  * No lock here, as this list gets only populated upon boot while scanning
  * firmware tables for all host ITSes, and only gets iterated afterwards.
@@ -60,6 +63,51 @@ static uint64_t encode_baser_phys_addr(paddr_t addr, 
unsigned int page_bits)
 return ret | ((addr & GENMASK_ULL(51, 48)) >> (48 - 12));
 }
 
+/*
+ * Allocate the ITS command queue and program GITS_CBASER with its address,
+ * size and memory attributes. Sets HOST_ITS_FLUSH_CMD_QUEUE in its->flags
+ * when the queue ends up non-cacheable. Returns the buffer, or NULL if the
+ * allocation fails or the buffer's address cannot be encoded.
+ */
+static void *its_map_cbaser(struct host_its *its)
+{
+    void __iomem *cbasereg = its->its_base + GITS_CBASER;
+    void *cmdq = _xzalloc(ITS_CMD_QUEUE_SZ, SZ_64K);
+    paddr_t cmdq_maddr;
+    uint64_t reg;
+
+    if ( !cmdq )
+        return NULL;
+
+    /* Only address bits [51:12] can be stored in the register. */
+    cmdq_maddr = virt_to_maddr(cmdq);
+    if ( cmdq_maddr & ~GENMASK_ULL(51, 12) )
+    {
+        xfree(cmdq);
+        return NULL;
+    }
+
+    /* Attributes we would like to get; the ITS may not accept all of them. */
+    reg  = GIC_BASER_InnerShareable << GITS_BASER_SHAREABILITY_SHIFT;
+    reg |= GIC_BASER_CACHE_SameAsInner << GITS_BASER_OUTER_CACHEABILITY_SHIFT;
+    reg |= GIC_BASER_CACHE_RaWaWb << GITS_BASER_INNER_CACHEABILITY_SHIFT;
+    reg |= GITS_VALID_BIT | cmdq_maddr;
+    reg |= ((ITS_CMD_QUEUE_SZ / SZ_4K) - 1) & GITS_CBASER_SIZE_MASK;
+
+    /* Write our wishes, then read back what the ITS actually kept. */
+    writeq_relaxed(reg, cbasereg);
+    reg = readq_relaxed(cbasereg);
+
+    /* Without shareability there is no point in keeping cacheability. */
+    if ( !(reg & GITS_BASER_SHAREABILITY_MASK) )
+    {
+        reg &= ~GITS_BASER_INNER_CACHEABILITY_MASK;
+        writeq_relaxed(reg, cbasereg);
+    }
+
+    /* An uncached command queue has to be flushed on every access. */
+    if ( (reg & GITS_BASER_INNER_CACHEABILITY_MASK) == 0 )
+    {
+        its->flags |= HOST_ITS_FLUSH_CMD_QUEUE;
+        printk(XENLOG_WARNING "using non-cacheable ITS command queue\n");
+    }
+
+    return cmdq;
+}
+
 /* The ITS BASE registers work with page sizes of 4K, 16K or 64K. */
 #define BASER_PAGE_BITS(sz) ((sz) * 2 + 12)
 
@@ -185,6 +233,11 @@ static int gicv3_its_init_single_its(struct host_its 
*hw_its)
 }
 }
 
+hw_its->cmd_buf = its_map_cbaser(hw_its);
+if ( !hw_its->cmd_buf )
+return -ENOMEM;
+writeq_relaxed(0, hw_its->its_base + GITS_CWRITER);
+
 return 0;
 }
 
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
index c7d8766..3500b042 100644
--- a/xen/include/asm-arm/gic_v3_its.h
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -84,8 +84,12 @@
 #define GITS_BASER_OUTER_CACHEABILITY_MASK   (0x7ULL << 
GITS_BASER_OUTER_CACHEABILITY_SHIFT)
 #define GITS_BASER_INNER_CACHEABILITY_MASK   (0x7ULL << 
GITS_BASER_INNER_CACHEABILITY_SHIFT)
 
+#define GITS_CBASER_SIZE_MASK   0xff
+
 #include 
 
+#define HOST_ITS_FLUSH_CMD_QUEUE(1U << 0)
+
 /* data structure for each hardware ITS */
 struct host_its {
 struct list_head entry;
@@ -96,6 +100,8 @@ struct host_its {
 unsigned int devid_bits;
 unsigned int evid_bits;
 unsigned int itte_size;
+void *cmd_buf;
+unsigned int flags;
 };
 
 
-- 
2.9.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 09/27] ARM: GICv3: introduce separate pending_irq structs for LPIs

2017-04-03 Thread Andre Przywara
For the same reason that allocating a struct irq_desc for each
possible LPI is not an option, having a struct pending_irq for each LPI
is also not feasible. We only care about mapped LPIs, so we can get away
with having struct pending_irq's only for them.
Maintain a radix tree per domain where we drop the pointer to the
respective pending_irq. The index used is the virtual LPI number.
The memory for the actual structures has been allocated already per
device at device mapping time.
Teach the existing VGIC functions to find the right pointer when being
given a virtual LPI number.
We also take care of checking for a NULL pointer in the VCPU exit path,
should an LPI have been removed from the tree for any reason.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic.c   | 12 
 xen/arch/arm/vgic-v3.c   | 21 +
 xen/arch/arm/vgic.c  |  5 +
 xen/include/asm-arm/domain.h |  2 ++
 xen/include/asm-arm/vgic.h   |  1 +
 5 files changed, 41 insertions(+)

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 9522c6c..3ed6f81 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -461,7 +461,19 @@ static void gic_update_one_lr(struct vcpu *v, int i)
 
 gic_hw_ops->read_lr(i, _val);
 irq = lr_val.virq;
+
 p = irq_to_pending(v, irq);
+/* An LPI might have been unmapped, in which case we just clean up here. */
+if ( !p )
+{
+ASSERT(is_lpi(irq));
+
+gic_hw_ops->clear_lr(i);
+clear_bit(i, _cpu(lr_mask));
+
+return;
+}
+
 if ( lr_val.state & GICH_LR_ACTIVE )
 {
 set_bit(GIC_IRQ_GUEST_ACTIVE, >status);
diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index 3c7161b..95fa0ba 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -330,6 +330,23 @@ read_unknown:
 return 1;
 }
 
+/*
+ * Looks up a virtual LPI number in our tree of mapped LPIs. This will return
+ * the corresponding struct pending_irq, which we also use to store the
+ * enabled and pending bit plus the priority.
+ * Returns NULL if an LPI cannot be found.
+ *
+ * The lookup takes the read side of the domain's pend_lpi_tree_lock, which is
+ * released again before returning, so the caller receives the pointer
+ * without holding that lock.
+ */
+struct pending_irq *lpi_to_pending(struct domain *d, unsigned int lpi)
+{
+struct pending_irq *pirq;
+
+read_lock(>arch.vgic.pend_lpi_tree_lock);
+pirq = radix_tree_lookup(>arch.vgic.pend_lpi_tree, lpi);
+read_unlock(>arch.vgic.pend_lpi_tree_lock);
+
+return pirq;
+}
+
 static int __vgic_v3_rdistr_rd_mmio_write(struct vcpu *v, mmio_info_t *info,
   uint32_t gicr_reg,
   register_t r)
@@ -1452,6 +1469,9 @@ static int vgic_v3_domain_init(struct domain *d)
 spin_lock_init(>arch.vgic.its_devices_lock);
 d->arch.vgic.its_devices = RB_ROOT;
 
+rwlock_init(>arch.vgic.pend_lpi_tree_lock);
+radix_tree_init(>arch.vgic.pend_lpi_tree);
+
 /*
  * Domain 0 gets the hardware address.
  * Guests get the virtual platform layout.
@@ -1525,6 +1545,7 @@ static int vgic_v3_domain_init(struct domain *d)
 static void vgic_v3_domain_free(struct domain *d)
 {
 gicv3_its_unmap_all_devices(d);
+radix_tree_destroy(>arch.vgic.pend_lpi_tree, NULL);
 xfree(d->arch.vgic.rdist_regions);
 }
 
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 67d75a6..28f6f66 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -431,10 +431,15 @@ bool vgic_to_sgi(struct vcpu *v, register_t sgir, enum 
gic_sgi_mode irqmode,
 struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq)
 {
 struct pending_irq *n;
+
 /* Pending irqs allocation strategy: the first vgic.nr_spis irqs
  * are used for SPIs; the rests are used for per cpu irqs */
 if ( irq < 32 )
 n = >arch.vgic.pending_irqs[irq];
+#ifdef CONFIG_HAS_ITS
+else if ( is_lpi(irq) )
+n = lpi_to_pending(v->domain, irq);
+#endif
 else
 n = >domain->arch.vgic.pending_irqs[irq - 32];
 return n;
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 503a3cf..6ee7538 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -111,6 +111,8 @@ struct arch_domain
 uint32_t rdist_stride;  /* Re-Distributor stride */
 struct rb_root its_devices; /* Devices mapped to an ITS */
 spinlock_t its_devices_lock;/* Protects the its_devices tree */
+struct radix_tree_root pend_lpi_tree; /* Stores struct pending_irq's */
+rwlock_t pend_lpi_tree_lock;/* Protects the pend_lpi_tree */
 #endif
 } vgic;
 
diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
index 894c3f1..69ef160 100644
--- a/xen/include/asm-arm/vgic.h
+++ b/xen/include/asm-arm/vgic.h
@@ -300,6 +300,7 @@ extern void vgic_vcpu_inject_spi(struct domain *d, unsigned 
int virq);
 extern void vgic_clear_pending_irqs(struct vcpu *v);
 extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq);
 extern struct 

[Xen-devel] [PATCH v4 03/27] ARM: GICv3: allocate LPI pending and property table

2017-04-03 Thread Andre Przywara
The ARM GICv3 provides a new kind of interrupt called LPIs.
The pending bits and the configuration data (priority, enable bits) for
those LPIs are stored in tables in normal memory, which software has to
provide to the hardware.
Allocate the required memory, initialize it and hand it over to each
redistributor. The maximum number of LPIs to be used can be adjusted with
the command line option "max_lpi_bits", which defaults to 20 bits,
covering about one million LPIs.

Signed-off-by: Andre Przywara 
---
 docs/misc/xen-command-line.markdown |   9 ++
 xen/arch/arm/Makefile   |   1 +
 xen/arch/arm/gic-v3-lpi.c   | 213 
 xen/arch/arm/gic-v3.c   |  17 +++
 xen/include/asm-arm/bitops.h|   1 +
 xen/include/asm-arm/config.h|   2 +
 xen/include/asm-arm/gic_v3_defs.h   |  54 -
 xen/include/asm-arm/gic_v3_its.h|  15 ++-
 xen/include/asm-arm/irq.h   |   8 ++
 xen/include/xen/bitops.h|   5 +-
 10 files changed, 322 insertions(+), 3 deletions(-)
 create mode 100644 xen/arch/arm/gic-v3-lpi.c

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 5a90625..ae8de4f 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1181,6 +1181,15 @@ specific size, typically 8 or 16 bytes. This value is 
given as the number
 of bits required to hold one device ID.
 Defaults to the machine provided value, which is at most 32 bits.
 
+### max\_lpi\_bits
+> `= `
+
+Specifies the number of ARM GICv3 LPI interrupts to allocate on the host,
+presented as the number of bits needed to encode it. This must be at least
+14 and not exceed 32, and each LPI requires one byte (configuration) and
+one pending bit to be allocated.
+Defaults to 20 bits (to cover at most 1048576 interrupts).
+
 ### mce
 > `= `
 
diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
index 39c0a03..6be85ab 100644
--- a/xen/arch/arm/Makefile
+++ b/xen/arch/arm/Makefile
@@ -19,6 +19,7 @@ obj-y += gic.o
 obj-y += gic-v2.o
 obj-$(CONFIG_HAS_GICV3) += gic-v3.o
 obj-$(CONFIG_HAS_ITS) += gic-v3-its.o
+obj-$(CONFIG_HAS_ITS) += gic-v3-lpi.o
 obj-y += guestcopy.o
 obj-y += hvm.o
 obj-y += io.o
diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
new file mode 100644
index 000..a003a72
--- /dev/null
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -0,0 +1,213 @@
+/*
+ * xen/arch/arm/gic-v3-lpi.c
+ *
+ * ARM GICv3 Locality-specific Peripheral Interrupts (LPI) support
+ *
+ * Copyright (C) 2016,2017 - ARM Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define LPI_PROPTABLE_NEEDS_FLUSHING(1U << 0)
+/* Global state */
+static struct {
+/* The global LPI property table, shared by all redistributors. */
+uint8_t *lpi_property;
+/*
+ * Number of physical LPIs the host supports. This is a property of
+ * the GIC hardware. We depart from the habit of naming these things
+ * "physical" in Xen, as the GICv3/4 spec uses the term "physical LPI"
+ * in a different context to differentiate them from "virtual LPIs".
+ */
+unsigned long int nr_host_lpis;
+unsigned int flags;
+} lpi_data;
+
+struct lpi_redist_data {
+void*pending_table;
+};
+
+static DEFINE_PER_CPU(struct lpi_redist_data, lpi_redist);
+
+#define MAX_PHYS_LPIS   (lpi_data.nr_host_lpis - LPI_OFFSET)
+
+static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
+{
+uint64_t val;
+void *pendtable;
+
+if ( this_cpu(lpi_redist).pending_table )
+return -EBUSY;
+
+val  = GIC_BASER_CACHE_RaWaWb << GICR_PENDBASER_INNER_CACHEABILITY_SHIFT;
+val |= GIC_BASER_CACHE_SameAsInner << 
GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT;
+val |= GIC_BASER_InnerShareable << GICR_PENDBASER_SHAREABILITY_SHIFT;
+
+/*
+ * The pending table holds one bit per LPI and even covers bits for
+ * interrupt IDs below 8192, so we allocate the full range.
+ * The GICv3 imposes a 64KB alignment requirement, also requires
+ * physically contiguous memory.
+ */
+pendtable = _xzalloc(lpi_data.nr_host_lpis / 8, SZ_64K);
+if ( !pendtable )
+return -ENOMEM;
+
+/* Make sure the physical address can be encoded in the register. */
+if ( (virt_to_maddr(pendtable) & 

[Xen-devel] [PATCH v4 04/27] ARM: GICv3 ITS: allocate device and collection table

2017-04-03 Thread Andre Przywara
Each ITS maps a pair of a DeviceID (for instance derived from a PCI
b/d/f triplet) and an EventID (the MSI payload or interrupt ID) to a
pair of LPI number and collection ID, which points to the target CPU.
This mapping is stored in the device and collection tables, which software
has to provide for the ITS to use.
Allocate the required memory and hand it to the ITS.
The maximum number of devices can be limited to a compile-time variable.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic-v3-its.c| 132 +++
 xen/include/asm-arm/gic_v3_its.h |  32 ++
 2 files changed, 164 insertions(+)

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index 58c6ac0..00a1f7b 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -35,6 +35,105 @@ bool gicv3_its_host_has_its(void)
 return !list_empty(_its_list);
 }
 
+#define BASER_ATTR_MASK   \
+((0x3UL << GITS_BASER_SHAREABILITY_SHIFT)   | \
+ (0x7UL << GITS_BASER_OUTER_CACHEABILITY_SHIFT) | \
+ (0x7UL << GITS_BASER_INNER_CACHEABILITY_SHIFT))
+#define BASER_RO_MASK   (GENMASK_ULL(58, 56) | GENMASK_ULL(52, 48))
+
+/*
+ * Check that the physical address backing vaddr can be encoded in an ITS
+ * BASER register for the given page size: no address bit below page_bits may
+ * be set (alignment), and none above bit 47 (bit 51 when page_bits is 16 or
+ * more).
+ * NOTE(review): this comment used to name the PROPBASER register, but the
+ * visible caller is its_map_baser() - confirm which register is meant.
+ */
+static bool check_baser_phys_addr(void *vaddr, unsigned int page_bits)
+{
+paddr_t paddr = virt_to_maddr(vaddr);
+
+return (!(paddr & ~GENMASK_ULL(page_bits < 16 ? 47 : 51, page_bits)));
+}
+
+/*
+ * Convert a physical address into the layout of a BASER register's address
+ * field. Bits 47 down to page_bits are kept in place; for page_bits of 16 or
+ * more, address bits 51-48 are additionally moved down to bits 15-12.
+ */
+static uint64_t encode_baser_phys_addr(paddr_t addr, unsigned int page_bits)
+{
+    uint64_t encoded = addr & GENMASK_ULL(47, page_bits);
+
+    /* For 64K pages address bits 51-48 are encoded in bits 15-12. */
+    if ( page_bits >= 16 )
+        encoded |= (addr & GENMASK_ULL(51, 48)) >> (48 - 12);
+
+    return encoded;
+}
+
+/* The ITS BASE registers work with page sizes of 4K, 16K or 64K. */
+#define BASER_PAGE_BITS(sz) ((sz) * 2 + 12)
+
+/*
+ * Allocate the memory for one ITS table and program the given BASER register
+ * with it.
+ *
+ * basereg:  MMIO address of the BASER register to set up
+ * regc:     current content of that register; supplies the entry size and
+ *           the fields covered by BASER_RO_MASK
+ * nr_items: number of table entries to make room for
+ *
+ * Page sizes are tried from 64K downwards until the ITS accepts one.
+ * Returns 0 on success, -ENOMEM if the allocation fails, -ERANGE if the
+ * buffer's physical address cannot be encoded in the register, and -EINVAL
+ * if the ITS accepted none of the page sizes.
+ */
+static int its_map_baser(void __iomem *basereg, uint64_t regc,
+                         unsigned int nr_items)
+{
+    uint64_t attr, reg;
+    unsigned int entry_size = GITS_BASER_ENTRY_SIZE(regc);
+    unsigned int pagesz = 2;    /* try 64K pages first, then go down. */
+    unsigned int table_size;
+    void *buffer;
+
+    attr  = GIC_BASER_InnerShareable << GITS_BASER_SHAREABILITY_SHIFT;
+    attr |= GIC_BASER_CACHE_SameAsInner << GITS_BASER_OUTER_CACHEABILITY_SHIFT;
+    attr |= GIC_BASER_CACHE_RaWaWb << GITS_BASER_INNER_CACHEABILITY_SHIFT;
+
+    /*
+     * Setup the BASE register with the attributes that we like. Then read
+     * it back and see what sticks (page size, cacheability and shareability
+     * attributes), retrying if necessary.
+     */
+retry:
+    table_size = ROUNDUP(nr_items * entry_size, BIT(BASER_PAGE_BITS(pagesz)));
+    /* The BASE registers support at most 256 pages. */
+    table_size = min(table_size, 256U << BASER_PAGE_BITS(pagesz));
+
+    buffer = _xzalloc(table_size, BIT(BASER_PAGE_BITS(pagesz)));
+    if ( !buffer )
+        return -ENOMEM;
+
+    if ( !check_baser_phys_addr(buffer, BASER_PAGE_BITS(pagesz)) )
+    {
+        xfree(buffer);
+        return -ERANGE;
+    }
+
+    reg  = attr;
+    reg |= (pagesz << GITS_BASER_PAGE_SIZE_SHIFT);
+    reg |= (table_size >> BASER_PAGE_BITS(pagesz)) - 1;
+    reg |= regc & BASER_RO_MASK;
+    reg |= GITS_VALID_BIT;
+    reg |= encode_baser_phys_addr(virt_to_maddr(buffer),
+                                  BASER_PAGE_BITS(pagesz));
+
+    writeq_relaxed(reg, basereg);
+    regc = readq_relaxed(basereg);
+
+    /* The host didn't like our attributes, just use what it returned. */
+    if ( (regc & BASER_ATTR_MASK) != attr )
+    {
+        /* If we can't map it shareable, drop cacheability as well. */
+        if ( (regc & GITS_BASER_SHAREABILITY_MASK) == GIC_BASER_NonShareable )
+        {
+            regc &= ~GITS_BASER_INNER_CACHEABILITY_MASK;
+            writeq_relaxed(regc, basereg);
+        }
+        attr = regc & BASER_ATTR_MASK;
+    }
+
+    /*
+     * Flush the table if the ITS accesses it non-cacheable. The field has to
+     * be shifted down first: GIC_BASER_CACHE_nC is an unshifted encoding,
+     * whereas the mask selects the field at its position in the register.
+     */
+    if ( ((regc & GITS_BASER_INNER_CACHEABILITY_MASK) >>
+          GITS_BASER_INNER_CACHEABILITY_SHIFT) <= GIC_BASER_CACHE_nC )
+        clean_and_invalidate_dcache_va_range(buffer, table_size);
+
+    /* If the host accepted our page size, we are done. */
+    if ( ((regc >> GITS_BASER_PAGE_SIZE_SHIFT) & 0x3UL) == pagesz )
+        return 0;
+
+    xfree(buffer);
+
+    if ( pagesz-- > 0 )
+        goto retry;
+
+    /* None of the page sizes was accepted, give up */
+    return -EINVAL;
+}
+
 /* Allow a user to limit the number of devices. */
 static unsigned int max_its_device_bits = 32;
 integer_param("max_its_device_bits", max_its_device_bits);
@@ -42,6 +141,7 @@ integer_param("max_its_device_bits", max_its_device_bits);
 static int gicv3_its_init_single_its(struct host_its *hw_its)
 {
 uint64_t reg;
+int i, ret;
 
 hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
 if ( !hw_its->its_base )
@@ -53,6 +153,38 @@ static int 

[Xen-devel] [PATCH v4 11/27] ARM: GICv3: enable ITS and LPIs on the host

2017-04-03 Thread Andre Przywara
Now that the host part of the ITS code is in place, we can enable the
ITS and also LPIs on each redistributor to get the show rolling.
At this point there would be no LPIs mapped, as guests don't know about
the ITS yet.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic-v3-its.c |  4 
 xen/arch/arm/gic-v3.c | 18 ++
 2 files changed, 22 insertions(+)

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index c7c32b9..880c7fc 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -488,6 +488,10 @@ static int gicv3_its_init_single_its(struct host_its 
*hw_its)
 return -ENOMEM;
 writeq_relaxed(0, hw_its->its_base + GITS_CWRITER);
 
+/* Now enable interrupt translation and command processing on that ITS. */
+reg = readl_relaxed(hw_its->its_base + GITS_CTLR);
+writel_relaxed(reg | GITS_CTLR_ENABLE, hw_its->its_base + GITS_CTLR);
+
 return 0;
 }
 
diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index a559e5e..eda3410 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -620,6 +620,21 @@ static int gicv3_enable_redist(void)
 return 0;
 }
 
+/*
+ * Enable LPIs on this redistributor (only useful when the host has an ITS).
+ * Returns false, without touching GICR_CTLR, when GICR_TYPER does not have
+ * the PLPIS bit set; returns true after setting the enable bit otherwise.
+ */
+static bool gicv3_enable_lpis(void)
+{
+    uint32_t typer = readl_relaxed(GICD_RDIST_BASE + GICR_TYPER);
+    uint32_t ctlr;
+
+    if ( (typer & GICR_TYPER_PLPIS) == 0 )
+        return false;
+
+    ctlr = readl_relaxed(GICD_RDIST_BASE + GICR_CTLR);
+    writel_relaxed(ctlr | GICR_CTLR_ENABLE_LPIS, GICD_RDIST_BASE + GICR_CTLR);
+
+    return true;
+}
+
 static int __init gicv3_populate_rdist(void)
 {
 int i;
@@ -731,11 +746,14 @@ static int gicv3_cpu_init(void)
 if ( gicv3_enable_redist() )
 return -ENODEV;
 
+/* If the host has any ITSes, enable LPIs now. */
 if ( gicv3_its_host_has_its() )
 {
 ret = gicv3_its_setup_collection(smp_processor_id());
 if ( ret )
 return ret;
+if ( !gicv3_enable_lpis() )
+return -EBUSY;
 }
 
 /* Set priority on PPI and SGI interrupts */
-- 
2.9.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 18/27] ARM: vITS: handle MAPC command

2017-04-03 Thread Andre Przywara
The MAPC command associates a given collection ID with a given
redistributor, thus mapping collections to VCPUs.
We just store the vcpu_id in the collection table for that.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/vgic-v3-its.c | 46 ++
 1 file changed, 46 insertions(+)

diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index dd43eaf..639fbbf 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -87,6 +87,26 @@ static paddr_t get_baser_phys_addr(uint64_t reg)
 return reg & GENMASK_ULL(47, 12);
 }
 
+static int its_set_collection(struct virt_its *its, uint16_t collid,
+  uint16_t vcpu_id)
+{
+paddr_t addr = get_baser_phys_addr(its->baser_coll);
+uint16_t *coll_table;
+
+if ( collid >= its->max_collections )
+return -ENOENT;
+
+coll_table = map_one_guest_page(its->d, addr + collid * sizeof(uint16_t));
+if ( !coll_table )
+return -EFAULT;
+
+*coll_table = vcpu_id;
+
+unmap_one_guest_page(coll_table);
+
+return 0;
+}
+
 /* Must be called with the ITS lock held. */
 static struct vcpu *get_vcpu_from_collection(struct virt_its *its,
  uint16_t collid)
@@ -324,6 +344,29 @@ static int its_handle_int(struct virt_its *its, uint64_t 
*cmdptr)
 return 0;
 }
 
+static int its_handle_mapc(struct virt_its *its, uint64_t *cmdptr)
+{
+uint32_t collid = its_cmd_get_collection(cmdptr);
+uint64_t rdbase = its_cmd_mask_field(cmdptr, 2, 16, 44);
+
+if ( collid >= its->max_collections )
+return -1;
+
+if ( rdbase >= its->d->max_vcpus )
+return -1;
+
+spin_lock(&its->its_lock);
+
+if ( its_cmd_get_validbit(cmdptr) )
+its_set_collection(its, collid, rdbase);
+else
+its_set_collection(its, collid, UNMAPPED_COLLECTION);
+
+spin_unlock(&its->its_lock);
+
+return 0;
+}
+
 #define ITS_CMD_BUFFER_SIZE(baser)  ((((baser) & 0xff) + 1) << 12)
 
 static int vgic_its_handle_cmds(struct domain *d, struct virt_its *its,
@@ -367,6 +410,9 @@ static int vgic_its_handle_cmds(struct domain *d, struct 
virt_its *its,
 case GITS_CMD_INT:
 ret = its_handle_int(its, cmdptr);
 break;
+case GITS_CMD_MAPC:
+ret = its_handle_mapc(its, cmdptr);
+break;
 case GITS_CMD_SYNC:
 /* We handle ITS commands synchronously, so we ignore SYNC. */
break;
-- 
2.9.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 17/27] ARM: vITS: handle INT command

2017-04-03 Thread Andre Przywara
The INT command sets a given LPI identified by a DeviceID/EventID pair
as pending and thus triggers it to be injected.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/vgic-v3-its.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index cc1d7a0..dd43eaf 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -297,6 +297,33 @@ static int its_handle_clear(struct virt_its *its, uint64_t 
*cmdptr)
 return 0;
 }
 
+static int its_handle_int(struct virt_its *its, uint64_t *cmdptr)
+{
+uint32_t devid = its_cmd_get_deviceid(cmdptr);
+uint32_t eventid = its_cmd_get_id(cmdptr);
+struct pending_irq *p;
+struct vcpu *vcpu;
+uint32_t vlpi;
+
+if ( !read_itte(its, devid, eventid, &vcpu, &vlpi) )
+return -1;
+
+p = lpi_to_pending(its->d, vlpi);
+if ( !p )
+return -1;
+
+/*
+ * If the LPI is enabled, inject it.
+ * If not, store the pending state to inject it once it gets enabled later.
+ */
+if ( test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) )
+vgic_vcpu_inject_irq(vcpu, vlpi);
+else
+set_bit(GIC_IRQ_GUEST_LPI_PENDING, &p->status);
+
+return 0;
+}
+
 #define ITS_CMD_BUFFER_SIZE(baser)  ((((baser) & 0xff) + 1) << 12)
 
 static int vgic_its_handle_cmds(struct domain *d, struct virt_its *its,
@@ -337,6 +364,9 @@ static int vgic_its_handle_cmds(struct domain *d, struct 
virt_its *its,
 case GITS_CMD_CLEAR:
 ret = its_handle_clear(its, cmdptr);
 break;
+case GITS_CMD_INT:
+ret = its_handle_int(its, cmdptr);
+break;
 case GITS_CMD_SYNC:
 /* We handle ITS commands synchronously, so we ignore SYNC. */
break;
-- 
2.9.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 07/27] ARM: GICv3 ITS: introduce host LPI array

2017-04-03 Thread Andre Przywara
The number of LPIs on a host can potentially be huge (millions),
although in practice it will mostly be reasonable. So allocating
an array of struct irq_desc's for every LPI up front is not an option.
However Xen itself does not care about LPIs, as every LPI will be injected
into a guest (Dom0 for now).
Create a dense data structure (8 Bytes) for each LPI which holds just
enough information to determine the virtual IRQ number and the VCPU into
which the LPI needs to be injected.
Also to not artificially limit the number of LPIs, we create a 2-level
table for holding those structures.
This patch introduces functions to initialize these tables and to
create, lookup and destroy entries for a given LPI.
By using the naturally atomic access guarantee the native uint64_t data
type gives us, we allocate and access LPI information in a way that does
not require a lock.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic-v3-its.c|  58 +++
 xen/arch/arm/gic-v3-lpi.c| 210 +++
 xen/include/asm-arm/gic.h|   2 +
 xen/include/asm-arm/gic_v3_its.h |   6 ++
 xen/include/asm-arm/irq.h|   5 +
 5 files changed, 281 insertions(+)

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index 8cc0ad8..bd189e8 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -142,6 +142,20 @@ static int its_send_cmd_sync(struct host_its *its, 
unsigned int cpu)
 return its_send_command(its, cmd);
 }
 
+static int its_send_cmd_mapti(struct host_its *its,
+  uint32_t deviceid, uint32_t eventid,
+  uint32_t pintid, uint16_t icid)
+{
+uint64_t cmd[4];
+
+cmd[0] = GITS_CMD_MAPTI | ((uint64_t)deviceid << 32);
+cmd[1] = eventid | ((uint64_t)pintid << 32);
+cmd[2] = icid;
+cmd[3] = 0x00;
+
+return its_send_command(its, cmd);
+}
+
 static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
  unsigned int cpu)
 {
@@ -156,6 +170,19 @@ static int its_send_cmd_mapc(struct host_its *its, 
uint32_t collection_id,
 return its_send_command(its, cmd);
 }
 
+static int its_send_cmd_inv(struct host_its *its,
+uint32_t deviceid, uint32_t eventid)
+{
+uint64_t cmd[4];
+
+cmd[0] = GITS_CMD_INV | ((uint64_t)deviceid << 32);
+cmd[1] = eventid;
+cmd[2] = 0x00;
+cmd[3] = 0x00;
+
+return its_send_command(its, cmd);
+}
+
 /* Set up the (1:1) collection mapping for the given host CPU. */
 int gicv3_its_setup_collection(unsigned int cpu)
 {
@@ -437,6 +464,37 @@ int gicv3_its_init(void)
 return 0;
 }
 
+/*
+ * On the host ITS @its, map @nr_events consecutive LPIs.
+ * The mapping connects a device @devid and event @eventid pair to LPI @lpi,
+ * increasing both @eventid and @lpi to cover the number of requested LPIs.
+ */
+static int gicv3_its_map_host_events(struct host_its *its,
+ uint32_t devid, uint32_t eventid,
+ uint32_t lpi, uint32_t nr_events)
+{
+uint32_t i;
+int ret;
+
+for ( i = 0; i < nr_events; i++ )
+{
+/* For now we map every host LPI to host CPU 0 */
+ret = its_send_cmd_mapti(its, devid, eventid + i, lpi + i, 0);
+if ( ret )
+return ret;
+
+ret = its_send_cmd_inv(its, devid, eventid + i);
+if ( ret )
+return ret;
+}
+
+ret = its_send_cmd_sync(its, 0);
+if ( ret )
+return ret;
+
+return gicv3_its_wait_commands(its);
+}
+
 /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
 void gicv3_its_dt_init(const struct dt_device_node *node)
 {
diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
index 8b3660a..d3ee141 100644
--- a/xen/arch/arm/gic-v3-lpi.c
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -20,25 +20,59 @@
 
 #include 
 #include 
+#include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 
+/*
+ * There could be a lot of LPIs on the host side, and they always go to
+ * a guest. So having a struct irq_desc for each of them would be wasteful
+ * and useless.
+ * Instead just store enough information to find the right VCPU to inject
+ * those LPIs into, which just requires the virtual LPI number.
+ * To avoid a global lock on this data structure, this is using a lockless
+ * approach relying on the architectural atomicity of native data types:
+ * We read or write the "data" view of this union atomically, then can
+ * access the broken-down fields in our local copy.
+ */
+union host_lpi {
+uint64_t data;
+struct {
+uint32_t virt_lpi;
+uint16_t dom_id;
+uint16_t vcpu_id;
+};
+};
+
 #define LPI_PROPTABLE_NEEDS_FLUSHING(1U << 0)
 /* Global state */
 static struct {
 /* The global LPI property table, shared by all redistributors. */
 uint8_t *lpi_property;
 

[Xen-devel] [PATCH v4 08/27] ARM: GICv3 ITS: introduce device mapping

2017-04-03 Thread Andre Przywara
The ITS uses device IDs to map LPIs to a device. Dom0 will later use
those IDs, which we directly pass on to the host.
For this we have to map each device that Dom0 may request to a host
ITS device with the same identifier.
Allocate the respective memory and enter each device into an rbtree to
later be able to iterate over it or to easily teardown guests.
Because device IDs are per ITS, we need to identify a virtual ITS. We
use the doorbell address for that purpose, as it is a nice architectural
MSI property and spares us from handling opaque pointers or breaking
the VGIC abstraction.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic-v3-its.c| 266 +++
 xen/arch/arm/vgic-v3.c   |   4 +
 xen/include/asm-arm/domain.h |   3 +
 xen/include/asm-arm/gic_v3_its.h |  17 +++
 4 files changed, 290 insertions(+)

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index bd189e8..c7c32b9 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -21,6 +21,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -36,6 +38,26 @@
  */
 LIST_HEAD(host_its_list);
 
+/*
+ * Describes a device which is using the ITS and is used by a guest.
+ * Since device IDs are per ITS (in contrast to vLPIs, which are per
+ * guest), we have to differentiate between different virtual ITSes.
+ * We use the doorbell address here, since this is a nice architectural
+ * property of MSIs in general and we can easily get to the base address
+ * of the ITS and look that up.
+ */
+struct its_devices {
+struct rb_node rbnode;
+struct host_its *hw_its;
+void *itt_addr;
+paddr_t guest_doorbell; /* Identifies the virtual ITS */
+uint32_t host_devid;
+uint32_t guest_devid;
+uint32_t eventids;  /* Number of event IDs (MSIs) */
+uint32_t *host_lpi_blocks;  /* Which LPIs are used on the host */
+struct pending_irq *pend_irqs;  /* One struct per event */
+};
+
 bool gicv3_its_host_has_its(void)
 {
 return !list_empty(&host_its_list);
@@ -170,6 +192,26 @@ static int its_send_cmd_mapc(struct host_its *its, 
uint32_t collection_id,
 return its_send_command(its, cmd);
 }
 
+static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
+ uint8_t size_bits, paddr_t itt_addr, bool valid)
+{
+uint64_t cmd[4];
+
+if ( valid )
+{
+ASSERT(size_bits <= its->evid_bits);
+ASSERT(!(itt_addr & ~GENMASK_ULL(51, 8)));
+}
+cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
+cmd[1] = size_bits - 1;
+cmd[2] = itt_addr;
+if ( valid )
+cmd[2] |= GITS_VALID_BIT;
+cmd[3] = 0x00;
+
+return its_send_command(its, cmd);
+}
+
 static int its_send_cmd_inv(struct host_its *its,
 uint32_t deviceid, uint32_t eventid)
 {
@@ -464,6 +506,64 @@ int gicv3_its_init(void)
 return 0;
 }
 
+static int remove_mapped_guest_device(struct its_devices *dev)
+{
+int ret = 0;
+unsigned int i;
+
+if ( dev->hw_its )
+/* MAPD also discards all events with this device ID. */
+ret = its_send_cmd_mapd(dev->hw_its, dev->host_devid, 1, 0, false);
+
+for ( i = 0; i < DIV_ROUND_UP(dev->eventids, LPI_BLOCK); i++ )
+gicv3_free_host_lpi_block(dev->host_lpi_blocks[i]);
+
+/* Make sure the MAPD command above is really executed. */
+if ( !ret )
+ret = gicv3_its_wait_commands(dev->hw_its);
+
+/* We can't free the ITT memory if the MAPD(V=0) failed for any reason. */
+if ( !ret )
+xfree(dev->itt_addr);
+
+xfree(dev->pend_irqs);
+xfree(dev->host_lpi_blocks);
+xfree(dev);
+
+return 0;
+}
+
+static struct host_its *gicv3_its_find_by_doorbell(paddr_t doorbell_address)
+{
+struct host_its *hw_its;
+
+list_for_each_entry(hw_its, &host_its_list, entry)
+{
+if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )
+return hw_its;
+}
+
+return NULL;
+}
+
+static int compare_its_guest_devices(struct its_devices *dev,
+ paddr_t doorbell, uint32_t devid)
+{
+if ( dev->guest_doorbell < doorbell )
+return -1;
+
+if ( dev->guest_doorbell > doorbell )
+return 1;
+
+if ( dev->guest_devid < devid )
+return -1;
+
+if ( dev->guest_devid > devid )
+return 1;
+
+return 0;
+}
+
 /*
  * On the host ITS @its, map @nr_events consecutive LPIs.
  * The mapping connects a device @devid and event @eventid pair to LPI @lpi,
@@ -495,6 +595,172 @@ static int gicv3_its_map_host_events(struct host_its *its,
 return gicv3_its_wait_commands(its);
 }
 
+/*
+ * Map a hardware device, identified by a certain host ITS and its device ID
+ * to domain d, a guest ITS (identified by its doorbell address) and device ID.
+ * Also provide the number of events (MSIs) needed for that device.
+ * This 

[Xen-devel] [PATCH v4 02/27] ARM: GICv3 ITS: initialize host ITS

2017-04-03 Thread Andre Przywara
Map the registers frame for each host ITS and populate the host ITS
structure with some parameters describing the size of certain properties
like the number of bits for device IDs.
Introduce a command line parameter to limit the number of devices Xen
should handle. This defaults to the value advertised by hardware.

Signed-off-by: Andre Przywara 
---
 docs/misc/xen-command-line.markdown |  9 
 xen/arch/arm/gic-v3-its.c   | 38 
 xen/arch/arm/gic-v3.c   |  5 +
 xen/include/asm-arm/gic_v3_its.h| 44 +
 4 files changed, 96 insertions(+)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 9eb85d6..5a90625 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1172,6 +1172,15 @@ based interrupts. Any higher IRQs will be available for 
use via PCI MSI.
 ### maxcpus
 > `= `
 
+### max\_its\_device\_bits
+> `= `
+
+Specifies the maximum number of devices using MSIs on the ARM GICv3 ITS
+controller to allocate table entries for. Each table entry uses a hardware
+specific size, typically 8 or 16 bytes. This value is given as the number
+of bits required to hold one device ID.
+Defaults to the machine provided value, which is at most 32 bits.
+
 ### mce
 > `= `
 
diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index 6b02349..58c6ac0 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -19,8 +19,10 @@
  */
 
 #include 
+#include 
 #include 
 #include 
+#include 
 
 /*
  * No lock here, as this list gets only populated upon boot while scanning
@@ -33,6 +35,42 @@ bool gicv3_its_host_has_its(void)
 return !list_empty(&host_its_list);
 }
 
+/* Allow a user to limit the number of devices. */
+static unsigned int max_its_device_bits = 32;
+integer_param("max_its_device_bits", max_its_device_bits);
+
+static int gicv3_its_init_single_its(struct host_its *hw_its)
+{
+uint64_t reg;
+
+hw_its->its_base = ioremap_nocache(hw_its->addr, hw_its->size);
+if ( !hw_its->its_base )
+return -ENOMEM;
+
+reg = readq_relaxed(hw_its->its_base + GITS_TYPER);
+hw_its->devid_bits = GITS_TYPER_DEVICE_ID_BITS(reg);
+hw_its->devid_bits = min(hw_its->devid_bits, max_its_device_bits);
+hw_its->evid_bits = GITS_TYPER_EVENT_ID_BITS(reg);
+hw_its->itte_size = GITS_TYPER_ITT_SIZE(reg);
+
+return 0;
+}
+
+int gicv3_its_init(void)
+{
+struct host_its *hw_its;
+int ret;
+
+list_for_each_entry(hw_its, &host_its_list, entry)
+{
+ret = gicv3_its_init_single_its(hw_its);
+if ( ret )
+return ret;
+}
+
+return 0;
+}
+
 /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
 void gicv3_its_dt_init(const struct dt_device_node *node)
 {
diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index b626298..d3d5784 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -1590,6 +1590,11 @@ static int __init gicv3_init(void)
 spin_lock(&gicv3.lock);
 
 gicv3_dist_init();
+
+res = gicv3_its_init();
+if ( res )
+panic("GICv3: ITS: initialization failed: %d\n", res);
+
 res = gicv3_cpu_init();
 gicv3_hyp_init();
 
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
index 765a655..7d88987 100644
--- a/xen/include/asm-arm/gic_v3_its.h
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -20,6 +20,38 @@
 #ifndef __ASM_ARM_ITS_H__
 #define __ASM_ARM_ITS_H__
 
+#define GITS_CTLR   0x000
+#define GITS_IIDR   0x004
+#define GITS_TYPER  0x008
+#define GITS_CBASER 0x080
+#define GITS_CWRITER0x088
+#define GITS_CREADR 0x090
+#define GITS_BASER_NR_REGS  8
+#define GITS_BASER0 0x100
+#define GITS_BASER1 0x108
+#define GITS_BASER2 0x110
+#define GITS_BASER3 0x118
+#define GITS_BASER4 0x120
+#define GITS_BASER5 0x128
+#define GITS_BASER6 0x130
+#define GITS_BASER7 0x138
+
+/* Register bits */
+#define GITS_TYPER_DEVIDS_SHIFT 13
+#define GITS_TYPER_DEVIDS_MASK  (0x1fUL << GITS_TYPER_DEVIDS_SHIFT)
+#define GITS_TYPER_DEVICE_ID_BITS(r)(((r & GITS_TYPER_DEVIDS_MASK) >> \
+   GITS_TYPER_DEVIDS_SHIFT) + 1)
+
+#define GITS_TYPER_IDBITS_SHIFT 8
+#define GITS_TYPER_IDBITS_MASK  (0x1fUL << GITS_TYPER_IDBITS_SHIFT)
+#define GITS_TYPER_EVENT_ID_BITS(r) (((r & GITS_TYPER_IDBITS_MASK) >> \
+   GITS_TYPER_IDBITS_SHIFT) + 1)
+
+#define GITS_TYPER_ITT_SIZE_SHIFT   4
+#define GITS_TYPER_ITT_SIZE_MASK(0xfUL << GITS_TYPER_ITT_SIZE_SHIFT)
+#define GITS_TYPER_ITT_SIZE(r)   

[Xen-devel] [PATCH v4 27/27] ARM: vGIC: advertise LPI support

2017-04-03 Thread Andre Przywara
To let a guest know about the availability of virtual LPIs, set the
respective bits in the virtual GIC registers and let a guest control
the LPI enable bit.
Only report the LPI capability if the host has initialized at least
one ITS.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/vgic-v3.c | 74 ++
 1 file changed, 69 insertions(+), 5 deletions(-)

diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index 3fc309e..a6a0126 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -168,8 +168,10 @@ static int __vgic_v3_rdistr_rd_mmio_read(struct vcpu *v, 
mmio_info_t *info,
 switch ( gicr_reg )
 {
 case VREG32(GICR_CTLR):
-/* We have not implemented LPI's, read zero */
-goto read_as_zero_32;
+if ( dabt.size != DABT_WORD ) goto bad_width;
+*r = vgic_reg32_extract(!!(v->arch.vgic.flags & VGIC_V3_LPIS_ENABLED),
+info);
+return 1;
 
 case VREG32(GICR_IIDR):
 if ( dabt.size != DABT_WORD ) goto bad_width;
@@ -181,16 +183,19 @@ static int __vgic_v3_rdistr_rd_mmio_read(struct vcpu *v, 
mmio_info_t *info,
 uint64_t typer, aff;
 
 if ( !vgic_reg64_check_access(dabt) ) goto bad_width;
-/* TBD: Update processor id in [23:8] when ITS support is added */
 aff = (MPIDR_AFFINITY_LEVEL(v->arch.vmpidr, 3) << 56 |
MPIDR_AFFINITY_LEVEL(v->arch.vmpidr, 2) << 48 |
MPIDR_AFFINITY_LEVEL(v->arch.vmpidr, 1) << 40 |
MPIDR_AFFINITY_LEVEL(v->arch.vmpidr, 0) << 32);
 typer = aff;
+typer |= (v->vcpu_id & 0xffff) << 8;
 
 if ( v->arch.vgic.flags & VGIC_V3_RDIST_LAST )
 typer |= GICR_TYPER_LAST;
 
+if ( v->domain->arch.vgic.has_its )
+typer |= GICR_TYPER_PLPIS;
+
 *r = vgic_reg64_extract(typer, info);
 
 return 1;
@@ -433,6 +438,35 @@ static uint64_t sanitize_pendbaser(uint64_t reg)
 return reg;
 }
 
+static void vgic_vcpu_enable_lpis(struct vcpu *v)
+{
+uint64_t reg = v->domain->arch.vgic.rdist_propbase;
+unsigned int nr_lpis = BIT((reg & 0x1f) + 1) - LPI_OFFSET;
+int nr_pages;
+
+/* The first VCPU to enable LPIs maps the property table. */
+if ( !v->domain->arch.vgic.nr_lpis )
+{
+v->domain->arch.vgic.nr_lpis = nr_lpis;
+
+nr_pages = DIV_ROUND_UP(nr_lpis, PAGE_SIZE);
+get_guest_pages(v->domain, reg & GENMASK_ULL(51, 12), nr_pages);
+gprintk(XENLOG_INFO, "VGIC-v3: VCPU%d mapped %d pages for property 
table\n",
+   v->vcpu_id, nr_pages);
+}
+nr_pages = DIV_ROUND_UP(((nr_lpis + LPI_OFFSET) / 8), PAGE_SIZE);
+reg = v->arch.vgic.rdist_pendbase;
+
+get_guest_pages(v->domain, reg & GENMASK_ULL(51, 12), nr_pages);
+
+gprintk(XENLOG_INFO, "VGIC-v3: VCPU%d mapped %d pages for pending table\n",
+v->vcpu_id, nr_pages);
+
+v->arch.vgic.flags |= VGIC_V3_LPIS_ENABLED;
+
+printk("VGICv3: enabled %d LPIs for VCPU%d\n", nr_lpis, v->vcpu_id);
+}
+
 static int __vgic_v3_rdistr_rd_mmio_write(struct vcpu *v, mmio_info_t *info,
   uint32_t gicr_reg,
   register_t r)
@@ -443,8 +477,18 @@ static int __vgic_v3_rdistr_rd_mmio_write(struct vcpu *v, 
mmio_info_t *info,
 switch ( gicr_reg )
 {
 case VREG32(GICR_CTLR):
-/* LPI's not implemented */
-goto write_ignore_32;
+if ( dabt.size != DABT_WORD ) goto bad_width;
+if ( !v->domain->arch.vgic.has_its )
+return 1;
+
+/* LPIs can only be enabled once, but never disabled again. */
+if ( !(r & GICR_CTLR_ENABLE_LPIS) ||
+ (v->arch.vgic.flags & VGIC_V3_LPIS_ENABLED) )
+return 1;
+
+vgic_vcpu_enable_lpis(v);
+
+return 1;
 
 case VREG32(GICR_IIDR):
 /* RO */
@@ -1044,6 +1088,11 @@ static int vgic_v3_distr_mmio_read(struct vcpu *v, 
mmio_info_t *info,
 typer = ((ncpus - 1) << GICD_TYPE_CPUS_SHIFT |
  DIV_ROUND_UP(v->domain->arch.vgic.nr_spis, 32));
 
+if ( v->domain->arch.vgic.has_its )
+{
+typer |= GICD_TYPE_LPIS;
+irq_bits = 16;
+}
 typer |= (irq_bits - 1) << GICD_TYPE_ID_BITS_SHIFT;
 
 *r = vgic_reg32_extract(typer, info);
@@ -1665,6 +1714,21 @@ static int vgic_v3_domain_init(struct domain *d)
 
 static void vgic_v3_domain_free(struct domain *d)
 {
+int nr_pages;
+struct vcpu *v;
+
+if ( d->arch.vgic.nr_lpis )
+{
+nr_pages = DIV_ROUND_UP(d->arch.vgic.nr_lpis, PAGE_SIZE);
+put_guest_pages(d, d->arch.vgic.rdist_propbase & GENMASK_ULL(51, 12),
+nr_pages);
+
+nr_pages = DIV_ROUND_UP((d->arch.vgic.nr_lpis + LPI_OFFSET) / 8,
+PAGE_SIZE);
+for_each_vcpu(d, v)
+put_guest_pages(d, 

[Xen-devel] [PATCH v4 19/27] ARM: vITS: handle MAPD command

2017-04-03 Thread Andre Przywara
The MAPD command maps a device by associating a memory region for
storing ITEs with a certain device ID.
We store the given guest physical address in the device table, and, if
this command comes from Dom0, tell the host ITS driver about this new
mapping, so it can issue the corresponding host MAPD command and create
the required tables.
We don't map the device tables permanently, as their alignment
requirement is only 256 Bytes, thus making mapping of several tables
complicated. Instead we map the device tables on demand when we need
them later.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/vgic-v3-its.c | 63 ++
 1 file changed, 63 insertions(+)

diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index 639fbbf..0e636de 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -42,6 +42,7 @@
 #define VIRT_ITS_CMDBUF_VALID   3
 struct virt_its {
 struct domain *d;
+paddr_t doorbell_address;
 spinlock_t vcmd_lock;   /* Protects the virtual command buffer. */
 uint64_t cbaser;
 uint64_t cwriter;
@@ -142,6 +143,27 @@ static struct vcpu *get_vcpu_from_collection(struct 
virt_its *its,
 #define DEV_TABLE_ENTRY(addr, bits) \
 (((addr) & GENMASK_ULL(51, 8)) | (((bits) - 1) & GENMASK_ULL(7, 0)))
 
+/* Set the address of an ITT for a given device ID. */
+static int its_set_itt_address(struct virt_its *its, uint32_t devid,
+   paddr_t itt_address, uint32_t nr_bits)
+{
+paddr_t addr = get_baser_phys_addr(its->baser_dev);
+uint64_t *itt;
+
+if ( devid >= its->max_devices )
+return -ENOENT;
+
+itt = map_one_guest_page(its->d, addr + devid * sizeof(uint64_t));
+if ( !itt )
+return -EFAULT;
+
+*itt = DEV_TABLE_ENTRY(itt_address, nr_bits);
+
+unmap_one_guest_page(itt);
+
+return 0;
+}
+
 /*
  * Lookup the address of the Interrupt Translation Table associated with
  * a device ID and return the address of the ITTE belonging to the event ID
@@ -367,6 +389,44 @@ static int its_handle_mapc(struct virt_its *its, uint64_t 
*cmdptr)
 return 0;
 }
 
+static int its_handle_mapd(struct virt_its *its, uint64_t *cmdptr)
+{
+uint32_t devid = its_cmd_get_deviceid(cmdptr);
+unsigned int size = its_cmd_get_size(cmdptr) + 1;
+bool valid = its_cmd_get_validbit(cmdptr);
+paddr_t itt_addr = its_cmd_mask_field(cmdptr, 2, 0, 52) &
+   GENMASK_ULL(51, 8);
+int ret;
+
+/*
+ * There is no easy and clean way for Xen to know the ITS device ID of a
+ * particular (PCI) device, so we have to rely on the guest telling
+ * us about it. For *now* we are just using the device ID *Dom0* uses,
+ * because the driver there has the actual knowledge.
+ * Eventually this will be replaced with a dedicated hypercall to
+ * announce pass-through of devices.
+ */
+if ( is_hardware_domain(its->d) )
+{
+/* Dom0's ITSes are mapped 1:1, so both addresses are the same. */
+ret = gicv3_its_map_guest_device(its->d, its->doorbell_address, devid,
+ its->doorbell_address, devid,
+ BIT(size), valid);
+if ( ret )
+return ret;
+}
+
+spin_lock(&its->its_lock);
+if ( valid )
+ret = its_set_itt_address(its, devid, itt_addr, size);
+else
+ret = its_set_itt_address(its, devid, INVALID_PADDR, 1);
+
+spin_unlock(&its->its_lock);
+
+return ret;
+}
+
 #define ITS_CMD_BUFFER_SIZE(baser)  ((((baser) & 0xff) + 1) << 12)
 
 static int vgic_its_handle_cmds(struct domain *d, struct virt_its *its,
@@ -413,6 +473,9 @@ static int vgic_its_handle_cmds(struct domain *d, struct 
virt_its *its,
 case GITS_CMD_MAPC:
 ret = its_handle_mapc(its, cmdptr);
 break;
+case GITS_CMD_MAPD:
+ret = its_handle_mapd(its, cmdptr);
+   break;
 case GITS_CMD_SYNC:
 /* We handle ITS commands synchronously, so we ignore SYNC. */
break;
-- 
2.9.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 26/27] ARM: vITS: create ITS subnodes for Dom0 DT

2017-04-03 Thread Andre Przywara
Dom0 expects all ITSes in the system to be propagated to be able to
use MSIs.
Create Dom0 DT nodes for each hardware ITS, keeping the register frame
address the same, as the doorbell address that the Dom0 drivers program
into the BARs has to match the hardware.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/gic-v3-its.c| 78 
 xen/arch/arm/gic-v3.c|  4 ++-
 xen/include/asm-arm/gic_v3_its.h | 13 +++
 3 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index f611e2f..13b21bf 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -20,6 +20,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -859,6 +860,83 @@ int gicv3_lpi_change_vcpu(struct domain *d, paddr_t 
doorbell,
 return 0;
 }
 
+/*
+ * Create the respective guest DT nodes for a list of host ITSes.
+ * This copies the reg property, so the guest sees the ITS at the same address
+ * as the host.
+ * Giving NULL for the its_list will make it use the list of host ITSes.
+ */
+int gicv3_its_make_dt_nodes(struct list_head *its_list,
+const struct domain *d,
+const struct dt_device_node *gic,
+void *fdt)
+{
+uint32_t len;
+int res;
+const void *prop = NULL;
+const struct dt_device_node *its = NULL;
+const struct host_its *its_data;
+
+if ( !its_list )
+its_list = &host_its_list;
+
+if ( list_empty(its_list) )
+return 0;
+
+/* The sub-nodes require the ranges property */
+prop = dt_get_property(gic, "ranges", &len);
+if ( !prop )
+{
+printk(XENLOG_ERR "Can't find ranges property for the gic node\n");
+return -FDT_ERR_XEN(ENOENT);
+}
+
+res = fdt_property(fdt, "ranges", prop, len);
+if ( res )
+return res;
+
+list_for_each_entry(its_data, its_list, entry)
+{
+its = its_data->dt_node;
+
+res = fdt_begin_node(fdt, its->name);
+if ( res )
+return res;
+
+res = fdt_property_string(fdt, "compatible", "arm,gic-v3-its");
+if ( res )
+return res;
+
+res = fdt_property(fdt, "msi-controller", NULL, 0);
+if ( res )
+return res;
+
+if ( its->phandle )
+{
+res = fdt_property_cell(fdt, "phandle", its->phandle);
+if ( res )
+return res;
+}
+
+/* Use the same reg regions as the ITS node in host DTB. */
+prop = dt_get_property(its, "reg", &len);
+if ( !prop )
+{
+printk(XENLOG_ERR "GICv3: Can't find ITS reg property.\n");
+res = -FDT_ERR_XEN(ENOENT);
+return res;
+}
+
+res = fdt_property(fdt, "reg", prop, len);
+if ( res )
+return res;
+
+fdt_end_node(fdt);
+}
+
+return res;
+}
+
 /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
 void gicv3_its_dt_init(const struct dt_device_node *node)
 {
diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index eda3410..ddfdd97 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -1172,8 +1172,10 @@ static int gicv3_make_hwdom_dt_node(const struct domain 
*d,
 
 res = fdt_property(fdt, "reg", new_cells, len);
 xfree(new_cells);
+if ( res )
+return res;
 
-return res;
+return gicv3_its_make_dt_nodes(NULL, d, gic, fdt);
 }
 
 static const hw_irq_controller gicv3_host_irq_type = {
diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
index fb05311..abf9e8c 100644
--- a/xen/include/asm-arm/gic_v3_its.h
+++ b/xen/include/asm-arm/gic_v3_its.h
@@ -161,6 +161,12 @@ int gicv3_its_setup_collection(unsigned int cpu);
 int vgic_v3_its_init_virtual(struct domain *d, paddr_t guest_addr,
 unsigned int devid_bits, unsigned int intid_bits);
 
+/* Given a list of ITSes, create the appropriate DT nodes for a domain. */
+int gicv3_its_make_dt_nodes(struct list_head *its_list,
+const struct domain *d,
+const struct dt_device_node *gic,
+void *fdt);
+
 /*
  * Map a device on the host by allocating an ITT on the host (ITS).
  * "nr_event" specifies how many events (interrupts) this device will need.
@@ -234,6 +240,13 @@ static inline int vgic_v3_its_init_virtual(struct domain 
*d,
 {
 return 0;
 }
+static inline int gicv3_its_make_dt_nodes(struct list_head *its_list,
+   const struct domain *d,
+   const struct dt_device_node *gic,
+   void *fdt)
+{
+return 0;
+}
 
 #endif /* CONFIG_HAS_ITS */
 
-- 
2.9.0


___
Xen-devel mailing list

[Xen-devel] [PATCH v4 16/27] ARM: vITS: handle CLEAR command

2017-04-03 Thread Andre Przywara
This introduces the ITS command handler for the CLEAR command, which
clears the pending state of an LPI.
This removes a not-yet injected, but already queued IRQ from a VCPU.
As read_itte() is now finally used, we can add the static keyword.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/vgic-v3-its.c | 31 +--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index fcfea3b..cc1d7a0 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -186,8 +186,8 @@ static void put_itte(struct virt_its *its, struct vits_itte 
*itte)
  * This function takes care of the locking by taking the its_lock itself, so
  * a caller shall not hold this. Upon returning, the lock is dropped again.
  */
-bool read_itte(struct virt_its *its, uint32_t devid, uint32_t evid,
-   struct vcpu **vcpu, uint32_t *vlpi)
+static bool read_itte(struct virt_its *its, uint32_t devid, uint32_t evid,
+  struct vcpu **vcpu, uint32_t *vlpi)
 {
 struct vits_itte *itte;
 uint16_t collid;
@@ -273,6 +273,30 @@ static uint64_t its_cmd_mask_field(uint64_t *its_cmd, 
unsigned int word,
 #define its_cmd_get_target_addr(cmd)its_cmd_mask_field(cmd, 2, 16, 32)
 #define its_cmd_get_validbit(cmd)   its_cmd_mask_field(cmd, 2, 63,  1)
 
+static int its_handle_clear(struct virt_its *its, uint64_t *cmdptr)
+{
+uint32_t devid = its_cmd_get_deviceid(cmdptr);
+uint32_t eventid = its_cmd_get_id(cmdptr);
+struct pending_irq *p;
+struct vcpu *vcpu;
+uint32_t vlpi;
+
+if ( !read_itte(its, devid, eventid, &vcpu, &vlpi) )
+return -1;
+
+p = lpi_to_pending(its->d, vlpi);
+if ( !p )
+return -1;
+
+clear_bit(GIC_IRQ_GUEST_LPI_PENDING, &p->status);
+
+/* Remove a pending, but not yet injected guest IRQ. */
+clear_bit(GIC_IRQ_GUEST_QUEUED, &p->status);
+gic_remove_from_queues(vcpu, vlpi);
+
+return 0;
+}
+
 #define ITS_CMD_BUFFER_SIZE(baser)  ((((baser) & 0xff) + 1) << 12)
 
 static int vgic_its_handle_cmds(struct domain *d, struct virt_its *its,
@@ -310,6 +334,9 @@ static int vgic_its_handle_cmds(struct domain *d, struct 
virt_its *its,
 
 switch ( its_cmd_get_command(cmdptr) )
 {
+case GITS_CMD_CLEAR:
+ret = its_handle_clear(its, cmdptr);
+break;
 case GITS_CMD_SYNC:
 /* We handle ITS commands synchronously, so we ignore SYNC. */
break;
-- 
2.9.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v4 15/27] ARM: vITS: introduce translation table walks

2017-04-03 Thread Andre Przywara
The ITS stores the target (v)CPU and the (virtual) LPI number in tables.
Introduce functions to walk those tables and translate a device ID -
event ID pair into a pair of virtual LPI and vCPU.
We map those tables on demand - which is cheap on arm64. Also we take
care of the locking on the way, since we can't easily protect those ITTs
from being altered by the guest.

To allow compiling without warnings, we declare two functions as
non-static for the moment, which two later patches will fix.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/vgic-v3-its.c | 183 +
 1 file changed, 183 insertions(+)

diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
index fd3b9a1..fcfea3b 100644
--- a/xen/arch/arm/vgic-v3-its.c
+++ b/xen/arch/arm/vgic-v3-its.c
@@ -71,6 +71,189 @@ static bool its_is_enabled(struct virt_its *its)
 return test_bit(VIRT_ITS_ENABLED, >flags);
 }
 
+#define UNMAPPED_COLLECTION  ((uint16_t)~0)
+
+/*
+ * The physical address is encoded slightly differently depending on
+ * the used page size: the highest four bits are stored in the lowest
+ * four bits of the field for 64K pages.
+ */
+static paddr_t get_baser_phys_addr(uint64_t reg)
+{
+if ( reg & BIT(9) )
+return (reg & GENMASK_ULL(47, 16)) |
+((reg & GENMASK_ULL(15, 12)) << 36);
+else
+return reg & GENMASK_ULL(47, 12);
+}
+
+/* Must be called with the ITS lock held. */
+static struct vcpu *get_vcpu_from_collection(struct virt_its *its,
+ uint16_t collid)
+{
+paddr_t addr = get_baser_phys_addr(its->baser_coll);
+uint16_t *coll_table;
+uint16_t vcpu_id;
+
+if ( collid >= its->max_collections )
+return NULL;
+
+coll_table = map_one_guest_page(its->d, addr + collid * sizeof(uint16_t));
+if ( !coll_table )
+return NULL;
+
+vcpu_id = *coll_table;
+
+unmap_one_guest_page(coll_table);
+
+if ( vcpu_id == UNMAPPED_COLLECTION || vcpu_id >= its->d->max_vcpus )
+return NULL;
+
+return its->d->vcpu[vcpu_id];
+}
+
+/*
+ * Our device table encodings:
+ * Contains the guest physical address of the Interrupt Translation Table in
+ * bits [51:8], and the size of it encoded in the lowest 8 bits.
+ */
+#define DEV_TABLE_ITT_ADDR(x) ((x) & GENMASK_ULL(51, 8))
+#define DEV_TABLE_ITT_SIZE(x) (BIT(((x) & GENMASK_ULL(7, 0)) + 1))
+#define DEV_TABLE_ENTRY(addr, bits) \
+(((addr) & GENMASK_ULL(51, 8)) | (((bits) - 1) & GENMASK_ULL(7, 0)))
+
+/*
+ * Lookup the address of the Interrupt Translation Table associated with
+ * a device ID and return the address of the ITTE belonging to the event ID
+ * (which is an index into that table).
+ */
+static paddr_t its_get_itte_address(struct virt_its *its,
+uint32_t devid, uint32_t evid)
+{
+paddr_t ret, addr = get_baser_phys_addr(its->baser_dev);
+uint64_t *itt_ptr;
+uint64_t itt;
+
+if ( devid >= its->max_devices )
+return INVALID_PADDR;
+
+itt_ptr = map_one_guest_page(its->d, addr + devid * sizeof(uint64_t));
+if ( !itt_ptr )
+return INVALID_PADDR;
+
+itt = read_u64_atomic(itt_ptr);
+
+if ( evid < DEV_TABLE_ITT_SIZE(itt) &&
+ DEV_TABLE_ITT_ADDR(itt) != INVALID_PADDR )
+ret = DEV_TABLE_ITT_ADDR(itt) + evid * sizeof(struct vits_itte);
+else
+ret = INVALID_PADDR;
+
+unmap_one_guest_page(itt_ptr);
+
+return ret;
+}
+
+/*
+ * Looks up a given deviceID/eventID pair on an ITS and returns a pointer to
+ * the corresponding ITTE. This maps the respective guest page into Xen.
+ * Once finished with handling the ITTE, call put_itte() to unmap
+ * the page again.
+ * Must be called with the ITS lock held.
+ */
+static struct vits_itte *get_itte(struct virt_its *its,
+  uint32_t devid, uint32_t evid)
+{
+paddr_t addr = its_get_itte_address(its, devid, evid);
+
+if ( addr == INVALID_PADDR )
+return NULL;
+
+return map_one_guest_page(its->d, addr);
+}
+
+/* Must be called with the ITS lock held. */
+static void put_itte(struct virt_its *its, struct vits_itte *itte)
+{
+unmap_one_guest_page(itte);
+}
+
+/*
+ * Queries the collection and device tables to get the vCPU and virtual
+ * LPI number for a given guest event. This takes care of mapping the
+ * respective tables and validating the values, since we can't efficiently
+ * protect the ITTs with their less-than-page-size granularity.
+ * This function takes care of the locking by taking the its_lock itself, so
+ * a caller shall not hold this. Upon returning, the lock is dropped again.
+ */
+bool read_itte(struct virt_its *its, uint32_t devid, uint32_t evid,
+   struct vcpu **vcpu, uint32_t *vlpi)
+{
+struct vits_itte *itte;
+uint16_t collid;
+uint32_t _vlpi;
+struct vcpu *_vcpu;
+
+spin_lock(>its_lock);
+itte = 

[Xen-devel] [PATCH v4 12/27] ARM: vGICv3: handle virtual LPI pending and property tables

2017-04-03 Thread Andre Przywara
Allow a guest to provide the address and size for the memory regions
it has reserved for the GICv3 pending and property tables.
We sanitise the various fields of the respective redistributor
registers and map those pages into Xen's address space to have easy
access.

Signed-off-by: Andre Przywara 
---
 xen/arch/arm/vgic-v3.c   | 136 +--
 xen/common/memory.c  |  61 +++
 xen/include/asm-arm/domain.h |   6 +-
 xen/include/asm-arm/vgic.h   |   2 +
 xen/include/xen/mm.h |   8 +++
 5 files changed, 195 insertions(+), 18 deletions(-)

diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index 797fd86..2c6b317 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -19,12 +19,14 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -228,12 +230,15 @@ static int __vgic_v3_rdistr_rd_mmio_read(struct vcpu *v, 
mmio_info_t *info,
 goto read_reserved;
 
 case VREG64(GICR_PROPBASER):
-/* LPI's not implemented */
-goto read_as_zero_64;
+if ( !vgic_reg64_check_access(dabt) ) goto bad_width;
+*r = vgic_reg64_extract(v->domain->arch.vgic.rdist_propbase, info);
+return 1;
 
 case VREG64(GICR_PENDBASER):
-/* LPI's not implemented */
-goto read_as_zero_64;
+if ( !vgic_reg64_check_access(dabt) ) goto bad_width;
+*r = vgic_reg64_extract(v->arch.vgic.rdist_pendbase, info);
+*r &= ~GICR_PENDBASER_PTZ;   /* WO, reads as 0 */
+return 1;
 
 case 0x0080:
 goto read_reserved;
@@ -301,11 +306,6 @@ bad_width:
 domain_crash_synchronous();
 return 0;
 
-read_as_zero_64:
-if ( !vgic_reg64_check_access(dabt) ) goto bad_width;
-*r = 0;
-return 1;
-
 read_as_zero_32:
 if ( dabt.size != DABT_WORD ) goto bad_width;
 *r = 0;
@@ -358,11 +358,95 @@ int vgic_lpi_get_priority(struct domain *d, uint32_t vlpi)
 return p->lpi_priority;
 }
 
+static uint64_t vgic_sanitise_field(uint64_t reg, uint64_t field_mask,
+int field_shift,
+uint64_t (*sanitise_fn)(uint64_t))
+{
+uint64_t field = (reg & field_mask) >> field_shift;
+
+field = sanitise_fn(field) << field_shift;
+
+return (reg & ~field_mask) | field;
+}
+
+/* We want to avoid outer shareable. */
+static uint64_t vgic_sanitise_shareability(uint64_t field)
+{
+switch ( field )
+{
+case GIC_BASER_OuterShareable:
+return GIC_BASER_InnerShareable;
+default:
+return field;
+}
+}
+
+/* Avoid any inner non-cacheable mapping. */
+static uint64_t vgic_sanitise_inner_cacheability(uint64_t field)
+{
+switch ( field )
+{
+case GIC_BASER_CACHE_nCnB:
+case GIC_BASER_CACHE_nC:
+return GIC_BASER_CACHE_RaWb;
+default:
+return field;
+}
+}
+
+/* Non-cacheable or same-as-inner are OK. */
+static uint64_t vgic_sanitise_outer_cacheability(uint64_t field)
+{
+switch ( field )
+{
+case GIC_BASER_CACHE_SameAsInner:
+case GIC_BASER_CACHE_nC:
+return field;
+default:
+return GIC_BASER_CACHE_nC;
+}
+}
+
+static uint64_t sanitize_propbaser(uint64_t reg)
+{
+reg = vgic_sanitise_field(reg, GICR_PROPBASER_SHAREABILITY_MASK,
+  GICR_PROPBASER_SHAREABILITY_SHIFT,
+  vgic_sanitise_shareability);
+reg = vgic_sanitise_field(reg, GICR_PROPBASER_INNER_CACHEABILITY_MASK,
+  GICR_PROPBASER_INNER_CACHEABILITY_SHIFT,
+  vgic_sanitise_inner_cacheability);
+reg = vgic_sanitise_field(reg, GICR_PROPBASER_OUTER_CACHEABILITY_MASK,
+  GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT,
+  vgic_sanitise_outer_cacheability);
+
+reg &= ~GICR_PROPBASER_RES0_MASK;
+
+return reg;
+}
+
+static uint64_t sanitize_pendbaser(uint64_t reg)
+{
+reg = vgic_sanitise_field(reg, GICR_PENDBASER_SHAREABILITY_MASK,
+  GICR_PENDBASER_SHAREABILITY_SHIFT,
+  vgic_sanitise_shareability);
+reg = vgic_sanitise_field(reg, GICR_PENDBASER_INNER_CACHEABILITY_MASK,
+  GICR_PENDBASER_INNER_CACHEABILITY_SHIFT,
+  vgic_sanitise_inner_cacheability);
+reg = vgic_sanitise_field(reg, GICR_PENDBASER_OUTER_CACHEABILITY_MASK,
+  GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT,
+  vgic_sanitise_outer_cacheability);
+
+reg &= ~GICR_PENDBASER_RES0_MASK;
+
+return reg;
+}
+
 static int __vgic_v3_rdistr_rd_mmio_write(struct vcpu *v, mmio_info_t *info,
   uint32_t gicr_reg,
   register_t r)
 {
 struct hsr_dabt 

Re: [Xen-devel] [PATCH v2 08/27] ARM: GICv3 ITS: introduce host LPI array

2017-04-03 Thread Julien Grall

On 04/03/2017 08:30 PM, Andre Przywara wrote:

Hi,


Hi Andre,


On 23/03/17 19:08, Julien Grall wrote:

 /*
+ * On the host ITS @its, map @nr_events consecutive LPIs.
+ * The mapping connects a device @devid and event @eventid pair to
LPI @lpi,
+ * increasing both @eventid and @lpi to cover the number of requested
LPIs.
+ */
+int gicv3_its_map_host_events(struct host_its *its,
+  uint32_t devid, uint32_t eventid,
uint32_t lpi,
+  uint32_t nr_events)
+{
+uint32_t i;
+int ret;
+
+for ( i = 0; i < nr_events; i++ )
+{
+/* For now we map every host LPI to host CPU 0 */
+ret = its_send_cmd_mapti(its, devid, eventid + i, lpi + i, 0);
+if ( ret )
+return ret;
+ret = its_send_cmd_inv(its, devid, eventid + i);



So the spec allows up to 32KB event per device. As all the LPIs will be
routed to CPU0 (e.g collection 0), it would be more efficient to do an
INVALL. I would be happy to defer that to post Xen 4.9, but the rest
needs to be fixed.


I tried INVALL and it didn't work, at least on the model. I can't see
why, so I kept the individual INVs in.
I have the patch still lying around, so we can revisit this later.


Can you add a TODO comment please?



 hw_its = gicv3_its_find_by_doorbell(host_doorbell);
 if ( !hw_its )
@@ -574,6 +639,11 @@ int gicv3_its_map_guest_device(struct domain *d,
 if ( !dev )
 goto out_unlock;

+dev->host_lpis = xzalloc_array(uint32_t,
+   DIV_ROUND_UP(nr_events, LPI_BLOCK));


Rather than having DIV_ROUND_UP spread everywhere, would it not be easier
to round_up nr_events once and for all?


I'd rather keep nr_events as the actual number around.
I think we might look into actually limiting the allocation later.


Why? This number will likely be a multiple of bit because of how the ITS
works. You would also have to keep around multiple different values, which
will make the code more complicated to read...




+/* Must be called with host_lpis_lock held. */


Again, this is a call for adding an ASSERT in the function.


This comment is more like lock documentation, to give code writers a
guidance how the locking should be handled here.
I am not convinced that having ASSERTS in *static* functions is really
useful.


Well, you seem to assume that you will be the only one to modify this 
code and it is very easy to skip reading a comment by mistake.


So the ASSERT will catch such an error. Give me a reason why the ASSERT is
a bad idea and I will re-think my position.


[...]


The algo does not seem to have changed since the previous version.
Looking at it again, my understanding is you will always try to allocate
forward. So if the current chunk is full, you will allocate the next one
rather than looking whether a previous chunk has space available. This
will result to allocate more memory than necessary.


In v4 I amended the code slightly to move next_lpi outside of the
function. When we now free an LPI block, we check if the previous
next_lpi was pointing after this block and adjust it in this case.


Similarly unused chunk could be freed to save memory.


No, we can't, because this breaks the lockless property. A user
(incoming LPI) would find the first level pointer and go into that
block. So we can never replace this block pointer now. That smells
like a case for RCU, but I am not sure if Xen can properly handle this case.
But with the above optimization (adjusting next_lpi on freeing a block)
I am pretty sure this isn't a problem for now, especially for Dom0.


Then document it, because otherwise we are likely to forget to revisit
this in the future.


Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 06/27] ARM: GICv3 ITS: introduce device mapping

2017-04-03 Thread Andre Przywara
Hi,

On 22/03/17 17:29, Julien Grall wrote:
> Hi Andre,
> 
> On 16/03/17 11:20, Andre Przywara wrote:
>> The ITS uses device IDs to map LPIs to a device. Dom0 will later use
>> those IDs, which we directly pass on to the host.
>> For this we have to map each device that Dom0 may request to a host
>> ITS device with the same identifier.
>> Allocate the respective memory and enter each device into an rbtree to
>> later be able to iterate over it or to easily teardown guests.
>>
>> Signed-off-by: Andre Przywara 
>> ---
>>  xen/arch/arm/gic-v3-its.c| 207
>> +++
>>  xen/arch/arm/vgic-v3.c   |   3 +
>>  xen/include/asm-arm/domain.h |   3 +
>>  xen/include/asm-arm/gic_v3_its.h |  18 
>>  4 files changed, 231 insertions(+)
>>
>> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
>> index 5c11b0d..60b15b5 100644
>> --- a/xen/arch/arm/gic-v3-its.c
>> +++ b/xen/arch/arm/gic-v3-its.c
>> @@ -21,6 +21,8 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -32,6 +34,17 @@
>>
>>  LIST_HEAD(host_its_list);
>>
>> +struct its_devices {
>> +struct rb_node rbnode;
>> +struct host_its *hw_its;
>> +void *itt_addr;
>> +paddr_t guest_doorbell;
> 
> I think it would be worth to explain in the commit message why you need
> the guest_doorbell in the struct its_devices and how you plan to use it.

In v4 I now also elaborated on the reason in a comment (before the
struct), which I deem more useful than something in the commit message.

>> +uint32_t host_devid;
>> +uint32_t guest_devid;
>> +uint32_t eventids;
>> +uint32_t *host_lpis;
>> +};
>> +
>>  bool gicv3_its_host_has_its(void)
>>  {
>>  return !list_empty(_its_list);
>> @@ -149,6 +162,24 @@ static int its_send_cmd_mapc(struct host_its
>> *its, uint32_t collection_id,
>>  return its_send_command(its, cmd);
>>  }
>>
>> +static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
>> + uint8_t size_bits, paddr_t itt_addr,
>> bool valid)
>> +{
>> +uint64_t cmd[4];
>> +
>> +if ( valid )
>> +{
>> +ASSERT(size_bits < 32);
> 
> It would be better if you do the check against the real number in
> hardware (i.e GITS_TYPER.ID_bits).

Added in v4.

> 
>> +ASSERT(!(itt_addr & ~GENMASK(51, 8)));
>> +}
>> +cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
>> +cmd[1] = valid ? size_bits : 0x00;
> 
> This is really confusing. The check was not on the previous version. So
> why do you need that?

Admittedly I was carried away by some intention to check this here
properly. But since itt_addr and size are only valid with V=1, I removed
this in v3.

> Also, it would have been better to hide the "size - 1" in the helper
> avoiding to really on the caller to do the right thing.

I tend to agree, but then we have the awkward case where an unmap passes
0 in size, which then gets decremented by one. But you are right that
it's still saner this way, so I pass 1 now in the unmap call and do the
"-1" encoding in here.

>> +cmd[2] = valid ? (itt_addr | GITS_VALID_BIT) : 0x00;
> 
> Ditto about "valid? ...".

Removed in v3.

> [...]
> 
>> +static struct host_its *gicv3_its_find_by_doorbell(paddr_t
>> doorbell_address)
>> +{
>> +struct host_its *hw_its;
>> +
>> +list_for_each_entry(hw_its, _its_list, entry)
>> +{
>> +if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )
> 
> Why not storing the ITS address rather than the doorbell to avoid this
> check?

Because the doorbell address is a nice architectural property of MSIs in
general. And we need this check anyway, it's just the addition of the
doorbell offset that is different.

> [...]
> 
>> +int gicv3_its_map_guest_device(struct domain *d,
>> +   paddr_t host_doorbell, uint32_t
>> host_devid,
>> +   paddr_t guest_doorbell, uint32_t
>> guest_devid,
>> +   uint32_t nr_events, bool valid)
>> +{
>> +void *itt_addr = NULL;
>> +struct host_its *hw_its;
>> +struct its_devices *dev = NULL, *temp;
>> +struct rb_node **new = >arch.vgic.its_devices.rb_node, *parent
>> = NULL;
>> +int ret = -ENOENT;
>> +
>> +hw_its = gicv3_its_find_by_doorbell(host_doorbell);
>> +if ( !hw_its )
>> +return ret;
>> +
>> +/* check for already existing mappings */
>> +spin_lock(>arch.vgic.its_devices_lock);
>> +while ( *new )
>> +{
>> +temp = rb_entry(*new, struct its_devices, rbnode);
>> +
>> +parent = *new;
>> +if ( !compare_its_guest_devices(temp, guest_doorbell,
>> guest_devid) )
>> +{
>> +if ( !valid )
>> +rb_erase(>rbnode, >arch.vgic.its_devices);
>> +
>> +spin_unlock(>arch.vgic.its_devices_lock);
>> +
>> +if ( valid )
> 
> Again, a printk(XENLOG_GUEST...) 

Re: [Xen-devel] [GSoc] GSoc Introduction : Xen on ARM: create multiple guests from device tree

2017-04-03 Thread Methuku Karthik
Hi Stefano,

I have asked questions in inline. Clarification below questions would
really help me in contribution. Please look into the questions. I am
highlighting them in this mail.

 For example, Dom1 should be able to share a page with Dom2 and a
  different page with Dom3. It needs to be clear which page is shared with
which VM from the VM config files.


When we create VMs using xl create, for example if I am planning to create
three VMs,

Dom1, Dom2 and Dom3, because of the page sharing are we imposing any order
of
  creating VMs.

  I am asking this question to clarify this point, while creation of Dom1
if its
  sharing pages with Dom 2 and Dom 3 , should Xen already be aware of Dom2
and Dom3?

  I am referring to following links to understand about mem sharing.

  http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=tools/
tests/mem-sharing/memshrtoo
  l.c;h=8e5e22b9e95d91f1441d8eb226b64852eca075d5;hb=HEAD
  http://xenbits.xen.org/docs/unstable/misc/grant-tables.txt

  I also want to figure out how domains are created and how xl tool parses
the file
  and passes on the information to domain creation . Let me know if i am
thinking in
  right direction.

  suggest any resource or work which would help with designing config file
options.


 I will start with Xen-38 that would help me in exploring init code.
Correct me if i
  am wrong.

  I have a few questions and clarifications before proceeding further. I
have checked
  how config.gz file is generated in linux kernel source.
  In linux kernel sources, if CONFIG_IKCONFIG_PROC option is set, .config
file which
  is generated after choosing options with lets say from make menuconfig
is read into
  a variable, this way its part of build.

  during init time proc_create service is used to create this file
config.gz.
  http://lxr.free-electrons.com/source/kernel/configs.c


  I guess i have to do something similar.

  Questions :

  1. When Xen is built using the make command, we effectively set
XEN_COMPILE_ARCH,
  XEN_OS, XEN_TARGET which allow using corresponding .mk file from config
folder.
  These variables in turn decide what the config options are. I wasn't able
to find any
  .config. Please direct me to find the file or if i am missing something.

  2. Where and how this config file should be accessible to  User once in
Dom0. Is the
  xen folder created to keep the information about guest domains like proc
for process
  in linux kernel ? Will that be suitable location to have config file.

  3. if i assume that i will approach similarly, i have to add services to
be called
  during init stage. As am not acquainted with code base, i could just grep
with
  _start or _init or similar strings to find out initialization code. Any
  input(function name or filename) to look for will be of great help.

On Mon, Apr 3, 2017 at 3:35 PM, Stefano Stabellini 
wrote:

> Thank you! I am looking forward to your contribution on the list! If you
> encounter any issues, please let us know.
>
> The code contribution is more important, but if you find the time in the
> next few days, it would be nice to add more details to the
> implementation plan, such as where the memory gets allocated, whether it
> is taken from a VM, and if so, which one. Also what kind of "token"
> could be used in the config option and how the toolstack could keep
> track of the token - memory page references.
>
> Thanks,
>
> Stefano
>
> On Mon, 3 Apr 2017, Methuku Karthik wrote:
> > Hi Stefano,
> >
> > Thanks for Input. I was not able to spend enough time last couple of
> weeks due to
> > projects. I have received mail from Lars Kurt explaining submission of
> draft
> > proposal and possibility to work on micro tasks.
> >
> > I have created a draft proposal from with your inputs and what i learnt
> about
> > sharing pages and memory management in Xen, please access it from here
> >
> > https://docs.google.com/document/d/1xLmR7x4yfCbRgpuefZQNhZ4lAu-
> 6slW0oXPmjnxcnz0/edi
> > t#heading=h.1yvc35w6t3fu
> >
> > I haven't written anything about maintenance. I have included some links
> i thought
> > will be helpful under references and referenced wherever applicable.
> >
> > Please suggest comments and inputs.
> >
> > On Tue, Mar 28, 2017 at 8:12 PM, Stefano Stabellini <
> sstabell...@kernel.org> wrote:
> > > CC'ing a couple of maintainers that might have more insights on this
> > > project.
> > >
> > > On Tue, 28 Mar 2017, Stefano Stabellini wrote:
> > >> On Tue, 28 Mar 2017, Methuku Karthik wrote:
> > >> > Hi Stefano,
> > >> >
> > >> > Kindly suggest me reading material that could help me understand and
> > >> > come up with the proposal.
> > >>
> > >> Sure! For the "Xen on ARM: create multiple guests from device tree"
> > >> project, the idea is that on many embedded systems the user knows how
> > >> many guests to create beforehand, and usually it is a small number,
> like
> > >> 2 or 3. Often these guests don't even have any PV frontends, but just
> a
> > >> set of devices 

[Xen-devel] [xen-4.6-testing test] 107151: tolerable FAIL - PUSHED

2017-04-03 Thread osstest service owner
flight 107151 xen-4.6-testing real [real]
http://logs.test-lab.xenproject.org/osstest/logs/107151/

Failures :-/ but no regressions.

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-libvirt-xsm 13 saverestore-support-checkfail  like 106819
 test-armhf-armhf-libvirt 13 saverestore-support-checkfail  like 106819
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stop fail like 106819
 test-amd64-i386-xl-qemut-win7-amd64 16 guest-stop fail like 106819
 test-amd64-amd64-xl-qemut-win7-amd64 16 guest-stopfail like 106819
 test-amd64-amd64-xl-qemuu-win7-amd64 16 guest-stopfail like 106819
 test-armhf-armhf-libvirt-raw 12 saverestore-support-checkfail  like 106819

Tests which did not succeed, but are not blocking:
 test-xtf-amd64-amd64-3   64 xtf/test-pv32pae-xsa-194 fail   never pass
 test-xtf-amd64-amd64-5   64 xtf/test-pv32pae-xsa-194 fail   never pass
 test-xtf-amd64-amd64-2   64 xtf/test-pv32pae-xsa-194 fail   never pass
 test-xtf-amd64-amd64-1   64 xtf/test-pv32pae-xsa-194 fail   never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-intel 11 guest-start  fail  never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-xtf-amd64-amd64-4   64 xtf/test-pv32pae-xsa-194 fail   never pass
 test-amd64-amd64-xl-pvh-amd  11 guest-start  fail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-vhd 11 migrate-support-checkfail   never pass
 test-amd64-amd64-qemuu-nested-amd 16 debian-hvm-install/l1/l2  fail never pass
 test-armhf-armhf-xl-rtds 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 12 migrate-support-checkfail never pass
 test-armhf-armhf-xl-cubietruck 13 saverestore-support-checkfail never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 13 saverestore-support-checkfail  never pass
 test-armhf-armhf-xl-credit2  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  12 saverestore-support-checkfail   never pass

version targeted for testing:
 xen  f96efeb0c6b4f499194571ef6d767534ba851c6a
baseline version:
 xen  ac4c5d4ddf89051365da2acba5c6c306a10e0bbe

Last test of basis   106819  2017-03-21 19:13:35 Z   13 days
Failing since107020  2017-03-31 07:11:02 Z3 days7 attempts
Testing same since   107151  2017-04-03 09:51:21 Z0 days1 attempts


People who touched revisions under test:
  Daniel De Graaf 
  Dario Faggioli 
  Jan Beulich 
  Roger Pau Monné 

jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64-xtf  pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-prev pass
 

Re: [Xen-devel] [PATCH v2] setup vwfi correctly on cpu0

2017-04-03 Thread Julien Grall

Hi Stefano,

On 03/31/2017 11:37 PM, Stefano Stabellini wrote:

parse_vwfi runs after init_traps on cpu0, potentially resulting in the
wrong HCR_EL2 for it. Secondary cpus boot after parse_vwfi, so in their
case init_traps will write the correct set of flags to HCR_EL2.

For cpu0, fix the issue by changing HCR_EL2 setting from a new
presmp_initcall.

Signed-off-by: Stefano Stabellini 

---
This patch should be applied to 4.8, 4.7, 4.6, not to unstable (it will be
fixed differently there).
---

diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 614501f..65b5397 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -115,6 +115,22 @@ static void __init parse_vwfi(const char *s)
 }
 custom_param("vwfi", parse_vwfi);

+static int __init vwfi_init(void)
+{
+/*
+ * HCR_EL2 has already been set on cpu0, change the setting here, if
+ * needed. Other cpus haven't booted yet, init_traps will setup
+ * HCR_EL2 correctly.
+ */
+if ( vwfi == NATIVE ) {


Coding style:

if ( ... )
{


+register_t hcr;


NIT: newline here please.



+hcr = READ_SYSREG(HCR_EL2);
+WRITE_SYSREG(hcr & ~(HCR_TWI|HCR_TWE), HCR_EL2);
+}


Ditto.


+return 0;
+}
+presmp_initcall(vwfi_init);
+
 void init_traps(void)
 {
 /* Setup Hyp vector base */



With that:

Reviewed-by: Julien Grall 

Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 06/27] ARM: GICv3 ITS: introduce device mapping

2017-04-03 Thread Andre Przywara
Hi,

On 22/03/17 22:45, Stefano Stabellini wrote:
> On Thu, 16 Mar 2017, Andre Przywara wrote:
>> The ITS uses device IDs to map LPIs to a device. Dom0 will later use
>> those IDs, which we directly pass on to the host.
>> For this we have to map each device that Dom0 may request to a host
>> ITS device with the same identifier.
>> Allocate the respective memory and enter each device into an rbtree to
>> later be able to iterate over it or to easily teardown guests.
>>
>> Signed-off-by: Andre Przywara 
>> ---
>>  xen/arch/arm/gic-v3-its.c| 207 
>> +++
>>  xen/arch/arm/vgic-v3.c   |   3 +
>>  xen/include/asm-arm/domain.h |   3 +
>>  xen/include/asm-arm/gic_v3_its.h |  18 
>>  4 files changed, 231 insertions(+)
>>
>> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
>> index 5c11b0d..60b15b5 100644
>> --- a/xen/arch/arm/gic-v3-its.c
>> +++ b/xen/arch/arm/gic-v3-its.c
>> @@ -21,6 +21,8 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -32,6 +34,17 @@
>>  
>>  LIST_HEAD(host_its_list);
>>  
>> +struct its_devices {
>> +struct rb_node rbnode;
>> +struct host_its *hw_its;
>> +void *itt_addr;
>> +paddr_t guest_doorbell;
>> +uint32_t host_devid;
>> +uint32_t guest_devid;
>> +uint32_t eventids;
>> +uint32_t *host_lpis;
>> +};
>> +
>>  bool gicv3_its_host_has_its(void)
>>  {
>>  return !list_empty(_its_list);
>> @@ -149,6 +162,24 @@ static int its_send_cmd_mapc(struct host_its *its, 
>> uint32_t collection_id,
>>  return its_send_command(its, cmd);
>>  }
>>  
>> +static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
>> + uint8_t size_bits, paddr_t itt_addr, bool 
>> valid)
>> +{
>> +uint64_t cmd[4];
>> +
>> +if ( valid )
>> +{
>> +ASSERT(size_bits < 32);
>> +ASSERT(!(itt_addr & ~GENMASK(51, 8)));
>> +}
>> +cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
>> +cmd[1] = valid ? size_bits : 0x00;
>> +cmd[2] = valid ? (itt_addr | GITS_VALID_BIT) : 0x00;
>> +cmd[3] = 0x00;
>> +
>> +return its_send_command(its, cmd);
>> +}
>> +
>>  /* Set up the (1:1) collection mapping for the given host CPU. */
>>  int gicv3_its_setup_collection(unsigned int cpu)
>>  {
>> @@ -379,6 +410,7 @@ static int gicv3_its_init_single_its(struct host_its 
>> *hw_its)
>>  devid_bits = min(devid_bits, max_its_device_bits);
>>  if ( reg & GITS_TYPER_PTA )
>>  hw_its->flags |= HOST_ITS_USES_PTA;
>> +hw_its->itte_size = GITS_TYPER_ITT_SIZE(reg);
>>  
>>  for ( i = 0; i < GITS_BASER_NR_REGS; i++ )
>>  {
>> @@ -428,6 +460,180 @@ int gicv3_its_init(void)
>>  return 0;
>>  }
>>  
>> +static int remove_mapped_guest_device(struct its_devices *dev)
>> +{
>> +int ret;
>> +
>> +if ( dev->hw_its )
>> +{
>> +int ret = its_send_cmd_mapd(dev->hw_its, dev->host_devid, 0, 0, 
>> false);
>> +if ( ret )
>> +return ret;
>> +}
>> +
>> +ret = gicv3_its_wait_commands(dev->hw_its);
>> +if ( ret )
>> +return ret;
>> +
>> +xfree(dev->itt_addr);
>> +xfree(dev);
>> +
>> +return 0;
>> +}
>> +
>> +static struct host_its *gicv3_its_find_by_doorbell(paddr_t doorbell_address)
>> +{
>> +struct host_its *hw_its;
>> +
>> +list_for_each_entry(hw_its, _its_list, entry)
> 
> Does this need to take a spinlock to protect host_its_list? I guess not
> because the list is not modified after boot?

Exactly, I added a comment in v4 explaining this.

>> +{
>> +if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )
>> +return hw_its;
>> +}
>> +
>> +return NULL;
>> +}
>> +
>> +static int compare_its_guest_devices(struct its_devices *dev,
>> + paddr_t doorbell, uint32_t devid)
>> +{
>> +if ( dev->guest_doorbell < doorbell )
>> +return -1;
>> +
>> +if ( dev->guest_doorbell > doorbell )
>> +return 1;
>> +
>> +if ( dev->guest_devid < devid )
>> +return -1;
>> +
>> +if ( dev->guest_devid > devid )
>> +return 1;
>> +
>> +return 0;
>> +}
>> +
>> +/*
>> + * Map a hardware device, identified by a certain host ITS and its device ID
>> + * to domain d, a guest ITS (identified by its doorbell address) and device 
>> ID.
>> + * Also provide the number of events (MSIs) needed for that device.
>> + * This does not check if this particular hardware device is already mapped
>> + * at another domain, it is expected that this would be done by the caller.
>> + */
>> +int gicv3_its_map_guest_device(struct domain *d,
>> +   paddr_t host_doorbell, uint32_t host_devid,
>> +   paddr_t guest_doorbell, uint32_t guest_devid,
>> +   uint32_t nr_events, bool valid)
>> +{
>> +void *itt_addr = NULL;
>> 

Re: [Xen-devel] [PATCH] setup vwfi correctly on cpu0

2017-04-03 Thread Stefano Stabellini
On Mon, 3 Apr 2017, Julien Grall wrote:
> Hi Stefano,
> 
> On 31/03/17 23:33, Stefano Stabellini wrote:
> > On Fri, 31 Mar 2017, Julien Grall wrote:
> > > Hi Stefano,
> > > 
> > > On 03/30/2017 11:35 PM, Stefano Stabellini wrote:
> > > > parse_vwfi runs after init_traps on cpu0, potentially resulting in the
> > > > wrong HCR_EL2 for it. Secondary cpus boot after parse_vwfi, so in their
> > > > case init_traps will write the correct set of flags to HCR_EL2.
> > > > 
> > > > For cpu0, fix the issue by changing HCR_EL2 setting directly in
> > > > parse_vwfi.
> > > > 
> > > > Signed-off-by: Stefano Stabellini 
> > > > 
> > > > ---
> > > > This patch should be applied to 4.8, 4.7, 4.6, not to unstable (it will be
> > > > fixed differently there).
> > > > ---
> > > > diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> > > > index 614501f..94d2e8a 100644
> > > > --- a/xen/arch/arm/traps.c
> > > > +++ b/xen/arch/arm/traps.c
> > > > @@ -112,6 +112,16 @@ static void __init parse_vwfi(const char *s)
> > > > vwfi = NATIVE;
> > > > else
> > > > vwfi = TRAP;
> > > > +/*
> > > > + * HCR_EL2 has already been set on cpu0, change the setting here,
> > > > if
> > > > + * needed. Other cpus haven't booted yet, init_traps will setup
> > > > + * HCR_EL2 correctly.
> > > > + */
> > > > +if (vwfi == NATIVE) {
> > > 
> > > Coding style:
> > > 
> > > if ( ... )
> > > {
> > 
> > OK
> > 
> > 
> > > > +register_t hcr;
> > > > +hcr = READ_SYSREG(HCR_EL2);
> > > > +WRITE_SYSREG(hcr & ~(HCR_TWI|HCR_TWE), HCR_EL2);
> > > 
> > > You are assuming the default value of vwfi and it makes it very complicated
> > > for
> > > someone to follow the code and modify it.
> > 
> > Do you mean the default vwfi setting? If so, no, I am not: hcr &
> > ~(HCR_TWI|HCR_TWE) works regardless of the default.
> 
> Sorry, I was not clear. You assume that vwfi == TRAP by default, although I
> guess it is fine because it is a backport.

Yes, that was my thinking. See new patch:
alpine.DEB.2.10.1703311535180.3287@sstabellini-ThinkPad-X260

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 03/26] ARM: GICv3 ITS: allocate device and collection table

2017-04-03 Thread Andre Przywara
Hi,

On 03/04/17 18:22, Julien Grall wrote:
> Hi Andre,
> 
> On 03/04/17 16:38, Julien Grall wrote:
>> On 31/03/17 19:05, Andre Przywara wrote:
>>> Each ITS maps a pair of a DeviceID (for instance derived from a PCI
>>> b/d/f triplet) and an EventID (the MSI payload or interrupt ID) to a
>>> pair of LPI number and collection ID, which points to the target CPU.
>>> This mapping is stored in the device and collection tables, which
>>> software
>>> has to provide for the ITS to use.
>>> Allocate the required memory and hand it to the ITS.
>>> The maximum number of devices is limited to a compile-time constant
>>> exposed in Kconfig.
>>>
>>> Signed-off-by: Andre Przywara 
>>
>> Reviewed-by: Julien Grall 
> 
> Actually I will withdraw my reviewed-by. I didn't spot you keep the
> command line around which I clearly say no and gave some reasons why.
> Sorry for the mess.

I thought we were talking about the Kconfig option to drop here (which
the commit msg wrongly states as still being around)?

For implementations that don't support indirect tables, but still
advertise high numbers, I'd find it useful to have the possibility to
limit this to avoid memory waste.

> To explain it again, no-one can possibly know how the DeviceIDs will be
> spread on the platform without having the platform data sheet in hand.
> If the platform provides more DeviceIDs and is not able to cope with that,
> then it is a platform specific quirk.
> When we spoke f2f you agreed on this. So please drop this command line.

Sigh ...

Cheers,
Andre.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [GSoc] GSoc Introduction : Xen on ARM: create multiple guests from device tree

2017-04-03 Thread Stefano Stabellini
Thank you! I am looking forward to your contribution on the list! If you
encounter any issues, please let us know.

The code contribution is more important, but if you find the time in the
next few days, it would be nice to add more details to the
implementation plan, such as where the memory gets allocated, whether it
is taken from a VM, and if so, which one. Also what kind of "token"
could be used in the config option and how the toolstack could keep
track of the token - memory page references.

Thanks,

Stefano

On Mon, 3 Apr 2017, Methuku Karthik wrote:
> Hi Stefano,
> 
> Thanks for Input. I was not able to spend enough time last couple of weeks 
> due to
> projects. I have received mail from Lars Kurt explaining submission of draft
> proposal and possibility to work on micro tasks.
> 
> I have created a draft proposal from your inputs and what I learnt about
> sharing pages and memory management in Xen, please access it from here
> 
> https://docs.google.com/document/d/1xLmR7x4yfCbRgpuefZQNhZ4lAu-6slW0oXPmjnxcnz0/edit#heading=h.1yvc35w6t3fu
> 
> I haven't written anything about maintenance. I have included some links i 
> thought
> will be helpful under references and referenced wherever applicable.
> 
> Please suggest comments and inputs.
> 
> On Tue, Mar 28, 2017 at 8:12 PM, Stefano Stabellini  
> wrote:
> > CC'ing a couple of maintainers that might have more insights on this
> > project.
> >
> > On Tue, 28 Mar 2017, Stefano Stabellini wrote:
> >> On Tue, 28 Mar 2017, Methuku Karthik wrote:
> >> > Hi Stefano,
> >> >
> >> > Kindly suggest me reading material that could help me understand and
> >> > come up with the proposal.
> >>
> >> Sure! For the "Xen on ARM: create multiple guests from device tree"
> >> project, the idea is that on many embedded systems the user knows how
> >> many guests to create beforehand, and usually it is a small number, like
> >> 2 or 3. Often these guests don't even have any PV frontends, but just a
> >> set of devices assigned to them. An example could be an extremely simple
> >> guest that only accesses one physical device (which is assigned to it at
> >> boot) and prints messages using the debug hypercalls (see
> >> xen/arch/arm/traps.c:do_debug_trap).
> >>
> >> In this scenario, there is no need to wait for Dom0 to boot to create
> >> this second guest (I'll call it Dom1, to distinguish it from Dom0). Xen
> >> could actually create it directly by itself, the same way it starts Dom0
> >> (see xen/arch/arm/domain_build.c:construct_dom0 and
> >> docs/misc/arm/device-tree/booting.txt).
> >>
> >> The project is about extending the existing device interface to pass an
> >> additional kernel, initrd, command line arguments for the second virtual
> >> machine. It would also need to include which devices should be assigned
> >> to it. In response, Xen should build the second VM the same way it would
> >> do normally when done via the toolstack (xl/libxl), but it would happen
> >> at boot time, before Dom0 is fully up and running.
> >>
> >>
> >>
> >> For the "Share a page in memory from the VM config file" project,
> >
> > Oops, I forgot to add this description :-)
> >
> > I was saying, for the "Share a page in memory from the VM config file"
> > project, the idea is that we want to share a page in memory between two
> > VMs just by adding one line to their VM config files. They should be
> > able to communicate with each others straight away by writing at the
> > right address in memory. That way, even small embedded systems with no
> > xenstore support can still setup a communication channel with each
> > others.
> >
> > Fundamentally, it just requires the xl/libxl toolstack (see tools/xl and
> > tools/libxl) to parse a new VM config file option, and in response share
> > a page at the specified address, or map a page at a specified address.
> > If the memory address is wrong, the toolstack needs to be able to handle
> > the failure. Sharing pages and mapping pages in xl/libxl is easy; the
> > difficulty of the project is coming up with the right parameter in the
> > VM config file so that multiple VMs can share different pages with each
> > others. It probably requires the introduction of a "token" to identify
> > the page you want to share across multiple VM config files.
> >
> > For example, Dom1 should be able to share a page with Dom2 and a
> > different page with Dom3. It needs to be clear which page is shared with
> > which VM from the VM config files.
> >
> when we create vms using xl create , for example if i am planning create 
> three VMs,
> 
> Dom1, Dom2 and Dom3, because of the page sharing are we imposing any order of
> creating VMs.
> 
> I am asking this question to clarify this point: while creating Dom1, if it is
> sharing pages with Dom2 and Dom3, should Xen already be aware of Dom2 and
> Dom3?
> 
> I am referring to following links to understand about mem sharing.
> 
> 

Re: [Xen-devel] [PATCH v2 08/27] ARM: GICv3 ITS: introduce host LPI array

2017-04-03 Thread Andre Przywara
Hi,

On 23/03/17 19:08, Julien Grall wrote:
> Hi Andre,
> 
> On 16/03/17 11:20, Andre Przywara wrote:
>> The number of LPIs on a host can be potentially huge (millions),
>> although in practise will be mostly reasonable. So prematurely allocating
>> an array of struct irq_desc's for each LPI is not an option.
>> However Xen itself does not care about LPIs, as every LPI will be
>> injected
>> into a guest (Dom0 for now).
>> Create a dense data structure (8 Bytes) for each LPI which holds just
>> enough information to determine the virtual IRQ number and the VCPU into
>> which the LPI needs to be injected.
>> Also to not artificially limit the number of LPIs, we create a 2-level
>> table for holding those structures.
>> This patch introduces functions to initialize these tables and to
>> create, lookup and destroy entries for a given LPI.
>> By using the naturally atomic access guarantee the native uint64_t data
>> type gives us, we allocate and access LPI information in a way that does
>> not require a lock.
>>
>> Signed-off-by: Andre Przywara 
>> ---
>>  xen/arch/arm/gic-v3-its.c|  90 ++-
>>  xen/arch/arm/gic-v3-lpi.c| 188
>> +++
>>  xen/include/asm-arm/gic.h|   5 ++
>>  xen/include/asm-arm/gic_v3_its.h |  11 +++
>>  4 files changed, 292 insertions(+), 2 deletions(-)
>>
>> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
>> index 60b15b5..ed14d95 100644
>> --- a/xen/arch/arm/gic-v3-its.c
>> +++ b/xen/arch/arm/gic-v3-its.c
>> @@ -148,6 +148,20 @@ static int its_send_cmd_sync(struct host_its
>> *its, unsigned int cpu)
>>  return its_send_command(its, cmd);
>>  }
>>
>> +static int its_send_cmd_mapti(struct host_its *its,
>> +  uint32_t deviceid, uint32_t eventid,
>> +  uint32_t pintid, uint16_t icid)
>> +{
>> +uint64_t cmd[4];
>> +
>> +cmd[0] = GITS_CMD_MAPTI | ((uint64_t)deviceid << 32);
>> +cmd[1] = eventid | ((uint64_t)pintid << 32);
>> +cmd[2] = icid;
>> +cmd[3] = 0x00;
>> +
>> +return its_send_command(its, cmd);
>> +}
>> +
>>  static int its_send_cmd_mapc(struct host_its *its, uint32_t
>> collection_id,
>>   unsigned int cpu)
>>  {
>> @@ -180,6 +194,19 @@ static int its_send_cmd_mapd(struct host_its
>> *its, uint32_t deviceid,
>>  return its_send_command(its, cmd);
>>  }
>>
>> +static int its_send_cmd_inv(struct host_its *its,
>> +uint32_t deviceid, uint32_t eventid)
>> +{
>> +uint64_t cmd[4];
>> +
>> +cmd[0] = GITS_CMD_INV | ((uint64_t)deviceid << 32);
>> +cmd[1] = eventid;
>> +cmd[2] = 0x00;
>> +cmd[3] = 0x00;
>> +
>> +return its_send_command(its, cmd);
>> +}
>> +
>>  /* Set up the (1:1) collection mapping for the given host CPU. */
>>  int gicv3_its_setup_collection(unsigned int cpu)
>>  {
>> @@ -462,7 +489,7 @@ int gicv3_its_init(void)
>>
>>  static int remove_mapped_guest_device(struct its_devices *dev)
>>  {
>> -int ret;
>> +int ret, i;
>>
>>  if ( dev->hw_its )
>>  {
>> @@ -471,11 +498,19 @@ static int remove_mapped_guest_device(struct
>> its_devices *dev)
>>  return ret;
>>  }
>>
>> +/*
>> + * The only error the function below would return is -ENOENT, in
>> which
>> + * case there is nothing to free here. So we just ignore it.
>> + */
> 
> The function gicv3_free_host_lpi_block will only be used here. And to be
> fair, if you try to free something that does not exist then it is not
> really an error...
> 
> So I would prefer to see the function to be void.

Fixed in v3.

>> +for ( i = 0; i < DIV_ROUND_UP(dev->eventids, LPI_BLOCK); i++ )
>> +gicv3_free_host_lpi_block(dev->hw_its, dev->host_lpis[i]);
> 
> Again, without looking at the implementation of
> gicv3_free_host_lpi_block, I think the usage of the function is very
> confusing. When I read host_lpis, I expect to see one LPI per event. But
> instead it is the first LPI of a block. The lack of documentation of the
> field in its_devices does not help to understand what's going on.
> 
> So please add some documentation and probably renaming some fields.

Fixed in v3.

> 
> Lastly, you have not answered my question: "should we not discard the
> LPIs before removing the device? Or does MAPD take care of that for you?"

Yes: "MAPD removes the mapping of the specified DeviceID. and interrupt
requests from that device are discarded."

So we don't need to issue individual DISCARDs if the device is unmapped.

>> +
>>  ret = gicv3_its_wait_commands(dev->hw_its);
>>  if ( ret )
>>  return ret;
> 
> I know I asked to wait for the commands, thank you for addressing it. But
> now, if the function fails you will end up leaking memory. This is not
> better than failing to wait for commands.

Fixed in v3.

>>
>>  xfree(dev->itt_addr);
>> +xfree(dev->host_lpis);
>>  xfree(dev);
>>
>>  return 0;
>> @@ 

Re: [Xen-devel] [PATCH] x86/vpmu_intel: Handle SMT consistently for programmable and fixed counters

2017-04-03 Thread Mohit Gambhir



On 04/03/2017 06:36 AM, Jan Beulich wrote:

On 31.03.17 at 16:46,  wrote:

This patch masks .AnyThread bits in IA32_FIXED_CTR_CTRL MSR for all
versions of Intel Architectural Performance Monitoring. Note that
.AnyThread bit (21) is already masked in IA32_PERFEVTSELx MSRs since
hyperthreading is not exposed to guests and Intel SDM discourages the use of
.AnyThread bit in virtualized environments (per section 18.2.3.1
AnyThread Counting and Software Evolution)

All nice and presumably correct, but the main thing is missing: The
bits aren't defined prior to version 3 afaics, so ...


--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -979,8 +979,7 @@ int __init core2_vpmu_init(void)
  full_width_write = (caps >> 13) & 1;
  
  fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);

-if ( version == 2 )
-fixed_ctrl_mask |= 0x444;
+fixed_ctrl_mask |= 0x444;

... the main thing to explain is why removing the conditional is
(a) correct and (b) necessary (going through the uses of the
variable I can see (a) to be true, but not (b)). And of course it
would be quite helpful if the literal number changed to a
manifest constant at once, or a comment was attached to
clarify what the number represents.


I do agree that replacing the hard coded constant with a macro would be 
nice and I will update the patch with that.


The answer to why this change is (b) necessary is twofold -

1. We need to be consistent in the implementation. As said in the commit 
log - we disable .Anythread bit in
programmable counters (regardless of the version) by masking bit 21 in 
IA32_PERFEVTSELx.  (See code snippet

below from vpmu_intel.c)

 /* Masks used for testing whether and MSR is valid */
 #define ARCH_CTRL_MASK  (~((1ull << 32) - 1) | (1ull << 21))

But we leave it enabled in fixed function counters for version 3. 
Removing the condition disables the bit in fixed function
counters regardless of the version,  which is consistent with what is 
done for programmable counters.


2. We don't want to expose event counts from another guest (or 
hypervisor) which can happen if .AnyThread bit is not masked and
a VCPU is only scheduled to run on one of the hardware threads in a 
hyper-threaded CPU.



Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [linux-next test] 107149: regressions - FAIL

2017-04-03 Thread osstest service owner
flight 107149 linux-next real [real]
http://logs.test-lab.xenproject.org/osstest/logs/107149/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-xl-qemuu-winxpsp3  6 xen-boot   fail REGR. vs. 107123
 test-amd64-amd64-rumprun-amd64  6 xen-boot   fail REGR. vs. 107123
 test-amd64-amd64-pair 9 xen-boot/src_hostfail REGR. vs. 107123
 test-amd64-amd64-pair10 xen-boot/dst_hostfail REGR. vs. 107123
 test-amd64-amd64-qemuu-nested-intel  6 xen-boot  fail REGR. vs. 107123
 test-amd64-amd64-libvirt-pair  9 xen-boot/src_host   fail REGR. vs. 107123
 test-amd64-amd64-libvirt-pair 10 xen-boot/dst_host   fail REGR. vs. 107123
 test-amd64-amd64-xl-multivcpu  6 xen-bootfail REGR. vs. 107123
 test-amd64-amd64-xl-qemut-winxpsp3  6 xen-boot   fail REGR. vs. 107123
 test-amd64-amd64-xl   6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-xl-pvh-intel  6 xen-bootfail REGR. vs. 107123
 test-amd64-amd64-libvirt  6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-xl-qcow2 6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-xsm 6 xen-boot fail REGR. vs. 107123
 test-amd64-i386-xl-qemut-debianhvm-amd64-xsm  6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-i386-pvgrub  6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-libvirt-xsm  6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-amd64-pvgrub  6 xen-bootfail REGR. vs. 107123
 test-amd64-amd64-xl-qemut-stubdom-debianhvm-amd64-xsm 6 xen-boot fail REGR. 
vs. 107123
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 6 xen-boot fail REGR. vs. 
107123
 test-amd64-i386-xl-xsm6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-pygrub   6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-xl-qemuu-ovmf-amd64  6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-libvirt-vhd  6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-xl-xsm   6 xen-boot fail REGR. vs. 107123
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm  6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-xl-credit2   6 xen-boot fail REGR. vs. 107123
 build-i3865 xen-buildfail REGR. vs. 107123
 test-amd64-amd64-xl-qemuu-debianhvm-amd64  6 xen-bootfail REGR. vs. 107123
 test-amd64-amd64-xl-qemut-debianhvm-amd64-xsm 6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-xl-qemut-win7-amd64  6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-xl-qemut-debianhvm-amd64  6 xen-bootfail REGR. vs. 107123
 test-amd64-amd64-qemuu-nested-amd  6 xen-bootfail REGR. vs. 107123
 test-amd64-amd64-xl-pvh-amd   6 xen-boot fail REGR. vs. 107123
 test-amd64-amd64-xl-qemuu-win7-amd64  6 xen-boot fail REGR. vs. 107123
 test-amd64-i386-xl-qemut-stubdom-debianhvm-amd64-xsm 6 xen-boot fail REGR. vs. 
107123

Regressions which are regarded as allowable (not blocking):
 test-amd64-amd64-xl-rtds  6 xen-boot fail REGR. vs. 107123
 test-armhf-armhf-xl-arndale  11 guest-start  fail  like 107123
 test-armhf-armhf-xl-rtds 11 guest-start  fail  like 107123
 test-armhf-armhf-xl-xsm  11 guest-start  fail  like 107123
 test-armhf-armhf-libvirt 11 guest-start  fail  like 107123
 test-armhf-armhf-xl  11 guest-start  fail  like 107123
 test-armhf-armhf-xl-credit2  11 guest-start  fail  like 107123
 test-armhf-armhf-xl-cubietruck 11 guest-start fail like 107123
 test-armhf-armhf-libvirt-xsm 11 guest-start  fail  like 107123
 test-armhf-armhf-xl-multivcpu 11 guest-start  fail like 107123
 test-armhf-armhf-xl-vhd   9 debian-di-installfail  like 107123
 test-armhf-armhf-libvirt-raw  9 debian-di-installfail  like 107123

Tests which did not succeed, but are not blocking:
 test-amd64-i386-freebsd10-i386  1 build-check(1)   blocked  n/a
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 1 build-check(1) blocked n/a
 build-arm64-libvirt   1 build-check(1)   blocked  n/a
 test-arm64-arm64-libvirt-qcow2  1 build-check(1)   blocked  n/a
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1  1 build-check(1) blocked n/a
 test-arm64-arm64-libvirt  1 build-check(1)   blocked  n/a
 test-amd64-i386-libvirt-xsm   1 build-check(1)   blocked  n/a
 test-amd64-i386-xl-qemuu-winxpsp3  1 build-check(1)   blocked  n/a
 test-amd64-i386-xl-qemut-winxpsp3  1 build-check(1)   blocked  n/a
 test-amd64-i386-xl-qemut-debianhvm-amd64  1 build-check(1) blocked n/a
 test-amd64-i386-qemut-rhel6hvm-intel  1 

Re: [Xen-devel] [PATCH v3 06/26] ARM: GICv3 ITS: introduce device mapping

2017-04-03 Thread Julien Grall

Hi Andre,

Mostly repeating my comments from the previous version.

On 31/03/17 19:05, Andre Przywara wrote:

[...]


+static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
+ uint8_t size_bits, paddr_t itt_addr, bool valid)
+{
+uint64_t cmd[4];
+
+if ( valid )
+{
+ASSERT(size_bits < 32);


Again, it would be better if you do the check against the real number in 
hardware (i.e GITS_TYPER.ID_bits).



+ASSERT(!(itt_addr & ~GENMASK_ULL(51, 8)));
+}
+cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
+cmd[1] = size_bits;


I would have expected to see size_bits - 1 to accommodate all the 
helpers rather than relying on them.


[...]


+static int remove_mapped_guest_device(struct its_devices *dev)
+{
+int ret;
+
+if ( dev->hw_its )
+{
+/* MAPD also discards all events with this device ID. */
+int ret = its_send_cmd_mapd(dev->hw_its, dev->host_devid, 0, 0, false);


You are re-defining ret. Why?

[...]


+static struct host_its *gicv3_its_find_by_doorbell(paddr_t doorbell_address)
+{
+struct host_its *hw_its;
+
+list_for_each_entry(hw_its, &host_its_list, entry)
+{
+if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )


Again, why not storing the ITS address rather than the doorbell to avoid 
"+ ITS_DOORBELL_OFFSET" ?


[...]


+/*
+ * Map a hardware device, identified by a certain host ITS and its device ID
+ * to domain d, a guest ITS (identified by its doorbell address) and device ID.
+ * Also provide the number of events (MSIs) needed for that device.
+ * This does not check if this particular hardware device is already mapped
+ * at another domain, it is expected that this would be done by the caller.
+ */
+int gicv3_its_map_guest_device(struct domain *d,
+   paddr_t host_doorbell, uint32_t host_devid,
+   paddr_t guest_doorbell, uint32_t guest_devid,
+   uint32_t nr_events, bool valid)


I am sure I said it somewhere in this series, nr_events likely needs to 
be sanitized against the hardware value. Same for host_devid.


[...]


+parent = *new;
+cmp = compare_its_guest_devices(temp, guest_doorbell, guest_devid);
+if ( !cmp )
+{
+if ( !valid )
+rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
+
+spin_unlock(&d->arch.vgic.its_devices_lock);
+
+if ( valid )


Again, a printk(XENLOG_GUEST...) here would be useful to know which host 
DeviceID was associated to the guest DeviceID.



+return -EBUSY;
+
+return remove_mapped_guest_device(temp);


Again, just above you removed the device from the RB-tree but this 
function may fail and never free the memory. This means that memory will 
be leaked leading to a potential denial of service.



+}
+
+if ( cmp > 0 )
+new = &((*new)->rb_left);
+else
+new = &((*new)->rb_right);
+}
+
+if ( !valid )
+goto out_unlock;
+
+ret = -ENOMEM;
+
+/* An Interrupt Translation Table needs to be 256-byte aligned. */
+itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);


See Vijay's comment. But why don't you round up nr_events at the 
beginning once for all rather than doing it in the middle?


[...]


+out_unlock:
+spin_unlock(&d->arch.vgic.its_devices_lock);
+if ( dev )
+{
+xfree(dev->pend_irqs);
+xfree(dev->host_lpi_blocks);


Where is host_lpi_blocks allocated? Why is it freed here?


+}
+xfree(itt_addr);
+xfree(dev);
+return ret;
+}
+
+/* Removing any connections a domain had to any ITS in the system. */
+void gicv3_its_unmap_all_devices(struct domain *d)
+{
+struct rb_node *victim;
+struct its_devices *dev;
+
+/*
+ * This is an easily readable, but suboptimal implementation.
+ * It uses the provided iteration wrapper and erases each node, which
+ * possibly triggers rebalancing.
+ * This seems overkill since we are going to abolish the whole tree, but
+ * avoids an open-coded re-implementation of the traversal functions with
+ * some recursive function calls.
+ */


Well, you updated the comment but it does not make the performance 
problem go away... Xen cannot be preempted, so if it takes too long, 
you will have an impact on the overall system.


As said previously, I think it would be fair to assume that all devices 
will be deassigned before the ITS is destroyed. So I would just drop 
this function. Note that we have the same assumption in the SMMU driver.


If you disagree please say why. But ignoring comments will not help here.


+restart:
+spin_lock(&d->arch.vgic.its_devices_lock);
+if ( (victim = rb_first(&d->arch.vgic.its_devices)) )
+{
+dev = rb_entry(victim, struct its_devices, rbnode);
+rb_erase(victim, &d->arch.vgic.its_devices);
+
+

Re: [Xen-devel] [PATCH v2 20/27] ARM: vITS: handle MAPTI command

2017-04-03 Thread Andre Przywara
Hi,

On 24/03/17 14:54, Julien Grall wrote:
> Hi Andre,
> 
> On 03/16/2017 11:20 AM, Andre Przywara wrote:
>> The MAPTI commands associates a DeviceID/EventID pair with a LPI/CPU
>> pair and actually instantiates LPI interrupts.
>> We connect the already allocated host LPI to this virtual LPI, so that
>> any triggering IRQ on the host can be quickly forwarded to a guest.
>>
>> Signed-off-by: Andre Przywara 
>> ---
>>  xen/arch/arm/gic-v3-its.c| 63
>> 
>>  xen/arch/arm/gic-v3-lpi.c| 18 
>>  xen/arch/arm/vgic-v3-its.c   | 27 +++--
>>  xen/include/asm-arm/gic_v3_its.h |  6 
>>  4 files changed, 112 insertions(+), 2 deletions(-)
>>
>> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
>> index 5a2dbec..e2fcf50 100644
>> --- a/xen/arch/arm/gic-v3-its.c
>> +++ b/xen/arch/arm/gic-v3-its.c
>> @@ -724,6 +724,69 @@ restart:
>>  spin_unlock(&d->arch.vgic.its_devices_lock);
>>  }
>>
>> +/*
>> + * Translates an event for a given guest device ID into the
>> associated host
>> + * LPI number. This can be used to look up the mapped guest LPI.
>> + */
>> +static uint32_t translate_event(struct domain *d, paddr_t doorbell,
>> +uint32_t devid, uint32_t eventid)
>> +{
>> +struct rb_node *node;
>> +struct its_devices *dev;
>> +uint32_t host_lpi = 0;
>> +int cmp;
>> +
>> +spin_lock(&d->arch.vgic.its_devices_lock);
>> +node = d->arch.vgic.its_devices.rb_node;
>> +while (node)
>> +{
>> +dev = rb_entry(node, struct its_devices, rbnode);
>> +cmp = compare_its_guest_devices(dev, doorbell, devid);
>> +
>> +if ( !cmp )
>> +{
>> +if ( eventid >= dev->eventids )
>> +goto out;
>> +
>> +host_lpi = dev->host_lpis[eventid / LPI_BLOCK] +
>> +(eventid % LPI_BLOCK);
>> +if ( !is_lpi(host_lpi) )
> 
> Hmmm, I don't understand this check. host_lpi should always be an LPI. No?

Looks like. Dropped in v4.

>> +host_lpi = 0;
>> +goto out;
>> +}
>> +
>> +if ( cmp > 0 )
>> +node = node->rb_left;
>> +else
>> +node = node->rb_right;
>> +}
>> +
>> +out:
>> +spin_unlock(&d->arch.vgic.its_devices_lock);
>> +
>> +return host_lpi;
>> +}
>> +
>> +/*
>> + * Connects the event ID for an already assigned device to the given
>> VCPU/vLPI
>> + * pair. The corresponding physical LPI is already mapped on the host
>> side
>> + * (when assigning the physical device to the guest), so we just
>> connect the
>> + * target VCPU/vLPI pair to that interrupt to inject it properly if
>> it fires.
>> + */
>> +int gicv3_assign_guest_event(struct domain *d, paddr_t doorbell_address,
>> + uint32_t devid, uint32_t eventid,
> 
> It looks like to me that devid is the virtual deviceID. If so, please
> prefix with 'v', otherwise 'p'.

Fixed in v4.

>> + struct vcpu *v, uint32_t virt_lpi)
>> +{
>> +uint32_t host_lpi = translate_event(d, doorbell_address, devid,
>> eventid);
>> +
>> +if ( !host_lpi )
>> +return -ENOENT;
>> +
>> +gicv3_lpi_update_host_entry(host_lpi, d->domain_id, v->vcpu_id,
>> virt_lpi);
>> +
>> +return 0;
>> +}
>> +
>>  /* Scan the DT for any ITS nodes and create a list of host ITSes out
>> of it. */
>>  void gicv3_its_dt_init(const struct dt_device_node *node)
>>  {
>> diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
>> index 994698e..c110ec9 100644
>> --- a/xen/arch/arm/gic-v3-lpi.c
>> +++ b/xen/arch/arm/gic-v3-lpi.c
>> @@ -153,6 +153,24 @@ void do_LPI(unsigned int lpi)
>>  vgic_vcpu_inject_irq(vcpu, hlpi.virt_lpi);
>>  }
>>
>> +int gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
>> +unsigned int vcpu_id, uint32_t virt_lpi)
>> +{
>> +union host_lpi *hlpip, hlpi;
>> +
>> +host_lpi -= LPI_OFFSET;
> 
> I would add an ASSERT(host_lpi > LPI_OFFSET);

Fixed in v4.

>> +
>> +hlpip = &lpi_data.host_lpis[host_lpi /
>> HOST_LPIS_PER_PAGE][host_lpi % HOST_LPIS_PER_PAGE];
>> +
>> +hlpi.virt_lpi = virt_lpi;
>> +hlpi.dom_id = domain_id;
>> +hlpi.vcpu_id = vcpu_id;
>> +
>> +write_u64_atomic(&hlpip->data, hlpi.data);
>> +
>> +return 0;
>> +}
>> +
>>  static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
>>  {
>>  uint64_t val;
>> diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
>> index c26d5d4..600ff69 100644
>> --- a/xen/arch/arm/vgic-v3-its.c
>> +++ b/xen/arch/arm/vgic-v3-its.c
>> @@ -167,8 +167,8 @@ static bool read_itte(struct virt_its *its,
>> uint32_t devid, uint32_t evid,
>>  }
>>
>>  #define SKIP_LPI_UPDATE 1
>> -bool write_itte(struct virt_its *its, uint32_t devid, uint32_t evid,
>> -uint32_t collid, uint32_t vlpi, struct vcpu **vcpu)
>> +static bool write_itte(struct virt_its 

Re: [Xen-devel] [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG

2017-04-03 Thread Razvan Cojocaru
On 04/03/2017 09:20 PM, Razvan Cojocaru wrote:
> On 04/01/2017 07:56 PM, Razvan Cojocaru wrote:
>> On 03/31/2017 06:04 PM, Jan Beulich wrote:
>> On 31.03.17 at 17:01,  wrote:
 On 03/31/2017 05:46 PM, Jan Beulich wrote:
 On 31.03.17 at 11:56,  wrote:
>> On 03/31/2017 10:34 AM, Jan Beulich wrote:
>> On 31.03.17 at 08:17,  wrote:
 On 03/30/2017 06:47 PM, Jan Beulich wrote:
>> Speaking of emulated MMIO, I've got this when the guest was crashing
>> immediately (pre RETRY loop):
>>
>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 
>> 72
>> 07 8b cb e8 da 4b ff ff 8b 45
>
> That's a BTR, which we should be emulating fine. More information
> would need to be collected to have a chance to understand what
> might be going on (first of all the virtual and physical memory
> address this was trying to act on).

 Right, the BTR part should be fine, but I think the LOCK part is what's
 causing the issue. I've done a few more test runs to see what return
 RETRY (dumping the instruction with an "(r)" prefix to distinguish from
 the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
 and XADD, both LOCK-prefixed, which means they now involve CMPXCHG
 handler, which presumably now fails - possibly simply because it's
 always LOCKed in my patch):
>>>
>>> Well, all of that looks to be expected behavior. I'm afraid I don't see
>>> how this information helps understanding the MMIO emulation failure
>>> above.
>>
>> I've managed to obtain this log of emulation errors:
>> https://pastebin.com/Esy1SkHx 
>>
>> The "virtual address" lines that are not followed by any "Mem event"
>> line correspond to CMPXCHG_FAILED return codes.
>>
>> The very last line is a MMIO emulation failed.
>>
>> It's probably important that this happens with the model where
>> hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
>> succeeds. The other model allows me to go further with the guest, but
>> eventually I get timeout-related BSODs or the guest becomes unresponsive.
>
> Interesting. You didn't clarify what the printed "offset" values are,
> and it doesn't look like these have any correlation with the underlying
> (guest) physical address, which we would also want to see. And then
> it strikes me as odd that in these last lines
>
> (XEN) Mem event (RETRY) emulation failed: d5v8 32bit @ 0008:826bb861 -> 
> f0 0f 
 ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
> (XEN) virtual address: 0xffd080f0, offset: 4291854576
> (XEN) MMIO emulation failed: d5v8 32bit @ 0008:82655f3c -> f0 0f ba 30 00 
> 72 
 07 8b cb e8 da 4b ff ff 8b 45
>
> the instruction pointers and virtual addresses are different, but the
> code bytes are exactly the same. This doesn't seem very likely, so I
> wonder whether there's an issue with us wrongly re-using previously
> fetched insn bytes. (Of course I'd be happy to be proven wrong with
> this guessing, by you checking the involved binary/ies.)

 Offset is the actual value of the "offset" parameter of
 hvmemul_cmpxchg().
>>>
>>> That's not very useful then, as for flat segments "offset" ==
>>> "virtual address" (i.e. you merely re-print in decimal what you've
>>> already printed in hex).
>>
>> The attached patch (a combination of your patch and mine) produces the
>> following output when booting a Windows 7 32-bit guest with monitoring:
>> https://pastebin.com/ayiFmj1N
>>
>> The failed MMIO emulation is caused by a mapping failure due to the
>> "!nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa)" condition
>> being true in hvmemul_vaddr_to_mfn(). I've ripped that off from
>> __hvm_copy() but it looks like that might not be the right way to use it.
> 
> Sorry to reply to this email instead of your original reply but I've
> "left it" in my computer at work. Here's the last part of the log, with
> the VCPU number logged for the GFN as well:
> 
> (XEN) [8] gfn: 0x2781
> (XEN) [8] virtual address: 0x827810a8, rc: 0
> (XEN) [8] gfn: 0x2781
> (XEN) [8] virtual address: 0x827810a8, rc: 0
> (XEN) [8] gfn: 0x2781
> (XEN) [8] virtual address: 0x827810cc, rc: 0
> (XEN) [8] gfn: 0x2781
> (XEN) [8] virtual address: 0x8278109c, rc: 0
> (XEN) [8] gfn: 0x2781
> (XEN) [8] virtual address: 0x827810d0, rc: 0
> (XEN) [11] gfn: 0x2781
> (XEN) [8] gfn: 0x2781
> (XEN) [11] virtual address: 0x8278109c, rc: 0
> (XEN) [8] virtual address: 0x8278109c, rc: 4
> (XEN) Dump follows for VCPU 8
> (XEN) Mem event (RETRY) emulation failed: d3v8 32bit @ 0008:826b5c7c ->
> f0 0f c1 08 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
> (XEN) [11] gfn: 0x2781
> (XEN) [11] virtual address: 0x827810a8, rc: 0
> 

Re: [Xen-devel] [PATCH v3 06/26] ARM: GICv3 ITS: introduce device mapping

2017-04-03 Thread Julien Grall

On 01/04/17 09:01, Vijay Kilari wrote:

Hi Andre,


Hi Vijay,


On Fri, Mar 31, 2017 at 11:35 PM, Andre Przywara  wrote:

+/* An Interrupt Translation Table needs to be 256-byte aligned. */
+itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);


  As I mentioned for the previous version, if itt_addr is not large enough,
the ITS would overwrite and corrupt memory.
Similar to the size passed in the MAPD cmd, itt_addr should also be allocated
with size ROUNDUP(nr_events, LPI_BLOCK).


ROUNDUP(nr_events, LPI_BLOCK) would still be wrong as the MAPD command 
works in terms of bits. You have to round up to the next bit.


Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Shattering superpages impact on IOMMU in Xen

2017-04-03 Thread Oleksandr Tyshchenko
Hi, Andrew

On Mon, Apr 3, 2017 at 7:42 PM, Andrew Cooper  wrote:
> On 03/04/17 17:24, Oleksandr Tyshchenko wrote:
>> Hi, all.
>>
>> Playing with non-shared IOMMU in Xen on ARM I faced one interesting
>> thing. I found out that the superpages were shattered during domain
>> life cycle.
>> This is the result of mapping of foreign pages, ballooning memory,
>> even if domain maps Xen shared pages, etc.
>> I don't bother with the memory fragmentation at the moment. But,
>> shattering bothers me from the IOMMU point of view.
>> As Xen owns the IOMMU it might manipulate IOMMU page tables while a
>> passed-through/protected device is doing DMA in Linux. It is hard to detect
>> when the DMA transaction isn't in progress
>> in order to prevent this race. So, if we have inflight transaction
>> from a device when changing IOMMU mapping we might get into trouble.
>> Unfortunately, not in all the cases the
>> faulting transaction can be restarted. The chance to hit the problem
>> increases during shattering.
>>
>> I did next test:
>> The dom0 on my setup contains ethernet IP that are protected by IOMMU.
>> What is more, as the IOMMU I am playing with supports superpages (2M,
>> 1G) the IOMMU driver
>> takes into account these capabilities when building page tables. As I
>> gave 256 MB for dom0, the IOMMU mapping was built by 2M memory blocks
>> only. As I am using NFS for both dom0 and domU the ethernet IP
>> performs DMA transactions almost all the time.
>> Sometimes, I see IOMMU page faults while creating a guest domain. I
>> think it happens while Xen is shattering 2M mappings into 4K mappings (it
>> unmaps dom0 pages by one 4K page at a time, then maps domU pages there
>> for copying domU images).
>> But, I don't see any page faults when the IOMMU page table was built
>> by 4K pages only.
>>
>> I had a talk with Julien on IRC and we came to the conclusion that the
>> safest way would be to use 4K pages to prevent shattering, so the
>> IOMMU shouldn't report superpage capability.
>> On the other hand, if we build IOMMU from 4K pages we will have
>> performance drop (during building, walking page tables), TLB pressure,
>> etc.
>> Another possible solution Julien was suggesting is to always
>> balloon with 2M, 1G, and not use 4K. That would help us to
>> prevent the shattering effect.
>> The discussion was moved to the ML since it seems to be a generic
>> issue and the right solution should be thought out.
>>
>> What do you think is the right way to follow? Use 4K pages and don't
>> bother with shattering or try to optimize? And if the idea to make
>> balloon mechanism smarter makes sense how to teach balloon to do so?
>> Thank you.
>
> Ballooning and foreign mappings are terrible for trying to retain
> superpage mappings.  No OS, not even Linux, can sensibly provide victim
> pages in a useful way to avoid shattering.
>
> If you care about performance, don't ever balloon.  Foreign mappings in
> translated guests should start from the top of RAM, and work upwards.

I understand about disabling ballooning mechanism. I will keep it in mind.

>
>
> As for the IOMMU specifically, things are rather easier.  It is the
> guest's responsibility to ensure that frames offered up for ballooning or
> foreign mappings are unused.  Therefore, if anything cares about the
> specific 4K region becoming non-present in the IOMMU mappings, it is the
> guest kernel's fault for offering up a frame already in use.
>
> For the shattering however, It is Xen's responsibility to ensure that
> all other mappings stay valid at all points.  The correct way to do this
> is to construct a new L1 table, mirroring the L2 superpage but lacking
> the specific 4K mapping in question, then atomically replace the L2
> superpage entry with the new L1 table, then issue an IOMMU TLB
> invalidation to remove any cached mappings.

I think I do almost the same.

>
> By following that procedure, all DMA within the 2M region, but not
> hitting the 4K frame, won't observe any interim lack of mappings.  It
> appears from your description that Xen isn't following the procedure.
>
> ~Andrew

Thank you.

-- 
Regards,

Oleksandr Tyshchenko

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v8 2/4] xen: introduce a C99 headers check

2017-04-03 Thread Stefano Stabellini
On Mon, 3 Apr 2017, Jan Beulich wrote:
> >>> On 31.03.17 at 21:15,  wrote:
> > Introduce a C99 headers check, for non-ANSI compliant headers: 9pfs.h
> > and pvcalls.h.
> > 
> > In addition to the usual -include stdint.h, also add -include string.h
> > to the C99 check to get the declaration of memcpy and size_t.
> > 
> > For the same reason, also add -include cstring to the C++ check when
> > necessary.
> > 
> > Signed-off-by: Stefano Stabellini 
> > CC: jbeul...@suse.com 
> > CC: konrad.w...@oracle.com 
> > ---
> >  .gitignore   |  3 +--
> >  xen/include/Makefile | 30 ++
> >  2 files changed, 19 insertions(+), 14 deletions(-)
> > 
> > diff --git a/.gitignore b/.gitignore
> > index 443b12a..0265c1e 100644
> > --- a/.gitignore
> > +++ b/.gitignore
> > @@ -273,8 +273,7 @@ xen/arch/*/efi/boot.c
> >  xen/arch/*/efi/compat.c
> >  xen/arch/*/efi/efi.h
> >  xen/arch/*/efi/runtime.c
> > -xen/include/headers.chk
> > -xen/include/headers++.chk
> > +xen/include/headers*.chk
> >  xen/include/asm
> >  xen/include/asm-*/asm-offsets.h
> >  xen/include/asm-x86/cpuid-autogen.h
> > diff --git a/xen/include/Makefile b/xen/include/Makefile
> > index aca7f20..fd57ce4 100644
> > --- a/xen/include/Makefile
> > +++ b/xen/include/Makefile
> > @@ -90,11 +90,12 @@ compat/xlat.h: $(addprefix compat/.xlat/,$(xlat-y)) 
> > Makefile
> >  
> >  ifeq ($(XEN_TARGET_ARCH),$(XEN_COMPILE_ARCH))
> >  
> > -all: headers.chk headers++.chk
> > +all: headers.chk headers99.chk headers++.chk
> >  
> >  PUBLIC_HEADERS := $(filter-out public/arch-% public/dom0_ops.h, $(wildcard 
> > public/*.h public/*/*.h) $(public-y))
> >  
> > -PUBLIC_ANSI_HEADERS := $(filter-out public/%ctl.h public/xsm/% 
> > public/%hvm/save.h, $(PUBLIC_HEADERS))
> > +PUBLIC_C99_HEADERS :=
> > +PUBLIC_ANSI_HEADERS := $(filter-out public/%ctl.h public/xsm/% 
> > public/%hvm/save.h $(PUBLIC_C99_HEADERS), $(PUBLIC_HEADERS))
> >  
> >  headers.chk: $(PUBLIC_ANSI_HEADERS) Makefile
> > for i in $(filter %.h,$^); do \
> > @@ -104,16 +105,21 @@ headers.chk: $(PUBLIC_ANSI_HEADERS) Makefile
> > done >$@.new
> > mv $@.new $@
> >  
> > +headers99.chk: $(PUBLIC_C99_HEADERS) Makefile
> > +   rm -f $@.new
> > +   $(foreach i, $(filter %.h,$^), $(CC) -x c -std=c99 -Wall -Werror\
> > +   -include stdint.h $(foreach j, $($(i)-prereq), -include $(j).h) \
> > +   -S -o /dev/null $(i) || exit $$?; echo $(i) >> $@.new;)
> 
> I would have wished that you formatted this along the lines of
> the C++ rule below (|| first on its line, aligned with the beginning
> of the command). But anyway - I can live with it here, but ...
> 
> > +   mv $@.new $@
> > +
> >  headers++.chk: $(PUBLIC_HEADERS) Makefile
> > -   if $(CXX) -v >/dev/null 2>&1; then \
> > -   for i in $(filter %.h,$^); do \
> > -   echo '#include "'$$i'"' \
> > -   | $(CXX) -x c++ -std=gnu++98 -Wall -Werror -D__XEN_TOOLS__ \
> > - -include stdint.h -include public/xen.h -S -o /dev/null - \
> > -   || exit 1; \
> > -   echo $$i; \
> > -   done ; \
> > -   fi >$@.new
> > +   rm -f $@.new
> > +   $(CXX) -v >/dev/null 2>&1 || exit 0;   \
> > +   $(foreach i, $(filter %.h,$^), echo "#include "\"$(i)\"\
> > +   |$(CXX) -x c++ -std=gnu++98 -Wall -Werror -D__XEN_TOOLS__  \
> > +   -include stdint.h -include public/xen.h\
> > +   $(foreach j, $($(i)-prereq), -include c$(j)) -S -o /dev/null - \
> > +   || exit $$?; echo $(i) >> $@.new;)
> 
> ... indentation still doesn't match how it was originally (including,
> as mentioned above as well, aligning the start of the command
> with | and || ) and there's a blank missing after | . Of course I'm
> fine with you fixing this upon commit, if no other need arises for
> a v9, so on that basis with those adjustments
> Reviewed-by: Jan Beulich 

Thank you, I can do that! Are you OK also with the other patches, patch
#1 in particular?

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 15/27] ARM: vITS: introduce translation table walks

2017-04-03 Thread Andre Przywara
Hi,

On 24/03/17 13:00, Julien Grall wrote:
> Hi Andre,
> 
> On 03/16/2017 11:20 AM, Andre Przywara wrote:
>> The ITS stores the target (v)CPU and the (virtual) LPI number in tables.
>> Introduce functions to walk those tables and translate a device ID -
>> event ID pair into a pair of virtual LPI and vCPU.
>> Since the final interrupt translation tables can be smaller than a page,
>> we map them on demand (which is cheap on arm64). Also we take care of
>> the locking on the way, since we can't easily protect those ITTs from
>> being altered by the guest.
>>
>> To allow compiling without warnings, we declare two functions as
>> non-static for the moment.
>>
>> Signed-off-by: Andre Przywara 
>> ---
>>  xen/arch/arm/vgic-v3-its.c | 135
>> +
>>  1 file changed, 135 insertions(+)
>>
>> diff --git a/xen/arch/arm/vgic-v3-its.c b/xen/arch/arm/vgic-v3-its.c
>> index 5337638..267a573 100644
>> --- a/xen/arch/arm/vgic-v3-its.c
>> +++ b/xen/arch/arm/vgic-v3-its.c
>> @@ -62,6 +62,141 @@ struct vits_itte
>>  uint16_t collection;
>>  };
>>
>> +#define UNMAPPED_COLLECTION  ((uint16_t)~0)
>> +
>> +/* Must be called with the ITS lock held. */
> 
> This is a call for an ASSERT in the function.
> 
>> +static struct vcpu *get_vcpu_from_collection(struct virt_its *its,
>> int collid)
> 
> s/int/unsigned int/

Fixed in v4.

>> +{
>> +uint16_t vcpu_id;
>> +
>> +if ( collid >= its->max_collections )
>> +return NULL;
>> +
>> +vcpu_id = its->coll_table[collid];
>> +if ( vcpu_id == UNMAPPED_COLLECTION || vcpu_id >=
>> its->d->max_vcpus )
>> +return NULL;
>> +
>> +return its->d->vcpu[vcpu_id];
>> +}
>> +
>> +#define DEV_TABLE_ITT_ADDR(x) ((x) & GENMASK(51, 8))
>> +#define DEV_TABLE_ITT_SIZE(x) (BIT(((x) & GENMASK(7, 0)) + 1))
>> +#define DEV_TABLE_ENTRY(addr, bits) \
>> +(((addr) & GENMASK(51, 8)) | (((bits) - 1) & GENMASK(7, 0)))
> 
> The layout of dev_table[...] really needs to be explained. It took me
> quite a while to understand how it works. For instance why you skip the
> first 8 bits for the address...

Fixed in v3.

>> +
>> +static paddr_t get_itte_address(struct virt_its *its,
>> +uint32_t devid, uint32_t evid)
>> +{
> 
> I was expecting to see support for two-level page tables for the vITS.
> Any plan for that?
> 
>> +paddr_t addr;
>> +
>> +if ( devid >= its->max_devices )
>> +return ~0;
> 
> Please don't hardcode invalid address and use INVALID_PADDR.
> 
>> +
>> +if ( evid >= DEV_TABLE_ITT_SIZE(its->dev_table[devid]) )
>> +return ~0;
> 
> Ditto.

Fixed in v3.

>> +
>> +addr = DEV_TABLE_ITT_ADDR(its->dev_table[devid]);
> 
> You read dev_table[...] multiple times. What prevents someone from modifying
> dev_table after you did some sanity check?
> 
> It would be safer to do a read_atomic(..) at the beginning and use a
> temporary variable.

Fixed in v4.

> 
>> +
>> +return addr + evid * sizeof(struct vits_itte);
>> +}
>> +
>> +/*
>> + * Looks up a given deviceID/eventID pair on an ITS and returns a
>> pointer to
>> + * the corresponding ITTE. This maps the respective guest page into Xen.
>> + * Once finished with handling the ITTE, call put_devid_evid() to unmap
>> + * the page again.
>> + * Must be called with the ITS lock held.
> 
> This is a call for an ASSERT in the code.
> 
>> + */
>> +static struct vits_itte *get_devid_evid(struct virt_its *its,
>> +uint32_t devid, uint32_t evid)
> 
> The naming of the function is confusing. It doesn't look up a device
> ID/event ID but an ITTE. So I would rename it to find_itte.

Fixed in v3.

>> +{
>> +paddr_t addr = get_itte_address(its, devid, evid);
>> +
>> +if ( addr == ~0 )
> 
> 
> Please use INVALID_PADDR.

Fixed in v3.

>> +return NULL;
>> +
>> +return map_guest_pages(its->d, addr, 1);
>> +}
>> +
>> +/* Must be called with the ITS lock held. */
>> +static void put_devid_evid(struct virt_its *its, struct vits_itte *itte)
>> +{
>> +unmap_guest_pages(itte, 1);
>> +}
>> +
>> +/*
>> + * Queries the collection and device tables to get the vCPU and virtual
>> + * LPI number for a given guest event. This takes care of mapping the
>> + * respective tables and validating the values, since we can't
>> efficiently
>> + * protect the ITTs with their less-than-page-size granularity.
>> + * Takes and drops the its_lock.
> 
> I am not sure I understand the usefulness of "takes and drops the
> its_lock".

It tells people that they should not hold the its_lock when calling this
function, as this might deadlock. Also it means that this function takes
care of locking itself.
Improved the wording in v4.

>> + */
>> +bool read_itte(struct virt_its *its, uint32_t devid, uint32_t evid,
>> +   struct vcpu **vcpu, uint32_t *vlpi)
>> +{
>> +struct vits_itte *itte;
>> +int collid;
>> +uint32_t _vlpi;
>> +

Re: [Xen-devel] [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG

2017-04-03 Thread Razvan Cojocaru
On 04/01/2017 07:56 PM, Razvan Cojocaru wrote:
> On 03/31/2017 06:04 PM, Jan Beulich wrote:
> On 31.03.17 at 17:01,  wrote:
>>> On 03/31/2017 05:46 PM, Jan Beulich wrote:
>>> On 31.03.17 at 11:56,  wrote:
> On 03/31/2017 10:34 AM, Jan Beulich wrote:
> On 31.03.17 at 08:17,  wrote:
>>> On 03/30/2017 06:47 PM, Jan Beulich wrote:
> Speaking of emulated MMIO, I've got this when the guest was crashing
> immediately (pre RETRY loop):
>
>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 
> 72
> 07 8b cb e8 da 4b ff ff 8b 45

 That's a BTR, which we should be emulating fine. More information
 would need to be collected to have a chance to understand what
 might be going on (first of all the virtual and physical memory
 address this was trying to act on).
>>>
>>> Right, the BTR part should be fine, but I think the LOCK part is what's
>>> causing the issue. I've done a few more test runs to see what return
>>> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
>>> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
>>> and XADD, both LOCK-prefixed, which means they now involve CMPXCHG
>>> handler, which presumably now fails - possibly simply because it's
>>> always LOCKed in my patch):
>>
>> Well, all of that looks to be expected behavior. I'm afraid I don't see
>> how this information helps understanding the MMIO emulation failure
>> above.
>
> I've managed to obtain this log of emulation errors:
> https://pastebin.com/Esy1SkHx 
>
> The "virtual address" lines that are not followed by any "Mem event"
> line correspond to CMPXCHG_FAILED return codes.
>
> The very last line is a MMIO emulation failed.
>
> It's probably important that this happens with the model where
> hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
> succeeds. The other model allows me to go further with the guest, but
> eventually I get timeout-related BSODs or the guest becomes unresponsive.

 Interesting. You didn't clarify what the printed "offset" values are,
 and it doesn't look like these have any correlation with the underlying
 (guest) physical address, which we would also want to see. And then
 it strikes me as odd that in these last lines

 (XEN) Mem event (RETRY) emulation failed: d5v8 32bit @ 0008:826bb861 -> f0 
 0f 
>>> ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
 (XEN) virtual address: 0xffd080f0, offset: 4291854576
 (XEN) MMIO emulation failed: d5v8 32bit @ 0008:82655f3c -> f0 0f ba 30 00 
 72 
>>> 07 8b cb e8 da 4b ff ff 8b 45

 the instruction pointers and virtual addresses are different, but the
 code bytes are exactly the same. This doesn't seem very likely, so I
 wonder whether there's an issue with us wrongly re-using previously
 fetched insn bytes. (Of course I'd be happy to be proven wrong with
 this guessing, by you checking the involved binary/ies.)
>>>
>>> Offset is the actual value of the "offset" parameter of
>>> hvmemul_cmpxchg().
>>
>> That's not very useful then, as for flat segments "offset" ==
>> "virtual address" (i.e. you merely re-print in decimal what you've
>> already printed in hex).
> 
> The attached patch (a combination of your patch and mine) produces the
> following output when booting a Windows 7 32-bit guest with monitoring:
> https://pastebin.com/ayiFmj1N
> 
> The failed MMIO emulation is caused by a mapping failure due to the
> "!nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa)" condition
> being true in hvmemul_vaddr_to_mfn(). I've ripped that off from
> __hvm_copy() but it looks like that might not be the right way to use it.

Sorry to reply to this email instead of your original reply but I've
"left it" in my computer at work. Here's the last part of the log, with
the VCPU number logged for the GFN as well:

(XEN) [8] gfn: 0x2781
(XEN) [8] virtual address: 0x827810a8, rc: 0
(XEN) [8] gfn: 0x2781
(XEN) [8] virtual address: 0x827810a8, rc: 0
(XEN) [8] gfn: 0x2781
(XEN) [8] virtual address: 0x827810cc, rc: 0
(XEN) [8] gfn: 0x2781
(XEN) [8] virtual address: 0x8278109c, rc: 0
(XEN) [8] gfn: 0x2781
(XEN) [8] virtual address: 0x827810d0, rc: 0
(XEN) [11] gfn: 0x2781
(XEN) [8] gfn: 0x2781
(XEN) [11] virtual address: 0x8278109c, rc: 0
(XEN) [8] virtual address: 0x8278109c, rc: 4
(XEN) Dump follows for VCPU 8
(XEN) Mem event (RETRY) emulation failed: d3v8 32bit @ 0008:826b5c7c ->
f0 0f c1 08 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
(XEN) [11] gfn: 0x2781
(XEN) [11] virtual address: 0x827810a8, rc: 0
(XEN) [11] gfn: 0x2781
(XEN) [11] virtual address: 0x827810a8, rc: 0
(XEN) [8] gfn: 0xfed00
(XEN) !page
(XEN) hvmemul_vaddr_to_mfn() fail
(XEN) [8] virtual address: 

Re: [Xen-devel] [PATCH v3 02/26] ARM: GICv3: allocate LPI pending and property table

2017-04-03 Thread Stefano Stabellini
On Mon, 3 Apr 2017, Andre Przywara wrote:
> Hi,
> 
> On 31/03/17 23:59, Stefano Stabellini wrote:
> > On Fri, 31 Mar 2017, Andre Przywara wrote:
> >> The ARM GICv3 provides a new kind of interrupt called LPIs.
> >> The pending bits and the configuration data (priority, enable bits) for
> >> those LPIs are stored in tables in normal memory, which software has to
> >> provide to the hardware.
> >> Allocate the required memory, initialize it and hand it over to each
> >> redistributor. The maximum number of LPIs to be used can be adjusted with
> >> the command line option "max_lpi_bits", which defaults to 20 bits,
> >> covering about one million LPIs.
> >>
> >> Signed-off-by: Andre Przywara 
> >> ---
> 
> [...]
> 
> >> +static unsigned int max_lpi_bits = 20;
> >> +integer_param("max_lpi_bits", max_lpi_bits);
> > 
> > The only thing missing is checking that the user has passed max_lpi_bits
> > or warn if she has not (or if the memory usage is too high).
> 
> Right, I was missing that.
> So I went with the "if memory usage is too high" version here, since the
> default of 20 bits results in a 16KB first level table only. I would
> then start warning if the bits exceed 24 (which is 256KB).

Yes, but where is the warning? I cannot find it on this patch.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Shattering superpages impact on IOMMU in Xen

2017-04-03 Thread Julien Grall

Hi Andrew,

On 03/04/17 18:16, Andrew Cooper wrote:

On 03/04/17 18:02, Julien Grall wrote:

Hi Andrew,

On 03/04/17 17:42, Andrew Cooper wrote:

On 03/04/17 17:24, Oleksandr Tyshchenko wrote:

Hi, all.

Playing with non-shared IOMMU in Xen on ARM I faced one interesting
thing. I found out that the superpages were shattered during domain
life cycle.
This is the result of mapping of foreign pages, ballooning memory,
even if domain maps Xen shared pages, etc.
I don't bother with the memory fragmentation at the moment. But,
shattering bothers me from the IOMMU point of view.
As Xen owns the IOMMU it might manipulate IOMMU page tables while a
passed-through/protected device is doing DMA in Linux. It is hard to detect
when the DMA transaction isn't in progress
in order to prevent this race. So, if we have inflight transaction
from a device when changing IOMMU mapping we might get into trouble.
Unfortunately, not in all the cases the
faulting transaction can be restarted. The chance to hit the problem
increases during shattering.

I did next test:
The dom0 on my setup contains ethernet IP that are protected by IOMMU.
What is more, as the IOMMU I am playing with supports superpages (2M,
1G) the IOMMU driver
takes into account these capabilities when building page tables. As I
gave 256 MB for dom0, the IOMMU mapping was built by 2M memory blocks
only. As I am using NFS for both dom0 and domU the ethernet IP
performs DMA transactions almost all the time.
Sometimes, I see IOMMU page faults while creating a guest domain. I
think it happens while Xen is shattering 2M mappings into 4K mappings (it
unmaps dom0 pages by one 4K page at a time, then maps domU pages there
for copying domU images).
But, I don't see any page faults when the IOMMU page table was built
by 4K pages only.

I had a talk with Julien on IRC and we came to the conclusion that the
safest way would be to use 4K pages to prevent shattering, so the
IOMMU shouldn't report superpage capability.
On the other hand, if we build IOMMU from 4K pages we will have
performance drop (during building, walking page tables), TLB pressure,
etc.
Another possible solution Julien was suggesting is to always
balloon with 2M, 1G, and not use 4K. That would help us to
prevent the shattering effect.
The discussion was moved to the ML since it seems to be a generic
issue and the right solution should be thought out.

What do you think is the right way to follow? Use 4K pages and don't
bother with shattering or try to optimize? And if the idea to make
balloon mechanism smarter makes sense how to teach balloon to do so?
Thank you.


Ballooning and foreign mappings are terrible for trying to retain
superpage mappings.  No OS, not even Linux, can sensibly provide victim
pages in a useful way to avoid shattering.

If you care about performance, don't ever balloon.  Foreign mappings in
translated guests should start from the top of RAM, and work upwards.


I am not sure I understand this. Can you expand?


I am not sure what is unclear.  Handing random frames of RAM back to the
hypervisor is what exacerbates host superpage fragmentation, and all
balloon drivers currently do it.

If you want to avoid host superpage fragmentation, don't use a
scattergun approach of handing frames back to Xen.  However, because
even Linux doesn't provide enough hooks into the physical memory
management logic, the only solution is to not balloon at all, and to use
already-unoccupied frames for foreign mappings.


Do you have any pointer in the Linux code?








As for the IOMMU specifically, things are rather easier.  It is the
guest's responsibility to ensure that frames offered up for ballooning or
foreign mappings are unused.  Therefore, if anything cares about the
specific 4K region becoming non-present in the IOMMU mappings, it is the
guest kernel's fault for offering up a frame already in use.

For the shattering however, It is Xen's responsibility to ensure that
all other mappings stay valid at all points.  The correct way to do this
is to construct a new L1 table, mirroring the L2 superpage but lacking
the specific 4K mapping in question, then atomically replace the L2
superpage entry with the new L1 table, then issue an IOMMU TLB
invalidation to remove any cached mappings.

By following that procedure, all DMA within the 2M region, but not
hitting the 4K frame, won't observe any interim lack of mappings.  It
appears from your description that Xen isn't following the procedure.


Xen is following what the ARM ARM mandates. For shattering page
tables, we have to follow the break-before-make sequence, i.e.:
- Invalidate the L2 entry
- Flush the TLBs
- Add the new L1 table
See D4-1816 in ARM DDI 0487A.k_iss10775 for details. So we end up in a
small window where there is no valid mapping. It is easy to trap a data
abort from the processor and restart it, but not for device memory
transactions.

Xen by default is sharing stage-2 page tables between the IOMMU
and the MMU. However, from the 

Re: [Xen-devel] linux-next: manual merge of the xen-tip tree with the tip tree

2017-04-03 Thread Juergen Gross
On 29/03/17 12:06, Vitaly Kuznetsov wrote:
> Juergen Gross  writes:
>> I'll create another branch for-linus-4.12 based on the tip tree next
>> week which will be subject to the pull request for Linus. As soon as
>> for-linus-4.12 is ready the for-linus-4.12-pre branch shouldn't be used
>> any longer.
> 
> Please let me know if/when I need to rebase my series. I'll rebase, test
> and re-send.

Vitaly, I've created the (new) for-linus-4.12 branch on xen/tip. It is
based on the x86-mm-for-xen branch of tip/tip containing the 5-level
page table patches. Please rebase your patches to this branch.


Juergen

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] kexec: clear kexec_image slot when unloading kexec image

2017-04-03 Thread Daniel Kiper
On Mon, Apr 03, 2017 at 10:44:54AM -0700, Bhavesh Davda wrote:
> While theoretically this bug can be tickled simply by a sequence of 'kexec -p'
> to load a kexec crash image followed by two back-to-back 'kexec -p -u' to
> unload the kexec crash image, I found the following perl script to be useful 
> to
> reliably reproduce Xen panics as well as verify that the fix works. YMMV.
>
> -snip-
> #!/usr/bin/perl -w
>
> use strict;
> use warnings;
> use threads;
>
> sub threaded_task {
> threads->create(sub {
> my $thr_id = threads->self->tid;
> print "Starting load thread $thr_id\n";
> system("/sbin/kexec  -p --command-line=\"placeholder 
> root=/dev/mapper/root ro rhbg console=tty0 console=hvc0 earlyprintk=xen 
> nomodeset printk.time=1 irqpoll maxcpus=1 nr_cpus=1 reset_devices 
> cgroup_disable=memory mce=off selinux=0 console=ttyS1,115200n8\" 
> --initrd=/boot/initrd.x86_64kdump.img /boot/vmlinuz.x86_64");
> print "Ending load thread $thr_id\n";
> threads->detach(); #End thread.
> });
> threads->create(sub {
> my $thr_id = threads->self->tid;
> print "Starting unload thread $thr_id\n";
> system("/sbin/kexec  -p -u");
> print "Ending unload thread $thr_id\n";
> threads->detach(); #End thread.
> });
> }
>
> for my $i (0..99)
> {
> threaded_task();
> }
> -snip-
>
> ---
> When kexec_do_unload calls kexec_swap_images to get the old kexec_image to
> free, it passes NULL for the new kexec_image pointer. The new slot wasn't 
> being
> cleared in such a case, leading to a stale pointer being left behind in the
> kexec_image array and Xen panics in subsequent load/unload operations.
>
> Signed-off-by: Bhavesh Davda 
> Reviewed-by: Konrad Rzeszutek Wilk 
> Reviewed-by: Daniel Kiper 
> ---
>  xen/common/kexec.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/xen/common/kexec.c b/xen/common/kexec.c
> index 940fc7ec94..072cc8e0db 100644
> --- a/xen/common/kexec.c
> +++ b/xen/common/kexec.c
> @@ -837,11 +837,9 @@ static int kexec_swap_images(int type, struct 
> kexec_image *new,
>  old_slot = base + pos;
>  new_slot = base + !pos;
>
> +kexec_image[new_slot] = new;
>  if ( new )
> -{
> -kexec_image[new_slot] = new;
>  set_bit(new_slot, _flags);
> -}
>  change_bit(bit, _flags);
>
>  clear_bit(old_slot, _flags);

Bhavesh, thanks for posting this.

Jan, Andrew, IMO, this is Xen stable material too.

Daniel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Shattering superpages impact on IOMMU in Xen

2017-04-03 Thread Oleksandr Tyshchenko
On Mon, Apr 3, 2017 at 8:39 PM, Oleksandr Tyshchenko
 wrote:
> Hi, Julien.
>
> On Mon, Apr 3, 2017 at 8:02 PM, Julien Grall  wrote:
>> Hi Andrew,
>>
>>
>> On 03/04/17 17:42, Andrew Cooper wrote:
>>>
>>> On 03/04/17 17:24, Oleksandr Tyshchenko wrote:

 Hi, all.

 Playing with non-shared IOMMU in Xen on ARM I faced one interesting
 thing. I found out that the superpages were shattered during domain
 life cycle.
 This is the result of mapping of foreign pages, ballooning memory,
 even if domain maps Xen shared pages, etc.
 I don't bother with the memory fragmentation at the moment. But,
 shattering bothers me from the IOMMU point of view.
 As Xen owns the IOMMU it might manipulate IOMMU page tables while a
 passed-through/protected device is doing DMA in Linux. It is hard to detect
 when the DMA transaction isn't in progress
 in order to prevent this race. So, if we have inflight transaction
 from a device when changing IOMMU mapping we might get into trouble.
 Unfortunately, not in all the cases the
 faulting transaction can be restarted. The chance to hit the problem
 increases during shattering.

 I did next test:
 The dom0 on my setup contains ethernet IP that are protected by IOMMU.
 What is more, as the IOMMU I am playing with supports superpages (2M,
 1G) the IOMMU driver
 takes into account these capabilities when building page tables. As I
 gave 256 MB for dom0, the IOMMU mapping was built by 2M memory blocks
 only. As I am using NFS for both dom0 and domU the ethernet IP
 performs DMA transactions almost all the time.
 Sometimes, I see the IOMMU page faults during creating guest domain. I
 think, it happens while Xen is shattering 2M mappings into 4K mappings (it
 unmaps dom0 pages by one 4K page at a time, then maps domU pages there
 for copying domU images).
 But, I don't see any page faults when the IOMMU page table was built
 by 4K pages only.

 I had a talk with Julien on IRC and we came to the conclusion that the
 safest way would be to use 4K pages to prevent shattering, so the
 IOMMU shouldn't report superpage capability.
 On the other hand, if we build IOMMU from 4K pages we will have
 performance drop (during building, walking page tables), TLB pressure,
 etc.
 Another possible solution Julien was suggesting is to always
 ballooning with 2M, 1G, and not using 4K. That would help us to
 prevent shattering effect.
 The discussion was moved to the ML since it seems to be a generic
 issue and the right solution should be thought of.

 What do you think is the right way to follow? Use 4K pages and don't
 bother with shattering or try to optimize? And if the idea to make
 balloon mechanism smarter makes sense how to teach balloon to do so?
 Thank you.
>>>
>>>
>>> Ballooning and foreign mappings are terrible for trying to retain
>>> superpage mappings.  No OS, not even Linux, can sensibly provide victim
>>> pages in a useful way to avoid shattering.
>>>
>>> If you care about performance, don't ever balloon.  Foreign mappings in
>>> translated guests should start from the top of RAM, and work upwards.
>>
>>
>> I am not sure to understand this. Can you extend?
>>
>>>
>>>
>>> As for the IOMMU specifically, things are rather easier.  It is the
>>> guests responsibility to ensure that frames offered up for ballooning or
>>> foreign mappings are unused.  Therefore, if anything cares about the
>>> specific 4K region becoming non-present in the IOMMU mappings, it is the
>>> guest kernels fault for offering up a frame already in use.
>>>
>>> For the shattering however, It is Xen's responsibility to ensure that
>>> all other mappings stay valid at all points.  The correct way to do this
>>> is to construct a new L1 table, mirroring the L2 superpage but lacking
>>> the specific 4K mapping in question, then atomically replace the L2
>>> superpage entry with the new L1 table, then issue an IOMMU TLB
>>> invalidation to remove any cached mappings.
>>>
>>> By following that procedure, all DMA within the 2M region, but not
>>> hitting the 4K frame, won't observe any interim lack of mappings.  It
>>> appears from your description that Xen isn't following the procedure.
>>
>>
>> Xen is following what the ARM ARM is mandating. For shattering page tables,
>> we have to follow the break-before-make sequence, i.e.:
>> - Invalidate the L2 entry
>> - Flush the TLBs
>> - Add the new L1 table
>>
>> See D4-1816 in ARM DDI 0487A.k_iss10775 for details. So we end up in a small
>> window where there are no valid mapping. It is easy to trap data abort from
>> processor and restarting it but not for device memory transactions.
>>
>> Xen by default is sharing stage-2 page tables between the IOMMU and the
>> MMU. However, from the discussion I had with Oleksandr, they are not sharing

[Xen-devel] [PATCH] kexec: clear kexec_image slot when unloading kexec image

2017-04-03 Thread Bhavesh Davda
While theoretically this bug can be tickled simply by a sequence of 'kexec -p'
to load a kexec crash image followed by two back-to-back 'kexec -p -u' to
unload the kexec crash image, I found the following perl script to be useful to
reliably reproduce Xen panics as well as verify that the fix works. YMMV.

-snip-
#!/usr/bin/perl -w

use strict;
use warnings;
use threads;

sub threaded_task {
threads->create(sub { 
my $thr_id = threads->self->tid;
print "Starting load thread $thr_id\n";
system("/sbin/kexec  -p --command-line=\"placeholder 
root=/dev/mapper/root ro rhbg console=tty0 console=hvc0 earlyprintk=xen 
nomodeset printk.time=1 irqpoll maxcpus=1 nr_cpus=1 reset_devices 
cgroup_disable=memory mce=off selinux=0 console=ttyS1,115200n8\" 
--initrd=/boot/initrd.x86_64kdump.img /boot/vmlinuz.x86_64");
print "Ending load thread $thr_id\n";
threads->detach(); #End thread.
});
threads->create(sub { 
my $thr_id = threads->self->tid;
print "Starting unload thread $thr_id\n";
system("/sbin/kexec  -p -u");
print "Ending unload thread $thr_id\n";
threads->detach(); #End thread.
});
}

for my $i (0..99) 
{
threaded_task();
}
-snip-

---
When kexec_do_unload calls kexec_swap_images to get the old kexec_image to
free, it passes NULL for the new kexec_image pointer. The new slot wasn't being
cleared in such a case, leading to a stale pointer being left behind in the
kexec_image array and Xen panics in subsequent load/unload operations.

Signed-off-by: Bhavesh Davda 
Reviewed-by: Konrad Rzeszutek Wilk 
Reviewed-by: Daniel Kiper 
---
 xen/common/kexec.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/xen/common/kexec.c b/xen/common/kexec.c
index 940fc7ec94..072cc8e0db 100644
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -837,11 +837,9 @@ static int kexec_swap_images(int type, struct kexec_image 
*new,
 old_slot = base + pos;
 new_slot = base + !pos;
 
+kexec_image[new_slot] = new;
 if ( new )
-{
-kexec_image[new_slot] = new;
 set_bit(new_slot, &kexec_flags);
-}
 change_bit(bit, &kexec_flags);
 
 clear_bit(old_slot, &kexec_flags);
-- 
2.12.2.575.gb14f27f.dirty


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Shattering superpages impact on IOMMU in Xen

2017-04-03 Thread Oleksandr Tyshchenko
Hi, Julien.

On Mon, Apr 3, 2017 at 8:02 PM, Julien Grall  wrote:
> Hi Andrew,
>
>
> On 03/04/17 17:42, Andrew Cooper wrote:
>>
>> On 03/04/17 17:24, Oleksandr Tyshchenko wrote:
>>>
>>> Hi, all.
>>>
>>> Playing with non-shared IOMMU in Xen on ARM I faced one interesting
>>> thing. I found out that the superpages were shattered during domain
>>> life cycle.
>>> This is the result of mapping of foreign pages, ballooning memory,
>>> even if domain maps Xen shared pages, etc.
>>> I don't bother with the memory fragmentation at the moment. But,
>>> shattering bothers me from the IOMMU point of view.
>>> As the Xen owns IOMMU it might manipulate IOMMU page tables when
>>> passthoughed/protected device doing DMA in Linux. It is hard to detect
>>> when the DMA transaction isn't in progress
>>> in order to prevent this race. So, if we have inflight transaction
>>> from a device when changing IOMMU mapping we might get into trouble.
>>> Unfortunately, not in all the cases the
>>> faulting transaction can be restarted. The chance to hit the problem
>>> increases during shattering.
>>>
>>> I did next test:
>>> The dom0 on my setup contains ethernet IP that are protected by IOMMU.
>>> What is more, as the IOMMU I am playing with supports superpages (2M,
>>> 1G) the IOMMU driver
>>> takes into account these capabilities when building page tables. As I
>>> gave 256 MB for dom0, the IOMMU mapping was built by 2M memory blocks
>>> only. As I am using NFS for both dom0 and domU the ethernet IP
>>> performs DMA transactions almost all the time.
>>> Sometimes, I see the IOMMU page faults during creating guest domain. I
>>> think, it happens while Xen is shattering 2M mappings into 4K mappings (it
>>> unmaps dom0 pages by one 4K page at a time, then maps domU pages there
>>> for copying domU images).
>>> But, I don't see any page faults when the IOMMU page table was built
>>> by 4K pages only.
>>>
>>> I had a talk with Julien on IRC and we came to the conclusion that the
>>> safest way would be to use 4K pages to prevent shattering, so the
>>> IOMMU shouldn't report superpage capability.
>>> On the other hand, if we build IOMMU from 4K pages we will have
>>> performance drop (during building, walking page tables), TLB pressure,
>>> etc.
>>> Another possible solution Julien was suggesting is to always
>>> ballooning with 2M, 1G, and not using 4K. That would help us to
>>> prevent shattering effect.
>>> The discussion was moved to the ML since it seems to be a generic
>>> issue and the right solution should be thought of.
>>>
>>> What do you think is the right way to follow? Use 4K pages and don't
>>> bother with shattering or try to optimize? And if the idea to make
>>> balloon mechanism smarter makes sense how to teach balloon to do so?
>>> Thank you.
>>
>>
>> Ballooning and foreign mappings are terrible for trying to retain
>> superpage mappings.  No OS, not even Linux, can sensibly provide victim
>> pages in a useful way to avoid shattering.
>>
>> If you care about performance, don't ever balloon.  Foreign mappings in
>> translated guests should start from the top of RAM, and work upwards.
>
>
> I am not sure to understand this. Can you extend?
>
>>
>>
>> As for the IOMMU specifically, things are rather easier.  It is the
>> guests responsibility to ensure that frames offered up for ballooning or
>> foreign mappings are unused.  Therefore, if anything cares about the
>> specific 4K region becoming non-present in the IOMMU mappings, it is the
>> guest kernels fault for offering up a frame already in use.
>>
>> For the shattering however, It is Xen's responsibility to ensure that
>> all other mappings stay valid at all points.  The correct way to do this
>> is to construct a new L1 table, mirroring the L2 superpage but lacking
>> the specific 4K mapping in question, then atomically replace the L2
>> superpage entry with the new L1 table, then issue an IOMMU TLB
>> invalidation to remove any cached mappings.
>>
>> By following that procedure, all DMA within the 2M region, but not
>> hitting the 4K frame, won't observe any interim lack of mappings.  It
>> appears from your description that Xen isn't following the procedure.
>
>
> Xen is following what the ARM ARM is mandating. For shattering page tables,
> we have to follow the break-before-make sequence, i.e.:
> - Invalidate the L2 entry
> - Flush the TLBs
> - Add the new L1 table
>
> See D4-1816 in ARM DDI 0487A.k_iss10775 for details. So we end up in a small
> window where there are no valid mapping. It is easy to trap data abort from
> processor and restarting it but not for device memory transactions.
>
> Xen by default is sharing stage-2 page tables between the IOMMU and the
> MMU. However, from the discussion I had with Oleksandr, they are not sharing
> page tables and still see the problem. I am not sure how they are updating
> the page table here. Oleksandr, can you provide more details?

Yes, the IOMMU is a IPMMU-VMSA that doesn't 

Re: [Xen-devel] [PATCH for 4.9 3/6] x86/hvm: Fix segmentation logic for system segments

2017-04-03 Thread Andrew Cooper
On 03/04/17 17:08, Jan Beulich wrote:
 On 03.04.17 at 17:42,  wrote:
>> On 03/04/17 16:07, Jan Beulich wrote:
>> On 03.04.17 at 16:27,  wrote:
 On 03/04/17 10:13, Jan Beulich wrote:
 On 31.03.17 at 21:50,  wrote:
>> --- a/xen/arch/x86/hvm/hvm.c
>> +++ b/xen/arch/x86/hvm/hvm.c
>> @@ -2374,13 +2374,27 @@ int hvm_set_cr4(unsigned long value, bool_t 
 may_defer)
>>  return X86EMUL_OKAY;
>>  }
>>  
>> +enum hvm_segmentation_mode hvm_seg_mode(
>> +const struct vcpu *v, enum x86_segment seg,
>> +const struct segment_register *cs)
> The inputs here are at least somewhat counterintuitive (for example,
> from an abstract pov it is unexpected that the result depends on seg
> and cs). At the very least I think the naming should make clear that
> cs is not just some code segment, but the CS v has currently in use
> (e.g. active_cs). Going further the question is whether having this
> helper is really useful (and not perhaps inviting mis-use), and hence
> whether the inputs passed here wouldn't better be passed directly
> to hvm_virtual_to_linear_addr(), the more that the "seg" argument
> is required to match up between the two calls.
 I purposefully wanted to avoid people opencoding the logic and getting
 it wrong (looks like even I got it wrong).

 I'm not convinced that passing the parameters individually is better.

>> +{
>> +if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
>> +return hvm_seg_mode_real;
> What about VM86 mode?
 Very good point.

>> +if ( hvm_long_mode_active(v) &&
>> + (is_x86_system_segment(seg) || cs->attr.fields.l) )
>> +return hvm_seg_mode_long;
>> +
>> +return hvm_seg_mode_prot;
> Did you verify this actually matching real hardware behavior? There
> being obvious anomalies when compat ring-0 code executes
> LGDT/LIDT/SGDT/SIDT (in long mode these ought to have 10-byte
> operands, yet 32-bit/compat code would expect 6-byte ones, so
> one of the two views is necessarily wrong, and whichever it is, it
> introduces an inconsistency), I wouldn't take it for given that _all_
> descriptor table accessing insns behave like they would from a
> 64-bit code segment (I nevertheless assume they do, but as I
> can't see it written down anywhere, we shouldn't assume so,
> considering how many other oddities there are in x86).
>
> This question is also being supported by the SDM using the same
> standard "Same exceptions as in protected mode" in the
> respective insns' "Compatibility Mode Exceptions" sections, yet
> the behavior above implies that #GP(0) might also result for
> compat mode descriptor table accesses if the descriptor address
> ends up being non-canonical. Interestingly enough the PM
> doesn't separate exception specifications for 32-bit protected,
> compat, and 64-bit modes.
 You are mistaken.

 {L,S}{I,G}DT are {read,write}_segment_register() operations, using a
 plain memory operand.

 When we come to the segmentation check, it will be by default
 %ds-relative, with size as calculated by op_bytes in the instruction
 emulator.
>>> I think I didn't make myself clear then: I'm not at all talking about how
>>> the memory access of these insns get carried out, I solely talk about
>>> the size of their operands:
>> I still fail to see what the size of the operands have to do with the
>> choice of segmentation mode.
>>
>>> In long mode to load IDTR or GDTR you'd expect a 64-bit base and a 16-bit 
>> limit.
>>
>> Why?  I'd expect nothing of the sort, because 32bit compat segments are
>> purposefully designed to be no functional difference from regular 32bit
>> protected mode segments.  That includes not changing the behaviour of
>> instructions like this.
> Well, one can of course take the position that ring-0 compat code
> is simply a useless thing.

Compatibility mode segments exist for the purpose of making user code
continue to work.  I don't find it surprising that compatibility
supervisor segments have some rough corners.

>
>>> Hence if _all_ system segment
>>> register related insns worked consistently in long mode, the four
>>> named insns would have to have 10-byte operands.
>> This isn't a valid expectation to draw.
>>
>>>  I'm pretty sure
>>> they don't though, so there is _one_ anomaly already.
>> Indeed they don't.  In a compatibility mode segment, they take a
>> 6-byte operand, identically to 32bit mode.
>>
>>> With that I don't think we can rule out there being other anomalies, with 
>>> this
>>> not being talked about explicitly anywhere in the doc.
>> I don't think any of this is relevant to the correctness of this patch.
> I don't question the correctness; all I question is how far it 

Re: [Xen-devel] [PATCH v3 03/26] ARM: GICv3 ITS: allocate device and collection table

2017-04-03 Thread Julien Grall

Hi Andre,

On 03/04/17 16:38, Julien Grall wrote:

On 31/03/17 19:05, Andre Przywara wrote:

Each ITS maps a pair of a DeviceID (for instance derived from a PCI
b/d/f triplet) and an EventID (the MSI payload or interrupt ID) to a
pair of LPI number and collection ID, which points to the target CPU.
This mapping is stored in the device and collection tables, which
software
has to provide for the ITS to use.
Allocate the required memory and hand it to the ITS.
The maximum number of devices is limited to a compile-time constant
exposed in Kconfig.

Signed-off-by: Andre Przywara 


Reviewed-by: Julien Grall 


Actually I will withdraw my reviewed-by. I didn't spot that you kept the
command line option around, to which I clearly said no and gave some reasons why.
Sorry for the mess.


To explain it again, no-one can possibly know how the DeviceIDs will be
spread on the platform without having the platform data sheet in hand.
If the platform provides more DeviceIDs and is not able to cope with that,
then it is a platform-specific quirk.


When we spoke f2f you agreed on this. So please drop this command line option.

Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] How to migrate vCPUs based on Credit Scheduler

2017-04-03 Thread Lars Kurth
Adding George, Dario & Anshul
Lars

> On 3 Apr 2017, at 17:21, 甘清甜  wrote:
> 
> Hi,
> 
>  I'm now designing a new vCPU scheduler in Xen, and trying to implement
> the scheduler based on the Credit scheduler in Xen-4.5.1. But I encountered
>  some problems when debugging the code.
> 
> Most of the code modification is done in function csched_schedule() in 
> file: xen/common/csched_schedule.c . And the core code is as followed:
> 
> if( vcpu_runnable(current) )
> {
> if( match the migration contition )
> {
> cpu_affinity = pick_pcpu_runq();  // this function is defined by 
> myself
> 
> pcpulock = pcpu_schedule_lock_irqsave(cpu_affinity, 
>    &pcpulock_flag);
> 
> TRACE_3D(TRC_CSCHED_STOLEN_VCPU, cpu_affinity  , domain_id,
>vcpu_id);
> SCHED_VCPU_STAT_CRANK(scurr, migrate_q);
> SCHED_STAT_CRANK(migrate_queued);
> WARN_ON(scurr->vcpu->is_urgent);
>   scurr->vcpu->processor = cpu_affinity;  
> 
>  __runq_insert(cpu_affinity, scurr);
> pcpu_schedule_unlock_irqrestore(pcpulock, pcpulock_flag, 
> cpu_affinity  );
> }
> else 
> __runq_insert(cpu, scurr);
> }
> else 
> BUG_ON( is_idle_vcpu(current) || list_empty(runq) );
> 
> 
> I try to run the modified Xen. But according to the log I found that, 
> although I insert the vCPU into the runqueue  of another pCPU, the 
> vCPU still appears at the old pCPU in the following scheduling period. 
> Now I have a few questions:
> 
> 1. Does the Xen scheduler framework support changing the pCPU of a 
> vCPU after using out the scheduling time slice, but not just to steal one 
> vCPU from runqueue of other pCPU in load_balance period?
> 
> 2. If yes, what status of the vCPU should be changed before inserting 
> the vCPU into the destination pCPU?
> 
> Thank you very much!
> ___
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> https://lists.xen.org/xen-devel


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Shattering superpages impact on IOMMU in Xen

2017-04-03 Thread Andrew Cooper
On 03/04/17 18:02, Julien Grall wrote:
> Hi Andrew,
>
> On 03/04/17 17:42, Andrew Cooper wrote:
>> On 03/04/17 17:24, Oleksandr Tyshchenko wrote:
>>> Hi, all.
>>>
>>> Playing with non-shared IOMMU in Xen on ARM I faced one interesting
>>> thing. I found out that the superpages were shattered during domain
>>> life cycle.
>>> This is the result of mapping of foreign pages, ballooning memory,
>>> even if domain maps Xen shared pages, etc.
>>> I don't bother with the memory fragmentation at the moment. But,
>>> shattering bothers me from the IOMMU point of view.
>>> As the Xen owns IOMMU it might manipulate IOMMU page tables when
>>> passthoughed/protected device doing DMA in Linux. It is hard to detect
>>> when the DMA transaction isn't in progress
>>> in order to prevent this race. So, if we have inflight transaction
>>> from a device when changing IOMMU mapping we might get into trouble.
>>> Unfortunately, not in all the cases the
>>> faulting transaction can be restarted. The chance to hit the problem
>>> increases during shattering.
>>>
>>> I did next test:
>>> The dom0 on my setup contains ethernet IP that are protected by IOMMU.
>>> What is more, as the IOMMU I am playing with supports superpages (2M,
>>> 1G) the IOMMU driver
>>> takes into account these capabilities when building page tables. As I
>>> gave 256 MB for dom0, the IOMMU mapping was built by 2M memory blocks
>>> only. As I am using NFS for both dom0 and domU the ethernet IP
>>> performs DMA transactions almost all the time.
>>> Sometimes, I see the IOMMU page faults during creating guest domain. I
>>> think, it happens while Xen is shattering 2M mappings into 4K mappings (it
>>> unmaps dom0 pages by one 4K page at a time, then maps domU pages there
>>> for copying domU images).
>>> But, I don't see any page faults when the IOMMU page table was built
>>> by 4K pages only.
>>>
>>> I had a talk with Julien on IRC and we came to the conclusion that the
>>> safest way would be to use 4K pages to prevent shattering, so the
>>> IOMMU shouldn't report superpage capability.
>>> On the other hand, if we build IOMMU from 4K pages we will have
>>> performance drop (during building, walking page tables), TLB pressure,
>>> etc.
>>> Another possible solution Julien was suggesting is to always
>>> ballooning with 2M, 1G, and not using 4K. That would help us to
>>> prevent shattering effect.
>>> The discussion was moved to the ML since it seems to be a generic
>>> issue and the right solution should be thought of.
>>>
>>> What do you think is the right way to follow? Use 4K pages and don't
>>> bother with shattering or try to optimize? And if the idea to make
>>> balloon mechanism smarter makes sense how to teach balloon to do so?
>>> Thank you.
>>
>> Ballooning and foreign mappings are terrible for trying to retain
>> superpage mappings.  No OS, not even Linux, can sensibly provide victim
>> pages in a useful way to avoid shattering.
>>
>> If you care about performance, don't ever balloon.  Foreign mappings in
>> translated guests should start from the top of RAM, and work upwards.
>
> I am not sure to understand this. Can you extend?

I am not sure what is unclear.  Handing random frames of RAM back to the
hypervisor is what exacerbates host superpage fragmentation, and all
balloon drivers currently do it.

If you want to avoid host superpage fragmentation, don't use a
scattergun approach of handing frames back to Xen.  However, because
even Linux doesn't provide enough hooks into the physical memory
management logic, the only solution is to not balloon at all, and to use
already-unoccupied frames for foreign mappings.

>
>>
>>
>> As for the IOMMU specifically, things are rather easier.  It is the
>> guests responsibility to ensure that frames offered up for ballooning or
>> foreign mappings are unused.  Therefore, if anything cares about the
>> specific 4K region becoming non-present in the IOMMU mappings, it is the
>> guest kernels fault for offering up a frame already in use.
>>
>> For the shattering however, It is Xen's responsibility to ensure that
>> all other mappings stay valid at all points.  The correct way to do this
>> is to construct a new L1 table, mirroring the L2 superpage but lacking
>> the specific 4K mapping in question, then atomically replace the L2
>> superpage entry with the new L1 table, then issue an IOMMU TLB
>> invalidation to remove any cached mappings.
>>
>> By following that procedure, all DMA within the 2M region, but not
>> hitting the 4K frame, won't observe any interim lack of mappings.  It
>> appears from your description that Xen isn't following the procedure.
>
> Xen is following what the ARM ARM is mandating. For shattering page
> tables, we have to follow the break-before-make sequence, i.e.:
> - Invalidate the L2 entry
> - Flush the TLBs
> - Add the new L1 table
> See D4-1816 in ARM DDI 0487A.k_iss10775 for details. So we end up in a
> small window where there are no valid mapping. It is easy to trap data
> abort 

[Xen-devel] Random Dom0 Warning when starting PV guest

2017-04-03 Thread Hongming Xiao
Hello,

I'm getting some random kernel warnings on dom0 boot that I'm
concerned about.  Here is some information for my host and guest machines.
I got both xen and kernel from CentOS 6.8 xen4centos.

Dom0:

  - xen-4.6.1-11.el6.x86_64
  - kernel-3.18.34-20.el6.x86_64

PV Guest:

  - kernel-3.18.34-20.el6.x86_64

dmesg output:

[ cut here ]
WARNING: CPU: 9 PID: 7412 at arch/x86/xen/multicalls.c:129
xen_mc_flush+0x162/0x1c0()
Modules linked in: ipmi_watchdog dcdbas mpt3sas mpt2sas scsi_transport_sas
raid_class mptctl mptbase ipmi_si dell_rbu xen_pciback xen_gntalloc drbd
lru_cache libcrc32c ebt_ip ebtable_filter ebtables bridge stp llc
ipt_REJECT nf_reject_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 xt_state
nf_conntrack iptable_filter ip_tables xt_limit ip6t_REJECT nf_reject_ipv6
ip6table_filter ip6_tables ipv6 ebt_limit blktap xen_netback xen_blkback
xen_gntdev xen_evtchn xenfs xen_privcmd pcspkr sb_edac edac_core joydev
shpchp sg tg3 ptp pps_core 8250_fintek ipmi_devintf ipmi_msghandler lpc_ich
mei_me mei acpi_power_meter hwmon ext3 jbd mbcache sr_mod cdrom sd_mod ahci
libahci megaraid_sas wmi mgag200 ttm drm_kms_helper dm_mirror
dm_region_hash dm_log dm_mod [last unloaded: ipmi_si]
CPU: 9 PID: 7412 Comm: qldup_inv Not tainted 3.18.34-20.el6.x86_64 #1
Hardware name:/086D43, BIOS 1.5.4 10/002/2015
  880028b9ba98 81675c27 
 0081 880028b9bad8 81073c5c 
 88003372a100  0001 0001
Call Trace:
 [] dump_stack+0x64/0x85
 [] warn_slowpath_common+0x8c/0xc0
 [] warn_slowpath_null+0x1a/0x20
 [] xen_mc_flush+0x162/0x1c0
 [] __xen_mc_entry+0x120/0x190
 [] xen_extend_mmu_update+0x7a/0x110
 [] xen_set_pte_at+0x15f/0x2c0
 [] remap_pfn_range+0x36c/0x460
 [] ? cdev_put+0x30/0x30
 [] mmap_mem+0x103/0x110
 [] mmap_region+0x3f0/0x630
 [] do_mmap_pgoff+0x2cc/0x430
 [] vm_mmap_pgoff+0xad/0xe0
 [] ? kmem_cache_free+0xf0/0x280
 [] SyS_mmap_pgoff+0xee/0x200
 [] sysenter_dispatch+0x7/0x21
---[ end trace 66b9f7abd0aed691 ]---
[ cut here ]
WARNING: CPU: 9 PID: 7412 at arch/x86/xen/multicalls.c:129
xen_mc_flush+0x162/0x1c0()
Modules linked in: ipmi_watchdog dcdbas mpt3sas mpt2sas scsi_transport_sas
raid_class mptctl mptbase ipmi_si dell_rbu xen_pciback xen_gntalloc drbd
lru_cache libcrc32c ebt_ip ebtable_filter ebtables bridge stp llc
ipt_REJECT nf_reject_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 xt_state
nf_conntrack iptable_filter ip_tables xt_limit ip6t_REJECT nf_reject_ipv6
ip6table_filter ip6_tables ipv6 ebt_limit blktap xen_netback xen_blkback
xen_gntdev xen_evtchn xenfs xen_privcmd pcspkr sb_edac edac_core joydev
shpchp sg tg3 ptp pps_core 8250_fintek ipmi_devintf ipmi_msghandler lpc_ich
mei_me mei acpi_power_meter hwmon ext3 jbd mbcache sr_mod cdrom sd_mod ahci
libahci megaraid_sas wmi mgag200 ttm drm_kms_helper dm_mirror
dm_region_hash dm_log dm_mod [last unloaded: ipmi_si]
CPU: 9 PID: 7412 Comm: qldup_inv Tainted: GW
3.18.34-20.el6.x86_64 #1
Hardware name:/086D43, BIOS 1.5.4 10/002/2015
  880028b9ba98 81675c27 
 0081 880028b9bad8 81073c5c 
 88003372a100  0001 0001
Call Trace:
 [] dump_stack+0x64/0x85
 [] warn_slowpath_common+0x8c/0xc0
 [] warn_slowpath_null+0x1a/0x20
 [] xen_mc_flush+0x162/0x1c0
 [] __xen_mc_entry+0x120/0x190
 [] xen_extend_mmu_update+0x7a/0x110
 [] xen_set_pte_at+0x15f/0x2c0
 [] remap_pfn_range+0x36c/0x460
 [] ? cdev_put+0x30/0x30
 [] mmap_mem+0x103/0x110
 [] mmap_region+0x3f0/0x630
 [] do_mmap_pgoff+0x2cc/0x430
 [] vm_mmap_pgoff+0xad/0xe0
 [] ? kmem_cache_free+0xf0/0x280
 [] SyS_mmap_pgoff+0xee/0x200
 [] sysenter_dispatch+0x7/0x21
---[ end trace 66b9f7abd0aed692 ]---

Any idea what is going on, and does this warning indicate
any potential problem with the system?  I found a similar problem reported at
https://lists.gt.net/xen/users/215518 but in that case the problem is
reported in the guest instead of dom0. And, the solution mentioned there
doesn't make any sense to me.

Thanks in advance for looking into the problem.

Mike
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH] libxc: fix segfault on uninitialized xch->fmem

2017-04-03 Thread Seraphime Kirkovski
Currently in xc_interface_open, xch->fmem is not initialized,
and in some rare cases the code fails before ever assigning a value
to it.

I got this in master:

   $ sudo ./xl/xl run
   xencall: error: Could not obtain handle on privileged command interface: No 
such file or directory
   Segmentation fault

This initializes xch->fmem to NULL

Signed-off-by: Seraphime Kirkovski 
---
 tools/libxc/xc_private.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c
index 72e6242417..4ed46fde5f 100644
--- a/tools/libxc/xc_private.c
+++ b/tools/libxc/xc_private.c
@@ -32,6 +32,7 @@ struct xc_interface_core *xc_interface_open(xentoollog_logger 
*logger,
 {
 struct xc_interface_core xch_buf, *xch = &xch_buf;
 
+xch->fmem = NULL;
 xch->flags = open_flags;
 xch->dombuild_logger_file = 0;
 xc_clear_last_error(xch);
-- 
2.11.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] How to migrate vCPUs based on Credit Scheduler

2017-04-03 Thread 甘清甜
Hi,

 I'm now designing a new vCPU scheduler in Xen, and trying to implement
the scheduler based on the Credit scheduler in Xen-4.5.1. But I encountered
 some problems when debugging the code.

Most of the code modifications are done in the function csched_schedule() in
file: xen/common/csched_schedule.c . The core code is as follows:

if( vcpu_runnable(current) )
{
if( match the migration contition )
{
cpu_affinity = pick_pcpu_runq();  // this function is defined
by myself

pcpulock = pcpu_schedule_lock_irqsave(cpu_affinity,
   &pcpulock_flag);

TRACE_3D(TRC_CSCHED_STOLEN_VCPU, cpu_affinity  , domain_id,
   vcpu_id);
SCHED_VCPU_STAT_CRANK(scurr, migrate_q);
SCHED_STAT_CRANK(migrate_queued);
WARN_ON(scurr->vcpu->is_urgent);
scurr->vcpu->processor = cpu_affinity;

 __runq_insert(cpu_affinity, scurr);
pcpu_schedule_unlock_irqrestore(pcpulock, pcpulock_flag,
cpu_affinity  );
}
else
__runq_insert(cpu, scurr);
}
else
BUG_ON( is_idle_vcpu(current) || list_empty(runq) );


I try to run the modified Xen. But according to the log I found that,
although I insert the vCPU into the runqueue  of another pCPU, the
vCPU still appears at the old pCPU in the following scheduling period.
Now I have a few questions:

1. Does the Xen scheduler framework support changing the pCPU of a
vCPU after using out the scheduling time slice, but not just to steal one
vCPU from runqueue of other pCPU in load_balance period?

2. If yes, what status of the vCPU should be changed before inserting
the vCPU into the destination pCPU?

Thank you very much!
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Shattering superpages impact on IOMMU in Xen

2017-04-03 Thread Julien Grall

Hi Andrew,

On 03/04/17 17:42, Andrew Cooper wrote:

On 03/04/17 17:24, Oleksandr Tyshchenko wrote:

Hi, all.

Playing with non-shared IOMMU in Xen on ARM I faced one interesting
thing. I found out that the superpages were shattered during domain
life cycle.
This is the result of mapping of foreign pages, ballooning memory,
even if domain maps Xen shared pages, etc.
I don't bother with the memory fragmentation at the moment. But,
shattering bothers me from the IOMMU point of view.
As Xen owns the IOMMU, it might manipulate the IOMMU page tables while a
passed-through/protected device is doing DMA in Linux. It is hard to detect
when no DMA transaction is in progress
in order to prevent this race. So, if we have an in-flight transaction
from a device when changing the IOMMU mapping, we might get into trouble.
Unfortunately, the faulting transaction cannot be restarted
in all cases. The chance of hitting the problem
increases during shattering.

I ran the following test:
The dom0 on my setup contains an Ethernet IP block that is protected by the IOMMU.
What is more, as the IOMMU I am playing with supports superpages (2M,
1G) the IOMMU driver
takes into account these capabilities when building page tables. As I
gave 256 MB for dom0, the IOMMU mapping was built by 2M memory blocks
only. As I am using NFS for both dom0 and domU the ethernet IP
performs DMA transactions almost all the time.
Sometimes I see IOMMU page faults while creating a guest domain. I
think this happens while Xen is shattering 2M mappings into 4K mappings (it
unmaps dom0 pages one 4K page at a time, then maps domU pages there
for copying domU images).
But, I don't see any page faults when the IOMMU page table was built
by 4K pages only.

I had a talk with Julien on IRC and we came to the conclusion that the
safest way would be to use 4K pages to prevent shattering, so the
IOMMU shouldn't report superpage capability.
On the other hand, if we build IOMMU from 4K pages we will have
performance drop (during building, walking page tables), TLB pressure,
etc.
Another possible solution Julien suggested is to always
balloon with 2M or 1G, and not use 4K. That would help us to
prevent the shattering effect.
The discussion was moved to the ML since it seems to be a generic
issue and the right solution should be thought through.

What do you think is the right way to proceed? Use 4K pages and not
bother with shattering, or try to optimize? And if the idea of making the
balloon mechanism smarter makes sense, how do we teach the balloon to do so?
Thank you.


Ballooning and foreign mappings are terrible for trying to retain
superpage mappings.  No OS, not even Linux, can sensibly provide victim
pages in a useful way to avoid shattering.

If you care about performance, don't ever balloon.  Foreign mappings in
translated guests should start from the top of RAM, and work upwards.


I am not sure I understand this. Can you expand on it?




As for the IOMMU specifically, things are rather easier.  It is the
guest's responsibility to ensure that frames offered up for ballooning or
foreign mappings are unused.  Therefore, if anything cares about the
specific 4K region becoming non-present in the IOMMU mappings, it is the
guest kernel's fault for offering up a frame already in use.

For the shattering, however, it is Xen's responsibility to ensure that
all other mappings stay valid at all points.  The correct way to do this
is to construct a new L1 table, mirroring the L2 superpage but lacking
the specific 4K mapping in question, then atomically replace the L2
superpage entry with the new L1 table, then issue an IOMMU TLB
invalidation to remove any cached mappings.

By following that procedure, all DMA within the 2M region, but not
hitting the 4K frame, won't observe any interim lack of mappings.  It
appears from your description that Xen isn't following the procedure.


Xen is following what the ARM ARM mandates. For shattering page 
tables, we have to follow the break-before-make sequence, i.e.:

- Invalidate the L2 entry
- Flush the TLBs
- Add the new L1 table

See D4-1816 in ARM DDI 0487A.k_iss10775 for details. So we end up with a 
small window where there is no valid mapping. It is easy to trap a data 
abort from the processor and restart the access, but this is not the case 
for device memory transactions.


Xen by default shares the stage-2 page tables between the IOMMU and 
the MMU. However, from the discussion I had with Oleksandr, they are not 
sharing page tables and still see the problem. I am not sure how they 
are updating the page table here. Oleksandr, can you provide more details?


Cheers,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 2/9] mm: Place unscrubbed pages at the end of pagelist

2017-04-03 Thread Boris Ostrovsky
... so that it's easy to find pages that need to be scrubbed (those pages are
now marked with _PGC_need_scrub bit).

Signed-off-by: Boris Ostrovsky 
---
Changes in v2:
* Added page_list_add_scrub()
* Mark pages as needing a scrub irrespective of tainted in free_heap_pages()

 xen/common/page_alloc.c  |  106 +-
 xen/include/asm-arm/mm.h |4 ++
 xen/include/asm-x86/mm.h |4 ++
 3 files changed, 94 insertions(+), 20 deletions(-)

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 15fd7f4..56486a8 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -383,6 +383,8 @@ typedef struct page_list_head 
heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
 #define heap(node, zone, order) ((*_heap[node])[zone][order])
 
+static unsigned long node_need_scrub[MAX_NUMNODES];
+
 static unsigned long *avail[MAX_NUMNODES];
 static long total_avail_pages;
 
@@ -683,6 +685,20 @@ static void check_low_mem_virq(void)
 }
 }
 
+/* Pages that need scrub are added to tail, otherwise to head. */
+static void page_list_add_scrub(struct page_info *pg, unsigned int node,
+unsigned int zone, unsigned int order,
+bool_t need_scrub)
+{
+if ( need_scrub )
+{
+pg->count_info |= PGC_need_scrub;
+page_list_add_tail(pg, (node, zone, order));
+}
+else
+page_list_add(pg, (node, zone, order));
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
 unsigned int zone_lo, unsigned int zone_hi,
@@ -807,7 +823,7 @@ static struct page_info *alloc_heap_pages(
 while ( j != order )
 {
 PFN_ORDER(pg) = --j;
-page_list_add_tail(pg, (node, zone, j));
+page_list_add(pg, (node, zone, j));
 pg += 1 << j;
 }
 
@@ -827,6 +843,8 @@ static struct page_info *alloc_heap_pages(
 BUG_ON(pg[i].count_info != PGC_state_free);
 pg[i].count_info = PGC_state_inuse;
 
+BUG_ON(test_bit(_PGC_need_scrub, [i].count_info));
+
 if ( !(memflags & MEMF_no_tlbflush) )
 accumulate_tlbflush(_tlbflush, [i],
 _timestamp);
@@ -856,6 +874,7 @@ static int reserve_offlined_page(struct page_info *head)
 int zone = page_to_zone(head), i, head_order = PFN_ORDER(head), count = 0;
 struct page_info *cur_head;
 int cur_order;
+bool_t need_scrub = !!test_bit(_PGC_need_scrub, >count_info);
 
 ASSERT(spin_is_locked(_lock));
 
@@ -897,8 +916,8 @@ static int reserve_offlined_page(struct page_info *head)
 {
 merge:
 /* We don't consider merging outside the head_order. */
-page_list_add_tail(cur_head, (node, zone, cur_order));
 PFN_ORDER(cur_head) = cur_order;
+page_list_add_scrub(cur_head, node, zone, cur_order, 
need_scrub);
 cur_head += (1 << cur_order);
 break;
 }
@@ -925,7 +944,7 @@ static int reserve_offlined_page(struct page_info *head)
 }
 
 static bool_t can_merge(struct page_info *buddy, unsigned int node,
-unsigned int order)
+unsigned int order, bool_t need_scrub)
 {
 if ( !mfn_valid(_mfn(page_to_mfn(buddy))) ||
  !page_state_is(buddy, free) ||
@@ -933,6 +952,10 @@ static bool_t can_merge(struct page_info *buddy, unsigned 
int node,
  (phys_to_nid(page_to_maddr(buddy)) != node) )
 return false;
 
+if ( need_scrub !=
+ !!test_bit(_PGC_need_scrub, >count_info) )
+return false;
+
 return true;
 }
 
@@ -940,6 +963,8 @@ static bool_t can_merge(struct page_info *buddy, unsigned 
int node,
 static struct page_info *merge_chunks(struct page_info *pg, unsigned int node,
   unsigned int zone, unsigned int order)
 {
+bool_t need_scrub = !!test_bit(_PGC_need_scrub, >count_info);
+
 ASSERT(spin_is_locked(_lock));
 
 /* Merge chunks as far as possible. */
@@ -952,9 +977,10 @@ static struct page_info *merge_chunks(struct page_info 
*pg, unsigned int node,
 {
 /* Merge with predecessor block? */
 buddy = pg - mask;
-if ( !can_merge(buddy, node, order) )
+if ( !can_merge(buddy, node, order, need_scrub) )
 break;
 
+pg->count_info &= ~PGC_need_scrub;
 pg = buddy;
 page_list_del(pg, (node, zone, order));
 }
@@ -962,9 +988,10 @@ static struct page_info *merge_chunks(struct page_info 
*pg, unsigned int node,
 {
 /* Merge with successor block? */
 buddy = pg + mask;
-if ( !can_merge(buddy, node, order) )
+if ( !can_merge(buddy, node, order, need_scrub) )
 break;
 
+buddy->count_info &= 

  1   2   3   >