[linux-linus test] 179499: regressions - trouble: fail/pass/starved

2023-03-07 Thread osstest service owner
flight 179499 linux-linus real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179499/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-freebsd12-amd64 13 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-xl-shadow   14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-xl-pvshim   14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-xl-pvhv2-intel 14 guest-start   fail REGR. vs. 178042
 test-amd64-amd64-xl-multivcpu 17 guest-saverestore   fail REGR. vs. 178042
 test-amd64-amd64-xl-xsm  14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-dom0pvh-xl-amd 14 guest-start   fail REGR. vs. 178042
 test-amd64-amd64-xl-pvhv2-amd 14 guest-start fail REGR. vs. 178042
 test-amd64-amd64-freebsd11-amd64 13 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-libvirt 14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-libvirt-xsm 14 guest-start  fail REGR. vs. 178042
 test-arm64-arm64-xl-xsm  14 guest-start  fail REGR. vs. 178042
 test-arm64-arm64-xl-thunderx 17 guest-stop   fail REGR. vs. 178042
 test-arm64-arm64-xl-credit2  17 guest-stop   fail REGR. vs. 178042
 test-arm64-arm64-libvirt-xsm 14 guest-start  fail REGR. vs. 178042
 test-arm64-arm64-xl 18 guest-start/debian.repeat fail REGR. vs. 178042
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 18 
guest-start/debianhvm.repeat fail REGR. vs. 178042
 test-arm64-arm64-xl-credit1 18 guest-start/debian.repeat fail REGR. vs. 178042
 test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 18 guest-localmigrate/x10 fail 
REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-shadow 12 debian-hvm-install fail 
REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-debianhvm-amd64 12 debian-hvm-install fail REGR. vs. 
178042
 test-amd64-amd64-qemuu-nested-amd 12 debian-hvm-install  fail REGR. vs. 178042
 test-amd64-amd64-xl-qemut-debianhvm-amd64 12 debian-hvm-install fail REGR. vs. 
178042
 test-amd64-amd64-xl-qemut-stubdom-debianhvm-amd64-xsm 12 debian-hvm-install 
fail REGR. vs. 178042
 test-amd64-amd64-dom0pvh-xl-intel 14 guest-start fail REGR. vs. 178042
 test-amd64-amd64-xl-vhd  12 debian-di-install fail REGR. vs. 178042
 test-amd64-amd64-pygrub  12 debian-di-install fail REGR. vs. 178042
 test-amd64-amd64-libvirt-raw 12 debian-di-install fail REGR. vs. 178042
 test-amd64-amd64-libvirt-qcow2 12 debian-di-install  fail REGR. vs. 178042
 test-arm64-arm64-xl-vhd  12 debian-di-install fail REGR. vs. 178042
 test-arm64-arm64-libvirt-raw 12 debian-di-install fail REGR. vs. 178042
 test-amd64-amd64-xl-credit1 18 guest-localmigrate fail in 179473 REGR. vs. 
178042
 test-amd64-amd64-pair 27 guest-migrate/dst_host/src_host fail in 179473 REGR. 
vs. 178042
 test-amd64-amd64-xl   17 guest-saverestore fail in 179473 REGR. vs. 178042
 test-amd64-coresched-amd64-xl 20 guest-localmigrate/x10 fail in 179473 REGR. 
vs. 178042
 test-amd64-amd64-qemuu-nested-intel 13 nested-setup fail in 179473 REGR. vs. 
178042
 test-amd64-amd64-xl-credit2 20 guest-localmigrate/x10 fail in 179473 REGR. vs. 
178042
 test-amd64-amd64-libvirt-pair 28 guest-migrate/dst_host/src_host/debian.repeat 
fail in 179473 REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-ovmf-amd64 17 guest-saverestore.2 fail in 179473 
REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-dmrestrict-amd64-dmrestrict 13 guest-stop fail in 
179473 REGR. vs. 178042
 build-arm64-pvops 6 kernel-build   fail in 179473 REGR. vs. 178042

Tests which are failing intermittently (not blocking):
 test-amd64-amd64-xl-multivcpu 14 guest-start fail in 179473 pass in 179499
 test-amd64-amd64-dom0pvh-xl-amd 13 debian-fixup  fail in 179473 pass in 179499
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 12 debian-hvm-install fail 
in 179473 pass in 179499
 test-amd64-amd64-xl-credit1  14 guest-start fail pass in 179473
 test-amd64-amd64-pair 25 guest-start/debian fail pass in 179473
 test-amd64-amd64-xl  14 guest-start fail pass in 179473
 test-amd64-amd64-xl-credit2  18 guest-localmigrate fail pass in 179473
 test-amd64-amd64-libvirt-pair 25 guest-start/debian fail pass in 179473
 test-amd64-amd64-xl-rtds 17 guest-saverestore  fail pass in 179473
 test-amd64-coresched-amd64-xl 14 guest-start   fail pass in 179473
 test-amd64-amd64-qemuu-nested-intel 12 debian-hvm-install  fail pass in 179473
 test-amd64-amd64-xl-qemuu-dmrestrict-amd64-dmrestrict 12 debian-hvm-install 
fail pass in 179473
 test-amd64-amd64-xl-qemuu-ovmf-amd64 12 debian-hvm-install fail pass in 179473

Regressions which are regarded as allowable (not blocking):
 test-amd64-amd64-xl-qemuu-debianhvm-i386-xsm 16 guest-localmigrate fail REGR. 
vs. 178042
 test-amd64-amd64-xl-rtds 

Re: [PATCH 5/7] sgi-xp: simplify sysctl registration

2023-03-07 Thread Steve Wahl
On Thu, Mar 02, 2023 at 12:46:10PM -0800, Luis Chamberlain wrote:
> Although this driver is a good use case for having a directory
> that is not other directories and then subdirectories with more
> entries, the usage of register_sysctl_table() can recurse and
> increases complexity so to avoid that just split out the
> registration to each directory with its own entries.
> 
> register_sysctl_table() is a deprecated compatibility wrapper.
> register_sysctl() can do the directory creation for you so just use
> that.
> 
> Signed-off-by: Luis Chamberlain 

Reviewed-by: Steve Wahl 

> ---
>  drivers/misc/sgi-xp/xpc_main.c | 24 ++--
>  1 file changed, 10 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
> index b2c3c22fc13c..6da509d692bb 100644
> --- a/drivers/misc/sgi-xp/xpc_main.c
> +++ b/drivers/misc/sgi-xp/xpc_main.c
> @@ -93,7 +93,7 @@ int xpc_disengage_timelimit = 
> XPC_DISENGAGE_DEFAULT_TIMELIMIT;
>  static int xpc_disengage_min_timelimit;  /* = 0 */
>  static int xpc_disengage_max_timelimit = 120;
>  
> -static struct ctl_table xpc_sys_xpc_hb_dir[] = {
> +static struct ctl_table xpc_sys_xpc_hb[] = {
>   {
>.procname = "hb_interval",
>.data = &xpc_hb_interval,
> @@ -112,11 +112,7 @@ static struct ctl_table xpc_sys_xpc_hb_dir[] = {
>.extra2 = &xpc_hb_check_max_interval},
>   {}
>  };
> -static struct ctl_table xpc_sys_xpc_dir[] = {
> - {
> -  .procname = "hb",
> -  .mode = 0555,
> -  .child = xpc_sys_xpc_hb_dir},
> +static struct ctl_table xpc_sys_xpc[] = {
>   {
>.procname = "disengage_timelimit",
>.data = &xpc_disengage_timelimit,
> @@ -127,14 +123,9 @@ static struct ctl_table xpc_sys_xpc_dir[] = {
>.extra2 = &xpc_disengage_max_timelimit},
>   {}
>  };
> -static struct ctl_table xpc_sys_dir[] = {
> - {
> -  .procname = "xpc",
> -  .mode = 0555,
> -  .child = xpc_sys_xpc_dir},
> - {}
> -};
> +
>  static struct ctl_table_header *xpc_sysctl;
> +static struct ctl_table_header *xpc_sysctl_hb;
>  
>  /* non-zero if any remote partition disengage was timed out */
>  int xpc_disengage_timedout;
> @@ -1041,6 +1032,8 @@ xpc_do_exit(enum xp_retval reason)
>  
>   if (xpc_sysctl)
>   unregister_sysctl_table(xpc_sysctl);
> + if (xpc_sysctl_hb)
> + unregister_sysctl_table(xpc_sysctl_hb);
>  
>   xpc_teardown_partitions();
>  
> @@ -1243,7 +1236,8 @@ xpc_init(void)
>   goto out_1;
>   }
>  
> - xpc_sysctl = register_sysctl_table(xpc_sys_dir);
> + xpc_sysctl = register_sysctl("xpc", xpc_sys_xpc);
> + xpc_sysctl_hb = register_sysctl("xpc/hb", xpc_sys_xpc_hb);
>  
>   /*
>* Fill the partition reserved page with the information needed by
> @@ -1308,6 +1302,8 @@ xpc_init(void)
>   (void)unregister_die_notifier(&xpc_die_notifier);
>   (void)unregister_reboot_notifier(&xpc_reboot_notifier);
>  out_2:
> + if (xpc_sysctl_hb)
> + unregister_sysctl_table(xpc_sysctl_hb);
>   if (xpc_sysctl)
>   unregister_sysctl_table(xpc_sysctl);
>  
> -- 
> 2.39.1
> 

-- 
Steve Wahl, Hewlett Packard Enterprise



[xen-unstable-smoke test] 179504: tolerable trouble: pass/starved - PUSHED

2023-03-07 Thread osstest service owner
flight 179504 xen-unstable-smoke real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179504/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt 15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl   1 build-check(1)   starved  n/a
 build-armhf   2 hosts-allocate   starved  n/a

version targeted for testing:
 xen  746774cd1786b13dc67020efb6496477535dcb26
baseline version:
 xen  dd0f000aafd91ff674a11d6d5623d345fa8f91a6

Last test of basis   179502  2023-03-08 00:00:26 Z    0 days
Testing same since   179504  2023-03-08 03:00:27 Z    0 days    1 attempts


People who touched revisions under test:
  Stefano Stabellini 

jobs:
 build-arm64-xsm  pass
 build-amd64  pass
 build-armhf  starved 
 build-amd64-libvirt  pass
 test-armhf-armhf-xl  starved 
 test-arm64-arm64-xl-xsm  pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64 pass
 test-amd64-amd64-libvirt pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/xen.git
   dd0f000aaf..746774cd17  746774cd1786b13dc67020efb6496477535dcb26 -> smoke



Re: [PATCH v3 2/2] automation: introduce a dom0less test run on Xilinx hardware

2023-03-07 Thread Stefano Stabellini
On Tue, 7 Mar 2023, Michal Orzel wrote:
> Hi Stefano,
> 
> On 07/03/2023 00:33, Stefano Stabellini wrote:
> > 
> > 
> > From: Stefano Stabellini 
> > 
> > The test prepares dom0 and domU binaries and boot artifacts, similarly
> > to the existing QEMU test. (TBD: share preparation steps with the
> > regular QEMU tests.)
> > 
> > However, instead of running the test inside QEMU as usual, it copies
> > the binaries to the tftp server root, triggers a Xilinx ZCU102 board
> > reboot, and connects to the real serial of the board.
> > 
> > For now and due to its novelty, allow_failure on the Xilinx hardware
> > test, and only run the job on protected branches with XILINX_JOBS set to
> > true (the "master" and "staging" on gitlab.com/xen-project/xen qualify).
> > 
> > Signed-off-by: Stefano Stabellini 
> Reviewed-by: Michal Orzel 

Thank you!

I spoke with Andrew and he also agreed on the series so I am going to
commit it with one minor change as requested by Andrew: the xilinx job
will not be optional but required. So I removed allowed_failure.

Cheers,

Stefano



[xen-unstable-smoke test] 179502: tolerable trouble: pass/starved - PUSHED

2023-03-07 Thread osstest service owner
flight 179502 xen-unstable-smoke real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179502/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt 15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-check fail   never pass
 test-armhf-armhf-xl   1 build-check(1)   starved  n/a
 build-armhf   2 hosts-allocate   starved  n/a

version targeted for testing:
 xen  dd0f000aafd91ff674a11d6d5623d345fa8f91a6
baseline version:
 xen  31270f11a96ebb875cd70661e2df9e5c6edd7564

Last test of basis   179381  2023-03-06 10:01:56 Z    1 days
Testing same since   179502  2023-03-08 00:00:26 Z    0 days    1 attempts


People who touched revisions under test:
  Bertrand Marquis 
  Michal Orzel 
  Stefano Stabellini 

jobs:
 build-arm64-xsm  pass
 build-amd64  pass
 build-armhf  starved 
 build-amd64-libvirt  pass
 test-armhf-armhf-xl  starved 
 test-arm64-arm64-xl-xsm  pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64 pass
 test-amd64-amd64-libvirt pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/xen.git
   31270f11a9..dd0f000aaf  dd0f000aafd91ff674a11d6d5623d345fa8f91a6 -> smoke



Re: [PULL 00/27] Enable PV backends with Xen/KVM emulation

2023-03-07 Thread Philippe Mathieu-Daudé

On 7/3/23 23:34, David Woodhouse wrote:

On Tue, 2023-03-07 at 21:20 +0100, Philippe Mathieu-Daudé wrote:


This tag only appears in the cover letter, and is missing in each patch.
It would have been acceptable if it were in the PR tag, but
the tag (which for some reason isn't displayed in your cover letter)
is simply "PV back end support for emulated Xen".

You can fetch a series with tag applied with b4:
https://github.com/mricon/b4

You can easily post a signed PR with git-publish:
https://github.com/stefanha/git-publish


Indeed, while I knew that the testing had been done, I didn't actually
have that Tested-by: tag to cut and paste until after I'd pushed the
tree to gitlab for CI, and signed the tag.

But I realise that I can just update the tag, since it's signed and it
was only referenced by name. And nobody but you has looked yet so
nobody will notice... this one look better?

https://git.infradead.org/users/dwmw2/qemu.git/tag/refs/tags/xenfv-2


LGTM, thanks!



[qemu-mainline test] 179497: tolerable trouble: fail/pass/starved - PUSHED

2023-03-07 Thread osstest service owner
flight 179497 qemu-mainline real [real]
flight 179500 qemu-mainline real-retest [real]
http://logs.test-lab.xenproject.org/osstest/logs/179497/
http://logs.test-lab.xenproject.org/osstest/logs/179500/

Failures :-/ but no regressions.

Tests which are failing intermittently (not blocking):
 test-amd64-amd64-qemuu-freebsd11-amd64 18 guest-saverestore.2 fail pass in 
179500-retest

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stop fail like 179449
 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stop fail like 179449
 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 179449
 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 179449
 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 179449
 test-amd64-amd64-libvirt 15 migrate-support-check fail   never pass
 test-amd64-i386-xl-pvshim 14 guest-start  fail   never pass
 test-amd64-i386-libvirt-xsm  15 migrate-support-check fail   never pass
 test-amd64-i386-libvirt  15 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-arm64-arm64-xl  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl  16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-credit1  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-credit1  16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-thunderx 15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-thunderx 16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-credit2  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-credit2  16 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-check fail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-check fail   never pass
 test-amd64-amd64-libvirt-vhd 14 migrate-support-check fail   never pass
 test-amd64-i386-libvirt-raw  14 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-xsm 16 saverestore-support-check fail   never pass
 test-arm64-arm64-libvirt-raw 14 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-raw 15 saverestore-support-check fail   never pass
 test-arm64-arm64-xl-vhd  14 migrate-support-check fail   never pass
 test-arm64-arm64-xl-vhd  15 saverestore-support-check fail   never pass
 test-armhf-armhf-libvirt  1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt-qcow2  1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt-raw  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-credit2   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-cubietruck  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-multivcpu  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-rtds  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-vhd   1 build-check(1)   starved  n/a
 build-armhf-libvirt   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-credit1   1 build-check(1)   starved  n/a
 build-armhf   2 hosts-allocate   starved  n/a

version targeted for testing:
 qemuu c29a2f40cd5d1fdad4632b48343cd968db041a44
baseline version:
 qemuu 817fd33836e73812df2f1907612b57750fcb9491

Last test of basis   179449  2023-03-07 01:40:20 Z    0 days
Testing same since   179497  2023-03-07 13:09:12 Z    0 days    1 attempts


People who touched revisions under test:
  Anthony PERARD 
  Ard Biesheuvel 
  Chuck Zmudzinski 
  David Reiss 
  Gollu Appalanaidu 
  Jesper Devantier 
  Joel Granados 
  Klaus Jensen 
  Marek Marczykowski-Górecki 
  Niklas Cassel 
  Peter Maydell 
  qianfan Zhao 
  Richard Henderson 
  Strahinja Jankovic 

jobs:
 build-amd64-xsm  pass
 build-arm64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-arm64  pass
 build-armhf  starved 
 build-i386   pass
 build-amd64-libvirt  

Re: [PATCH] build: add crypto/ to SUBDIRS

2023-03-07 Thread Stefano Stabellini
On Tue, 28 Feb 2023, Jan Beulich wrote:
> On 28.02.2023 09:14, Michal Orzel wrote:
> > On 27/02/2023 16:57, Jan Beulich wrote:
> >> On 27.02.2023 16:46, Michal Orzel wrote:
> >>> On 27/02/2023 16:00, Jan Beulich wrote:
>  On 27.02.2023 15:46, Michal Orzel wrote:
> > On 27/02/2023 14:54, Jan Beulich wrote:
> >> On 27.02.2023 14:41, Michal Orzel wrote:
> >>> On 27/02/2023 11:10, Jan Beulich wrote:
>  On 27.02.2023 10:53, Michal Orzel wrote:
> > --- a/xen/Makefile
> > +++ b/xen/Makefile
> > @@ -589,7 +589,7 @@ $(TARGET): outputmakefile FORCE
> >   $(Q)$(MAKE) $(build)=. 
> > arch/$(TARGET_ARCH)/include/asm/asm-offsets.h
> >   $(Q)$(MAKE) $(build)=. MKRELOC=$(MKRELOC) 
> > 'ALL_OBJS=$(ALL_OBJS-y)' 'ALL_LIBS=$(ALL_LIBS-y)' $@
> >
> > -SUBDIRS = xsm arch/$(TARGET_ARCH) common drivers lib test
> > +SUBDIRS = xsm arch/$(TARGET_ARCH) common crypto drivers lib test
> >  define all_sources
> >  ( find include -type f -name '*.h' -print; \
> >find $(SUBDIRS) -type f -name '*.[chS]' -print )
> 
>  As long as it's arch/$(TARGET_ARCH) that's used here, crypto should 
>  imo
>  also only be included when selected (or at the very least only when 
>  an
>  arch might select it, which afaics is possible on x86 only right 
>  now).
> 
>  It would also help if in the description you made explicit that 
>  SUBDIRS
>  isn't used for anything else (the name, after all, is pretty 
>  generic).
>  Which actually points at an issue: I suppose the variable would 
>  actually
>  better be used elsewhere as well, e.g. in the _clean: rule and 
>  perhaps
>  also in the setting of ALL_OBJS-y. (That'll require splitting the
>  variable, to that e.g. _clean would use $(SUBDIRS), $(SUBDIRS-y), and
>  $(SUBDIRS-) collectively.) It is, imo, that lack of consolidation 
>  which
>  has caused crypto to be missing from SUBDIRS.
> 
> >>> I think what you wrote can be split into 2 parts: the part being a 
> >>> goal of this patch
> >>> and the cleanup/improvements that would be beneficial but not related 
> >>> to this patch.
> >>> The second part involves more code and there are parts to be 
> >>> discussed:
> >>>
> >>> 1) If we decide to create ALL_OBJS-y from SUBDIRS, then we would need 
> >>> to carve out test/ dir
> >>> that is not part of ALL_OBJS-y and add it to SUBDIRS later on. Also, 
> >>> the order of ALL_OBJS-y matters
> >>> for linking, so we would need to transfer the responsibility to 
> >>> SUBDIRS. The natural placement of
> >>> SUBDIRS (including SUBDIRS-y, etc.) would be right above ALL_OBJS-y. 
> >>> However, when doing clean (next point),
> >>> need-config is set to n and SUBDIRS would be empty. This means it 
> >>> would need to be defined somewhere at the
> >>> top of the Makefile (thus harder to make sure the linking order is 
> >>> correct).
> >>>
> >>> 2) If we decide to use SUBDIRS for _clean rule, then we would need 
> >>> some foreach loop, right?
> >>> Apart from that, there are other directories that are not part of 
> >>> SUBDIRS i.e. include/, tools/.
> >>> Together with SUBDIRS variable, it would create confusion (these dirs 
> >>> are also sub-directories, so why
> >>> not having them listed in this variable?). Also, I can see that we do 
> >>> clean not only for $(TARGET_ARCH)
> >>> but for all the existing architectures.
> >>
> >> I understand that the changes would be more involved, but I disagree 
> >> with
> >> your "two parts" statement: If what I've outlined was already the case,
> >> your patch would not even exist (because crypto/ would already be taken
> >> care of wherever needed).
> >>
> >>> I would prefer to stick for now to the goal of this patch which is to 
> >>> add crypto/ so that it is taken
> >>> into account for the tags/cscope generation. Would the following 
> >>> change be ok for that purpose?
> >>>
> >>> diff --git a/xen/Makefile b/xen/Makefile
> >>> index 2d55bb9401f4..05bf301bd7ab 100644
> >>> --- a/xen/Makefile
> >>> +++ b/xen/Makefile
> >>> @@ -589,7 +589,9 @@ $(TARGET): outputmakefile FORCE
> >>>   $(Q)$(MAKE) $(build)=. 
> >>> arch/$(TARGET_ARCH)/include/asm/asm-offsets.h
> >>>   $(Q)$(MAKE) $(build)=. MKRELOC=$(MKRELOC) 
> >>> 'ALL_OBJS=$(ALL_OBJS-y)' 'ALL_LIBS=$(ALL_LIBS-y)' $@
> >>>
> >>> -SUBDIRS = xsm arch/$(TARGET_ARCH) common drivers lib test
> >>> +SUBDIRS-$(CONFIG_CRYPTO) += crypto
> >>> +SUBDIRS = xsm arch/$(TARGET_ARCH) common drivers lib test 
> >>> $(SUBDIRS-y)
> >>> +
> >>>  define all_sources
> >>>  ( find 

Re: [PULL 00/27] Enable PV backends with Xen/KVM emulation

2023-03-07 Thread David Woodhouse
On Tue, 2023-03-07 at 21:20 +0100, Philippe Mathieu-Daudé wrote:
> 
> This tag only appears in the cover letter, and is missing in each patch.
> It would have been acceptable if it were in the PR tag, but
> the tag (which for some reason isn't displayed in your cover letter)
> is simply "PV back end support for emulated Xen".
> 
> You can fetch a series with tag applied with b4:
> https://github.com/mricon/b4
> 
> You can easily post a signed PR with git-publish:
> https://github.com/stefanha/git-publish

Indeed, while I knew that the testing had been done, I didn't actually
have that Tested-by: tag to cut and paste until after I'd pushed the
tree to gitlab for CI, and signed the tag.

But I realise that I can just update the tag, since it's signed and it
was only referenced by name. And nobody but you has looked yet so
nobody will notice... this one look better?

https://git.infradead.org/users/dwmw2/qemu.git/tag/refs/tags/xenfv-2


smime.p7s
Description: S/MIME cryptographic signature


Re: [PATCH v4 05/12] x86/xen: set MTRR state when running as Xen PV initial domain

2023-03-07 Thread Boris Ostrovsky




On 3/6/23 11:34 AM, Juergen Gross wrote:

When running as Xen PV initial domain (aka dom0), MTRRs are disabled
by the hypervisor, but the system should nevertheless use correct
cache memory types. This has always kind of worked, as disabled MTRRs
resulted in disabled PAT, too, so that the kernel avoided code paths
resulting in inconsistencies. This bypassed all of the sanity checks
the kernel is doing with enabled MTRRs in order to avoid memory
mappings with conflicting memory types.

This has been changed recently, leading to PAT being accepted to be
enabled, while MTRRs stayed disabled. The result is that
mtrr_type_lookup() no longer is accepting all memory type requests,
but started to return WB even if UC- was requested. This led to
driver failures during initialization of some devices.

In reality MTRRs are still in effect, but they are under complete
control of the Xen hypervisor. It is possible, however, to retrieve
the MTRR settings from the hypervisor.

In order to fix those problems, overwrite the MTRR state via
mtrr_overwrite_state() with the MTRR data from the hypervisor, if the
system is running as a Xen dom0.

Fixes: 72cbc8f04fe2 ("x86/PAT: Have pat_enabled() properly reflect state when 
running on Xen")
Signed-off-by: Juergen Gross 



Reviewed-by: Boris Ostrovsky 




RE: [PATCH v4 00/12] x86/mtrr: fix handling with PAT but without MTRR

2023-03-07 Thread Michael Kelley (LINUX)
From: Juergen Gross  Sent: Monday, March 6, 2023 8:34 AM
> 
> This series tries to fix the rather special case of PAT being available
> without having MTRRs (either due to CONFIG_MTRR being not set, or
> because the feature has been disabled e.g. by a hypervisor).
> 
> The main use cases are Xen PV guests and SEV-SNP guests running under
> Hyper-V.
> 
> Instead of trying to work around all the issues by adding if statements
> here and there, just try to use the complete available infrastructure
> by setting up a read-only MTRR state when needed.
> 
> In the Xen PV case the current MTRR MSR values can be read from the
> hypervisor, while for the SEV-SNP case all needed is to set the
> default caching mode to "WB".
> 
> I have added more cleanup which has been discussed when looking into
> the most recent failures.
> 
> Note that I couldn't test the Hyper-V related change (patch 3).
> 
> Running on bare metal and with Xen didn't show any problems with the
> series applied.
> 
> It should be noted that patches 9+10 are replacing today's way to
> lookup the MTRR cache type for a memory region from looking at the
> MTRR register values to building a memory map with the cache types.
> This should make the lookup much faster and much easier to understand.
> 
> Changes in V2:
> - replaced former patches 1+2 with new patches 1-4, avoiding especially
>   the rather hacky approach of V1, while making all the MTRR type
>   conflict tests available for the Xen PV case
> - updated patch 6 (was patch 4 in V1)
> 
> Changes in V3:
> - dropped patch 5 of V2, as already applied
> - split patch 1 of V2 into 2 patches
> - new patches 6-10
> - addressed comments
> 
> Changes in V4:
> - addressed comments
> 
> Juergen Gross (12):
>   x86/mtrr: split off physical address size calculation
>   x86/mtrr: optimize mtrr_calc_physbits()
>   x86/mtrr: support setting MTRR state for software defined MTRRs
>   x86/hyperv: set MTRR state when running as SEV-SNP Hyper-V guest
>   x86/xen: set MTRR state when running as Xen PV initial domain
>   x86/mtrr: replace vendor tests in MTRR code
>   x86/mtrr: allocate mtrr_value array dynamically
>   x86/mtrr: add get_effective_type() service function
>   x86/mtrr: construct a memory map with cache modes
>   x86/mtrr: use new cache_map in mtrr_type_lookup()
>   x86/mtrr: don't let mtrr_type_lookup() return MTRR_TYPE_INVALID
>   x86/mm: only check uniform after calling mtrr_type_lookup()
> 
>  arch/x86/include/asm/mtrr.h|  15 +-
>  arch/x86/include/uapi/asm/mtrr.h   |   6 +-
>  arch/x86/kernel/cpu/mshyperv.c |   4 +
>  arch/x86/kernel/cpu/mtrr/amd.c |   2 +-
>  arch/x86/kernel/cpu/mtrr/centaur.c |   2 +-
>  arch/x86/kernel/cpu/mtrr/cleanup.c |   4 +-
>  arch/x86/kernel/cpu/mtrr/cyrix.c   |   2 +-
>  arch/x86/kernel/cpu/mtrr/generic.c | 492 ++---
>  arch/x86/kernel/cpu/mtrr/mtrr.c|  94 +++---
>  arch/x86/kernel/cpu/mtrr/mtrr.h|   7 +-
>  arch/x86/kernel/setup.c|   2 +
>  arch/x86/mm/pgtable.c  |  24 +-
>  arch/x86/xen/enlighten_pv.c|  52 +++
>  13 files changed, 454 insertions(+), 252 deletions(-)
> 
> --
> 2.35.3

I've tested a Linux 6.2 kernel plus this series in a normal Hyper-V
guest and in a Hyper-V guest using SEV-SNP with vTOM.  MMIO
memory is correctly mapped as WB or UC- depending on the
request, which fixes the original problem introduced for Hyper-V
by the Xen-specific change.

Tested-by: Michael Kelley 



Re: [PATCH v1 2/2] arch/arm: time: Add support for parsing interrupts by names

2023-03-07 Thread Stefano Stabellini
On Tue, 7 Mar 2023, Bertrand Marquis wrote:
> > On 7 Mar 2023, at 11:09, Andrei Cherechesu (OSS) 
> >  wrote:
> > 
> > From: Andrei Cherechesu 
> > 
> > Added support for parsing the ARM generic timer interrupts DT
> > node by the "interrupt-names" property, if it is available.
> > 
> > If not available, the usual parsing based on the expected
> > IRQ order is performed.
> > 
> > Also added the "hyp-virt" PPI to the timer PPI list, even
> > though it's currently not in use. If the "hyp-virt" PPI is
> > not found, the hypervisor won't panic.
> > 
> > Signed-off-by: Andrei Cherechesu 
> > ---
> > xen/arch/arm/include/asm/time.h |  3 ++-
> > xen/arch/arm/time.c | 26 ++
> > 2 files changed, 24 insertions(+), 5 deletions(-)
> > 
> > diff --git a/xen/arch/arm/include/asm/time.h 
> > b/xen/arch/arm/include/asm/time.h
> > index 4b401c1110..49ad8c1a6d 100644
> > --- a/xen/arch/arm/include/asm/time.h
> > +++ b/xen/arch/arm/include/asm/time.h
> > @@ -82,7 +82,8 @@ enum timer_ppi
> > TIMER_PHYS_NONSECURE_PPI = 1,
> > TIMER_VIRT_PPI = 2,
> > TIMER_HYP_PPI = 3,
> > -MAX_TIMER_PPI = 4,
> > +TIMER_HYP_VIRT_PPI = 4,
> > +MAX_TIMER_PPI = 5,
> > };
> > 
> > /*
> > diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
> > index 433d7be909..794da646d6 100644
> > --- a/xen/arch/arm/time.c
> > +++ b/xen/arch/arm/time.c
> > @@ -38,6 +38,14 @@ uint32_t __read_mostly timer_dt_clock_frequency;
> > 
> > static unsigned int timer_irq[MAX_TIMER_PPI];
> > 
> > +static const char *timer_irq_names[MAX_TIMER_PPI] = {
> > +[TIMER_PHYS_SECURE_PPI] = "sec-phys",
> > +[TIMER_PHYS_NONSECURE_PPI] = "phys",
> > +[TIMER_VIRT_PPI] = "virt",
> > +[TIMER_HYP_PPI] = "hyp-phys",
> > +[TIMER_HYP_VIRT_PPI] = "hyp-virt",
> > +};
> > +
> 
> I would need some reference or a pointer to some doc to check those.
> 
> > unsigned int timer_get_irq(enum timer_ppi ppi)
> > {
> > ASSERT(ppi >= TIMER_PHYS_SECURE_PPI && ppi < MAX_TIMER_PPI);
> > @@ -149,15 +157,25 @@ static void __init init_dt_xen_time(void)
> > {
> > int res;
> > unsigned int i;
> > +bool has_names;
> > +
> > +has_names = dt_property_read_bool(timer, "interrupt-names");
> > 
> > /* Retrieve all IRQs for the timer */
> > for ( i = TIMER_PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++ )
> > {
> > -res = platform_get_irq(timer, i);
> > -
> > -if ( res < 0 )
> > +if ( has_names )
> > +res = platform_get_irq_byname(timer, timer_irq_names[i]);
> > +else
> > +res = platform_get_irq(timer, i);
> > +
> > +if ( res > 0 )
> 
> The behaviour of the code is changed here compared to the current
> version as res = 0 will now generate a panic.
> 
> Some device tree might not specify an interrupt number and just put
> 0 and Xen will now panic on those systems.
> As I have no idea if such systems exists and the behaviour is modified
> you should justify this and mention it in the commit message or keep
> the old behaviour and let 0 go through without a panic.
> 
> @stefano, julien any idea here ? should just keep the old behaviour ?

platform_get_irq returns 0 if the irq is 0. The irq cannot be 0 because
0 is reserved for SGIs, not PPIs. So I think it is OK to consider 0 an
error.



Re: [PATCH v2 2/2] automation: introduce a dom0less test run on Xilinx hardware

2023-03-07 Thread Stefano Stabellini
On Tue, 7 Mar 2023, Marek Marczykowski-Górecki wrote:
> On Mon, Mar 06, 2023 at 03:02:51PM -0800, Stefano Stabellini wrote:
> > On Mon, 6 Mar 2023, Andrew Cooper wrote:
> > > On 03/03/2023 11:57 pm, Stefano Stabellini wrote:
> > > > +  only:
> > > > +variables:
> > > > +  - $XILINX_JOBS == "true" && $CI_COMMIT_REF_PROTECTED == "true"
> > > 
> > > We don't want to protect every branch of a tree that only a select
> > > number of people can push to,
> > 
> > Actually this is useful, more on this below
> > 
> > 
> > > nor (for this, or others configured with
> > > the runner), want to impose branching conventions on them.
> > > 
> > > In all anticipated cases, those able to push would also be able to
> > > reconfigure the protected-ness of branches, so this doesn't gain us any
> > > security I don't think, but it certainly puts more hoops in the way to
> > > be jumped through.
> > 
> > It is true that it adds a small inconvenience to the user, but I think
> > the benefits outweigh the inconvenience at the moment (that could change
> > though.)
> > 
> > With this, I can register the gitlab runner with a specific gitlab
> > project (for instance
> > https://gitlab.com/xen-project/people/sstabellini/xen) then I can mark
> > all branches as "protected" and select very specific access permissions,
> > e.g. I can give individual access to Julien, Bertrand, Michal, anyone,
> > to specific branches, which is great to allow them to run individual
> > pre-commit tests permanently or temporarily.
> > 
> > I couldn't find another way to do it at the moment, as non-protected
> > branches don't come with detailed access permissions. But it is possible
> > that as we setup a new sub-group under https://gitlab.com/xen-project
> > for people with access to the runner, then we might be able to remove
> > this restriction because it becomes unnecessary. We can remove the
> > protected check at that point.
> 
> You can configure the runner to run only jobs from protected branches. This
> way it actually prevents running jobs from non-protected branches. A mere
> condition in .gitlab-ci.yml can simply be removed by anybody who wants
> to abuse your runner (and has push access to a non-protected branch -
> which may or may not include all of patchew).

Yes, I did configure the runner only to execute protected jobs. The
$CI_COMMIT_REF_PROTECTED check in automation/gitlab-ci/test.yaml is
needed so that the xilinx job won't be created on pipelines for
non-protected branches where the runner won't run, hence the job has no
chance of completing successfully.

If I don't add the $CI_COMMIT_REF_PROTECTED check, the job will be
created in the pipeline but it will remain stuck as "paused" waiting for
the runner to become available (but the runner never will.)

Re: [PULL 00/27] Enable PV backends with Xen/KVM emulation

2023-03-07 Thread Philippe Mathieu-Daudé

Hi David,

On 7/3/23 19:26, David Woodhouse wrote:

The following changes since commit 9832009d9dd2386664c15cc70f6e6bfe062be8bd:

   Merge tag 'pull-riscv-to-apply-20230306' of 
https://gitlab.com/palmer-dabbelt/qemu into staging (2023-03-07 12:53:00 +0000)

are available in the Git repository at:

   git://git.infradead.org/users/dwmw2/qemu.git refs/tags/xenfv-2

for you to fetch changes up to 154eac37190c4d80d29b09c226abd899e397530f:

   docs: Update Xen-on-KVM documentation for PV disk support (2023-03-07 
17:04:30 +0000)


Tested-by: Paul Durrant 
... on real Xen (master branch, 4.18) with a Debian guest.


This tag only appears in the cover letter, and is missing in each patch.
It would have been acceptable if it were in the PR tag, but
the tag (which for some reason isn't displayed in your cover letter)
is simply "PV back end support for emulated Xen".

You can fetch a series with the tags applied using b4:
https://github.com/mricon/b4

You can easily post a signed PR with git-publish:
https://github.com/stefanha/git-publish



David Woodhouse (23):
   hw/xen: Add xenstore wire implementation and implementation stubs
   hw/xen: Add basic XenStore tree walk and write/read/directory support
   hw/xen: Implement XenStore watches
   hw/xen: Implement XenStore transactions
   hw/xen: Watches on XenStore transactions
   hw/xen: Implement core serialize/deserialize methods for xenstore_impl
   hw/xen: Add evtchn operations to allow redirection to internal emulation
   hw/xen: Add gnttab operations to allow redirection to internal emulation
   hw/xen: Pass grant ref to gnttab unmap operation
   hw/xen: Add foreignmem operations to allow redirection to internal 
emulation
   hw/xen: Move xenstore_store_pv_console_info to xen_console.c
   hw/xen: Use XEN_PAGE_SIZE in PV backend drivers
   hw/xen: Rename xen_common.h to xen_native.h
   hw/xen: Build PV backend drivers for CONFIG_XEN_BUS
   hw/xen: Only advertise ring-page-order for xen-block if gnttab supports 
it
   hw/xen: Hook up emulated implementation for event channel operations
   hw/xen: Add emulated implementation of grant table operations
   hw/xen: Add emulated implementation of XenStore operations
   hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore
   hw/xen: Implement soft reset for emulated gnttab
   i386/xen: Initialize Xen backends from pc_basic_device_init() for 
emulation
   MAINTAINERS: Add entry for Xen on KVM emulation
   docs: Update Xen-on-KVM documentation for PV disk support

Paul Durrant (4):
   hw/xen: Implement XenStore permissions
   hw/xen: Create initial XenStore nodes
   hw/xen: Add xenstore operations to allow redirection to internal 
emulation
   hw/xen: Avoid crash when backend watch fires too early





Re: [PATCH v1 2/2] arch/arm: time: Add support for parsing interrupts by names

2023-03-07 Thread Andrei Cherechesu
On 07/03/2023 17:38, Bertrand Marquis wrote:
> Hi Andrei,
> 
>> On 7 Mar 2023, at 11:09, Andrei Cherechesu (OSS) 
>>  wrote:
>>
>> From: Andrei Cherechesu 
>>
>> Added support for parsing the ARM generic timer interrupts DT
>> node by the "interrupt-names" property, if it is available.
>>
>> If not available, the usual parsing based on the expected
>> IRQ order is performed.
>>
>> Also added the "hyp-virt" PPI to the timer PPI list, even
>> though it's currently not in use. If the "hyp-virt" PPI is
>> not found, the hypervisor won't panic.
>>
>> Signed-off-by: Andrei Cherechesu 
>> ---
>> xen/arch/arm/include/asm/time.h |  3 ++-
>> xen/arch/arm/time.c | 26 ++
>> 2 files changed, 24 insertions(+), 5 deletions(-)
>>
>> diff --git a/xen/arch/arm/include/asm/time.h 
>> b/xen/arch/arm/include/asm/time.h
>> index 4b401c1110..49ad8c1a6d 100644
>> --- a/xen/arch/arm/include/asm/time.h
>> +++ b/xen/arch/arm/include/asm/time.h
>> @@ -82,7 +82,8 @@ enum timer_ppi
>> TIMER_PHYS_NONSECURE_PPI = 1,
>> TIMER_VIRT_PPI = 2,
>> TIMER_HYP_PPI = 3,
>> -MAX_TIMER_PPI = 4,
>> +TIMER_HYP_VIRT_PPI = 4,
>> +MAX_TIMER_PPI = 5,
>> };
>>
>> /*
>> diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
>> index 433d7be909..794da646d6 100644
>> --- a/xen/arch/arm/time.c
>> +++ b/xen/arch/arm/time.c
>> @@ -38,6 +38,14 @@ uint32_t __read_mostly timer_dt_clock_frequency;
>>
>> static unsigned int timer_irq[MAX_TIMER_PPI];
>>
>> +static const char *timer_irq_names[MAX_TIMER_PPI] = {
>> +[TIMER_PHYS_SECURE_PPI] = "sec-phys",
>> +[TIMER_PHYS_NONSECURE_PPI] = "phys",
>> +[TIMER_VIRT_PPI] = "virt",
>> +[TIMER_HYP_PPI] = "hyp-phys",
>> +[TIMER_HYP_VIRT_PPI] = "hyp-virt",
>> +};
>> +
> 
> I would need some reference or a pointer to some doc to check those.

Hi Bertrand,

This implementation follows the one in Linux [0]. The parsing order for
the IRQs remains the same whether or not the "interrupt-names" property
is available, since the driver in both Linux and Xen expects them in a
specific order (defined by enum arch_timer_ppi_nr in Linux, for example)
which, most of the time, does not correspond to how they are mapped onto
the SoC. But now it can discover them correctly regardless of their
order in the "interrupts" property in the DT node.

Only the "hyp-virt" IRQ is not required to be present, which is also the
last one parsed.

[0]
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/drivers/clocksource/arm_arch_timer.c?id=86332e9e3477af8f31c9d5f3e81e57e0fd2118e7

> 
>> unsigned int timer_get_irq(enum timer_ppi ppi)
>> {
>> ASSERT(ppi >= TIMER_PHYS_SECURE_PPI && ppi < MAX_TIMER_PPI);
>> @@ -149,15 +157,25 @@ static void __init init_dt_xen_time(void)
>> {
>> int res;
>> unsigned int i;
>> +bool has_names;
>> +
>> +has_names = dt_property_read_bool(timer, "interrupt-names");
>>
>> /* Retrieve all IRQs for the timer */
>> for ( i = TIMER_PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++ )
>> {
>> -res = platform_get_irq(timer, i);
>> -
>> -if ( res < 0 )
>> +if ( has_names )
>> +res = platform_get_irq_byname(timer, timer_irq_names[i]);
>> +else
>> +res = platform_get_irq(timer, i);
>> +
>> +if ( res > 0 )
> 
> The behaviour of the code is changed here compared to the current
> version as res = 0 will now generate a panic.
> 
> Some device tree might not specify an interrupt number and just put
> 0 and Xen will now panic on those systems.
> As I have no idea if such systems exist and the behaviour is modified,
> you should justify this and mention it in the commit message, or keep
> the old behaviour and let 0 go through without a panic.
> 
> @stefano, julien any idea here ? should just keep the old behaviour ?
> 

You're right, I didn't take the dummy fake interrupts case into
consideration. I also think we should keep the old behaviour then, and
let 0 go through too, as you mentioned.


>> +timer_irq[i] = res;
>> +/* Do not panic if "hyp-virt" PPI is not found, since it's not
>> + * currently used.
>> + */
> 
> Please respect the standard for comments and keep the first line empty:
> /*
>  * comment
>  */
> 

Will update in v2.

>> +else if ( i != TIMER_HYP_VIRT_PPI )
>> panic("Timer: Unable to retrieve IRQ %u from the device tree\n", 
>> i);
>> -timer_irq[i] = res;
>> }
>> }
> 
> Cheers
> Bertrand
> 

Thanks for the review.

Regards,
Andrei

>>
>> -- 
>> 2.35.1
>>
>>
> 



[PULL 09/27] hw/xen: Add evtchn operations to allow redirection to internal emulation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

The existing implementation calling into the real libxenevtchn moves to
a new file hw/xen/xen-operations.c, and is called via a function table
which in a subsequent commit will also be able to invoke the emulated
event channel support.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/9pfs/xen-9p-backend.c|  24 +++---
 hw/i386/xen/xen-hvm.c   |  27 ---
 hw/xen/meson.build  |   1 +
 hw/xen/xen-bus.c|  22 +++---
 hw/xen/xen-legacy-backend.c |   8 +-
 hw/xen/xen-operations.c |  71 +
 hw/xen/xen_pvdev.c  |  12 +--
 include/hw/xen/xen-bus.h|   1 +
 include/hw/xen/xen-legacy-backend.h |   1 +
 include/hw/xen/xen_backend_ops.h| 118 
 include/hw/xen/xen_common.h |  12 ---
 include/hw/xen/xen_pvdev.h  |   1 +
 softmmu/globals.c   |   1 +
 13 files changed, 242 insertions(+), 57 deletions(-)
 create mode 100644 hw/xen/xen-operations.c
 create mode 100644 include/hw/xen/xen_backend_ops.h

diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 65c4979c3c..864bdaf952 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -241,7 +241,7 @@ static void xen_9pfs_push_and_notify(V9fsPDU *pdu)
 xen_wmb();
 
 ring->inprogress = false;
-xenevtchn_notify(ring->evtchndev, ring->local_port);
+qemu_xen_evtchn_notify(ring->evtchndev, ring->local_port);
 
 qemu_bh_schedule(ring->bh);
 }
@@ -324,8 +324,8 @@ static void xen_9pfs_evtchn_event(void *opaque)
 Xen9pfsRing *ring = opaque;
 evtchn_port_t port;
 
-port = xenevtchn_pending(ring->evtchndev);
-xenevtchn_unmask(ring->evtchndev, port);
+port = qemu_xen_evtchn_pending(ring->evtchndev);
+qemu_xen_evtchn_unmask(ring->evtchndev, port);
 
 qemu_bh_schedule(ring->bh);
 }
@@ -337,10 +337,10 @@ static void xen_9pfs_disconnect(struct XenLegacyDevice 
*xendev)
 
 for (i = 0; i < xen_9pdev->num_rings; i++) {
 if (xen_9pdev->rings[i].evtchndev != NULL) {
-qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
-NULL, NULL, NULL);
-xenevtchn_unbind(xen_9pdev->rings[i].evtchndev,
- xen_9pdev->rings[i].local_port);
+
qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
+NULL, NULL, NULL);
+qemu_xen_evtchn_unbind(xen_9pdev->rings[i].evtchndev,
+   xen_9pdev->rings[i].local_port);
 xen_9pdev->rings[i].evtchndev = NULL;
 }
 }
@@ -447,12 +447,12 @@ static int xen_9pfs_connect(struct XenLegacyDevice 
*xendev)
 xen_9pdev->rings[i].inprogress = false;
 
 
-xen_9pdev->rings[i].evtchndev = xenevtchn_open(NULL, 0);
+xen_9pdev->rings[i].evtchndev = qemu_xen_evtchn_open();
 if (xen_9pdev->rings[i].evtchndev == NULL) {
 goto out;
 }
-qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev));
-xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain
+qemu_set_cloexec(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev));
+xen_9pdev->rings[i].local_port = qemu_xen_evtchn_bind_interdomain
 (xen_9pdev->rings[i].evtchndev,
  xendev->dom,
  xen_9pdev->rings[i].evtchn);
@@ -463,8 +463,8 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
 goto out;
 }
 xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
-qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
-xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
+qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
+xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
 }
 
 xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model");
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index e5a1dd19f4..cb1d24f592 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -761,7 +761,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state)
 int i;
 evtchn_port_t port;
 
-port = xenevtchn_pending(state->xce_handle);
+port = qemu_xen_evtchn_pending(state->xce_handle);
 if (port == state->bufioreq_local_port) {
 timer_mod(state->buffered_io_timer,
 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
@@ -780,7 +780,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state)
 }
 
 /* unmask the wanted port again */
-xenevtchn_unmask(state->xce_handle, port);
+qemu_xen_evtchn_unmask(state->xce_handle, port);
 
 /* get the io packet from shared memory */
 

Re: [PATCH v2 00/27] Enable PV backends with Xen/KVM emulation

2023-03-07 Thread David Woodhouse
On Tue, 2023-03-07 at 17:57 +0000, Paul Durrant wrote:
> I think the series is good to go now so time to send a PR.

Done, thank you!


smime.p7s
Description: S/MIME cryptographic signature


[PULL 01/27] hw/xen: Add xenstore wire implementation and implementation stubs

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This implements the basic wire protocol for the XenStore commands, punting
all the actual implementation to xs_impl_* functions which all just return
errors for now.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/meson.build |   1 +
 hw/i386/kvm/trace-events|  15 +
 hw/i386/kvm/xen_xenstore.c  | 871 +++-
 hw/i386/kvm/xenstore_impl.c | 117 +
 hw/i386/kvm/xenstore_impl.h |  58 +++
 5 files changed, 1054 insertions(+), 8 deletions(-)
 create mode 100644 hw/i386/kvm/xenstore_impl.c
 create mode 100644 hw/i386/kvm/xenstore_impl.h

diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build
index 82dd6ae7c6..6621ba5cd7 100644
--- a/hw/i386/kvm/meson.build
+++ b/hw/i386/kvm/meson.build
@@ -9,6 +9,7 @@ i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files(
   'xen_evtchn.c',
   'xen_gnttab.c',
   'xen_xenstore.c',
+  'xenstore_impl.c',
   ))
 
 i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)
diff --git a/hw/i386/kvm/trace-events b/hw/i386/kvm/trace-events
index b83c3eb965..e4c82de6f3 100644
--- a/hw/i386/kvm/trace-events
+++ b/hw/i386/kvm/trace-events
@@ -3,3 +3,18 @@ kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d"
 kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d"
 kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d"
 kvm_xen_unmask_pirq(int pirq, char *dev, int vector) "pirq %d dev %s vector %d"
+xenstore_error(unsigned int id, unsigned int tx_id, const char *err) "req %u 
tx %u err %s"
+xenstore_read(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_write(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_mkdir(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_directory(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_directory_part(unsigned int tx_id, const char *path, unsigned int 
offset) "tx %u path %s offset %u"
+xenstore_transaction_start(unsigned int new_tx) "new_tx %u"
+xenstore_transaction_end(unsigned int tx_id, bool commit) "tx %u commit %d"
+xenstore_rm(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_get_perms(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_set_perms(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_watch(const char *path, const char *token) "path %s token %s"
+xenstore_unwatch(const char *path, const char *token) "path %s token %s"
+xenstore_reset_watches(void) ""
+xenstore_watch_event(const char *path, const char *token) "path %s token %s"
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 14193ef3f9..64d8f1a38f 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -28,6 +28,10 @@
 #include "sysemu/kvm.h"
 #include "sysemu/kvm_xen.h"
 
+#include "trace.h"
+
+#include "xenstore_impl.h"
+
 #include "hw/xen/interface/io/xs_wire.h"
 #include "hw/xen/interface/event_channel.h"
 
@@ -47,6 +51,9 @@ struct XenXenstoreState {
 SysBusDevice busdev;
 /*< public >*/
 
+XenstoreImplState *impl;
+GList *watch_events;
+
 MemoryRegion xenstore_page;
 struct xenstore_domain_interface *xs;
 uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
@@ -64,6 +71,7 @@ struct XenXenstoreState {
 struct XenXenstoreState *xen_xenstore_singleton;
 
 static void xen_xenstore_event(void *opaque);
+static void fire_watch_cb(void *opaque, const char *path, const char *token);
 
 static void xen_xenstore_realize(DeviceState *dev, Error **errp)
 {
@@ -89,6 +97,8 @@ static void xen_xenstore_realize(DeviceState *dev, Error 
**errp)
 }
 aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
xen_xenstore_event, NULL, NULL, NULL, s);
+
+s->impl = xs_impl_create();
 }
 
 static bool xen_xenstore_is_needed(void *opaque)
@@ -213,20 +223,761 @@ static void reset_rsp(XenXenstoreState *s)
 s->rsp_offset = 0;
 }
 
+static void xs_error(XenXenstoreState *s, unsigned int id,
+ xs_transaction_t tx_id, int errnum)
+{
+struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+const char *errstr = NULL;
+
+for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
+struct xsd_errors *xsd_error = &xsd_errors[i];
+
+if (xsd_error->errnum == errnum) {
+errstr = xsd_error->errstring;
+break;
+}
+}
+assert(errstr);
+
+trace_xenstore_error(id, tx_id, errstr);
+
+rsp->type = XS_ERROR;
+rsp->req_id = id;
+rsp->tx_id = tx_id;
+rsp->len = (uint32_t)strlen(errstr) + 1;
+
+memcpy(&rsp[1], errstr, rsp->len);
+}
+
+static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
+  xs_transaction_t tx_id)
+{
+struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+const char *okstr = "OK";
+
+rsp->type = type;
+rsp->req_id = req_id;
+rsp->tx_id = tx_id;
+rsp->len = (uint32_t)strlen(okstr) + 1;
+
+

[PULL 08/27] hw/xen: Create initial XenStore nodes

2023-03-07 Thread David Woodhouse
From: Paul Durrant 

Signed-off-by: Paul Durrant 
Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_xenstore.c | 70 ++
 1 file changed, 70 insertions(+)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 520422b147..fb3648a058 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -76,9 +76,39 @@ struct XenXenstoreState *xen_xenstore_singleton;
 static void xen_xenstore_event(void *opaque);
 static void fire_watch_cb(void *opaque, const char *path, const char *token);
 
+static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
+GList *perms,
+const char *relpath,
+const char *fmt, ...)
+{
+gchar *abspath;
+gchar *value;
+va_list args;
+GByteArray *data;
+int err;
+
+abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
+va_start(args, fmt);
+value = g_strdup_vprintf(fmt, args);
+va_end(args);
+
+data = g_byte_array_new_take((void *)value, strlen(value));
+
+err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
+assert(!err);
+
+g_byte_array_unref(data);
+
+err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
+assert(!err);
+
+g_free(abspath);
+}
+
 static void xen_xenstore_realize(DeviceState *dev, Error **errp)
 {
 XenXenstoreState *s = XEN_XENSTORE(dev);
+GList *perms;
 
 if (xen_mode != XEN_EMULATE) {
 error_setg(errp, "Xen xenstore support is for Xen emulation");
@@ -102,6 +132,46 @@ static void xen_xenstore_realize(DeviceState *dev, Error 
**errp)
xen_xenstore_event, NULL, NULL, NULL, s);
 
 s->impl = xs_impl_create(xen_domid);
+
+/* Populate the default nodes */
+
+/* Nodes owned by 'dom0' but readable by the guest */
+perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
+perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
+
+relpath_printf(s, perms, "", "%s", "");
+
+relpath_printf(s, perms, "domid", "%u", xen_domid);
+
+relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", 
"%u", 1);
+relpath_printf(s, perms, 
"control/platform-feature-multiprocessor-suspend", "%u", 1);
+
+relpath_printf(s, perms, "platform/acpi", "%u", 1);
+relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
+relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
+relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);
+
+g_list_free_full(perms, g_free);
+
+/* Nodes owned by the guest */
+perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));
+
+relpath_printf(s, perms, "attr", "%s", "");
+
+relpath_printf(s, perms, "control/shutdown", "%s", "");
+relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
+relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
+relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
+relpath_printf(s, perms, "control/feature-s3", "%u", 1);
+relpath_printf(s, perms, "control/feature-s4", "%u", 1);
+
+relpath_printf(s, perms, "data", "%s", "");
+relpath_printf(s, perms, "device", "%s", "");
+relpath_printf(s, perms, "drivers", "%s", "");
+relpath_printf(s, perms, "error", "%s", "");
+relpath_printf(s, perms, "feature", "%s", "");
+
+g_list_free_full(perms, g_free);
 }
 
 static bool xen_xenstore_is_needed(void *opaque)
-- 
2.39.0




[PULL 12/27] hw/xen: Add foreignmem operations to allow redirection to internal emulation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/char/xen_console.c|  8 +++---
 hw/display/xenfb.c   | 20 +++---
 hw/xen/xen-operations.c  | 45 
 include/hw/xen/xen_backend_ops.h | 26 ++
 include/hw/xen/xen_common.h  | 13 -
 softmmu/globals.c|  1 +
 tests/unit/test-xs-node.c|  1 +
 7 files changed, 88 insertions(+), 26 deletions(-)

diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 19ad6c946a..e9cef3e1ef 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -237,9 +237,9 @@ static int con_initialise(struct XenLegacyDevice *xendev)
 
 if (!xendev->dev) {
 xen_pfn_t mfn = con->ring_ref;
-con->sring = xenforeignmemory_map(xen_fmem, con->xendev.dom,
-  PROT_READ | PROT_WRITE,
-  1, &mfn, NULL);
+con->sring = qemu_xen_foreignmem_map(con->xendev.dom, NULL,
+ PROT_READ | PROT_WRITE,
+ 1, &mfn, NULL);
 } else {
 con->sring = xen_be_map_grant_ref(xendev, con->ring_ref,
   PROT_READ | PROT_WRITE);
@@ -269,7 +269,7 @@ static void con_disconnect(struct XenLegacyDevice *xendev)
 
 if (con->sring) {
 if (!xendev->dev) {
-xenforeignmemory_unmap(xen_fmem, con->sring, 1);
+qemu_xen_foreignmem_unmap(con->sring, 1);
 } else {
 xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref);
 }
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 260eb38a76..2c4016fcbd 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -98,8 +98,9 @@ static int common_bind(struct common *c)
 if (xenstore_read_fe_int(&c->xendev, "event-channel", 
&c->xendev.remote_port) == -1)
 return -1;
 
-c->page = xenforeignmemory_map(xen_fmem, c->xendev.dom,
-   PROT_READ | PROT_WRITE, 1, &mfn, NULL);
+c->page = qemu_xen_foreignmem_map(c->xendev.dom, NULL,
+  PROT_READ | PROT_WRITE, 1, &mfn,
+  NULL);
 if (c->page == NULL)
 return -1;
 
@@ -115,7 +116,7 @@ static void common_unbind(struct common *c)
 {
 xen_pv_unbind_evtchn(&c->xendev);
 if (c->page) {
-xenforeignmemory_unmap(xen_fmem, c->page, 1);
+qemu_xen_foreignmem_unmap(c->page, 1);
 c->page = NULL;
 }
 }
@@ -500,15 +501,16 @@ static int xenfb_map_fb(struct XenFB *xenfb)
 fbmfns = g_new0(xen_pfn_t, xenfb->fbpages);
 
 xenfb_copy_mfns(mode, n_fbdirs, pgmfns, pd);
-map = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom,
-   PROT_READ, n_fbdirs, pgmfns, NULL);
+map = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL, PROT_READ,
+  n_fbdirs, pgmfns, NULL);
 if (map == NULL)
 goto out;
 xenfb_copy_mfns(mode, xenfb->fbpages, fbmfns, map);
-xenforeignmemory_unmap(xen_fmem, map, n_fbdirs);
+qemu_xen_foreignmem_unmap(map, n_fbdirs);
 
-xenfb->pixels = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom,
-PROT_READ, xenfb->fbpages, fbmfns, NULL);
+xenfb->pixels = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL,
+PROT_READ, xenfb->fbpages,
+fbmfns, NULL);
 if (xenfb->pixels == NULL)
 goto out;
 
@@ -927,7 +929,7 @@ static void fb_disconnect(struct XenLegacyDevice *xendev)
  *   Replacing the framebuffer with anonymous shared memory
  *   instead.  This releases the guest pages and keeps qemu happy.
  */
-xenforeignmemory_unmap(xen_fmem, fb->pixels, fb->fbpages);
+qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages);
 fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE,
   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON,
   -1, 0);
diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c
index c5956d28c6..440e566bb1 100644
--- a/hw/xen/xen-operations.c
+++ b/hw/xen/xen-operations.c
@@ -22,6 +22,7 @@
  */
 #undef XC_WANT_COMPAT_EVTCHN_API
 #undef XC_WANT_COMPAT_GNTTAB_API
+#undef XC_WANT_COMPAT_MAP_FOREIGN_API
 
 #include <xenctrl.h>
 
@@ -56,10 +57,13 @@ typedef xc_gnttab xengnttab_handle;
 #define xengnttab_map_domain_grant_refs(h, c, d, r, p) \
 xc_gnttab_map_domain_grant_refs(h, c, d, r, p)
 
+typedef xc_interface xenforeignmemory_handle;
+
 #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
 
 #include <xenevtchn.h>
 #include <xengnttab.h>
+#include <xenforeignmemory.h>
 
 #endif
 
@@ -218,6 +222,46 @@ static struct gnttab_backend_ops libxengnttab_backend_ops 
= {
 .unmap = libxengnttab_backend_unmap,
 };
 
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
+
+static void *libxenforeignmem_backend_map(uint32_t dom, 

[PULL 02/27] hw/xen: Add basic XenStore tree walk and write/read/directory support

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This is a fairly simple implementation of a copy-on-write tree.

The node walk function starts off at the root, with 'inplace == true'.
If it ever encounters a node with a refcount greater than one (including
the root node), then that node is shared with other trees, and cannot
be modified in place, so the inplace flag is cleared and we copy on
write from there on down.

Xenstore write has 'mkdir -p' semantics and will create the intermediate
nodes if they don't already exist, so in that case we flip the inplace
flag back to true as we populate the newly-created nodes.

We put a copy of the absolute path into the buffer in the struct walk_op,
with *two* NUL terminators at the end. As xs_node_walk() goes down the
tree, it replaces the next '/' separator with a NUL so that it can use
the 'child name' in place. The next recursion down then puts the '/'
back and repeats the exercise for the next path element... if it doesn't
hit that *second* NUL termination which indicates the true end of the
path.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xenstore_impl.c | 527 +++-
 tests/unit/meson.build  |   1 +
 tests/unit/test-xs-node.c   | 197 ++
 3 files changed, 718 insertions(+), 7 deletions(-)
 create mode 100644 tests/unit/test-xs-node.c

diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 31dbc98fe0..9e10a31bea 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -10,13 +10,470 @@
  */
 
 #include "qemu/osdep.h"
+#include "qom/object.h"
 
 #include "xen_xenstore.h"
 #include "xenstore_impl.h"
 
+#include "hw/xen/interface/io/xs_wire.h"
+
+#define XS_MAX_WATCHES  128
+#define XS_MAX_DOMAIN_NODES 1000
+#define XS_MAX_NODE_SIZE2048
+#define XS_MAX_TRANSACTIONS 10
+#define XS_MAX_PERMS_PER_NODE   5
+
+#define XS_VALID_CHARS "abcdefghijklmnopqrstuvwxyz" \
+   "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
+   "0123456789-/_"
+
+typedef struct XsNode {
+uint32_t ref;
+GByteArray *content;
+GHashTable *children;
+uint64_t gencnt;
+#ifdef XS_NODE_UNIT_TEST
+gchar *name; /* debug only */
+#endif
+} XsNode;
+
 struct XenstoreImplState {
+XsNode *root;
+unsigned int nr_nodes;
 };
 
+static inline XsNode *xs_node_new(void)
+{
+XsNode *n = g_new0(XsNode, 1);
+n->ref = 1;
+
+#ifdef XS_NODE_UNIT_TEST
+nr_xs_nodes++;
+xs_node_list = g_list_prepend(xs_node_list, n);
+#endif
+return n;
+}
+
+static inline XsNode *xs_node_ref(XsNode *n)
+{
+/* With just 10 transactions, it can never get anywhere near this. */
+g_assert(n->ref < INT_MAX);
+
+g_assert(n->ref);
+n->ref++;
+return n;
+}
+
+static inline void xs_node_unref(XsNode *n)
+{
+if (!n) {
+return;
+}
+g_assert(n->ref);
+if (--n->ref) {
+return;
+}
+
+if (n->content) {
+g_byte_array_unref(n->content);
+}
+if (n->children) {
+g_hash_table_unref(n->children);
+}
+#ifdef XS_NODE_UNIT_TEST
+g_free(n->name);
+nr_xs_nodes--;
+xs_node_list = g_list_remove(xs_node_list, n);
+#endif
+g_free(n);
+}
+
+/* For copying from one hash table to another using g_hash_table_foreach() */
+static void do_insert(gpointer key, gpointer value, gpointer user_data)
+{
+g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value));
+}
+
+static XsNode *xs_node_copy(XsNode *old)
+{
+XsNode *n = xs_node_new();
+
+n->gencnt = old->gencnt;
+if (old->children) {
+n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+(GDestroyNotify)xs_node_unref);
+g_hash_table_foreach(old->children, do_insert, n->children);
+}
+if (old && old->content) {
+n->content = g_byte_array_ref(old->content);
+}
+return n;
+}
+
+/* Returns true if it made a change to the hash table */
+static bool xs_node_add_child(XsNode *n, const char *path_elem, XsNode *child)
+{
+assert(!strchr(path_elem, '/'));
+
+if (!child) {
+assert(n->children);
+return g_hash_table_remove(n->children, path_elem);
+}
+
+#ifdef XS_NODE_UNIT_TEST
+g_free(child->name);
+child->name = g_strdup(path_elem);
+#endif
+if (!n->children) {
+n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+(GDestroyNotify)xs_node_unref);
+}
+
+/*
+ * The documentation for g_hash_table_insert() says that it "returns a
+ * boolean value to indicate whether the newly added value was already
+ * in the hash table or not."
+ *
+ * It could perhaps be clearer that returning TRUE means it wasn't,
+ */
+return g_hash_table_insert(n->children, g_strdup(path_elem), child);
+}
+
+struct walk_op {
+struct XenstoreImplState *s;
+char path[XENSTORE_ABS_PATH_MAX + 2]; /* Two NUL 

[PULL 13/27] hw/xen: Add xenstore operations to allow redirection to internal emulation

2023-03-07 Thread David Woodhouse
From: Paul Durrant 

Signed-off-by: Paul Durrant 
Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 accel/xen/xen-all.c |  11 +-
 hw/char/xen_console.c   |   2 +-
 hw/i386/kvm/xen_xenstore.c  |   3 -
 hw/i386/kvm/xenstore_impl.h |   8 +-
 hw/xen/xen-bus-helper.c |  62 +++
 hw/xen/xen-bus.c| 261 
 hw/xen/xen-legacy-backend.c | 119 +++--
 hw/xen/xen-operations.c | 198 +
 hw/xen/xen_devconfig.c  |   4 +-
 hw/xen/xen_pt_graphics.c|   1 -
 hw/xen/xen_pvdev.c  |  49 +-
 include/hw/xen/xen-bus-helper.h |  26 +--
 include/hw/xen/xen-bus.h|  17 +-
 include/hw/xen/xen-legacy-backend.h |   6 +-
 include/hw/xen/xen_backend_ops.h| 163 +
 include/hw/xen/xen_common.h |   1 -
 include/hw/xen/xen_pvdev.h  |   2 +-
 softmmu/globals.c   |   1 +
 18 files changed, 525 insertions(+), 409 deletions(-)

diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index e85e4aeba5..425216230f 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -90,12 +90,15 @@ void xenstore_store_pv_console_info(int i, Chardev *chr)
 }
 
 
-static void xenstore_record_dm_state(struct xs_handle *xs, const char *state)
+static void xenstore_record_dm_state(const char *state)
 {
+struct xs_handle *xs;
 char path[50];
 
+/* We now have everything we need to set the xenstore entry. */
+xs = xs_open(0);
 if (xs == NULL) {
-error_report("xenstore connection not initialized");
+fprintf(stderr, "Could not contact XenStore\n");
 exit(1);
 }
 
@@ -109,6 +112,8 @@ static void xenstore_record_dm_state(struct xs_handle *xs, 
const char *state)
 error_report("error recording dm state");
 exit(1);
 }
+
+xs_close(xs);
 }
 
 
@@ -117,7 +122,7 @@ static void xen_change_state_handler(void *opaque, bool 
running,
 {
 if (running) {
 /* record state running */
-xenstore_record_dm_state(xenstore, "running");
+xenstore_record_dm_state("running");
 }
 }
 
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index e9cef3e1ef..ad8638a86d 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -181,7 +181,7 @@ static int con_init(struct XenLegacyDevice *xendev)
 const char *output;
 
 /* setup */
-dom = xs_get_domain_path(xenstore, con->xendev.dom);
+dom = qemu_xen_xs_get_domain_path(xenstore, con->xendev.dom);
 if (!xendev->dev) {
 snprintf(con->console, sizeof(con->console), "%s/console", dom);
 } else {
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index fb3648a058..35898e9b37 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -38,9 +38,6 @@
 #define TYPE_XEN_XENSTORE "xen-xenstore"
 OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
 
-#define XEN_PAGE_SHIFT 12
-#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
-
 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
 
diff --git a/hw/i386/kvm/xenstore_impl.h b/hw/i386/kvm/xenstore_impl.h
index bbe2391e2e..0df2a91aae 100644
--- a/hw/i386/kvm/xenstore_impl.h
+++ b/hw/i386/kvm/xenstore_impl.h
@@ -12,13 +12,7 @@
 #ifndef QEMU_XENSTORE_IMPL_H
 #define QEMU_XENSTORE_IMPL_H
 
-typedef uint32_t xs_transaction_t;
-
-#define XBT_NULL 0
-
-#define XS_PERM_NONE  0x00
-#define XS_PERM_READ  0x01
-#define XS_PERM_WRITE 0x02
+#include "hw/xen/xen_backend_ops.h"
 
 typedef struct XenstoreImplState XenstoreImplState;
 
diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c
index 5a1e12b374..b2b2cc9c5d 100644
--- a/hw/xen/xen-bus-helper.c
+++ b/hw/xen/xen-bus-helper.c
@@ -10,6 +10,7 @@
 #include "hw/xen/xen-bus.h"
 #include "hw/xen/xen-bus-helper.h"
 #include "qapi/error.h"
+#include "trace.h"
 
 #include 
 
@@ -46,34 +47,28 @@ const char *xs_strstate(enum xenbus_state state)
 return "INVALID";
 }
 
-void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid,
-const char *node, struct xs_permissions perms[],
-unsigned int nr_perms, Error **errp)
+void xs_node_create(struct qemu_xs_handle *h, xs_transaction_t tid,
+const char *node, unsigned int owner, unsigned int domid,
+unsigned int perms, Error **errp)
 {
 trace_xs_node_create(node);
 
-if (!xs_write(xsh, tid, node, "", 0)) {
+if (!qemu_xen_xs_create(h, tid, owner, domid, perms, node)) {
 error_setg_errno(errp, errno, "failed to create node '%s'", node);
-return;
-}
-
-if (!xs_set_permissions(xsh, tid, node, perms, nr_perms)) {
-error_setg_errno(errp, errno, "failed to set node '%s' permissions",
- node);
 }
 }
 
-void xs_node_destroy(struct 

[PULL 18/27] hw/xen: Avoid crash when backend watch fires too early

2023-03-07 Thread David Woodhouse
From: Paul Durrant 

The xen-block code ends up calling aio_poll() through blkconf_geometry(),
which means we see watch events during the indirect call to
xendev_class->realize() in xen_device_realize(). Unfortunately this call
is made before populating the initial frontend and backend device nodes
in xenstore and hence xen_block_frontend_changed() (which is called from
a watch event) fails to read the frontend's 'state' node, and hence
believes the device is being torn down. This in-turn sets the backend
state to XenbusStateClosed and causes the device to be deleted before it
is fully set up, leading to the crash.
By simply moving the call to xendev_class->realize() after the initial
xenstore nodes are populated, this sorry state of affairs is avoided.

Reported-by: David Woodhouse 
Signed-off-by: Paul Durrant 
Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/xen/xen-bus.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index 9fe54967d4..c59850b1de 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -1034,13 +1034,6 @@ static void xen_device_realize(DeviceState *dev, Error 
**errp)
 goto unrealize;
 }
 
-if (xendev_class->realize) {
-xendev_class->realize(xendev, errp);
-if (*errp) {
-goto unrealize;
-}
-}
-
 xen_device_backend_printf(xendev, "frontend", "%s",
   xendev->frontend_path);
 xen_device_backend_printf(xendev, "frontend-id", "%u",
@@ -1059,6 +1052,13 @@ static void xen_device_realize(DeviceState *dev, Error 
**errp)
 xen_device_frontend_set_state(xendev, XenbusStateInitialising, true);
 }
 
+if (xendev_class->realize) {
+xendev_class->realize(xendev, errp);
+if (*errp) {
+goto unrealize;
+}
+}
+
 xendev->exit.notify = xen_device_exit;
 qemu_add_exit_notifier(&xendev->exit);
 return;
-- 
2.39.0




[PULL 06/27] hw/xen: Implement XenStore permissions

2023-03-07 Thread David Woodhouse
From: Paul Durrant 

Store perms as a GList of strings, check permissions.

Signed-off-by: Paul Durrant 
Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_xenstore.c  |   2 +-
 hw/i386/kvm/xenstore_impl.c | 259 +---
 hw/i386/kvm/xenstore_impl.h |   8 +-
 tests/unit/test-xs-node.c   |  27 +++-
 4 files changed, 275 insertions(+), 21 deletions(-)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 64d8f1a38f..3b409e3817 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -98,7 +98,7 @@ static void xen_xenstore_realize(DeviceState *dev, Error 
**errp)
 aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
xen_xenstore_event, NULL, NULL, NULL, s);
 
-s->impl = xs_impl_create();
+s->impl = xs_impl_create(xen_domid);
 }
 
 static bool xen_xenstore_is_needed(void *opaque)
diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 60f42f61d6..8a2053e243 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -12,6 +12,8 @@
 #include "qemu/osdep.h"
 #include "qom/object.h"
 
+#include "hw/xen/xen.h"
+
 #include "xen_xenstore.h"
 #include "xenstore_impl.h"
 
@@ -30,6 +32,7 @@
 typedef struct XsNode {
 uint32_t ref;
 GByteArray *content;
+GList *perms;
 GHashTable *children;
 uint64_t gencnt;
 bool deleted_in_tx;
@@ -133,6 +136,9 @@ static inline void xs_node_unref(XsNode *n)
 if (n->content) {
 g_byte_array_unref(n->content);
 }
+if (n->perms) {
+g_list_free_full(n->perms, g_free);
+}
 if (n->children) {
 g_hash_table_unref(n->children);
 }
@@ -144,8 +150,51 @@ static inline void xs_node_unref(XsNode *n)
 g_free(n);
 }
 
+char *xs_perm_as_string(unsigned int perm, unsigned int domid)
+{
+char letter;
+
+switch (perm) {
+case XS_PERM_READ | XS_PERM_WRITE:
+letter = 'b';
+break;
+case XS_PERM_READ:
+letter = 'r';
+break;
+case XS_PERM_WRITE:
+letter = 'w';
+break;
+case XS_PERM_NONE:
+default:
+letter = 'n';
+break;
+}
+
+return g_strdup_printf("%c%u", letter, domid);
+}
+
+static gpointer do_perm_copy(gconstpointer src, gpointer user_data)
+{
+return g_strdup(src);
+}
+
+static XsNode *xs_node_create(const char *name, GList *perms)
+{
+XsNode *n = xs_node_new();
+
+#ifdef XS_NODE_UNIT_TEST
+if (name) {
+n->name = g_strdup(name);
+}
+#endif
+
+n->perms = g_list_copy_deep(perms, do_perm_copy, NULL);
+
+return n;
+}
+
 /* For copying from one hash table to another using g_hash_table_foreach() */
-static void do_insert(gpointer key, gpointer value, gpointer user_data)
+static void do_child_insert(gpointer key, gpointer value, gpointer user_data)
 {
 g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value));
 }
@@ -162,12 +211,16 @@ static XsNode *xs_node_copy(XsNode *old)
 }
 #endif
 
+assert(old);
 if (old->children) {
 n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
 (GDestroyNotify)xs_node_unref);
-g_hash_table_foreach(old->children, do_insert, n->children);
+g_hash_table_foreach(old->children, do_child_insert, n->children);
 }
-if (old && old->content) {
+if (old->perms) {
+n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL);
+}
+if (old->content) {
 n->content = g_byte_array_ref(old->content);
 }
 return n;
@@ -383,6 +436,9 @@ static XsNode *xs_node_copy_deleted(XsNode *old, struct 
walk_op *op)
 op->op_opaque2 = n->children;
 g_hash_table_foreach(old->children, copy_deleted_recurse, op);
 }
+if (old->perms) {
+n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL);
+}
 n->deleted_in_tx = true;
 /* If it gets resurrected we only fire a watch if it lost its content */
 if (old->content) {
@@ -417,6 +473,104 @@ static int xs_node_rm(XsNode **n, struct walk_op *op)
 return 0;
 }
 
+static int xs_node_get_perms(XsNode **n, struct walk_op *op)
+{
+GList **perms = op->op_opaque;
+
+assert(op->inplace);
+assert(*n);
+
+*perms = g_list_copy_deep((*n)->perms, do_perm_copy, NULL);
+return 0;
+}
+
+static void parse_perm(const char *perm, char *letter, unsigned int *dom_id)
+{
+unsigned int n = sscanf(perm, "%c%u", letter, dom_id);
+
+assert(n == 2);
+}
+
+static bool can_access(unsigned int dom_id, GList *perms, const char *letters)
+{
+unsigned int i, n;
+char perm_letter;
+unsigned int perm_dom_id;
+bool access;
+
+if (dom_id == 0) {
+return true;
+}
+
+n = g_list_length(perms);
+assert(n >= 1);
+
+/*
+ * The dom_id of the first perm is the owner, and the owner always has
+ * read-write access.
+ */
+

[PULL 15/27] hw/xen: Use XEN_PAGE_SIZE in PV backend drivers

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

XC_PAGE_SIZE comes from the actual Xen libraries, while XEN_PAGE_SIZE is
provided by QEMU itself in xen_backend_ops.h. For backends which may be
built for emulation mode, use the latter.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/block/dataplane/xen-block.c |  8 
 hw/display/xenfb.c | 12 ++--
 hw/net/xen_nic.c   | 12 ++--
 hw/usb/xen-usb.c   |  8 
 4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index e55b713002..8322a1de82 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -101,9 +101,9 @@ static XenBlockRequest 
*xen_block_start_request(XenBlockDataPlane *dataplane)
  * re-use requests, allocate the memory once here. It will be freed
  * xen_block_dataplane_destroy() when the request list is freed.
  */
-request->buf = qemu_memalign(XC_PAGE_SIZE,
+request->buf = qemu_memalign(XEN_PAGE_SIZE,
  BLKIF_MAX_SEGMENTS_PER_REQUEST *
- XC_PAGE_SIZE);
+ XEN_PAGE_SIZE);
 dataplane->requests_total++;
 qemu_iovec_init(>v, 1);
 } else {
@@ -185,7 +185,7 @@ static int xen_block_parse_request(XenBlockRequest *request)
 goto err;
 }
 if (request->req.seg[i].last_sect * dataplane->sector_size >=
-XC_PAGE_SIZE) {
+XEN_PAGE_SIZE) {
 error_report("error: page crossing");
 goto err;
 }
@@ -740,7 +740,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
 
 dataplane->protocol = protocol;
 
-ring_size = XC_PAGE_SIZE * dataplane->nr_ring_ref;
+ring_size = XEN_PAGE_SIZE * dataplane->nr_ring_ref;
 switch (dataplane->protocol) {
 case BLKIF_PROTOCOL_NATIVE:
 {
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 2c4016fcbd..0074a9b6f8 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -489,13 +489,13 @@ static int xenfb_map_fb(struct XenFB *xenfb)
 }
 
 if (xenfb->pixels) {
-munmap(xenfb->pixels, xenfb->fbpages * XC_PAGE_SIZE);
+munmap(xenfb->pixels, xenfb->fbpages * XEN_PAGE_SIZE);
 xenfb->pixels = NULL;
 }
 
-xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XC_PAGE_SIZE);
+xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XEN_PAGE_SIZE);
 n_fbdirs = xenfb->fbpages * mode / 8;
-n_fbdirs = DIV_ROUND_UP(n_fbdirs, XC_PAGE_SIZE);
+n_fbdirs = DIV_ROUND_UP(n_fbdirs, XEN_PAGE_SIZE);
 
 pgmfns = g_new0(xen_pfn_t, n_fbdirs);
 fbmfns = g_new0(xen_pfn_t, xenfb->fbpages);
@@ -528,8 +528,8 @@ static int xenfb_configure_fb(struct XenFB *xenfb, size_t 
fb_len_lim,
 {
 size_t mfn_sz = sizeof_field(struct xenfb_page, pd[0]);
 size_t pd_len = sizeof_field(struct xenfb_page, pd) / mfn_sz;
-size_t fb_pages = pd_len * XC_PAGE_SIZE / mfn_sz;
-size_t fb_len_max = fb_pages * XC_PAGE_SIZE;
+size_t fb_pages = pd_len * XEN_PAGE_SIZE / mfn_sz;
+size_t fb_len_max = fb_pages * XEN_PAGE_SIZE;
 int max_width, max_height;
 
 if (fb_len_lim > fb_len_max) {
@@ -930,7 +930,7 @@ static void fb_disconnect(struct XenLegacyDevice *xendev)
  *   instead.  This releases the guest pages and keeps qemu happy.
  */
 qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages);
-fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE,
+fb->pixels = mmap(fb->pixels, fb->fbpages * XEN_PAGE_SIZE,
   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON,
   -1, 0);
 if (fb->pixels == MAP_FAILED) {
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 166d03787d..9bbf6599fc 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -145,7 +145,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
 continue;
 }
 
-if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) {
+if ((txreq.offset + txreq.size) > XEN_PAGE_SIZE) {
 xen_pv_printf(>xendev, 0, "error: page crossing\n");
 net_tx_error(netdev, , rc);
 continue;
@@ -171,7 +171,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
 if (txreq.flags & NETTXF_csum_blank) {
 /* have read-only mapping -> can't fill checksum in-place */
 if (!tmpbuf) {
-tmpbuf = g_malloc(XC_PAGE_SIZE);
+tmpbuf = g_malloc(XEN_PAGE_SIZE);
 }
 memcpy(tmpbuf, page + txreq.offset, txreq.size);
 net_checksum_calculate(tmpbuf, txreq.size, CSUM_ALL);
@@ -243,9 +243,9 @@ static ssize_t net_rx_packet(NetClientState *nc, const 
uint8_t *buf, size_t size
 if (rc == rp || RING_REQUEST_CONS_OVERFLOW(>rx_ring, rc)) {
 return 0;
 }
-if (size > XC_PAGE_SIZE - 

[PULL 27/27] docs: Update Xen-on-KVM documentation for PV disk support

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 docs/system/i386/xen.rst | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/docs/system/i386/xen.rst b/docs/system/i386/xen.rst
index a00523b492..f06765e88c 100644
--- a/docs/system/i386/xen.rst
+++ b/docs/system/i386/xen.rst
@@ -9,6 +9,8 @@ KVM has support for hosting Xen guests, intercepting Xen 
hypercalls and event
 channel (Xen PV interrupt) delivery. This allows guests which expect to be
 run under Xen to be hosted in QEMU under Linux/KVM instead.
 
+Using the split irqchip is mandatory for Xen support.
+
 Setup
 -
 
@@ -17,14 +19,14 @@ accelerator, for example for Xen 4.10:
 
 .. parsed-literal::
 
-  |qemu_system| --accel kvm,xen-version=0x4000a
+  |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split
 
 Additionally, virtual APIC support can be advertised to the guest through the
 ``xen-vapic`` CPU flag:
 
 .. parsed-literal::
 
-  |qemu_system| --accel kvm,xen-version=0x4000a --cpu host,+xen_vapic
+  |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split --cpu 
host,+xen_vapic
 
 When Xen support is enabled, QEMU changes hypervisor identification (CPUID
 0x4000..0x400A) to Xen. The KVM identification and features are not
@@ -33,11 +35,25 @@ moves to leaves 0x4100..0x410A.
 
 The Xen platform device is enabled automatically for a Xen guest. This allows
 a guest to unplug all emulated devices, in order to use Xen PV block and 
network
-drivers instead. Note that until the Xen PV device back ends are enabled to 
work
-with Xen mode in QEMU, that is unlikely to cause significant joy. Linux guests
-can be dissuaded from this by adding 'xen_emul_unplug=never' on their command
-line, and it can also be noted that AHCI disk controllers are exempt from being
-unplugged, as are passthrough VFIO PCI devices.
+drivers instead. Under Xen, the boot disk is typically available both via IDE
+emulation, and as a PV block device. Guest bootloaders typically use IDE to 
load
+the guest kernel, which then unplugs the IDE and continues with the Xen PV 
block
+device.
+
+This configuration can be achieved as follows:
+
+.. parsed-literal::
+
+  |qemu_system| -M pc --accel kvm,xen-version=0x4000a,kernel-irqchip=split \\
+   -drive file=${GUEST_IMAGE},if=none,id=disk,file.locking=off -device 
xen-disk,drive=disk,vdev=xvda \\
+   -drive file=${GUEST_IMAGE},index=2,media=disk,file.locking=off,if=ide
+
+It is necessary to use the pc machine type, as the q35 machine uses AHCI 
instead
+of legacy IDE, and AHCI disks are not unplugged through the Xen PV unplug
+mechanism.
+
+VirtIO devices can also be used; Linux guests may need to be dissuaded from
+unplugging them by adding 'xen_emul_unplug=never' on their command line.
 
 Properties
 --
-- 
2.39.0




[PULL 20/27] hw/xen: Hook up emulated implementation for event channel operations

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

We provided the backend-facing evtchn functions very early on as part of
the core Xen platform support, since things like timers and xenstore need
to use them.

By what may or may not be an astonishing coincidence, those functions
just *happen* all to have exactly the right function prototypes to slot
into the evtchn_backend_ops table and be called by the PV backends.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_evtchn.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index 886fbf6b3b..98a7b85047 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -34,6 +34,7 @@
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
 #include "hw/irq.h"
+#include "hw/xen/xen_backend_ops.h"
 
 #include "xen_evtchn.h"
 #include "xen_overlay.h"
@@ -278,6 +279,17 @@ static const TypeInfo xen_evtchn_info = {
 .class_init= xen_evtchn_class_init,
 };
 
+static struct evtchn_backend_ops emu_evtchn_backend_ops = {
+.open = xen_be_evtchn_open,
+.bind_interdomain = xen_be_evtchn_bind_interdomain,
+.unbind = xen_be_evtchn_unbind,
+.close = xen_be_evtchn_close,
+.get_fd = xen_be_evtchn_fd,
+.notify = xen_be_evtchn_notify,
+.unmask = xen_be_evtchn_unmask,
+.pending = xen_be_evtchn_pending,
+};
+
 static void gsi_assert_bh(void *opaque)
 {
 struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
@@ -318,6 +330,9 @@ void xen_evtchn_create(void)
 s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
 s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
 s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
+
+/* Set event channel functions for backend drivers to use */
+xen_evtchn_ops = &emu_evtchn_backend_ops;
 }
 
 void xen_evtchn_connect_gsis(qemu_irq *system_gsis)
-- 
2.39.0




[PULL 26/27] MAINTAINERS: Add entry for Xen on KVM emulation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 MAINTAINERS | 9 +
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5340de0515..640deb2895 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -443,6 +443,15 @@ F: target/i386/kvm/
 F: target/i386/sev*
 F: scripts/kvm/vmxcap
 
+Xen emulation on X86 KVM CPUs
+M: David Woodhouse 
+M: Paul Durrant 
+S: Supported
+F: include/sysemu/kvm_xen.h
+F: target/i386/kvm/xen*
+F: hw/i386/kvm/xen*
+F: tests/avocado/xen_guest.py
+
 Guest CPU Cores (other accelerators)
 
 Overall
-- 
2.39.0




[PULL 10/27] hw/xen: Add gnttab operations to allow redirection to internal emulation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Move the existing code using libxengnttab to xen-operations.c and allow
the operations to be redirected so that we can add emulation of grant
table mapping for backend drivers.

In emulation, mapping more than one grant ref to be virtually contiguous
would be fairly difficult. The best way to do it might be to make the
ram_block mappings actually backed by a file (shmem or a deleted file,
perhaps) so that we can have multiple *shared* mappings of it. But that
would be fairly intrusive.

Making the backend drivers cope with page *lists* instead of expecting
the mapping to be contiguous is also non-trivial, since some structures
would actually *cross* page boundaries (e.g. the 32-bit blkif responses
which are 12 bytes).

So for now, we'll support only single-page mappings in emulation. Add a
XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE flag to indicate that the native Xen
implementation *does* support multi-page maps, and a helper function to
query it.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/xen/xen-bus.c| 112 ++--
 hw/xen/xen-legacy-backend.c | 125 ++
 hw/xen/xen-operations.c | 157 
 hw/xen/xen_pvdev.c  |   2 +-
 include/hw/xen/xen-bus.h|   3 +-
 include/hw/xen/xen-legacy-backend.h |  13 +--
 include/hw/xen/xen_backend_ops.h| 100 ++
 include/hw/xen/xen_common.h |  39 ---
 softmmu/globals.c   |   1 +
 9 files changed, 280 insertions(+), 272 deletions(-)

diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index d0b1ae93da..b247e86f28 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -947,7 +947,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev)
 void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs,
Error **errp)
 {
-if (xengnttab_set_max_grants(xendev->xgth, nr_refs)) {
+if (qemu_xen_gnttab_set_max_grants(xendev->xgth, nr_refs)) {
 error_setg_errno(errp, errno, "xengnttab_set_max_grants failed");
 }
 }
@@ -956,9 +956,8 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t 
*refs,
 unsigned int nr_refs, int prot,
 Error **errp)
 {
-void *map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_refs,
-xendev->frontend_id, refs,
-prot);
+void *map = qemu_xen_gnttab_map_refs(xendev->xgth, nr_refs,
+ xendev->frontend_id, refs, prot);
 
 if (!map) {
 error_setg_errno(errp, errno,
@@ -971,109 +970,17 @@ void *xen_device_map_grant_refs(XenDevice *xendev, 
uint32_t *refs,
 void xen_device_unmap_grant_refs(XenDevice *xendev, void *map,
  unsigned int nr_refs, Error **errp)
 {
-if (xengnttab_unmap(xendev->xgth, map, nr_refs)) {
+if (qemu_xen_gnttab_unmap(xendev->xgth, map, nr_refs)) {
 error_setg_errno(errp, errno, "xengnttab_unmap failed");
 }
 }
 
-static void compat_copy_grant_refs(XenDevice *xendev, bool to_domain,
-   XenDeviceGrantCopySegment segs[],
-   unsigned int nr_segs, Error **errp)
-{
-uint32_t *refs = g_new(uint32_t, nr_segs);
-int prot = to_domain ? PROT_WRITE : PROT_READ;
-void *map;
-unsigned int i;
-
-for (i = 0; i < nr_segs; i++) {
-XenDeviceGrantCopySegment *seg = [i];
-
-refs[i] = to_domain ? seg->dest.foreign.ref :
-seg->source.foreign.ref;
-}
-
-map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_segs,
-  xendev->frontend_id, refs,
-  prot);
-if (!map) {
-error_setg_errno(errp, errno,
- "xengnttab_map_domain_grant_refs failed");
-goto done;
-}
-
-for (i = 0; i < nr_segs; i++) {
-XenDeviceGrantCopySegment *seg = [i];
-void *page = map + (i * XC_PAGE_SIZE);
-
-if (to_domain) {
-memcpy(page + seg->dest.foreign.offset, seg->source.virt,
-   seg->len);
-} else {
-memcpy(seg->dest.virt, page + seg->source.foreign.offset,
-   seg->len);
-}
-}
-
-if (xengnttab_unmap(xendev->xgth, map, nr_segs)) {
-error_setg_errno(errp, errno, "xengnttab_unmap failed");
-}
-
-done:
-g_free(refs);
-}
-
 void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain,
 XenDeviceGrantCopySegment segs[],
 unsigned int nr_segs, Error **errp)
 {
-xengnttab_grant_copy_segment_t *xengnttab_segs;
-unsigned int i;
-
-if (!xendev->feature_grant_copy) {
-compat_copy_grant_refs(xendev, to_domain, 

[PULL 25/27] i386/xen: Initialize Xen backends from pc_basic_device_init() for emulation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Now that all the work is done to enable the PV backends to work without
actual Xen, instantiate the bus from pc_basic_device_init() for emulated
mode.

This allows us finally to launch an emulated Xen guest with PV disk.

   qemu-system-x86_64 -serial mon:stdio -M q35 -cpu host -display none \
 -m 1G -smp 2 -accel kvm,xen-version=0x4000a,kernel-irqchip=split \
 -kernel bzImage -append "console=ttyS0 root=/dev/xvda1" \
 -drive file=/var/lib/libvirt/images/fedora28.qcow2,if=none,id=disk \
 -device xen-disk,drive=disk,vdev=xvda

If we use -M pc instead of q35, we can even add an IDE disk and boot a
guest image normally through grub. But q35 gives us AHCI and that isn't
unplugged by the Xen magic, so the guest ends up seeing "both" disks.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/pc.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7bebea57e3..1489abf010 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -102,6 +102,11 @@
 #include "trace.h"
 #include CONFIG_DEVICES
 
+#ifdef CONFIG_XEN_EMU
+#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen-bus.h"
+#endif
+
 /*
  * Helper for setting model-id for CPU models that changed model-id
  * depending on QEMU versions up to QEMU 2.4.
@@ -1318,6 +1323,8 @@ void pc_basic_device_init(struct PCMachineState *pcms,
 if (pcms->bus) {
 pci_create_simple(pcms->bus, -1, "xen-platform");
 }
+xen_bus_init();
+xen_be_init();
 }
 #endif
 
-- 
2.39.0




[PULL 04/27] hw/xen: Implement XenStore transactions

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Given that the whole thing supported copy on write from the beginning,
transactions end up being fairly simple. On starting a transaction, just
take a ref of the existing root; swap it back in on a successful commit.

The main tree has a transaction ID too, and we keep a record of the last
transaction ID given out. If the main tree is ever modified when it isn't
the latest, it gets a new transaction ID.

A commit can only succeed if the main tree hasn't moved on since it was
forked. Strictly speaking, the XenStore protocol allows a transaction to
succeed as long as nothing *it* read or wrote has changed in the interim,
but no implementations do that; *any* change is sufficient to abort a
transaction.

This does not yet fire watches on the changed nodes on a commit. That bit
is more fun and will come in a follow-on commit.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xenstore_impl.c | 150 ++--
 tests/unit/test-xs-node.c   | 118 
 2 files changed, 262 insertions(+), 6 deletions(-)

diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 9c2348835f..0812e367b0 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -46,13 +46,56 @@ typedef struct XsWatch {
 int rel_prefix;
 } XsWatch;
 
+typedef struct XsTransaction {
+XsNode *root;
+unsigned int nr_nodes;
+unsigned int base_tx;
+unsigned int tx_id;
+unsigned int dom_id;
+} XsTransaction;
+
 struct XenstoreImplState {
 XsNode *root;
 unsigned int nr_nodes;
 GHashTable *watches;
 unsigned int nr_domu_watches;
+GHashTable *transactions;
+unsigned int nr_domu_transactions;
+unsigned int root_tx;
+unsigned int last_tx;
 };
 
+
+static void nobble_tx(gpointer key, gpointer value, gpointer user_data)
+{
+unsigned int *new_tx_id = user_data;
+XsTransaction *tx = value;
+
+if (tx->base_tx == *new_tx_id) {
+/* Transactions based on XBT_NULL will always fail */
+tx->base_tx = XBT_NULL;
+}
+}
+
+static inline unsigned int next_tx(struct XenstoreImplState *s)
+{
+unsigned int tx_id;
+
+/* Find the next TX id which isn't either XBT_NULL or in use. */
+do {
+tx_id = ++s->last_tx;
+} while (tx_id == XBT_NULL || tx_id == s->root_tx ||
+ g_hash_table_lookup(s->transactions, GINT_TO_POINTER(tx_id)));
+
+/*
+ * It is vanishingly unlikely, but ensure that no outstanding transaction
+ * is based on the (previous incarnation of the) newly-allocated TX id.
+ */
+g_hash_table_foreach(s->transactions, nobble_tx, &tx_id);
+
+return tx_id;
+}
+
 static inline XsNode *xs_node_new(void)
 {
 XsNode *n = g_new0(XsNode, 1);
@@ -159,6 +202,7 @@ struct walk_op {
 
 GList *watches;
 unsigned int dom_id;
+unsigned int tx_id;
 
 /* The number of nodes which will exist in the tree if this op succeeds. */
 unsigned int new_nr_nodes;
@@ -176,6 +220,7 @@ struct walk_op {
 bool inplace;
 bool mutating;
 bool create_dirs;
+bool in_transaction;
 };
 
 static void fire_watches(struct walk_op *op, bool parents)
@@ -183,7 +228,7 @@ static void fire_watches(struct walk_op *op, bool parents)
 GList *l = NULL;
 XsWatch *w;
 
-if (!op->mutating) {
+if (!op->mutating || op->in_transaction) {
 return;
 }
 
@@ -450,10 +495,23 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 assert(!op->watches);
 /*
  * On completing the recursion back up the path walk and reaching the
- * top, assign the new node count if the operation was successful.
+ * top, assign the new node count if the operation was successful. If
+ * the main tree was changed, bump its tx ID so that outstanding
+ * transactions correctly fail. But don't bump it every time; only
+ * if it makes a difference.
  */
 if (!err && op->mutating) {
-op->s->nr_nodes = op->new_nr_nodes;
+if (!op->in_transaction) {
+if (op->s->root_tx != op->s->last_tx) {
+op->s->root_tx = next_tx(op->s);
+}
+op->s->nr_nodes = op->new_nr_nodes;
+} else {
+XsTransaction *tx = g_hash_table_lookup(op->s->transactions,
+
GINT_TO_POINTER(op->tx_id));
+assert(tx);
+tx->nr_nodes = op->new_nr_nodes;
+}
 }
 }
 return err;
@@ -535,14 +593,23 @@ static int init_walk_op(XenstoreImplState *s, struct 
walk_op *op,
 op->inplace = true;
 op->mutating = false;
 op->create_dirs = false;
+op->in_transaction = false;
 op->dom_id = dom_id;
+op->tx_id = tx_id;
 op->s = s;
 
 if (tx_id == XBT_NULL) {
 *rootp = >root;
 op->new_nr_nodes = s->nr_nodes;
 } else {
-  

[PULL 11/27] hw/xen: Pass grant ref to gnttab unmap operation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

The previous commit introduced redirectable gnttab operations fairly
much like-for-like, with the exception of the extra arguments to the
->open() call which were always NULL/0 anyway.

This *changes* the arguments to the ->unmap() operation to include the
original ref# that was mapped. Under real Xen it isn't necessary; all we
need to do from QEMU is munmap(), then the kernel will release the grant,
and Xen does the tracking/refcounting for the guest.

When we have emulated grant tables though, we need to do all that for
ourselves. So let's have the back ends keep track of what they mapped
and pass it in to the ->unmap() method for us.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/9pfs/xen-9p-backend.c|  7 ---
 hw/block/dataplane/xen-block.c  |  1 +
 hw/char/xen_console.c   |  2 +-
 hw/net/xen_nic.c| 13 -
 hw/usb/xen-usb.c| 21 -
 hw/xen/xen-bus.c|  4 ++--
 hw/xen/xen-legacy-backend.c |  4 ++--
 hw/xen/xen-operations.c |  9 -
 include/hw/xen/xen-bus.h|  2 +-
 include/hw/xen/xen-legacy-backend.h |  6 +++---
 include/hw/xen/xen_backend_ops.h|  7 ---
 11 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 864bdaf952..d8bb0e847c 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -359,12 +359,13 @@ static int xen_9pfs_free(struct XenLegacyDevice *xendev)
 if (xen_9pdev->rings[i].data != NULL) {
 xen_be_unmap_grant_refs(_9pdev->xendev,
 xen_9pdev->rings[i].data,
+xen_9pdev->rings[i].intf->ref,
 (1 << xen_9pdev->rings[i].ring_order));
 }
 if (xen_9pdev->rings[i].intf != NULL) {
-xen_be_unmap_grant_refs(_9pdev->xendev,
-xen_9pdev->rings[i].intf,
-1);
+xen_be_unmap_grant_ref(_9pdev->xendev,
+   xen_9pdev->rings[i].intf,
+   xen_9pdev->rings[i].ref);
 }
 if (xen_9pdev->rings[i].bh != NULL) {
 qemu_bh_delete(xen_9pdev->rings[i].bh);
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 2785b9e849..e55b713002 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -705,6 +705,7 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
 Error *local_err = NULL;
 
 xen_device_unmap_grant_refs(xendev, dataplane->sring,
+dataplane->ring_ref,
 dataplane->nr_ring_ref, _err);
 dataplane->sring = NULL;
 
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 63153dfde4..19ad6c946a 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -271,7 +271,7 @@ static void con_disconnect(struct XenLegacyDevice *xendev)
 if (!xendev->dev) {
 xenforeignmemory_unmap(xen_fmem, con->sring, 1);
 } else {
-xen_be_unmap_grant_ref(xendev, con->sring);
+xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref);
 }
 con->sring = NULL;
 }
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 7d92c2d022..166d03787d 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -181,7 +181,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
 qemu_send_packet(qemu_get_queue(netdev->nic),
  page + txreq.offset, txreq.size);
 }
-xen_be_unmap_grant_ref(>xendev, page);
+xen_be_unmap_grant_ref(>xendev, page, txreq.gref);
 net_tx_response(netdev, , NETIF_RSP_OKAY);
 }
 if (!netdev->tx_work) {
@@ -261,7 +261,7 @@ static ssize_t net_rx_packet(NetClientState *nc, const 
uint8_t *buf, size_t size
 return -1;
 }
 memcpy(page + NET_IP_ALIGN, buf, size);
-xen_be_unmap_grant_ref(>xendev, page);
+xen_be_unmap_grant_ref(>xendev, page, rxreq.gref);
 net_rx_response(netdev, , NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);
 
 return size;
@@ -343,7 +343,8 @@ static int net_connect(struct XenLegacyDevice *xendev)
netdev->rx_ring_ref,
PROT_READ | PROT_WRITE);
 if (!netdev->rxs) {
-xen_be_unmap_grant_ref(>xendev, netdev->txs);
+xen_be_unmap_grant_ref(>xendev, netdev->txs,
+   netdev->tx_ring_ref);
 netdev->txs = NULL;
 return -1;
 }
@@ -368,11 +369,13 @@ static void net_disconnect(struct XenLegacyDevice *xendev)
 xen_pv_unbind_evtchn(>xendev);
 
 if (netdev->txs) {
-xen_be_unmap_grant_ref(>xendev, 

[PULL 14/27] hw/xen: Move xenstore_store_pv_console_info to xen_console.c

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

There's no need for this to be in the Xen accel code, and as we want to
use the Xen console support with KVM-emulated Xen we'll want to have a
platform-agnostic version of it. Make it use GString to build up the
path while we're at it.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 accel/xen/xen-all.c   | 61 ---
 hw/char/xen_console.c | 45 +--
 include/hw/xen/xen.h  |  2 --
 3 files changed, 43 insertions(+), 65 deletions(-)

diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index 425216230f..2d51c41e40 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -29,67 +29,6 @@ xc_interface *xen_xc;
 xenforeignmemory_handle *xen_fmem;
 xendevicemodel_handle *xen_dmod;
 
-static int store_dev_info(int domid, Chardev *cs, const char *string)
-{
-struct xs_handle *xs = NULL;
-char *path = NULL;
-char *newpath = NULL;
-char *pts = NULL;
-int ret = -1;
-
-/* Only continue if we're talking to a pty. */
-if (!CHARDEV_IS_PTY(cs)) {
-return 0;
-}
-pts = cs->filename + 4;
-
-/* We now have everything we need to set the xenstore entry. */
-xs = xs_open(0);
-if (xs == NULL) {
-fprintf(stderr, "Could not contact XenStore\n");
-goto out;
-}
-
-path = xs_get_domain_path(xs, domid);
-if (path == NULL) {
-fprintf(stderr, "xs_get_domain_path() error\n");
-goto out;
-}
-newpath = realloc(path, (strlen(path) + strlen(string) +
-strlen("/tty") + 1));
-if (newpath == NULL) {
-fprintf(stderr, "realloc error\n");
-goto out;
-}
-path = newpath;
-
-strcat(path, string);
-strcat(path, "/tty");
-if (!xs_write(xs, XBT_NULL, path, pts, strlen(pts))) {
-fprintf(stderr, "xs_write for '%s' fail", string);
-goto out;
-}
-ret = 0;
-
-out:
-free(path);
-xs_close(xs);
-
-return ret;
-}
-
-void xenstore_store_pv_console_info(int i, Chardev *chr)
-{
-if (i == 0) {
-store_dev_info(xen_domid, chr, "/console");
-} else {
-char buf[32];
-snprintf(buf, sizeof(buf), "/device/console/%d", i);
-store_dev_info(xen_domid, chr, buf);
-}
-}
-
-
 static void xenstore_record_dm_state(const char *state)
 {
 struct xs_handle *xs;
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index ad8638a86d..c7a19c0e7c 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -173,6 +173,48 @@ static void xencons_send(struct XenConsole *con)
 
 /*  */
 
+static int store_con_info(struct XenConsole *con)
+{
+Chardev *cs = qemu_chr_fe_get_driver(>chr);
+char *pts = NULL;
+char *dom_path;
+GString *path;
+int ret = -1;
+
+/* Only continue if we're talking to a pty. */
+if (!CHARDEV_IS_PTY(cs)) {
+return 0;
+}
+pts = cs->filename + 4;
+
+dom_path = qemu_xen_xs_get_domain_path(xenstore, xen_domid);
+if (!dom_path) {
+return 0;
+}
+
+path = g_string_new(dom_path);
+free(dom_path);
+
+if (con->xendev.dev) {
+g_string_append_printf(path, "/device/console/%d", con->xendev.dev);
+} else {
+g_string_append(path, "/console");
+}
+g_string_append(path, "/tty");
+
+if (xenstore_write_str(con->console, path->str, pts)) {
+fprintf(stderr, "xenstore_write_str for '%s' fail", path->str);
+goto out;
+}
+ret = 0;
+
+out:
+g_string_free(path, true);
+free(path);
+
+return ret;
+}
+
 static int con_init(struct XenLegacyDevice *xendev)
 {
 struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
@@ -215,8 +257,7 @@ static int con_init(struct XenLegacyDevice *xendev)
  _abort);
 }
 
-xenstore_store_pv_console_info(con->xendev.dev,
-   qemu_chr_fe_get_driver(>chr));
+store_con_info(con);
 
 out:
 g_free(type);
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
index 03983939f9..56b1c2a827 100644
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -39,8 +39,6 @@ int xen_is_pirq_msi(uint32_t msi_data);
 
 qemu_irq *xen_interrupt_controller_init(void);
 
-void xenstore_store_pv_console_info(int i, Chardev *chr);
-
 void xen_register_framebuffer(struct MemoryRegion *mr);
 
 #endif /* QEMU_HW_XEN_H */
-- 
2.39.0




[PULL 16/27] hw/xen: Rename xen_common.h to xen_native.h

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This header is now only for native Xen code, not PV backends that may be
used in Xen emulation. Since the toolstack libraries may depend on the
specific version of Xen headers that they pull in (and will set the
__XEN_TOOLS__ macro to enable internal definitions that they depend on),
the rule is that xen_native.h (and thus the toolstack library headers)
must be included *before* any of the headers in include/hw/xen/interface.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 accel/xen/xen-all.c   |  1 +
 hw/9pfs/xen-9p-backend.c  |  1 +
 hw/block/dataplane/xen-block.c|  3 ++-
 hw/block/xen-block.c  |  1 -
 hw/i386/pc_piix.c |  4 ++--
 hw/i386/xen/xen-hvm.c | 11 +-
 hw/i386/xen/xen-mapcache.c|  2 +-
 hw/i386/xen/xen_platform.c|  7 +++---
 hw/xen/trace-events   |  2 +-
 hw/xen/xen-operations.c   |  2 +-
 hw/xen/xen_pt.c   |  2 +-
 hw/xen/xen_pt.h   |  2 +-
 hw/xen/xen_pt_config_init.c   |  2 +-
 hw/xen/xen_pt_msi.c   |  4 ++--
 include/hw/xen/xen.h  | 22 ---
 include/hw/xen/{xen_common.h => xen_native.h} | 10 ++---
 include/hw/xen/xen_pvdev.h|  3 ++-
 17 files changed, 47 insertions(+), 32 deletions(-)
 rename include/hw/xen/{xen_common.h => xen_native.h} (98%)

diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index 2d51c41e40..00221e23c5 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -12,6 +12,7 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "hw/xen/xen_native.h"
 #include "hw/xen/xen-legacy-backend.h"
 #include "hw/xen/xen_pt.h"
 #include "chardev/char.h"
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index d8bb0e847c..74f3a05f88 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -22,6 +22,7 @@
 #include "qemu/config-file.h"
 #include "qemu/main-loop.h"
 #include "qemu/option.h"
+#include "qemu/iov.h"
 #include "fsdev/qemu-fsdev.h"
 
 #define VERSIONS "1"
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 8322a1de82..734da42ea7 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -23,8 +23,9 @@
 #include "qemu/main-loop.h"
 #include "qemu/memalign.h"
 #include "qapi/error.h"
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen.h"
 #include "hw/block/xen_blkif.h"
+#include "hw/xen/interface/io/ring.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/iothread.h"
 #include "xen-block.h"
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 345b284d70..87299615e3 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -19,7 +19,6 @@
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
 #include "qom/object_interfaces.h"
-#include "hw/xen/xen_common.h"
 #include "hw/block/xen_blkif.h"
 #include "hw/qdev-properties.h"
 #include "hw/xen/xen-block.h"
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4bf15f9c1f..30eedd62a3 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -47,8 +47,6 @@
 #include "hw/kvm/clock.h"
 #include "hw/sysbus.h"
 #include "hw/i2c/smbus_eeprom.h"
-#include "hw/xen/xen-x86.h"
-#include "hw/xen/xen.h"
 #include "exec/memory.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/piix4.h"
@@ -60,6 +58,8 @@
 #include 
 #include "hw/xen/xen_pt.h"
 #endif
+#include "hw/xen/xen-x86.h"
+#include "hw/xen/xen.h"
 #include "migration/global_state.h"
 #include "migration/misc.h"
 #include "sysemu/numa.h"
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index cb1d24f592..56641a550e 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -18,7 +18,7 @@
 #include "hw/irq.h"
 #include "hw/hw.h"
 #include "hw/i386/apic-msidef.h"
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_native.h"
 #include "hw/xen/xen-legacy-backend.h"
 #include "hw/xen/xen-bus.h"
 #include "hw/xen/xen-x86.h"
@@ -52,10 +52,11 @@ static bool xen_in_migration;
 
 /* Compatibility with older version */
 
-/* This allows QEMU to build on a system that has Xen 4.5 or earlier
- * installed.  This here (not in hw/xen/xen_common.h) because xen/hvm/ioreq.h
- * needs to be included before this block and hw/xen/xen_common.h needs to
- * be included before xen/hvm/ioreq.h
+/*
+ * This allows QEMU to build on a system that has Xen 4.5 or earlier installed.
+ * This is here (not in hw/xen/xen_native.h) because xen/hvm/ioreq.h needs to
+ * be included before this block and hw/xen/xen_native.h needs to be included
+ * before xen/hvm/ioreq.h
  */
 #ifndef IOREQ_TYPE_VMWARE_PORT
 #define IOREQ_TYPE_VMWARE_PORT  3
diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
index 

[PULL 21/27] hw/xen: Add emulated implementation of grant table operations

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This is limited to mapping a single grant at a time, because under Xen the
pages are mapped *contiguously* into qemu's address space, and that's very
hard to do when those pages actually come from anonymous mappings in qemu
in the first place.

Eventually perhaps we can look at using shared mappings of actual objects
for system RAM, and then we can make new mappings of the same backing
store (be it deleted files, shmem, whatever). But for now let's stick to
a page at a time.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_gnttab.c | 299 ++-
 1 file changed, 296 insertions(+), 3 deletions(-)

diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index 1e691ded32..2bf91d36c0 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -22,6 +22,7 @@
 
 #include "hw/sysbus.h"
 #include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
 #include "xen_overlay.h"
 #include "xen_gnttab.h"
 
@@ -34,11 +35,10 @@
 #define TYPE_XEN_GNTTAB "xen-gnttab"
 OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)
 
-#define XEN_PAGE_SHIFT 12
-#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
-
 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
 
+static struct gnttab_backend_ops emu_gnttab_backend_ops;
+
 struct XenGnttabState {
 /*< private >*/
 SysBusDevice busdev;
@@ -57,6 +57,8 @@ struct XenGnttabState {
 MemoryRegion gnt_frames;
 MemoryRegion *gnt_aliases;
 uint64_t *gnt_frame_gpas;
+
+uint8_t *map_track;
 };
 
 struct XenGnttabState *xen_gnttab_singleton;
@@ -88,9 +90,15 @@ static void xen_gnttab_realize(DeviceState *dev, Error 
**errp)
 s->gnt_frame_gpas[i] = INVALID_GPA;
 }
 
+s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
 qemu_mutex_init(>gnt_lock);
 
 xen_gnttab_singleton = s;
+
+s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);
+
+xen_gnttab_ops = _gnttab_backend_ops;
 }
 
 static int xen_gnttab_post_load(void *opaque, int version_id)
@@ -230,3 +238,288 @@ int xen_gnttab_query_size_op(struct gnttab_query_size 
*size)
 size->max_nr_frames = s->max_frames;
 return 0;
 }
+
+/* Track per-open refs, to allow close() to clean up. */
+struct active_ref {
+MemoryRegionSection mrs;
+void *virtaddr;
+uint32_t refcnt;
+int prot;
+};
+
+static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
+  MemoryRegionSection *mrs, int prot)
+{
+if (mrs && mrs->mr) {
+if (prot & PROT_WRITE) {
+memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
+XEN_PAGE_SIZE);
+}
+memory_region_unref(mrs->mr);
+mrs->mr = NULL;
+}
+assert(s->map_track[ref] != 0);
+
+if (--s->map_track[ref] == 0) {
+grant_entry_v1_t *gnt_p = >entries.v1[ref];
+qatomic_and(_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
+}
+}
+
+static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
+{
+uint16_t mask = GTF_type_mask | GTF_sub_page;
+grant_entry_v1_t gnt, *gnt_p;
+int retries = 0;
+
+if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
+s->map_track[ref] == UINT8_MAX) {
+return INVALID_GPA;
+}
+
+if (prot & PROT_WRITE) {
+mask |= GTF_readonly;
+}
+
+gnt_p = >entries.v1[ref];
+
+/*
+ * The guest can legitimately be changing the GTF_readonly flag. Allow
+ * that, but don't let a malicious guest cause a livelock.
+ */
+for (retries = 0; retries < 5; retries++) {
+uint16_t new_flags;
+
+/* Read the entry before an atomic operation on its flags */
+gnt = *(volatile grant_entry_v1_t *)gnt_p;
+
+if ((gnt.flags & mask) != GTF_permit_access ||
+gnt.domid != DOMID_QEMU) {
+return INVALID_GPA;
+}
+
+new_flags = gnt.flags | GTF_reading;
+if (prot & PROT_WRITE) {
+new_flags |= GTF_writing;
+}
+
+if (qatomic_cmpxchg(_p->flags, gnt.flags, new_flags) == gnt.flags) 
{
+return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
+}
+}
+
+return INVALID_GPA;
+}
+
+struct xengntdev_handle {
+GHashTable *active_maps;
+};
+
+static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
+uint32_t nr_grants)
+{
+return 0;
+}
+
+static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
+uint32_t count, uint32_t domid,
+uint32_t *refs, int prot)
+{
+XenGnttabState *s = xen_gnttab_singleton;
+struct active_ref *act;
+
+if (!s) {
+errno = ENOTSUP;
+return NULL;
+}
+
+if (domid != xen_domid) {
+errno = EINVAL;
+return NULL;
+}

[PULL 24/27] hw/xen: Implement soft reset for emulated gnttab

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This is only part of it; we will also need to get the PV back end drivers
to tear down their own mappings (or do it for them, but they kind of need
to stop using the pointers too).

Some more work on the actual PV back ends and xen-bus code is going to be
needed to really make soft reset and migration fully functional, and this
part is the basis for that.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_gnttab.c  | 26 --
 hw/i386/kvm/xen_gnttab.h  |  1 +
 target/i386/kvm/xen-emu.c |  5 +
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index 2bf91d36c0..21c30e3659 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -72,13 +72,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error 
**errp)
 error_setg(errp, "Xen grant table support is for Xen emulation");
 return;
 }
-s->nr_frames = 0;
 s->max_frames = kvm_xen_get_gnttab_max_frames();
 memory_region_init_ram(>gnt_frames, OBJECT(dev), "xen:grant_table",
XEN_PAGE_SIZE * s->max_frames, _abort);
 memory_region_set_enabled(>gnt_frames, true);
 s->entries.v1 = memory_region_get_ram_ptr(>gnt_frames);
-memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
 
 /* Create individual page-sizes aliases for overlays */
 s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
@@ -90,8 +88,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error 
**errp)
 s->gnt_frame_gpas[i] = INVALID_GPA;
 }
 
+s->nr_frames = 0;
+memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
 s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
 s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
+
 qemu_mutex_init(>gnt_lock);
 
 xen_gnttab_singleton = s;
@@ -523,3 +524,24 @@ static struct gnttab_backend_ops emu_gnttab_backend_ops = {
 .unmap = xen_be_gnttab_unmap,
 };
 
+int xen_gnttab_reset(void)
+{
+XenGnttabState *s = xen_gnttab_singleton;
+
+if (!s) {
+return -ENOTSUP;
+}
+
+QEMU_LOCK_GUARD(>gnt_lock);
+
+s->nr_frames = 0;
+
+memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
+
+s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
+
+memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1);
+
+return 0;
+}
diff --git a/hw/i386/kvm/xen_gnttab.h b/hw/i386/kvm/xen_gnttab.h
index 3bdbe96191..ee215239b0 100644
--- a/hw/i386/kvm/xen_gnttab.h
+++ b/hw/i386/kvm/xen_gnttab.h
@@ -13,6 +13,7 @@
 #define QEMU_XEN_GNTTAB_H
 
 void xen_gnttab_create(void);
+int xen_gnttab_reset(void);
 int xen_gnttab_map_page(uint64_t idx, uint64_t gfn);
 
 struct gnttab_set_version;
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index bad3131d08..0bb6c601c9 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -1406,6 +1406,11 @@ int kvm_xen_soft_reset(void)
 return err;
 }
 
+err = xen_gnttab_reset();
+if (err) {
+return err;
+}
+
 err = xen_xenstore_reset();
 if (err) {
 return err;
-- 
2.39.0




[PULL 23/27] hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

We don't actually access the guest's page through the grant, because
this isn't real Xen, and we can just use the page we gave it in the
first place. Map the grant anyway, mostly for cosmetic purposes so it
*looks* like it's in use in the guest-visible grant table.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_xenstore.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index bf466c71ed..2cadafd56a 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -21,6 +21,7 @@
 
 #include "hw/sysbus.h"
 #include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
 #include "xen_overlay.h"
 #include "xen_evtchn.h"
 #include "xen_xenstore.h"
@@ -34,6 +35,7 @@
 
 #include "hw/xen/interface/io/xs_wire.h"
 #include "hw/xen/interface/event_channel.h"
+#include "hw/xen/interface/grant_table.h"
 
 #define TYPE_XEN_XENSTORE "xen-xenstore"
 OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
@@ -66,6 +68,9 @@ struct XenXenstoreState {
 
 uint8_t *impl_state;
 uint32_t impl_state_size;
+
+struct xengntdev_handle *gt;
+void *granted_xs;
 };
 
 struct XenXenstoreState *xen_xenstore_singleton;
@@ -1453,6 +1458,17 @@ int xen_xenstore_reset(void)
 }
 s->be_port = err;
 
+/*
+ * We don't actually access the guest's page through the grant, because
+ * this isn't real Xen, and we can just use the page we gave it in the
+ * first place. Map the grant anyway, mostly for cosmetic purposes so
+ * it *looks* like it's in use in the guest-visible grant table.
+ */
+s->gt = qemu_xen_gnttab_open();
+uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
+s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, _gntref,
+ PROT_READ | PROT_WRITE);
+
 return 0;
 }
 
-- 
2.39.0




[PULL 07/27] hw/xen: Implement core serialize/deserialize methods for xenstore_impl

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This implements the basic migration support in the back end, with unit
tests that give additional confidence in the node-counting already in
the tree.

However, the existing PV back ends like xen-disk don't support migration
yet. They will reset the ring and fail to continue where they left off.
We will fix that in future, but not in time for the 8.0 release.

Since there's also an open question of whether we want to serialize the
full XenStore or only the guest-owned nodes in /local/domain/${domid},
for now just mark the XenStore device as unmigratable.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_xenstore.c  |  26 +-
 hw/i386/kvm/xenstore_impl.c | 574 +++-
 hw/i386/kvm/xenstore_impl.h |   5 +
 tests/unit/test-xs-node.c   | 236 ++-
 4 files changed, 825 insertions(+), 16 deletions(-)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 3b409e3817..520422b147 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -66,6 +66,9 @@ struct XenXenstoreState {
 evtchn_port_t guest_port;
 evtchn_port_t be_port;
 struct xenevtchn_handle *eh;
+
+uint8_t *impl_state;
+uint32_t impl_state_size;
 };
 
 struct XenXenstoreState *xen_xenstore_singleton;
@@ -109,16 +112,26 @@ static bool xen_xenstore_is_needed(void *opaque)
 static int xen_xenstore_pre_save(void *opaque)
 {
 XenXenstoreState *s = opaque;
+GByteArray *save;
 
 if (s->eh) {
 s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
 }
+
+g_free(s->impl_state);
+save = xs_impl_serialize(s->impl);
+s->impl_state = save->data;
+s->impl_state_size = save->len;
+g_byte_array_free(save, false);
+
 return 0;
 }
 
 static int xen_xenstore_post_load(void *opaque, int ver)
 {
 XenXenstoreState *s = opaque;
+GByteArray *save;
+int ret;
 
 /*
  * As qemu/dom0, rebind to the guest's port. The Windows drivers may
@@ -135,11 +148,18 @@ static int xen_xenstore_post_load(void *opaque, int ver)
 }
 s->be_port = be_port;
 }
-return 0;
+
+save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
+s->impl_state = NULL;
+s->impl_state_size = 0;
+
+ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
+return ret;
 }
 
 static const VMStateDescription xen_xenstore_vmstate = {
 .name = "xen_xenstore",
+.unmigratable = 1, /* The PV back ends don't migrate yet */
 .version_id = 1,
 .minimum_version_id = 1,
 .needed = xen_xenstore_is_needed,
@@ -155,6 +175,10 @@ static const VMStateDescription xen_xenstore_vmstate = {
 VMSTATE_BOOL(rsp_pending, XenXenstoreState),
 VMSTATE_UINT32(guest_port, XenXenstoreState),
 VMSTATE_BOOL(fatal_error, XenXenstoreState),
+VMSTATE_UINT32(impl_state_size, XenXenstoreState),
+VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
+impl_state_size, 0,
+vmstate_info_uint8, uint8_t),
 VMSTATE_END_OF_LIST()
 }
 };
diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 8a2053e243..305fe75519 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -37,6 +37,7 @@ typedef struct XsNode {
 uint64_t gencnt;
 bool deleted_in_tx;
 bool modified_in_tx;
+unsigned int serialized_tx;
 #ifdef XS_NODE_UNIT_TEST
 gchar *name; /* debug only */
 #endif
@@ -68,6 +69,7 @@ struct XenstoreImplState {
 unsigned int nr_domu_transactions;
 unsigned int root_tx;
 unsigned int last_tx;
+bool serialized;
 };
 
 
@@ -1156,8 +1158,10 @@ int xs_impl_set_perms(XenstoreImplState *s, unsigned int 
dom_id,
 return xs_node_walk(n, );
 }
 
-int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path,
-  const char *token, xs_impl_watch_fn fn, void *opaque)
+static int do_xs_impl_watch(XenstoreImplState *s, unsigned int dom_id,
+const char *path, const char *token,
+xs_impl_watch_fn fn, void *opaque)
+
 {
 char abspath[XENSTORE_ABS_PATH_MAX + 1];
 XsWatch *w, *l;
@@ -1200,12 +1204,22 @@ int xs_impl_watch(XenstoreImplState *s, unsigned int 
dom_id, const char *path,
 s->nr_domu_watches++;
 }
 
-/* A new watch should fire immediately */
-fn(opaque, path, token);
-
 return 0;
 }
 
+int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path,
+  const char *token, xs_impl_watch_fn fn, void *opaque)
+{
+int ret = do_xs_impl_watch(s, dom_id, path, token, fn, opaque);
+
+if (!ret) {
+/* A new watch should fire immediately */
+fn(opaque, path, token);
+}
+
+return ret;
+}
+
 static XsWatch *free_watch(XenstoreImplState *s, XsWatch *w)
 {
 XsWatch *next = w->next;
@@ -1361,3 +1375,553 @@ 

[PULL 19/27] hw/xen: Only advertise ring-page-order for xen-block if gnttab supports it

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

When emulating Xen, multi-page grants are distinctly non-trivial and we
have elected not to support them for the time being. Don't advertise
them to the guest.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/block/xen-block.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 87299615e3..f5a744589d 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -83,7 +83,8 @@ static void xen_block_connect(XenDevice *xendev, Error **errp)
 g_free(ring_ref);
 return;
 }
-} else if (order <= blockdev->props.max_ring_page_order) {
+} else if (qemu_xen_gnttab_can_map_multi() &&
+   order <= blockdev->props.max_ring_page_order) {
 unsigned int i;
 
 nr_ring_ref = 1 << order;
@@ -255,8 +256,12 @@ static void xen_block_realize(XenDevice *xendev, Error 
**errp)
 }
 
 xen_device_backend_printf(xendev, "feature-flush-cache", "%u", 1);
-xen_device_backend_printf(xendev, "max-ring-page-order", "%u",
-  blockdev->props.max_ring_page_order);
+
+if (qemu_xen_gnttab_can_map_multi()) {
+xen_device_backend_printf(xendev, "max-ring-page-order", "%u",
+  blockdev->props.max_ring_page_order);
+}
+
 xen_device_backend_printf(xendev, "info", "%u", blockdev->info);
 
 xen_device_frontend_printf(xendev, "virtual-device", "%lu",
-- 
2.39.0




[PULL 00/27] Enable PV backends with Xen/KVM emulation

2023-03-07 Thread David Woodhouse
The following changes since commit 9832009d9dd2386664c15cc70f6e6bfe062be8bd:

  Merge tag 'pull-riscv-to-apply-20230306' of 
https://gitlab.com/palmer-dabbelt/qemu into staging (2023-03-07 12:53:00 +0000)

are available in the Git repository at:

  git://git.infradead.org/users/dwmw2/qemu.git refs/tags/xenfv-2 

for you to fetch changes up to 154eac37190c4d80d29b09c226abd899e397530f:

  docs: Update Xen-on-KVM documentation for PV disk support (2023-03-07 
17:04:30 +0000)


Tested-by: Paul Durrant 
... on real Xen (master branch, 4.18) with a Debian guest.


David Woodhouse (23):
  hw/xen: Add xenstore wire implementation and implementation stubs
  hw/xen: Add basic XenStore tree walk and write/read/directory support
  hw/xen: Implement XenStore watches
  hw/xen: Implement XenStore transactions
  hw/xen: Watches on XenStore transactions
  hw/xen: Implement core serialize/deserialize methods for xenstore_impl
  hw/xen: Add evtchn operations to allow redirection to internal emulation
  hw/xen: Add gnttab operations to allow redirection to internal emulation
  hw/xen: Pass grant ref to gnttab unmap operation
  hw/xen: Add foreignmem operations to allow redirection to internal 
emulation
  hw/xen: Move xenstore_store_pv_console_info to xen_console.c
  hw/xen: Use XEN_PAGE_SIZE in PV backend drivers
  hw/xen: Rename xen_common.h to xen_native.h
  hw/xen: Build PV backend drivers for CONFIG_XEN_BUS
  hw/xen: Only advertise ring-page-order for xen-block if gnttab supports it
  hw/xen: Hook up emulated implementation for event channel operations
  hw/xen: Add emulated implementation of grant table operations
  hw/xen: Add emulated implementation of XenStore operations
  hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore
  hw/xen: Implement soft reset for emulated gnttab
  i386/xen: Initialize Xen backends from pc_basic_device_init() for 
emulation
  MAINTAINERS: Add entry for Xen on KVM emulation
  docs: Update Xen-on-KVM documentation for PV disk support

Paul Durrant (4):
  hw/xen: Implement XenStore permissions
  hw/xen: Create initial XenStore nodes
  hw/xen: Add xenstore operations to allow redirection to internal emulation
  hw/xen: Avoid crash when backend watch fires too early

 MAINTAINERS   |9 +
 accel/xen/xen-all.c   |   69 +-
 docs/system/i386/xen.rst  |   30 +-
 hw/9pfs/meson.build   |2 +-
 hw/9pfs/xen-9p-backend.c  |   32 +-
 hw/block/dataplane/meson.build|2 +-
 hw/block/dataplane/xen-block.c|   12 +-
 hw/block/meson.build  |2 +-
 hw/block/xen-block.c  |   12 +-
 hw/char/meson.build   |2 +-
 hw/char/xen_console.c |   57 +-
 hw/display/meson.build|2 +-
 hw/display/xenfb.c|   32 +-
 hw/i386/kvm/meson.build   |1 +
 hw/i386/kvm/trace-events  |   15 +
 hw/i386/kvm/xen_evtchn.c  |   15 +
 hw/i386/kvm/xen_gnttab.c  |  325 -
 hw/i386/kvm/xen_gnttab.h  |1 +
 hw/i386/kvm/xen_xenstore.c| 1251 +++-
 hw/i386/kvm/xenstore_impl.c   | 1927 +
 hw/i386/kvm/xenstore_impl.h   |   63 +
 hw/i386/pc.c  |7 +
 hw/i386/pc_piix.c |4 +-
 hw/i386/xen/xen-hvm.c |   38 +-
 hw/i386/xen/xen-mapcache.c|2 +-
 hw/i386/xen/xen_platform.c|7 +-
 hw/net/xen_nic.c  |   25 +-
 hw/usb/meson.build|2 +-
 hw/usb/xen-usb.c  |   29 +-
 hw/xen/meson.build|6 +-
 hw/xen/trace-events   |2 +-
 hw/xen/xen-bus-helper.c   |   62 +-
 hw/xen/xen-bus.c  |  411 +-
 hw/xen/xen-legacy-backend.c   |  254 +---
 hw/xen/xen-operations.c   |  478 ++
 hw/xen/xen_devconfig.c|4 +-
 hw/xen/xen_pt.c   |2 +-
 hw/xen/xen_pt.h   |2 +-
 hw/xen/xen_pt_config_init.c   |2 +-
 hw/xen/xen_pt_graphics.c  |1 -
 hw/xen/xen_pt_msi.c   |4 +-
 hw/xen/xen_pvdev.c|   63 +-
 include/hw/xen/xen-bus-helper.h   |   26 +-
 include/hw/xen/xen-bus.h  |   21 +-
 include/hw/xen/xen-legacy-backend.h   |   

[PULL 05/27] hw/xen: Watches on XenStore transactions

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Firing watches on the nodes that still exist is relatively easy; just
walk the tree and look at the nodes with refcount of one.

Firing watches on *deleted* nodes is more fun. We add 'modified_in_tx'
and 'deleted_in_tx' flags to each node. Nodes with those flags cannot
be shared, as they will always be unique to the transaction in which
they were created.

When xs_node_walk would need to *create* a node as scaffolding and it
encounters a deleted_in_tx node, it can resurrect it simply by clearing
its deleted_in_tx flag. If that node originally had any *data*, they're
gone, and the modified_in_tx flag will have been set when it was first
deleted.

We then attempt to send appropriate watches when the transaction is
committed, properly delete the deleted_in_tx nodes, and remove the
modified_in_tx flag from the others.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xenstore_impl.c | 151 ++-
 tests/unit/test-xs-node.c   | 231 +++-
 2 files changed, 380 insertions(+), 2 deletions(-)

diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 0812e367b0..60f42f61d6 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -32,6 +32,8 @@ typedef struct XsNode {
 GByteArray *content;
 GHashTable *children;
 uint64_t gencnt;
+bool deleted_in_tx;
+bool modified_in_tx;
 #ifdef XS_NODE_UNIT_TEST
 gchar *name; /* debug only */
 #endif
@@ -153,6 +155,13 @@ static XsNode *xs_node_copy(XsNode *old)
 XsNode *n = xs_node_new();
 
 n->gencnt = old->gencnt;
+
+#ifdef XS_NODE_UNIT_TEST
+if (n->name) {
+n->name = g_strdup(old->name);
+}
+#endif
+
 if (old->children) {
 n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
 (GDestroyNotify)xs_node_unref);
@@ -221,6 +230,9 @@ struct walk_op {
 bool mutating;
 bool create_dirs;
 bool in_transaction;
+
+/* Tracking during recursion so we know which is first. */
+bool deleted_in_tx;
 };
 
 static void fire_watches(struct walk_op *op, bool parents)
@@ -277,6 +289,9 @@ static int xs_node_add_content(XsNode **n, struct walk_op 
*op)
 g_byte_array_unref((*n)->content);
 }
 (*n)->content = g_byte_array_ref(data);
+if (op->tx_id != XBT_NULL) {
+(*n)->modified_in_tx = true;
+}
 return 0;
 }
 
@@ -333,10 +348,62 @@ static int node_rm_recurse(gpointer key, gpointer value, 
gpointer user_data)
 return this_inplace;
 }
 
+static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op);
+static void copy_deleted_recurse(gpointer key, gpointer value,
+ gpointer user_data)
+{
+struct walk_op *op = user_data;
+GHashTable *siblings = op->op_opaque2;
+XsNode *n = xs_node_copy_deleted(value, op);
+
+/*
+ * Reinsert the deleted_in_tx copy of the node into the parent's
+ * 'children' hash table. Having stashed it from op->op_opaque2
+ * before the recursive call to xs_node_copy_deleted() scribbled
+ * over it.
+ */
+g_hash_table_insert(siblings, g_strdup(key), n);
+}
+
+static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op)
+{
+XsNode *n = xs_node_new();
+
+n->gencnt = old->gencnt;
+
+#ifdef XS_NODE_UNIT_TEST
+if (old->name) {
+n->name = g_strdup(old->name);
+}
+#endif
+
+if (old->children) {
+n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+(GDestroyNotify)xs_node_unref);
+op->op_opaque2 = n->children;
+g_hash_table_foreach(old->children, copy_deleted_recurse, op);
+}
+n->deleted_in_tx = true;
+/* If it gets resurrected we only fire a watch if it lost its content */
+if (old->content) {
+n->modified_in_tx = true;
+}
+op->new_nr_nodes--;
+return n;
+}
+
 static int xs_node_rm(XsNode **n, struct walk_op *op)
 {
 bool this_inplace = op->inplace;
 
+if (op->tx_id != XBT_NULL) {
+/* It's not trivial to do inplace handling for this one */
+XsNode *old = *n;
+*n = xs_node_copy_deleted(old, op);
+xs_node_unref(old);
+return 0;
+}
+
 /* Fire watches for, and count, nodes in the subtree which get deleted */
 if ((*n)->children) {
 g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op);
@@ -408,6 +475,10 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 }
 
 if (child) {
+if (child->deleted_in_tx) {
+assert(child->ref == 1);
+/* Cannot actually set child->deleted_in_tx = false until later */
+}
 xs_node_ref(child);
 /*
  * Now we own it too. But if we can modify inplace, that's going to
@@ -475,6 +546,15 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 xs_node_unref(old);
 }
 

[PULL 17/27] hw/xen: Build PV backend drivers for CONFIG_XEN_BUS

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Now that we have the redirectable Xen backend operations we can build the
PV backends even without the Xen libraries.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/9pfs/meson.build| 2 +-
 hw/block/dataplane/meson.build | 2 +-
 hw/block/meson.build   | 2 +-
 hw/char/meson.build| 2 +-
 hw/display/meson.build | 2 +-
 hw/usb/meson.build | 2 +-
 hw/xen/meson.build | 5 -
 7 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build
index 12443b6ad5..fd37b7a02d 100644
--- a/hw/9pfs/meson.build
+++ b/hw/9pfs/meson.build
@@ -15,7 +15,7 @@ fs_ss.add(files(
 ))
 fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c'))
 fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c'))
-fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c'))
+fs_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-9p-backend.c'))
 softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss)
 
 specific_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-device.c'))
diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build
index 12c6a264f1..78d7ac1a11 100644
--- a/hw/block/dataplane/meson.build
+++ b/hw/block/dataplane/meson.build
@@ -1,2 +1,2 @@
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
-specific_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
+specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c'))
diff --git a/hw/block/meson.build b/hw/block/meson.build
index b434d5654c..cc2a75cc50 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: 
files('pflash_cfi02.c'))
 softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c'))
 softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c'))
 softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c'))
 softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c'))
 
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 
'virtio-blk-common.c'))
diff --git a/hw/char/meson.build b/hw/char/meson.build
index 7b594f51b8..e02c60dd54 100644
--- a/hw/char/meson.build
+++ b/hw/char/meson.build
@@ -18,7 +18,7 @@ softmmu_ss.add(when: 'CONFIG_SERIAL_PCI', if_true: 
files('serial-pci.c'))
 softmmu_ss.add(when: 'CONFIG_SERIAL_PCI_MULTI', if_true: 
files('serial-pci-multi.c'))
 softmmu_ss.add(when: 'CONFIG_SHAKTI_UART', if_true: files('shakti_uart.c'))
 softmmu_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: 
files('virtio-console.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen_console.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen_console.c'))
 softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_uartlite.c'))
 
 softmmu_ss.add(when: 'CONFIG_AVR_USART', if_true: files('avr_usart.c'))
diff --git a/hw/display/meson.build b/hw/display/meson.build
index f470179122..4191694380 100644
--- a/hw/display/meson.build
+++ b/hw/display/meson.build
@@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PL110', if_true: 
files('pl110.c'))
 softmmu_ss.add(when: 'CONFIG_SII9022', if_true: files('sii9022.c'))
 softmmu_ss.add(when: 'CONFIG_SSD0303', if_true: files('ssd0303.c'))
 softmmu_ss.add(when: 'CONFIG_SSD0323', if_true: files('ssd0323.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xenfb.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xenfb.c'))
 
 softmmu_ss.add(when: 'CONFIG_VGA_PCI', if_true: files('vga-pci.c'))
 softmmu_ss.add(when: 'CONFIG_VGA_ISA', if_true: files('vga-isa.c'))
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index bdf34cbd3e..599dc24f0d 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -84,6 +84,6 @@ if libusb.found()
   hw_usb_modules += {'host': usbhost_ss}
 endif
 
-softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN', libusb], if_true: 
files('xen-usb.c'))
+softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN_BUS', libusb], if_true: 
files('xen-usb.c'))
 
 modules += { 'hw-usb': hw_usb_modules }
diff --git a/hw/xen/meson.build b/hw/xen/meson.build
index f195bbd25c..19c6aabc7c 100644
--- a/hw/xen/meson.build
+++ b/hw/xen/meson.build
@@ -1,10 +1,13 @@
-softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
+softmmu_ss.add(when: ['CONFIG_XEN_BUS'], if_true: files(
   'xen-backend.c',
   'xen-bus-helper.c',
   'xen-bus.c',
   'xen-legacy-backend.c',
   'xen_devconfig.c',
   'xen_pvdev.c',
+))
+
+softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
   'xen-operations.c',
 ))
 
-- 
2.39.0




[PULL 03/27] hw/xen: Implement XenStore watches

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Starts out fairly simple: a hash table of watches based on the path.

Except there can be multiple watches on the same path, so the watch ends
up being a simple linked list, and the head of that list is in the hash
table. Which makes removal a bit of a PITA but it's not so bad; we just
special-case "I had to remove the head of the list and now I have to
replace it in / remove it from the hash table". And if we don't remove
the head, it's a simple linked-list operation.

We do need to fire watches on *deleted* nodes, so instead of just a simple
xs_node_unref() on the topmost victim, we need to recurse down and fire
watches on them all.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xenstore_impl.c | 253 +---
 tests/unit/test-xs-node.c   |  85 
 2 files changed, 323 insertions(+), 15 deletions(-)

diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 9e10a31bea..9c2348835f 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -37,9 +37,20 @@ typedef struct XsNode {
 #endif
 } XsNode;
 
+typedef struct XsWatch {
+struct XsWatch *next;
+xs_impl_watch_fn *cb;
+void *cb_opaque;
+char *token;
+unsigned int dom_id;
+int rel_prefix;
+} XsWatch;
+
 struct XenstoreImplState {
 XsNode *root;
 unsigned int nr_nodes;
+GHashTable *watches;
+unsigned int nr_domu_watches;
 };
 
 static inline XsNode *xs_node_new(void)
@@ -146,6 +157,7 @@ struct walk_op {
 void *op_opaque;
 void *op_opaque2;
 
+GList *watches;
 unsigned int dom_id;
 
 /* The number of nodes which will exist in the tree if this op succeeds. */
@@ -166,6 +178,35 @@ struct walk_op {
 bool create_dirs;
 };
 
+static void fire_watches(struct walk_op *op, bool parents)
+{
+GList *l = NULL;
+XsWatch *w;
+
+if (!op->mutating) {
+return;
+}
+
+if (parents) {
+l = op->watches;
+}
+
+w = g_hash_table_lookup(op->s->watches, op->path);
+while (w || l) {
+if (!w) {
+/* Fire the parent nodes from 'op' if asked to */
+w = l->data;
+l = l->next;
+continue;
+}
+
+assert(strlen(op->path) > w->rel_prefix);
+w->cb(w->cb_opaque, op->path + w->rel_prefix, w->token);
+
+w = w->next;
+}
+}
+
 static int xs_node_add_content(XsNode **n, struct walk_op *op)
 {
 GByteArray *data = op->op_opaque;
@@ -213,6 +254,8 @@ static int xs_node_get_content(XsNode **n, struct walk_op 
*op)
 static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data)
 {
 struct walk_op *op = user_data;
+int path_len = strlen(op->path);
+int key_len = strlen(key);
 XsNode *n = value;
 bool this_inplace = op->inplace;
 
@@ -220,11 +263,22 @@ static int node_rm_recurse(gpointer key, gpointer value, 
gpointer user_data)
 op->inplace = 0;
 }
 
+assert(key_len + path_len + 2 <= sizeof(op->path));
+op->path[path_len] = '/';
+memcpy(op->path + path_len + 1, key, key_len + 1);
+
 if (n->children) {
 g_hash_table_foreach_remove(n->children, node_rm_recurse, op);
 }
 op->new_nr_nodes--;
 
+/*
+ * Fire watches on *this* node but not the parents because they are
+ * going to be deleted too, so the watch will fire for them anyway.
+ */
+fire_watches(op, false);
+op->path[path_len] = '\0';
+
 /*
  * Actually deleting the child here is just an optimisation; if we
  * don't then the final unref on the topmost victim will just have
@@ -238,7 +292,7 @@ static int xs_node_rm(XsNode **n, struct walk_op *op)
 {
 bool this_inplace = op->inplace;
 
-/* Keep count of the nodes in the subtree which gets deleted. */
+/* Fire watches for, and count, nodes in the subtree which get deleted */
 if ((*n)->children) {
 g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op);
 }
@@ -269,9 +323,11 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 XsNode *old = *n, *child = NULL;
 bool stole_child = false;
 bool this_inplace;
+XsWatch *watch;
 int err;
 
 namelen = strlen(op->path);
+watch = g_hash_table_lookup(op->s->watches, op->path);
 
 /* Is there a child, or do we hit the double-NUL termination? */
 if (op->path[namelen + 1]) {
@@ -292,6 +348,9 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 if (!child_name) {
 /* This is the actual node on which the operation shall be performed */
 err = op->op_fn(n, op);
+if (!err) {
+fire_watches(op, true);
+}
 goto out;
 }
 
@@ -333,11 +392,24 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 goto out;
 }
 
+/*
+ * If there's a watch on this node, add it to the list to be fired
+ * (with the correct full pathname for the modified node) at the end.
+ */
+   

[PULL 22/27] hw/xen: Add emulated implementation of XenStore operations

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Now that we have an internal implementation of XenStore, we can populate
the xenstore_backend_ops to allow PV backends to talk to it.

Watches can't be processed with immediate callbacks because that would
call back into XenBus code recursively. Defer them to a QEMUBH to be run
as appropriate from the main loop. We use a QEMUBH per XS handle, and it
walks all the watches (there shouldn't be many per handle) to fire any
which have pending events. We *could* have done it differently but this
allows us to use the same struct watch_event as we have for the guest
side, and keeps things relatively simple.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_xenstore.c | 273 -
 1 file changed, 269 insertions(+), 4 deletions(-)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 35898e9b37..bf466c71ed 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -49,7 +49,7 @@ struct XenXenstoreState {
 /*< public >*/
 
 XenstoreImplState *impl;
-GList *watch_events;
+GList *watch_events; /* for the guest */
 
 MemoryRegion xenstore_page;
 struct xenstore_domain_interface *xs;
@@ -73,6 +73,8 @@ struct XenXenstoreState *xen_xenstore_singleton;
 static void xen_xenstore_event(void *opaque);
 static void fire_watch_cb(void *opaque, const char *path, const char *token);
 
+static struct xenstore_backend_ops emu_xenstore_backend_ops;
+
 static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
 GList *perms,
 const char *relpath,
@@ -169,6 +171,8 @@ static void xen_xenstore_realize(DeviceState *dev, Error 
**errp)
 relpath_printf(s, perms, "feature", "%s", "");
 
 g_list_free_full(perms, g_free);
+
+xen_xenstore_ops = &emu_xenstore_backend_ops;
 }
 
 static bool xen_xenstore_is_needed(void *opaque)
@@ -1306,6 +1310,15 @@ struct watch_event {
 char *token;
 };
 
+static void free_watch_event(struct watch_event *ev)
+{
+if (ev) {
+g_free(ev->path);
+g_free(ev->token);
+g_free(ev);
+}
+}
+
 static void queue_watch(XenXenstoreState *s, const char *path,
 const char *token)
 {
@@ -1352,9 +1365,7 @@ static void process_watch_events(XenXenstoreState *s)
 deliver_watch(s, ev->path, ev->token);
 
 s->watch_events = g_list_remove(s->watch_events, ev);
-g_free(ev->path);
-g_free(ev->token);
-g_free(ev);
+free_watch_event(ev);
 }
 
 static void xen_xenstore_event(void *opaque)
@@ -1444,3 +1455,257 @@ int xen_xenstore_reset(void)
 
 return 0;
 }
+
+struct qemu_xs_handle {
+XenstoreImplState *impl;
+GList *watches;
+QEMUBH *watch_bh;
+};
+
+struct qemu_xs_watch {
+struct qemu_xs_handle *h;
+char *path;
+xs_watch_fn fn;
+void *opaque;
+GList *events;
+};
+
+static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int 
domid)
+{
+return g_strdup_printf("/local/domain/%u", domid);
+}
+
+static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
+  const char *path, unsigned int *num)
+{
+GList *items = NULL, *l;
+unsigned int i = 0;
+char **items_ret;
+int err;
+
+err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
+if (err) {
+errno = err;
+return NULL;
+}
+
+items_ret = g_new0(char *, g_list_length(items) + 1);
+*num = 0;
+for (l = items; l; l = l->next) {
+items_ret[i++] = l->data;
+(*num)++;
+}
+g_list_free(items);
+return items_ret;
+}
+
+static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
+const char *path, unsigned int *len)
+{
+GByteArray *data = g_byte_array_new();
+bool free_segment = false;
+int err;
+
+err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
+if (err) {
+free_segment = true;
+errno = err;
+} else {
+if (len) {
+*len = data->len;
+}
+/* The xen-bus-helper code expects to get NUL terminated string! */
+g_byte_array_append(data, (void *)"", 1);
+}
+
+return g_byte_array_free(data, free_segment);
+}
+
+static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
+const char *path, const void *data, unsigned int len)
+{
+GByteArray *gdata = g_byte_array_new();
+int err;
+
+g_byte_array_append(gdata, data, len);
+err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata);
+g_byte_array_unref(gdata);
+if (err) {
+errno = err;
+return false;
+}
+return true;
+}
+
+static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
+ unsigned int owner, unsigned int domid,
+ unsigned int perms, const char *path)
+{
+

[libvirt test] 179462: tolerable trouble: pass/starved - PUSHED

2023-03-07 Thread osstest service owner
flight 179462 libvirt real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179462/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-amd64-i386-libvirt-xsm  15 migrate-support-check fail   never pass
 test-amd64-i386-libvirt  15 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt 15 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt 16 saverestore-support-check fail   never pass
 test-arm64-arm64-libvirt-xsm 15 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-xsm 16 saverestore-support-check fail   never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-amd64-amd64-libvirt 15 migrate-support-check fail   never pass
 test-amd64-i386-libvirt-raw  14 migrate-support-check fail   never pass
 test-amd64-amd64-libvirt-vhd 14 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-qcow2 14 migrate-support-check fail never pass
 test-arm64-arm64-libvirt-qcow2 15 saverestore-support-check fail never pass
 test-arm64-arm64-libvirt-raw 14 migrate-support-check fail   never pass
 test-arm64-arm64-libvirt-raw 15 saverestore-support-check fail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 build-armhf-libvirt   1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt-raw  1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt  1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt-qcow2  1 build-check(1)   starved  n/a
 build-armhf   2 hosts-allocate   starved  n/a

version targeted for testing:
 libvirt  9fecdaf1c80f8fb4390c77b4d353ad07ba77c87a
baseline version:
 libvirt  cea8402e1c322a25ec944d1c36e902fac31d4331

Last test of basis   179154  2023-03-04 04:20:24 Z    3 days
Testing same since   179462  2023-03-07 04:20:39 Z    0 days    1 attempts


People who touched revisions under test:
  Andrea Bolognani 
  Michal Privoznik 
  Peter Krempa 

jobs:
 build-amd64-xsm  pass
 build-arm64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-arm64  pass
 build-armhf  starved 
 build-i386   pass
 build-amd64-libvirt  pass
 build-arm64-libvirt  pass
 build-armhf-libvirt  starved 
 build-i386-libvirt   pass
 build-amd64-pvops pass
 build-arm64-pvops pass
 build-armhf-pvops pass
 build-i386-pvops pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm   pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsmpass
 test-amd64-amd64-libvirt-xsm pass
 test-arm64-arm64-libvirt-xsm pass
 test-amd64-i386-libvirt-xsm  pass
 test-amd64-amd64-libvirt pass
 test-arm64-arm64-libvirt pass
 test-armhf-armhf-libvirt starved 
 test-amd64-i386-libvirt  pass
 test-amd64-amd64-libvirt-pair pass
 test-amd64-i386-libvirt-pair pass
 test-arm64-arm64-libvirt-qcow2   pass
 test-armhf-armhf-libvirt-qcow2   starved 
 test-arm64-arm64-libvirt-raw pass
 test-armhf-armhf-libvirt-raw starved 
 test-amd64-i386-libvirt-raw  pass
 test-amd64-amd64-libvirt-vhd pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at

Re: [PATCH v2 00/27] Enable PV backends with Xen/KVM emulation

2023-03-07 Thread Paul Durrant

On 07/03/2023 17:17, David Woodhouse wrote:

Following on from the basic platform support which has already been
merged, here's phase 2 which wires up the XenBus and PV back ends.

It starts with a basic single-tenant internal implementation of a
XenStore, with a copy-on-write tree, watches, transactions, quotas.

Then we introduce operations tables for the grant table, event channel,
foreignmem and xenstore operations so that in addition to using the Xen
libraries for those, QEMU can use its internal emulated versions.

A little bit of cleaning up of header files, and we can enable the build
of xen-bus in the CONFIG_XEN_EMU build, and run a Xen guest with an
actual PV disk...

qemu-system-x86_64 -serial mon:stdio -M q35 -display none -m 1G -smp 2 \
   -accel kvm,xen-version=0x4000e,kernel-irqchip=split \
   -kernel bzImage -append "console=ttyS0 root=/dev/xvda1 selinux=0" \
   -drive file=/var/lib/libvirt/images/fedora28.qcow2,if=none,id=disk \
   -device xen-disk,drive=disk,vdev=xvda

The main thing that isn't working here is migration. I've implemented it
for the internal xenstore and the unit tests exercise it, but the
existing PV back ends don't support it, perhaps partly because guest
transparent live migration support isn't upstream in Xen yet.
So the disk doesn't come back correctly after migration. I'm content
with that for 8.0 though, and we just mark the emulated XenStore device
as unmigratable to prevent users from trying.

The other pre-existing constraint is that only the block back end has
yet been ported to the "new" XenBus infrastructure, and is actually
capable of creating its own backend nodes. Again, I can live with
that for 8.0. Maybe this will motivate us to finally get round to
converting the rest off XenLegacyBackend and killing it.

We also don't have a simple way to perform grant mapping of multiple
guest pages to contiguous addresses, as we can under real Xen. So we
don't advertise max-ring-page-order for xen-disk in the emulated mode.
Fixing that — if we actually want to — would probably require mapping
RAM from an actual backing store object, so that it can be mapped again
at a different location for the PV back end to see.

v2: https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv-2

  • Full set of reviewed-by tags from Paul (and associated minor fixes).

  • Disable migration for emulated XenStore device.

  • Update docs and add MAINTAINERS entry.

v1: 
https://lore.kernel.org/qemu-devel/20230302153435.1170111-1-dw...@infradead.org/
 https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv-1

David Woodhouse (23):
   hw/xen: Add xenstore wire implementation and implementation stubs
   hw/xen: Add basic XenStore tree walk and write/read/directory support
   hw/xen: Implement XenStore watches
   hw/xen: Implement XenStore transactions
   hw/xen: Watches on XenStore transactions
   hw/xen: Implement core serialize/deserialize methods for xenstore_impl
   hw/xen: Add evtchn operations to allow redirection to internal emulation
   hw/xen: Add gnttab operations to allow redirection to internal emulation
   hw/xen: Pass grant ref to gnttab unmap operation
   hw/xen: Add foreignmem operations to allow redirection to internal 
emulation
   hw/xen: Move xenstore_store_pv_console_info to xen_console.c
   hw/xen: Use XEN_PAGE_SIZE in PV backend drivers
   hw/xen: Rename xen_common.h to xen_native.h
   hw/xen: Build PV backend drivers for CONFIG_XEN_BUS
   hw/xen: Only advertise ring-page-order for xen-block if gnttab supports 
it
   hw/xen: Hook up emulated implementation for event channel operations
   hw/xen: Add emulated implementation of grant table operations
   hw/xen: Add emulated implementation of XenStore operations
   hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore
   hw/xen: Implement soft reset for emulated gnttab
   i386/xen: Initialize Xen backends from pc_basic_device_init() for 
emulation
   MAINTAINERS: Add entry for Xen on KVM emulation
   docs: Update Xen-on-KVM documentation for PV disk support

Paul Durrant (4):
   hw/xen: Implement XenStore permissions
   hw/xen: Create initial XenStore nodes
   hw/xen: Add xenstore operations to allow redirection to internal 
emulation
   hw/xen: Avoid crash when backend watch fires too early

  MAINTAINERS   |9 +
  accel/xen/xen-all.c   |   69 +-
  docs/system/i386/xen.rst  |   30 +-
  hw/9pfs/meson.build   |2 +-
  hw/9pfs/xen-9p-backend.c  |   32 +-
  hw/block/dataplane/meson.build|2 +-
  hw/block/dataplane/xen-block.c|   12 +-
  hw/block/meson.build  |2 +-
  hw/block/xen-block.c  |   12 +-
  hw/char/meson.build  

Re: [PATCH v2 25/27] i386/xen: Initialize Xen backends from pc_basic_device_init() for emulation

2023-03-07 Thread Paul Durrant

On 07/03/2023 17:17, David Woodhouse wrote:

From: David Woodhouse 

Now that all the work is done to enable the PV backends to work without
actual Xen, instantiate the bus from pc_basic_device_init() for emulated
mode.

This allows us finally to launch an emulated Xen guest with PV disk.

qemu-system-x86_64 -serial mon:stdio -M q35 -cpu host -display none \
  -m 1G -smp 2 -accel kvm,xen-version=0x4000a,kernel-irqchip=split \
  -kernel bzImage -append "console=ttyS0 root=/dev/xvda1" \
  -drive file=/var/lib/libvirt/images/fedora28.qcow2,if=none,id=disk \
  -device xen-disk,drive=disk,vdev=xvda

If we use -M pc instead of q35, we can even add an IDE disk and boot a
guest image normally through grub. But q35 gives us AHCI and that isn't
unplugged by the Xen magic, so the guest ends up seeing "both" disks.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
  hw/i386/pc.c | 7 +++
  1 file changed, 7 insertions(+)



Also...

Tested-by: Paul Durrant 

... on real Xen (master branch, 4.18) with a Debian guest.




[ovmf test] 179498: all pass - PUSHED

2023-03-07 Thread osstest service owner
flight 179498 ovmf real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179498/

Perfect :-)
All tests in this flight passed as required
version targeted for testing:
 ovmf 75fb0cfc82376906243386514be0e4067d702117
baseline version:
 ovmf aa1cd447b346e8cc8141df2fe2d321b032c08acb

Last test of basis   179496  2023-03-07 11:42:13 Z    0 days
Testing same since   179498  2023-03-07 15:44:14 Z    0 days    1 attempts


People who touched revisions under test:
  Pierre Gondois 

jobs:
 build-amd64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvops pass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/osstest/ovmf.git
   aa1cd447b3..75fb0cfc82  75fb0cfc82376906243386514be0e4067d702117 -> 
xen-tested-master



Re: [PATCH v1 0/2] Fix ARM Generic Timer interrupt parsing

2023-03-07 Thread Andrei Cherechesu



On 07/03/2023 17:27, Bertrand Marquis wrote:
> Hi Andrei,
> 
> When submitting patches, please use the add_maintainer.pl script so that 
> maintainers of the code
> modified are added in CC.

Hi Bertrand,

Thank you for reviewing the patches. I apologize for not adding the
maintainers in CC. I added them now.

> 
>> On 7 Mar 2023, at 11:09, Andrei Cherechesu (OSS) 
>>  wrote:
>>
>> From: Andrei Cherechesu 
>>
>> This 2-patch series fixes the parsing of the ARM Generic Timer
>> interrupts from the device tree.
>>
>> If the generic timer interrupts order in the DT was different than
>> the expected order in Xen code, these interrupts would no longer be
>> correctly parsed and registered by Xen, and would result in boot failure.
>>
>> This method with using "interrupt-names" for the generic timer interrupts
>> instead of having them hardcoded in the DTB in a specific order is the newer
>> approach already implemented in Linux. Xen did not have the necessary code 
>> for
>> this approach, and it has been implemented by the means of this patch series.
> 
> Would you mind giving a link to an example or the Linux documentation if there is 
> one?
> 

The bindings [0] for the ARM Generic Timer DT node were changed around
Linux 5.13, when the interrupt-names property was added, along with the
implementation for handling it [1].


[0]
https://elixir.bootlin.com/linux/v6.3-rc1/source/Documentation/devicetree/bindings/timer/arm,arch_timer.yaml#L44
[1]
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/drivers/clocksource/arm_arch_timer.c?id=86332e9e3477af8f31c9d5f3e81e57e0fd2118e7


Regards,
Andrei


> Cheers
> Bertrand
> 
>>
>> Functionality should remain the same if "interrupt-names" is not present in 
>> the
>> Generic Timer DTB node of the platform, but the interrupts should then still 
>> be
>> present in the expected "sec-phys", "phys", "virt", "hyp-phys", "hyp-virt" 
>> order.
>> If "interrupt-names" is present, now it is also correctly handled.
>>
>> Andrei Cherechesu (2):
>>  arch/arm: irq: Add platform_get_irq_byname() implementation
>>  arch/arm: time: Add support for parsing interrupts by names
>>
>> xen/arch/arm/include/asm/irq.h|  2 ++
>> xen/arch/arm/include/asm/time.h   |  3 ++-
>> xen/arch/arm/irq.c| 14 +++
>> xen/arch/arm/time.c   | 26 +---
>> xen/drivers/passthrough/arm/smmu-v3.c | 35 +--
>> 5 files changed, 46 insertions(+), 34 deletions(-)
>>
>> -- 
>> 2.35.1
>>
>>
> 



[PATCH v2 05/27] hw/xen: Watches on XenStore transactions

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Firing watches on the nodes that still exist is relatively easy; just
walk the tree and look at the nodes with refcount of one.

Firing watches on *deleted* nodes is more fun. We add 'modified_in_tx'
and 'deleted_in_tx' flags to each node. Nodes with those flags cannot
be shared, as they will always be unique to the transaction in which
they were created.

When xs_node_walk would need to *create* a node as scaffolding and it
encounters a deleted_in_tx node, it can resurrect it simply by clearing
its deleted_in_tx flag. If that node originally had any *data*, they're
gone, and the modified_in_tx flag will have been set when it was first
deleted.

We then attempt to send appropriate watches when the transaction is
committed, properly delete the deleted_in_tx nodes, and remove the
modified_in_tx flag from the others.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xenstore_impl.c | 151 ++-
 tests/unit/test-xs-node.c   | 231 +++-
 2 files changed, 380 insertions(+), 2 deletions(-)

diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 0812e367b0..60f42f61d6 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -32,6 +32,8 @@ typedef struct XsNode {
 GByteArray *content;
 GHashTable *children;
 uint64_t gencnt;
+bool deleted_in_tx;
+bool modified_in_tx;
 #ifdef XS_NODE_UNIT_TEST
 gchar *name; /* debug only */
 #endif
@@ -153,6 +155,13 @@ static XsNode *xs_node_copy(XsNode *old)
 XsNode *n = xs_node_new();
 
 n->gencnt = old->gencnt;
+
+#ifdef XS_NODE_UNIT_TEST
+if (n->name) {
+n->name = g_strdup(old->name);
+}
+#endif
+
 if (old->children) {
 n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
 (GDestroyNotify)xs_node_unref);
@@ -221,6 +230,9 @@ struct walk_op {
 bool mutating;
 bool create_dirs;
 bool in_transaction;
+
+/* Tracking during recursion so we know which is first. */
+bool deleted_in_tx;
 };
 
 static void fire_watches(struct walk_op *op, bool parents)
@@ -277,6 +289,9 @@ static int xs_node_add_content(XsNode **n, struct walk_op 
*op)
 g_byte_array_unref((*n)->content);
 }
 (*n)->content = g_byte_array_ref(data);
+if (op->tx_id != XBT_NULL) {
+(*n)->modified_in_tx = true;
+}
 return 0;
 }
 
@@ -333,10 +348,62 @@ static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data)
 return this_inplace;
 }
 
+static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op);
+static void copy_deleted_recurse(gpointer key, gpointer value,
+ gpointer user_data)
+{
+struct walk_op *op = user_data;
+GHashTable *siblings = op->op_opaque2;
+XsNode *n = xs_node_copy_deleted(value, op);
+
+/*
+ * Reinsert the deleted_in_tx copy of the node into the parent's
+ * 'children' hash table. Having stashed it from op->op_opaque2
+ * before the recursive call to xs_node_copy_deleted() scribbled
+ * over it.
+ */
+g_hash_table_insert(siblings, g_strdup(key), n);
+}
+
+static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op)
+{
+XsNode *n = xs_node_new();
+
+n->gencnt = old->gencnt;
+
+#ifdef XS_NODE_UNIT_TEST
+if (old->name) {
+n->name = g_strdup(old->name);
+}
+#endif
+
+if (old->children) {
+n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+(GDestroyNotify)xs_node_unref);
+op->op_opaque2 = n->children;
+g_hash_table_foreach(old->children, copy_deleted_recurse, op);
+}
+n->deleted_in_tx = true;
+/* If it gets resurrected we only fire a watch if it lost its content */
+if (old->content) {
+n->modified_in_tx = true;
+}
+op->new_nr_nodes--;
+return n;
+}
+
 static int xs_node_rm(XsNode **n, struct walk_op *op)
 {
 bool this_inplace = op->inplace;
 
+if (op->tx_id != XBT_NULL) {
+/* It's not trivial to do inplace handling for this one */
+XsNode *old = *n;
+*n = xs_node_copy_deleted(old, op);
+xs_node_unref(old);
+return 0;
+}
+
 /* Fire watches for, and count, nodes in the subtree which get deleted */
 if ((*n)->children) {
 g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op);
@@ -408,6 +475,10 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 }
 
 if (child) {
+if (child->deleted_in_tx) {
+assert(child->ref == 1);
+/* Cannot actually set child->deleted_in_tx = false until later */
+}
 xs_node_ref(child);
 /*
  * Now we own it too. But if we can modify inplace, that's going to
@@ -475,6 +546,15 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 xs_node_unref(old);
 }
 

[PATCH v2 16/27] hw/xen: Rename xen_common.h to xen_native.h

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This header is now only for native Xen code, not PV backends that may be
used in Xen emulation. Since the toolstack libraries may depend on the
specific version of Xen headers that they pull in (and will set the
__XEN_TOOLS__ macro to enable internal definitions that they depend on),
the rule is that xen_native.h (and thus the toolstack library headers)
must be included *before* any of the headers in include/hw/xen/interface.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 accel/xen/xen-all.c   |  1 +
 hw/9pfs/xen-9p-backend.c  |  1 +
 hw/block/dataplane/xen-block.c|  3 ++-
 hw/block/xen-block.c  |  1 -
 hw/i386/pc_piix.c |  4 ++--
 hw/i386/xen/xen-hvm.c | 11 +-
 hw/i386/xen/xen-mapcache.c|  2 +-
 hw/i386/xen/xen_platform.c|  7 +++---
 hw/xen/trace-events   |  2 +-
 hw/xen/xen-operations.c   |  2 +-
 hw/xen/xen_pt.c   |  2 +-
 hw/xen/xen_pt.h   |  2 +-
 hw/xen/xen_pt_config_init.c   |  2 +-
 hw/xen/xen_pt_msi.c   |  4 ++--
 include/hw/xen/xen.h  | 22 ---
 include/hw/xen/{xen_common.h => xen_native.h} | 10 ++---
 include/hw/xen/xen_pvdev.h|  3 ++-
 17 files changed, 47 insertions(+), 32 deletions(-)
 rename include/hw/xen/{xen_common.h => xen_native.h} (98%)

diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index 2d51c41e40..00221e23c5 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -12,6 +12,7 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "hw/xen/xen_native.h"
 #include "hw/xen/xen-legacy-backend.h"
 #include "hw/xen/xen_pt.h"
 #include "chardev/char.h"
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index d8bb0e847c..74f3a05f88 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -22,6 +22,7 @@
 #include "qemu/config-file.h"
 #include "qemu/main-loop.h"
 #include "qemu/option.h"
+#include "qemu/iov.h"
 #include "fsdev/qemu-fsdev.h"
 
 #define VERSIONS "1"
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 8322a1de82..734da42ea7 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -23,8 +23,9 @@
 #include "qemu/main-loop.h"
 #include "qemu/memalign.h"
 #include "qapi/error.h"
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen.h"
 #include "hw/block/xen_blkif.h"
+#include "hw/xen/interface/io/ring.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/iothread.h"
 #include "xen-block.h"
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 345b284d70..87299615e3 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -19,7 +19,6 @@
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
 #include "qom/object_interfaces.h"
-#include "hw/xen/xen_common.h"
 #include "hw/block/xen_blkif.h"
 #include "hw/qdev-properties.h"
 #include "hw/xen/xen-block.h"
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4bf15f9c1f..30eedd62a3 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -47,8 +47,6 @@
 #include "hw/kvm/clock.h"
 #include "hw/sysbus.h"
 #include "hw/i2c/smbus_eeprom.h"
-#include "hw/xen/xen-x86.h"
-#include "hw/xen/xen.h"
 #include "exec/memory.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/piix4.h"
@@ -60,6 +58,8 @@
 #include <xen/hvm/hvm_info_table.h>
 #include "hw/xen/xen_pt.h"
 #endif
+#include "hw/xen/xen-x86.h"
+#include "hw/xen/xen.h"
 #include "migration/global_state.h"
 #include "migration/misc.h"
 #include "sysemu/numa.h"
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index cb1d24f592..56641a550e 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -18,7 +18,7 @@
 #include "hw/irq.h"
 #include "hw/hw.h"
 #include "hw/i386/apic-msidef.h"
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_native.h"
 #include "hw/xen/xen-legacy-backend.h"
 #include "hw/xen/xen-bus.h"
 #include "hw/xen/xen-x86.h"
@@ -52,10 +52,11 @@ static bool xen_in_migration;
 
 /* Compatibility with older version */
 
-/* This allows QEMU to build on a system that has Xen 4.5 or earlier
- * installed.  This here (not in hw/xen/xen_common.h) because xen/hvm/ioreq.h
- * needs to be included before this block and hw/xen/xen_common.h needs to
- * be included before xen/hvm/ioreq.h
+/*
+ * This allows QEMU to build on a system that has Xen 4.5 or earlier installed.
+ * This is here (not in hw/xen/xen_native.h) because xen/hvm/ioreq.h needs to
+ * be included before this block and hw/xen/xen_native.h needs to be included
+ * before xen/hvm/ioreq.h
  */
 #ifndef IOREQ_TYPE_VMWARE_PORT
 #define IOREQ_TYPE_VMWARE_PORT  3
diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
index 

[PATCH v2 22/27] hw/xen: Add emulated implementation of XenStore operations

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Now that we have an internal implementation of XenStore, we can populate
the xenstore_backend_ops to allow PV backends to talk to it.

Watches can't be processed with immediate callbacks because that would
call back into XenBus code recursively. Defer them to a QEMUBH to be run
as appropriate from the main loop. We use a QEMUBH per XS handle, and it
walks all the watches (there shouldn't be many per handle) to fire any
which have pending events. We *could* have done it differently but this
allows us to use the same struct watch_event as we have for the guest
side, and keeps things relatively simple.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_xenstore.c | 273 -
 1 file changed, 269 insertions(+), 4 deletions(-)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 35898e9b37..bf466c71ed 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -49,7 +49,7 @@ struct XenXenstoreState {
 /*< public >*/
 
 XenstoreImplState *impl;
-GList *watch_events;
+GList *watch_events; /* for the guest */
 
 MemoryRegion xenstore_page;
 struct xenstore_domain_interface *xs;
@@ -73,6 +73,8 @@ struct XenXenstoreState *xen_xenstore_singleton;
 static void xen_xenstore_event(void *opaque);
 static void fire_watch_cb(void *opaque, const char *path, const char *token);
 
+static struct xenstore_backend_ops emu_xenstore_backend_ops;
+
 static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
 GList *perms,
 const char *relpath,
@@ -169,6 +171,8 @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp)
 relpath_printf(s, perms, "feature", "%s", "");
 
 g_list_free_full(perms, g_free);
+
+xen_xenstore_ops = &emu_xenstore_backend_ops;
 }
 
 static bool xen_xenstore_is_needed(void *opaque)
@@ -1306,6 +1310,15 @@ struct watch_event {
 char *token;
 };
 
+static void free_watch_event(struct watch_event *ev)
+{
+if (ev) {
+g_free(ev->path);
+g_free(ev->token);
+g_free(ev);
+}
+}
+
 static void queue_watch(XenXenstoreState *s, const char *path,
 const char *token)
 {
@@ -1352,9 +1365,7 @@ static void process_watch_events(XenXenstoreState *s)
 deliver_watch(s, ev->path, ev->token);
 
 s->watch_events = g_list_remove(s->watch_events, ev);
-g_free(ev->path);
-g_free(ev->token);
-g_free(ev);
+free_watch_event(ev);
 }
 
 static void xen_xenstore_event(void *opaque)
@@ -1444,3 +1455,257 @@ int xen_xenstore_reset(void)
 
 return 0;
 }
+
+struct qemu_xs_handle {
+XenstoreImplState *impl;
+GList *watches;
+QEMUBH *watch_bh;
+};
+
+struct qemu_xs_watch {
+struct qemu_xs_handle *h;
+char *path;
+xs_watch_fn fn;
+void *opaque;
+GList *events;
+};
+
+static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
+{
+return g_strdup_printf("/local/domain/%u", domid);
+}
+
+static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
+  const char *path, unsigned int *num)
+{
+GList *items = NULL, *l;
+unsigned int i = 0;
+char **items_ret;
+int err;
+
+err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
+if (err) {
+errno = err;
+return NULL;
+}
+
+items_ret = g_new0(char *, g_list_length(items) + 1);
+*num = 0;
+for (l = items; l; l = l->next) {
+items_ret[i++] = l->data;
+(*num)++;
+}
+g_list_free(items);
+return items_ret;
+}
+
+static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
+const char *path, unsigned int *len)
+{
+GByteArray *data = g_byte_array_new();
+bool free_segment = false;
+int err;
+
+err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
+if (err) {
+free_segment = true;
+errno = err;
+} else {
+if (len) {
+*len = data->len;
+}
+/* The xen-bus-helper code expects to get NUL terminated string! */
+g_byte_array_append(data, (void *)"", 1);
+}
+
+return g_byte_array_free(data, free_segment);
+}
+
+static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
+const char *path, const void *data, unsigned int len)
+{
+GByteArray *gdata = g_byte_array_new();
+int err;
+
+g_byte_array_append(gdata, data, len);
+err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata);
+g_byte_array_unref(gdata);
+if (err) {
+errno = err;
+return false;
+}
+return true;
+}
+
+static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
+ unsigned int owner, unsigned int domid,
+ unsigned int perms, const char *path)
+{
+

[PATCH v2 10/27] hw/xen: Add gnttab operations to allow redirection to internal emulation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Move the existing code using libxengnttab to xen-operations.c and allow
the operations to be redirected so that we can add emulation of grant
table mapping for backend drivers.

In emulation, mapping more than one grant ref to be virtually contiguous
would be fairly difficult. The best way to do it might be to make the
ram_block mappings actually backed by a file (shmem or a deleted file,
perhaps) so that we can have multiple *shared* mappings of it. But that
would be fairly intrusive.

Making the backend drivers cope with page *lists* instead of expecting
the mapping to be contiguous is also non-trivial, since some structures
would actually *cross* page boundaries (e.g. the 32-bit blkif responses
which are 12 bytes).

So for now, we'll support only single-page mappings in emulation. Add a
XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE flag to indicate that the native Xen
implementation *does* support multi-page maps, and a helper function to
query it.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/xen/xen-bus.c| 112 ++--
 hw/xen/xen-legacy-backend.c | 125 ++
 hw/xen/xen-operations.c | 157 
 hw/xen/xen_pvdev.c  |   2 +-
 include/hw/xen/xen-bus.h|   3 +-
 include/hw/xen/xen-legacy-backend.h |  13 +--
 include/hw/xen/xen_backend_ops.h| 100 ++
 include/hw/xen/xen_common.h |  39 ---
 softmmu/globals.c   |   1 +
 9 files changed, 280 insertions(+), 272 deletions(-)

diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index d0b1ae93da..b247e86f28 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -947,7 +947,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev)
 void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs,
Error **errp)
 {
-if (xengnttab_set_max_grants(xendev->xgth, nr_refs)) {
+if (qemu_xen_gnttab_set_max_grants(xendev->xgth, nr_refs)) {
 error_setg_errno(errp, errno, "xengnttab_set_max_grants failed");
 }
 }
@@ -956,9 +956,8 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
 unsigned int nr_refs, int prot,
 Error **errp)
 {
-void *map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_refs,
-xendev->frontend_id, refs,
-prot);
+void *map = qemu_xen_gnttab_map_refs(xendev->xgth, nr_refs,
+ xendev->frontend_id, refs, prot);
 
 if (!map) {
 error_setg_errno(errp, errno,
@@ -971,109 +970,17 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
 void xen_device_unmap_grant_refs(XenDevice *xendev, void *map,
  unsigned int nr_refs, Error **errp)
 {
-if (xengnttab_unmap(xendev->xgth, map, nr_refs)) {
+if (qemu_xen_gnttab_unmap(xendev->xgth, map, nr_refs)) {
 error_setg_errno(errp, errno, "xengnttab_unmap failed");
 }
 }
 
-static void compat_copy_grant_refs(XenDevice *xendev, bool to_domain,
-   XenDeviceGrantCopySegment segs[],
-   unsigned int nr_segs, Error **errp)
-{
-uint32_t *refs = g_new(uint32_t, nr_segs);
-int prot = to_domain ? PROT_WRITE : PROT_READ;
-void *map;
-unsigned int i;
-
-for (i = 0; i < nr_segs; i++) {
-XenDeviceGrantCopySegment *seg = &segs[i];
-
-refs[i] = to_domain ? seg->dest.foreign.ref :
-seg->source.foreign.ref;
-}
-
-map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_segs,
-  xendev->frontend_id, refs,
-  prot);
-if (!map) {
-error_setg_errno(errp, errno,
- "xengnttab_map_domain_grant_refs failed");
-goto done;
-}
-
-for (i = 0; i < nr_segs; i++) {
-XenDeviceGrantCopySegment *seg = &segs[i];
-void *page = map + (i * XC_PAGE_SIZE);
-
-if (to_domain) {
-memcpy(page + seg->dest.foreign.offset, seg->source.virt,
-   seg->len);
-} else {
-memcpy(seg->dest.virt, page + seg->source.foreign.offset,
-   seg->len);
-}
-}
-
-if (xengnttab_unmap(xendev->xgth, map, nr_segs)) {
-error_setg_errno(errp, errno, "xengnttab_unmap failed");
-}
-
-done:
-g_free(refs);
-}
-
 void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain,
 XenDeviceGrantCopySegment segs[],
 unsigned int nr_segs, Error **errp)
 {
-xengnttab_grant_copy_segment_t *xengnttab_segs;
-unsigned int i;
-
-if (!xendev->feature_grant_copy) {
-compat_copy_grant_refs(xendev, to_domain, 

[PATCH v2 11/27] hw/xen: Pass grant ref to gnttab unmap operation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

The previous commit introduced redirectable gnttab operations fairly
much like-for-like, with the exception of the extra arguments to the
->open() call which were always NULL/0 anyway.

This *changes* the arguments to the ->unmap() operation to include the
original ref# that was mapped. Under real Xen it isn't necessary; all we
need to do from QEMU is munmap(), then the kernel will release the grant,
and Xen does the tracking/refcounting for the guest.

When we have emulated grant tables though, we need to do all that for
ourselves. So let's have the back ends keep track of what they mapped
and pass it in to the ->unmap() method for us.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/9pfs/xen-9p-backend.c|  7 ---
 hw/block/dataplane/xen-block.c  |  1 +
 hw/char/xen_console.c   |  2 +-
 hw/net/xen_nic.c| 13 -
 hw/usb/xen-usb.c| 21 -
 hw/xen/xen-bus.c|  4 ++--
 hw/xen/xen-legacy-backend.c |  4 ++--
 hw/xen/xen-operations.c |  9 -
 include/hw/xen/xen-bus.h|  2 +-
 include/hw/xen/xen-legacy-backend.h |  6 +++---
 include/hw/xen/xen_backend_ops.h|  7 ---
 11 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 864bdaf952..d8bb0e847c 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -359,12 +359,13 @@ static int xen_9pfs_free(struct XenLegacyDevice *xendev)
 if (xen_9pdev->rings[i].data != NULL) {
 xen_be_unmap_grant_refs(&xen_9pdev->xendev,
 xen_9pdev->rings[i].data,
+xen_9pdev->rings[i].intf->ref,
 (1 << xen_9pdev->rings[i].ring_order));
 }
 if (xen_9pdev->rings[i].intf != NULL) {
-xen_be_unmap_grant_refs(&xen_9pdev->xendev,
-xen_9pdev->rings[i].intf,
-1);
+xen_be_unmap_grant_ref(&xen_9pdev->xendev,
+   xen_9pdev->rings[i].intf,
+   xen_9pdev->rings[i].ref);
 }
 if (xen_9pdev->rings[i].bh != NULL) {
 qemu_bh_delete(xen_9pdev->rings[i].bh);
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 2785b9e849..e55b713002 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -705,6 +705,7 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
 Error *local_err = NULL;
 
 xen_device_unmap_grant_refs(xendev, dataplane->sring,
+dataplane->ring_ref,
 dataplane->nr_ring_ref, &local_err);
 dataplane->sring = NULL;
 
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 63153dfde4..19ad6c946a 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -271,7 +271,7 @@ static void con_disconnect(struct XenLegacyDevice *xendev)
 if (!xendev->dev) {
 xenforeignmemory_unmap(xen_fmem, con->sring, 1);
 } else {
-xen_be_unmap_grant_ref(xendev, con->sring);
+xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref);
 }
 con->sring = NULL;
 }
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 7d92c2d022..166d03787d 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -181,7 +181,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
 qemu_send_packet(qemu_get_queue(netdev->nic),
  page + txreq.offset, txreq.size);
 }
-xen_be_unmap_grant_ref(&netdev->xendev, page);
+xen_be_unmap_grant_ref(&netdev->xendev, page, txreq.gref);
 net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
 }
 if (!netdev->tx_work) {
@@ -261,7 +261,7 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size
 return -1;
 }
 memcpy(page + NET_IP_ALIGN, buf, size);
-xen_be_unmap_grant_ref(&netdev->xendev, page);
+xen_be_unmap_grant_ref(&netdev->xendev, page, rxreq.gref);
 net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);
 
 return size;
@@ -343,7 +343,8 @@ static int net_connect(struct XenLegacyDevice *xendev)
netdev->rx_ring_ref,
PROT_READ | PROT_WRITE);
 if (!netdev->rxs) {
-xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs);
+xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs,
+   netdev->tx_ring_ref);
 netdev->txs = NULL;
 return -1;
 }
@@ -368,11 +369,13 @@ static void net_disconnect(struct XenLegacyDevice *xendev)
 xen_pv_unbind_evtchn(&netdev->xendev);
 
 if (netdev->txs) {
-xen_be_unmap_grant_ref(>xendev, 

[PATCH v2 07/27] hw/xen: Implement core serialize/deserialize methods for xenstore_impl

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This implements the basic migration support in the back end, with unit
tests that give additional confidence in the node-counting already in
the tree.

However, the existing PV back ends like xen-disk don't support migration
yet. They will reset the ring and fail to continue where they left off.
We will fix that in future, but not in time for the 8.0 release.

Since there's also an open question of whether we want to serialize the
full XenStore or only the guest-owned nodes in /local/domain/${domid},
for now just mark the XenStore device as unmigratable.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_xenstore.c  |  26 +-
 hw/i386/kvm/xenstore_impl.c | 574 +++-
 hw/i386/kvm/xenstore_impl.h |   5 +
 tests/unit/test-xs-node.c   | 236 ++-
 4 files changed, 825 insertions(+), 16 deletions(-)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 3b409e3817..520422b147 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -66,6 +66,9 @@ struct XenXenstoreState {
 evtchn_port_t guest_port;
 evtchn_port_t be_port;
 struct xenevtchn_handle *eh;
+
+uint8_t *impl_state;
+uint32_t impl_state_size;
 };
 
 struct XenXenstoreState *xen_xenstore_singleton;
@@ -109,16 +112,26 @@ static bool xen_xenstore_is_needed(void *opaque)
 static int xen_xenstore_pre_save(void *opaque)
 {
 XenXenstoreState *s = opaque;
+GByteArray *save;
 
 if (s->eh) {
 s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
 }
+
+g_free(s->impl_state);
+save = xs_impl_serialize(s->impl);
+s->impl_state = save->data;
+s->impl_state_size = save->len;
+g_byte_array_free(save, false);
+
 return 0;
 }
 
 static int xen_xenstore_post_load(void *opaque, int ver)
 {
 XenXenstoreState *s = opaque;
+GByteArray *save;
+int ret;
 
 /*
  * As qemu/dom0, rebind to the guest's port. The Windows drivers may
@@ -135,11 +148,18 @@ static int xen_xenstore_post_load(void *opaque, int ver)
 }
 s->be_port = be_port;
 }
-return 0;
+
+save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
+s->impl_state = NULL;
+s->impl_state_size = 0;
+
+ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
+return ret;
 }
 
 static const VMStateDescription xen_xenstore_vmstate = {
 .name = "xen_xenstore",
+.unmigratable = 1, /* The PV back ends don't migrate yet */
 .version_id = 1,
 .minimum_version_id = 1,
 .needed = xen_xenstore_is_needed,
@@ -155,6 +175,10 @@ static const VMStateDescription xen_xenstore_vmstate = {
 VMSTATE_BOOL(rsp_pending, XenXenstoreState),
 VMSTATE_UINT32(guest_port, XenXenstoreState),
 VMSTATE_BOOL(fatal_error, XenXenstoreState),
+VMSTATE_UINT32(impl_state_size, XenXenstoreState),
+VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
+impl_state_size, 0,
+vmstate_info_uint8, uint8_t),
 VMSTATE_END_OF_LIST()
 }
 };
diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 8a2053e243..305fe75519 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -37,6 +37,7 @@ typedef struct XsNode {
 uint64_t gencnt;
 bool deleted_in_tx;
 bool modified_in_tx;
+unsigned int serialized_tx;
 #ifdef XS_NODE_UNIT_TEST
 gchar *name; /* debug only */
 #endif
@@ -68,6 +69,7 @@ struct XenstoreImplState {
 unsigned int nr_domu_transactions;
 unsigned int root_tx;
 unsigned int last_tx;
+bool serialized;
 };
 
 
@@ -1156,8 +1158,10 @@ int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id,
 return xs_node_walk(n, &op);
 }
 
-int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path,
-  const char *token, xs_impl_watch_fn fn, void *opaque)
+static int do_xs_impl_watch(XenstoreImplState *s, unsigned int dom_id,
+const char *path, const char *token,
+xs_impl_watch_fn fn, void *opaque)
+
 {
 char abspath[XENSTORE_ABS_PATH_MAX + 1];
 XsWatch *w, *l;
@@ -1200,12 +1204,22 @@ int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path,
 s->nr_domu_watches++;
 }
 
-/* A new watch should fire immediately */
-fn(opaque, path, token);
-
 return 0;
 }
 
+int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path,
+  const char *token, xs_impl_watch_fn fn, void *opaque)
+{
+int ret = do_xs_impl_watch(s, dom_id, path, token, fn, opaque);
+
+if (!ret) {
+/* A new watch should fire immediately */
+fn(opaque, path, token);
+}
+
+return ret;
+}
+
 static XsWatch *free_watch(XenstoreImplState *s, XsWatch *w)
 {
 XsWatch *next = w->next;
@@ -1361,3 +1375,553 @@ 

[PATCH v2 04/27] hw/xen: Implement XenStore transactions

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Given that the whole thing supported copy on write from the beginning,
transactions end up being fairly simple. On starting a transaction, just
take a ref of the existing root; swap it back in on a successful commit.

The main tree has a transaction ID too, and we keep a record of the last
transaction ID given out. If the main tree is ever modified when it isn't
the latest, it gets a new transaction ID.

A commit can only succeed if the main tree hasn't moved on since it was
forked. Strictly speaking, the XenStore protocol allows a transaction to
succeed as long as nothing *it* read or wrote has changed in the interim,
but no implementations do that; *any* change is sufficient to abort a
transaction.

This does not yet fire watches on the changed nodes on a commit. That bit
is more fun and will come in a follow-on commit.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xenstore_impl.c | 150 ++--
 tests/unit/test-xs-node.c   | 118 
 2 files changed, 262 insertions(+), 6 deletions(-)

diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 9c2348835f..0812e367b0 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -46,13 +46,56 @@ typedef struct XsWatch {
 int rel_prefix;
 } XsWatch;
 
+typedef struct XsTransaction {
+XsNode *root;
+unsigned int nr_nodes;
+unsigned int base_tx;
+unsigned int tx_id;
+unsigned int dom_id;
+} XsTransaction;
+
 struct XenstoreImplState {
 XsNode *root;
 unsigned int nr_nodes;
 GHashTable *watches;
 unsigned int nr_domu_watches;
+GHashTable *transactions;
+unsigned int nr_domu_transactions;
+unsigned int root_tx;
+unsigned int last_tx;
 };
 
+
+static void nobble_tx(gpointer key, gpointer value, gpointer user_data)
+{
+unsigned int *new_tx_id = user_data;
+XsTransaction *tx = value;
+
+if (tx->base_tx == *new_tx_id) {
+/* Transactions based on XBT_NULL will always fail */
+tx->base_tx = XBT_NULL;
+}
+}
+
+static inline unsigned int next_tx(struct XenstoreImplState *s)
+{
+unsigned int tx_id;
+
+/* Find the next TX id which isn't either XBT_NULL or in use. */
+do {
+tx_id = ++s->last_tx;
+} while (tx_id == XBT_NULL || tx_id == s->root_tx ||
+ g_hash_table_lookup(s->transactions, GINT_TO_POINTER(tx_id)));
+
+/*
+ * It is vanishingly unlikely, but ensure that no outstanding transaction
+ * is based on the (previous incarnation of the) newly-allocated TX id.
+ */
+g_hash_table_foreach(s->transactions, nobble_tx, &tx_id);
+
+return tx_id;
+}
+
 static inline XsNode *xs_node_new(void)
 {
 XsNode *n = g_new0(XsNode, 1);
@@ -159,6 +202,7 @@ struct walk_op {
 
 GList *watches;
 unsigned int dom_id;
+unsigned int tx_id;
 
 /* The number of nodes which will exist in the tree if this op succeeds. */
 unsigned int new_nr_nodes;
@@ -176,6 +220,7 @@ struct walk_op {
 bool inplace;
 bool mutating;
 bool create_dirs;
+bool in_transaction;
 };
 
 static void fire_watches(struct walk_op *op, bool parents)
@@ -183,7 +228,7 @@ static void fire_watches(struct walk_op *op, bool parents)
 GList *l = NULL;
 XsWatch *w;
 
-if (!op->mutating) {
+if (!op->mutating || op->in_transaction) {
 return;
 }
 
@@ -450,10 +495,23 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 assert(!op->watches);
 /*
  * On completing the recursion back up the path walk and reaching the
- * top, assign the new node count if the operation was successful.
+ * top, assign the new node count if the operation was successful. If
+ * the main tree was changed, bump its tx ID so that outstanding
+ * transactions correctly fail. But don't bump it every time; only
+ * if it makes a difference.
  */
 if (!err && op->mutating) {
-op->s->nr_nodes = op->new_nr_nodes;
+if (!op->in_transaction) {
+if (op->s->root_tx != op->s->last_tx) {
+op->s->root_tx = next_tx(op->s);
+}
+op->s->nr_nodes = op->new_nr_nodes;
+} else {
+XsTransaction *tx = g_hash_table_lookup(op->s->transactions,
+GINT_TO_POINTER(op->tx_id));
+assert(tx);
+tx->nr_nodes = op->new_nr_nodes;
+}
 }
 }
 return err;
@@ -535,14 +593,23 @@ static int init_walk_op(XenstoreImplState *s, struct walk_op *op,
 op->inplace = true;
 op->mutating = false;
 op->create_dirs = false;
+op->in_transaction = false;
 op->dom_id = dom_id;
+op->tx_id = tx_id;
 op->s = s;
 
 if (tx_id == XBT_NULL) {
 *rootp = &s->root;
 op->new_nr_nodes = s->nr_nodes;
 } else {
-  

[PATCH v2 15/27] hw/xen: Use XEN_PAGE_SIZE in PV backend drivers

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

XC_PAGE_SIZE comes from the actual Xen libraries, while XEN_PAGE_SIZE is
provided by QEMU itself in xen_backend_ops.h. For backends which may be
built for emulation mode, use the latter.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/block/dataplane/xen-block.c |  8 
 hw/display/xenfb.c | 12 ++--
 hw/net/xen_nic.c   | 12 ++--
 hw/usb/xen-usb.c   |  8 
 4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index e55b713002..8322a1de82 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -101,9 +101,9 @@ static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane)
  * re-use requests, allocate the memory once here. It will be freed
  * xen_block_dataplane_destroy() when the request list is freed.
  */
-request->buf = qemu_memalign(XC_PAGE_SIZE,
+request->buf = qemu_memalign(XEN_PAGE_SIZE,
  BLKIF_MAX_SEGMENTS_PER_REQUEST *
- XC_PAGE_SIZE);
+ XEN_PAGE_SIZE);
 dataplane->requests_total++;
 qemu_iovec_init(&request->v, 1);
 } else {
@@ -185,7 +185,7 @@ static int xen_block_parse_request(XenBlockRequest *request)
 goto err;
 }
 if (request->req.seg[i].last_sect * dataplane->sector_size >=
-XC_PAGE_SIZE) {
+XEN_PAGE_SIZE) {
 error_report("error: page crossing");
 goto err;
 }
@@ -740,7 +740,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
 
 dataplane->protocol = protocol;
 
-ring_size = XC_PAGE_SIZE * dataplane->nr_ring_ref;
+ring_size = XEN_PAGE_SIZE * dataplane->nr_ring_ref;
 switch (dataplane->protocol) {
 case BLKIF_PROTOCOL_NATIVE:
 {
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 2c4016fcbd..0074a9b6f8 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -489,13 +489,13 @@ static int xenfb_map_fb(struct XenFB *xenfb)
 }
 
 if (xenfb->pixels) {
-munmap(xenfb->pixels, xenfb->fbpages * XC_PAGE_SIZE);
+munmap(xenfb->pixels, xenfb->fbpages * XEN_PAGE_SIZE);
 xenfb->pixels = NULL;
 }
 
-xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XC_PAGE_SIZE);
+xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XEN_PAGE_SIZE);
 n_fbdirs = xenfb->fbpages * mode / 8;
-n_fbdirs = DIV_ROUND_UP(n_fbdirs, XC_PAGE_SIZE);
+n_fbdirs = DIV_ROUND_UP(n_fbdirs, XEN_PAGE_SIZE);
 
 pgmfns = g_new0(xen_pfn_t, n_fbdirs);
 fbmfns = g_new0(xen_pfn_t, xenfb->fbpages);
@@ -528,8 +528,8 @@ static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim,
 {
 size_t mfn_sz = sizeof_field(struct xenfb_page, pd[0]);
 size_t pd_len = sizeof_field(struct xenfb_page, pd) / mfn_sz;
-size_t fb_pages = pd_len * XC_PAGE_SIZE / mfn_sz;
-size_t fb_len_max = fb_pages * XC_PAGE_SIZE;
+size_t fb_pages = pd_len * XEN_PAGE_SIZE / mfn_sz;
+size_t fb_len_max = fb_pages * XEN_PAGE_SIZE;
 int max_width, max_height;
 
 if (fb_len_lim > fb_len_max) {
@@ -930,7 +930,7 @@ static void fb_disconnect(struct XenLegacyDevice *xendev)
  *   instead.  This releases the guest pages and keeps qemu happy.
  */
 qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages);
-fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE,
+fb->pixels = mmap(fb->pixels, fb->fbpages * XEN_PAGE_SIZE,
   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON,
   -1, 0);
 if (fb->pixels == MAP_FAILED) {
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 166d03787d..9bbf6599fc 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -145,7 +145,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
 continue;
 }
 
-if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) {
+if ((txreq.offset + txreq.size) > XEN_PAGE_SIZE) {
 xen_pv_printf(&netdev->xendev, 0, "error: page crossing\n");
 net_tx_error(netdev, &txreq, rc);
 continue;
@@ -171,7 +171,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
 if (txreq.flags & NETTXF_csum_blank) {
 /* have read-only mapping -> can't fill checksum in-place */
 if (!tmpbuf) {
-tmpbuf = g_malloc(XC_PAGE_SIZE);
+tmpbuf = g_malloc(XEN_PAGE_SIZE);
 }
 memcpy(tmpbuf, page + txreq.offset, txreq.size);
 net_checksum_calculate(tmpbuf, txreq.size, CSUM_ALL);
@@ -243,9 +243,9 @@ static ssize_t net_rx_packet(NetClientState *nc, const 
uint8_t *buf, size_t size
 if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
 return 0;
 }
-if (size > XC_PAGE_SIZE - 

[PATCH v2 09/27] hw/xen: Add evtchn operations to allow redirection to internal emulation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

The existing implementation calling into the real libxenevtchn moves to
a new file hw/xen/xen-operations.c, and is called via a function table
which in a subsequent commit will also be able to invoke the emulated
event channel support.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/9pfs/xen-9p-backend.c|  24 +++---
 hw/i386/xen/xen-hvm.c   |  27 ---
 hw/xen/meson.build  |   1 +
 hw/xen/xen-bus.c|  22 +++---
 hw/xen/xen-legacy-backend.c |   8 +-
 hw/xen/xen-operations.c |  71 +
 hw/xen/xen_pvdev.c  |  12 +--
 include/hw/xen/xen-bus.h|   1 +
 include/hw/xen/xen-legacy-backend.h |   1 +
 include/hw/xen/xen_backend_ops.h| 118 
 include/hw/xen/xen_common.h |  12 ---
 include/hw/xen/xen_pvdev.h  |   1 +
 softmmu/globals.c   |   1 +
 13 files changed, 242 insertions(+), 57 deletions(-)
 create mode 100644 hw/xen/xen-operations.c
 create mode 100644 include/hw/xen/xen_backend_ops.h

diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 65c4979c3c..864bdaf952 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -241,7 +241,7 @@ static void xen_9pfs_push_and_notify(V9fsPDU *pdu)
 xen_wmb();
 
 ring->inprogress = false;
-xenevtchn_notify(ring->evtchndev, ring->local_port);
+qemu_xen_evtchn_notify(ring->evtchndev, ring->local_port);
 
 qemu_bh_schedule(ring->bh);
 }
@@ -324,8 +324,8 @@ static void xen_9pfs_evtchn_event(void *opaque)
 Xen9pfsRing *ring = opaque;
 evtchn_port_t port;
 
-port = xenevtchn_pending(ring->evtchndev);
-xenevtchn_unmask(ring->evtchndev, port);
+port = qemu_xen_evtchn_pending(ring->evtchndev);
+qemu_xen_evtchn_unmask(ring->evtchndev, port);
 
 qemu_bh_schedule(ring->bh);
 }
@@ -337,10 +337,10 @@ static void xen_9pfs_disconnect(struct XenLegacyDevice 
*xendev)
 
 for (i = 0; i < xen_9pdev->num_rings; i++) {
 if (xen_9pdev->rings[i].evtchndev != NULL) {
-qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
-NULL, NULL, NULL);
-xenevtchn_unbind(xen_9pdev->rings[i].evtchndev,
- xen_9pdev->rings[i].local_port);
+
qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
+NULL, NULL, NULL);
+qemu_xen_evtchn_unbind(xen_9pdev->rings[i].evtchndev,
+   xen_9pdev->rings[i].local_port);
 xen_9pdev->rings[i].evtchndev = NULL;
 }
 }
@@ -447,12 +447,12 @@ static int xen_9pfs_connect(struct XenLegacyDevice 
*xendev)
 xen_9pdev->rings[i].inprogress = false;
 
 
-xen_9pdev->rings[i].evtchndev = xenevtchn_open(NULL, 0);
+xen_9pdev->rings[i].evtchndev = qemu_xen_evtchn_open();
 if (xen_9pdev->rings[i].evtchndev == NULL) {
 goto out;
 }
-qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev));
-xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain
+qemu_set_cloexec(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev));
+xen_9pdev->rings[i].local_port = qemu_xen_evtchn_bind_interdomain
 (xen_9pdev->rings[i].evtchndev,
  xendev->dom,
  xen_9pdev->rings[i].evtchn);
@@ -463,8 +463,8 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
 goto out;
 }
 xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
-qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
-xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
+qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
+xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
 }
 
 xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model");
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index e5a1dd19f4..cb1d24f592 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -761,7 +761,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state)
 int i;
 evtchn_port_t port;
 
-port = xenevtchn_pending(state->xce_handle);
+port = qemu_xen_evtchn_pending(state->xce_handle);
 if (port == state->bufioreq_local_port) {
 timer_mod(state->buffered_io_timer,
 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
@@ -780,7 +780,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state)
 }
 
 /* unmask the wanted port again */
-xenevtchn_unmask(state->xce_handle, port);
+qemu_xen_evtchn_unmask(state->xce_handle, port);
 
 /* get the io packet from shared memory */
 

[PATCH v2 20/27] hw/xen: Hook up emulated implementation for event channel operations

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

We provided the backend-facing evtchn functions very early on as part of
the core Xen platform support, since things like timers and xenstore need
to use them.

By what may or may not be an astonishing coincidence, those functions
just *happen* all to have exactly the right function prototypes to slot
into the evtchn_backend_ops table and be called by the PV backends.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_evtchn.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index 886fbf6b3b..98a7b85047 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -34,6 +34,7 @@
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
 #include "hw/irq.h"
+#include "hw/xen/xen_backend_ops.h"
 
 #include "xen_evtchn.h"
 #include "xen_overlay.h"
@@ -278,6 +279,17 @@ static const TypeInfo xen_evtchn_info = {
 .class_init= xen_evtchn_class_init,
 };
 
+static struct evtchn_backend_ops emu_evtchn_backend_ops = {
+.open = xen_be_evtchn_open,
+.bind_interdomain = xen_be_evtchn_bind_interdomain,
+.unbind = xen_be_evtchn_unbind,
+.close = xen_be_evtchn_close,
+.get_fd = xen_be_evtchn_fd,
+.notify = xen_be_evtchn_notify,
+.unmask = xen_be_evtchn_unmask,
+.pending = xen_be_evtchn_pending,
+};
+
 static void gsi_assert_bh(void *opaque)
 {
 struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
@@ -318,6 +330,9 @@ void xen_evtchn_create(void)
 s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
 s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
 s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
+
+/* Set event channel functions for backend drivers to use */
+xen_evtchn_ops = &emu_evtchn_backend_ops;
 }
 
 void xen_evtchn_connect_gsis(qemu_irq *system_gsis)
-- 
2.39.0




[PATCH v2 23/27] hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

We don't actually access the guest's page through the grant, because
this isn't real Xen, and we can just use the page we gave it in the
first place. Map the grant anyway, mostly for cosmetic purposes so it
*looks* like it's in use in the guest-visible grant table.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_xenstore.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index bf466c71ed..2cadafd56a 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -21,6 +21,7 @@
 
 #include "hw/sysbus.h"
 #include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
 #include "xen_overlay.h"
 #include "xen_evtchn.h"
 #include "xen_xenstore.h"
@@ -34,6 +35,7 @@
 
 #include "hw/xen/interface/io/xs_wire.h"
 #include "hw/xen/interface/event_channel.h"
+#include "hw/xen/interface/grant_table.h"
 
 #define TYPE_XEN_XENSTORE "xen-xenstore"
 OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
@@ -66,6 +68,9 @@ struct XenXenstoreState {
 
 uint8_t *impl_state;
 uint32_t impl_state_size;
+
+struct xengntdev_handle *gt;
+void *granted_xs;
 };
 
 struct XenXenstoreState *xen_xenstore_singleton;
@@ -1453,6 +1458,17 @@ int xen_xenstore_reset(void)
 }
 s->be_port = err;
 
+/*
+ * We don't actually access the guest's page through the grant, because
+ * this isn't real Xen, and we can just use the page we gave it in the
+ * first place. Map the grant anyway, mostly for cosmetic purposes so
+ * it *looks* like it's in use in the guest-visible grant table.
+ */
+s->gt = qemu_xen_gnttab_open();
+uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
+s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
+ PROT_READ | PROT_WRITE);
+
 return 0;
 }
 
-- 
2.39.0




[PATCH v2 06/27] hw/xen: Implement XenStore permissions

2023-03-07 Thread David Woodhouse
From: Paul Durrant 

Store perms as a GList of strings, check permissions.

Signed-off-by: Paul Durrant 
Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_xenstore.c  |   2 +-
 hw/i386/kvm/xenstore_impl.c | 259 +---
 hw/i386/kvm/xenstore_impl.h |   8 +-
 tests/unit/test-xs-node.c   |  27 +++-
 4 files changed, 275 insertions(+), 21 deletions(-)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 64d8f1a38f..3b409e3817 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -98,7 +98,7 @@ static void xen_xenstore_realize(DeviceState *dev, Error 
**errp)
 aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
xen_xenstore_event, NULL, NULL, NULL, s);
 
-s->impl = xs_impl_create();
+s->impl = xs_impl_create(xen_domid);
 }
 
 static bool xen_xenstore_is_needed(void *opaque)
diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 60f42f61d6..8a2053e243 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -12,6 +12,8 @@
 #include "qemu/osdep.h"
 #include "qom/object.h"
 
+#include "hw/xen/xen.h"
+
 #include "xen_xenstore.h"
 #include "xenstore_impl.h"
 
@@ -30,6 +32,7 @@
 typedef struct XsNode {
 uint32_t ref;
 GByteArray *content;
+GList *perms;
 GHashTable *children;
 uint64_t gencnt;
 bool deleted_in_tx;
@@ -133,6 +136,9 @@ static inline void xs_node_unref(XsNode *n)
 if (n->content) {
 g_byte_array_unref(n->content);
 }
+if (n->perms) {
+g_list_free_full(n->perms, g_free);
+}
 if (n->children) {
 g_hash_table_unref(n->children);
 }
@@ -144,8 +150,51 @@ static inline void xs_node_unref(XsNode *n)
 g_free(n);
 }
 
+char *xs_perm_as_string(unsigned int perm, unsigned int domid)
+{
+char letter;
+
+switch (perm) {
+case XS_PERM_READ | XS_PERM_WRITE:
+letter = 'b';
+break;
+case XS_PERM_READ:
+letter = 'r';
+break;
+case XS_PERM_WRITE:
+letter = 'w';
+break;
+case XS_PERM_NONE:
+default:
+letter = 'n';
+break;
+}
+
+return g_strdup_printf("%c%u", letter, domid);
+}
+
+static gpointer do_perm_copy(gconstpointer src, gpointer user_data)
+{
+return g_strdup(src);
+}
+
+static XsNode *xs_node_create(const char *name, GList *perms)
+{
+XsNode *n = xs_node_new();
+
+#ifdef XS_NODE_UNIT_TEST
+if (name) {
+n->name = g_strdup(name);
+}
+#endif
+
+n->perms = g_list_copy_deep(perms, do_perm_copy, NULL);
+
+return n;
+}
+
 /* For copying from one hash table to another using g_hash_table_foreach() */
-static void do_insert(gpointer key, gpointer value, gpointer user_data)
+static void do_child_insert(gpointer key, gpointer value, gpointer user_data)
 {
 g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value));
 }
@@ -162,12 +211,16 @@ static XsNode *xs_node_copy(XsNode *old)
 }
 #endif
 
+assert(old);
 if (old->children) {
 n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
 (GDestroyNotify)xs_node_unref);
-g_hash_table_foreach(old->children, do_insert, n->children);
+g_hash_table_foreach(old->children, do_child_insert, n->children);
 }
-if (old && old->content) {
+if (old->perms) {
+n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL);
+}
+if (old->content) {
 n->content = g_byte_array_ref(old->content);
 }
 return n;
@@ -383,6 +436,9 @@ static XsNode *xs_node_copy_deleted(XsNode *old, struct 
walk_op *op)
 op->op_opaque2 = n->children;
 g_hash_table_foreach(old->children, copy_deleted_recurse, op);
 }
+if (old->perms) {
+n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL);
+}
 n->deleted_in_tx = true;
 /* If it gets resurrected we only fire a watch if it lost its content */
 if (old->content) {
@@ -417,6 +473,104 @@ static int xs_node_rm(XsNode **n, struct walk_op *op)
 return 0;
 }
 
+static int xs_node_get_perms(XsNode **n, struct walk_op *op)
+{
+GList **perms = op->op_opaque;
+
+assert(op->inplace);
+assert(*n);
+
+*perms = g_list_copy_deep((*n)->perms, do_perm_copy, NULL);
+return 0;
+}
+
+static void parse_perm(const char *perm, char *letter, unsigned int *dom_id)
+{
+unsigned int n = sscanf(perm, "%c%u", letter, dom_id);
+
+assert(n == 2);
+}
+
+static bool can_access(unsigned int dom_id, GList *perms, const char *letters)
+{
+unsigned int i, n;
+char perm_letter;
+unsigned int perm_dom_id;
+bool access;
+
+if (dom_id == 0) {
+return true;
+}
+
+n = g_list_length(perms);
+assert(n >= 1);
+
+/*
+ * The dom_id of the first perm is the owner, and the owner always has
+ * read-write access.
+ */
+

[PATCH v2 26/27] MAINTAINERS: Add entry for Xen on KVM emulation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 MAINTAINERS | 9 +
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5340de0515..640deb2895 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -443,6 +443,15 @@ F: target/i386/kvm/
 F: target/i386/sev*
 F: scripts/kvm/vmxcap
 
+Xen emulation on X86 KVM CPUs
+M: David Woodhouse 
+M: Paul Durrant 
+S: Supported
+F: include/sysemu/kvm_xen.h
+F: target/i386/kvm/xen*
+F: hw/i386/kvm/xen*
+F: tests/avocado/xen_guest.py
+
 Guest CPU Cores (other accelerators)
 
 Overall
-- 
2.39.0




[PATCH v2 21/27] hw/xen: Add emulated implementation of grant table operations

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This is limited to mapping a single grant at a time, because under Xen the
pages are mapped *contiguously* into qemu's address space, and that's very
hard to do when those pages actually come from anonymous mappings in qemu
in the first place.

Eventually perhaps we can look at using shared mappings of actual objects
for system RAM, and then we can make new mappings of the same backing
store (be it deleted files, shmem, whatever). But for now let's stick to
a page at a time.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_gnttab.c | 299 ++-
 1 file changed, 296 insertions(+), 3 deletions(-)

diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index 1e691ded32..2bf91d36c0 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -22,6 +22,7 @@
 
 #include "hw/sysbus.h"
 #include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
 #include "xen_overlay.h"
 #include "xen_gnttab.h"
 
@@ -34,11 +35,10 @@
 #define TYPE_XEN_GNTTAB "xen-gnttab"
 OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)
 
-#define XEN_PAGE_SHIFT 12
-#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
-
 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
 
+static struct gnttab_backend_ops emu_gnttab_backend_ops;
+
 struct XenGnttabState {
 /*< private >*/
 SysBusDevice busdev;
@@ -57,6 +57,8 @@ struct XenGnttabState {
 MemoryRegion gnt_frames;
 MemoryRegion *gnt_aliases;
 uint64_t *gnt_frame_gpas;
+
+uint8_t *map_track;
 };
 
 struct XenGnttabState *xen_gnttab_singleton;
@@ -88,9 +90,15 @@ static void xen_gnttab_realize(DeviceState *dev, Error 
**errp)
 s->gnt_frame_gpas[i] = INVALID_GPA;
 }
 
+s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
 qemu_mutex_init(&s->gnt_lock);
 
 xen_gnttab_singleton = s;
+
+s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);
+
+xen_gnttab_ops = &emu_gnttab_backend_ops;
 }
 
 static int xen_gnttab_post_load(void *opaque, int version_id)
@@ -230,3 +238,288 @@ int xen_gnttab_query_size_op(struct gnttab_query_size 
*size)
 size->max_nr_frames = s->max_frames;
 return 0;
 }
+
+/* Track per-open refs, to allow close() to clean up. */
+struct active_ref {
+MemoryRegionSection mrs;
+void *virtaddr;
+uint32_t refcnt;
+int prot;
+};
+
+static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
+  MemoryRegionSection *mrs, int prot)
+{
+if (mrs && mrs->mr) {
+if (prot & PROT_WRITE) {
+memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
+XEN_PAGE_SIZE);
+}
+memory_region_unref(mrs->mr);
+mrs->mr = NULL;
+}
+assert(s->map_track[ref] != 0);
+
+if (--s->map_track[ref] == 0) {
+grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
+qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
+}
+}
+
+static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
+{
+uint16_t mask = GTF_type_mask | GTF_sub_page;
+grant_entry_v1_t gnt, *gnt_p;
+int retries = 0;
+
+if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
+s->map_track[ref] == UINT8_MAX) {
+return INVALID_GPA;
+}
+
+if (prot & PROT_WRITE) {
+mask |= GTF_readonly;
+}
+
+gnt_p = &s->entries.v1[ref];
+
+/*
+ * The guest can legitimately be changing the GTF_readonly flag. Allow
+ * that, but don't let a malicious guest cause a livelock.
+ */
+for (retries = 0; retries < 5; retries++) {
+uint16_t new_flags;
+
+/* Read the entry before an atomic operation on its flags */
+gnt = *(volatile grant_entry_v1_t *)gnt_p;
+
+if ((gnt.flags & mask) != GTF_permit_access ||
+gnt.domid != DOMID_QEMU) {
+return INVALID_GPA;
+}
+
+new_flags = gnt.flags | GTF_reading;
+if (prot & PROT_WRITE) {
+new_flags |= GTF_writing;
+}
+
+if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) 
{
+return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
+}
+}
+
+return INVALID_GPA;
+}
+
+struct xengntdev_handle {
+GHashTable *active_maps;
+};
+
+static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
+uint32_t nr_grants)
+{
+return 0;
+}
+
+static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
+uint32_t count, uint32_t domid,
+uint32_t *refs, int prot)
+{
+XenGnttabState *s = xen_gnttab_singleton;
+struct active_ref *act;
+
+if (!s) {
+errno = ENOTSUP;
+return NULL;
+}
+
+if (domid != xen_domid) {
+errno = EINVAL;
+return NULL;
+}

[PATCH v2 12/27] hw/xen: Add foreignmem operations to allow redirection to internal emulation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/char/xen_console.c|  8 +++---
 hw/display/xenfb.c   | 20 +++---
 hw/xen/xen-operations.c  | 45 
 include/hw/xen/xen_backend_ops.h | 26 ++
 include/hw/xen/xen_common.h  | 13 -
 softmmu/globals.c|  1 +
 tests/unit/test-xs-node.c|  1 +
 7 files changed, 88 insertions(+), 26 deletions(-)

diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 19ad6c946a..e9cef3e1ef 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -237,9 +237,9 @@ static int con_initialise(struct XenLegacyDevice *xendev)
 
 if (!xendev->dev) {
 xen_pfn_t mfn = con->ring_ref;
-con->sring = xenforeignmemory_map(xen_fmem, con->xendev.dom,
-  PROT_READ | PROT_WRITE,
-  1, &mfn, NULL);
+con->sring = qemu_xen_foreignmem_map(con->xendev.dom, NULL,
+ PROT_READ | PROT_WRITE,
+ 1, &mfn, NULL);
 } else {
 con->sring = xen_be_map_grant_ref(xendev, con->ring_ref,
   PROT_READ | PROT_WRITE);
@@ -269,7 +269,7 @@ static void con_disconnect(struct XenLegacyDevice *xendev)
 
 if (con->sring) {
 if (!xendev->dev) {
-xenforeignmemory_unmap(xen_fmem, con->sring, 1);
+qemu_xen_foreignmem_unmap(con->sring, 1);
 } else {
 xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref);
 }
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 260eb38a76..2c4016fcbd 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -98,8 +98,9 @@ static int common_bind(struct common *c)
 if (xenstore_read_fe_int(&c->xendev, "event-channel", 
&c->xendev.remote_port) == -1)
 return -1;
 
-c->page = xenforeignmemory_map(xen_fmem, c->xendev.dom,
-   PROT_READ | PROT_WRITE, 1, &mfn, NULL);
+c->page = qemu_xen_foreignmem_map(c->xendev.dom, NULL,
+  PROT_READ | PROT_WRITE, 1, &mfn,
+  NULL);
 if (c->page == NULL)
 return -1;
 
@@ -115,7 +116,7 @@ static void common_unbind(struct common *c)
 {
 xen_pv_unbind_evtchn(&c->xendev);
 if (c->page) {
-xenforeignmemory_unmap(xen_fmem, c->page, 1);
+qemu_xen_foreignmem_unmap(c->page, 1);
 c->page = NULL;
 }
 }
@@ -500,15 +501,16 @@ static int xenfb_map_fb(struct XenFB *xenfb)
 fbmfns = g_new0(xen_pfn_t, xenfb->fbpages);
 
 xenfb_copy_mfns(mode, n_fbdirs, pgmfns, pd);
-map = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom,
-   PROT_READ, n_fbdirs, pgmfns, NULL);
+map = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL, PROT_READ,
+  n_fbdirs, pgmfns, NULL);
 if (map == NULL)
 goto out;
 xenfb_copy_mfns(mode, xenfb->fbpages, fbmfns, map);
-xenforeignmemory_unmap(xen_fmem, map, n_fbdirs);
+qemu_xen_foreignmem_unmap(map, n_fbdirs);
 
-xenfb->pixels = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom,
-PROT_READ, xenfb->fbpages, fbmfns, NULL);
+xenfb->pixels = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL,
+PROT_READ, xenfb->fbpages,
+fbmfns, NULL);
 if (xenfb->pixels == NULL)
 goto out;
 
@@ -927,7 +929,7 @@ static void fb_disconnect(struct XenLegacyDevice *xendev)
  *   Replacing the framebuffer with anonymous shared memory
  *   instead.  This releases the guest pages and keeps qemu happy.
  */
-xenforeignmemory_unmap(xen_fmem, fb->pixels, fb->fbpages);
+qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages);
 fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE,
   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON,
   -1, 0);
diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c
index c5956d28c6..440e566bb1 100644
--- a/hw/xen/xen-operations.c
+++ b/hw/xen/xen-operations.c
@@ -22,6 +22,7 @@
  */
 #undef XC_WANT_COMPAT_EVTCHN_API
 #undef XC_WANT_COMPAT_GNTTAB_API
+#undef XC_WANT_COMPAT_MAP_FOREIGN_API
 
 #include <xenctrl.h>
 
@@ -56,10 +57,13 @@ typedef xc_gnttab xengnttab_handle;
 #define xengnttab_map_domain_grant_refs(h, c, d, r, p) \
 xc_gnttab_map_domain_grant_refs(h, c, d, r, p)
 
+typedef xc_interface xenforeignmemory_handle;
+
 #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
 
 #include <xenevtchn.h>
 #include <xengnttab.h>
+#include <xenforeignmemory.h>
 
 #endif
 
@@ -218,6 +222,46 @@ static struct gnttab_backend_ops libxengnttab_backend_ops 
= {
 .unmap = libxengnttab_backend_unmap,
 };
 
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
+
+static void *libxenforeignmem_backend_map(uint32_t dom, 

[PATCH v2 27/27] docs: Update Xen-on-KVM documentation for PV disk support

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 docs/system/i386/xen.rst | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/docs/system/i386/xen.rst b/docs/system/i386/xen.rst
index a00523b492..f06765e88c 100644
--- a/docs/system/i386/xen.rst
+++ b/docs/system/i386/xen.rst
@@ -9,6 +9,8 @@ KVM has support for hosting Xen guests, intercepting Xen 
hypercalls and event
 channel (Xen PV interrupt) delivery. This allows guests which expect to be
 run under Xen to be hosted in QEMU under Linux/KVM instead.
 
+Using the split irqchip is mandatory for Xen support.
+
 Setup
 -
 
@@ -17,14 +19,14 @@ accelerator, for example for Xen 4.10:
 
 .. parsed-literal::
 
-  |qemu_system| --accel kvm,xen-version=0x4000a
+  |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split
 
 Additionally, virtual APIC support can be advertised to the guest through the
 ``xen-vapic`` CPU flag:
 
 .. parsed-literal::
 
-  |qemu_system| --accel kvm,xen-version=0x4000a --cpu host,+xen_vapic
+  |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split --cpu 
host,+xen_vapic
 
 When Xen support is enabled, QEMU changes hypervisor identification (CPUID
 0x40000000..0x4000000A) to Xen. The KVM identification and features are not
@@ -33,11 +35,25 @@ moves to leaves 0x40000100..0x4000010A.
 
 The Xen platform device is enabled automatically for a Xen guest. This allows
 a guest to unplug all emulated devices, in order to use Xen PV block and 
network
-drivers instead. Note that until the Xen PV device back ends are enabled to 
work
-with Xen mode in QEMU, that is unlikely to cause significant joy. Linux guests
-can be dissuaded from this by adding 'xen_emul_unplug=never' on their command
-line, and it can also be noted that AHCI disk controllers are exempt from being
-unplugged, as are passthrough VFIO PCI devices.
+drivers instead. Under Xen, the boot disk is typically available both via IDE
+emulation, and as a PV block device. Guest bootloaders typically use IDE to 
load
+the guest kernel, which then unplugs the IDE and continues with the Xen PV 
block
+device.
+
+This configuration can be achieved as follows:
+
+.. parsed-literal::
+
+  |qemu_system| -M pc --accel kvm,xen-version=0x4000a,kernel-irqchip=split \\
+   -drive file=${GUEST_IMAGE},if=none,id=disk,file.locking=off -device 
xen-disk,drive=disk,vdev=xvda \\
+   -drive file=${GUEST_IMAGE},index=2,media=disk,file.locking=off,if=ide
+
+It is necessary to use the pc machine type, as the q35 machine uses AHCI 
instead
+of legacy IDE, and AHCI disks are not unplugged through the Xen PV unplug
+mechanism.
+
+VirtIO devices can also be used; Linux guests may need to be dissuaded from
+unplugging them by adding 'xen_emul_unplug=never' on their command line.
 
 Properties
 --
-- 
2.39.0




[PATCH v2 17/27] hw/xen: Build PV backend drivers for CONFIG_XEN_BUS

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Now that we have the redirectable Xen backend operations we can build the
PV backends even without the Xen libraries.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/9pfs/meson.build| 2 +-
 hw/block/dataplane/meson.build | 2 +-
 hw/block/meson.build   | 2 +-
 hw/char/meson.build| 2 +-
 hw/display/meson.build | 2 +-
 hw/usb/meson.build | 2 +-
 hw/xen/meson.build | 5 -
 7 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build
index 12443b6ad5..fd37b7a02d 100644
--- a/hw/9pfs/meson.build
+++ b/hw/9pfs/meson.build
@@ -15,7 +15,7 @@ fs_ss.add(files(
 ))
 fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c'))
 fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c'))
-fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c'))
+fs_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-9p-backend.c'))
 softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss)
 
 specific_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-device.c'))
diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build
index 12c6a264f1..78d7ac1a11 100644
--- a/hw/block/dataplane/meson.build
+++ b/hw/block/dataplane/meson.build
@@ -1,2 +1,2 @@
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
-specific_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
+specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c'))
diff --git a/hw/block/meson.build b/hw/block/meson.build
index b434d5654c..cc2a75cc50 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: 
files('pflash_cfi02.c'))
 softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c'))
 softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c'))
 softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c'))
 softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c'))
 
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 
'virtio-blk-common.c'))
diff --git a/hw/char/meson.build b/hw/char/meson.build
index 7b594f51b8..e02c60dd54 100644
--- a/hw/char/meson.build
+++ b/hw/char/meson.build
@@ -18,7 +18,7 @@ softmmu_ss.add(when: 'CONFIG_SERIAL_PCI', if_true: 
files('serial-pci.c'))
 softmmu_ss.add(when: 'CONFIG_SERIAL_PCI_MULTI', if_true: 
files('serial-pci-multi.c'))
 softmmu_ss.add(when: 'CONFIG_SHAKTI_UART', if_true: files('shakti_uart.c'))
 softmmu_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: 
files('virtio-console.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen_console.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen_console.c'))
 softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_uartlite.c'))
 
 softmmu_ss.add(when: 'CONFIG_AVR_USART', if_true: files('avr_usart.c'))
diff --git a/hw/display/meson.build b/hw/display/meson.build
index f470179122..4191694380 100644
--- a/hw/display/meson.build
+++ b/hw/display/meson.build
@@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PL110', if_true: 
files('pl110.c'))
 softmmu_ss.add(when: 'CONFIG_SII9022', if_true: files('sii9022.c'))
 softmmu_ss.add(when: 'CONFIG_SSD0303', if_true: files('ssd0303.c'))
 softmmu_ss.add(when: 'CONFIG_SSD0323', if_true: files('ssd0323.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xenfb.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xenfb.c'))
 
 softmmu_ss.add(when: 'CONFIG_VGA_PCI', if_true: files('vga-pci.c'))
 softmmu_ss.add(when: 'CONFIG_VGA_ISA', if_true: files('vga-isa.c'))
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index bdf34cbd3e..599dc24f0d 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -84,6 +84,6 @@ if libusb.found()
   hw_usb_modules += {'host': usbhost_ss}
 endif
 
-softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN', libusb], if_true: 
files('xen-usb.c'))
+softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN_BUS', libusb], if_true: 
files('xen-usb.c'))
 
 modules += { 'hw-usb': hw_usb_modules }
diff --git a/hw/xen/meson.build b/hw/xen/meson.build
index f195bbd25c..19c6aabc7c 100644
--- a/hw/xen/meson.build
+++ b/hw/xen/meson.build
@@ -1,10 +1,13 @@
-softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
+softmmu_ss.add(when: ['CONFIG_XEN_BUS'], if_true: files(
   'xen-backend.c',
   'xen-bus-helper.c',
   'xen-bus.c',
   'xen-legacy-backend.c',
   'xen_devconfig.c',
   'xen_pvdev.c',
+))
+
+softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
   'xen-operations.c',
 ))
 
-- 
2.39.0




[PATCH v2 02/27] hw/xen: Add basic XenStore tree walk and write/read/directory support

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This is a fairly simple implementation of a copy-on-write tree.

The node walk function starts off at the root, with 'inplace == true'.
If it ever encounters a node with a refcount greater than one (including
the root node), then that node is shared with other trees, and cannot
be modified in place, so the inplace flag is cleared and we copy on
write from there on down.

Xenstore write has 'mkdir -p' semantics and will create the intermediate
nodes if they don't already exist, so in that case we flip the inplace
flag back to true as we populate the newly-created nodes.

We put a copy of the absolute path into the buffer in the struct walk_op,
with *two* NUL terminators at the end. As xs_node_walk() goes down the
tree, it replaces the next '/' separator with a NUL so that it can use
the 'child name' in place. The next recursion down then puts the '/'
back and repeats the exercise for the next path element... if it doesn't
hit that *second* NUL termination which indicates the true end of the
path.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xenstore_impl.c | 527 +++-
 tests/unit/meson.build  |   1 +
 tests/unit/test-xs-node.c   | 197 ++
 3 files changed, 718 insertions(+), 7 deletions(-)
 create mode 100644 tests/unit/test-xs-node.c

diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 31dbc98fe0..9e10a31bea 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -10,13 +10,470 @@
  */
 
 #include "qemu/osdep.h"
+#include "qom/object.h"
 
 #include "xen_xenstore.h"
 #include "xenstore_impl.h"
 
+#include "hw/xen/interface/io/xs_wire.h"
+
+/* Fixed limits for the emulated XenStore (enforcement is outside this hunk) */
+#define XS_MAX_WATCHES          128
+#define XS_MAX_DOMAIN_NODES     1000
+#define XS_MAX_NODE_SIZE        2048
+#define XS_MAX_TRANSACTIONS     10
+#define XS_MAX_PERMS_PER_NODE   5
+
+/* NOTE(review): presumably the characters accepted in a path -- confirm */
+#define XS_VALID_CHARS "abcdefghijklmnopqrstuvwxyz" \
+                       "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
+                       "0123456789-/_"
+
+/*
+ * One node of the XenStore tree. Nodes are reference counted so that a
+ * node can be reachable from more than one parent table at a time.
+ */
+typedef struct XsNode {
+    uint32_t ref;           /* reference count; freed when it reaches zero */
+    GByteArray *content;    /* node value, or NULL; shared by reference */
+    GHashTable *children;   /* child name -> XsNode, or NULL if none yet */
+    uint64_t gencnt;        /* carried over unchanged by xs_node_copy() */
+#ifdef XS_NODE_UNIT_TEST
+    gchar *name;            /* debug only */
+#endif
+} XsNode;
+
 struct XenstoreImplState {
+XsNode *root;
+unsigned int nr_nodes;
 };
 
+/* Allocate a zero-filled node that starts out holding a single reference. */
+static inline XsNode *xs_node_new(void)
+{
+    XsNode *node = g_new0(XsNode, 1);
+
+#ifdef XS_NODE_UNIT_TEST
+    /* The unit test keeps a census of every live node */
+    xs_node_list = g_list_prepend(xs_node_list, node);
+    nr_xs_nodes++;
+#endif
+    node->ref = 1;
+    return node;
+}
+
+/* Take one more reference on @n and hand the same pointer back. */
+static inline XsNode *xs_node_ref(XsNode *n)
+{
+    /* The node must still be referenced by someone */
+    g_assert(n->ref);
+
+    /* With just 10 transactions, it can never get anywhere near this. */
+    g_assert(n->ref < INT_MAX);
+
+    n->ref += 1;
+    return n;
+}
+
+/*
+ * Drop one reference on @n; a NULL @n is ignored. Once the last reference
+ * is gone the content and the children table are released, followed by the
+ * node itself.
+ */
+static inline void xs_node_unref(XsNode *n)
+{
+    if (n == NULL) {
+        return;
+    }
+
+    g_assert(n->ref);
+    n->ref -= 1;
+    if (n->ref != 0) {
+        /* Somebody else still holds this node */
+        return;
+    }
+
+    if (n->content) {
+        g_byte_array_unref(n->content);
+    }
+    if (n->children) {
+        /* The table's value destructor is xs_node_unref() */
+        g_hash_table_unref(n->children);
+    }
+#ifdef XS_NODE_UNIT_TEST
+    xs_node_list = g_list_remove(xs_node_list, n);
+    nr_xs_nodes--;
+    g_free(n->name);
+#endif
+    g_free(n);
+}
+
+/*
+ * g_hash_table_foreach() callback which duplicates one entry into the table
+ * passed as @user_data: the key is copied, the node gains a reference.
+ */
+static void do_insert(gpointer key, gpointer value, gpointer user_data)
+{
+    GHashTable *dest = user_data;
+    XsNode *child = xs_node_ref(value);
+
+    g_hash_table_insert(dest, g_strdup(key), child);
+}
+
+/*
+ * Create a new node, holding a single reference, which mirrors @old: the
+ * same generation count, a children table holding its own reference on each
+ * of @old's children, and a shared reference on @old's content.
+ *
+ * @old must not be NULL.
+ */
+static XsNode *xs_node_copy(XsNode *old)
+{
+    XsNode *n = xs_node_new();
+
+    n->gencnt = old->gencnt;
+    if (old->children) {
+        n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+                                            (GDestroyNotify)xs_node_unref);
+        g_hash_table_foreach(old->children, do_insert, n->children);
+    }
+    /* @old has already been dereferenced, so only the content needs a test */
+    if (old->content) {
+        n->content = g_byte_array_ref(old->content);
+    }
+    return n;
+}
+
+/*
+ * Store @child under the name @path_elem in @n, or remove that entry when
+ * @child is NULL. @path_elem must be a single path element (no '/').
+ *
+ * Returns true if it made a change to the hash table.
+ */
+static bool xs_node_add_child(XsNode *n, const char *path_elem, XsNode *child)
+{
+    assert(!strchr(path_elem, '/'));
+
+    if (child) {
+#ifdef XS_NODE_UNIT_TEST
+        g_free(child->name);
+        child->name = g_strdup(path_elem);
+#endif
+        /* The children table is only created once it is first needed */
+        if (!n->children) {
+            n->children = g_hash_table_new_full(g_str_hash, g_str_equal,
+                                                g_free,
+                                                (GDestroyNotify)xs_node_unref);
+        }
+
+        /*
+         * g_hash_table_insert() is documented as returning "a boolean value
+         * to indicate whether the newly added value was already in the hash
+         * table or not"; TRUE means that it was *not* already there.
+         */
+        return g_hash_table_insert(n->children, g_strdup(path_elem), child);
+    }
+
+    /* Removal only makes sense if there is a table to remove from */
+    assert(n->children);
+    return g_hash_table_remove(n->children, path_elem);
+}
+
+struct walk_op {
+struct XenstoreImplState *s;
+char path[XENSTORE_ABS_PATH_MAX + 2]; /* Two NUL 

[PATCH v2 13/27] hw/xen: Add xenstore operations to allow redirection to internal emulation

2023-03-07 Thread David Woodhouse
From: Paul Durrant 

Signed-off-by: Paul Durrant 
Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 accel/xen/xen-all.c |  11 +-
 hw/char/xen_console.c   |   2 +-
 hw/i386/kvm/xen_xenstore.c  |   3 -
 hw/i386/kvm/xenstore_impl.h |   8 +-
 hw/xen/xen-bus-helper.c |  62 +++
 hw/xen/xen-bus.c| 261 
 hw/xen/xen-legacy-backend.c | 119 +++--
 hw/xen/xen-operations.c | 198 +
 hw/xen/xen_devconfig.c  |   4 +-
 hw/xen/xen_pt_graphics.c|   1 -
 hw/xen/xen_pvdev.c  |  49 +-
 include/hw/xen/xen-bus-helper.h |  26 +--
 include/hw/xen/xen-bus.h|  17 +-
 include/hw/xen/xen-legacy-backend.h |   6 +-
 include/hw/xen/xen_backend_ops.h| 163 +
 include/hw/xen/xen_common.h |   1 -
 include/hw/xen/xen_pvdev.h  |   2 +-
 softmmu/globals.c   |   1 +
 18 files changed, 525 insertions(+), 409 deletions(-)

diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index e85e4aeba5..425216230f 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -90,12 +90,15 @@ void xenstore_store_pv_console_info(int i, Chardev *chr)
 }
 
 
-static void xenstore_record_dm_state(struct xs_handle *xs, const char *state)
+static void xenstore_record_dm_state(const char *state)
 {
+struct xs_handle *xs;
 char path[50];
 
+/* We now have everything we need to set the xenstore entry. */
+xs = xs_open(0);
 if (xs == NULL) {
-error_report("xenstore connection not initialized");
+fprintf(stderr, "Could not contact XenStore\n");
 exit(1);
 }
 
@@ -109,6 +112,8 @@ static void xenstore_record_dm_state(struct xs_handle *xs, 
const char *state)
 error_report("error recording dm state");
 exit(1);
 }
+
+xs_close(xs);
 }
 
 
@@ -117,7 +122,7 @@ static void xen_change_state_handler(void *opaque, bool 
running,
 {
 if (running) {
 /* record state running */
-xenstore_record_dm_state(xenstore, "running");
+xenstore_record_dm_state("running");
 }
 }
 
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index e9cef3e1ef..ad8638a86d 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -181,7 +181,7 @@ static int con_init(struct XenLegacyDevice *xendev)
 const char *output;
 
 /* setup */
-dom = xs_get_domain_path(xenstore, con->xendev.dom);
+dom = qemu_xen_xs_get_domain_path(xenstore, con->xendev.dom);
 if (!xendev->dev) {
 snprintf(con->console, sizeof(con->console), "%s/console", dom);
 } else {
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index fb3648a058..35898e9b37 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -38,9 +38,6 @@
 #define TYPE_XEN_XENSTORE "xen-xenstore"
 OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
 
-#define XEN_PAGE_SHIFT 12
-#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
-
 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
 
diff --git a/hw/i386/kvm/xenstore_impl.h b/hw/i386/kvm/xenstore_impl.h
index bbe2391e2e..0df2a91aae 100644
--- a/hw/i386/kvm/xenstore_impl.h
+++ b/hw/i386/kvm/xenstore_impl.h
@@ -12,13 +12,7 @@
 #ifndef QEMU_XENSTORE_IMPL_H
 #define QEMU_XENSTORE_IMPL_H
 
-typedef uint32_t xs_transaction_t;
-
-#define XBT_NULL 0
-
-#define XS_PERM_NONE  0x00
-#define XS_PERM_READ  0x01
-#define XS_PERM_WRITE 0x02
+#include "hw/xen/xen_backend_ops.h"
 
 typedef struct XenstoreImplState XenstoreImplState;
 
diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c
index 5a1e12b374..b2b2cc9c5d 100644
--- a/hw/xen/xen-bus-helper.c
+++ b/hw/xen/xen-bus-helper.c
@@ -10,6 +10,7 @@
 #include "hw/xen/xen-bus.h"
 #include "hw/xen/xen-bus-helper.h"
 #include "qapi/error.h"
+#include "trace.h"
 
 #include 
 
@@ -46,34 +47,28 @@ const char *xs_strstate(enum xenbus_state state)
 return "INVALID";
 }
 
-void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid,
-const char *node, struct xs_permissions perms[],
-unsigned int nr_perms, Error **errp)
+void xs_node_create(struct qemu_xs_handle *h, xs_transaction_t tid,
+const char *node, unsigned int owner, unsigned int domid,
+unsigned int perms, Error **errp)
 {
 trace_xs_node_create(node);
 
-if (!xs_write(xsh, tid, node, "", 0)) {
+if (!qemu_xen_xs_create(h, tid, owner, domid, perms, node)) {
 error_setg_errno(errp, errno, "failed to create node '%s'", node);
-return;
-}
-
-if (!xs_set_permissions(xsh, tid, node, perms, nr_perms)) {
-error_setg_errno(errp, errno, "failed to set node '%s' permissions",
- node);
 }
 }
 
-void xs_node_destroy(struct 

[PATCH v2 00/27] Enable PV backends with Xen/KVM emulation

2023-03-07 Thread David Woodhouse
Following on from the basic platform support which has already been
merged, here's phase 2 which wires up the XenBus and PV back ends.

It starts with a basic single-tenant internal implementation of a 
XenStore, with a copy-on-write tree, watches, transactions, quotas.

Then we introduce operations tables for the grant table, event channel,
foreignmem and xenstore operations so that in addition to using the Xen
libraries for those, QEMU can use its internal emulated versions.

A little bit of cleaning up of header files, and we can enable the build
of xen-bus in the CONFIG_XEN_EMU build, and run a Xen guest with an
actual PV disk...

   qemu-system-x86_64 -serial mon:stdio -M q35 -display none -m 1G -smp 2 \
  -accel kvm,xen-version=0x4000e,kernel-irqchip=split \
  -kernel bzImage -append "console=ttyS0 root=/dev/xvda1 selinux=0" \
  -drive file=/var/lib/libvirt/images/fedora28.qcow2,if=none,id=disk \
  -device xen-disk,drive=disk,vdev=xvda

The main thing that isn't working here is migration. I've implemented it 
for the internal xenstore and the unit tests exercise it, but the 
existing PV back ends don't support it, perhaps partly because support
for guest-transparent live migration isn't upstream in Xen yet.
So the disk doesn't come back correctly after migration. I'm content 
with that for 8.0 though, and we just mark the emulated XenStore device
as unmigratable to prevent users from trying.

The other pre-existing constraint is that only the block back end has
yet been ported to the "new" XenBus infrastructure, and is actually
capable of creating its own backend nodes. Again, I can live with
that for 8.0. Maybe this will motivate us to finally get round to
converting the rest off XenLegacyBackend and killing it.

We also don't have a simple way to perform grant mapping of multiple
guest pages to contiguous addresses, as we can under real Xen. So we
don't advertise max-ring-page-order for xen-disk in the emulated mode.
Fixing that — if we actually want to — would probably require mapping
RAM from an actual backing store object, so that it can be mapped again
at a different location for the PV back end to see.

v2: https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv-2

 • Full set of reviewed-by tags from Paul (and associated minor fixes).

 • Disable migration for emulated XenStore device.

 • Update docs and add MAINTAINERS entry.

v1: 
https://lore.kernel.org/qemu-devel/20230302153435.1170111-1-dw...@infradead.org/
https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv-1

David Woodhouse (23):
  hw/xen: Add xenstore wire implementation and implementation stubs
  hw/xen: Add basic XenStore tree walk and write/read/directory support
  hw/xen: Implement XenStore watches
  hw/xen: Implement XenStore transactions
  hw/xen: Watches on XenStore transactions
  hw/xen: Implement core serialize/deserialize methods for xenstore_impl
  hw/xen: Add evtchn operations to allow redirection to internal emulation
  hw/xen: Add gnttab operations to allow redirection to internal emulation
  hw/xen: Pass grant ref to gnttab unmap operation
  hw/xen: Add foreignmem operations to allow redirection to internal 
emulation
  hw/xen: Move xenstore_store_pv_console_info to xen_console.c
  hw/xen: Use XEN_PAGE_SIZE in PV backend drivers
  hw/xen: Rename xen_common.h to xen_native.h
  hw/xen: Build PV backend drivers for CONFIG_XEN_BUS
  hw/xen: Only advertise ring-page-order for xen-block if gnttab supports it
  hw/xen: Hook up emulated implementation for event channel operations
  hw/xen: Add emulated implementation of grant table operations
  hw/xen: Add emulated implementation of XenStore operations
  hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore
  hw/xen: Implement soft reset for emulated gnttab
  i386/xen: Initialize Xen backends from pc_basic_device_init() for 
emulation
  MAINTAINERS: Add entry for Xen on KVM emulation
  docs: Update Xen-on-KVM documentation for PV disk support

Paul Durrant (4):
  hw/xen: Implement XenStore permissions
  hw/xen: Create initial XenStore nodes
  hw/xen: Add xenstore operations to allow redirection to internal emulation
  hw/xen: Avoid crash when backend watch fires too early

 MAINTAINERS   |9 +
 accel/xen/xen-all.c   |   69 +-
 docs/system/i386/xen.rst  |   30 +-
 hw/9pfs/meson.build   |2 +-
 hw/9pfs/xen-9p-backend.c  |   32 +-
 hw/block/dataplane/meson.build|2 +-
 hw/block/dataplane/xen-block.c|   12 +-
 hw/block/meson.build  |2 +-
 hw/block/xen-block.c  |   12 +-
 hw/char/meson.build   |2 +-
 hw/char/xen_console.c |   57 +-
 

[PATCH v2 08/27] hw/xen: Create initial XenStore nodes

2023-03-07 Thread David Woodhouse
From: Paul Durrant 

Signed-off-by: Paul Durrant 
Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_xenstore.c | 70 ++
 1 file changed, 70 insertions(+)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 520422b147..fb3648a058 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -76,9 +76,39 @@ struct XenXenstoreState *xen_xenstore_singleton;
 static void xen_xenstore_event(void *opaque);
 static void fire_watch_cb(void *opaque, const char *path, const char *token);
 
+/*
+ * Format a value printf-style, write it to the node
+ * /local/domain/<xen_domid>/<relpath> and then apply @perms to that node.
+ * Both operations are issued as DOMID_QEMU outside any transaction and are
+ * asserted to succeed.
+ */
+static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
+                                                GList *perms,
+                                                const char *relpath,
+                                                const char *fmt, ...)
+{
+    gchar *node_path;
+    gchar *text;
+    GByteArray *payload;
+    va_list ap;
+    int rc;
+
+    va_start(ap, fmt);
+    text = g_strdup_vprintf(fmt, ap);
+    va_end(ap);
+
+    /*
+     * The byte array takes over the formatted string, so it is not freed
+     * separately; the length leaves out the terminating NUL.
+     */
+    payload = g_byte_array_new_take((void *)text, strlen(text));
+
+    node_path = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
+
+    rc = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, node_path, payload);
+    assert(!rc);
+    g_byte_array_unref(payload);
+
+    rc = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, node_path, perms);
+    assert(!rc);
+
+    g_free(node_path);
+}
+
 static void xen_xenstore_realize(DeviceState *dev, Error **errp)
 {
 XenXenstoreState *s = XEN_XENSTORE(dev);
+GList *perms;
 
 if (xen_mode != XEN_EMULATE) {
 error_setg(errp, "Xen xenstore support is for Xen emulation");
@@ -102,6 +132,46 @@ static void xen_xenstore_realize(DeviceState *dev, Error 
**errp)
xen_xenstore_event, NULL, NULL, NULL, s);
 
 s->impl = xs_impl_create(xen_domid);
+
+/* Populate the default nodes */
+
+/* Nodes owned by 'dom0' but readable by the guest */
+perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
+perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
+
+relpath_printf(s, perms, "", "%s", "");
+
+relpath_printf(s, perms, "domid", "%u", xen_domid);
+
+relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", 
"%u", 1);
+relpath_printf(s, perms, 
"control/platform-feature-multiprocessor-suspend", "%u", 1);
+
+relpath_printf(s, perms, "platform/acpi", "%u", 1);
+relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
+relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
+relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);
+
+g_list_free_full(perms, g_free);
+
+/* Nodes owned by the guest */
+perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));
+
+relpath_printf(s, perms, "attr", "%s", "");
+
+relpath_printf(s, perms, "control/shutdown", "%s", "");
+relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
+relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
+relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
+relpath_printf(s, perms, "control/feature-s3", "%u", 1);
+relpath_printf(s, perms, "control/feature-s4", "%u", 1);
+
+relpath_printf(s, perms, "data", "%s", "");
+relpath_printf(s, perms, "device", "%s", "");
+relpath_printf(s, perms, "drivers", "%s", "");
+relpath_printf(s, perms, "error", "%s", "");
+relpath_printf(s, perms, "feature", "%s", "");
+
+g_list_free_full(perms, g_free);
 }
 
 static bool xen_xenstore_is_needed(void *opaque)
-- 
2.39.0




[PATCH v2 03/27] hw/xen: Implement XenStore watches

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Starts out fairly simple: a hash table of watches based on the path.

Except there can be multiple watches on the same path, so the watch ends
up being a simple linked list, and the head of that list is in the hash
table. Which makes removal a bit of a PITA but it's not so bad; we just
special-case "I had to remove the head of the list and now I have to
replace it in / remove it from the hash table". And if we don't remove
the head, it's a simple linked-list operation.

We do need to fire watches on *deleted* nodes, so instead of just a simple
xs_node_unref() on the topmost victim, we need to recurse down and fire
watches on them all.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xenstore_impl.c | 253 +---
 tests/unit/test-xs-node.c   |  85 
 2 files changed, 323 insertions(+), 15 deletions(-)

diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
index 9e10a31bea..9c2348835f 100644
--- a/hw/i386/kvm/xenstore_impl.c
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -37,9 +37,20 @@ typedef struct XsNode {
 #endif
 } XsNode;
 
+/*
+ * A single registered watch. Several watches may exist for one path; they
+ * are chained together through 'next'.
+ */
+typedef struct XsWatch {
+    struct XsWatch *next;   /* next watch in the same chain, or NULL */
+    xs_impl_watch_fn *cb;   /* called when the watch fires */
+    void *cb_opaque;        /* first argument handed to cb */
+    char *token;            /* handed to cb as its last argument */
+    unsigned int dom_id;
+    int rel_prefix;         /* leading path characters hidden from cb */
+} XsWatch;
+
 struct XenstoreImplState {
 XsNode *root;
 unsigned int nr_nodes;
+GHashTable *watches;
+unsigned int nr_domu_watches;
 };
 
 static inline XsNode *xs_node_new(void)
@@ -146,6 +157,7 @@ struct walk_op {
 void *op_opaque;
 void *op_opaque2;
 
+GList *watches;
 unsigned int dom_id;
 
 /* The number of nodes which will exist in the tree if this op succeeds. */
@@ -166,6 +178,35 @@ struct walk_op {
 bool create_dirs;
 };
 
+/*
+ * Invoke the callbacks of the watches affected by a change to op->path.
+ *
+ * Nothing happens unless op->mutating is set. The watches registered on
+ * op->path itself are always fired; when @parents is true, the watch chains
+ * queued in op->watches are fired after them. Every callback is given
+ * op->path with the watch's own rel_prefix leading characters skipped.
+ */
+static void fire_watches(struct walk_op *op, bool parents)
+{
+    GList *pending;
+    XsWatch *watch;
+
+    if (!op->mutating) {
+        return;
+    }
+
+    pending = parents ? op->watches : NULL;
+    watch = g_hash_table_lookup(op->s->watches, op->path);
+
+    for (;;) {
+        /* Run down the current chain of watches */
+        for (; watch; watch = watch->next) {
+            assert(strlen(op->path) > watch->rel_prefix);
+            watch->cb(watch->cb_opaque, op->path + watch->rel_prefix,
+                      watch->token);
+        }
+
+        if (!pending) {
+            break;
+        }
+
+        /* Fire the parent nodes from 'op' if asked to */
+        watch = pending->data;
+        pending = pending->next;
+    }
+}
+
 static int xs_node_add_content(XsNode **n, struct walk_op *op)
 {
 GByteArray *data = op->op_opaque;
@@ -213,6 +254,8 @@ static int xs_node_get_content(XsNode **n, struct walk_op 
*op)
 static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data)
 {
 struct walk_op *op = user_data;
+int path_len = strlen(op->path);
+int key_len = strlen(key);
 XsNode *n = value;
 bool this_inplace = op->inplace;
 
@@ -220,11 +263,22 @@ static int node_rm_recurse(gpointer key, gpointer value, 
gpointer user_data)
 op->inplace = 0;
 }
 
+assert(key_len + path_len + 2 <= sizeof(op->path));
+op->path[path_len] = '/';
+memcpy(op->path + path_len + 1, key, key_len + 1);
+
 if (n->children) {
 g_hash_table_foreach_remove(n->children, node_rm_recurse, op);
 }
 op->new_nr_nodes--;
 
+/*
+ * Fire watches on *this* node but not the parents because they are
+ * going to be deleted too, so the watch will fire for them anyway.
+ */
+fire_watches(op, false);
+op->path[path_len] = '\0';
+
 /*
  * Actually deleting the child here is just an optimisation; if we
  * don't then the final unref on the topmost victim will just have
@@ -238,7 +292,7 @@ static int xs_node_rm(XsNode **n, struct walk_op *op)
 {
 bool this_inplace = op->inplace;
 
-/* Keep count of the nodes in the subtree which gets deleted. */
+/* Fire watches for, and count, nodes in the subtree which get deleted */
 if ((*n)->children) {
 g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op);
 }
@@ -269,9 +323,11 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 XsNode *old = *n, *child = NULL;
 bool stole_child = false;
 bool this_inplace;
+XsWatch *watch;
 int err;
 
 namelen = strlen(op->path);
+watch = g_hash_table_lookup(op->s->watches, op->path);
 
 /* Is there a child, or do we hit the double-NUL termination? */
 if (op->path[namelen + 1]) {
@@ -292,6 +348,9 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 if (!child_name) {
 /* This is the actual node on which the operation shall be performed */
 err = op->op_fn(n, op);
+if (!err) {
+fire_watches(op, true);
+}
 goto out;
 }
 
@@ -333,11 +392,24 @@ static int xs_node_walk(XsNode **n, struct walk_op *op)
 goto out;
 }
 
+/*
+ * If there's a watch on this node, add it to the list to be fired
+ * (with the correct full pathname for the modified node) at the end.
+ */
+   

[PATCH v2 14/27] hw/xen: Move xenstore_store_pv_console_info to xen_console.c

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

There's no need for this to be in the Xen accel code, and as we want to
use the Xen console support with KVM-emulated Xen we'll want to have a
platform-agnostic version of it. Make it use GString to build up the
path while we're at it.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 accel/xen/xen-all.c   | 61 ---
 hw/char/xen_console.c | 45 +--
 include/hw/xen/xen.h  |  2 --
 3 files changed, 43 insertions(+), 65 deletions(-)

diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index 425216230f..2d51c41e40 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -29,67 +29,6 @@ xc_interface *xen_xc;
 xenforeignmemory_handle *xen_fmem;
 xendevicemodel_handle *xen_dmod;
 
-static int store_dev_info(int domid, Chardev *cs, const char *string)
-{
-struct xs_handle *xs = NULL;
-char *path = NULL;
-char *newpath = NULL;
-char *pts = NULL;
-int ret = -1;
-
-/* Only continue if we're talking to a pty. */
-if (!CHARDEV_IS_PTY(cs)) {
-return 0;
-}
-pts = cs->filename + 4;
-
-/* We now have everything we need to set the xenstore entry. */
-xs = xs_open(0);
-if (xs == NULL) {
-fprintf(stderr, "Could not contact XenStore\n");
-goto out;
-}
-
-path = xs_get_domain_path(xs, domid);
-if (path == NULL) {
-fprintf(stderr, "xs_get_domain_path() error\n");
-goto out;
-}
-newpath = realloc(path, (strlen(path) + strlen(string) +
-strlen("/tty") + 1));
-if (newpath == NULL) {
-fprintf(stderr, "realloc error\n");
-goto out;
-}
-path = newpath;
-
-strcat(path, string);
-strcat(path, "/tty");
-if (!xs_write(xs, XBT_NULL, path, pts, strlen(pts))) {
-fprintf(stderr, "xs_write for '%s' fail", string);
-goto out;
-}
-ret = 0;
-
-out:
-free(path);
-xs_close(xs);
-
-return ret;
-}
-
-void xenstore_store_pv_console_info(int i, Chardev *chr)
-{
-if (i == 0) {
-store_dev_info(xen_domid, chr, "/console");
-} else {
-char buf[32];
-snprintf(buf, sizeof(buf), "/device/console/%d", i);
-store_dev_info(xen_domid, chr, buf);
-}
-}
-
-
 static void xenstore_record_dm_state(const char *state)
 {
 struct xs_handle *xs;
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index ad8638a86d..c7a19c0e7c 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -173,6 +173,48 @@ static void xencons_send(struct XenConsole *con)
 
 /*  */
 
+/*
+ * Record the pty behind this console's character device in xenstore, under
+ * <domain path>/console/tty for console 0 or
+ * <domain path>/device/console/<dev>/tty for the others.
+ *
+ * Returns 0 if there was nothing to do (the chardev is not a pty, or the
+ * domain path could not be obtained) or if the write succeeded, and -1 if
+ * the xenstore write failed.
+ */
+static int store_con_info(struct XenConsole *con)
+{
+    Chardev *cs = qemu_chr_fe_get_driver(&con->chr);
+    char *pts = NULL;
+    char *dom_path;
+    GString *path;
+    int ret = -1;
+
+    /* Only continue if we're talking to a pty. */
+    if (!CHARDEV_IS_PTY(cs)) {
+        return 0;
+    }
+    /* presumably skips a "pty:" prefix on the filename -- TODO confirm */
+    pts = cs->filename + 4;
+
+    dom_path = qemu_xen_xs_get_domain_path(xenstore, xen_domid);
+    if (!dom_path) {
+        return 0;
+    }
+
+    /* The GString holds its own copy, so the domain path can go right away */
+    path = g_string_new(dom_path);
+    free(dom_path);
+
+    if (con->xendev.dev) {
+        g_string_append_printf(path, "/device/console/%d", con->xendev.dev);
+    } else {
+        g_string_append(path, "/console");
+    }
+    g_string_append(path, "/tty");
+
+    /*
+     * NOTE(review): 'path' is already a full xenstore path, yet it is passed
+     * here as the node name alongside con->console -- confirm this against
+     * the arguments xenstore_write_str() expects.
+     */
+    if (xenstore_write_str(con->console, path->str, pts)) {
+        fprintf(stderr, "xenstore_write_str for '%s' fail", path->str);
+        goto out;
+    }
+    ret = 0;
+
+out:
+    /*
+     * Passing true releases the character data together with the GString
+     * itself; calling free() on 'path' afterwards would be a double free.
+     */
+    g_string_free(path, true);
+
+    return ret;
+}
+
 static int con_init(struct XenLegacyDevice *xendev)
 {
 struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
@@ -215,8 +257,7 @@ static int con_init(struct XenLegacyDevice *xendev)
  _abort);
 }
 
-xenstore_store_pv_console_info(con->xendev.dev,
-   qemu_chr_fe_get_driver(>chr));
+store_con_info(con);
 
 out:
 g_free(type);
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
index 03983939f9..56b1c2a827 100644
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -39,8 +39,6 @@ int xen_is_pirq_msi(uint32_t msi_data);
 
 qemu_irq *xen_interrupt_controller_init(void);
 
-void xenstore_store_pv_console_info(int i, Chardev *chr);
-
 void xen_register_framebuffer(struct MemoryRegion *mr);
 
 #endif /* QEMU_HW_XEN_H */
-- 
2.39.0




[PATCH v2 24/27] hw/xen: Implement soft reset for emulated gnttab

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This is only part of it; we will also need to get the PV back end drivers
to tear down their own mappings (or do it for them, but they kind of need
to stop using the pointers too).

Some more work on the actual PV back ends and xen-bus code is going to be
needed to really make soft reset and migration fully functional, and this
part is the basis for that.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/xen_gnttab.c  | 26 --
 hw/i386/kvm/xen_gnttab.h  |  1 +
 target/i386/kvm/xen-emu.c |  5 +
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index 2bf91d36c0..21c30e3659 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -72,13 +72,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error 
**errp)
 error_setg(errp, "Xen grant table support is for Xen emulation");
 return;
 }
-s->nr_frames = 0;
 s->max_frames = kvm_xen_get_gnttab_max_frames();
 memory_region_init_ram(>gnt_frames, OBJECT(dev), "xen:grant_table",
XEN_PAGE_SIZE * s->max_frames, _abort);
 memory_region_set_enabled(>gnt_frames, true);
 s->entries.v1 = memory_region_get_ram_ptr(>gnt_frames);
-memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
 
 /* Create individual page-sizes aliases for overlays */
 s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
@@ -90,8 +88,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error 
**errp)
 s->gnt_frame_gpas[i] = INVALID_GPA;
 }
 
+s->nr_frames = 0;
+memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
 s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
 s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
+
 qemu_mutex_init(>gnt_lock);
 
 xen_gnttab_singleton = s;
@@ -523,3 +524,24 @@ static struct gnttab_backend_ops emu_gnttab_backend_ops = {
 .unmap = xen_be_gnttab_unmap,
 };
 
+/*
+ * Return the emulated grant table to its initial state: no frames in use,
+ * every entry cleared except the reserved XenStore grant, and the mapping
+ * tracking array zeroed.
+ *
+ * Returns 0 on success, or -ENOTSUP if no grant table device exists.
+ */
+int xen_gnttab_reset(void)
+{
+    XenGnttabState *s = xen_gnttab_singleton;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    /* Held until the function returns */
+    QEMU_LOCK_GUARD(&s->gnt_lock);
+
+    s->nr_frames = 0;
+
+    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
+
+    /* Same reserved XenStore entry that xen_gnttab_realize() sets up */
+    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
+
+    /* assumes map_track holds one byte per v1 entry -- TODO confirm */
+    memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1);
+
+    return 0;
+}
diff --git a/hw/i386/kvm/xen_gnttab.h b/hw/i386/kvm/xen_gnttab.h
index 3bdbe96191..ee215239b0 100644
--- a/hw/i386/kvm/xen_gnttab.h
+++ b/hw/i386/kvm/xen_gnttab.h
@@ -13,6 +13,7 @@
 #define QEMU_XEN_GNTTAB_H
 
 void xen_gnttab_create(void);
+int xen_gnttab_reset(void);
 int xen_gnttab_map_page(uint64_t idx, uint64_t gfn);
 
 struct gnttab_set_version;
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index bad3131d08..0bb6c601c9 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -1406,6 +1406,11 @@ int kvm_xen_soft_reset(void)
 return err;
 }
 
+err = xen_gnttab_reset();
+if (err) {
+return err;
+}
+
 err = xen_xenstore_reset();
 if (err) {
 return err;
-- 
2.39.0




[PATCH v2 01/27] hw/xen: Add xenstore wire implementation and implementation stubs

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

This implements the basic wire protocol for the XenStore commands, punting
all the actual implementation to xs_impl_* functions which all just return
errors for now.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/kvm/meson.build |   1 +
 hw/i386/kvm/trace-events|  15 +
 hw/i386/kvm/xen_xenstore.c  | 871 +++-
 hw/i386/kvm/xenstore_impl.c | 117 +
 hw/i386/kvm/xenstore_impl.h |  58 +++
 5 files changed, 1054 insertions(+), 8 deletions(-)
 create mode 100644 hw/i386/kvm/xenstore_impl.c
 create mode 100644 hw/i386/kvm/xenstore_impl.h

diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build
index 82dd6ae7c6..6621ba5cd7 100644
--- a/hw/i386/kvm/meson.build
+++ b/hw/i386/kvm/meson.build
@@ -9,6 +9,7 @@ i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files(
   'xen_evtchn.c',
   'xen_gnttab.c',
   'xen_xenstore.c',
+  'xenstore_impl.c',
   ))
 
 i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)
diff --git a/hw/i386/kvm/trace-events b/hw/i386/kvm/trace-events
index b83c3eb965..e4c82de6f3 100644
--- a/hw/i386/kvm/trace-events
+++ b/hw/i386/kvm/trace-events
@@ -3,3 +3,18 @@ kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d"
 kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d"
 kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d"
 kvm_xen_unmask_pirq(int pirq, char *dev, int vector) "pirq %d dev %s vector %d"
+xenstore_error(unsigned int id, unsigned int tx_id, const char *err) "req %u 
tx %u err %s"
+xenstore_read(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_write(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_mkdir(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_directory(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_directory_part(unsigned int tx_id, const char *path, unsigned int 
offset) "tx %u path %s offset %u"
+xenstore_transaction_start(unsigned int new_tx) "new_tx %u"
+xenstore_transaction_end(unsigned int tx_id, bool commit) "tx %u commit %d"
+xenstore_rm(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_get_perms(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_set_perms(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_watch(const char *path, const char *token) "path %s token %s"
+xenstore_unwatch(const char *path, const char *token) "path %s token %s"
+xenstore_reset_watches(void) ""
+xenstore_watch_event(const char *path, const char *token) "path %s token %s"
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 14193ef3f9..64d8f1a38f 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -28,6 +28,10 @@
 #include "sysemu/kvm.h"
 #include "sysemu/kvm_xen.h"
 
+#include "trace.h"
+
+#include "xenstore_impl.h"
+
 #include "hw/xen/interface/io/xs_wire.h"
 #include "hw/xen/interface/event_channel.h"
 
@@ -47,6 +51,9 @@ struct XenXenstoreState {
 SysBusDevice busdev;
 /*< public >*/
 
+XenstoreImplState *impl;
+GList *watch_events;
+
 MemoryRegion xenstore_page;
 struct xenstore_domain_interface *xs;
 uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
@@ -64,6 +71,7 @@ struct XenXenstoreState {
 struct XenXenstoreState *xen_xenstore_singleton;
 
 static void xen_xenstore_event(void *opaque);
+static void fire_watch_cb(void *opaque, const char *path, const char *token);
 
 static void xen_xenstore_realize(DeviceState *dev, Error **errp)
 {
@@ -89,6 +97,8 @@ static void xen_xenstore_realize(DeviceState *dev, Error 
**errp)
 }
 aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
xen_xenstore_event, NULL, NULL, NULL, s);
+
+s->impl = xs_impl_create();
 }
 
 static bool xen_xenstore_is_needed(void *opaque)
@@ -213,20 +223,761 @@ static void reset_rsp(XenXenstoreState *s)
 s->rsp_offset = 0;
 }
 
+static void xs_error(XenXenstoreState *s, unsigned int id,
+ xs_transaction_t tx_id, int errnum)
+{
+struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+const char *errstr = NULL;
+
+for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
+struct xsd_errors *xsd_error = &xsd_errors[i];
+
+if (xsd_error->errnum == errnum) {
+errstr = xsd_error->errstring;
+break;
+}
+}
+assert(errstr);
+
+trace_xenstore_error(id, tx_id, errstr);
+
+rsp->type = XS_ERROR;
+rsp->req_id = id;
+rsp->tx_id = tx_id;
+rsp->len = (uint32_t)strlen(errstr) + 1;
+
+memcpy(&rsp[1], errstr, rsp->len);
+}
+
+static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
+  xs_transaction_t tx_id)
+{
+struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+const char *okstr = "OK";
+
+rsp->type = type;
+rsp->req_id = req_id;
+rsp->tx_id = tx_id;
+rsp->len = (uint32_t)strlen(okstr) + 1;
+
+

[PATCH v2 18/27] hw/xen: Avoid crash when backend watch fires too early

2023-03-07 Thread David Woodhouse
From: Paul Durrant 

The xen-block code ends up calling aio_poll() through blkconf_geometry(),
which means we see watch events during the indirect call to
xendev_class->realize() in xen_device_realize(). Unfortunately this call
is made before populating the initial frontend and backend device nodes
in xenstore and hence xen_block_frontend_changed() (which is called from
a watch event) fails to read the frontend's 'state' node, and hence
believes the device is being torn down. This in-turn sets the backend
state to XenbusStateClosed and causes the device to be deleted before it
is fully set up, leading to the crash.
By simply moving the call to xendev_class->realize() after the initial
xenstore nodes are populated, this sorry state of affairs is avoided.

Reported-by: David Woodhouse 
Signed-off-by: Paul Durrant 
Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/xen/xen-bus.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index 9fe54967d4..c59850b1de 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -1034,13 +1034,6 @@ static void xen_device_realize(DeviceState *dev, Error 
**errp)
 goto unrealize;
 }
 
-if (xendev_class->realize) {
-xendev_class->realize(xendev, errp);
-if (*errp) {
-goto unrealize;
-}
-}
-
 xen_device_backend_printf(xendev, "frontend", "%s",
   xendev->frontend_path);
 xen_device_backend_printf(xendev, "frontend-id", "%u",
@@ -1059,6 +1052,13 @@ static void xen_device_realize(DeviceState *dev, Error 
**errp)
 xen_device_frontend_set_state(xendev, XenbusStateInitialising, true);
 }
 
+if (xendev_class->realize) {
+xendev_class->realize(xendev, errp);
+if (*errp) {
+goto unrealize;
+}
+}
+
 xendev->exit.notify = xen_device_exit;
 qemu_add_exit_notifier(&xendev->exit);
 return;
-- 
2.39.0




[PATCH v2 25/27] i386/xen: Initialize Xen backends from pc_basic_device_init() for emulation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Now that all the work is done to enable the PV backends to work without
actual Xen, instantiate the bus from pc_basic_device_init() for emulated
mode.

This allows us finally to launch an emulated Xen guest with PV disk.

   qemu-system-x86_64 -serial mon:stdio -M q35 -cpu host -display none \
 -m 1G -smp 2 -accel kvm,xen-version=0x4000a,kernel-irqchip=split \
 -kernel bzImage -append "console=ttyS0 root=/dev/xvda1" \
 -drive file=/var/lib/libvirt/images/fedora28.qcow2,if=none,id=disk \
 -device xen-disk,drive=disk,vdev=xvda

If we use -M pc instead of q35, we can even add an IDE disk and boot a
guest image normally through grub. But q35 gives us AHCI and that isn't
unplugged by the Xen magic, so the guest ends up seeing "both" disks.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/i386/pc.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7bebea57e3..1489abf010 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -102,6 +102,11 @@
 #include "trace.h"
 #include CONFIG_DEVICES
 
+#ifdef CONFIG_XEN_EMU
+#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen-bus.h"
+#endif
+
 /*
  * Helper for setting model-id for CPU models that changed model-id
  * depending on QEMU versions up to QEMU 2.4.
@@ -1318,6 +1323,8 @@ void pc_basic_device_init(struct PCMachineState *pcms,
 if (pcms->bus) {
 pci_create_simple(pcms->bus, -1, "xen-platform");
 }
+xen_bus_init();
+xen_be_init();
 }
 #endif
 
-- 
2.39.0




[PATCH v2 19/27] hw/xen: Only advertise ring-page-order for xen-block if gnttab supports it

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

When emulating Xen, multi-page grants are distinctly non-trivial and we
have elected not to support them for the time being. Don't advertise
them to the guest.

Signed-off-by: David Woodhouse 
Reviewed-by: Paul Durrant 
---
 hw/block/xen-block.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 87299615e3..f5a744589d 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -83,7 +83,8 @@ static void xen_block_connect(XenDevice *xendev, Error **errp)
 g_free(ring_ref);
 return;
 }
-} else if (order <= blockdev->props.max_ring_page_order) {
+} else if (qemu_xen_gnttab_can_map_multi() &&
+   order <= blockdev->props.max_ring_page_order) {
 unsigned int i;
 
 nr_ring_ref = 1 << order;
@@ -255,8 +256,12 @@ static void xen_block_realize(XenDevice *xendev, Error 
**errp)
 }
 
 xen_device_backend_printf(xendev, "feature-flush-cache", "%u", 1);
-xen_device_backend_printf(xendev, "max-ring-page-order", "%u",
-  blockdev->props.max_ring_page_order);
+
+if (qemu_xen_gnttab_can_map_multi()) {
+xen_device_backend_printf(xendev, "max-ring-page-order", "%u",
+  blockdev->props.max_ring_page_order);
+}
+
 xen_device_backend_printf(xendev, "info", "%u", blockdev->info);
 
 xen_device_frontend_printf(xendev, "virtual-device", "%lu",
-- 
2.39.0




Re: [RFC PATCH v1 07/25] hw/xen: Implement core serialize/deserialize methods for xenstore_impl

2023-03-07 Thread Paul Durrant

On 07/03/2023 16:59, Paul Durrant wrote:

On 07/03/2023 16:52, David Woodhouse wrote:

On Tue, 2023-03-07 at 16:39 +0000, Paul Durrant wrote:

On 07/03/2023 16:33, David Woodhouse wrote:

On Thu, 2023-03-02 at 15:34 +0000, David Woodhouse wrote:

From: David Woodhouse 

In fact I think we want to only serialize the contents of the domain's
path in /local/domain/${domid} and leave the rest to be recreated? 
Will

defer to Paul for that.

Signed-off-by: David Woodhouse 


Paul, your Reviewed-by: on this one is conspicuous in its absence. I
mentioned migration in the cover letter — this much is working fine,
but it's the PV back ends that don't yet work.

I'd quite like to merge the basic serialization/deserialization of
XenStore itself, with the unit tests.


The patch is basically ok, I just think the serialization should be
limited to the guest nodes... filtering out those not owned by xen_domid
would probably work for that.


Yeah, so let's just do this (as part of this patch #7) for now:

--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -235,6 +235,7 @@ static int xen_xenstore_post_load(void *opaque, int
ver)
  static const VMStateDescription xen_xenstore_vmstate = {
  .name = "xen_xenstore",
+    .unmigratable = 1, /* The PV back ends don't migrate yet */
  .version_id = 1,
  .minimum_version_id = 1,
  .needed = xen_xenstore_is_needed,


It means we can't migrate guests even if they're only using fully
emulated devices... but I think that's a reasonable limitation until we
implement it fully.



Ok. With that added...

Revieweed-by: Paul Durrant 


Typoed, sorry...

Reviewed-by: Paul Durrant 








Re: [RFC PATCH v1 07/25] hw/xen: Implement core serialize/deserialize methods for xenstore_impl

2023-03-07 Thread Paul Durrant

On 07/03/2023 16:52, David Woodhouse wrote:

On Tue, 2023-03-07 at 16:39 +0000, Paul Durrant wrote:

On 07/03/2023 16:33, David Woodhouse wrote:

On Thu, 2023-03-02 at 15:34 +0000, David Woodhouse wrote:

From: David Woodhouse 

In fact I think we want to only serialize the contents of the domain's
path in /local/domain/${domid} and leave the rest to be recreated? Will
defer to Paul for that.

Signed-off-by: David Woodhouse 


Paul, your Reviewed-by: on this one is conspicuous in its absence. I
mentioned migration in the cover letter — this much is working fine,
but it's the PV back ends that don't yet work.

I'd quite like to merge the basic serialization/deserialization of
XenStore itself, with the unit tests.


The patch is basically ok, I just think the serialization should be
limited to the guest nodes... filtering out those not owned by xen_domid
would probably work for that.


Yeah, so let's just do this (as part of this patch #7) for now:

--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -235,6 +235,7 @@ static int xen_xenstore_post_load(void *opaque, int
ver)
  
  static const VMStateDescription xen_xenstore_vmstate = {

  .name = "xen_xenstore",
+.unmigratable = 1, /* The PV back ends don't migrate yet */
  .version_id = 1,
  .minimum_version_id = 1,
  .needed = xen_xenstore_is_needed,


It means we can't migrate guests even if they're only using fully
emulated devices... but I think that's a reasonable limitation until we
implement it fully.



Ok. With that added...

Revieweed-by: Paul Durrant 




Re: [RFC PATCH v2 1/2] xen/memory : Add a stats_table resource type

2023-03-07 Thread Jan Beulich
On 07.03.2023 15:44, Matias Ezequiel Vara Larsen wrote:
> On Thu, Feb 23, 2023 at 01:42:08PM +0100, Jan Beulich wrote:
>> On 23.02.2023 13:16, Matias Ezequiel Vara Larsen wrote:
>>> On Fri, Feb 17, 2023 at 03:10:53PM +0100, Jan Beulich wrote:
 On 17.02.2023 10:29, Matias Ezequiel Vara Larsen wrote:
> On Fri, Feb 17, 2023 at 09:57:43AM +0100, Jan Beulich wrote:
>> On 17.02.2023 09:50, Matias Ezequiel Vara Larsen wrote:
>>> On Wed, Dec 14, 2022 at 08:56:57AM +0100, Jan Beulich wrote:
 On 14.12.2022 08:29, Jan Beulich wrote:
> On 07.10.2022 14:39, Matias Ezequiel Vara Larsen wrote:
>> +static int stats_vcpu_alloc_mfn(struct domain *d)
>> +{
>> +struct page_info *pg;
>> +
>> +pg = alloc_domheap_page(d, MEMF_no_refcount);
>
> The ioreq and vmtrace resources are also allocated this way, but 
> they're
> HVM-specific. The one here being supposed to be VM-type independent, 
> I'm
> afraid such pages will be accessible by an "owning" PV domain (it'll
> need to guess the MFN, but that's no excuse).

 Which might be tolerable if it then can't write to the page. That would
 require "locking" the page r/o (from guest pov), which ought to be
 possible by leveraging a variant of what share_xen_page_with_guest()
 does: It marks pages PGT_none with a single type ref. This would mean
 ...

>> +if ( !pg )
>> +return -ENOMEM;
>> +
>> +if ( !get_page_and_type(pg, d, PGT_writable_page) ) {

 ... using PGT_none here. Afaict this _should_ work, but we have no
 precedent of doing so in the tree, and I may be overlooking something
 which prevents that from working.

>>>
>>> I do not fully understand this. I checked share_xen_page_with_guest() 
>>> and I
>>> think you're talking about doing something like this for each allocated 
>>> page to
>>> set them ro from a pv guest pov:
>>>
>>> pg->u.inuse.type_info = PGT_none;
>>> pg->u.inuse.type_info |= PGT_validated | 1;
>>> page_set_owner(page, d); // not sure if this is needed
>>>
>>> Then, I should use PGT_none instead of PGT_writable_page in
>>> get_page_and_type(). Am I right?
>>
>> No, if at all possible you should avoid open-coding anything. As said,
>> simply passing PGT_none to get_page_and_type() ought to work (again, as
>> said, unless I'm overlooking something). share_xen_page_with_guest()
>> can do what it does because the page isn't owned yet. For a page with
>> owner you may not fiddle with type_info in such an open-coded manner.
>>
>
> Thanks. I got the following bug when passing PGT_none:
>
> (XEN) Bad type in validate_page 0 t=0001 c=8042
> (XEN) Xen BUG at mm.c:2643

 The caller of the function needs to avoid the call not only for writable
 and shared pages, but also for this new case of PGT_none.
>>>
>>> Thanks. If I understand correctly, _get_page_type() needs to avoid to call
>>> validate_page() when type = PGT_none.
>>
>> Yes.
>>
>>> For the writable and shared pages, this
>>> is avoided by setting nx |= PGT_validated. Am I right?
>>
>> Well, no, I wouldn't describe it like that. The two (soon three) types not
>> requiring validation simply set the flag without calling validate_page().
>>
> 
> I see, thanks. I added the corresponding check at _get_page_type() to set the
> flag without calling validate_page() for the PGT_none type. I think I am
> missing something when I am releasing the pages. I am triggering the following
> BUG() when issuing put_page_and_type():
>  
> (XEN) Xen BUG at mm.c:2698
> 
> This is at devalidate_page(). I guess the call to devalidate_page() shall be
> also avoided.

Well, yes, symmetry requires a change there as well. Here it's indirect:
You want to avoid the call to _put_final_page_type(). That's enclosed by
(nx & PGT_type_mask) <= PGT_l4_page_table, which happens to be true for
PGT_none as well. There may be more instances of such a comparison, so
it'll be necessary to find them and check whether they may now also be
reached with PGT_none (looks like a comparison against PGT_root_page_table
in _get_page_type() is also affected, albeit in a largely benign way).

> I was wondering if put_page_and_type() is required in this case.

It is, or some equivalent thereof. Again - see other examples where a
similar allocation pattern exists.

Jan



Re: [RFC PATCH v1 07/25] hw/xen: Implement core serialize/deserialize methods for xenstore_impl

2023-03-07 Thread David Woodhouse
On Tue, 2023-03-07 at 16:39 +0000, Paul Durrant wrote:
> On 07/03/2023 16:33, David Woodhouse wrote:
> > On Thu, 2023-03-02 at 15:34 +0000, David Woodhouse wrote:
> > > From: David Woodhouse 
> > > 
> > > In fact I think we want to only serialize the contents of the domain's
> > > path in /local/domain/${domid} and leave the rest to be recreated? Will
> > > defer to Paul for that.
> > > 
> > > Signed-off-by: David Woodhouse 
> > 
> > Paul, your Reviewed-by: on this one is conspicuous in its absence. I
> > mentioned migration in the cover letter — this much is working fine,
> > but it's the PV back ends that don't yet work.
> > 
> > I'd quite like to merge the basic serialization/deserialization of
> > XenStore itself, with the unit tests.
> 
> The patch is basically ok, I just think the serialization should be 
> limited to the guest nodes... filtering out those not owned by xen_domid 
> would probably work for that.

Yeah, so let's just do this (as part of this patch #7) for now:

--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -235,6 +235,7 @@ static int xen_xenstore_post_load(void *opaque, int
ver)
 
 static const VMStateDescription xen_xenstore_vmstate = {
 .name = "xen_xenstore",
+.unmigratable = 1, /* The PV back ends don't migrate yet */
 .version_id = 1,
 .minimum_version_id = 1,
 .needed = xen_xenstore_is_needed,


It means we can't migrate guests even if they're only using fully
emulated devices... but I think that's a reasonable limitation until we
implement it fully.




smime.p7s
Description: S/MIME cryptographic signature


Re: [RFC PATCH v1 07/25] hw/xen: Implement core serialize/deserialize methods for xenstore_impl

2023-03-07 Thread Paul Durrant

On 07/03/2023 16:39, Paul Durrant wrote:

On 07/03/2023 16:33, David Woodhouse wrote:

On Thu, 2023-03-02 at 15:34 +0000, David Woodhouse wrote:

From: David Woodhouse 

In fact I think we want to only serialize the contents of the domain's
path in /local/domain/${domid} and leave the rest to be recreated? Will
defer to Paul for that.

Signed-off-by: David Woodhouse 


Paul, your Reviewed-by: on this one is conspicuous in its absence. I
mentioned migration in the cover letter — this much is working fine,
but it's the PV back ends that don't yet work.

I'd quite like to merge the basic serialization/deserialization of
XenStore itself, with the unit tests.


The patch is basically ok, I just think the serialization should be 
limited to the guest nodes... filtering out those not owned by xen_domid 
would probably work for that.




Perhaps we can also set TYPE_XEN_DEVICE or TYPE_XEN_BLOCK_DEVICE to be
unmigratable? Ideally I think we want TYPE_XEN_DEVICE to be
unmigratable by default *unless* the specific device class (including
net and other as we port them from XenLegacyDevice) says otherwise.



Yes, that sounds like a good idea.


Is there a way to do that?


Not something I've looked into. I'll go look now.



Maybe calling migrate_add_blocker() in the realize method is the right 
way to achieve this?


  Paul




Re: [RFC PATCH v1 07/25] hw/xen: Implement core serialize/deserialize methods for xenstore_impl

2023-03-07 Thread Paul Durrant

On 07/03/2023 16:33, David Woodhouse wrote:

On Thu, 2023-03-02 at 15:34 +0000, David Woodhouse wrote:

From: David Woodhouse 

In fact I think we want to only serialize the contents of the domain's
path in /local/domain/${domid} and leave the rest to be recreated? Will
defer to Paul for that.

Signed-off-by: David Woodhouse 


Paul, your Reviewed-by: on this one is conspicuous in its absence. I
mentioned migration in the cover letter — this much is working fine,
but it's the PV back ends that don't yet work.

I'd quite like to merge the basic serialization/deserialization of
XenStore itself, with the unit tests.


The patch is basically ok, I just think the serialization should be 
limited to the guest nodes... filtering out those not owned by xen_domid 
would probably work for that.




Perhaps we can also set TYPE_XEN_DEVICE or TYPE_XEN_BLOCK_DEVICE to be
unmigratable? Ideally I think we want TYPE_XEN_DEVICE to be
unmigratable by default *unless* the specific device class (including
net and other as we port them from XenLegacyDevice) says otherwise.



Yes, that sounds like a good idea.


Is there a way to do that?


Not something I've looked into. I'll go look now.

  Paul



Re: [RFC PATCH v1 07/25] hw/xen: Implement core serialize/deserialize methods for xenstore_impl

2023-03-07 Thread David Woodhouse
On Thu, 2023-03-02 at 15:34 +0000, David Woodhouse wrote:
> From: David Woodhouse 
> 
> In fact I think we want to only serialize the contents of the domain's
> path in /local/domain/${domid} and leave the rest to be recreated? Will
> defer to Paul for that.
> 
> Signed-off-by: David Woodhouse 

Paul, your Reviewed-by: on this one is conspicuous in its absence. I
mentioned migration in the cover letter — this much is working fine,
but it's the PV back ends that don't yet work.

I'd quite like to merge the basic serialization/deserialization of
XenStore itself, with the unit tests.

Perhaps we can also set TYPE_XEN_DEVICE or TYPE_XEN_BLOCK_DEVICE to be
unmigratable? Ideally I think we want TYPE_XEN_DEVICE to be
unmigratable by default *unless* the specific device class (including
net and other as we port them from XenLegacyDevice) says otherwise.

Is there a way to do that?


smime.p7s
Description: S/MIME cryptographic signature


Re: [RFC PATCH v1 27/25] docs: Update Xen-on-KVM documentation for PV disk support

2023-03-07 Thread Paul Durrant

On 07/03/2023 16:22, David Woodhouse wrote:

From: David Woodhouse 

Signed-off-by: David Woodhouse 
---
  docs/system/i386/xen.rst | 30 +++---
  1 file changed, 23 insertions(+), 7 deletions(-)



Reviewed-by: Paul Durrant 




Re: [RFC PATCH v1 26/25] MAINTAINERS: Add entry for Xen on KVM emulation

2023-03-07 Thread Paul Durrant

On 07/03/2023 16:21, David Woodhouse wrote:

From: David Woodhouse 

Signed-off-by: David Woodhouse 
---
  MAINTAINERS | 9 +
  1 file changed, 9 insertions(+)



Reviewed-by: Paul Durrant 




Re: [RFC PATCH v1 25/25] i386/xen: Initialize Xen backends from pc_basic_device_init() for emulation

2023-03-07 Thread Paul Durrant

On 02/03/2023 15:34, David Woodhouse wrote:

From: David Woodhouse 

Now that all the work is done to enable the PV backends to work without
actual Xen, instantiate the bus from pc_basic_device_init() for emulated
mode.

This allows us finally to launch an emulated Xen guest with PV disk.

qemu-system-x86_64 -serial mon:stdio -M q35 -cpu host -display none \
  -m 1G -smp 2 -accel kvm,xen-version=0x4000a,kernel-irqchip=split \
  -kernel bzImage -append "console=ttyS0 root=/dev/xvda1" \
  -drive file=/var/lib/libvirt/images/fedora28.qcow2,if=none,id=disk \
  -device xen-disk,drive=disk,vdev=xvda

If we use -M pc instead of q35, we can even add an IDE disk and boot a
guest image normally through grub. But q35 gives us AHCI and that isn't
unplugged by the Xen magic, so the guest ends up seeing "both" disks.

Signed-off-by: David Woodhouse 
---
  hw/i386/pc.c | 7 +++
  1 file changed, 7 insertions(+)



Reviewed-by: Paul Durrant 




Re: [RFC PATCH v1 24/25] hw/xen: Implement soft reset for emulated gnttab

2023-03-07 Thread Paul Durrant

On 02/03/2023 15:34, David Woodhouse wrote:

From: David Woodhouse 

This is only part of it; we will also need to get the PV back end drivers
to tear down their own mappings (or do it for them, but they kind of need
to stop using the pointers too).

Some more work on the actual PV back ends and xen-bus code is going to be
needed to really make soft reset and migration fully functional, and this
part is the basis for that.

Signed-off-by: David Woodhouse 
---
  hw/i386/kvm/xen_gnttab.c  | 26 --
  hw/i386/kvm/xen_gnttab.h  |  1 +
  target/i386/kvm/xen-emu.c |  5 +
  3 files changed, 30 insertions(+), 2 deletions(-)



Reviewed-by: Paul Durrant 




Re: [PATCH V2] docs: vhost-user: Add Xen specific memory mapping support

2023-03-07 Thread Stefan Hajnoczi
On Tue, Mar 07, 2023 at 11:13:36AM +0530, Viresh Kumar wrote:
> On 06-03-23, 10:34, Stefan Hajnoczi wrote:
> > On Mon, Mar 06, 2023 at 04:40:24PM +0530, Viresh Kumar wrote:
> > > +Xen mmap description
> > > +
> > > +
> > > ++---+---+
> > > +| flags | domid |
> > > ++---+---+
> > > +
> > > +:flags: 64-bit bit field
> > > +
> > > +- Bit 0 is set for Xen foreign memory mapping.
> > > +- Bit 1 is set for Xen grant memory mapping.
> > > +- Bit 2 is set if the back-end can directly map additional memory (like
> > > +  descriptor buffers or indirect descriptors, which aren't part of 
> > > already
> > > +  shared memory regions) without the need of front-end sending an 
> > > additional
> > > +  memory region first.
> > 
> > I don't understand what Bit 2 does. Can you rephrase this? It's unclear
> > to me how additional memory can be mapped without a memory region
> > (especially the fd) being sent?
> 
> I (somehow) assumed we will be able to use the same file descriptor
> that was shared for the virtqueues memory regions and yes I can see
> now why it wouldn't work or create problems.
> 
> And I need suggestion now on how to make this work.
> 
> With Xen grants, the front end receives grant addresses from the
> guest kernel; they aren't physical addresses, kind of IOMMU stuff.
> 
> The back-end gets access for memory regions of the virtqueues alone
> initially.  When the back-end gets a request, it reads the descriptor
> and finds the buffer address, which isn't part of already shared
> regions. The same happens for descriptor addresses in case indirect
> descriptor feature is negotiated.
> 
> At this point I was thinking maybe the back-end can simply call the
> mmap/ioctl to map the memory, using the file descriptor used for the
> virtqueues.
> 
> How else can we make this work ? We also need to unmap/remove the
> memory region, as soon as the buffer is processed as the grant address
> won't be relevant for any subsequent request.
> 
> Should I use VHOST_USER_IOTLB_MSG for this ? I did look at it and I
> wasn't convinced if it was an exact fit. For example it says that a
> memory address reported with miss/access fail should be part of an
> already sent memory region, which isn't the case here.

VHOST_USER_IOTLB_MSG probably isn't necessary because address
translation is not required. It will also reduce performance by adding
extra communication.

Instead, you could change the 1 memory region : 1 mmap relationship that
existing non-Xen vhost-user back-end implementations have. In Xen
vhost-user back-ends, the memory region details (including the file
descriptor and Xen domain id) would be stashed away in back-end when the
front-end adds memory regions. No mmap would be performed upon
VHOST_USER_ADD_MEM_REG or VHOST_USER_SET_MEM_TABLE.

Whenever the back-end needs to do DMA, it looks up the memory region and
performs the mmap + Xen-specific calls:
- A long-lived mmap of the vring is set up when
  VHOST_USER_SET_VRING_ENABLE is received.
- Short-lived mmaps of the indirect descriptors and memory pointed to by
  the descriptors are set up by the virtqueue processing code.

Does this sound workable to you?

Stefan


signature.asc
Description: PGP signature


Re: [RFC PATCH v1 23/25] hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore

2023-03-07 Thread Paul Durrant

On 02/03/2023 15:34, David Woodhouse wrote:

From: David Woodhouse 

Signed-off-by: David Woodhouse 
---
  hw/i386/kvm/xen_xenstore.c | 16 
  1 file changed, 16 insertions(+)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 028f80499e..f9b7387024 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -21,6 +21,7 @@
  
  #include "hw/sysbus.h"

  #include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
  #include "xen_overlay.h"
  #include "xen_evtchn.h"
  #include "xen_xenstore.h"
@@ -34,6 +35,7 @@
  
  #include "hw/xen/interface/io/xs_wire.h"

  #include "hw/xen/interface/event_channel.h"
+#include "hw/xen/interface/grant_table.h"
  
  #define TYPE_XEN_XENSTORE "xen-xenstore"

  OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
@@ -66,6 +68,9 @@ struct XenXenstoreState {
  
  uint8_t *impl_state;

  uint32_t impl_state_size;
+
+struct xengntdev_handle *gt;
+void *granted_xs;
  };
  
  struct XenXenstoreState *xen_xenstore_singleton;

@@ -1452,6 +1457,17 @@ int xen_xenstore_reset(void)
  }
  s->be_port = err;
  
+/*

+ * We don't actually access the guest's page through the grant, because
+ * this isn't real Xen, and we can just use the page we gave it in the
+ * first place. Map the grant anyway, mostly for cosmetic purposes so
+ * it *looks* like it's in use in the guest-visible grant table.


Might be useful to stick this text in the commit comment too.

Reviewed-by: Paul Durrant 


+ */
+s->gt = qemu_xen_gnttab_open();
+uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
+s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
+ PROT_READ | PROT_WRITE);
+
  return 0;
  }
  





[RFC PATCH v1 27/25] docs: Update Xen-on-KVM documentation for PV disk support

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Signed-off-by: David Woodhouse 
---
 docs/system/i386/xen.rst | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/docs/system/i386/xen.rst b/docs/system/i386/xen.rst
index a00523b492..f06765e88c 100644
--- a/docs/system/i386/xen.rst
+++ b/docs/system/i386/xen.rst
@@ -9,6 +9,8 @@ KVM has support for hosting Xen guests, intercepting Xen 
hypercalls and event
 channel (Xen PV interrupt) delivery. This allows guests which expect to be
 run under Xen to be hosted in QEMU under Linux/KVM instead.
 
+Using the split irqchip is mandatory for Xen support.
+
 Setup
 -
 
@@ -17,14 +19,14 @@ accelerator, for example for Xen 4.10:
 
 .. parsed-literal::
 
-  |qemu_system| --accel kvm,xen-version=0x4000a
+  |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split
 
 Additionally, virtual APIC support can be advertised to the guest through the
 ``xen-vapic`` CPU flag:
 
 .. parsed-literal::
 
-  |qemu_system| --accel kvm,xen-version=0x4000a --cpu host,+xen_vapic
+  |qemu_system| --accel kvm,xen-version=0x4000a,kernel-irqchip=split --cpu 
host,+xen_vapic
 
 When Xen support is enabled, QEMU changes hypervisor identification (CPUID
 0x40000000..0x4000000A) to Xen. The KVM identification and features are not
@@ -33,11 +35,25 @@ moves to leaves 0x40000100..0x4000010A.
 
 The Xen platform device is enabled automatically for a Xen guest. This allows
 a guest to unplug all emulated devices, in order to use Xen PV block and 
network
-drivers instead. Note that until the Xen PV device back ends are enabled to 
work
-with Xen mode in QEMU, that is unlikely to cause significant joy. Linux guests
-can be dissuaded from this by adding 'xen_emul_unplug=never' on their command
-line, and it can also be noted that AHCI disk controllers are exempt from being
-unplugged, as are passthrough VFIO PCI devices.
+drivers instead. Under Xen, the boot disk is typically available both via IDE
+emulation, and as a PV block device. Guest bootloaders typically use IDE to 
load
+the guest kernel, which then unplugs the IDE and continues with the Xen PV 
block
+device.
+
+This configuration can be achieved as follows
+
+.. parsed-literal::
+
+  |qemu_system| -M pc --accel kvm,xen-version=0x4000a,kernel-irqchip=split \\
+   -drive file=${GUEST_IMAGE},if=none,id=disk,file.locking=off -device 
xen-disk,drive=disk,vdev=xvda \\
+   -drive file=${GUEST_IMAGE},index=2,media=disk,file.locking=off,if=ide
+
+It is necessary to use the pc machine type, as the q35 machine uses AHCI 
instead
+of legacy IDE, and AHCI disks are not unplugged through the Xen PV unplug
+mechanism.
+
+VirtIO devices can also be used; Linux guests may need to be dissuaded from
+unplugging them by adding 'xen_emul_unplug=never' on their command line.
 
 Properties
 --
-- 
2.34.1




smime.p7s
Description: S/MIME cryptographic signature


[RFC PATCH v1 26/25] MAINTAINERS: Add entry for Xen on KVM emulation

2023-03-07 Thread David Woodhouse
From: David Woodhouse 

Signed-off-by: David Woodhouse 
---
 MAINTAINERS | 9 +
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index da29661b37..76b705e467 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -443,6 +443,15 @@ F: target/i386/kvm/
 F: target/i386/sev*
 F: scripts/kvm/vmxcap
 
+Xen emulation on X86 KVM CPUs
+M: David Woodhouse 
+M: Paul Durrant 
+S: Supported
+F: include/sysemu/kvm_xen.h
+F: target/i386/kvm/xen*
+F: hw/i386/kvm/xen*
+F: tests/avocado/xen_guest.py
+
 Guest CPU Cores (other accelerators)
 
 Overall
-- 
2.34.1




smime.p7s
Description: S/MIME cryptographic signature


Re: [RFC PATCH v1 22/25] hw/xen: Add emulated implementation of XenStore operations

2023-03-07 Thread Paul Durrant

On 02/03/2023 15:34, David Woodhouse wrote:

From: David Woodhouse 

Now that we have an internal implementation of XenStore, we can populate
the xenstore_backend_ops to allow PV backends to talk to it.

Watches can't be processed with immediate callbacks because that would
call back into XenBus code recursively. Defer them to a QEMUBH to be run
as appropriate from the main loop. We use a QEMUBH per XS handle, and it
walks all the watches (there shouldn't be many per handle) to fire any
which have pending events. We *could* have done it differently but this
allows us to use the same struct watch_event as we have for the guest
side, and keeps things relatively simple.


Yes, it's more consistent with watch events on real Xen this way.



Signed-off-by: David Woodhouse 
---
  hw/i386/kvm/xen_xenstore.c | 273 -
  1 file changed, 269 insertions(+), 4 deletions(-)



Reviewed-by: Paul Durrant 




Re: [RFC PATCH v1 21/25] hw/xen: Add emulated implementation of grant table operations

2023-03-07 Thread David Woodhouse
On Tue, 2023-03-07 at 16:07 +0000, Paul Durrant wrote:
> On 02/03/2023 15:34, David Woodhouse wrote:
> > From: David Woodhouse 
> > 
> > This is limited to mapping a single grant at a time, because under Xen the
> > pages are mapped *contiguously* into qemu's address space, and that's very
> > hard to do when those pages actually come from anonymous mappings in qemu
> > in the first place.
> > 
> > Eventually perhaps we can look at using shared mappings of actual objects
> > for system RAM, and then we can make new mappings of the same backing
> > store (be it deleted files, shmem, whatever). But for now let's stick to
> > a page at a time.
> > 
> > Signed-off-by: David Woodhouse 
> > ---
> >   hw/i386/kvm/xen_gnttab.c | 299 ++-
> >   1 file changed, 296 insertions(+), 3 deletions(-)
> > 
> [snip]
> > +static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
> > +{
> > +    uint16_t mask = GTF_type_mask | GTF_sub_page;
> > +    grant_entry_v1_t gnt, *gnt_p;
> > +    int retries = 0;
> > +
> > +    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
> > +    s->map_track[ref] == UINT8_MAX) {
> > +    return INVALID_GPA;
> > +    }
> > +
> > +    if (prot & PROT_WRITE) {
> > +    mask |= GTF_readonly;
> > +    }
> > +
> > +    gnt_p = &s->entries.v1[ref];
> > +
> > +    /*
> > + * The guest can legitimately be changing the GTF_readonly flag. Allow
> 
> I'd call a guest playing with the ref after setting GTF_permit_access a 
> buggy guest and not bother with the loop.

Didn't we have this argument already when I tried to get you to change
your one? :)

I argue that it's OK for a guest to *increase* permissions to include
write permission, even while a read-only grant may be in progress.

And also to *decrease* permissions to take away write permission, even
while read-only grants may be in progress.

The loop is what Xen does, so let's do it that way.

> 
> > + * that, but don't let a malicious guest cause a livelock.
> > + */
> > +    for (retries = 0; retries < 5; retries++) {
> > +    uint16_t new_flags;
> > +
> > +    /* Read the entry before an atomic operation on its flags */
> > +    gnt = *(volatile grant_entry_v1_t *)gnt_p;
> > +
> > +    if ((gnt.flags & mask) != GTF_permit_access ||
> > +    gnt.domid != DOMID_QEMU) {
> > +    return INVALID_GPA;
> > +    }
> > +
> > +    new_flags = gnt.flags | GTF_reading;
> > +    if (prot & PROT_WRITE) {
> > +    new_flags |= GTF_writing;
> > +    }
> > +
> > +    if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == 
> > gnt.flags) {
> 
> Xen actually does a cmpxchg on both the flags and the domid. We probably 
> ought to fail to set the flags if the guest is playing with the domid
> but since we're single-tenant it doesn't *really* matter... just a 
> nice-to-have. So...

Yeah, changing the *domid* while it's active is definitely not an OK
thing to do.



smime.p7s
Description: S/MIME cryptographic signature


Re: [RFC PATCH v1 21/25] hw/xen: Add emulated implementation of grant table operations

2023-03-07 Thread Paul Durrant

On 02/03/2023 15:34, David Woodhouse wrote:

From: David Woodhouse 

This is limited to mapping a single grant at a time, because under Xen the
pages are mapped *contiguously* into qemu's address space, and that's very
hard to do when those pages actually come from anonymous mappings in qemu
in the first place.

Eventually perhaps we can look at using shared mappings of actual objects
for system RAM, and then we can make new mappings of the same backing
store (be it deleted files, shmem, whatever). But for now let's stick to
a page at a time.

Signed-off-by: David Woodhouse 
---
  hw/i386/kvm/xen_gnttab.c | 299 ++-
  1 file changed, 296 insertions(+), 3 deletions(-)


[snip]

+static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
+{
+uint16_t mask = GTF_type_mask | GTF_sub_page;
+grant_entry_v1_t gnt, *gnt_p;
+int retries = 0;
+
+if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
+s->map_track[ref] == UINT8_MAX) {
+return INVALID_GPA;
+}
+
+if (prot & PROT_WRITE) {
+mask |= GTF_readonly;
+}
+
+gnt_p = &s->entries.v1[ref];
+
+/*
+ * The guest can legitimately be changing the GTF_readonly flag. Allow


I'd call a guest playing with the ref after setting GTF_permit_access a 
buggy guest and not bother with the loop.



+ * that, but don't let a malicious guest cause a livelock.
+ */
+for (retries = 0; retries < 5; retries++) {
+uint16_t new_flags;
+
+/* Read the entry before an atomic operation on its flags */
+gnt = *(volatile grant_entry_v1_t *)gnt_p;
+
+if ((gnt.flags & mask) != GTF_permit_access ||
+gnt.domid != DOMID_QEMU) {
+return INVALID_GPA;
+}
+
+new_flags = gnt.flags | GTF_reading;
+if (prot & PROT_WRITE) {
+new_flags |= GTF_writing;
+}
+
+if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) 
{


Xen actually does a cmpxchg on both the flags and the domid. We probably 
ought to fail to set the flags if the guest is playing with the domid 
but since we're single-tenant it doesn't *really* matter... just a 
nice-to-have. So...


Reviewed-by: Paul Durrant 




Re: [PATCH 2/2] xen: update CONFIG_DEBUG_INFO help text

2023-03-07 Thread Juergen Gross

On 07.03.23 16:14, Jan Beulich wrote:

On 07.03.2023 16:02, Juergen Gross wrote:

On 07.03.23 15:34, Jan Beulich wrote:

On 07.03.2023 15:23, Juergen Gross wrote:

On 07.03.23 15:18, Jan Beulich wrote:

On 07.03.2023 15:04, Juergen Gross wrote:

On 07.03.23 11:41, Jan Beulich wrote:

On 07.03.2023 07:32, Juergen Gross wrote:

--- a/xen/Kconfig.debug
+++ b/xen/Kconfig.debug
@@ -15,8 +15,11 @@ config DEBUG_INFO
bool "Compile Xen with debug info"
default DEBUG
---help---
- If you say Y here the resulting Xen will include debugging info
- resulting in a larger binary image.
+ Say Y here if you want to build Xen with debug information. This
+ information is needed e.g. for doing crash dump analysis of the
+ hypervisor via the "crash" tool.
+ Saying Y will increase the size of xen-syms and the built EFI
+ binary.


Largely fine with me, just one question: Why do you mention xen-syms by
name, but then verbally describe xen.efi? And since, unlike for xen-syms,


For xen-syms I couldn't find an easily understandable wording. I'd be fine
with just saying "xen.efi".


this affects the installed binary actually used for booting (which may
be placed on a space constrained partition), it may be prudent to
mention INSTALL_EFI_STRIP here (as a way to reduce the binary size of
what ends up on the EFI partition, even if that wouldn't affect the
"normal" way of putting the binary on the EFI partition - people would
still need to take care of that in their distros).


What about adding a related Kconfig option instead?


How would a Kconfig option possibly affect this? You want debug info
in the xen.efi in its standard install location (outside of the EFI
partition); or else if you don't want it there why would you want it
in xen-syms? It is the step of populating the EFI partition from the
standard install location where some equivalent of INSTALL_EFI_STRIP
would come into play. That step is done outside of Xen's build
system and hence outside of any Kconfig control.


We have 2 binaries for the non-EFI hypervisor (xen-syms and xen[.gz]).
Why can't we have the same for EFI? E.g. xen-syms.efi and xen.efi.
The former would have the debug-info, the latter could be installed
into the EFI partition.


I view the two-binaries model of the non-EFI case as merely an
implementation detail;


The ability to do crash dump analysis is more than an implementation
detail IMHO. It is a feature and as such the availability of xen-syms
should be seen as an interface which functionality should be kept.


That you're looking the opposite way at things: The oddity is that we
can't use xen-syms directly for booting (which is also why it has this
specific name; otherwise "xen" would be what the linker produces).


it just so happens that there's little point
in mkelf32 retaining debug info. I therefore don't view it as very
reasonable to artificially introduce yet another binary.


In case there is no other way to enable hypervisor crash dump analysis
I don't see this as an unreasonable approach.

It should be verified that this approach is really enabling the crash
dump analysis of a crash dump from a xen.efi booted system, of course.


Right. First question would be whether they manage to consume Dwarf
debug info from a PE/COFF executable. Possibly the way to go is to
separate Dwarf data out of xen.efi into an ELF "container"; I have no
idea whether objcopy could be used for something like that.


I tried:

> objcopy --remove-section=.text --remove-section=.rodata 
--remove-section=.init.* --remove-section=.data --remove-section=.data.* 
--remove-section=.bss --output-target=elf64-x86-64 xen.efi xen-debug


and the result was:

> objdump -h xen-debug

xen-debug: file format elf64-x86-64

Sections:
Idx Name  Size  VMA   LMA   File off  Algn
  0 .buildid  0035  82d040486fb8  82d040486fb8  0fb8  2**2
  CONTENTS, ALLOC, LOAD, READONLY, DATA
  1 .init 000a2340  82d04060  82d04060  1000  2**2
  CONTENTS, ALLOC, LOAD, CODE
  2 .reloc1658  82d04094d5a0  82d04094d5a0  000a35a0  2**2
  CONTENTS, ALLOC, LOAD, READONLY, DATA
  3 .debug_abbrev 00091f6b  82d04094ebf8  82d04094ebf8  000a4bf8  2**0
  CONTENTS, READONLY, DEBUGGING, OCTETS
  4 .debug_info   00fd7af4  82d0409e0b63  82d0409e0b63  00136b63  2**0
  CONTENTS, READONLY, DEBUGGING, OCTETS
  5 .debug_str00843395  82d0419b8657  82d0419b8657  0110e657  2**0
  CONTENTS, READONLY, DEBUGGING, OCTETS
  6 .debug_line   0011dba5  82d0421fb9ec  82d0421fb9ec  019519ec  2**0
  CONTENTS, READONLY, DEBUGGING, OCTETS
  7 .debug_frame  0003fd7c  82d042319594  82d042319594  01a6f591  2**0
  CONTENTS, READONLY, DEBUGGING, OCTETS
  8 .debug_loc0047fcfd  

[PATCH v6 4/4] xen/x86: switch x86 to use generic implemetation of bug.h

2023-03-07 Thread Oleksii Kurochko
The following changes were made:
* Make GENERIC_BUG_FRAME mandatory for X86
* Update asm/bug.h using generic implementation in <xen/bug.h>
* Update do_invalid_op using generic do_bug_frame()
* Define BUG_DEBUGGER_TRAP_FATAL to debugger_trap_fatal(X86_EXC_GP,regs)
* type of eip variable was changed to 'void *'

Signed-off-by: Oleksii Kurochko 
---
Changes in V6:
 * update the commit message
 * update the type of eip to 'void *' in do_invalid_op()
 * fix the logic of do_invalid_op()
 * move macros BUG_DEBUGGER_TRAP_FATAL under #ifndef __ASSEMBLY__ as
   it is not necessary to be in assembly code.
---
Changes in V5:
 * Nothing changed
---
Changes in V4:
 * Restore comment /* !__ASSEMBLY__ */ for #else case in <asm/bug.h>
 * Remove changes related to x86/.../asm/debugger.h as do_bug_frame() prototype
   was updated and cpu_user_regs isn't const any more.
---
Changes in V3:
 * As the prototype and return value of do_bug_frame() were changed, patches 3
   and 4 were updated to use the new version of do_bug_frame
 * MODIFIER was changed to BUG_ASM_CONST to align with generic implementation
---
Changes in V2:
  * Remove all unnecessary things from <asm/bug.h> as they were introduced in
<xen/bug.h>.
  * Define BUG_INSTR = 'ud2' and MODIFIER = 'c' ( it is needed to skip '$'
when using an immediate in x86 assembly )
  * Update do_invalid_op() to re-use handle_bug_frame() and find_bug_frame()
from generic implementation of CONFIG_GENERIC_BUG_FRAME
  * Code style fixes.
---
 xen/arch/x86/Kconfig   |  1 +
 xen/arch/x86/include/asm/bug.h | 77 ++--
 xen/arch/x86/traps.c   | 81 --
 3 files changed, 12 insertions(+), 147 deletions(-)

diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
index 6a7825f4ba..b0ff1f3ee6 100644
--- a/xen/arch/x86/Kconfig
+++ b/xen/arch/x86/Kconfig
@@ -11,6 +11,7 @@ config X86
select ARCH_MAP_DOMAIN_PAGE
select ARCH_SUPPORTS_INT128
select CORE_PARKING
+   select GENERIC_BUG_FRAME
select HAS_ALTERNATIVE
select HAS_COMPAT
select HAS_CPUFREQ
diff --git a/xen/arch/x86/include/asm/bug.h b/xen/arch/x86/include/asm/bug.h
index ff5cca1f19..f852cd0ee9 100644
--- a/xen/arch/x86/include/asm/bug.h
+++ b/xen/arch/x86/include/asm/bug.h
@@ -1,83 +1,12 @@
 #ifndef __X86_BUG_H__
 #define __X86_BUG_H__
 
-#undef BUG_DISP_WIDTH
-#undef BUG_LINE_LO_WIDTH
-#undef BUG_LINE_HI_WIDTH
-
-#define BUG_DISP_WIDTH24
-#define BUG_LINE_LO_WIDTH (31 - BUG_DISP_WIDTH)
-#define BUG_LINE_HI_WIDTH (31 - BUG_DISP_WIDTH)
-
 #ifndef __ASSEMBLY__
 
-#define BUG_FRAME_STRUCT
-
-struct bug_frame {
-signed int loc_disp:BUG_DISP_WIDTH;
-unsigned int line_hi:BUG_LINE_HI_WIDTH;
-signed int ptr_disp:BUG_DISP_WIDTH;
-unsigned int line_lo:BUG_LINE_LO_WIDTH;
-signed int msg_disp[];
-};
-
-#define bug_loc(b) ((const void *)(b) + (b)->loc_disp)
-#define bug_ptr(b) ((const void *)(b) + (b)->ptr_disp)
-#define bug_line(b) (b)->line_hi + ((b)->loc_disp < 0)) &\
-   ((1 << BUG_LINE_HI_WIDTH) - 1)) <<\
-  BUG_LINE_LO_WIDTH) +   \
- (((b)->line_lo + ((b)->ptr_disp < 0)) & \
-  ((1 << BUG_LINE_LO_WIDTH) - 1)))
-#define bug_msg(b) ((const char *)(b) + (b)->msg_disp[1])
-
-#define _ASM_BUGFRAME_TEXT(second_frame) \
-".Lbug%=: ud2\n" \
-".pushsection .bug_frames.%c[bf_type], \"a\", @progbits\n"   \
-".p2align 2\n"   \
-".Lfrm%=:\n" \
-".long (.Lbug%= - .Lfrm%=) + %c[bf_line_hi]\n"   \
-".long (%c[bf_ptr] - .Lfrm%=) + %c[bf_line_lo]\n"\
-".if " #second_frame "\n"\
-".long 0, %c[bf_msg] - .Lfrm%=\n"\
-".endif\n"   \
-".popsection\n"  \
-
-#define _ASM_BUGFRAME_INFO(type, line, ptr, msg) \
-[bf_type]"i" (type), \
-[bf_ptr] "i" (ptr),  \
-[bf_msg] "i" (msg),  \
-[bf_line_lo] "i" ((line & ((1 << BUG_LINE_LO_WIDTH) - 1))\
-  << BUG_DISP_WIDTH),\
-[bf_line_hi] "i" (((line) >> BUG_LINE_LO_WIDTH) << BUG_DISP_WIDTH)
-
-#define BUG_FRAME(type, line, ptr, second_frame, msg) do {   \
-BUILD_BUG_ON((line) >> (BUG_LINE_LO_WIDTH + BUG_LINE_HI_WIDTH)); \
-BUILD_BUG_ON((type) >= BUGFRAME_NR); 

[PATCH v6 1/4] xen: introduce CONFIG_GENERIC_BUG_FRAME

2023-03-07 Thread Oleksii Kurochko
A large part of the content of the bug.h is repeated among all
architectures, so it was decided to create a new config
CONFIG_GENERIC_BUG_FRAME.

The version of <bug.h> from x86 was taken as the base version.

The patch introduces the following stuff:
  * common bug.h header
  * generic implementation of do_bug_frame
  * new config CONFIG_GENERIC_BUG_FRAME

Signed-off-by: Oleksii Kurochko 
---
Changes in V6:
 * fix code style.
 * change -EINVAL to -ENOENT in case when bug_frame wasn't found in
   generic do_bug_frame()
 * change all 'return id' to 'break' inside switch/case of generic 
do_bug_frame()
 * move up "#ifndef __ASSEMBLY__" to include BUG_DEBUGGER_TRAP_FATAL
 * update the comment of BUG_ASM_CONST
 * make the line with 'BUILD_BUG_ON((line) >> (BUG_LINE_LO_WIDTH + 
BUG_LINE_HI_WIDTH))' in
 BUG_FRAME macros more abstract
 * remove #ifndef BUG_FRAME_STRUCT around BUG_DISP_WIDTH, BUG_LINE_LO_WIDTH as 
it is
 required to be defined before  as it is used by x86's 
 when
 the header is included in assembly code.
---
Changes in V5:
 * Remove "#ifdef BUG_FN_REG..." from generic do_bug_frame() as ARM will
   use generic implementation fully.
---
Changes in V4:
 * common/bug.c:
- Use the BUG_DEBUGGER_TRAP_FATAL(regs) macro instead of 
debugger_trap_fatal(TRAP_invalid_op, regs)
  in <common/bug.c> as TRAP_invalid_op is x86-specific; thereby 
BUG_DEBUGGER_TRAP_FATAL should
  be defined for each architecture.
- add information about what do_bug_frame() returns.
- invert the condition 'if ( region )' in do_bug_frame() to 
reduce the indentation.
- change type of variable i from 'unsigned int' to 'size_t' as 
it  is compared with
  n_bugs which has type 'size_t'

 * xen/bug.h:
- Introduce generic BUG_DEBUGGER_TRAP_FATAL(regs) macro which 
is used to deal with 
  debugger_trap_fatal(TRAP_invalid_op, regs) where 
TRAP_invalid_op is x86-specific
- remove '#include ' as it doesn't need any 
more after switch to
  x86 implementation.
- remove '#include ' as it isn't needed any more
- move bug_*() macros inside '#ifndef BUG_FRAME_STRUCT'
- add  to fix compile issue with BUILD_ON()...
- Add documentation for BUG_ASM_CONST.
 * Update the commit message
---
Changes in V3:
 * Add debugger_trap_fatal() to do_bug_frame(). It simplifies usage of
   do_bug_frame() for x86 so making handle_bug_frame() and find_bug_frame()
   not needed anymore.
 * Update do_bug_frame() to return -EINVAL if something goes wrong; otherwise
   id of bug_frame
 * Update _ASM_BUGFRAME_TEXT to make it more portable.
 * Drop unnecessary comments.
 * define stub value for TRAP_invalid_op in case if wasn't defined in
   arch-specific folders.
---
Changes in V2:
  - Switch to x86 implementation as generic as it is more compact
( at least from the point of view of bug frame structure ).
  - Rename CONFIG_GENERIC_DO_BUG_FRAME to CONFIG_GENERIC_BUG_FRAME.
  - Change the macro bug_loc(b) to avoid the need for a cast:
#define bug_loc(b) ((unsigned long)(b) + (b)->loc_disp)
  - Rename BUG_FRAME_STUFF to BUG_FRAME_STRUCT
  - Make macros related to bug frame structure more generic.
  - Introduce BUG_INSTR and MODIFIER to make _ASM_BUGFRAME_TEXT reusable
between x86 and RISC-V.
  - Rework do_bug_frame() and introduce find_bug_frame() and handle_bug_frame()
functions to make it reusable by x86.
  - code style fixes
---
 xen/common/Kconfig|   3 +
 xen/common/Makefile   |   1 +
 xen/common/bug.c  | 104 +++
 xen/include/xen/bug.h | 158 ++
 4 files changed, 266 insertions(+)
 create mode 100644 xen/common/bug.c
 create mode 100644 xen/include/xen/bug.h

diff --git a/xen/common/Kconfig b/xen/common/Kconfig
index f1ea3199c8..b226323537 100644
--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -28,6 +28,9 @@ config ALTERNATIVE_CALL
 config ARCH_MAP_DOMAIN_PAGE
bool
 
+config GENERIC_BUG_FRAME
+   bool
+
 config HAS_ALTERNATIVE
bool
 
diff --git a/xen/common/Makefile b/xen/common/Makefile
index bbd75b4be6..46049eac35 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_ARGO) += argo.o
 obj-y += bitmap.o
+obj-$(CONFIG_GENERIC_BUG_FRAME) += bug.o
 obj-$(CONFIG_HYPFS_CONFIG) += config_data.o
 obj-$(CONFIG_CORE_PARKING) += core_parking.o
 obj-y += cpu.o
diff --git a/xen/common/bug.c b/xen/common/bug.c
new file mode 100644
index 00..be8f3b783d
--- /dev/null
+++ b/xen/common/bug.c
@@ -0,0 +1,104 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+/*
+ * Returns a negative value in case of an error otherwise
+ * BUGFRAME_{run_fn, warn, bug, assert}
+ */
+int do_bug_frame(struct cpu_user_regs *regs, unsigned long pc)
+{
+const struct 

[PATCH v6 3/4] xen/arm: switch ARM to use generic implementation of bug.h

2023-03-07 Thread Oleksii Kurochko
The following changes were made:
* make GENERIC_BUG_FRAME mandatory for ARM
* do_bug_frame() returns -EINVAL in case something goes wrong, otherwise
  the id of the bug frame. Therefore the 'if' conditions where do_bug_frame()
  is called were updated to check if the returned value is less than 0
* Switch ARM's implementation of bug.h macros to generic one

Signed-off-by: Oleksii Kurochko 
---
Changes in V6:
 * Update the "changes in v5"
 * Rebase on top of the patch [xen: introduce CONFIG_GENERIC_BUG_FRAME] as
   there were minor changes.
---
Changes in V5:
 * common/bug.c changes were removed after rebase
   (the patch [xen: introduce CONFIG_GENERIC_BUG_FRAME] was reworked to make
ARM implementation to use generic do_bug_frame())
---
Changes in V4:
 * Switch ARM implementation to generic one
 * Remove BUG_FN_REG from arm{32,64}/bug.h as it isn't needed after switch to 
generic implementation
 * Update commit message
---
Changes in V3:
 * As prototype and what do_bug_frame() returns was changed so patch 3 and 4
   was updated to use a new version of do_bug_frame
---
Changes in V2:
 * Rename bug_file() in ARM implementation to bug_ptr() as
   generic do_bug_frame() uses bug_ptr().
 * Remove generic parts from bug.h
 * Remove declaration of 'int do_bug_frame(...)'
   from <asm/traps.h> as it was introduced in <xen/bug.h>
---
 xen/arch/arm/Kconfig |  1 +
 xen/arch/arm/arm32/traps.c   |  2 +-
 xen/arch/arm/include/asm/arm32/bug.h |  2 -
 xen/arch/arm/include/asm/arm64/bug.h |  2 -
 xen/arch/arm/include/asm/bug.h   | 79 +--
 xen/arch/arm/include/asm/traps.h |  2 -
 xen/arch/arm/traps.c | 81 +---
 7 files changed, 4 insertions(+), 165 deletions(-)

diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index 239d3aed3c..aad6644a7b 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -12,6 +12,7 @@ config ARM_64
 
 config ARM
def_bool y
+   select GENERIC_BUG_FRAME
select HAS_ALTERNATIVE
select HAS_DEVICE_TREE
select HAS_PASSTHROUGH
diff --git a/xen/arch/arm/arm32/traps.c b/xen/arch/arm/arm32/traps.c
index a2fc1c22cb..61c61132c7 100644
--- a/xen/arch/arm/arm32/traps.c
+++ b/xen/arch/arm/arm32/traps.c
@@ -48,7 +48,7 @@ void do_trap_undefined_instruction(struct cpu_user_regs *regs)
 if ( instr != BUG_OPCODE )
 goto die;
 
-if ( do_bug_frame(regs, pc) )
+if ( do_bug_frame(regs, pc) < 0 )
 goto die;
 
 regs->pc += 4;
diff --git a/xen/arch/arm/include/asm/arm32/bug.h 
b/xen/arch/arm/include/asm/arm32/bug.h
index 25cce151dc..3e66f35969 100644
--- a/xen/arch/arm/include/asm/arm32/bug.h
+++ b/xen/arch/arm/include/asm/arm32/bug.h
@@ -10,6 +10,4 @@
 
 #define BUG_INSTR ".word " __stringify(BUG_OPCODE)
 
-#define BUG_FN_REG r0
-
 #endif /* __ARM_ARM32_BUG_H__ */
diff --git a/xen/arch/arm/include/asm/arm64/bug.h 
b/xen/arch/arm/include/asm/arm64/bug.h
index 5e11c0dfd5..59f664d7de 100644
--- a/xen/arch/arm/include/asm/arm64/bug.h
+++ b/xen/arch/arm/include/asm/arm64/bug.h
@@ -6,6 +6,4 @@
 
 #define BUG_INSTR "brk " __stringify(BRK_BUG_FRAME_IMM)
 
-#define BUG_FN_REG x0
-
 #endif /* __ARM_ARM64_BUG_H__ */
diff --git a/xen/arch/arm/include/asm/bug.h b/xen/arch/arm/include/asm/bug.h
index 9ed9412fa8..1d87533044 100644
--- a/xen/arch/arm/include/asm/bug.h
+++ b/xen/arch/arm/include/asm/bug.h
@@ -11,84 +11,7 @@
 # error "unknown ARM variant"
 #endif
 
-#undef BUG_DISP_WIDTH
-#undef BUG_LINE_LO_WIDTH
-#undef BUG_LINE_HI_WIDTH
-
-#define BUG_DISP_WIDTH24
-#define BUG_LINE_LO_WIDTH (31 - BUG_DISP_WIDTH)
-#define BUG_LINE_HI_WIDTH (31 - BUG_DISP_WIDTH)
-
-#define BUG_FRAME_STRUCT
-
-struct bug_frame {
-signed int loc_disp;/* Relative address to the bug address */
-signed int file_disp;   /* Relative address to the filename */
-signed int msg_disp;/* Relative address to the predicate (for ASSERT) 
*/
-uint16_t line;  /* Line number */
-uint32_t pad0:16;   /* Padding for 8-bytes align */
-};
-
-#define bug_loc(b) ((const void *)(b) + (b)->loc_disp)
-#define bug_file(b) ((const void *)(b) + (b)->file_disp);
-#define bug_line(b) ((b)->line)
-#define bug_msg(b) ((const char *)(b) + (b)->msg_disp)
-
-/* Many versions of GCC doesn't support the asm %c parameter which would
- * be preferable to this unpleasantness. We use mergeable string
- * sections to avoid multiple copies of the string appearing in the
- * Xen image. BUGFRAME_run_fn needs to be handled separately.
- */
-#define BUG_FRAME(type, line, file, has_msg, msg) do {  \
-BUILD_BUG_ON((line) >> 16); \
-BUILD_BUG_ON((type) >= BUGFRAME_NR);\
-asm ("1:"BUG_INSTR"\n"  \
- ".pushsection .rodata.str, \"aMS\", %progbits, 1\n"\
- "2:\t.asciz " __stringify(file) "\n"   \
- "3:\n" 

[PATCH v6 2/4] xen: change <asm/bug.h> to <xen/bug.h>

2023-03-07 Thread Oleksii Kurochko
The idea of the patch is to change all <asm/bug.h> to <xen/bug.h> and
keep Xen compilable with adding only minimal amount of changes:
1. It was added "#include <xen/types.h>" to ARM's "<asm/bug.h>" as it
  uses uint_{16,32}t in 'struct bug_frame'.
2. It was added '#define BUG_FRAME_STRUCT' which means that ARM hasn't
  been switched to generic implementation yet.
3. It was added '#define BUG_FRAME_STRUCT' which means that x86 hasn't
  been switched to generic implementation yet.
4. BUGFRAME_* and _start_bug_frame[], _stop_bug_frame_*[] were removed
  for ARM & x86 to deal with compilation errors such as:
  redundant redeclaration of ...
5. It was added undef of BUG_DISP_WIDTH, BUG_LINE_LO_WIDTH
  for ARM & x86 as they were introduced unconditionally in <xen/bug.h>.
  The macros should be defined before <asm/bug.h> in <xen/bug.h> as x86's
  <asm/bug.h> implementation requires them in case when the header is
  included in assembly code.
  The macros will be deleted in the following patches where
  the architectures will be switched fully to generic implementation.

In the following two patches x86 and ARM architectures will be
switched fully:
* xen/arm: switch ARM to use generic implementation of bug.h
* xen/x86: switch x86 to use generic implemetation of bug.h

Signed-off-by: Oleksii Kurochko 
---
Changes in V6:
- change the inclusion order of <xen/bug.h>.
- add #undef of BUG_DISP_WIDTH, BUG_LINE_LO_WIDTH for ARM & x86
  as they were introduced unconditionally in <xen/bug.h>.
- update the commit message
---
Changes in V5:
 - Nothing changed
---
Changes in V4:
- defines BUG_DISP_WIDTH, BUG_LINE_LO_WIDTH, BUG_LINE_HI_WIDTH were 
moved into
  "ifndef BUG_FRAME_STRUCT" in  as they are specific for 
'struct bug_frame' and so should
  co-exist together. So the defines were back to  until 
BUG_FRAME_STRUCT will be defined in
  .
- Update the comment message.
---
Changes in V3:
 * Update patch 2 not to break compilation: move some parts from patches 3 and 4
   to patch 2:
   * move some generic parts from  to 
   * add define BUG_FRAME_STRUCT in ARM's 
---
Changes in V2:
 * Put [PATCH v1 4/4] xen: change  to  as second patch,
   update the patch to change all  to  among the whole 
project
   to not break build.
 * Update the commit message.
---
 xen/arch/arm/include/asm/bug.h   | 21 -
 xen/arch/arm/include/asm/div64.h |  2 +-
 xen/arch/arm/vgic/vgic-v2.c  |  2 +-
 xen/arch/arm/vgic/vgic.c |  2 +-
 xen/arch/x86/acpi/cpufreq/cpufreq.c  |  2 +-
 xen/arch/x86/include/asm/asm_defns.h |  2 +-
 xen/arch/x86/include/asm/bug.h   | 19 ++-
 xen/drivers/cpufreq/cpufreq.c|  2 +-
 xen/include/xen/lib.h|  2 +-
 9 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/xen/arch/arm/include/asm/bug.h b/xen/arch/arm/include/asm/bug.h
index f4088d0913..9ed9412fa8 100644
--- a/xen/arch/arm/include/asm/bug.h
+++ b/xen/arch/arm/include/asm/bug.h
@@ -1,6 +1,8 @@
 #ifndef __ARM_BUG_H__
 #define __ARM_BUG_H__
 
+#include 
+
 #if defined(CONFIG_ARM_32)
 # include 
 #elif defined(CONFIG_ARM_64)
@@ -9,10 +11,16 @@
 # error "unknown ARM variant"
 #endif
 
+#undef BUG_DISP_WIDTH
+#undef BUG_LINE_LO_WIDTH
+#undef BUG_LINE_HI_WIDTH
+
 #define BUG_DISP_WIDTH24
 #define BUG_LINE_LO_WIDTH (31 - BUG_DISP_WIDTH)
 #define BUG_LINE_HI_WIDTH (31 - BUG_DISP_WIDTH)
 
+#define BUG_FRAME_STRUCT
+
 struct bug_frame {
 signed int loc_disp;/* Relative address to the bug address */
 signed int file_disp;   /* Relative address to the filename */
@@ -26,13 +34,6 @@ struct bug_frame {
 #define bug_line(b) ((b)->line)
 #define bug_msg(b) ((const char *)(b) + (b)->msg_disp)
 
-#define BUGFRAME_run_fn 0
-#define BUGFRAME_warn   1
-#define BUGFRAME_bug2
-#define BUGFRAME_assert 3
-
-#define BUGFRAME_NR 4
-
 /* Many versions of GCC doesn't support the asm %c parameter which would
  * be preferable to this unpleasantness. We use mergeable string
  * sections to avoid multiple copies of the string appearing in the
@@ -89,12 +90,6 @@ struct bug_frame {
 unreachable();  \
 } while (0)
 
-extern const struct bug_frame __start_bug_frames[],
-  __stop_bug_frames_0[],
-  __stop_bug_frames_1[],
-  __stop_bug_frames_2[],
-  __stop_bug_frames_3[];
-
 #endif /* __ARM_BUG_H__ */
 /*
  * Local variables:
diff --git a/xen/arch/arm/include/asm/div64.h b/xen/arch/arm/include/asm/div64.h
index 1cd58bc51a..fc667a80f9 100644
--- a/xen/arch/arm/include/asm/div64.h
+++ b/xen/arch/arm/include/asm/div64.h
@@ -74,7 +74,7 @@
 
 #elif __GNUC__ >= 4
 
-#include 
+#include 
 
 /*
  * If the divisor happens to be constant, we determine the appropriate
diff --git a/xen/arch/arm/vgic/vgic-v2.c b/xen/arch/arm/vgic/vgic-v2.c
index 1a99d3a8b4..c90e88fddb 100644
--- a/xen/arch/arm/vgic/vgic-v2.c
+++ b/xen/arch/arm/vgic/vgic-v2.c
@@ -16,8 +16,8 @@
  */
 
 

  1   2   >