date:20230320

Re: [PATCH 2/2] tools/xl: rework p9 config parsing

2023-03-20 Thread Juergen Gross


On 20.03.23 18:12, Jason Andryuk wrote:

On Fri, Mar 17, 2023 at 7:16 AM Juergen Gross  wrote:


Rework the config parsing of a p9 device to use the
split_string_into_pair() function instead of open coding it.

Signed-off-by: Juergen Gross 
---
  tools/xl/xl_parse.c | 72 ++---
  1 file changed, 35 insertions(+), 37 deletions(-)

diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
index 2f9dfea05c..715e14f95f 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -2111,54 +2111,52 @@ void parse_config_data(const char *config_source,

  if (!xlu_cfg_get_list(config, "p9", &p9devs, 0, 0)) {
  libxl_device_p9 *p9;
-char *security_model = NULL;
-char *path = NULL;
-char *tag = NULL;
-char *backend = NULL;
-char *p, *p2, *buf2;

  d_config->num_p9s = 0;
  d_config->p9s = NULL;
  while ((buf = xlu_cfg_get_listitem (p9devs, d_config->num_p9s)) != 
NULL) {
+libxl_string_list pairs;
+int len;
+
  p9 = ARRAY_EXTEND_INIT(d_config->p9s,
 d_config->num_p9s,
 libxl_device_p9_init);
  libxl_device_p9_init(p9);

-buf2 = strdup(buf);
-p = strtok(buf2, ",");
-if(p) {
-   do {
-  while(*p == ' ')
- ++p;
-  if ((p2 = strchr(p, '=')) == NULL)
- break;
-  *p2 = '\0';
-  if (!strcmp(p, "security_model")) {
- security_model = strdup(p2 + 1);
-  } else if(!strcmp(p, "path")) {
- path = strdup(p2 + 1);
-  } else if(!strcmp(p, "tag")) {
- tag = strdup(p2 + 1);
-  } else if(!strcmp(p, "backend")) {
- backend = strdup(p2 + 1);
-  } else {
- fprintf(stderr, "Unknown string `%s' in 9pfs spec\n", p);
- exit(1);
-  }
-   } while ((p = strtok(NULL, ",")) != NULL);
-}
-if (!path || !security_model || !tag) {
-   fprintf(stderr, "9pfs spec missing required field!\n");
-   exit(1);
+split_string_into_string_list(buf, ",", &pairs);
+len = libxl_string_list_length(&pairs);
+for (i = 0; i < len; i++) {
+char *key, *value;
+int rc;
+
+rc = split_string_into_pair(pairs[i], "=", &key, &value,
+isspace);
+if (rc != 0) {
+fprintf(stderr, "failed to parse 9pfs configuration: %s",
+pairs[i]);
+exit(1);
+}
+
+if (!strcmp(key, "security_model")) {
+replace_string(&p9->security_model, value);
+} else if (!strcmp(key, "path")) {
+replace_string(&p9->path, value);
+} else if (!strcmp(key, "tag")) {
+replace_string(&p9->tag, value);
+} else if (!strcmp(key, "backend")) {
+replace_string(&p9->backend_domname, value);
+} else {
+fprintf(stderr, "Unknown 9pfs parameter '%s'\n", key);
+exit(1);
+}
+free(key);
+free(value);
  }
-free(buf2);


I think you need libxl_string_list_dispose(&pairs); somewhere around here?


Ah yes, thanks for noticing.


Juergen



OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature

[linux-linus test] 179818: regressions - trouble: fail/pass/starved

2023-03-20 Thread osstest service owner

flight 179818 linux-linus real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179818/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-xl-xsm  18 guest-localmigrate   fail REGR. vs. 178042
 test-amd64-amd64-dom0pvh-xl-intel 14 guest-start fail REGR. vs. 178042
 test-amd64-amd64-dom0pvh-xl-amd 14 guest-start   fail REGR. vs. 178042
 test-amd64-amd64-libvirt-pair 27 guest-migrate/dst_host/src_host fail REGR. 
vs. 178042
 test-amd64-amd64-xl-qemuu-debianhvm-i386-xsm 17 guest-saverestore.2 fail REGR. 
vs. 178042
 test-arm64-arm64-xl-thunderx 14 guest-start  fail REGR. vs. 178042
 test-arm64-arm64-xl-xsm  14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-libvirt20 guest-start/debian.repeat fail REGR. vs. 178042
 test-arm64-arm64-libvirt-xsm 14 guest-start  fail REGR. vs. 178042
 test-arm64-arm64-xl 18 guest-start/debian.repeat fail REGR. vs. 178042
 test-arm64-arm64-xl-credit2 18 guest-start/debian.repeat fail REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-debianhvm-amd64 12 debian-hvm-install fail REGR. vs. 
178042
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-shadow 12 debian-hvm-install fail 
REGR. vs. 178042
 test-amd64-amd64-xl-qemut-debianhvm-amd64 12 debian-hvm-install fail REGR. vs. 
178042
 build-i386-pvops  6 kernel-build fail REGR. vs. 178042
 test-amd64-amd64-xl-qemut-stubdom-debianhvm-amd64-xsm 12 debian-hvm-install 
fail REGR. vs. 178042
 test-amd64-amd64-pair25 guest-start/debian   fail REGR. vs. 178042
 test-amd64-amd64-qemuu-nested-amd 12 debian-hvm-install  fail REGR. vs. 178042
 test-amd64-amd64-xl-pvshim   14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-xl-pvhv2-intel 14 guest-start   fail REGR. vs. 178042
 test-amd64-amd64-xl-vhd  12 debian-di-installfail REGR. vs. 178042
 test-amd64-amd64-pygrub  12 debian-di-installfail REGR. vs. 178042
 test-amd64-amd64-libvirt-raw 12 debian-di-installfail REGR. vs. 178042
 test-amd64-amd64-libvirt-qcow2 12 debian-di-install  fail REGR. vs. 178042
 test-arm64-arm64-xl-vhd  12 debian-di-installfail REGR. vs. 178042
 test-arm64-arm64-libvirt-raw 12 debian-di-installfail REGR. vs. 178042
 test-amd64-amd64-xl   17 guest-saverestore fail in 179791 REGR. vs. 178042
 test-amd64-amd64-xl-shadow 20 guest-localmigrate/x10 fail in 179791 REGR. vs. 
178042
 test-amd64-amd64-xl-credit2 22 guest-start/debian.repeat fail in 179791 REGR. 
vs. 178042
 build-arm64-pvops 6 kernel-build   fail in 179791 REGR. vs. 178042
 test-amd64-amd64-freebsd12-amd64 16 guest-saverestore fail in 179797 REGR. vs. 
178042
 test-amd64-amd64-freebsd11-amd64 16 guest-saverestore fail in 179797 REGR. vs. 
178042
 test-amd64-coresched-amd64-xl 17 guest-saverestore fail in 179797 REGR. vs. 
178042
 test-amd64-amd64-qemuu-nested-intel 13 nested-setup fail in 179797 REGR. vs. 
178042
 test-amd64-amd64-xl-qemuu-ovmf-amd64 17 guest-saverestore.2 fail in 179797 
REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-dmrestrict-amd64-dmrestrict 13 guest-stop fail in 
179797 REGR. vs. 178042
 test-arm64-arm64-xl-credit1 18 guest-start/debian.repeat fail in 179797 REGR. 
vs. 178042
 test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 18 guest-localmigrate/x10 fail in 
179797 REGR. vs. 178042
 test-amd64-amd64-xl-credit1 20 guest-localmigrate/x10 fail in 179805 REGR. vs. 
178042
 test-amd64-amd64-xl-pvhv2-amd 18 guest-localmigrate fail in 179805 REGR. vs. 
178042
 test-amd64-amd64-xl-multivcpu 20 guest-localmigrate/x10 fail in 179805 REGR. 
vs. 178042
 test-amd64-amd64-libvirt-xsm 19 guest-stop fail in 179805 REGR. vs. 178042
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 18 
guest-start/debianhvm.repeat fail in 179805 REGR. vs. 178042
 build-arm64   6 xen-build  fail in 179805 REGR. vs. 178042

Tests which are failing intermittently (not blocking):
 test-amd64-amd64-libvirt-xsm 14 guest-start  fail in 179791 pass in 179818
 test-amd64-amd64-xl-qemuu-debianhvm-i386-xsm 16 guest-localmigrate fail in 
179791 pass in 179818
 test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 15 guest-saverestore fail in 
179791 pass in 179818
 test-amd64-amd64-xl-credit2 20 guest-localmigrate/x10 fail in 179797 pass in 
179791
 test-amd64-amd64-xl-credit1 17 guest-saverestore fail in 179797 pass in 179805
 test-amd64-amd64-libvirt-xsm 18 guest-saverestore.2 fail in 179797 pass in 
179805
 test-amd64-amd64-xl-xsm  14 guest-start  fail in 179797 pass in 179818
 test-arm64-arm64-xl  14 guest-start  fail in 179797 pass in 179818
 test-arm64-arm64-xl-credit2  17 guest-stop   fail in 179797 pass in 179818
 test-amd64-amd64-xl-multivcpu 14 guest-start fail in 179797 pass in 179818
 test-amd64-amd64-xl-xsm 17 guest-saverestore fail in 179805 pass in 179818
 test-amd64-amd64-libvirt-pair 25

Re: [BUG] x2apic broken with current AMD hardware

2023-03-20 Thread Elliott Mitchell

On Mon, Mar 20, 2023 at 09:28:20AM +0100, Jan Beulich wrote:
> On 20.03.2023 09:14, Jan Beulich wrote:
> > On 17.03.2023 18:26, Elliott Mitchell wrote:
> >> On Fri, Mar 17, 2023 at 09:22:09AM +0100, Jan Beulich wrote:
> >>> On 16.03.2023 23:03, Elliott Mitchell wrote:
>  On Mon, Mar 13, 2023 at 08:01:02AM +0100, Jan Beulich wrote:
> > On 11.03.2023 01:09, Elliott Mitchell wrote:
> >> On Thu, Mar 09, 2023 at 10:03:23AM +0100, Jan Beulich wrote:
> >>>
> >>> In any event you will want to collect a serial log at maximum 
> >>> verbosity.
> >>> It would also be of interest to know whether turning off the IOMMU 
> >>> avoids
> >>> the issue as well (on the assumption that your system has less than 
> >>> 255
> >>> CPUs).
> >>
> >> I think I might have figured out the situation in a different fashion.
> >>
> >> I was taking a look at the BIOS manual for this motherboard and noticed
> >> a mention of a "Local APIC Mode" setting.  Four values are listed
> >> "Compatibility", "xAPIC", "x2APIC", and "Auto".
> >>
> >> That is the sort of setting I likely left at "Auto" and that may well
> >> result in x2 functionality being disabled.  Perhaps the x2APIC
> >> functionality on AMD is detecting whether the hardware is present, and
> >> failing to test whether it has been enabled?  (could be useful to 
> >> output
> >> a message suggesting enabling the hardware feature)
> >
> > Can we please move to a little more technical terms here? What is 
> > "present"
> > and "enabled" in your view? I don't suppose you mean the CPUID bit 
> > (which
> > we check) and the x2APIC-mode-enable one (which we drive as needed). 
> > It's
> > also left unclear what the four modes of BIOS operation evaluate to. 
> > Even
> > if we knew that, overriding e.g. "Compatibility" (which likely means 
> > some
> > form of "disabled" / "hidden") isn't normally an appropriate thing to 
> > do.
> > In "Auto" mode Xen likely should work - the only way I could interpret 
> > > other modes are "xAPIC" meaning no x2APIC ACPI table entries (and
> > presumably the CPUID bit also masked), "x2APIC" meaning x2APIC mode pre-
> > enabled by firmware, and "Auto" leaving it to the OS to select. Yet 
> > that's
> > speculation on my part ...
> 
>  I provided the information I had discovered.  There is a setting for this
>  motherboard (likely present on some similar motherboards) which /may/
>  affect the issue.  I doubt I've tried "compatibility", but none of the
>  values I've tried have gotten the system to boot without "x2apic=false"
>  on Xen's command-line.
> 
>  When setting to "x2APIC" just after "(XEN) AMD-Vi: IOMMU Extended 
>  Features:"
>  I see the line "(XEN) - x2APIC".  Later is the line
>  "(XEN) x2APIC mode is already enabled by BIOS."  I'll guess "Auto"
>  leaves the x2APIC turned off since neither line is present.
> >>>
> >>> When "(XEN) - x2APIC" is absent the IOMMU can't be switched into x2APIC
> >>> mode. Are you sure that's the case when using "Auto"?
> >>
> >> grep -eAPIC\ driver -e-\ x2APIC:
> >>
> >> "Auto":
> >> (XEN) Using APIC driver default
> >> (XEN) Overriding APIC driver with bigsmp
> >> (XEN) Switched to APIC driver x2apic_cluster
> >>
> >> "x2APIC":
> >> (XEN) Using APIC driver x2apic_cluster
> >> (XEN) - x2APIC
> >>
> >> Yes, I'm sure.
> > 
> > Okay, this then means we're running in a mode we don't mean to run
> > in: When the IOMMU claims to not support x2APIC mode (which is odd in
> > the first place when at the same time the CPU reports x2APIC mode as
> > supported), amd_iommu_prepare() is intended to switch interrupt
> > remapping mode to "restricted" (which in turn would force x2APIC mode
> > to "physical", not "clustered"). I notice though that there are a
> > number of error paths in the function which bypass this setting. Could
> > you add a couple of printk()s to understand which path is taken (each
> > time; the function can be called more than once)?
> 
> I think I've spotted at least one issue. Could you give the patch below
> a try please? (Patch is fine for master and 4.17 but would need context
> adjustment for 4.16.)


> AMD/IOMMU: without XT, x2APIC needs to be forced into physical mode
> 
> An earlier change with the same title (commit 1ba66a870eba) altered only
> the path where x2apic_phys was already set to false (perhaps from the
> command line). The same of course needs applying when the variable
> wasn't modified yet from its initial value.
> 
> Reported-by: Elliott Mitchell 
> Signed-off-by: Jan Beulich 

This does appear to be an improvement.  With this the system boots if
the "Local APIC Mode" setting is "auto".  As you may have guessed,
"(XEN) Switched to APIC driver x2apic_phys".



When I tried setting "Local APIC Mode" to "x2APIC" though things didn't
go so well.

RE: [PATCH v2 3/4] xen/arm: Defer GICv2 CPU interface mapping until the first access

2023-03-20 Thread Henry Wang

Hi,

> Considering that the CPU interface is continuous (I suppose), I have two
> ways of rewriting the gfn check, we can do either:
> 
> gfn_eq(gfn, gaddr_to_gfn(d->arch.vgic.cbase)) ||
> gfn_eq(gfn, gfn_add(gaddr_to_gfn(d->arch.vgic.cbase), 1))
> 
> or
> 
> gfn_to_gaddr(gfn) >= d->arch.vgic.cbase ||
> gfn_to_gaddr(gfn) < d->arch.vgic.cbase + d->arch.vgic.csize

Oops, copy paste error, this should be 

gfn_to_gaddr(gfn) >= d->arch.vgic.cbase &&
gfn_to_gaddr(gfn) < d->arch.vgic.cbase + d->arch.vgic.csize

Kind regards,
Henry

RE: [PATCH v2 3/4] xen/arm: Defer GICv2 CPU interface mapping until the first access

2023-03-20 Thread Henry Wang

Hi Julien,

Thanks very much for your time and review :)

> -Original Message-
> From: Julien Grall 
> Subject: Re: [PATCH v2 3/4] xen/arm: Defer GICv2 CPU interface mapping until
> the first access
> 
> Hi Henry,
> 
> On 30/01/2023 04:06, Henry Wang wrote:
> > Since the hardware domain (Dom0) has an unlimited size P2M pool, the
> > gicv2_map_hwdom_extra_mappings() is also not touched by this patch.
> 
> I didn't notice this in the previous version. The fact that dom0 has
> unlimited size P2M pool doesn't matter here (this could also change in
> the future). Even if the P2M pool was limited, then it would be fine
> because the extra mappings happen after domain_create(). So there is no
> need to map them on-demand as the code could be ordered the way we want.
> 
> So this paragraph needs to be reworded.

Sure, I've reworded this paragraph to below:
"Since gicv2_map_hwdom_extra_mappings() happens after domain_create(),
there is no need to map the extra mappings on-demand; therefore the
hwdom extra mappings are left untouched."

> 
> > +/*
> > + * Map the GICv2 virtual CPU interface in the GIC CPU interface
> > + * region of the guest on the first access of the MMIO region.
> > + */
> > +if ( d->arch.vgic.version == GIC_V2 &&
> > + gfn_eq(gfn, gaddr_to_gfn(d->arch.vgic.cbase)) )
> 
> The CPU interface size is 8KB (bigger in some cases) but here you only
> check for the access to be in the first 4KB.

Yeah indeed, gfn might fall into the range between 4KB and 8KB, sorry
about that.

Considering that the CPU interface is continuous (I suppose), I have two
ways of rewriting the gfn check, we can do either:

gfn_eq(gfn, gaddr_to_gfn(d->arch.vgic.cbase)) ||
gfn_eq(gfn, gfn_add(gaddr_to_gfn(d->arch.vgic.cbase), 1))

or

gfn_to_gaddr(gfn) >= d->arch.vgic.cbase ||
gfn_to_gaddr(gfn) < d->arch.vgic.cbase + d->arch.vgic.csize

May I ask which one would you think is better? Thanks!

Kind regards,
Henry

> 
> Cheers,
> 
> --
> Julien Grall

[xen-unstable-smoke test] 179826: tolerable trouble: pass/starved - PUSHED

2023-03-20 Thread osstest service owner

flight 179826 xen-unstable-smoke real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179826/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt 15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl   1 build-check(1)   starved  n/a
 build-armhf   2 hosts-allocate   starved  n/a

version targeted for testing:
 xen  0bbf102d8794fb961cb103ada00999768547916e
baseline version:
 xen  c2581c58bec96afa450ebaca3fa2a33bcb0a9974

Last test of basis   179814  2023-03-20 17:00:25 Z0 days
Testing same since   179826  2023-03-21 01:05:01 Z0 days1 attempts


People who touched revisions under test:
  Jiamei Xie 

jobs:
 build-arm64-xsm  pass
 build-amd64  pass
 build-armhf  starved 
 build-amd64-libvirt  pass
 test-armhf-armhf-xl  starved 
 test-arm64-arm64-xl-xsm  pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-amd64-libvirt pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/xen.git
   c2581c58be..0bbf102d87  0bbf102d8794fb961cb103ada00999768547916e -> smoke

[qemu-mainline test] 179816: regressions - trouble: fail/pass/starved

2023-03-20 Thread osstest service owner

flight 179816 qemu-mainline real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179816/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-i386-libvirt  14 guest-start  fail REGR. vs. 179518
 test-amd64-i386-libvirt-xsm  14 guest-start  fail REGR. vs. 179518
 test-amd64-i386-libvirt-pair 25 guest-start/debian   fail REGR. vs. 179518
 test-amd64-amd64-xl-qcow212 debian-di-installfail REGR. vs. 179518
 test-amd64-amd64-libvirt-vhd 12 debian-di-installfail REGR. vs. 179518
 test-arm64-arm64-libvirt-xsm 14 guest-start  fail REGR. vs. 179518
 test-amd64-i386-libvirt-raw  12 debian-di-installfail REGR. vs. 179518
 test-amd64-amd64-libvirt 14 guest-start  fail REGR. vs. 179518
 test-amd64-amd64-libvirt-xsm 14 guest-start  fail REGR. vs. 179518
 test-amd64-amd64-xl-qemuu-dmrestrict-amd64-dmrestrict 12 debian-hvm-install 
fail REGR. vs. 179518
 test-amd64-i386-xl-qemuu-dmrestrict-amd64-dmrestrict 12 debian-hvm-install 
fail REGR. vs. 179518
 test-arm64-arm64-libvirt-raw 12 debian-di-installfail REGR. vs. 179518
 test-amd64-i386-xl-vhd   12 debian-di-installfail REGR. vs. 179518
 test-arm64-arm64-xl-vhd  12 debian-di-installfail REGR. vs. 179518
 test-amd64-amd64-libvirt-pair 25 guest-start/debian  fail REGR. vs. 179518

Tests which are failing intermittently (not blocking):
 test-amd64-i386-pair 10 xen-install/src_host   fail pass in 179802
 test-amd64-i386-qemuu-rhel6hvm-amd  7 xen-install  fail pass in 179802

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 179518
 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 179518
 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 179518
 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 179518
 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 179518
 test-amd64-i386-xl-pvshim14 guest-start  fail   never pass
 test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  16 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-armhf-armhf-libvirt  1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt-qcow2  1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt-raw  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-credit1   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-multivcpu  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-rtds  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-vhd   1 build-check(1)   starved  n/a
 build-armhf-libvirt   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-cubietruck  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-credit2   1 build-check(1)   starved  n/a
 build-armhf   2 hosts-allocate   starved  n/a

version targeted for testing:
 qemuu74c581b6452394e591f13beba9fea2ec0688e2f5
baseline version:
 qemuu7b0f0aa55fd292fa3489755a3a896e496c51ea86

Last test of basis   179518  2023-03-09 10:37:19 Z   11 days
Failing since179526  2023-03-10 01:53:40 Z   10 days   19 attempts
Testing same since   179733  2023-03-17 21:10:25 Z3 days6 attempts


People who touched revisions under test:
  Akihiko Odaki 
  Albert Esteve 
  Alex Bennée 
  Alex Williamson 
  Alistair Francis 
  Andreas Schwab 
  Anton Johansson 
  Avihai Horon 
  BALATON Zoltan 
  Bernhard Beschow 
  Carlos López 
  Cédric Le Goater 
  Cédric Le Goater 
  Damien Hedde 
  Daniel P. Berrangé 
  David Hildenbrand 
  David Woodhouse 
  David Woodhouse 
  Dr. David Alan Gilbert 
  Eugenio Pérez 
  Fabiano Rosas 
  Fan Ni 
  fanwenjie

Re: [XEN PATCH v1 1/1] x86/domctl: add gva_to_gfn command

2023-03-20 Thread Tamas K Lengyel

On Mon, Mar 20, 2023 at 3:34 PM Andrew Cooper 
wrote:
>
> On 20/03/2023 7:22 pm, Ковалёв Сергей wrote:
> >
> > 21.03.2023 1:51, Tamas K Lengyel wrote:
> >> On Mon, Mar 20, 2023 at 12:32 PM Ковалёв Сергей  >> > wrote:
> >>  >
> >>  > gva_to_gfn command used for fast address translation in LibVMI
> >> project.
> >>  > With such a command it is possible to perform address translation in
> >>  > single call instead of series of queries to get every page table.
> >>
> >> You have a couple assumptions here:
> >>   - Xen will always have a direct map of the entire guest memory -
> >> there are already plans to move away from that. Without that this
> >> approach won't have any advantage over doing the same mapping by LibVMI
> >
> > Thanks! I didn't know about the plan.
>
> To be clear, "not mapping the guest by default" is for speculative
> safety/hardening reasons.
>
> Xen will always need to be capable of mapping arbitrary parts of the
> guest, even if only transiently, so there's no relevant interaction with
> this new proposed hypercall.
>
>
> The truth is that Xen will always be able to do a single pagewalk faster
> than libvmi can do it (via mappings, or otherwise), but if libvmi does
> properly maintain a cache of mappings then it will be faster than
> repeated hypercalls into Xen.  Where the split lies depends heavily on
> the libvmi workload.
>
> I don't see a problem in principle with a hypercall like this - it is
> "just" a performance optimisation over capabilities that libvmi already
> has - but the version presented here is overly simplistic.

For debugging purposes sure it would be fine to have this hypercall but I
wouldn't set it as the default for LibVMI. Oftentimes the lookup needs to
be more nuanced than what Xen understands about paging. For example, on
Windows guests you can have transition pages that don't have the present
bit set yet are perfectly valid for introspection purposes (
https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=3311ed0c63d4ca707c49256655e401f37f25ec50).
Xen would need to be enlightened about this type of OS-specific tidbits for
which I think LibVMI is a much better place to keep the logic for.

Tamas

Re: [XEN PATCH v1 1/1] x86/domctl: add gva_to_gfn command

2023-03-20 Thread Tamas K Lengyel

On Mon, Mar 20, 2023 at 3:23 PM Ковалёв Сергей  wrote:
>
>
>
> 21.03.2023 1:51, Tamas K Lengyel wrote:
> >
> >
> > On Mon, Mar 20, 2023 at 12:32 PM Ковалёв Сергей  > > wrote:
> >  >
> >  > gva_to_gfn command used for fast address translation in LibVMI
project.
> >  > With such a command it is possible to perform address translation in
> >  > single call instead of series of queries to get every page table.
> >
> > You have a couple assumptions here:
> >   - Xen will always have a direct map of the entire guest memory - there
> > are already plans to move away from that. Without that this approach
> > won't have any advantage over doing the same mapping by LibVMI
>
> Thanks! I didn't know about the plan. Though I use this patch
> back ported into 4.16.
>
> >   - LibVMI has to map every page for each page table for every lookup -
> > you have to do that only for the first, afterwards the pages on which
> > the pagetable is are kept in a cache and subsequent lookups would be
> > actually faster than having to do this domctl since you can keep being
> > in the same process instead of having to jump to Xen.
>
> Yes. I know about the page cache. But I have faced several issues
> with cache like this one https://github.com/libvmi/libvmi/pull/1058 .
> So I had to disable the cache.

The issue you linked to is an issue with a stale v2p cache, which is a
virtual TLB. The cache I talked about is the page cache, which is just
maintaining a list of the pages that were accessed by LibVMI for future
accesses. You can have one and not the other (ie. ./configure
--disable-address-cache --enable-page-cache).

Tamas

[XEN PATCH v4] x86/monitor: Add new monitor event to catch I/O instructions

2023-03-20 Thread Dmitry Isaykin

Adds monitor support for I/O instructions.

Signed-off-by: Dmitry Isaykin 
Signed-off-by: Anton Belousov 
---
Changes in v4:
 * Avoid the use of fixed-width types
 * Document vm_event_io structure fields
 * Untie vm-event interface from ioreq one

Changes in v3:
 * Rebase on staging
 * Refactor branch logic on monitor_traps response

Changes in v2:
 * Handled INS and OUTS instructions too
 * Added I/O monitoring support for AMD
 * Rename functions and structures (remove "_instruction" part)
 * Reorder parameters of hvm_monitor_io to match handle_pio's order
 * Change type of string_ins parameter to bool
 * Change vm_event_io structure
 * Handle monitor_traps's return status
---
 tools/include/xenctrl.h|  1 +
 tools/libs/ctrl/xc_monitor.c   | 13 +
 xen/arch/x86/hvm/monitor.c | 21 +
 xen/arch/x86/hvm/svm/svm.c |  9 +
 xen/arch/x86/hvm/vmx/vmx.c | 24 +++-
 xen/arch/x86/include/asm/domain.h  |  1 +
 xen/arch/x86/include/asm/hvm/monitor.h |  3 +++
 xen/arch/x86/include/asm/monitor.h |  3 ++-
 xen/arch/x86/monitor.c | 13 +
 xen/include/public/domctl.h|  1 +
 xen/include/public/vm_event.h  | 10 ++
 11 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 23037874d3..05967ecc92 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -2102,6 +2102,7 @@ int xc_monitor_emul_unimplemented(xc_interface *xch, 
uint32_t domain_id,
   bool enable);
 int xc_monitor_vmexit(xc_interface *xch, uint32_t domain_id, bool enable,
   bool sync);
+int xc_monitor_io(xc_interface *xch, uint32_t domain_id, bool enable);
 /**
  * This function enables / disables emulation for each REP for a
  * REP-compatible instruction.
diff --git a/tools/libs/ctrl/xc_monitor.c b/tools/libs/ctrl/xc_monitor.c
index c5fa62ff30..3cb96f444f 100644
--- a/tools/libs/ctrl/xc_monitor.c
+++ b/tools/libs/ctrl/xc_monitor.c
@@ -261,6 +261,19 @@ int xc_monitor_vmexit(xc_interface *xch, uint32_t 
domain_id, bool enable,
 return do_domctl(xch, );
 }
 
+int xc_monitor_io(xc_interface *xch, uint32_t domain_id, bool enable)
+{
+DECLARE_DOMCTL;
+
+domctl.cmd = XEN_DOMCTL_monitor_op;
+domctl.domain = domain_id;
+domctl.u.monitor_op.op = enable ? XEN_DOMCTL_MONITOR_OP_ENABLE
+: XEN_DOMCTL_MONITOR_OP_DISABLE;
+domctl.u.monitor_op.event = XEN_DOMCTL_MONITOR_EVENT_IO;
+
+return do_domctl(xch, );
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
index a11cd76f4d..4f500beaf5 100644
--- a/xen/arch/x86/hvm/monitor.c
+++ b/xen/arch/x86/hvm/monitor.c
@@ -346,6 +346,27 @@ int hvm_monitor_vmexit(unsigned long exit_reason,
 return monitor_traps(curr, ad->monitor.vmexit_sync, );
 }
 
+int hvm_monitor_io(unsigned int port, unsigned int bytes,
+   bool in, bool str)
+{
+struct vcpu *curr = current;
+struct arch_domain *ad = >domain->arch;
+vm_event_request_t req = {
+.reason = VM_EVENT_REASON_IO_INSTRUCTION,
+.u.io.bytes = bytes,
+.u.io.port = port,
+.u.io.in = in,
+.u.io.str = str,
+};
+
+if ( !ad->monitor.io_enabled )
+return 0;
+
+set_npt_base(curr, );
+
+return monitor_traps(curr, true, );
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index bfe03316de..02563e4b70 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2939,6 +2939,15 @@ void svm_vmexit_handler(void)
 break;
 
 case VMEXIT_IOIO:
+rc = hvm_monitor_io(vmcb->ei.io.port,
+vmcb->ei.io.bytes,
+vmcb->ei.io.in,
+vmcb->ei.io.str);
+if ( rc < 0 )
+goto unexpected_exit_type;
+if ( rc )
+break;
+
 if ( !vmcb->ei.io.str )
 {
 if ( handle_pio(vmcb->ei.io.port,
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 00b531f76c..0b7a302928 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -4560,8 +4560,24 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
 break;
 
 case EXIT_REASON_IO_INSTRUCTION:
+{
+unsigned int port, bytes;
+bool in, str;
+int rc;
+
 __vmread(EXIT_QUALIFICATION, &exit_qualification);
-if ( exit_qualification & 0x10 )
+
+port = (exit_qualification >> 16) & 0xFFFF;
+bytes = (exit_qualification & 0x07) + 1;
+in = (exit_qualification & 0x08);
+str = (exit_qualification & 0x10);
+rc = hvm_monitor_io(port, bytes, in, str);
+if ( rc < 0 )
+goto exit_and_crash;
+if ( rc )
+

Re: [PATCH 2/2] automation: add a suspend test on an Alder Lake system

2023-03-20 Thread Stefano Stabellini

On Mon, 20 Mar 2023, Marek Marczykowski-Górecki wrote:
> On Mon, Mar 20, 2023 at 01:41:45PM -0700, Stefano Stabellini wrote:
> > On Mon, 20 Mar 2023, Marek Marczykowski-Górecki wrote:
> > > On Mon, Mar 20, 2023 at 01:08:42PM -0700, Stefano Stabellini wrote:
> > > > On Sat, 18 Mar 2023, Marek Marczykowski-Górecki wrote:
> > > > > On Fri, Mar 17, 2023 at 04:10:22PM -0700, Stefano Stabellini wrote:
> > > > > > On Fri, 17 Mar 2023, Marek Marczykowski-Górecki wrote:
> > > > > > > +fakeroot -s ../fakeroot-save tar xzf ../binaries/initrd.tar.gz
> > > > > > 
> > > > > > I am a bit confused about it: are you sure you need fakeroot for 
> > > > > > this?
> > > > > > This script is running inside a container as root already? Are you 
> > > > > > using
> > > > > > Docker on the RPi4 to run this job?
> > > > > 
> > > > > This is running in a rootless podman container. But even with docker,
> > > > > for device files to work (see commit message) it would need to run
> > > > > privileged container, which I'd like to avoid.
> > > > 
> > > > Are you sure? I can run a non-privileged container with device assigned
> > > > just fine with Docker and
> > > >  
> > > >   devices = ["/dev/ttyUSB0:/dev/ttyUSB0"]
> > > > 
> > > > in the gitlab-runner config.toml.
> > > 
> > > It isn't about accessing existing devices, it's about creating them
> > > (unpacking rootfs which contains static /dev) and then packing it back
> > > still having those devices.
> > 
> > OK for that definitely you don't need a privileged container. A regular
> > container comes with "root" by default, but without all the privileges
> > that "root" normally allows outside of a container. That is enough (at
> > least in my environments) to pack/unpack a rootfs successfully without
> > fakeroot. Maybe this is a podman-specific limitation?
> 
> It seems so, as rootless podman isn't running commands as root on the
> "host", but instead in a separate user namespace.
> 
> > If you are curious to try, you should be able to run a simple
> > pack/unpack rootfs with Docker (of course without --privileged) without
> > issues.
> 
> In fact, the same issue happens in docker, if I enable this extra
> protection there[1].
> 
> [1] https://docs.docker.com/engine/security/userns-remap/
 
Useful to know! Please add all the info to the commit message or
an in-code comment. I am fine with using fakeroot for this (and in fact we
might want to add it to the other scripts as well at some point, given
that it shouldn't hurt in the "root" case either).

Re: [PATCH] xen: Fix host pci for stubdom

2023-03-20 Thread Jason Andryuk

On Mon, Mar 20, 2023 at 2:41 PM Bernhard Beschow  wrote:
>
>
>
> Am 20. März 2023 00:05:54 UTC schrieb Jason Andryuk :
> >PCI passthrough for an HVM with a stubdom is PV PCI passthrough from
> >dom0 to the stubdom, and then QEMU passthrough of the PCI device inside
> >the stubdom.  xen-pciback has boolean module param passthrough which
> >controls "how to export PCI topology to guest".  If passthrough=1, the
> >frontend shows a PCI SBDF matching the backend host device.  When
> >passthrough=0, the frontend will get a sequentially allocated SBDF.
> >
> >libxl passes the host SBDF over QMP to QEMU.  For non-stubdom or stubdom
> >with passthrough=1, this works fine.  However, it fails for
> >passthrough=0 when QEMU can't find the sysfs node for the host SBDF.
> >
> >Handle all these cases.  Look for the xenstore frontend nodes.  If they
> >are missing, then default to using the QMP command provided SBDF.  This
> >is the non-stubdom case.  If xenstore nodes are found, then read the
> >local SBDF from the xenstore nodes.  This will handle either
> >passthrough=0/1 case.
> >
> >Based on a stubdom-specific patch originally by Marek
> >Marczykowski-Górecki , which is based
> >on earlier work by HW42 
> >
> >Signed-off-by: Jason Andryuk 
> >---
> > hw/xen/xen-host-pci-device.c | 96 +++-
> > hw/xen/xen-host-pci-device.h |  6 +++
> > 2 files changed, 101 insertions(+), 1 deletion(-)
> >
> >diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
> >index 8c6e9a1716..51a72b432d 100644
> >--- a/hw/xen/xen-host-pci-device.c
> >+++ b/hw/xen/xen-host-pci-device.c
> >@@ -9,6 +9,7 @@
> > #include "qemu/osdep.h"
> > #include "qapi/error.h"
> > #include "qemu/cutils.h"
> >+#include "hw/xen/xen-legacy-backend.h"
> > #include "xen-host-pci-device.h"
> >
> > #define XEN_HOST_PCI_MAX_EXT_CAP \
> >@@ -33,13 +34,101 @@
> > #define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
> > #define IORESOURCE_MEM_64       0x00100000
> >
> >+/*
> >+ * Non-passthrough (dom0) accesses are local PCI devices and use the given 
> >BDF
> >+ * Passthough (stubdom) accesses are through PV frontend PCI device.  Those
>
> I'm unable to parse this sentence, which may be due to my unfamiliarity with 
> Xen terminology.

It's two sentences, but it's missing a period.
"Non-passthrough (dom0) accesses are local PCI devices and use the
given BDF."  and "Passthough (stubdom) accesses are through PV
frontend PCI device."

> There is also an extra space before "Those".

It's two spaces between sentences, which visually separates the
sentences.  It's a common formatting, so I think it's okay.

Thanks for taking a look.

> >+ * either have a BDF identical to the backend's BFD 
> >(xen-backend.passthrough=1)

(And a typo here: s/BFD/BDF/)

> >+ * or a local virtual BDF (xen-backend.passthrough=0)
> >+ *
> >+ * We are always given the backend's BDF and need to lookup the appropriate
> >+ * local BDF for sysfs access.
> >+ */

Regards,
Jason

Re: [PATCH 2/2] automation: add a suspend test on an Alder Lake system

2023-03-20 Thread Marek Marczykowski-Górecki

On Mon, Mar 20, 2023 at 01:41:45PM -0700, Stefano Stabellini wrote:
> On Mon, 20 Mar 2023, Marek Marczykowski-Górecki wrote:
> > On Mon, Mar 20, 2023 at 01:08:42PM -0700, Stefano Stabellini wrote:
> > > On Sat, 18 Mar 2023, Marek Marczykowski-Górecki wrote:
> > > > On Fri, Mar 17, 2023 at 04:10:22PM -0700, Stefano Stabellini wrote:
> > > > > On Fri, 17 Mar 2023, Marek Marczykowski-Górecki wrote:
> > > > > > +fakeroot -s ../fakeroot-save tar xzf ../binaries/initrd.tar.gz
> > > > > 
> > > > > I am a bit confused about it: are you sure you need fakeroot for this?
> > > > > This script is running inside a container as root already? Are you 
> > > > > using
> > > > > Docker on the RPi4 to run this job?
> > > > 
> > > > This is running in a rootless podman container. But even with docker,
> > > > for device files to work (see commit message) it would need to run
> > > > privileged container, which I'd like to avoid.
> > > 
> > > Are you sure? I can run a non-privileged container with device assigned
> > > just fine with Docker and
> > >  
> > >   devices = ["/dev/ttyUSB0:/dev/ttyUSB0"]
> > > 
> > > in the gitlab-runner config.toml.
> > 
> > It isn't about accessing existing devices, it's about creating them
> > (unpacking rootfs which contains static /dev) and then packing it back
> > still having those devices.
> 
> OK for that definitely you don't need a privileged container. A regular
> container comes with "root" by default, but without all the privileges
> that "root" normally allows outside of a container. That is enough (at
> least in my environments) to pack/unpack a rootfs successfully without
> fakeroot. Maybe this is a podman-specific limitation?

It seems so, as rootless podman isn't running commands as root on the
"host", but instead in a separate user namespace.

> If you are curious to try, you should be able to run a simple
> pack/unpack rootfs with Docker (of course without --privileged) without
> issues.

In fact, the same issue happens in docker, if I enable this extra
protection there[1].

[1] https://docs.docker.com/engine/security/userns-remap/

-- 
Best Regards,
Marek Marczykowski-Górecki
Invisible Things Lab


signature.asc
Description: PGP signature

Re: [PATCH 2/2] automation: add a suspend test on an Alder Lake system

2023-03-20 Thread Stefano Stabellini

On Mon, 20 Mar 2023, Marek Marczykowski-Górecki wrote:
> On Mon, Mar 20, 2023 at 01:08:42PM -0700, Stefano Stabellini wrote:
> > On Sat, 18 Mar 2023, Marek Marczykowski-Górecki wrote:
> > > On Fri, Mar 17, 2023 at 04:10:22PM -0700, Stefano Stabellini wrote:
> > > > On Fri, 17 Mar 2023, Marek Marczykowski-Górecki wrote:
> > > > > +fakeroot -s ../fakeroot-save tar xzf ../binaries/initrd.tar.gz
> > > > 
> > > > I am a bit confused about it: are you sure you need fakeroot for this?
> > > > This script is running inside a container as root already? Are you using
> > > > Docker on the RPi4 to run this job?
> > > 
> > > This is running in a rootless podman container. But even with docker,
> > > for device files to work (see commit message) it would need to run
> > > privileged container, which I'd like to avoid.
> > 
> > Are you sure? I can run a non-privileged container with device assigned
> > just fine with Docker and
> >  
> >   devices = ["/dev/ttyUSB0:/dev/ttyUSB0"]
> > 
> > in the gitlab-runner config.toml.
> 
> It isn't about accessing existing devices, it's about creating them
> (unpacking rootfs which contains static /dev) and then packing it back
> still having those devices.

OK for that definitely you don't need a privileged container. A regular
container comes with "root" by default, but without all the privileges
that "root" normally allows outside of a container. That is enough (at
least in my environments) to pack/unpack a rootfs successfully without
fakeroot. Maybe this is a podman-specific limitation?

If you are curious to try, you should be able to run a simple
pack/unpack rootfs with Docker (of course without --privileged) without
issues.

Re: [PATCH 2/2] automation: add a suspend test on an Alder Lake system

2023-03-20 Thread Marek Marczykowski-Górecki

On Mon, Mar 20, 2023 at 01:08:42PM -0700, Stefano Stabellini wrote:
> On Sat, 18 Mar 2023, Marek Marczykowski-Górecki wrote:
> > On Fri, Mar 17, 2023 at 04:10:22PM -0700, Stefano Stabellini wrote:
> > > On Fri, 17 Mar 2023, Marek Marczykowski-Górecki wrote:
> > > > +fakeroot -s ../fakeroot-save tar xzf ../binaries/initrd.tar.gz
> > > 
> > > I am a bit confused about it: are you sure you need fakeroot for this?
> > > This script is running inside a container as root already? Are you using
> > > Docker on the RPi4 to run this job?
> > 
> > This is running in a rootless podman container. But even with docker,
> > for device files to work (see commit message) it would need to run
> > privileged container, which I'd like to avoid.
> 
> Are you sure? I can run a non-privileged container with device assigned
> just fine with Docker and
>  
>   devices = ["/dev/ttyUSB0:/dev/ttyUSB0"]
> 
> in the gitlab-runner config.toml.

It isn't about accessing existing devices, it's about creating them
(unpacking rootfs which contains static /dev) and then packing it back
still having those devices.

-- 
Best Regards,
Marek Marczykowski-Górecki
Invisible Things Lab


signature.asc
Description: PGP signature

Re: [ImageBuilder][PATCH v3 2/2] uboot-script-gen: add support for static shared memory

2023-03-20 Thread Stefano Stabellini

On Mon, 20 Mar 2023, jiamei.xie wrote:
> Introduce support for creating shared-mem node for dom0less domUs in
> the device tree. Add the following option:
> - DOMU_SHARED_MEM[number]="SHM-ID HPA GPA size"
>   if specified, indicate the unique identifier of the shared memory
>   region is SHM-ID, the host physical address HPA will get mapped at
>   guest address GPA in domU and the memory of size will be reserved to
>   be shared memory.
> 
> The static shared memory is used between two dom0less domUs.
> 
> Below is an example:
> NUM_DOMUS=2
> DOMU_SHARED_MEM[0]="my-shared-mem-0 0x50000000 0x6000000 0x10000000"
> DOMU_SHARED_MEM[1]="my-shared-mem-0 0x50000000 0x6000000 0x10000000"
> 
> This static shared memory region is identified as "my-shared-mem-0",
> host physical address starting at 0x50000000 of 256MB will be reserved
> to be shared between two domUs. It will get mapped at 0x6000000 in both
> guest physical address space. Both DomUs are the borrower domain, the
> owner domain is the default owner domain DOMID_IO.
> 
> Signed-off-by: jiamei.xie 

Reviewed-by: Stefano Stabellini 


> ---
> Changes from v2:
>  - Remove "domid" parameter
>  - Use lower capital letters for local variables
> Changes from v1:
>  - Rather than two separate properties and just use one like follows:
>Change
>  DOMU_SHARED_MEM[0]="0x50000000 0x6000000 0x10000000"
>  DOMU_SHARED_MEM_ID[0]="my-shared-mem-0"
>to
>  DOMU_SHARED_MEM[0]="my-shared-mem-0 0x50000000 0x6000000 0x10000000"
>  - Use split_value function instead of opencoding it.
> ---
>  README.md| 17 +
>  scripts/uboot-script-gen | 26 ++
>  2 files changed, 43 insertions(+)
> 
> diff --git a/README.md b/README.md
> index 78b83f1..fe5d205 100644
> --- a/README.md
> +++ b/README.md
> @@ -196,6 +196,23 @@ Where:
>if specified, indicates the host physical address regions
>[baseaddr, baseaddr + size) to be reserved to the VM for static allocation.
>  
> +- DOMU_SHARED_MEM[number]="SHM-ID HPA GPA size"
> +  if specified, indicate SHM-ID represents the unique identifier of the 
> shared
> +  memory region, the host physical address HPA will get mapped at guest
> +  address GPA in domU and the memory of size will be reserved to be shared
> +  memory. The shared memory is used between two dom0less domUs.
> +
> +  Below is an example:
> +  NUM_DOMUS=2
> +  DOMU_SHARED_MEM[0]="my-shared-mem-0 0x50000000 0x6000000 0x10000000"
> +  DOMU_SHARED_MEM[1]="my-shared-mem-0 0x50000000 0x6000000 0x10000000"
> +
> +  This static shared memory region is identified as "my-shared-mem-0", host
> +  physical address starting at 0x50000000 of 256MB will be reserved to be
> +  shared between two domUs. It will get mapped at 0x6000000 in both guest
> +  physical address space. Both DomUs are the borrower domain, the owner
> +  domain is the default owner domain DOMID_IO.
> +
>  - DOMU_DIRECT_MAP[number] can be set to 1 or 0.
>If set to 1, the VM is direct mapped. The default is 1.
>This is only applicable when DOMU_STATIC_MEM is specified.
> diff --git a/scripts/uboot-script-gen b/scripts/uboot-script-gen
> index cca3e59..9656a45 100755
> --- a/scripts/uboot-script-gen
> +++ b/scripts/uboot-script-gen
> @@ -204,6 +204,27 @@ function add_device_tree_xen_static_heap()
>  dt_set "$path" "xen,static-heap" "hex" "${cells[*]}"
>  }
>  
> +function add_device_tree_static_shared_mem()
> +{
> +local path=$1
> +local shared_mem=$2
> +local shared_mem_id=${shared_mem%% *}
> +local regions="${shared_mem#* }"
> +local cells=()
> +local shared_mem_host=${regions%% *}
> +
> +dt_mknode "${path}" "shared-mem@${shared_mem_host}"
> +
> +for val in ${regions[@]}
> +do
> +cells+=("$(split_value $val)")
> +done
> +
> +dt_set "${path}/shared-mem@${shared_mem_host}" "compatible" "str" 
> "xen,domain-shared-memory-v1"
> +dt_set "${path}/shared-mem@${shared_mem_host}" "xen,shm-id" "str" 
> "${shared_mem_id}"
> +dt_set "${path}/shared-mem@${shared_mem_host}" "xen,shared-mem" "hex" 
> "${cells[*]}"
> +}
> +
>  function add_device_tree_cpupools()
>  {
>  local cpu
> @@ -329,6 +350,11 @@ function xen_device_tree_editing()
>  dt_set "/chosen/domU$i" "xen,enhanced" "str" "enabled"
>  fi
>  
> +if test -n "${DOMU_SHARED_MEM[i]}"
> +then
> +add_device_tree_static_shared_mem "/chosen/domU${i}" 
> "${DOMU_SHARED_MEM[i]}"
> +fi
> +
>  if test "${DOMU_COLORS[$i]}"
>  then
>  local startcolor=$(echo "${DOMU_COLORS[$i]}"  | cut -d "-" -f 1)
> -- 
> 2.25.1
>

Re: [PATCH 2/2] automation: add a suspend test on an Alder Lake system

2023-03-20 Thread Stefano Stabellini

On Sat, 18 Mar 2023, Marek Marczykowski-Górecki wrote:
> On Fri, Mar 17, 2023 at 04:10:22PM -0700, Stefano Stabellini wrote:
> > On Fri, 17 Mar 2023, Marek Marczykowski-Górecki wrote:
> > > This is a first test using Qubes OS CI infra. The gitlab-runner has
> > > access to ssh-based control interface (control@thor.testnet, ssh key
> > > exposed to the test via ssh-agent) and pre-configured HTTP dir for boot
> > > files (mapped under /scratch/gitlab-runner/tftp inside the container).
> > > Details about the setup are described on
> > > https://www.qubes-os.org/news/2022/05/05/automated-os-testing-on-physical-laptops/
> > > 
> > > This test boots Xen, and try if S3 works. It runs on a ADL-based desktop
> > > system. The test script is based on the Xilinx one.
> > > 
> > > The machine needs newer kernel than other x86 tests run, so use 6.1.x
> > > kernel added in previous commit.
> > > 
> > > When building rootfs, use fakeroot to preserve device files when
> > > repacking rootfs archives in a non-privileged container.
> > > 
> > > Signed-off-by: Marek Marczykowski-Górecki 
> > > 
> > 
> > This is awesome!! Thanks Marek!
> > 
> > 
> > > ---
> > > I'm bad at naming things. Things that I could use naming suggestions:
> > >  - test script (qubes-x86-64-suspend.sh) - this might be okay?
> > >  - test template job name (.adl-x86-64)
> > >  - test job name (adl-suspend-x86-64-gcc)
> > >  - tag (qubes-hw2)
> > 
> > I think these names are OK. I would maybe rename the tag "qubes-hw2" to
> > "qubes" because so far there is only one but I am fine with qubes-hw2
> > also.
> 
> I have 10 of them (and growing), so I'd like to keep tag name at least
> somehow referencing which runner it uses. For example, this one is
> a desktop with Alder Lake, but some other tests I may want to use a laptop
> with Tiger Lake, for example. The mapping name -> hw spec isn't publicly
> written down (although our openQA publishes all kind of logs from them,
> so it's possible to infer this info).

That's fine by me, use whatever naming scheme works for you :-)


> > > For context, our CI has several machines, named test-X or hwX, each
> > > controlled with matching hal900X RPi (this is where gitlab-runner is).
> > > This test uses only one specific hw, but I plan adding few others too.
> > > ---
> > >  automation/gitlab-ci/test.yaml |  31 -
> > >  automation/scripts/qubes-x86-64-suspend.sh | 155 ++-
> > >  2 files changed, 186 insertions(+)
> > >  create mode 100755 automation/scripts/qubes-x86-64-suspend.sh
> > > 
> > > diff --git a/automation/gitlab-ci/test.yaml 
> > > b/automation/gitlab-ci/test.yaml
> > > index 2e1a6886df7f..f5511dd6da70 100644
> > > --- a/automation/gitlab-ci/test.yaml
> > > +++ b/automation/gitlab-ci/test.yaml
> > > @@ -15,6 +15,10 @@
> > >  .arm32-test-needs: 
> > >- qemu-system-aarch64-6.0.0-arm32-export
> > >  
> > > +.x86-64-test-needs: 
> > > +  - alpine-3.12-rootfs-export
> > > +  - kernel-6.1.19-export
> > 
> > As you know, the jobs starting with a "." are template jobs to avoid
> > repeating the same things over and over. .x86-64-test-needs could be
> > used in qemu-alpine-x86_64-gcc also.
> 
> Right, when switching all of them to 6.1 kernel, that makes sense.
> 
> > >  .qemu-arm64:
> > >extends: .test-jobs-common
> > >variables:
> > > @@ -84,6 +88,25 @@
> > >tags:
> > >  - xilinx
> > >  
> > > +.adl-x86-64:
> > > +  extends: .test-jobs-common
> > > +  variables:
> > > +# the test controller runs on RPi4
> > > +CONTAINER: alpine:3.12-arm64v8
> > > +LOGFILE: smoke-test.log
> > > +  artifacts:
> > > +paths:
> > > +  - smoke.serial
> > > +  - '*.log'
> > > +when: always
> > > +  only:
> > > +variables:
> > > +  - $QUBES_JOBS == "true" && $CI_COMMIT_REF_PROTECTED == "true"
> > 
> > Let me know which trees should have QUBES_JOBS set to true (thus able to
> > start Qubes jobs.) At a minimum, I think we would want
> > https://gitlab.com/xen-project/xen to test "staging" and "master". I can
> > set QUBES_JOBS to true to https://gitlab.com/xen-project/xen if you are
> > OK with it.
> 
> Yes, that's perfectly okay. I'd like at least also staging-4.17, but
> depending on push frequency other staging-* are probably fine too (I
> very much doubt long queue would be an issue). Of course that assumes
> those tests would be backported, which I'm not sure if is planned. I'm
> also okay with allowing committers and/or other maintainers to use it on
> demand, but preferably not all patchew branches.

Yes that makes sense. Let's start with
https://gitlab.com/xen-project/xen for now, then we'll figure out a way
to implement more precisely what you wrote above, which also matches the
requirements for the xilinx runner.


> BTW, if you trigger job manually via a web interface or API, you can
> specify variables for just a single pipeline. And if you use that
> feature, you can also make gitlab present you a bit more convenient
> interface with

Re: [XEN PATCH v1 1/1] x86/domctl: add gva_to_gfn command

2023-03-20 Thread Andrew Cooper

On 20/03/2023 7:35 pm, Ковалёв Сергей wrote:
> 20.03.2023 22:07, Andrew Cooper пишет:
>>
>> More generally, issuing the hypercall under vcpu0 isn't necessarily
>> correct.  It is common for all vCPUs to have equivalent paging settings,
>> but e.g. Xen transiently disables CR4.CET and CR0.WP in order to make
>> self-modifying code changes.
>>
>> Furthermore, the setting of CR4.{PAE,PSE} determines reserved bits, so
>> you can't even ignore the access rights and hope that the translation
>> works out correctly.
>
> Thanks! I didn't think about such things earlier. I should to think
> this know carefully.

If you haven't already, read

https://github.com/xen-project/xen/blob/master/xen/arch/x86/mm/guest_walk.c

and

https://github.com/andyhhp/xtf/blob/pagetable-emulation/tests/pagetable-emulation/main.c

These are various notes and tests I made last time I had to rewrite
Xen's pagewalk from scratch.

>
>>
>> Ideally we'd have a pagewalk algorithm which didn't require taking a
>> vcpu, and instead just took a set of paging configuration, but it is all
>> chronically entangled right now.
>>
>
> Do You mean to add new implementation of "paging_ga_to_gfn_cr3"?

Yes, but I didn't mean for this to be taken as a suggestion.  It's far
more work than it sounds...

>
>> I think, at a minimum, you need to take a vcpu_id as an input, but I
>> suspect to make this a usable API you want an altp2m view id too.
>>
>
> Why we should consider altp2m while translating guest virtual address to
> guest physical one?

Because altp2m can change the gfn mappings, and therefore the contents
of the pagetables.

A pagewalk from cr3 in one view can end up being totally different to a
walk from the same cr3 in a different view.

>
>> Also, I'm pretty sure this is only safe for a paused vCPU.  If the vCPU
>> isn't paused, then there's a TOCTOU race in the pagewalk code when
>> inspecting control registers.
>>
>
> Thanks! Should we pause the domain?

Certainly the vCPU.  Chances are that if you're making this hypercall
from a libvmi callback, the vCPU in question is already paused, at which
point taking one extra pause ref on it is very quick.

>
>>> +    uint32_t pfec = PFEC_page_present;
>>> +    unsigned int page_order;
>>> +
>>> +    uint64_t gfn = paging_ga_to_gfn_cr3(v, cr3, ga, &pfec,
>>> &page_order);
>>> +    domctl->u.gva_to_gfn.addr = gfn;
>>> +    domctl->u.gva_to_gfn.page_order = page_order;
>>
>> page_order is only not stack rubble if gfn is different to INVALID_GFN.
>>
>
> Sorry but I don't understand "is only not stack rubble". Do you mean
> that I should initialize "page_order" while defining it?

page_order is only initialised when gfn returns != INVALID_GFN.

See the function description.

~Andrew

Re: [XEN PATCH v1 1/1] x86/domctl: add gva_to_gfn command

2023-03-20 Thread Ковалёв Сергей





20.03.2023 22:07, Andrew Cooper пишет:

On 20/03/2023 4:32 pm, Ковалёв Сергей wrote:

gva_to_gfn command used for fast address translation in LibVMI project.
With such a command it is possible to perform address translation in
single call instead of series of queries to get every page table.

Thanks to Dmitry Isaykin for involvement.

Signed-off-by: Sergey Kovalev 


I fully appreciate why you want this hypercall, and I've said several
times that libvmi wants something better than it has, but...


diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 2118fcad5d..0c9706ea0a 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1364,6 +1364,23 @@ long arch_do_domctl(
  copyback = true;
  break;

+    case XEN_DOMCTL_gva_to_gfn:
+    {
+    uint64_t ga = domctl->u.gva_to_gfn.addr;
+    uint64_t cr3 = domctl->u.gva_to_gfn.cr3;
+    struct vcpu* v = d->vcpu[0];


... this isn't safe if you happen to issue this hypercall too early in a
domain's lifecycle.

If nothing else, you want to do a domain_vcpu() check and return -ENOENT
in the failure case.


Thanks!



More generally, issuing the hypercall under vcpu0 isn't necessarily
correct.  It is common for all vCPUs to have equivalent paging settings,
but e.g. Xen transiently disables CR4.CET and CR0.WP in order to make
self-modifying code changes.

Furthermore, the setting of CR4.{PAE,PSE} determines reserved bits, so
you can't even ignore the access rights and hope that the translation
works out correctly.


Thanks! I didn't think about such things earlier. I should to think
this know carefully.



Ideally we'd have a pagewalk algorithm which didn't require taking a
vcpu, and instead just took a set of paging configuration, but it is all
chronically entangled right now.



Do You mean to add new implementation of "paging_ga_to_gfn_cr3"?


I think, at a minimum, you need to take a vcpu_id as an input, but I
suspect to make this a usable API you want an altp2m view id too.



Why we should consider altp2m while translating guest virtual address to
guest physical one?


Also, I'm pretty sure this is only safe for a paused vCPU.  If the vCPU
isn't paused, then there's a TOCTOU race in the pagewalk code when
inspecting control registers.



Thanks! Should we pause the domain?


+    uint32_t pfec = PFEC_page_present;
+    unsigned int page_order;
+
+    uint64_t gfn = paging_ga_to_gfn_cr3(v, cr3, ga, &pfec,
&page_order);
+    domctl->u.gva_to_gfn.addr = gfn;
+    domctl->u.gva_to_gfn.page_order = page_order;


page_order is only not stack rubble if gfn is different to INVALID_GFN.



Sorry but I don't understand "is only not stack rubble". Do you mean
that I should initialize "page_order" while defining it?


+    if ( __copy_to_guest(u_domctl, domctl, 1) )
+    ret = -EFAULT;


You want to restrict this to just the gva_to_gfn sub-portion.  No point
copying back more than necessary.

~Andrew


Thanks a lot!

--
Best regards,
Sergey Kovalev

Re: [XEN PATCH v1 1/1] x86/domctl: add gva_to_gfn command

2023-03-20 Thread Andrew Cooper

On 20/03/2023 7:22 pm, Ковалёв Сергей wrote:
>
> 21.03.2023 1:51, Tamas K Lengyel wrote:
>> On Mon, Mar 20, 2023 at 12:32 PM Ковалёв Сергей > > wrote:
>>  >
>>  > gva_to_gfn command used for fast address translation in LibVMI
>> project.
>>  > With such a command it is possible to perform address translation in
>>  > single call instead of series of queries to get every page table.
>>
>> You have a couple assumptions here:
>>   - Xen will always have a direct map of the entire guest memory -
>> there are already plans to move away from that. Without that this
>> approach won't have any advantage over doing the same mapping by LibVMI
>
> Thanks! I didn't know about the plan.

To be clear, "not mapping the guest by default" is for speculative
safety/hardening reasons.

Xen will always need to be capable of mapping arbitrary parts of the
guest, even if only transiently, so there's no relevant interaction with
this new proposed hypercall.

The truth is that Xen will always be able to do a single pagewalk faster
than libvmi can do it (via mappings, or otherwise), but if libvmi does
properly maintain a cache of mappings then it will be faster than
repeated hypercalls into Xen.  Where the split lies depends heavily on
the libvmi workload.

I don't see a problem in principle with a hypercall like this - it is
"just" a performance optimisation over capabilities that libvmi already
has - but the version presented here is overly simplistic.

~Andrew

Re: [PATCH v2 3/4] xen/arm: Defer GICv2 CPU interface mapping until the first access

2023-03-20 Thread Julien Grall


Hi Henry,

On 30/01/2023 04:06, Henry Wang wrote:

Currently, the mapping of the GICv2 CPU interface is created in
arch_domain_create(). This causes some troubles in populating and
freeing of the domain P2M pages pool. For example, a default 16
P2M pages are required in p2m_init() to cope with the P2M mapping
of 8KB GICv2 CPU interface area, and these 16 P2M pages would cause
the complexity of P2M destroy in the failure path of
arch_domain_create().

As per discussion in [1], similarly as the MMIO access for ACPI, this
patch defers the GICv2 CPU interface mapping until the first MMIO
access. This is achieved by moving the GICv2 CPU interface mapping
code from vgic_v2_domain_init()/vgic_v2_map_resources() to the
stage-2 data abort trap handling code. The original CPU interface
size and virtual CPU interface base address is now saved in
`struct vgic_dist` instead of the local variable of
vgic_v2_domain_init()/vgic_v2_map_resources().

Take the opportunity to unify the way of data access using the
existing pointer to struct vgic_dist in vgic_v2_map_resources() for
new GICv2.

Since the hardware domain (Dom0) has an unlimited size P2M pool, the
gicv2_map_hwdom_extra_mappings() is also not touched by this patch.


I didn't notice this in the previous version. The fact that dom0 has 
unlimited size P2M pool doesn't matter here (this could also change in 
the future). Even if the P2M pool was limited, then it would be fine 
because the extra mappings happen after domain_create(). So there is no 
need to map them on-demand as the code could be order the way we want.


So this paragraph needs to be reworded.


  #ifdef CONFIG_GICV3
  /* GIC V3 addressing */
  /* List of contiguous occupied by the redistributors */
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 061c92acbd..9dd703f661 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -1787,9 +1787,12 @@ static inline bool hpfar_is_valid(bool s1ptw, uint8_t 
fsc)
  }
  
  /*

- * When using ACPI, most of the MMIO regions will be mapped on-demand
- * in stage-2 page tables for the hardware domain because Xen is not
- * able to know from the EFI memory map the MMIO regions.
+ * Try to map the MMIO regions for some special cases:
+ * 1. When using ACPI, most of the MMIO regions will be mapped on-demand
+ *in stage-2 page tables for the hardware domain because Xen is not
+ *able to know from the EFI memory map the MMIO regions.
+ * 2. For guests using GICv2, the GICv2 CPU interface mapping is created
+ *on the first access of the MMIO region.
   */
  static bool try_map_mmio(gfn_t gfn)
  {
@@ -1798,6 +1801,15 @@ static bool try_map_mmio(gfn_t gfn)
  /* For the hardware domain, all MMIOs are mapped with GFN == MFN */
  mfn_t mfn = _mfn(gfn_x(gfn));
  
+/*

+ * Map the GICv2 virtual CPU interface in the GIC CPU interface
+ * region of the guest on the first access of the MMIO region.
+ */
+if ( d->arch.vgic.version == GIC_V2 &&
+ gfn_eq(gfn, gaddr_to_gfn(d->arch.vgic.cbase)) )


The CPU interface size is 8KB (bigger in some cases) but here you only 
check for the access to be in the first 4KB.


Cheers,

--
Julien Grall

Re: [XEN PATCH v1 1/1] x86/domctl: add gva_to_gfn command

2023-03-20 Thread Ковалёв Сергей

21.03.2023 1:51, Tamas K Lengyel wrote:

On Mon, Mar 20, 2023 at 12:32 PM Ковалёв Сергей wrote:

 >
 > gva_to_gfn command used for fast address translation in LibVMI project.
 > With such a command it is possible to perform address translation in
 > single call instead of series of queries to get every page table.

You have a couple assumptions here:
  - Xen will always have a direct map of the entire guest memory - there 
are already plans to move away from that. Without that this approach 
won't have any advantage over doing the same mapping by LibVMI

Thanks! I didn't know about the plan. Though I use this patch
back ported into 4.16.

  - LibVMI has to map every page for each page table for every lookup - 
you have to do that only for the first, afterwards the pages on which 
the pagetable is are kept in a cache and subsequent lookups would be 
actually faster then having to do this domctl since you can keep being 
in the same process instead of having to jump to Xen.

Yes. I know about the page cache. But I have faced several issues
with the cache, like this one: https://github.com/libvmi/libvmi/pull/1058 .
So I had to disable the cache.

With these perspectives in mind I don't think this would be a useful 
addition. Please prove me wrong with performance numbers and a specific 
use-case that warrants adding this and how you plan to introduce it into 
LibVMI without causing performance regression to all other use-cases.

I will send you a PR into LibVMI in a day or two. I don't have any
performance numbers at this time. I sent this patch to share my current
work as soon as possible.

To prevent regression in all use-cases we could add a configure option.
Thanks for making me notice that!

Tamas

--
С уважением,
Ковалёв Сергей.

Re: [XEN PATCH v1 1/1] x86/domctl: add gva_to_gfn command

2023-03-20 Thread Andrew Cooper

On 20/03/2023 4:32 pm, Ковалёв Сергей wrote:
> gva_to_gfn command used for fast address translation in LibVMI project.
> With such a command it is possible to perform address translation in
> single call instead of series of queries to get every page table.
>
> Thanks to Dmitry Isaykin for involvement.
>
> Signed-off-by: Sergey Kovalev 

I fully appreciate why you want this hypercall, and I've said several
times that libvmi wants something better than it has, but...

> diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
> index 2118fcad5d..0c9706ea0a 100644
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -1364,6 +1364,23 @@ long arch_do_domctl(
>  copyback = true;
>  break;
>
> +    case XEN_DOMCTL_gva_to_gfn:
> +    {
> +    uint64_t ga = domctl->u.gva_to_gfn.addr;
> +    uint64_t cr3 = domctl->u.gva_to_gfn.cr3;
> +    struct vcpu* v = d->vcpu[0];

... this isn't safe if you happen to issue this hypercall too early in a
domain's lifecycle.

If nothing else, you want to do a domain_vcpu() check and return -ENOENT
in the failure case.

More generally, issuing the hypercall under vcpu0 isn't necessarily
correct.  It is common for all vCPUs to have equivalent paging settings,
but e.g. Xen transiently disables CR4.CET and CR0.WP in order to make
self-modifying code changes.

Furthermore, the setting of CR4.{PAE,PSE} determines reserved bits, so
you can't even ignore the access rights and hope that the translation
works out correctly.

Ideally we'd have a pagewalk algorithm which didn't require taking a
vcpu, and instead just took a set of paging configuration, but it is all
chronically entangled right now.

I think, at a minimum, you need to take a vcpu_id as an input, but I
suspect to make this a usable API you want an altp2m view id too.

Also, I'm pretty sure this is only safe for a paused vCPU.  If the vCPU
isn't paused, then there's a TOCTOU race in the pagewalk code when
inspecting control registers.

> +    uint32_t pfec = PFEC_page_present;
> +    unsigned int page_order;
> +
> +    uint64_t gfn = paging_ga_to_gfn_cr3(v, cr3, ga, &pfec,
> &page_order);
> +    domctl->u.gva_to_gfn.addr = gfn;
> +    domctl->u.gva_to_gfn.page_order = page_order;

page_order is only not stack rubble if gfn is different to INVALID_GFN.

> +    if ( __copy_to_guest(u_domctl, domctl, 1) )
> +    ret = -EFAULT;

You want to restrict this to just the gva_to_gfn sub-portion.  No point
copying back more than necessary.

~Andrew

Re: [XEN PATCH v1 1/1] x86/domctl: add gva_to_gfn command

2023-03-20 Thread Tamas K Lengyel

On Mon, Mar 20, 2023 at 12:32 PM Ковалёв Сергей  wrote:
>
> gva_to_gfn command used for fast address translation in LibVMI project.
> With such a command it is possible to perform address translation in
> single call instead of series of queries to get every page table.

You have a couple assumptions here:
 - Xen will always have a direct map of the entire guest memory - there are
already plans to move away from that. Without that this approach won't have
any advantage over doing the same mapping by LibVMI
 - LibVMI has to map every page for each page table for every lookup - you
have to do that only for the first, afterwards the pages on which the
pagetable is are kept in a cache and subsequent lookups would be actually
faster than having to do this domctl since you can keep being in the same
process instead of having to jump to Xen.

With these perspectives in mind I don't think this would be a useful
addition. Please prove me wrong with performance numbers and a specific
use-case that warrants adding this and how you plan to introduce it into
LibVMI without causing performance regression to all other use-cases.

Tamas

Re: [PATCH v2 2/4] xen/arm: Rename vgic_cpu_base and vgic_dist_base for new vGIC

2023-03-20 Thread Julien Grall


Hi Henry,

On 30/01/2023 04:06, Henry Wang wrote:

In the follow-up patch from this series, the GICv2 CPU interface
mapping will be deferred until the first access in the stage-2
data abort trap handling code. Since the data abort trap handling
code is common for the current and the new vGIC implementation,
it is necessary to unify the variable names in struct vgic_dist
for these two implementations.

Therefore, this commit renames the vgic_cpu_base and vgic_dist_base
for new vGIC to cbase and dbase. So we can use the same code in
the data abort trap handling code for both vGIC implementations.

Signed-off-by: Henry Wang 


Acked-by: Julien Grall 

Cheers,

--
Julien Grall

[xen-unstable-smoke test] 179814: tolerable trouble: pass/starved - PUSHED

2023-03-20 Thread osstest service owner

flight 179814 xen-unstable-smoke real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179814/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt 15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl   1 build-check(1)   starved  n/a
 build-armhf   2 hosts-allocate   starved  n/a

version targeted for testing:
 xen  c2581c58bec96afa450ebaca3fa2a33bcb0a9974
baseline version:
 xen  9bf21fcaef07f68ab52d0382ff554616a1cf66d8

Last test of basis   179720  2023-03-17 13:01:58 Z3 days
Testing same since   179814  2023-03-20 17:00:25 Z0 days1 attempts


People who touched revisions under test:
  Andrew Cooper 
  David Woodhouse 
  Michal Orzel 
  Oleksii Kurochko 

jobs:
 build-arm64-xsm  pass
 build-amd64  pass
 build-armhf  starved 
 build-amd64-libvirt  pass
 test-armhf-armhf-xl  starved 
 test-arm64-arm64-xl-xsm  pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-amd64-libvirt pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/xen.git
   9bf21fcaef..c2581c58be  c2581c58bec96afa450ebaca3fa2a33bcb0a9974 -> smoke

Re: [PATCH] xen: Fix host pci for stubdom

2023-03-20 Thread Bernhard Beschow




Am 20. März 2023 00:05:54 UTC schrieb Jason Andryuk :
>PCI passthrough for an HVM with a stubdom is PV PCI passthrough from
>dom0 to the stubdom, and then QEMU passthrough of the PCI device inside
>the stubdom.  xen-pciback has boolean module param passthrough which
>controls "how to export PCI topology to guest".  If passthrough=1, the
>frontend shows a PCI SBDF matching the backend host device.  When
>passthrough=0, the frontend will get a sequentially allocated SBDF.
>
>libxl passes the host SBDF over QMP to QEMU.  For non-stubdom or stubdom
>with passthrough=1, this works fine.  However, it fails for
>passthrough=0 when QEMU can't find the sysfs node for the host SBDF.
>
>Handle all these cases.  Look for the xenstore frontend nodes.  If they
>are missing, then default to using the QMP command provided SBDF.  This
>is the non-stubdom case.  If xenstore nodes are found, then read the
>local SBDF from the xenstore nodes.  This will handle either
>passthrough=0/1 case.
>
>Based on a stubdom-specific patch originally by Marek
>Marczykowski-Górecki , which is based
>on earlier work by HW42 
>
>Signed-off-by: Jason Andryuk 
>---
> hw/xen/xen-host-pci-device.c | 96 +++-
> hw/xen/xen-host-pci-device.h |  6 +++
> 2 files changed, 101 insertions(+), 1 deletion(-)
>
>diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
>index 8c6e9a1716..51a72b432d 100644
>--- a/hw/xen/xen-host-pci-device.c
>+++ b/hw/xen/xen-host-pci-device.c
>@@ -9,6 +9,7 @@
> #include "qemu/osdep.h"
> #include "qapi/error.h"
> #include "qemu/cutils.h"
>+#include "hw/xen/xen-legacy-backend.h"
> #include "xen-host-pci-device.h"
> 
> #define XEN_HOST_PCI_MAX_EXT_CAP \
>@@ -33,13 +34,101 @@
> #define IORESOURCE_PREFETCH 0x1000  /* No side effects */
> #define IORESOURCE_MEM_64   0x0010
> 
>+/*
>+ * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF
>+ * Passthough (stubdom) accesses are through PV frontend PCI device.  Those

I'm unable to parse this sentence, which may be due to my unfamiliarity with 
Xen terminology.

There is also an extra space before "Those".

Best regards,
Bernhard

>+ * either have a BDF identical to the backend's BFD 
>(xen-backend.passthrough=1)
>+ * or a local virtual BDF (xen-backend.passthrough=0)
>+ *
>+ * We are always given the backend's BDF and need to lookup the appropriate
>+ * local BDF for sysfs access.
>+ */
>+static void xen_host_pci_fill_local_addr(XenHostPCIDevice *d, Error **errp)
>+{
>+unsigned int num_devs, len, i;
>+unsigned int domain, bus, dev, func;
>+char *be_path;
>+char path[80];
>+char *msg;
>+
>+be_path = qemu_xen_xs_read(xenstore, 0, "device/pci/0/backend", &len);
>+if (!be_path) {
>+/*
>+ * be_path doesn't exist, so we are dealing with a local
>+ * (non-passthough) device.
>+ */
>+d->local_domain = d->domain;
>+d->local_bus = d->bus;
>+d->local_dev = d->dev;
>+d->local_func = d->func;
>+
>+return;
>+}
>+
>+snprintf(path, sizeof(path), "%s/num_devs", be_path);
>+msg = qemu_xen_xs_read(xenstore, 0, path, &len);
>+if (!msg) {
>+goto err_out;
>+}
>+
>+if (sscanf(msg, "%u", &num_devs) != 1) {
>+error_setg(errp, "Failed to parse %s (%s)", msg, path);
>+goto err_out;
>+}
>+free(msg);
>+
>+for (i = 0; i < num_devs; i++) {
>+snprintf(path, sizeof(path), "%s/dev-%u", be_path, i);
>+msg = qemu_xen_xs_read(xenstore, 0, path, &len);
>+if (!msg) {
>+error_setg(errp, "Failed to read %s", path);
>+goto err_out;
>+}
>+if (sscanf(msg, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) {
>+error_setg(errp, "Failed to parse %s (%s)", msg, path);
>+goto err_out;
>+}
>+free(msg);
>+if (domain != d->domain ||
>+bus != d->bus ||
>+dev != d->dev ||
>+func != d->func)
>+continue;
>+snprintf(path, sizeof(path), "%s/vdev-%u", be_path, i);
>+msg = qemu_xen_xs_read(xenstore, 0, path, &len);
>+if (!msg) {
>+error_setg(errp, "Failed to read %s", path);
>+goto out;
>+}
>+if (sscanf(msg, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) {
>+error_setg(errp, "Failed to parse %s (%s)", msg, path);
>+goto err_out;
>+}
>+free(msg);
>+d->local_domain = domain;
>+d->local_bus = bus;
>+d->local_dev = dev;
>+d->local_func = func;
>+goto out;
>+}
>+
>+error_setg(errp, "Failed to find local %x:%x:%x.%x", d->domain, d->bus,
>+   d->dev, d->func);
>+
>+err_out:
>+free(msg);
>+out:
>+free(be_path);
>+}
>+
> static void xen_host_pci_sysfs_path(const XenHostPCIDevice *d,
> const char *name, char *buf, ssize_t size)
> {
> int rc;
> 
> rc = snprintf(buf, size,

Re: [XEN PATCH v3] x86/monitor: Add new monitor event to catch I/O instructions

2023-03-20 Thread Tamas K Lengyel

> Are you actually sure you want to tie the vm-event interface to the ioreq
> one (this is also a question to you, Tamas)? It would look slightly better
> to me if this was a simple boolean named after its purpose (e.g. "write"
> or "out" when it's meant to be set for OUT / OUTS and clear for IN / INS).

A boolean would be preferred indeed.

Tamas

[PATCH] x86/vmx: Provide named fields for IO exit qualification

2023-03-20 Thread Andrew Cooper

This removes most of the opencoded bit logic on the exit qualification.
Unfortunately, size is 1-based not 0-based, so needs adjusting in a separate
variable.

No functional change.

Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Roger Pau Monné 
CC: Wei Liu 
CC: Kevin Tian 
---
 xen/arch/x86/hvm/vmx/vmx.c | 30 ++
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 00b531f76cbf..a96c601efded 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -4560,23 +4560,37 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
 break;
 
 case EXIT_REASON_IO_INSTRUCTION:
-__vmread(EXIT_QUALIFICATION, &exit_qualification);
-if ( exit_qualification & 0x10 )
+{
+union {
+unsigned long raw;
+struct {
+uint32_t size:3;
+bool in:1;
+bool str:1;
+bool rep:1;
+bool imm:1;
+uint32_t :9;
+uint16_t port;
+};
+} io_qual;
+unsigned int bytes;
+
+__vmread(EXIT_QUALIFICATION, &io_qual.raw);
+bytes = io_qual.size + 1;
+
+if ( io_qual.str )
 {
-/* INS, OUTS */
 if ( !hvm_emulate_one_insn(x86_insn_is_portio, "port I/O") )
 hvm_inject_hw_exception(TRAP_gp_fault, 0);
 }
 else
 {
-/* IN, OUT */
-uint16_t port = (exit_qualification >> 16) & 0xFFFF;
-int bytes = (exit_qualification & 0x07) + 1;
-int dir = (exit_qualification & 0x08) ? IOREQ_READ : IOREQ_WRITE;
-if ( handle_pio(port, bytes, dir) )
+if ( handle_pio(io_qual.port, bytes,
+io_qual.in ? IOREQ_READ : IOREQ_WRITE) )
 update_guest_eip(); /* Safe: IN, OUT */
 }
 break;
+}
 
 case EXIT_REASON_INVD:
 case EXIT_REASON_WBINVD:
-- 
2.30.2

Re: [XEN PATCH v3] x86/monitor: Add new monitor event to catch I/O instructions

2023-03-20 Thread Julien Grall


Hi,

On 17/03/2023 12:01, Dmitry Isaykin wrote:

diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 51be28c3de..7280e9f968 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1063,6 +1063,7 @@ struct xen_domctl_psr_cmt_op {
  /* Enabled by default */
  #define XEN_DOMCTL_MONITOR_EVENT_INGUEST_PAGEFAULT 11
  #define XEN_DOMCTL_MONITOR_EVENT_VMEXIT12
+#define XEN_DOMCTL_MONITOR_EVENT_IO13
  
  struct xen_domctl_monitor_op {

  uint32_t op; /* XEN_DOMCTL_MONITOR_OP_* */
diff --git a/xen/include/public/vm_event.h b/xen/include/public/vm_event.h
index 0035c26e12..1e4b6063f5 100644
--- a/xen/include/public/vm_event.h
+++ b/xen/include/public/vm_event.h
@@ -160,6 +160,8 @@
  #define VM_EVENT_REASON_EMUL_UNIMPLEMENTED  14
  /* VMEXIT */
  #define VM_EVENT_REASON_VMEXIT  15
+/* IN/OUT Instruction executed */
+#define VM_EVENT_REASON_IO_INSTRUCTION  16
  
  /* Supported values for the vm_event_write_ctrlreg index. */

  #define VM_EVENT_X86_CR00
@@ -388,6 +390,13 @@ struct vm_event_vmexit {
  } arch;
  };
  
+struct vm_event_io {

+uint32_t data_size;
+uint16_t port;
+uint8_t  dir; /* IOREQ_READ or IOREQ_WRITE */
+uint8_t  string_ins;
It would be good to comment on what the value is meant to be. Is it 0 and 1 
(extra meaning to be confirmed) with the other values reserved for 
future purpose?


Cheers,

--
Julien Grall

Re: [PATCH v4] acpi/processor: fix evaluating _PDC method when running as Xen dom0

2023-03-20 Thread Rafael J. Wysocki

On Fri, Mar 17, 2023 at 1:42 PM Juergen Gross  wrote:
>
> On 16.03.23 17:42, Roger Pau Monne wrote:
> > In ACPI systems, the OS can direct power management, as opposed to the
> > firmware.  This OS-directed Power Management is called OSPM.  Part of
> > telling the firmware that the OS is going to direct power management is
> > making ACPI "_PDC" (Processor Driver Capabilities) calls.  These _PDC
> > methods must be evaluated for every processor object.  If these _PDC
> > calls are not completed for every processor it can lead to
> > inconsistency and later failures in things like the CPU frequency
> > driver.
> >
> > In a Xen system, the dom0 kernel is responsible for system-wide power
> > management.  The dom0 kernel is in charge of OSPM.  However, the
> > number of CPUs available to dom0 can be different than the number of
> > CPUs physically present on the system.
> >
> > This leads to a problem: the dom0 kernel needs to evaluate _PDC for
> > all the processors, but it can't always see them.
> >
> > In dom0 kernels, ignore the existing ACPI method for determining if a
> > processor is physically present because it might not be accurate.
> > Instead, ask the hypervisor for this information.
> >
> > Fix this by introducing a custom function to use when running as Xen
> > dom0 in order to check whether a processor object matches a CPU that's
> > online.  Such checking is done using the existing information fetched
> > by the Xen pCPU subsystem, extending it to also store the ACPI ID.
> >
> > This ensures that _PDC method gets evaluated for all physically online
> > CPUs, regardless of the number of CPUs made available to dom0.
> >
> > Fixes: 5d554a7bb064 ('ACPI: processor: add internal 
> > processor_physically_present()')
> > Signed-off-by: Roger Pau Monné 
>
> Reviewed-by: Juergen Gross 

Applied as 6.4 material under a slightly edited subject, thanks!

[linux-linus test] 179805: regressions - trouble: blocked/fail/pass/starved

2023-03-20 Thread osstest service owner

flight 179805 linux-linus real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179805/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-xl-credit1  20 guest-localmigrate/x10   fail REGR. vs. 178042
 test-amd64-amd64-xl-xsm  17 guest-saverestorefail REGR. vs. 178042
 test-amd64-amd64-dom0pvh-xl-intel 14 guest-start fail REGR. vs. 178042
 test-amd64-amd64-xl-pvhv2-amd 18 guest-localmigrate  fail REGR. vs. 178042
 test-amd64-amd64-xl-multivcpu 20 guest-localmigrate/x10  fail REGR. vs. 178042
 test-amd64-amd64-dom0pvh-xl-amd 14 guest-start   fail REGR. vs. 178042
 test-amd64-amd64-libvirt-xsm 19 guest-stop   fail REGR. vs. 178042
 test-arm64-arm64-xl-xsm  14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 18 
guest-start/debianhvm.repeat fail REGR. vs. 178042
 build-arm64   6 xen-buildfail REGR. vs. 178042
 test-amd64-amd64-libvirt 14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-debianhvm-amd64 12 debian-hvm-install fail REGR. vs. 
178042
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-shadow 12 debian-hvm-install fail 
REGR. vs. 178042
 test-amd64-amd64-xl-qemut-debianhvm-amd64 12 debian-hvm-install fail REGR. vs. 
178042
 build-i386-pvops  6 kernel-build fail REGR. vs. 178042
 test-amd64-amd64-xl-qemut-stubdom-debianhvm-amd64-xsm 12 debian-hvm-install 
fail REGR. vs. 178042
 test-amd64-amd64-pair25 guest-start/debian   fail REGR. vs. 178042
 test-amd64-amd64-qemuu-nested-amd 12 debian-hvm-install  fail REGR. vs. 178042
 test-amd64-amd64-xl-pvshim   14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-xl-pvhv2-intel 14 guest-start   fail REGR. vs. 178042
 test-amd64-amd64-xl-vhd  12 debian-di-installfail REGR. vs. 178042
 test-amd64-amd64-pygrub  12 debian-di-installfail REGR. vs. 178042
 test-amd64-amd64-libvirt-raw 12 debian-di-installfail REGR. vs. 178042
 test-amd64-amd64-libvirt-qcow2 12 debian-di-install  fail REGR. vs. 178042
 test-amd64-amd64-xl   17 guest-saverestore fail in 179791 REGR. vs. 178042
 test-amd64-amd64-xl-shadow 20 guest-localmigrate/x10 fail in 179791 REGR. vs. 
178042
 test-amd64-amd64-libvirt-pair 27 guest-migrate/dst_host/src_host fail in 
179791 REGR. vs. 178042
 test-amd64-amd64-xl-credit2 22 guest-start/debian.repeat fail in 179791 REGR. 
vs. 178042
 build-arm64-pvops 6 kernel-build   fail in 179791 REGR. vs. 178042
 test-amd64-amd64-freebsd12-amd64 16 guest-saverestore fail in 179797 REGR. vs. 
178042
 test-amd64-amd64-freebsd11-amd64 16 guest-saverestore fail in 179797 REGR. vs. 
178042
 test-amd64-coresched-amd64-xl 17 guest-saverestore fail in 179797 REGR. vs. 
178042
 test-amd64-amd64-qemuu-nested-intel 13 nested-setup fail in 179797 REGR. vs. 
178042
 test-amd64-amd64-xl-qemuu-ovmf-amd64 17 guest-saverestore.2 fail in 179797 
REGR. vs. 178042
 test-arm64-arm64-xl-thunderx 14 guest-startfail in 179797 REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-dmrestrict-amd64-dmrestrict 13 guest-stop fail in 
179797 REGR. vs. 178042
 test-arm64-arm64-libvirt-xsm 14 guest-startfail in 179797 REGR. vs. 178042
 test-arm64-arm64-xl  14 guest-startfail in 179797 REGR. vs. 178042
 test-arm64-arm64-xl-credit2  17 guest-stop fail in 179797 REGR. vs. 178042
 test-arm64-arm64-xl-credit1 18 guest-start/debian.repeat fail in 179797 REGR. 
vs. 178042
 test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 18 guest-localmigrate/x10 fail in 
179797 REGR. vs. 178042
 test-arm64-arm64-xl-vhd   12 debian-di-install fail in 179797 REGR. vs. 178042
 test-arm64-arm64-libvirt-raw 12 debian-di-install fail in 179797 REGR. vs. 
178042

Tests which are failing intermittently (not blocking):
 test-amd64-amd64-libvirt-xsm 14 guest-start  fail in 179791 pass in 179805
 test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 15 guest-saverestore fail in 
179791 pass in 179805
 test-amd64-amd64-xl-credit2 20 guest-localmigrate/x10 fail in 179797 pass in 
179791
 test-amd64-amd64-xl-credit1 17 guest-saverestore fail in 179797 pass in 179805
 test-amd64-amd64-xl-xsm  14 guest-start  fail in 179797 pass in 179805
 test-amd64-amd64-xl-pvhv2-amd 14 guest-start fail in 179797 pass in 179805
 test-amd64-amd64-libvirt-xsm 18 guest-saverestore.2 fail in 179797 pass in 
179805
 test-amd64-amd64-xl-rtds 14 guest-start  fail in 179797 pass in 179805
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 12 debian-hvm-install fail 
in 179797 pass in 179805
 test-amd64-amd64-xl-multivcpu 14 guest-start fail in 179797 pass in 179805
 test-amd64-amd64-xl-shadow   17 guest-saverestore  fail pass in 179791
 test-amd64-amd64-xl  14 guest-startfail pass in 179791
 test-amd64-amd64-libvirt-pair 25 guest-start/debianfail pass in

Re: [BUG] x2apic broken with current AMD hardware

2023-03-20 Thread Andrew Cooper

On 20/03/2023 8:28 am, Jan Beulich wrote:
> On 20.03.2023 09:14, Jan Beulich wrote:
>> On 17.03.2023 18:26, Elliott Mitchell wrote:
>>> On Fri, Mar 17, 2023 at 09:22:09AM +0100, Jan Beulich wrote:
 On 16.03.2023 23:03, Elliott Mitchell wrote:
> On Mon, Mar 13, 2023 at 08:01:02AM +0100, Jan Beulich wrote:
>> On 11.03.2023 01:09, Elliott Mitchell wrote:
>>> On Thu, Mar 09, 2023 at 10:03:23AM +0100, Jan Beulich wrote:
 In any event you will want to collect a serial log at maximum 
 verbosity.
 It would also be of interest to know whether turning off the IOMMU 
 avoids
 the issue as well (on the assumption that your system has less than 255
 CPUs).
>>> I think I might have figured out the situation in a different fashion.
>>>
>>> I was taking a look at the BIOS manual for this motherboard and noticed
>>> a mention of a "Local APIC Mode" setting.  Four values are listed
>>> "Compatibility", "xAPIC", "x2APIC", and "Auto".
>>>
>>> That is the sort of setting I likely left at "Auto" and that may well
>>> result in x2 functionality being disabled.  Perhaps the x2APIC
>>> functionality on AMD is detecting whether the hardware is present, and
>>> failing to test whether it has been enabled?  (could be useful to output
>>> a message suggesting enabling the hardware feature)
>> Can we please move to a little more technical terms here? What is 
>> "present"
>> and "enabled" in your view? I don't suppose you mean the CPUID bit (which
>> we check) and the x2APIC-mode-enable one (which we drive as needed). It's
>> also left unclear what the four modes of BIOS operation evaluate to. Even
>> if we knew that, overriding e.g. "Compatibility" (which likely means some
>> form of "disabled" / "hidden") isn't normally an appropriate thing to do.
>> In "Auto" mode Xen likely should work - the only way I could interpret 
>> the other modes are "xAPIC" meaning no x2APIC ACPI tables entries (and
>> presumably the CPUID bit also masked), "x2APIC" meaning x2APIC mode pre-
>> enabled by firmware, and "Auto" leaving it to the OS to select. Yet 
>> that's
>> speculation on my part ...
> I provided the information I had discovered.  There is a setting for this
> motherboard (likely present on some similar motherboards) which /may/
> affect the issue.  I doubt I've tried "compatibility", but none of the
> values I've tried have gotten the system to boot without "x2apic=false"
> on Xen's command-line.
>
> When setting to "x2APIC" just after "(XEN) AMD-Vi: IOMMU Extended 
> Features:"
> I see the line "(XEN) - x2APIC".  Later is the line
> "(XEN) x2APIC mode is already enabled by BIOS."  I'll guess "Auto"
> leaves the x2APIC turned off since neither line is present.
 When "(XEN) - x2APIC" is absent the IOMMU can't be switched into x2APIC
 mode. Are you sure that's the case when using "Auto"?
>>> grep -eAPIC\ driver -e-\ x2APIC:
>>>
>>> "Auto":
>>> (XEN) Using APIC driver default
>>> (XEN) Overriding APIC driver with bigsmp
>>> (XEN) Switched to APIC driver x2apic_cluster
>>>
>>> "x2APIC":
>>> (XEN) Using APIC driver x2apic_cluster
>>> (XEN) - x2APIC
>>>
>>> Yes, I'm sure.
>> Okay, this then means we're running in a mode we don't mean to run
>> in: When the IOMMU claims to not support x2APIC mode (which is odd in
>> the first place when at the same time the CPU reports x2APIC mode as
>> supported), amd_iommu_prepare() is intended to switch interrupt
>> remapping mode to "restricted" (which in turn would force x2APIC mode
>> to "physical", not "clustered"). I notice though that there are a
>> number of error paths in the function which bypass this setting. Could
>> you add a couple of printk()s to understand which path is taken (each
>> time; the function can be called more than once)?
> I think I've spotted at least one issue. Could you give the patch below
> a try please? (Patch is fine for master and 4.17 but would need context
> adjustment for 4.16.)
>
> Jan
>
> AMD/IOMMU: without XT, x2APIC needs to be forced into physical mode
>
> An earlier change with the same title (commit 1ba66a870eba) altered only
> the path where x2apic_phys was already set to false (perhaps from the
> command line). The same of course needs applying when the variable
> wasn't modified yet from its initial value.
>
> Reported-by: Elliott Mitchell 
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper 

I think it's worth saying that for diagnosing purposes, if
x2apic_phys=true also resolves the issue, then it is likely related to this.

~Andrew

>
> --- unstable.orig/xen/arch/x86/genapic/x2apic.c
> +++ unstable/xen/arch/x86/genapic/x2apic.c
> @@ -236,11 +236,11 @@ const struct genapic *__init apic_x2apic
>  if ( x2apic_phys < 0 )
>  {
>  /*
> - * Force physical mode if there's no interrupt remapping support: The
> -

Re: [PATCH 2/2] tools/xl: rework p9 config parsing

2023-03-20 Thread Jason Andryuk

On Fri, Mar 17, 2023 at 7:16 AM Juergen Gross  wrote:
>
> Rework the config parsing of a p9 device to use the
> split_string_into_pair() function instead of open coding it.
>
> Signed-off-by: Juergen Gross 
> ---
>  tools/xl/xl_parse.c | 72 ++---
>  1 file changed, 35 insertions(+), 37 deletions(-)
>
> diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
> index 2f9dfea05c..715e14f95f 100644
> --- a/tools/xl/xl_parse.c
> +++ b/tools/xl/xl_parse.c
> @@ -2111,54 +2111,52 @@ void parse_config_data(const char *config_source,
>
>  if (!xlu_cfg_get_list(config, "p9", &p9devs, 0, 0)) {
>  libxl_device_p9 *p9;
> -char *security_model = NULL;
> -char *path = NULL;
> -char *tag = NULL;
> -char *backend = NULL;
> -char *p, *p2, *buf2;
>
>  d_config->num_p9s = 0;
>  d_config->p9s = NULL;
>  while ((buf = xlu_cfg_get_listitem (p9devs, d_config->num_p9s)) != 
> NULL) {
> +libxl_string_list pairs;
> +int len;
> +
>  p9 = ARRAY_EXTEND_INIT(d_config->p9s,
> d_config->num_p9s,
> libxl_device_p9_init);
>  libxl_device_p9_init(p9);
>
> -buf2 = strdup(buf);
> -p = strtok(buf2, ",");
> -if(p) {
> -   do {
> -  while(*p == ' ')
> - ++p;
> -  if ((p2 = strchr(p, '=')) == NULL)
> - break;
> -  *p2 = '\0';
> -  if (!strcmp(p, "security_model")) {
> - security_model = strdup(p2 + 1);
> -  } else if(!strcmp(p, "path")) {
> - path = strdup(p2 + 1);
> -  } else if(!strcmp(p, "tag")) {
> - tag = strdup(p2 + 1);
> -  } else if(!strcmp(p, "backend")) {
> - backend = strdup(p2 + 1);
> -  } else {
> - fprintf(stderr, "Unknown string `%s' in 9pfs spec\n", 
> p);
> - exit(1);
> -  }
> -   } while ((p = strtok(NULL, ",")) != NULL);
> -}
> -if (!path || !security_model || !tag) {
> -   fprintf(stderr, "9pfs spec missing required field!\n");
> -   exit(1);
> +split_string_into_string_list(buf, ",", &pairs);
> +len = libxl_string_list_length(&pairs);
> +for (i = 0; i < len; i++) {
> +char *key, *value;
> +int rc;
> +
> +rc = split_string_into_pair(pairs[i], "=", &key, &value,
> +isspace);
> +if (rc != 0) {
> +fprintf(stderr, "failed to parse 9pfs configuration: %s",
> +pairs[i]);
> +exit(1);
> +}
> +
> +if (!strcmp(key, "security_model")) {
> +replace_string(&p9->security_model, value);
> +} else if (!strcmp(key, "path")) {
> +replace_string(&p9->path, value);
> +} else if (!strcmp(key, "tag")) {
> +replace_string(&p9->tag, value);
> +} else if (!strcmp(key, "backend")) {
> +replace_string(&p9->backend_domname, value);
> +} else {
> +fprintf(stderr, "Unknown 9pfs parameter '%s'\n", key);
> +exit(1);
> +}
> +free(key);
> +free(value);
>  }
> -free(buf2);

I think you need libxl_string_list_dispose(&pairs); somewhere around here?

The rest looks good.

Regards,
Jason

[qemu-mainline test] 179802: regressions - trouble: fail/pass/starved

2023-03-20 Thread osstest service owner

flight 179802 qemu-mainline real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179802/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-i386-libvirt  14 guest-start  fail REGR. vs. 179518
 test-amd64-i386-libvirt-xsm  14 guest-start  fail REGR. vs. 179518
 test-amd64-i386-libvirt-pair 25 guest-start/debian   fail REGR. vs. 179518
 test-amd64-amd64-xl-qcow212 debian-di-installfail REGR. vs. 179518
 test-amd64-amd64-libvirt-vhd 12 debian-di-installfail REGR. vs. 179518
 test-arm64-arm64-libvirt-xsm 14 guest-start  fail REGR. vs. 179518
 test-amd64-i386-libvirt-raw  12 debian-di-installfail REGR. vs. 179518
 test-amd64-amd64-libvirt 14 guest-start  fail REGR. vs. 179518
 test-amd64-amd64-libvirt-xsm 14 guest-start  fail REGR. vs. 179518
 test-amd64-amd64-xl-qemuu-dmrestrict-amd64-dmrestrict 12 debian-hvm-install 
fail REGR. vs. 179518
 test-amd64-i386-xl-qemuu-dmrestrict-amd64-dmrestrict 12 debian-hvm-install 
fail REGR. vs. 179518
 test-arm64-arm64-libvirt-raw 12 debian-di-installfail REGR. vs. 179518
 test-amd64-i386-xl-vhd   12 debian-di-installfail REGR. vs. 179518
 test-arm64-arm64-xl-vhd  12 debian-di-installfail REGR. vs. 179518
 test-amd64-amd64-libvirt-pair 25 guest-start/debian  fail REGR. vs. 179518

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 179518
 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 179518
 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 179518
 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 179518
 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 179518
 test-amd64-i386-xl-pvshim14 guest-start  fail   never pass
 test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  16 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-armhf-armhf-libvirt  1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt-qcow2  1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt-raw  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-credit1   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-multivcpu  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-rtds  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-vhd   1 build-check(1)   starved  n/a
 build-armhf-libvirt   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-cubietruck  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-credit2   1 build-check(1)   starved  n/a
 build-armhf   2 hosts-allocate   starved  n/a

version targeted for testing:
 qemuu74c581b6452394e591f13beba9fea2ec0688e2f5
baseline version:
 qemuu7b0f0aa55fd292fa3489755a3a896e496c51ea86

Last test of basis   179518  2023-03-09 10:37:19 Z   11 days
Failing since179526  2023-03-10 01:53:40 Z   10 days   18 attempts
Testing same since   179733  2023-03-17 21:10:25 Z2 days5 attempts


People who touched revisions under test:
  Akihiko Odaki 
  Albert Esteve 
  Alex Bennée 
  Alex Williamson 
  Alistair Francis 
  Andreas Schwab 
  Anton Johansson 
  Avihai Horon 
  BALATON Zoltan 
  Bernhard Beschow 
  Carlos López 
  Cédric Le Goater 
  Cédric Le Goater 
  Damien Hedde 
  Daniel P. Berrangé 
  David Hildenbrand 
  David Woodhouse 
  David Woodhouse 
  Dr. David Alan Gilbert 
  Eugenio Pérez 
  Fabiano Rosas 
  Fan Ni 
  fanwenjie 
  fa...@mail.ustc.edu.cn 
  Fiona Ebner 
  Gal Hammer 
  Gerd Hoffmann 
  Hanna Czenczek 
  Helge Deller 
  Idan Horowitz 
  Igor Mammedov 
  Ilya Leoshkevich 
  Ivan Klokov 
  Jared Rossi 
  Jason Wang 
  Jiaxun Yang 
  Joao

Re: [PATCH 1/2] tools/xl: allow split_string_into_pair() to trim values

2023-03-20 Thread Jason Andryuk

On Fri, Mar 17, 2023 at 7:16 AM Juergen Gross  wrote:
>
> Most use cases of split_string_into_pair() are requiring the returned
> strings to be white space trimmed.
>
> In order to avoid the same code pattern multiple times, add a predicate
> parameter to split_string_into_pair() which can be specified to call
> trim() with that predicate for the string pair returned. Specifying
> NULL for the predicate will avoid the call of trim().
>
> Signed-off-by: Juergen Gross 

Reviewed-by: Jason Andryuk

Re: [PATCH v2 2/3] xen/riscv: setup initial pagetables

2023-03-20 Thread Jan Beulich

On 16.03.2023 17:43, Oleksii Kurochko wrote:
> Signed-off-by: Oleksii Kurochko 
> ---
> Changes in V2:
>  * Update the commit message

Odd: It's empty. Since it's not part of the title, you could at least
say that you're also enabling the MMU. (Most of the time entirely
empty descriptions are suspicious.)

Jan

Re: [PATCH v2 2/2] x86/APIC: modify error_interrupt() to output using single printk()

2023-03-20 Thread Jan Beulich

On 20.03.2023 16:54, Elliott Mitchell wrote:
> On Mon, Mar 20, 2023 at 04:39:48PM +0100, Jan Beulich wrote:
>> On 20.03.2023 15:29, Elliott Mitchell wrote:
>>>
>>> There are several minor issues here which may be best handled during
>>> commit as they're very small items about how precisely you want this to
>>> look.
>>>
>>> First, I later realized I goofed the argument order.  In order to match
>>> the original implementation, it needs to be entries[7] ... entries[0]
>>> (could though be the low-order bits should be reported first).
>>
>> I'm not really concerned of the order. A change of order wants
>> mentioning in the description though.
> 
> Seemed simple enough to fix on commit (simply switch the order of
> numbers).
> 
>>> Second, the order of the for loop no longer matters.  Using
>>> ARRAY_SIZE(esr_fields) and increment should now be more maintainable
>>> (this would also allow i to be unsigned).
>>
>> Indeed. But that would better done in a separate patch then anyway.
> 
> Feel free to split.
> 
>>> Third, I'm simply unsure how you would prefer to format the printk().
>>
>> About any way matching style guidelines is okay. There are two more
>> things to mention though (sorry for not noticing earlier): We aim at
>> keeping the entire format string on one line, for grep-ability. And
>> there's no need (and in fact no reason) to split the sequence of %s
>> from the \n. To summarize:
>>
>> printk(XENLOG_DEBUG
>>"APIC error on CPU%u: %02x(%02x)%s%s%s%s%s%s%s%s\n",
>>
>> (unless of course it all fits on one line, which it looks like it
>> does).
> 
> I like keeping the "%s%s%s%s%s%s%s%s" section separated since it needs to
> match the number of arguments.  In the future where more bits of the
> register are defined, both sections will need to be modified together.
> 
> 
> This seems to be a spot where there are large numbers of similarly
> functional, but mildly different style variants.  As such I suspect this
> is best left in your hands as this is a bog of trivial style
> considerations which have no real functional effect.

Just to clarify: What is or is not adjusted on commit is a decision of
the committer. A no longer as active committer was actually of the
opinion that it is a mistake to ever make any changes while committing.
In the case here you're asking for far more changes (including either
one to the description of patch 1, or the folding of both patches) than
I personally would be willing to do. I'm sorry for that.

Jan

Re: [RFC XEN PATCH 6/6] tools/libs/light: pci: translate irq to gsi

2023-03-20 Thread Roger Pau Monné

On Mon, Mar 20, 2023 at 04:29:25PM +0100, Jan Beulich wrote:
> On 20.03.2023 16:16, Roger Pau Monné wrote:
> > @@ -244,12 +242,18 @@ static void vioapic_write_redirent(
> >  }
> >  else
> >  {
> > +int ret;
> > +
> >  unmasked = ent.fields.mask;
> >  /* Remote IRR and Delivery Status are read-only. */
> >  ent.bits = ((ent.bits >> 32) << 32) | val;
> >  ent.fields.delivery_status = 0;
> >  ent.fields.remote_irr = pent->fields.remote_irr;
> >  unmasked = unmasked && !ent.fields.mask;
> > +ret = mp_register_gsi(gsi, ent.fields.trig_mode, 
> > ent.fields.polarity);
> > +if ( ret && ret !=  -EEXIST )
> > +gprintk(XENLOG_WARNING, "vioapic: error registering GSI %u: 
> > %d\n",
> > +gsi, ret);
> >  }
> 
> I assume this is only meant to be experimental, as I'm missing confinement
> to Dom0 here.

Indeed.  I've attached a fixed version below, let's make sure this
doesn't influence testing.

> I also question this when the mask bit as set, as in that
> case neither the trigger mode bit nor the polarity one can be relied upon.
> At which point it would look to me as if it was necessary for Dom0 to use
> a hypercall instead (which naturally would then be PHYSDEVOP_setup_gsi).

AFAICT Linux does correctly set the trigger/polarity even when the
pins are masked, so this should be safe as a proof of concept. Let's
first figure out whether the issue is really with the lack of setup of
the IO-APIC pins.  At the end without input from Ray this is just a
wild guess.

Regards, Roger.

diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c
index 405d0a95af..cc53a3bd12 100644
--- a/xen/arch/x86/hvm/hypercall.c
+++ b/xen/arch/x86/hvm/hypercall.c
@@ -86,6 +86,8 @@ long hvm_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
 case PHYSDEVOP_map_pirq:
 case PHYSDEVOP_unmap_pirq:
+break;
+
 case PHYSDEVOP_eoi:
 case PHYSDEVOP_irq_status_query:
 case PHYSDEVOP_get_free_pirq:
diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
index 41e3c4d5e4..64f7b5bcc5 100644
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -180,9 +180,7 @@ static int vioapic_hwdom_map_gsi(unsigned int gsi, unsigned 
int trig,
 
 /* Interrupt has been unmasked, bind it now. */
 ret = mp_register_gsi(gsi, trig, pol);
-if ( ret == -EEXIST )
-return 0;
-if ( ret )
+if ( ret && ret != -EEXIST )
 {
 gprintk(XENLOG_WARNING, "vioapic: error registering GSI %u: %d\n",
  gsi, ret);
@@ -250,6 +248,16 @@ static void vioapic_write_redirent(
 ent.fields.delivery_status = 0;
 ent.fields.remote_irr = pent->fields.remote_irr;
 unmasked = unmasked && !ent.fields.mask;
+if ( is_hardware_domain(d) )
+{
+int ret = mp_register_gsi(gsi, ent.fields.trig_mode,
+  ent.fields.polarity);
+
+if ( ret && ret !=  -EEXIST )
+gprintk(XENLOG_WARNING,
+"vioapic: error registering GSI %u: %d\n",
+gsi, ret);
+}
 }
 
 *pent = ent;

Re: [PATCH v2 1/3] xen/riscv: introduce setup_initial_pages

2023-03-20 Thread Jan Beulich

On 16.03.2023 17:43, Oleksii Kurochko wrote:
> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/mm.h
> @@ -0,0 +1,8 @@
> +#ifndef _ASM_RISCV_MM_H
> +#define _ASM_RISCV_MM_H
> +
> +void setup_initial_pagetables(void);
> +
> +extern void enable_mmu(void);

Nit: At least within a single header you probably want to be consistent
about the use of "extern". Personally I think it would better be omitted
from function declarations.

> --- /dev/null
> +++ b/xen/arch/riscv/include/asm/page.h
> @@ -0,0 +1,67 @@
> +#ifndef _ASM_RISCV_PAGE_H
> +#define _ASM_RISCV_PAGE_H
> +
> +#include 
> +#include 
> +
> +#define PAGE_ENTRIES(1 << PAGETABLE_ORDER)
> +#define VPN_MASK((unsigned long)(PAGE_ENTRIES - 1))
> +
> +#define PAGE_ORDER  (12)

DYM PAGE_SHIFT here, as used elsewhere in Xen?

Also are you aware of page-bits.h, where I think some of these constants
should go?

> +#ifdef CONFIG_RISCV_64
> +#define PAGETABLE_ORDER (9)
> +#else /* CONFIG_RISCV_32 */
> +#define PAGETABLE_ORDER (10)
> +#endif
> +
> +#define LEVEL_ORDER(lvl)(lvl * PAGETABLE_ORDER)
> +#define LEVEL_SHIFT(lvl)(LEVEL_ORDER(lvl) + PAGE_ORDER)
> +#define LEVEL_SIZE(lvl) (_AT(paddr_t, 1) << LEVEL_SHIFT(lvl))
> +
> +#define XEN_PT_LEVEL_SHIFT(lvl) LEVEL_SHIFT(lvl)
> +#define XEN_PT_LEVEL_ORDER(lvl) LEVEL_ORDER(lvl)
> +#define XEN_PT_LEVEL_SIZE(lvl)  LEVEL_SIZE(lvl)

Mind me asking what these are good for? Doesn't one set of macros
suffice?

> +#define XEN_PT_LEVEL_MAP_MASK(lvl)  (~(XEN_PT_LEVEL_SIZE(lvl) - 1))
> +#define XEN_PT_LEVEL_MASK(lvl)  (VPN_MASK << XEN_PT_LEVEL_SHIFT(lvl))
> +
> +#define PTE_SHIFT   10

What does this describe? According to its single use here it may
simply require a better name.

> +#define PTE_VALID   BIT(0, UL)
> +#define PTE_READABLEBIT(1, UL)
> +#define PTE_WRITABLEBIT(2, UL)
> +#define PTE_EXECUTABLE  BIT(3, UL)
> +#define PTE_USERBIT(4, UL)
> +#define PTE_GLOBAL  BIT(5, UL)
> +#define PTE_ACCESSEDBIT(6, UL)
> +#define PTE_DIRTY   BIT(7, UL)
> +#define PTE_RSW (BIT(8, UL) | BIT(9, UL))
> +
> +#define PTE_LEAF_DEFAULT(PTE_VALID | PTE_READABLE | 
> PTE_EXECUTABLE)
> +#define PTE_TABLE   (PTE_VALID)
> +
> +/* Calculate the offsets into the pagetables for a given VA */
> +#define pt_linear_offset(lvl, va)   ((va) >> XEN_PT_LEVEL_SHIFT(lvl))
> +
> +#define pt_index(lvl, va)   pt_linear_offset(lvl, (va) & 
> XEN_PT_LEVEL_MASK(lvl))
> +
> +/* Page Table entry */
> +typedef struct {
> +uint64_t pte;
> +} pte_t;

Not having read the respective spec (yet) I'm wondering if this really
is this way also for RV32 (despite the different PAGETABLE_ORDER).

> --- /dev/null
> +++ b/xen/arch/riscv/mm.c
> @@ -0,0 +1,121 @@
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/*
> + * xen_second_pagetable is indexed with the VPN[2] page table entry field
> + * xen_first_pagetable is accessed from the VPN[1] page table entry field
> + * xen_zeroeth_pagetable is accessed from the VPN[0] page table entry field
> + */
> +pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
> +xen_second_pagetable[PAGE_ENTRIES];
> +static pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
> +xen_first_pagetable[PAGE_ENTRIES];
> +static pte_t __section(".bss.page_aligned") __aligned(PAGE_SIZE)
> +xen_zeroeth_pagetable[PAGE_ENTRIES];

I would assume Andrew's comment on the earlier version also extended to
the names used here (and then also to various local variables or function
parameters further down).

> +extern unsigned long __init_begin[];
> +extern unsigned long __init_end[];
> +extern unsigned char cpu0_boot_stack[STACK_SIZE];
> +
> +static void __init
> +setup_initial_mapping(pte_t *second, pte_t *first, pte_t *zeroeth,
> +  unsigned long map_start,
> +  unsigned long map_end,
> +  unsigned long pa_start,
> +  unsigned long flags)
> +{
> +unsigned long page_addr;
> +
> +// /* align start addresses */
> +// map_start &= XEN_PT_LEVEL_MAP_MASK(0);
> +// pa_start &= XEN_PT_LEVEL_MAP_MASK(0);

It's not clear what this is about, but in any event the comment is malformed.

> +page_addr = map_start;
> +while ( page_addr < map_end )
> +{
> +unsigned long index2 = pt_index(2, page_addr);
> +unsigned long index1 = pt_index(1, page_addr);
> +unsigned long index0 = pt_index(0, page_addr);
> +
> +/* Setup level2 table */
> +second[index2] = paddr_to_pte((unsigned long)first);
> +second[index2].pte |= PTE_TABLE;
> +
> +/* Setup level1 table */
> +first[index1] =

[XEN PATCH v1 1/1] x86/domctl: add gva_to_gfn command

2023-03-20 Thread Ковалёв Сергей


The gva_to_gfn command is used for fast address translation in the LibVMI
project. With such a command it is possible to perform address translation in
a single call instead of a series of queries to get every page table.

Thanks to Dmitry Isaykin for involvement.

Signed-off-by: Sergey Kovalev 

---
Cc: Jan Beulich 
Cc: Andrew Cooper 
Cc: "Roger Pau Monné" 
Cc: Wei Liu 
Cc: George Dunlap 
Cc: Julien Grall 
Cc: Stefano Stabellini 
Cc: Tamas K Lengyel 
Cc: xen-devel@lists.xenproject.org
---

---
 xen/arch/x86/domctl.c   | 17 +
 xen/include/public/domctl.h | 13 +
 2 files changed, 30 insertions(+)

diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 2118fcad5d..0c9706ea0a 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1364,6 +1364,23 @@ long arch_do_domctl(
 copyback = true;
 break;

+case XEN_DOMCTL_gva_to_gfn:
+{
+uint64_t ga = domctl->u.gva_to_gfn.addr;
+uint64_t cr3 = domctl->u.gva_to_gfn.cr3;
+struct vcpu* v = d->vcpu[0];
+uint32_t pfec = PFEC_page_present;
+unsigned int page_order;
+
+uint64_t gfn = paging_ga_to_gfn_cr3(v, cr3, ga, &pfec, &page_order);

+domctl->u.gva_to_gfn.addr = gfn;
+domctl->u.gva_to_gfn.page_order = page_order;
+if ( __copy_to_guest(u_domctl, domctl, 1) )
+ret = -EFAULT;
+
+break;
+}
+
 default:
 ret = -ENOSYS;
 break;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 51be28c3de..628dfc68fd 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -948,6 +948,17 @@ struct xen_domctl_paging_mempool {
 uint64_aligned_t size; /* Size in bytes. */
 };

+/*
+ * XEN_DOMCTL_gva_to_gfn.
+ *
+ * Get the guest virtual to guest physical address translated.
+ */
+struct xen_domctl_gva_to_gfn {
+uint64_aligned_t addr;
+uint64_aligned_t cr3;
+uint64_aligned_t page_order;
+};
+
 #if defined(__i386__) || defined(__x86_64__)
 struct xen_domctl_vcpu_msr {
 uint32_t index;
@@ -1278,6 +1289,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_vmtrace_op84
 #define XEN_DOMCTL_get_paging_mempool_size   85
 #define XEN_DOMCTL_set_paging_mempool_size   86
+#define XEN_DOMCTL_gva_to_gfn87
 #define XEN_DOMCTL_gdbsx_guestmemio1000
 #define XEN_DOMCTL_gdbsx_pausevcpu 1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu   1002
@@ -1340,6 +1352,7 @@ struct xen_domctl {
 struct xen_domctl_vuart_op  vuart_op;
 struct xen_domctl_vmtrace_opvmtrace_op;
 struct xen_domctl_paging_mempoolpaging_mempool;
+struct xen_domctl_gva_to_gfngva_to_gfn;
 uint8_t pad[128];
 } u;
 };
--
2.38.1

Re: [RFC XEN PATCH 0/6] Introduce VirtIO GPU and Passthrough GPU support on Xen PVH dom0

2023-03-20 Thread Huang Rui

Hi Jan, Roger, Stefano, Andrew,

Sorry for the late response; I was fully occupied by another problem last week.
I will reply to each of the mails one by one tomorrow. Thanks for your
patience. :-)

Thanks,
Ray

On Sun, Mar 12, 2023 at 03:54:49PM +0800, Huang, Ray wrote:
> Hi all,
> 
> In graphic world, the 3D applications/games are runing based on open
> graphic libraries such as OpenGL and Vulkan. Mesa is the Linux
> implemenatation of OpenGL and Vulkan for multiple hardware platforms.
> Because the graphic libraries would like to have the GPU hardware
> acceleration. In virtualization world, virtio-gpu and passthrough-gpu are
> two of gpu virtualization technologies.
> 
> Current Xen only supports OpenGL (virgl:
> https://docs.mesa3d.org/drivers/virgl.html) for virtio-gpu and passthrough
> gpu based on PV dom0 for x86 platform. Today, we would like to introduce
> Vulkan (venus: https://docs.mesa3d.org/drivers/venus.html) and another
> OpenGL on Vulkan (zink: https://docs.mesa3d.org/drivers/zink.html) support
> for VirtIO GPU on Xen. These functions are supported on KVM at this moment,
> but so far, they are not supported on Xen. And we also introduce the PCIe
> passthrough (GPU) function based on PVH dom0 for AMD x86 platform.
> 
> These supports required multiple repositories changes on kernel, xen, qemu,
> mesa, and virglrenderer. Please check below branches:
> 
> Kernel: 
> https://git.kernel.org/pub/scm/linux/kernel/git/rui/linux.git/log/?h=upstream-fox-xen
> Xen: https://gitlab.com/huangrui123/xen/-/commits/upstream-for-xen
> QEMU: https://gitlab.com/huangrui123/qemu/-/commits/upstream-for-xen
> Mesa: https://gitlab.freedesktop.org/rui/mesa/-/commits/upstream-for-xen
> Virglrenderer: 
> https://gitlab.freedesktop.org/rui/virglrenderer/-/commits/upstream-for-xen
> 
> In xen part, we mainly add the PCIe passthrough support on PVH dom0. It's
> using the QEMU to passthrough the GPU device into guest HVM domU. And
> mainly work is to transfer the interrupt by using gsi, vector, and pirq.
> 
> Below are the screenshot of these functions, please take a look.
> 
> Venus:
> https://drive.google.com/file/d/1_lPq6DMwHu1JQv7LUUVRx31dBj0HJYcL/view?usp=share_link
> 
> Zink:
> https://drive.google.com/file/d/1FxLmKu6X7uJOxx1ZzwOm1yA6IL5WMGzd/view?usp=share_link
> 
> Passthrough GPU:
> https://drive.google.com/file/d/17onr5gvDK8KM_LniHTSQEI2hGJZlI09L/view?usp=share_link
> 
> We are working to write the documentation that describe how to verify these
> functions in the xen wiki page. And will update it in the future version.
> 
> Thanks,
> Ray
> 
> Chen Jiqian (5):
>   vpci: accept BAR writes if dom0 is PVH
>   x86/pvh: shouldn't check pirq flag when map pirq in PVH
>   x86/pvh: PVH dom0 also need PHYSDEVOP_setup_gsi call
>   tools/libs/call: add linux os call to get gsi from irq
>   tools/libs/light: pci: translate irq to gsi
> 
> Roger Pau Monne (1):
>   x86/pvh: report ACPI VFCT table to dom0 if present
> 
>  tools/include/xen-sys/Linux/privcmd.h |  7 +++
>  tools/include/xencall.h   |  2 ++
>  tools/include/xenctrl.h   |  2 ++
>  tools/libs/call/core.c|  5 +
>  tools/libs/call/libxencall.map|  2 ++
>  tools/libs/call/linux.c   | 14 ++
>  tools/libs/call/private.h |  9 +
>  tools/libs/ctrl/xc_physdev.c  |  4 
>  tools/libs/light/libxl_pci.c  |  1 +
>  xen/arch/x86/hvm/dom0_build.c |  1 +
>  xen/arch/x86/hvm/hypercall.c  |  3 +--
>  xen/drivers/vpci/header.c |  2 +-
>  xen/include/acpi/actbl3.h |  1 +
>  13 files changed, 50 insertions(+), 3 deletions(-)
> 
> -- 
> 2.25.1
>

Re: [PATCH v2 2/2] x86/APIC: modify error_interrupt() to output using single printk()

2023-03-20 Thread Elliott Mitchell

On Mon, Mar 20, 2023 at 04:39:48PM +0100, Jan Beulich wrote:
> On 20.03.2023 15:29, Elliott Mitchell wrote:
> > 
> > There are several minor issues here which may be best handled during
> > commit as they're very small items about how precisely you want this to
> > look.
> > 
> > First, I later realized I goofed the argument order.  In order to match
> > the original implementation, it needs to be entries[7] ... entries[0]
> > (could though be the low-order bits should be reported first).
> 
> I'm not really concerned of the order. A change of order wants
> mentioning in the description though.

Seemed simple enough to fix on commit (simply switch the order of
numbers).

> > Second, the order of the for loop no longer matters.  Using
> > ARRAY_SIZE(esr_fields) and increment should now be more maintainable
> > (this would also allow i to be unsigned).
> 
> Indeed. But that would better done in a separate patch then anyway.

Feel free to split.

> > Third, I'm simply unsure how you would prefer to format the printk().
> 
> About any way matching style guidelines is okay. There are two more
> things to mention though (sorry for not noticing earlier): We aim at
> keeping the entire format string on one line, for grep-ability. And
> there's no need (and in fact no reason) to split the sequence of %s
> from the \n. To summarize:
> 
> printk(XENLOG_DEBUG
>"APIC error on CPU%u: %02x(%02x)%s%s%s%s%s%s%s%s\n",
> 
> (unless of course it all fits on one line, which it looks like it
> does).

I like keeping the "%s%s%s%s%s%s%s%s" section separated since it needs to
match the number of arguments.  In the future where more bits of the
register are defined, both sections will need to be modified together.


This seems to be a spot where there are large numbers of similarly
functional, but mildly different style variants.  As such I suspect this
is best left in your hands as this is a bog of trivial style
considerations which have no real functional effect.


-- 
(\___(\___(\__  --=> 8-) EHM <=--  __/)___/)___/)
 \BS (| ehem+sig...@m5p.com  PGP 87145445 |)   /
  \_CS\   |  _  -O #include  O-   _  |   /  _/
8A19\___\_|_/58D2 7E3D DDF4 7BA6 <-PGP-> 41D1 B375 37D0 8714\_|_/___/5445

Re: [BUG] x2apic broken with current AMD hardware

2023-03-20 Thread Jan Beulich

On 20.03.2023 16:37, Elliott Mitchell wrote:
> On Mon, Mar 20, 2023 at 09:14:17AM +0100, Jan Beulich wrote:
>> On 17.03.2023 18:26, Elliott Mitchell wrote:
>>> I'm tempted to propose allowing _Static_assert() since it is valuable
>>> functionality for preventing issues.
>>
>> How does _Static_assert() come into play here? Also note that we already
>> use it when available ...
> 
> This is more in relation to the patch.  Appears GCC's C90 mode disables
> _Static_assert(), so the _Static_assert(ARRAY_SIZE(args) == 8) had to be
> dropped.

I'm puzzled by this. It's been for a long time that we've been building
with -std=gnu99. Plus you simply open-coded BUILD_BUG_ON() - if you had
used it, it would have taken care of the necessary abstraction for you
anyway.

Jan

Re: [PATCH v2 2/2] x86/APIC: modify error_interrupt() to output using single printk()

2023-03-20 Thread Jan Beulich

On 20.03.2023 15:29, Elliott Mitchell wrote:
> On Mon, Mar 20, 2023 at 09:56:54AM +0100, Jan Beulich wrote:
>> On 17.03.2023 20:53, Elliott Mitchell wrote:
>>> This takes care of the issue of APIC errors tending to occur on multiple
>>> cores at one.  In turn this tends to causes the error messages to be
>>
>> Nit: "at once"?
> 
> https://en.wiktionary.org/wiki/at_once
> 
> Adverb #2, synonym of "simultaneously".

And that's what you mean, I think? Not being a native speaker, I have no
idea what "at one" is meaning here.

>>> @@ -1419,12 +1420,12 @@ static void cf_check error_interrupt(struct 
>>> cpu_user_regs *regs)
>>>  v1 = apic_read(APIC_ESR);
>>>  ack_APIC_irq();
>>>  
>>> -printk(XENLOG_DEBUG "APIC error on CPU%u: %02x(%02x)",
>>> -smp_processor_id(), v , v1);
>>>  for ( i = 7; i >= 0; --i )
>>> -if ( v1 & (1 << i) )
>>> -printk("%s", esr_fields[i]);
>>> -printk("\n");
>>> +entries[i] = v1 & (1 << i) ? esr_fields[i] : "";
>>> +printk(XENLOG_DEBUG "APIC error on CPU%u: %02x(%02x)"
>>> +"%s%s%s%s%s%s%s%s" "\n",
>>> +smp_processor_id(), v , v1, entries[0], entries[1], entries[2],
>>> +entries[3], entries[4], entries[5], entries[6], entries[7]);
>>
>> Two style nits: Indentation wants fixing here (it was wrong in the original
>> code already), and the stray blank between v and the comma also wants
>> dropping at this occasion.
> 
> There are several minor issues here which may be best handled during
> commit as they're very small items about how precisely you want this to
> look.
> 
> First, I later realized I goofed the argument order.  In order to match
> the original implementation, it needs to be entries[7] ... entries[0]
> (could though be the low-order bits should be reported first).

I'm not really concerned of the order. A change of order wants
mentioning in the description though.

> Second, the order of the for loop no longer matters.  Using
> ARRAY_SIZE(esr_fields) and increment should now be more maintainable
> (this would also allow i to be unsigned).

Indeed. But that would better done in a separate patch then anyway.

> Third, I'm simply unsure how you would prefer to format the printk().

About any way matching style guidelines is okay. There are two more
things to mention though (sorry for not noticing earlier): We aim at
keeping the entire format string on one line, for grep-ability. And
there's no need (and in fact no reason) to split the sequence of %s
from the \n. To summarize:

printk(XENLOG_DEBUG
   "APIC error on CPU%u: %02x(%02x)%s%s%s%s%s%s%s%s\n",

(unless of course it all fits on one line, which it looks like it
does).

Jan

Re: [BUG] x2apic broken with current AMD hardware

2023-03-20 Thread Elliott Mitchell

On Mon, Mar 20, 2023 at 09:14:17AM +0100, Jan Beulich wrote:
> On 17.03.2023 18:26, Elliott Mitchell wrote:
> > On Fri, Mar 17, 2023 at 09:22:09AM +0100, Jan Beulich wrote:
> >> On 16.03.2023 23:03, Elliott Mitchell wrote:
> >>> On Mon, Mar 13, 2023 at 08:01:02AM +0100, Jan Beulich wrote:
>  On 11.03.2023 01:09, Elliott Mitchell wrote:
> > On Thu, Mar 09, 2023 at 10:03:23AM +0100, Jan Beulich wrote:
> >>
> >> In any event you will want to collect a serial log at maximum 
> >> verbosity.
> >> It would also be of interest to know whether turning off the IOMMU 
> >> avoids
> >> the issue as well (on the assumption that your system has less than 255
> >> CPUs).
> >
> > I think I might have figured out the situation in a different fashion.
> >
> > I was taking a look at the BIOS manual for this motherboard and noticed
> > a mention of a "Local APIC Mode" setting.  Four values are listed
> > "Compatibility", "xAPIC", "x2APIC", and "Auto".
> >
> > That is the sort of setting I likely left at "Auto" and that may well
> > result in x2 functionality being disabled.  Perhaps the x2APIC
> > functionality on AMD is detecting whether the hardware is present, and
> > failing to test whether it has been enabled?  (could be useful to output
> > a message suggesting enabling the hardware feature)
> 
>  Can we please move to a little more technical terms here? What is 
>  "present"
>  and "enabled" in your view? I don't suppose you mean the CPUID bit (which
>  we check) and the x2APIC-mode-enable one (which we drive as needed). It's
>  also left unclear what the four modes of BIOS operation evaluate to. Even
>  if we knew that, overriding e.g. "Compatibility" (which likely means some
>  form of "disabled" / "hidden") isn't normally an appropriate thing to do.
>  In "Auto" mode Xen likely should work - the only way I could interpret 
>  the
>  the other modes are "xAPIC" meaning no x2APIC ACPI tables entries (and
>  presumably the CPUID bit also masked), "x2APIC" meaning x2APIC mode pre-
>  enabled by firmware, and "Auto" leaving it to the OS to select. Yet 
>  that's
>  speculation on my part ...
> >>>
> >>> I provided the information I had discovered.  There is a setting for this
> >>> motherboard (likely present on some similar motherboards) which /may/
> >>> effect the issue.  I doubt I've tried "compatibility", but none of the
> >>> values I've tried have gotten the system to boot without "x2apic=false"
> >>> on Xen's command-line.
> >>>
> >>> When setting to "x2APIC" just after "(XEN) AMD-Vi: IOMMU Extended 
> >>> Features:"
> >>> I see the line "(XEN) - x2APIC".  Later is the line
> >>> "(XEN) x2APIC mode is already enabled by BIOS."  I'll guess "Auto"
> >>> leaves the x2APIC turned off since neither line is present.
> >>
> >> When "(XEN) - x2APIC" is absent the IOMMU can't be switched into x2APIC
> >> mode. Are you sure that's the case when using "Auto"?
> > 
> > grep -eAPIC\ driver -e-\ x2APIC:
> > 
> > "Auto":
> > (XEN) Using APIC driver default
> > (XEN) Overriding APIC driver with bigsmp
> > (XEN) Switched to APIC driver x2apic_cluster
> > 
> > "x2APIC":
> > (XEN) Using APIC driver x2apic_cluster
> > (XEN) - x2APIC
> > 
> > Yes, I'm sure.
> 
> Okay, this then means we're running in a mode we don't mean to run
> in: When the IOMMU claims to not support x2APIC mode (which is odd in
> the first place when at the same time the CPU reports x2APIC mode as
> supported), amd_iommu_prepare() is intended to switch interrupt
> remapping mode to "restricted" (which in turn would force x2APIC mode
> to "physical", not "clustered"). I notice though that there are a
> number of error paths in the function which bypass this setting. Could
> you add a couple of printk()s to understand which path is taken (each
> time; the function can be called more than once)?

If I'm reading the logs right, this could suggest "Local APIC Mode"
setting was modifying the IOMMU side of the APIC, not the processor side
of the APIC setting.

There is also a "Compatibility" value which I haven't tried.  Perhaps
taking a look would be interesting.

> >>> Both cases the line "(XEN) Switched to APIC driver x2apic_cluster" is
> >>> present (so perhaps "Auto" merely doesn't activate it).
> >>
> >> Did you also try "x2apic_phys" on the Xen command line (just to be sure
> >> this isn't a clustered-mode only issue)?
> > 
> > No.  In fact x2apic_cluster is mentioned in all failure cases.
> 
> Could you give physical mode a try, please?

I had taken the previous message as an implicit request to do so.  I was
stating I had not previously done so.  While "x2apic=false" is functional
as a workaround, I really would like to get to the bottom of this.

> >>> Appears error_interrupt() needs locking or some concurrency handling
> >>> mechanism since the last error is jumbled.  With the setting "x2APIC"
> >>> I get a bunch

Re: [RFC XEN PATCH 6/6] tools/libs/light: pci: translate irq to gsi

2023-03-20 Thread Jan Beulich

On 20.03.2023 16:16, Roger Pau Monné wrote:
> @@ -244,12 +242,18 @@ static void vioapic_write_redirent(
>  }
>  else
>  {
> +int ret;
> +
>  unmasked = ent.fields.mask;
>  /* Remote IRR and Delivery Status are read-only. */
>  ent.bits = ((ent.bits >> 32) << 32) | val;
>  ent.fields.delivery_status = 0;
>  ent.fields.remote_irr = pent->fields.remote_irr;
>  unmasked = unmasked && !ent.fields.mask;
> +ret = mp_register_gsi(gsi, ent.fields.trig_mode, 
> ent.fields.polarity);
> +if ( ret && ret !=  -EEXIST )
> +gprintk(XENLOG_WARNING, "vioapic: error registering GSI %u: 
> %d\n",
> +gsi, ret);
>  }

I assume this is only meant to be experimental, as I'm missing confinement
to Dom0 here. I also question this when the mask bit is set, as in that
case neither the trigger mode bit nor the polarity one can be relied upon.
At which point it would look to me as if it was necessary for Dom0 to use
a hypercall instead (which naturally would then be PHYSDEVOP_setup_gsi).

Jan

[XEN PATCH v2] build: detect compiler change to rerun kconfig

2023-03-20 Thread Anthony PERARD

This simple comment makes it possible to detect when $(CC) changes version.
Kconfig will be rerun in this case. (Rerun is forced by
include/config/auto.conf.cmd which detects changes of CC_VERSION_TEXT
value).

Signed-off-by: Anthony PERARD 
---

Technically, it was acked by Andrew:

https://lore.kernel.org/xen-devel/20200326135621.687685-1-anthony.per...@citrix.com/t/#ma1171775b9938bcdffc80b2fcb8d2a883cd3e32a

v2:
- s/upgrade/change/ in subject and in the comment in the Makefile
- Fix path in commit description
---
 xen/Kconfig  | 2 ++
 xen/Makefile | 4 
 2 files changed, 6 insertions(+)

diff --git a/xen/Kconfig b/xen/Kconfig
index 134e6e68ad..756c7842e9 100644
--- a/xen/Kconfig
+++ b/xen/Kconfig
@@ -4,6 +4,8 @@
 #
 mainmenu "Xen/$(SRCARCH) $(XEN_FULLVERSION) Configuration"
 
+comment "Compiler: $(CC_VERSION_TEXT)"
+
 source "scripts/Kconfig.include"
 
 config BROKEN
diff --git a/xen/Makefile b/xen/Makefile
index 2710d7327e..12c6891a79 100644
--- a/xen/Makefile
+++ b/xen/Makefile
@@ -326,6 +326,10 @@ ifdef building_out_of_srctree
{ echo "# this is build directory, ignore it"; echo "*"; } > .gitignore
 endif
 
+# CC_VERSION_TEXT is referenced from Kconfig (so it needs export),
+# and from include/config/auto.conf.cmd to detect the compiler change.
+export CC_VERSION_TEXT := $(shell $(CC) --version 2>/dev/null | head -n 1)
+
 ifeq ($(config-build),y)
 # ===
 # *config targets only - make sure prerequisites are updated, and descend
-- 
Anthony PERARD

Re: [RFC XEN PATCH 6/6] tools/libs/light: pci: translate irq to gsi

2023-03-20 Thread Roger Pau Monné

On Fri, Mar 17, 2023 at 01:55:08PM -0700, Stefano Stabellini wrote:
> On Fri, 17 Mar 2023, Roger Pau Monné wrote:
> > On Fri, Mar 17, 2023 at 11:15:37AM -0700, Stefano Stabellini wrote:
> > > On Fri, 17 Mar 2023, Roger Pau Monné wrote:
> > > > On Fri, Mar 17, 2023 at 09:39:52AM +0100, Jan Beulich wrote:
> > > > > On 17.03.2023 00:19, Stefano Stabellini wrote:
> > > > > > On Thu, 16 Mar 2023, Jan Beulich wrote:
> > > > > >> So yes, it then all boils down to that Linux-
> > > > > >> internal question.
> > > > > > 
> > > > > > Excellent question but we'll have to wait for Ray as he is the one 
> > > > > > with
> > > > > > access to the hardware. But I have this data I can share in the
> > > > > > meantime:
> > > > > > 
> > > > > > [1.260378] IRQ to pin mappings:
> > > > > > [1.260387] IRQ1 -> 0:1
> > > > > > [1.260395] IRQ2 -> 0:2
> > > > > > [1.260403] IRQ3 -> 0:3
> > > > > > [1.260410] IRQ4 -> 0:4
> > > > > > [1.260418] IRQ5 -> 0:5
> > > > > > [1.260425] IRQ6 -> 0:6
> > > > > > [1.260432] IRQ7 -> 0:7
> > > > > > [1.260440] IRQ8 -> 0:8
> > > > > > [1.260447] IRQ9 -> 0:9
> > > > > > [1.260455] IRQ10 -> 0:10
> > > > > > [1.260462] IRQ11 -> 0:11
> > > > > > [1.260470] IRQ12 -> 0:12
> > > > > > [1.260478] IRQ13 -> 0:13
> > > > > > [1.260485] IRQ14 -> 0:14
> > > > > > [1.260493] IRQ15 -> 0:15
> > > > > > [1.260505] IRQ106 -> 1:8
> > > > > > [1.260513] IRQ112 -> 1:4
> > > > > > [1.260521] IRQ116 -> 1:13
> > > > > > [1.260529] IRQ117 -> 1:14
> > > > > > [1.260537] IRQ118 -> 1:15
> > > > > > [1.260544]  done.
> > > > > 
> > > > > And what does Linux think are IRQs 16 ... 105? Have you compared with
> > > > > Linux running baremetal on the same hardware?
> > > > 
> > > > So I have some emails from Ray from the time he was looking into this,
> > > > and on Linux dom0 PVH dmesg there is:
> > > > 
> > > > [0.065063] IOAPIC[0]: apic_id 33, version 17, address 0xfec00000, 
> > > > GSI 0-23
> > > > [0.065096] IOAPIC[1]: apic_id 34, version 17, address 0xfec01000, 
> > > > GSI 24-55
> > > > 
> > > > So it seems the vIO-APIC data provided by Xen to dom0 is at least
> > > > consistent.
> > > >  
> > > > > > And I think Ray traced the point in Linux where Linux gives us an 
> > > > > > IRQ ==
> > > > > > 112 (which is the one causing issues):
> > > > > > 
> > > > > > __acpi_register_gsi->
> > > > > > acpi_register_gsi_ioapic->
> > > > > > mp_map_gsi_to_irq->
> > > > > > mp_map_pin_to_irq->
> > > > > > __irq_resolve_mapping()
> > > > > > 
> > > > > > if (likely(data)) {
> > > > > > desc = irq_data_to_desc(data);
> > > > > > if (irq)
> > > > > > *irq = data->irq;
> > > > > > /* this IRQ is 112, IO-APIC-34 domain */
> > > > > > }
> > > > 
> > > > 
> > > > Could this all be a result of patch 4/5 in the Linux series ("[RFC
> > > > PATCH 4/5] x86/xen: acpi registers gsi for xen pvh"), where a different
> > > > __acpi_register_gsi hook is installed for PVH in order to setup GSIs
> > > > using PHYSDEV ops instead of doing it natively from the IO-APIC?
> > > > 
> > > > FWIW, the introduced function in that patch
> > > > (acpi_register_gsi_xen_pvh()) seems to unconditionally call
> > > > acpi_register_gsi_ioapic() without checking if the GSI is already
> > > > registered, which might lead to multiple IRQs being allocated for the
> > > > same underlying GSI?
> > > 
> > > I understand this point and I think it needs investigating.
> > > 
> > > 
> > > > As I commented there, I think that approach is wrong.  If the GSI has
> > > > not been mapped in Xen (because dom0 hasn't unmasked the respective
> > > > IO-APIC pin) we should add some logic in the toolstack to map it
> > > > before attempting to bind.
> > > 
> > > But this statement confuses me. The toolstack doesn't get involved in
> > > IRQ setup for PCI devices for HVM guests?
> > 
> > It does for GSI interrupts AFAICT, see pci_add_dm_done() and the call
> > to xc_physdev_map_pirq().  I'm not sure whether that's a remnant that
> > could be removed (maybe for qemu-trad only?) or it's also required by
> > QEMU upstream, I would have to investigate more.
> 
> You are right. I am not certain, but it seems like a mistake in the
> toolstack to me. In theory, pci_add_dm_done should only be needed for PV
> guests, not for HVM guests. I am not sure. But I can see the call to
> xc_physdev_map_pirq you were referring to now.
> 
> 
> > It's my understanding it's in pci_add_dm_done() where Ray was getting
> > the mismatched IRQ vs GSI number.
> 
> I think the mismatch was actually caused by the xc_physdev_map_pirq call
> from QEMU, which makes sense because in any case it should happen before
> the same call done by pci_add_dm_done (pci_add_dm_done is called after
> sending the pci passthrough QMP command

Re: [PATCH v2 2/2] x86/APIC: modify error_interrupt() to output using single printk()

2023-03-20 Thread Elliott Mitchell

On Mon, Mar 20, 2023 at 09:56:54AM +0100, Jan Beulich wrote:
> On 17.03.2023 20:53, Elliott Mitchell wrote:
> > This takes care of the issue of APIC errors tending to occur on multiple
> > cores at one.  In turn this tends to causes the error messages to be
> 
> Nit: "at once"?

https://en.wiktionary.org/wiki/at_once

Adverb #2, synonym of "simultaneously".

> > @@ -1419,12 +1420,12 @@ static void cf_check error_interrupt(struct 
> > cpu_user_regs *regs)
> >  v1 = apic_read(APIC_ESR);
> >  ack_APIC_irq();
> >  
> > -printk(XENLOG_DEBUG "APIC error on CPU%u: %02x(%02x)",
> > -smp_processor_id(), v , v1);
> >  for ( i = 7; i >= 0; --i )
> > -if ( v1 & (1 << i) )
> > -printk("%s", esr_fields[i]);
> > -printk("\n");
> > +entries[i] = v1 & (1 << i) ? esr_fields[i] : "";
> > +printk(XENLOG_DEBUG "APIC error on CPU%u: %02x(%02x)"
> > +"%s%s%s%s%s%s%s%s" "\n",
> > +smp_processor_id(), v , v1, entries[0], entries[1], entries[2],
> > +entries[3], entries[4], entries[5], entries[6], entries[7]);
> 
> Two style nits: Indentation wants fixing here (it was wrong in the original
> code already), and the stray blank between v and the comma also wants
> dropping at this occasion.

There are several minor issues here which may be best handled during
commit as they're very small items about how precisely you want this to
look.

First, I later realized I goofed the argument order.  In order to match
the original implementation, it needs to be entries[7] ... entries[0]
(could though be the low-order bits should be reported first).

Second, the order of the for loop no longer matters.  Using
ARRAY_SIZE(esr_fields) and increment should now be more maintainable
(this would also allow i to be unsigned).

Third, I'm simply unsure how you would prefer to format the printk().
I imagine at some future point the register may have additional bits
allocated.  At such point esr_fields[] would grow, but this would also
require adding to the format string and adding an argument.

Seems like several fiddly bits which mostly affect look and don't really
affect the implementation.

-- 
(\___(\___(\__  --=> 8-) EHM <=--  __/)___/)___/)
 \BS (| ehem+sig...@m5p.com  PGP 87145445 |)   /
  \_CS\   |  _  -O #include  O-   _  |   /  _/
8A19\___\_|_/58D2 7E3D DDF4 7BA6 <-PGP-> 41D1 B375 37D0 8714\_|_/___/5445

Re: [help] Xen 4.14.5 on Devuan 4.0 Chimaera, regression from Xen 4.0.1

2023-03-20 Thread Jan Beulich

On 20.03.2023 13:46, Denis wrote:
> On 20.03.2023 12:01, Andrew Cooper wrote:
>> On 19/03/2023 7:38 pm, Denis wrote:
>>> As you suspected, there are a few IO page faults at the end of the boot 
>>> process
>>> (from my limited understanding it's maybe related to 
>>> "00:14.0 SMBus: Advanced Micro Devices, Inc. [AMD/ATI] SBx00 SMBus 
>>> Controller (rev 41)")
>>>
>>> I'll attach the "xl dmesg" output file.
>>
>> Do you have this file?
>>
>> If they're only at the end of boot and not later around passthrough,
>> then they might be from other functionality in the Southbridge.
> 
> Sorry, my bad, forgot to attach it :(
> 
> After booting, the messages appear only at the end, yet when running the HVM 
> domU,
> a few lines pop up there as well.

The addresses are always in the HyperTransport interrupt remapping range,
so to me this is a fair indication of there being something amiss IO-APIC-
wise. As it stands I only view the "disable intremap" route as appropriate
for this system.

Jan

Re: [PATCH-for-8.1 4/5] bulk: Do not declare function prototypes using extern keyword

2023-03-20 Thread Philippe Mathieu-Daudé


On 20/3/23 14:48, Daniel P. Berrangé wrote:

On Mon, Mar 20, 2023 at 02:42:18PM +0100, Philippe Mathieu-Daudé wrote:

By default, C function prototypes declared in headers are visible,
so there is no need to declare them as 'extern' functions.
Remove this redundancy in a single bulk commit; do not modify:

   - meson.build (used to check function availability at runtime)
   - pc-bios
   - libdecnumber
   - *.c

Signed-off-by: Philippe Mathieu-Daudé 
---
  block/dmg.h|  8 +++
  bsd-user/bsd-file.h|  6 ++---
  crypto/hmacpriv.h  | 13 +--
  hw/xen/xen_pt.h|  8 +++
  include/crypto/secret_common.h | 14 +---
  include/exec/page-vary.h   |  4 ++--
  include/hw/misc/aspeed_scu.h   |  2 +-
  include/hw/nvram/npcm7xx_otp.h |  4 ++--
  include/hw/qdev-core.h |  4 ++--
  include/qemu/crc-ccitt.h   |  4 ++--
  include/qemu/osdep.h   |  2 +-
  include/qemu/rcu.h | 14 ++--
  include/qemu/sys_membarrier.h  |  4 ++--
  include/qemu/uri.h |  6 ++---
  include/sysemu/accel-blocker.h | 14 ++--
  include/sysemu/os-win32.h  |  4 ++--
  include/user/safe-syscall.h|  4 ++--
  target/i386/sev.h  |  6 ++---
  target/mips/cpu.h  |  4 ++--
  tcg/tcg-internal.h |  4 ++--
  tests/tcg/minilib/minilib.h|  2 +-
  include/exec/memory_ldst.h.inc | 42 +-
  roms/seabios   |  2 +-


Accidental submodule commit.


  23 files changed, 84 insertions(+), 91 deletions(-)

diff --git a/block/dmg.h b/block/dmg.h
index e488601b62..ed209b5dec 100644
--- a/block/dmg.h
+++ b/block/dmg.h
@@ -51,10 +51,10 @@ typedef struct BDRVDMGState {
  z_stream zstream;
  } BDRVDMGState;
  
-extern int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,

- char *next_out, unsigned int avail_out);
+int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
+  char *next_out, unsigned int avail_out);
  
-extern int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,

-   char *next_out, unsigned int avail_out);
+int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
+char *next_out, unsigned int avail_out);


These are variable declarations, so with this change you'll get multiple
copies of the variable if this header is included from multiple source
files. IOW, the 'extern' usage is correct.


Doh indeed, good catch, thanks.


diff --git a/roms/seabios b/roms/seabios
index ea1b7a0733..3208b098f5 160000
--- a/roms/seabios
+++ b/roms/seabios
@@ -1 +1 @@
-Subproject commit ea1b7a0733906b8425d948ae94fba63c32b1d425
+Subproject commit 3208b098f51a9ef96d0dfa71d5ec3a3eaec88f0a


Nope !


Oops...

Re: [PATCH v2 1/2] x86/APIC: include full string with error_interrupt() error messages

2023-03-20 Thread Elliott Mitchell

On Mon, Mar 20, 2023 at 09:49:14AM +0100, Jan Beulich wrote:
> On 17.03.2023 20:45, Elliott Mitchell wrote:
> > Rather than adding ", " with each printf(), simply include them in the
> > string initially.
> 
> Why is this better? You're now using more space in .rodata. (I haven't
> looked at patch 2 yet to see whether there's a possible reason there
> for the change here, but if there was it would need saying here.)

I would expect this to give trivially better performance.  Instead of
needing to copy some data from the format string, then strcat()
from the arguments this turns it into a single strcat().

Other item is this sort of change is very often a precursor to replacing
the use of a *printf()-type function with a str*cat()-type function.
Though in this case I doubt there is a strlcatk() function so that is
unlikely.

-- 
(\___(\___(\__  --=> 8-) EHM <=--  __/)___/)___/)
 \BS (| ehem+sig...@m5p.com  PGP 87145445 |)   /
  \_CS\   |  _  -O #include  O-   _  |   /  _/
8A19\___\_|_/58D2 7E3D DDF4 7BA6 <-PGP-> 41D1 B375 37D0 8714\_|_/___/5445

Re: [PATCH-for-8.1 4/5] bulk: Do not declare function prototypes using extern keyword

2023-03-20 Thread Daniel P . Berrangé

On Mon, Mar 20, 2023 at 02:42:18PM +0100, Philippe Mathieu-Daudé wrote:
> By default, C function prototypes declared in headers are visible,
> so there is no need to declare them as 'extern' functions.
> Remove this redundancy in a single bulk commit; do not modify:
> 
>   - meson.build (used to check function availability at runtime)
>   - pc-bios
>   - libdecnumber
>   - *.c
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  block/dmg.h|  8 +++
>  bsd-user/bsd-file.h|  6 ++---
>  crypto/hmacpriv.h  | 13 +--
>  hw/xen/xen_pt.h|  8 +++
>  include/crypto/secret_common.h | 14 +---
>  include/exec/page-vary.h   |  4 ++--
>  include/hw/misc/aspeed_scu.h   |  2 +-
>  include/hw/nvram/npcm7xx_otp.h |  4 ++--
>  include/hw/qdev-core.h |  4 ++--
>  include/qemu/crc-ccitt.h   |  4 ++--
>  include/qemu/osdep.h   |  2 +-
>  include/qemu/rcu.h | 14 ++--
>  include/qemu/sys_membarrier.h  |  4 ++--
>  include/qemu/uri.h |  6 ++---
>  include/sysemu/accel-blocker.h | 14 ++--
>  include/sysemu/os-win32.h  |  4 ++--
>  include/user/safe-syscall.h|  4 ++--
>  target/i386/sev.h  |  6 ++---
>  target/mips/cpu.h  |  4 ++--
>  tcg/tcg-internal.h |  4 ++--
>  tests/tcg/minilib/minilib.h|  2 +-
>  include/exec/memory_ldst.h.inc | 42 +-
>  roms/seabios   |  2 +-

Accidental submodule commit.

>  23 files changed, 84 insertions(+), 91 deletions(-)
> 
> diff --git a/block/dmg.h b/block/dmg.h
> index e488601b62..ed209b5dec 100644
> --- a/block/dmg.h
> +++ b/block/dmg.h
> @@ -51,10 +51,10 @@ typedef struct BDRVDMGState {
>  z_stream zstream;
>  } BDRVDMGState;
>  
> -extern int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
> - char *next_out, unsigned int avail_out);
> +int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
> +  char *next_out, unsigned int avail_out);
>  
> -extern int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
> -   char *next_out, unsigned int avail_out);
> +int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
> +char *next_out, unsigned int avail_out);

These are variable declarations, so with this change you'll get multiple
copies of the variable if this header is included from multiple source
files. IOW, the 'extern' usage is correct.

> diff --git a/roms/seabios b/roms/seabios
> index ea1b7a0733..3208b098f5 160000
> --- a/roms/seabios
> +++ b/roms/seabios
> @@ -1 +1 @@
> -Subproject commit ea1b7a0733906b8425d948ae94fba63c32b1d425
> +Subproject commit 3208b098f51a9ef96d0dfa71d5ec3a3eaec88f0a

Nope !

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

[PATCH-for-8.1 4/5] bulk: Do not declare function prototypes using extern keyword

2023-03-20 Thread Philippe Mathieu-Daudé

By default, C function prototypes declared in headers are visible,
so there is no need to declare them as 'extern' functions.
Remove this redundancy in a single bulk commit; do not modify:

  - meson.build (used to check function availability at runtime)
  - pc-bios
  - libdecnumber
  - *.c

Signed-off-by: Philippe Mathieu-Daudé 
---
 block/dmg.h|  8 +++
 bsd-user/bsd-file.h|  6 ++---
 crypto/hmacpriv.h  | 13 +--
 hw/xen/xen_pt.h|  8 +++
 include/crypto/secret_common.h | 14 +---
 include/exec/page-vary.h   |  4 ++--
 include/hw/misc/aspeed_scu.h   |  2 +-
 include/hw/nvram/npcm7xx_otp.h |  4 ++--
 include/hw/qdev-core.h |  4 ++--
 include/qemu/crc-ccitt.h   |  4 ++--
 include/qemu/osdep.h   |  2 +-
 include/qemu/rcu.h | 14 ++--
 include/qemu/sys_membarrier.h  |  4 ++--
 include/qemu/uri.h |  6 ++---
 include/sysemu/accel-blocker.h | 14 ++--
 include/sysemu/os-win32.h  |  4 ++--
 include/user/safe-syscall.h|  4 ++--
 target/i386/sev.h  |  6 ++---
 target/mips/cpu.h  |  4 ++--
 tcg/tcg-internal.h |  4 ++--
 tests/tcg/minilib/minilib.h|  2 +-
 include/exec/memory_ldst.h.inc | 42 +-
 roms/seabios   |  2 +-
 23 files changed, 84 insertions(+), 91 deletions(-)

diff --git a/block/dmg.h b/block/dmg.h
index e488601b62..ed209b5dec 100644
--- a/block/dmg.h
+++ b/block/dmg.h
@@ -51,10 +51,10 @@ typedef struct BDRVDMGState {
 z_stream zstream;
 } BDRVDMGState;
 
-extern int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
- char *next_out, unsigned int avail_out);
+int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
+  char *next_out, unsigned int avail_out);
 
-extern int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
-   char *next_out, unsigned int avail_out);
+int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
+char *next_out, unsigned int avail_out);
 
 #endif
diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index 588e0c50d4..3c00dc0056 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -51,10 +51,8 @@ do {\
 unlock_user(p1, arg1, 0);   \
 } while (0)
 
-extern struct iovec *lock_iovec(int type, abi_ulong target_addr, int count,
-int copy);
-extern void unlock_iovec(struct iovec *vec, abi_ulong target_addr, int count,
-int copy);
+struct iovec *lock_iovec(int type, abi_ulong target_addr, int count, int copy);
+void unlock_iovec(struct iovec *vec, abi_ulong target_addr, int count, int 
copy);
 
 int safe_open(const char *path, int flags, mode_t mode);
 int safe_openat(int fd, const char *path, int flags, mode_t mode);
diff --git a/crypto/hmacpriv.h b/crypto/hmacpriv.h
index 4387ca2587..62dfe8257a 100644
--- a/crypto/hmacpriv.h
+++ b/crypto/hmacpriv.h
@@ -28,19 +28,18 @@ struct QCryptoHmacDriver {
 void (*hmac_free)(QCryptoHmac *hmac);
 };
 
-extern void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
-  const uint8_t *key, size_t nkey,
-  Error **errp);
+void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg,
+   const uint8_t *key, size_t nkey,
+   Error **errp);
 extern QCryptoHmacDriver qcrypto_hmac_lib_driver;
 
 #ifdef CONFIG_AF_ALG
 
 #include "afalgpriv.h"
 
-extern QCryptoAFAlg *
-qcrypto_afalg_hmac_ctx_new(QCryptoHashAlgorithm alg,
-   const uint8_t *key, size_t nkey,
-   Error **errp);
+QCryptoAFAlg *qcrypto_afalg_hmac_ctx_new(QCryptoHashAlgorithm alg,
+ const uint8_t *key, size_t nkey,
+ Error **errp);
 extern QCryptoHmacDriver qcrypto_hmac_afalg_driver;
 
 #endif
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index b20744f7c7..31bcfdf705 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -340,11 +340,9 @@ static inline bool 
xen_pt_has_msix_mapping(XenPCIPassthroughState *s, int bar)
 return s->msix && s->msix->bar_index == bar;
 }
 
-extern void *pci_assign_dev_load_option_rom(PCIDevice *dev,
-int *size,
-unsigned int domain,
-unsigned int bus, unsigned int 
slot,
-unsigned int function);
+void *pci_assign_dev_load_option_rom(PCIDevice *dev, int *size,
+ unsigned int domain, unsigned int bus,
+ unsigned int slot, unsigned int function);
 static inline bool is_igd_vga_passthrough(XenHostPCIDevice *dev)
 {
 return

[ovmf test] 179808: all pass - PUSHED

2023-03-20 Thread osstest service owner

flight 179808 ovmf real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179808/

Perfect :-)
All tests in this flight passed as required
version targeted for testing:
 ovmf b7a8264ae4704f781e70cc44dafdf07e4e5e690a
baseline version:
 ovmf b17a3a133b18fb41493fba7d86e9b5804ea6a8cf

Last test of basis   179730  2023-03-17 18:22:13 Z2 days
Testing same since   179808  2023-03-20 11:40:43 Z0 days1 attempts


People who touched revisions under test:
  Abner Chang 

jobs:
 build-amd64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

To xenbits.xen.org:/home/xen/git/osstest/ovmf.git
   b17a3a133b..b7a8264ae4  b7a8264ae4704f781e70cc44dafdf07e4e5e690a -> 
xen-tested-master

Re: [PATCH] x86/Xen: make use of IBPB controlling VM assist

2023-03-20 Thread Juergen Gross


On 20.03.23 14:17, Jan Beulich wrote:

On 20.03.2023 14:02, Juergen Gross wrote:

On 20.03.23 11:19, Jan Beulich wrote:

On 17.03.2023 14:56, Juergen Gross wrote:

+void __init xen_pv_fix_mitigations(void)
+{
+   if (!xen_vm_assist_ibpb(true))
+   setup_clear_cpu_cap(X86_FEATURE_ENTRY_IBPB);


... using both setup_clear_cpu_cap() (here) and setup_force_cpu_cap()
(in retbleed_select_mitigation()) won't work: The latter wins, due to
how apply_forced_caps() works.


Oh, right.

Just a wild guess of mine: probably the x86 maintainers would still prefer
a single Xen hook plus something like a setup_unforce_cpu_cap() addition.


If so, I'm not willing to make such a patch. That's clearly more fragile
than the approach chosen. I guess once I've made the one adjustment you
have pointed out, I'll resubmit otherwise unchanged and include x86 folks.
We'll see what the responses are going to be, if any at all.


Fine with me.




But of course calling both functions for the same feature is bogus
anyway. In fact I think it is for a good reason that in Xen we log a
message in such an event.


Depends. For Xen we do so in the kernel for multiple features, see
xen_init_capabilities().


I don't see anything there which looks like it might be both "force"d
and "clear"ed in a single session.


Oh, I misunderstood you then.


Juergen



OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature

Re: [PATCH] x86/Xen: make use of IBPB controlling VM assist

2023-03-20 Thread Jan Beulich

On 20.03.2023 14:02, Juergen Gross wrote:
> On 20.03.23 11:19, Jan Beulich wrote:
>> On 17.03.2023 14:56, Juergen Gross wrote:
>>> +void __init xen_pv_fix_mitigations(void)
>>> +{
>>> +   if (!xen_vm_assist_ibpb(true))
>>> +   setup_clear_cpu_cap(X86_FEATURE_ENTRY_IBPB);
>>
>> ... using both setup_clear_cpu_cap() (here) and setup_force_cpu_cap()
>> (in retbleed_select_mitigation()) won't work: The latter wins, due to
>> how apply_forced_caps() works.
> 
> Oh, right.
> 
> Just a wild guess of mine: probably the x86 maintainers would still prefer
> a single Xen hook plus something like a setup_unforce_cpu_cap() addition.

If so, I'm not willing to make such a patch. That's clearly more fragile
than the approach chosen. I guess once I've made the one adjustment you
have pointed out, I'll resubmit otherwise unchanged and include x86 folks.
We'll see what the responses are going to be, if any at all.

>> But of course calling both functions for the same feature is bogus
>> anyway. In fact I think it is for a good reason that in Xen we log a
>> message in such an event.
> 
> Depends. For Xen we do so in the kernel for multiple features, see
> xen_init_capabilities().

I don't see anything there which looks like it might be both "force"d
and "clear"ed in a single session.

Jan

[PATCH v6 0/4] Add pci_dev_for_each_resource() helper and update users

2023-03-20 Thread Andy Shevchenko

Provide two new helper macros to iterate over PCI device resources and
convert users.

Looking at it, refactor existing pci_bus_for_each_resource() and convert
users accordingly.

Changelog v6:
- dropped unused variable in PPC code (LKP)

Changelog v5:
- renamed loop variable to minimize the clash (Keith)
- addressed smatch warning (Dan)
- addressed 0-day bot findings (LKP)

Changelog v4:
- rebased on top of v6.3-rc1
- added tag (Krzysztof)

Changelog v3:
- rebased on top of v2 by Mika, see above
- added tag to pcmcia patch (Dominik)

Changelog v2:
- refactor to have two macros
- refactor existing pci_bus_for_each_resource() in the same way and
  convert users

Andy Shevchenko (3):
  PCI: Split pci_bus_for_each_resource_p() out of
pci_bus_for_each_resource()
  EISA: Convert to use pci_bus_for_each_resource_p()
  pcmcia: Convert to use pci_bus_for_each_resource_p()

Mika Westerberg (1):
  PCI: Introduce pci_dev_for_each_resource()

 .clang-format |  3 ++
 arch/alpha/kernel/pci.c   |  5 ++-
 arch/arm/kernel/bios32.c  | 16 +-
 arch/arm/mach-dove/pcie.c | 10 +++---
 arch/arm/mach-mv78xx0/pcie.c  | 10 +++---
 arch/arm/mach-orion5x/pci.c   | 10 +++---
 arch/mips/pci/ops-bcm63xx.c   |  8 ++---
 arch/mips/pci/pci-legacy.c|  3 +-
 arch/powerpc/kernel/pci-common.c  | 21 +++--
 arch/powerpc/platforms/4xx/pci.c  |  8 ++---
 arch/powerpc/platforms/52xx/mpc52xx_pci.c |  5 ++-
 arch/powerpc/platforms/pseries/pci.c  | 16 +-
 arch/sh/drivers/pci/pcie-sh7786.c | 10 +++---
 arch/sparc/kernel/leon_pci.c  |  5 ++-
 arch/sparc/kernel/pci.c   | 10 +++---
 arch/sparc/kernel/pcic.c  |  5 ++-
 drivers/eisa/pci_eisa.c   |  4 +--
 drivers/pci/bus.c |  7 ++---
 drivers/pci/hotplug/shpchp_sysfs.c|  8 ++---
 drivers/pci/pci.c |  5 ++-
 drivers/pci/probe.c   |  2 +-
 drivers/pci/remove.c  |  5 ++-
 drivers/pci/setup-bus.c   | 37 +--
 drivers/pci/setup-res.c   |  4 +--
 drivers/pci/vgaarb.c  | 17 +++
 drivers/pci/xen-pcifront.c|  4 +--
 drivers/pcmcia/rsrc_nonstatic.c   |  9 ++
 drivers/pcmcia/yenta_socket.c |  3 +-
 drivers/pnp/quirks.c  | 29 ++
 include/linux/pci.h   | 29 ++
 30 files changed, 142 insertions(+), 166 deletions(-)

-- 
2.39.2

[PATCH v6 3/4] EISA: Convert to use pci_bus_for_each_resource_p()

2023-03-20 Thread Andy Shevchenko

The pci_bus_for_each_resource_p() hides the iterator loop since
it may be not used otherwise. With this, we may drop that iterator
variable definition.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Krzysztof Wilczyński 
---
 drivers/eisa/pci_eisa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c
index 930c2332c3c4..907b86384396 100644
--- a/drivers/eisa/pci_eisa.c
+++ b/drivers/eisa/pci_eisa.c
@@ -20,8 +20,8 @@ static struct eisa_root_device pci_eisa_root;
 
 static int __init pci_eisa_init(struct pci_dev *pdev)
 {
-   int rc, i;
struct resource *res, *bus_res = NULL;
+   int rc;
 
if ((rc = pci_enable_device (pdev))) {
dev_err(&pdev->dev, "Could not enable device\n");
@@ -38,7 +38,7 @@ static int __init pci_eisa_init(struct pci_dev *pdev)
 * eisa_root_register() can only deal with a single io port resource,
*  so we use the first valid io port resource.
 */
-   pci_bus_for_each_resource(pdev->bus, res, i)
+   pci_bus_for_each_resource_p(pdev->bus, res)
if (res && (res->flags & IORESOURCE_IO)) {
bus_res = res;
break;
-- 
2.39.2

[PATCH v6 4/4] pcmcia: Convert to use pci_bus_for_each_resource_p()

2023-03-20 Thread Andy Shevchenko

The pci_bus_for_each_resource_p() hides the iterator loop since
it may be not used otherwise. With this, we may drop that iterator
variable definition.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Krzysztof Wilczyński 
Acked-by: Dominik Brodowski 
---
 drivers/pcmcia/rsrc_nonstatic.c | 9 +++--
 drivers/pcmcia/yenta_socket.c   | 3 +--
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
index ad1141fddb4c..9d92d4bb6239 100644
--- a/drivers/pcmcia/rsrc_nonstatic.c
+++ b/drivers/pcmcia/rsrc_nonstatic.c
@@ -934,7 +934,7 @@ static int adjust_io(struct pcmcia_socket *s, unsigned int 
action, unsigned long
 static int nonstatic_autoadd_resources(struct pcmcia_socket *s)
 {
struct resource *res;
-   int i, done = 0;
+   int done = 0;
 
if (!s->cb_dev || !s->cb_dev->bus)
return -ENODEV;
@@ -960,12 +960,9 @@ static int nonstatic_autoadd_resources(struct 
pcmcia_socket *s)
 */
if (s->cb_dev->bus->number == 0)
return -EINVAL;
-
-   for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
-   res = s->cb_dev->bus->resource[i];
-#else
-   pci_bus_for_each_resource(s->cb_dev->bus, res, i) {
 #endif
+
+   pci_bus_for_each_resource_p(s->cb_dev->bus, res) {
if (!res)
continue;
 
diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c
index 1365eaa20ff4..2e5bdf3db0ba 100644
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -673,9 +673,8 @@ static int yenta_search_res(struct yenta_socket *socket, 
struct resource *res,
u32 min)
 {
struct resource *root;
-   int i;
 
-   pci_bus_for_each_resource(socket->dev->bus, root, i) {
+   pci_bus_for_each_resource_p(socket->dev->bus, root) {
if (!root)
continue;
 
-- 
2.39.2

[PATCH v6 2/4] PCI: Split pci_bus_for_each_resource_p() out of pci_bus_for_each_resource()

2023-03-20 Thread Andy Shevchenko

Refactor pci_bus_for_each_resource() in the same way as it's done in
pci_dev_for_each_resource() case. This will allow to hide iterator
inside the loop, where it's not used otherwise.

No functional changes intended.

Signed-off-by: Andy Shevchenko 
Reviewed-by: Krzysztof Wilczyński 
---
 .clang-format  |  1 +
 drivers/pci/bus.c  |  7 +++
 drivers/pci/hotplug/shpchp_sysfs.c |  8 
 drivers/pci/pci.c  |  5 ++---
 drivers/pci/probe.c|  2 +-
 drivers/pci/setup-bus.c| 10 --
 include/linux/pci.h| 14 ++
 7 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/.clang-format b/.clang-format
index 266abb843654..81c9f055086f 100644
--- a/.clang-format
+++ b/.clang-format
@@ -520,6 +520,7 @@ ForEachMacros:
   - 'of_property_for_each_string'
   - 'of_property_for_each_u32'
   - 'pci_bus_for_each_resource'
+  - 'pci_bus_for_each_resource_p'
   - 'pci_dev_for_each_resource'
   - 'pci_dev_for_each_resource_p'
   - 'pci_doe_for_each_off'
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 549c4bd5caec..b0789d332d36 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -182,13 +182,13 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, 
struct resource *res,
void *alignf_data,
struct pci_bus_region *region)
 {
-   int i, ret;
struct resource *r, avail;
resource_size_t max;
+   int ret;
 
type_mask |= IORESOURCE_TYPE_BITS;
 
-   pci_bus_for_each_resource(bus, r, i) {
+   pci_bus_for_each_resource_p(bus, r) {
resource_size_t min_used = min;
 
if (!r)
@@ -289,9 +289,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx)
struct resource *res = &dev->resource[idx];
struct resource orig_res = *res;
struct resource *r;
-   int i;
 
-   pci_bus_for_each_resource(bus, r, i) {
+   pci_bus_for_each_resource_p(bus, r) {
resource_size_t start, end;
 
if (!r)
diff --git a/drivers/pci/hotplug/shpchp_sysfs.c 
b/drivers/pci/hotplug/shpchp_sysfs.c
index 64beed7a26be..ff04f0c5e7c3 100644
--- a/drivers/pci/hotplug/shpchp_sysfs.c
+++ b/drivers/pci/hotplug/shpchp_sysfs.c
@@ -24,16 +24,16 @@
 static ssize_t show_ctrl(struct device *dev, struct device_attribute *attr, 
char *buf)
 {
struct pci_dev *pdev;
-   int index, busnr;
struct resource *res;
struct pci_bus *bus;
size_t len = 0;
+   int busnr;
 
pdev = to_pci_dev(dev);
bus = pdev->subordinate;
 
len += sysfs_emit_at(buf, len, "Free resources: memory\n");
-   pci_bus_for_each_resource(bus, res, index) {
+   pci_bus_for_each_resource_p(bus, res) {
if (res && (res->flags & IORESOURCE_MEM) &&
!(res->flags & IORESOURCE_PREFETCH)) {
len += sysfs_emit_at(buf, len,
@@ -43,7 +43,7 @@ static ssize_t show_ctrl(struct device *dev, struct 
device_attribute *attr, char
}
}
len += sysfs_emit_at(buf, len, "Free resources: prefetchable memory\n");
-   pci_bus_for_each_resource(bus, res, index) {
+   pci_bus_for_each_resource_p(bus, res) {
if (res && (res->flags & IORESOURCE_MEM) &&
   (res->flags & IORESOURCE_PREFETCH)) {
len += sysfs_emit_at(buf, len,
@@ -53,7 +53,7 @@ static ssize_t show_ctrl(struct device *dev, struct 
device_attribute *attr, char
}
}
len += sysfs_emit_at(buf, len, "Free resources: IO\n");
-   pci_bus_for_each_resource(bus, res, index) {
+   pci_bus_for_each_resource_p(bus, res) {
if (res && (res->flags & IORESOURCE_IO)) {
len += sysfs_emit_at(buf, len,
 "start = %8.8llx, length = 
%8.8llx\n",
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7a67611dc5f4..2f8915ab41ef 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -779,9 +779,8 @@ struct resource *pci_find_parent_resource(const struct 
pci_dev *dev,
 {
const struct pci_bus *bus = dev->bus;
struct resource *r;
-   int i;
 
-   pci_bus_for_each_resource(bus, r, i) {
+   pci_bus_for_each_resource_p(bus, r) {
if (!r)
continue;
if (resource_contains(r, res)) {
@@ -799,7 +798,7 @@ struct resource *pci_find_parent_resource(const struct 
pci_dev *dev,
 * be both a positively-decoded aperture and a
 * subtractively-decoded region that contain the BAR.
 * We want the positively-decoded one, so this depends
-* on pci_bus_for_each_resource() giving us those
+* on pci_bus_for_each_resource_p() giving us those
 *

[PATCH v6 1/4] PCI: Introduce pci_dev_for_each_resource()

2023-03-20 Thread Andy Shevchenko

From: Mika Westerberg 

Instead of open-coding it everywhere introduce a tiny helper that can be
used to iterate over each resource of a PCI device, and convert the most
obvious users into it.

While at it drop doubled empty line before pdev_sort_resources().

No functional changes intended.

Suggested-by: Andy Shevchenko 
Signed-off-by: Mika Westerberg 
Signed-off-by: Andy Shevchenko 
Reviewed-by: Krzysztof Wilczyński 
---
 .clang-format |  2 ++
 arch/alpha/kernel/pci.c   |  5 ++--
 arch/arm/kernel/bios32.c  | 16 ++---
 arch/arm/mach-dove/pcie.c | 10 
 arch/arm/mach-mv78xx0/pcie.c  | 10 
 arch/arm/mach-orion5x/pci.c   | 10 
 arch/mips/pci/ops-bcm63xx.c   |  8 +++
 arch/mips/pci/pci-legacy.c|  3 +--
 arch/powerpc/kernel/pci-common.c  | 21 
 arch/powerpc/platforms/4xx/pci.c  |  8 +++
 arch/powerpc/platforms/52xx/mpc52xx_pci.c |  5 ++--
 arch/powerpc/platforms/pseries/pci.c  | 16 ++---
 arch/sh/drivers/pci/pcie-sh7786.c | 10 
 arch/sparc/kernel/leon_pci.c  |  5 ++--
 arch/sparc/kernel/pci.c   | 10 
 arch/sparc/kernel/pcic.c  |  5 ++--
 drivers/pci/remove.c  |  5 ++--
 drivers/pci/setup-bus.c   | 27 -
 drivers/pci/setup-res.c   |  4 +---
 drivers/pci/vgaarb.c  | 17 -
 drivers/pci/xen-pcifront.c|  4 +---
 drivers/pnp/quirks.c  | 29 ---
 include/linux/pci.h   | 15 ++--
 23 files changed, 111 insertions(+), 134 deletions(-)

diff --git a/.clang-format b/.clang-format
index d988e9fa9b26..266abb843654 100644
--- a/.clang-format
+++ b/.clang-format
@@ -520,6 +520,8 @@ ForEachMacros:
   - 'of_property_for_each_string'
   - 'of_property_for_each_u32'
   - 'pci_bus_for_each_resource'
+  - 'pci_dev_for_each_resource'
+  - 'pci_dev_for_each_resource_p'
   - 'pci_doe_for_each_off'
   - 'pcl_for_each_chunk'
   - 'pcl_for_each_segment'
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 64fbfb0763b2..4458eb7f44f0 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -288,11 +288,10 @@ pcibios_claim_one_bus(struct pci_bus *b)
struct pci_bus *child_bus;
 
list_for_each_entry(dev, >devices, bus_list) {
+   struct resource *r;
int i;
 
-   for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-   struct resource *r = >resource[i];
-
+   pci_dev_for_each_resource(dev, r, i) {
if (r->parent || !r->start || !r->flags)
continue;
if (pci_has_flag(PCI_PROBE_ONLY) ||
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index e7ef2b5bea9c..5254734b23e6 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -142,15 +142,15 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND2, 
PCI_DEVICE_ID_WINBOND2_89C940F,
  */
 static void pci_fixup_dec21285(struct pci_dev *dev)
 {
-   int i;
-
if (dev->devfn == 0) {
+   struct resource *r;
+
dev->class &= 0xff;
dev->class |= PCI_CLASS_BRIDGE_HOST << 8;
-   for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-   dev->resource[i].start = 0;
-   dev->resource[i].end   = 0;
-   dev->resource[i].flags = 0;
+   pci_dev_for_each_resource_p(dev, r) {
+   r->start = 0;
+   r->end = 0;
+   r->flags = 0;
}
}
 }
@@ -162,13 +162,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_DEC, 
PCI_DEVICE_ID_DEC_21285, pci_fixup_d
 static void pci_fixup_ide_bases(struct pci_dev *dev)
 {
struct resource *r;
-   int i;
 
if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
return;
 
-   for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-   r = dev->resource + i;
+   pci_dev_for_each_resource_p(dev, r) {
if ((r->start & ~0x80) == 0x374) {
r->start |= 2;
r->end = r->start;
diff --git a/arch/arm/mach-dove/pcie.c b/arch/arm/mach-dove/pcie.c
index 754ca381f600..58cecd79a204 100644
--- a/arch/arm/mach-dove/pcie.c
+++ b/arch/arm/mach-dove/pcie.c
@@ -142,14 +142,14 @@ static struct pci_ops pcie_ops = {
 static void rc_pci_fixup(struct pci_dev *dev)
 {
if (dev->bus->parent == NULL && dev->devfn == 0) {
-   int i;
+   struct resource *r;
 
dev->class &= 0xff;
dev->class |= PCI_CLASS_BRIDGE_HOST << 8;
-   for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-

Re: [PATCH] x86/Xen: make use of IBPB controlling VM assist

2023-03-20 Thread Juergen Gross


On 20.03.23 11:19, Jan Beulich wrote:

On 17.03.2023 14:56, Juergen Gross wrote:

On 15.02.23 09:31, Jan Beulich wrote:

Eventually yes. But I would prefer to sort the above question first
(which I'm sure would have been raised by them, in perhaps more
harsh a way), hence the initially limited exposure.


I'd rather add _one_ hook for Xen-PV in check_bugs() just before the call
of arch_smt_update(). This can then correct any needed mitigation settings.


Doing this in single central place is what I was originally hoping I
could do. But that simply doesn't work (afaict): It is for a reason
that I apply the adjustment in the RETBLEED_MITIGATION_IBPB case, by
suppressing the setting of the feature bit. Not the least because ...


So something like (note that due to using cpu_feature_enabled(X86_FEATURE_XENPV)
DCE is happening in case CONFIG_XEN_PV isn't defined):

--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -63,4 +63,7 @@ void __init xen_pvh_init(struct boot_params *boot_params);
   void __init mem_map_via_hcall(struct boot_params *boot_params_p);
   #endif

+int __init xen_vm_assist_ibpb(bool enable);
+void __init xen_pv_fix_mitigations(void);
+
   #endif /* _ASM_X86_XEN_HYPERVISOR_H */
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -18,6 +18,8 @@
   #include 
   #include 

+#include 
+
   #include 
   #include 
   #include 
@@ -177,6 +179,9 @@ void __init check_bugs(void)
  srbds_select_mitigation();
  l1d_flush_select_mitigation();

+   if (cpu_feature_enabled(X86_FEATURE_XENPV))
+   xen_pv_fix_mitigations();
+
  arch_smt_update();

   #ifdef CONFIG_X86_32
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1476,6 +1476,23 @@ static uint32_t __init xen_platform_pv(void)
  return 0;
   }

+int __init xen_vm_assist_ibpb(bool enable)
+{
+   /*
+* Note that the VM-assist is a disable, so a request to enable IBPB
+* on our behalf needs to turn the functionality off (and vice versa).
+*/
+   return HYPERVISOR_vm_assist(enable ? VMASST_CMD_disable
+  : VMASST_CMD_enable,
+   VMASST_TYPE_mode_switch_no_ibpb);
+}
+
+void __init xen_pv_fix_mitigations(void)
+{
+   if (!xen_vm_assist_ibpb(true))
+   setup_clear_cpu_cap(X86_FEATURE_ENTRY_IBPB);


... using both setup_clear_cpu_cap() (here) and setup_force_cpu_cap()
(in retbleed_select_mitigation()) won't work: The latter wins, due to
how apply_forced_caps() works.


Oh, right.

Just a wild guess of mine: probably the x86 maintainers would still prefer
a single Xen hook plus something like a setup_unforce_cpu_cap() addition.


But of course calling both functions for the same feature is bogus
anyway. In fact I think it is for a good reason that in Xen we log a
message in such an event.


Depends. For Xen we do so in the kernel for multiple features, see
xen_init_capabilities().


Juergen



OpenPGP_0xB0DE9DD628BF132F.asc
Description: OpenPGP public key


OpenPGP_signature
Description: OpenPGP digital signature

Re: [help] Xen 4.14.5 on Devuan 4.0 Chimaera, regression from Xen 4.0.1

2023-03-20 Thread Denis

On 20.03.2023 12:01, Andrew Cooper wrote:
> On 19/03/2023 7:38 pm, Denis wrote:
> > On 14.03.2023 16:11, Andrew Cooper wrote:
> >> On 14/03/2023 2:53 pm, Denis wrote:
> >>> On 14.03.2023 07:37; Jan Beulich wrote:
>  On 14.03.2023 02:15, Denis wrote:
> > On 13.03.2023 10:36, Jan wrote
> >> On 10.03.2023 21:50, Denis wrote:
> >>> Should I test something else?
> >> ... there was no request for any further testing here, for the moment.
> > > ah...sorry, going by "Would be nice to have this confirmed for the system
> > in question, i.e. without Xen underneath Linux" I thought I could test
> > something which might help shed some light on all of this.
>  Well, yes, that Linux-without-Xen test would still be useful to have
>  results from. I didn't account for this in my earlier reply because
>  I had asked for it before already, and I did take "something else"
>  for meaning anything that might have turned up as useful from the new
>  data you had provided.
> >>> What tests could I do or what info should I provide to help?
> >> Can you please rebuild Xen with this patch:
> >>
> >> diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c
> >> b/xen/drivers/passthrough/amd/iommu_acpi.c
> >> index 2fdebd2d74c9..747eae25f56c 100644
> >> --- a/xen/drivers/passthrough/amd/iommu_acpi.c
> >> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c
> >> @@ -1033,7 +1033,7 @@ static int __init parse_ivrs_table(struct
> >> acpi_table_header *table)
> >>  const struct acpi_ivrs_header *ivrs_block;
> >>  unsigned long length;
> >>  unsigned int apic;
> >> -    bool_t sb_ioapic = !iommu_intremap;
> >> +    bool_t sb_ioapic = 1;
> >>  int error = 0;
> >>  
> >>  BUG_ON(!table);
> >>
> >> which should cause the behaviour to revert back to that of Xen 4.0.1 
> >> (i.e. it will fully ignore the checks relating to the southbridge ioapic).
> >>
> >> Confirm that with this, and booting Xen simply with `iommu=1` that full
> >> DMA remapping and interrupt remapping is considered active.
> >>
> >>
> >> Then, can you play around with passing the soundblaster through to VMs. 
> >> Looking at the LSPCI you provided, it only supports legacy line interrupts.
> >>
> >> Does the device work fine, or do you get a bunch of errors on `xl dmesg`
> >> about IO page faults (which is a generic "IOMMU said no to something"
> >> message)?
> > Sorry, it took me a while to get it done.
> >
> > The relevant things are enabled again, passthrough works (even the PCI 
> > Audigy2) 
> > and the devices are recognized in the HVM domU.
> >
> > As you suspected, there are a few IO page faults at the end of the boot 
> > process
> > (from my limited understanding it's maybe related to 
> > "00:14.0 SMBus: Advanced Micro Devices, Inc. [AMD/ATI] SBx00 SMBus 
> > Controller (rev 41)")
> >
> > I'll attach the "xl dmesg" output file.
> 
> Do you have this file?
> 
> If they're only at the end of boot and not later around passthrough,
> then they might be from other functionality in the Southbridge.

Sorry, my bad, forgot to attach it :(

After booting, the messages appear only at the end, yet when running the HVM 
domU,
a few lines pop up there as well.

Also, the domU in question seems to use more CPU than it did on the old system.
I don't know if this is a downside of the newer Xen versions and the changes made
over time. 
 

Denis

(XEN) Xen version 4.14.5 (Devuan 4.14.5+86-g1c354767d5-1) 
(pkg-xen-de...@lists.alioth.debian.org) (x86_64-linux-gnu-gcc (Debian 10.2.1-6) 
10.2.1 20210110) debug=n  Fri Nov  4 19:25:46 UTC 2022
(XEN) build-id: 0f2f44d0ec7833c8466e02314aff674d56788116
(XEN) Bootloader: GRUB 2.06-3~deb11u5
(XEN) Command line: placeholder dom0_mem=768M,max:768M iommu=1,debug loglvl=all 
guest_loglvl=all
(XEN) Xen image load base address: 0xcf40
(XEN) Video information:
(XEN)  VGA is text mode 80x25, font 8x16
(XEN)  VBE/DDC methods: V2; EDID transfer time: 1 seconds
(XEN) Disc information:
(XEN)  Found 1 MBR signatures
(XEN)  Found 1 EDD information structures
(XEN) CPU Vendor: AMD, Family 16 (0x10), Model 4 (0x4), Stepping 3 (raw 
00100f43)
(XEN) Xen-e820 RAM map:
(XEN)  [, 0009b7ff] (usable)
(XEN)  [0009f800, 0009] (reserved)
(XEN)  [000f, 000f] (reserved)
(XEN)  [0010, cfce] (usable)
(XEN)  [cfcf, cfcf0fff] (ACPI NVS)
(XEN)  [cfcf1000, cfcf] (ACPI data)
(XEN)  [cfd0, cfdf] (reserved)
(XEN)  [e000, efff] (reserved)
(XEN)  [fec0, ] (reserved)
(XEN)  [0001, 00042fff] (usable)
(XEN) ACPI: RSDP 000F6100, 0014 (r0 GBT   )
(XEN) ACPI: RSDT CFCF1000, 0044 (r1 GBTGBTUACPI 42302E31 GBTU  1010101)
(XEN) ACPI: FACP CFCF1080, 0074 (r1 GBTGBTUACPI 42302E31 GBTU  1010101)
(XEN) ACPI: DSDT CFCF1100, 7BE3 (r1 GBTGBTUACPI 1000 MSFT  300)
(XEN) ACPI: FACS CFCF, 0040

[PATCH v4] xen/console: Skip switching serial input to non existing domains

2023-03-20 Thread Michal Orzel

At the moment, we direct serial input to hardware domain by default.
This does not make any sense when running in true dom0less mode, since
such domain does not exist. As a result, users wishing to write to
an emulated UART of a domU are always forced to execute CTRL-AAA first.
The same issue is when rotating among serial inputs, where we always
have to go through hardware domain case. This problem can be elaborated
further to all the domains that no longer exist.

Modify switch_serial_input() so that we skip switching serial input to
non existing domains. Take the opportunity to define and make use of
macro max_console_rx to make it clear what 'max_init_domid + 1' means
in the console code context. Also, modify call to printk() to use correct
format specifier for unsigned int.

For now, to minimize the required changes and to match the current
behavior with hwdom, the default input goes to the first real domain.
The choice is more or less arbitrary since dom0less domUs are supposedly
equal. This will be handled in the future by adding support in boot time
configuration for marking a specific domain preferred in terms of
directing serial input to.

Signed-off-by: Michal Orzel 
Reviewed-by: Jan Beulich 
---
Changes in v4:
 - use infinite loop approach
 - remove unneeded else
 - take the opportunity to fix printk format specifier
Changes in v3:
 - properly handle case where domain with highest ID no longer exists
 - define max_console_rx
Changes in v2:
 - was: xen/console: Handle true dom0less case when switching serial input
 - use a more generic approach to handle all non-existing domains
---
 xen/drivers/char/console.c | 36 +++-
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index 51e5408f2114..0e410fa086df 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -473,6 +473,8 @@ static void cf_check dump_console_ring_key(unsigned char 
key)
  */
 static unsigned int __read_mostly console_rx = 0;
 
+#define max_console_rx (max_init_domid + 1)
+
 /* Make sure to rcu_unlock_domain after use */
 struct domain *console_input_domain(void)
 {
@@ -483,15 +485,31 @@ struct domain *console_input_domain(void)
 
 static void switch_serial_input(void)
 {
-if ( console_rx == max_init_domid + 1 )
-{
-console_rx = 0;
-printk("*** Serial input to Xen");
-}
-else
+unsigned int next_rx = console_rx;
+
+/*
+ * Rotate among Xen, dom0 and boot-time created domUs while skipping
+ * switching serial input to non existing domains.
+ */
+for ( ; ; )
 {
-console_rx++;
-printk("*** Serial input to DOM%d", console_rx - 1);
+struct domain *d;
+
+if ( next_rx++ >= max_console_rx )
+{
+console_rx = 0;
+printk("*** Serial input to Xen");
+break;
+}
+
+d = rcu_lock_domain_by_id(next_rx - 1);
+if ( d )
+{
+rcu_unlock_domain(d);
+console_rx = next_rx;
+printk("*** Serial input to DOM%u", next_rx - 1);
+break;
+}
 }
 
 if ( switch_code )
@@ -1089,7 +1107,7 @@ void __init console_endboot(void)
  * a useful 'how to switch' message.
  */
 if ( opt_conswitch[1] == 'x' )
-console_rx = max_init_domid + 1;
+console_rx = max_console_rx;
 
 register_keyhandler('w', dump_console_ring_key,
 "synchronously dump console ring buffer (dmesg)", 0);
-- 
2.25.1

[xen-unstable test] 179799: tolerable trouble: fail/pass/starved

2023-03-20 Thread osstest service owner

flight 179799 xen-unstable real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179799/

Failures :-/ but no regressions.

Tests which are failing intermittently (not blocking):
 test-amd64-amd64-libvirt-vhd 19 guest-start/debian.repeat fail in 179770 pass 
in 179799
 test-amd64-amd64-dom0pvh-xl-amd 22 guest-start/debian.repeat fail pass in 
179770
 test-amd64-amd64-pair 28 guest-migrate/dst_host/src_host/debian.repeat fail 
pass in 179770

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-qcow221 guest-start/debian.repeatfail  like 179742
 test-amd64-amd64-xl-qemut-win7-amd64 19 guest-stopfail like 179770
 test-amd64-i386-xl-qemuu-win7-amd64 19 guest-stop fail like 179770
 test-amd64-amd64-xl-qemuu-ws16-amd64 19 guest-stopfail like 179770
 test-amd64-amd64-qemuu-nested-amd 20 debian-hvm-install/l1/l2 fail like 179770
 test-amd64-i386-xl-qemut-ws16-amd64 19 guest-stop fail like 179770
 test-amd64-i386-xl-qemut-win7-amd64 19 guest-stop fail like 179770
 test-amd64-amd64-xl-qemut-ws16-amd64 19 guest-stopfail like 179770
 test-amd64-i386-xl-qemuu-ws16-amd64 19 guest-stop fail like 179770
 test-amd64-amd64-xl-qemuu-win7-amd64 19 guest-stopfail like 179770
 test-amd64-i386-xl-pvshim14 guest-start  fail   never pass
 test-amd64-i386-libvirt-xsm  15 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 15 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit1  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-thunderx 16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-xsm  16 saverestore-support-checkfail   never pass
 test-arm64-arm64-libvirt-xsm 15 migrate-support-checkfail   never pass
 test-arm64-arm64-libvirt-xsm 16 saverestore-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  15 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-credit2  16 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 13 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-raw  14 migrate-support-checkfail   never pass
 test-arm64-arm64-libvirt-raw 14 migrate-support-checkfail   never pass
 test-arm64-arm64-libvirt-raw 15 saverestore-support-checkfail   never pass
 test-amd64-amd64-libvirt-vhd 14 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-vhd  14 migrate-support-checkfail   never pass
 test-arm64-arm64-xl-vhd  15 saverestore-support-checkfail   never pass
 test-amd64-i386-libvirt  15 migrate-support-checkfail   never pass
 build-armhf-libvirt   1 build-check(1)   starved  n/a
 test-armhf-armhf-examine  1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt  1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt-qcow2  1 build-check(1)   starved  n/a
 test-armhf-armhf-libvirt-raw  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-credit1   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-credit2   1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-cubietruck  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-multivcpu  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-rtds  1 build-check(1)   starved  n/a
 test-armhf-armhf-xl-vhd   1 build-check(1)   starved  n/a
 build-armhf   2 hosts-allocate   starved  n/a

version targeted for testing:
 xen  9bf21fcaef07f68ab52d0382ff554616a1cf66d8
baseline version:
 xen  9bf21fcaef07f68ab52d0382ff554616a1cf66d8

Last test of basis   179799  2023-03-20 02:03:59 Z0 days
Testing same since  (not found) 0 attempts

jobs:
 build-amd64-xsm  pass
 build-arm64-xsm  pass
 build-i386-xsm   pass
 build-amd64-xtf  pass
 build-amd64  pass
 build-arm64

Re: [PATCH v3] xen/console: Skip switching serial input to non existing domains

2023-03-20 Thread Jan Beulich

On 20.03.2023 13:07, Michal Orzel wrote:
> On 20/03/2023 12:17, Jan Beulich wrote:
>> One other transformation for you to consider is to switch to a base
>> layout like
>>
>> unsigned int next_rx = console_rx;
>> while ( next_rx++ <= max_console_rx )
>> {
>> ...
>> }
>>
>> i.e. without a separate increment at the bottom of the loop. Which,
>> now that I've spelled it out, raises the question of why the outer
>> loop needs a condition in the first place (because as written above
>> it clearly is always true). So perhaps better (and more directly
>> showing what's going on)
>>
>> unsigned int next_rx = console_rx;
>> for ( ; ; )
>> {
>> if ( next_rx++ >= max_console_rx )
>> ...
>> }
> Makes sense to me so I will do this assuming that you agree on adding your Rb 
> tag also
> for this approach.

I do, yes.

Jan

Re: [PATCH v2] vpci/msix: handle accesses adjacent to the MSI-X table

2023-03-20 Thread Jan Beulich

On 16.03.2023 13:07, Roger Pau Monne wrote:
> --- a/xen/drivers/vpci/msix.c
> +++ b/xen/drivers/vpci/msix.c
> @@ -27,6 +27,11 @@
>  ((addr) >= vmsix_table_addr(vpci, nr) &&  \
>   (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
>  
> +#define VMSIX_ADDR_SAME_PAGE(addr, vpci, nr)  \
> +(PFN_DOWN(addr) >= PFN_DOWN(vmsix_table_addr(vpci, nr)) &&\
> + PFN_DOWN((addr)) < PFN_UP(vmsix_table_addr(vpci, nr) +   \
> +   vmsix_table_size(vpci, nr) - 1))

Looks like this would be better in line with get_slot() (and slightly
cheaper) if the 2nd comparison was ... <= PFN_DOWN().

> @@ -149,7 +154,7 @@ static struct vpci_msix *msix_find(const struct domain 
> *d, unsigned long addr)
>  
>  for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
>  if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
> - VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
> + VMSIX_ADDR_SAME_PAGE(addr, msix->pdev->vpci, i) )
>  return msix;
>  }
>  
> @@ -182,93 +187,201 @@ static struct vpci_msix_entry *get_entry(struct 
> vpci_msix *msix,
>  return >entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
>  }
>  
> -static void __iomem *get_pba(struct vpci *vpci)
> +static void __iomem *get_table(struct vpci *vpci, unsigned int slot)
>  {
>  struct vpci_msix *msix = vpci->msix;
>  /*
> - * PBA will only be unmapped when the device is deassigned, so access it
> - * without holding the vpci lock.
> + * Regions will only be unmapped when the device is deassigned, so access
> + * them without holding the vpci lock.
>   */
> -void __iomem *pba = read_atomic(>pba);
> +void __iomem *table = read_atomic(>table[slot]);
> +paddr_t addr = 0;
>  
> -if ( likely(pba) )
> -return pba;
> +if ( likely(table) )
> +return table;
>  
> -pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
> -  vmsix_table_size(vpci, VPCI_MSIX_PBA));
> -if ( !pba )
> -return read_atomic(>pba);
> +switch ( slot )
> +{
> +case VPCI_MSIX_TBL_TAIL:
> +addr = vmsix_table_size(vpci, VPCI_MSIX_TABLE);
> +fallthrough;
> +case VPCI_MSIX_TBL_HEAD:
> +addr += vmsix_table_addr(vpci, VPCI_MSIX_TABLE);
> +break;
> +
> +case VPCI_MSIX_PBA_TAIL:
> +addr = vmsix_table_size(vpci, VPCI_MSIX_PBA);
> +fallthrough;
> +case VPCI_MSIX_PBA_HEAD:
> +addr += vmsix_table_addr(vpci, VPCI_MSIX_PBA);
> +break;

Hmm, wasn't the plan to stop special-casing the PBA, including its
special treatment wrt the p2m? Reading on I realize you need this for
the (future) DomU case (to enforce r/o-ness, albeit having looked at
the spec again the other day I'm not really convinced anymore we
really need to squash writes), but we should be able to avoid the
extra overhead for Dom0? (Granted it may make sense to leave this to
a separate patch, if we want to keep the DomU handling despite not
presently needing it.)

> +}
> +
> +table = ioremap(round_pgdown(addr), PAGE_SIZE);
> +if ( !table )
> +return read_atomic(>table[slot]);
>  
>  spin_lock(>lock);
> -if ( !msix->pba )
> +if ( !msix->table[slot] )
>  {
> -write_atomic(>pba, pba);
> +write_atomic(>table[slot], table);
>  spin_unlock(>lock);
>  }
>  else
>  {
>  spin_unlock(>lock);
> -iounmap(pba);
> +iounmap(table);
>  }
>  
> -return read_atomic(>pba);
> +return read_atomic(>table[slot]);
>  }
>  
> -static int cf_check msix_read(
> -struct vcpu *v, unsigned long addr, unsigned int len, unsigned long 
> *data)
> +unsigned int get_slot(const struct vpci *vpci, unsigned long addr)
>  {
> -const struct domain *d = v->domain;
> -struct vpci_msix *msix = msix_find(d, addr);
> -const struct vpci_msix_entry *entry;
> -unsigned int offset;
> +unsigned long pfn = PFN_DOWN(addr);
>  
> -*data = ~0ul;
> +/*
> + * The logic below relies on having the tables identity mapped to the 
> guest
> + * address space, or for the `addr` parameter to be translated into its
> + * host physical memory address equivalent.
> + */
>  
> -if ( !msix )
> -return X86EMUL_RETRY;
> +if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_TABLE)) )
> +return VPCI_MSIX_TBL_HEAD;
> +if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_TABLE) +
> + vmsix_table_size(vpci, VPCI_MSIX_TABLE) - 1) )
> +return VPCI_MSIX_TBL_TAIL;
> +if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_PBA)) )
> +return VPCI_MSIX_PBA_HEAD;
> +if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_PBA) +
> + vmsix_table_size(vpci, VPCI_MSIX_PBA) - 1) )
> +return VPCI_MSIX_PBA_TAIL;
> +
>

Re: [PATCH v3] xen/console: Skip switching serial input to non existing domains

2023-03-20 Thread Michal Orzel




On 20/03/2023 12:17, Jan Beulich wrote:
> 
> 
> On 20.03.2023 09:19, Michal Orzel wrote:
>> @@ -483,15 +485,34 @@ struct domain *console_input_domain(void)
>>
>>  static void switch_serial_input(void)
>>  {
>> -if ( console_rx == max_init_domid + 1 )
>> -{
>> -console_rx = 0;
>> -printk("*** Serial input to Xen");
>> -}
>> -else
>> +unsigned int next_rx = console_rx + 1;
>> +
>> +/*
>> + * Rotate among Xen, dom0 and boot-time created domUs while skipping
>> + * switching serial input to non existing domains.
>> + */
>> +while ( next_rx <= max_console_rx + 1 )
>>  {
>> -console_rx++;
>> -printk("*** Serial input to DOM%d", console_rx - 1);
>> +if ( next_rx == max_console_rx + 1 )
> 
> Part of the earlier problems stemmed from the comparison being == here.
> Could I talk you into using >= instead?
With the loop condition unmodified it would not make sense as it would be 
impossible.
However, because of what you wrote below, I will do this together with other 
modifications.

> 
>> +{
>> +console_rx = 0;
>> +printk("*** Serial input to Xen");
>> +break;
>> +}
>> +else
> 
> No need for "else" after "break" (or alike). Omitting it will not only
> decrease indentation, but also make more visible that the earlier if()
> won't "fall through".
> 
ok.

>> +{
>> +struct domain *d = rcu_lock_domain_by_id(next_rx - 1);
>> +
>> +if ( d )
>> +{
>> +rcu_unlock_domain(d);
>> +console_rx = next_rx;
>> +printk("*** Serial input to DOM%d", console_rx - 1);
> 
> While I expect the compiler will transform this to using "next_rx"
> here anyway, I think it would be nice if it was written like this
> right away.
ok.

> 
> Since you touch the printk() anyway, please also switch to using the
> more applicable %u.
ok.

> 
> With the adjustments
> Reviewed-by: Jan Beulich 
> 
> One other transformation for you to consider is to switch to a base
> layout like
> 
> unsigned int next_rx = console_rx;
> while ( next_rx++ <= max_console_rx )
> {
> ...
> }
> 
> i.e. without a separate increment at the bottom of the loop. Which,
> now that I've spelled it out, raises the question of why the outer
> loop needs a condition in the first place (because as written above
> it clearly is always true). So perhaps better (and more directly
> showing what's going on)
> 
> unsigned int next_rx = console_rx;
> for ( ; ; )
> {
> if ( next_rx++ >= max_console_rx )
> ...
> }
Makes sense to me so I will do this assuming that you agree on adding your Rb 
tag also
for this approach.

~Michal

Re: [PATCH v5 2/5] Change remaining xenbits.xen.org links to HTTPS

2023-03-20 Thread George Dunlap

On Mon, Feb 27, 2023 at 6:46 PM Demi Marie Obenour <
d...@invisiblethingslab.com> wrote:

> On Mon, Feb 27, 2023 at 09:35:51AM +0100, Jan Beulich wrote:
> > On 25.02.2023 21:37, Demi Marie Obenour wrote:
> > > --- a/Config.mk
> > > +++ b/Config.mk
> > > @@ -191,7 +191,7 @@ APPEND_CFLAGS += $(foreach i, $(APPEND_INCLUDES),
> -I$(i))
> > >  EMBEDDED_EXTRA_CFLAGS := -fno-pie -fno-stack-protector
> -fno-stack-protector-all
> > >  EMBEDDED_EXTRA_CFLAGS += -fno-exceptions
> -fno-asynchronous-unwind-tables
> > >
> > > -XEN_EXTFILES_URL ?= http://xenbits.xen.org/xen-extfiles
> > > +XEN_EXTFILES_URL ?= https://xenbits.xen.org/xen-extfiles
> > >  # All the files at that location were downloaded from elsewhere on
> > >  # the internet.  The original download URL is preserved as a comment
> > >  # near the place in the Xen Makefiles where the file is used.
> > > diff --git a/tools/misc/mkrpm b/tools/misc/mkrpm
> > > index
> 68819b2d739cea5491b53f9b944ee2bd20d92c2b..548db4b5da2691547438df5d7d58e5b4c3bd90d0
> 100644
> > > --- a/tools/misc/mkrpm
> > > +++ b/tools/misc/mkrpm
> > > @@ -34,7 +34,7 @@ Version: $version
> > >  Release: $release
> > >  License: GPL
> > >  Group:   System/Hypervisor
> > > -URL: http://xenbits.xenproject.org/xen.git
> > > +URL: https://xenbits.xen.org/git-http/xen.git
> >
> > Please can you not lose "project" from the URL? That's the more modern
> > form, after all. In fact, since you're touching the other URL above
> > anyway, I wonder if it wouldn't be a good idea to insert "project"
> > there as well. With at least the former adjustment (which I suppose
> > can be done while committing, as long as you agree)
> > Acked-by: Jan Beulich 
>
> I’m fine with either or both of those adjustments.  I was not aware that
> https://xenbits.xen.org is an alias for https://xenbits.xenproject.org.
>

"xen.org" is the original.  When Xen joined the Linux Foundation, there
were some complications with the trademark: Citrix had renamed all their
products to XenFoo (even those which had nothing to do with Xen), and so
wanted to keep the trademark; but the LF felt they needed a trademark they
could own & enforce.  The solution the lawyers came up with was for Citrix
to allow the LF to own the trademark to "The Xen Project", while Citrix
retained the trademark to "Xen".  Everything was meant to have shifted over
to "xenproject.org", but of course "xen.org" was kept around to avoid
breaking links; and here we are, 10 years later.

Neither LF nor CSG are particularly trigger-happy with lawsuits, so it's
not a huge deal, but all things being equal, it's better to use "
xenproject.org"; and switching to "xen.org" is certainly a (small)
regression.

 -George

Re: [PATCH v8 1/5] xen: introduce CONFIG_GENERIC_BUG_FRAME

2023-03-20 Thread Oleksii

On Fri, 2023-03-17 at 15:59 +0100, Jan Beulich wrote:
> On 17.03.2023 10:23, Oleksii wrote:
> > On Thu, 2023-03-16 at 12:26 +0100, Jan Beulich wrote:
> > > On 15.03.2023 18:21, Oleksii Kurochko wrote:
> > > > --- /dev/null
> > > > +++ b/xen/common/bug.c
> > > > @@ -0,0 +1,108 @@
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > > +
> > > > +#include 
> > > 
> > > I actually meant to also ask: What is this needed for? Glancing
> > > over
> > > the
> > > code ...
> > > 
> > > > +/*
> > > > + * Returns a negative value in case of an error otherwise
> > > > + * BUGFRAME_{run_fn, warn, bug, assert}
> > > > + */
> > > > +int do_bug_frame(struct cpu_user_regs *regs, unsigned long pc)
> > > > +{
> > > > +    const struct bug_frame *bug = NULL;
> > > > +    const struct virtual_region *region;
> > > > +    const char *prefix = "", *filename, *predicate;
> > > > +    unsigned long fixup;
> > > > +    unsigned int id, lineno;
> > > > +
> > > > +    region = find_text_region(pc);
> > > > +    if ( !region )
> > > > +    return -EINVAL;
> > > > +
> > > > +    for ( id = 0; id < BUGFRAME_NR; id++ )
> > > > +    {
> > > > +    const struct bug_frame *b;
> > > > +    size_t i;
> > > > +
> > > > +    for ( i = 0, b = region->frame[id].bugs;
> > > > +  i < region->frame[id].n_bugs; b++, i++ )
> > > > +    {
> > > > +    if ( bug_loc(b) == pc )
> > > > +    {
> > > > +    bug = b;
> > > > +    goto found;
> > > > +    }
> > > > +    }
> > > > +    }
> > > > +
> > > > + found:
> > > > +    if ( !bug )
> > > > +    return -ENOENT;
> > > > +
> > > > +    if ( id == BUGFRAME_run_fn )
> > > > +    {
> > > > +    void (*fn)(struct cpu_user_regs *) = bug_ptr(bug);
> > > > +
> > > > +    fn(regs);
> > > > +
> > > > +    /* Re-enforce consistent types, because of the casts
> > > > involved. */
> > > > +    if ( false )
> > > > +    run_in_exception_handler(fn);
> > > > +
> > > > +    return id;
> > > > +    }
> > > > +
> > > > +    /* WARN, BUG or ASSERT: decode the filename pointer and
> > > > line
> > > > number. */
> > > > +    filename = bug_ptr(bug);
> > > > +    if ( !is_kernel(filename) && !is_patch(filename) )
> > > > +    return -EINVAL;
> > > > +    fixup = strlen(filename);
> > > > +    if ( fixup > 50 )
> > > > +    {
> > > > +    filename += fixup - 47;
> > > > +    prefix = "...";
> > > > +    }
> > > > +    lineno = bug_line(bug);
> > > > +
> > > > +    switch ( id )
> > > > +    {
> > > > +    case BUGFRAME_warn:
> > > > +    printk("Xen WARN at %s%s:%d\n", prefix, filename,
> > > > lineno);
> > > > +    show_execution_state(regs);
> > > > +
> > > > +    break;
> > > > +
> > > > +    case BUGFRAME_bug:
> > > > +    printk("Xen BUG at %s%s:%d\n", prefix, filename,
> > > > lineno);
> > > > +
> > > > +    if ( BUG_DEBUGGER_TRAP_FATAL(regs) )
> > > > +    break;
> > > > +
> > > > +    show_execution_state(regs);
> > > > +    panic("Xen BUG at %s%s:%d\n", prefix, filename,
> > > > lineno);
> > > > +
> > > > +    case BUGFRAME_assert:
> > > > +    /* ASSERT: decode the predicate string pointer. */
> > > > +    predicate = bug_msg(bug);
> > > > +    if ( !is_kernel(predicate) && !is_patch(predicate) )
> > > > +    predicate = "";
> > > > +
> > > > +    printk("Assertion '%s' failed at %s%s:%d\n",
> > > > +   predicate, prefix, filename, lineno);
> > > > +
> > > > +    if ( BUG_DEBUGGER_TRAP_FATAL(regs) )
> > > > +    break;
> > > > +
> > > > +    show_execution_state(regs);
> > > > +    panic("Assertion '%s' failed at %s%s:%d\n",
> > > > +  predicate, prefix, filename, lineno);
> > > > +    }
> > > > +
> > > > +    return id;
> > > > +}
> > > 
> > > ... I can't really spot what it might be that comes from that
> > > header.
> > > Oh, on the N+1st run I've spotted it - it's
> > > show_execution_state().
> > > The declaration of which, already being used from common code
> > > ahead
> > > of this series, should imo be moved to a common header. I guess
> > > I'll
> > > make yet another patch ...
> > As mentioned above. Not only show_execution_state() but also
> > > cpu_user_regs structure. ( at least, for ARM & RISCV )
> 
> Do we deref "regs" anywhere? I can't seem to be able to spot an
> instance.
> Without a deref (or alike) a forward decl is all that's needed for
> this
> code to compile.
You are right, there is no deref, so let's switch to a forward decl.

I'll add it to a new version of the patch series.

~ Oleksii

Re: [PATCH v5 0/5] Stop using insecure transports

2023-03-20 Thread George Dunlap

On Mon, Mar 20, 2023 at 11:14 AM Anthony PERARD 
wrote:

> Hi,
>
> I believe all the containers that needed to be updated in our GitLab CI
> to be able to access HTTPS URLs have now been updated.
>
> So I guess the series is good to go if it's reviewed.
>

Has it run and passed Gitlab-CI with the new container images?

 -George

Re: [PATCH v3] xen/console: Skip switching serial input to non existing domains

2023-03-20 Thread Jan Beulich

On 20.03.2023 09:19, Michal Orzel wrote:
> @@ -483,15 +485,34 @@ struct domain *console_input_domain(void)
>  
>  static void switch_serial_input(void)
>  {
> -if ( console_rx == max_init_domid + 1 )
> -{
> -console_rx = 0;
> -printk("*** Serial input to Xen");
> -}
> -else
> +unsigned int next_rx = console_rx + 1;
> +
> +/*
> + * Rotate among Xen, dom0 and boot-time created domUs while skipping
> + * switching serial input to non existing domains.
> + */
> +while ( next_rx <= max_console_rx + 1 )
>  {
> -console_rx++;
> -printk("*** Serial input to DOM%d", console_rx - 1);
> +if ( next_rx == max_console_rx + 1 )

Part of the earlier problems stemmed from the comparison being == here.
Could I talk you into using >= instead?

> +{
> +console_rx = 0;
> +printk("*** Serial input to Xen");
> +break;
> +}
> +else

No need for "else" after "break" (or alike). Omitting it will not only
decrease indentation, but also make more visible that the earlier if()
won't "fall through".

> +{
> +struct domain *d = rcu_lock_domain_by_id(next_rx - 1);
> +
> +if ( d )
> +{
> +rcu_unlock_domain(d);
> +console_rx = next_rx;
> +printk("*** Serial input to DOM%d", console_rx - 1);

While I expect the compiler will transform this to using "next_rx"
here anyway, I think it would be nice if it was written like this
right away.

Since you touch the printk() anyway, please also switch to using the
more applicable %u.

With the adjustments
Reviewed-by: Jan Beulich 

One other transformation for you to consider is to switch to a base
layout like

unsigned int next_rx = console_rx;
while ( next_rx++ <= max_console_rx )
{
...
}

i.e. without a separate increment at the bottom of the loop. Which,
now that I've spelled it out, raises the question of why the outer
loop needs a condition in the first place (because as written above
it clearly is always true). So perhaps better (and more directly
showing what's going on)

unsigned int next_rx = console_rx;
for ( ; ; )
{
if ( next_rx++ >= max_console_rx )
...
}

Jan

Re: [PATCH v5 0/5] Stop using insecure transports

2023-03-20 Thread Anthony PERARD

Hi,

I believe all the containers that needed to be updated in our GitLab CI
to be able to access HTTPS URLs have now been updated.

So I guess the series is good to go if it's reviewed.

Cheers,

-- 
Anthony PERARD

Re: [PATCH] Fix PCI hotplug AML

2023-03-20 Thread Paul Durrant


On 20/03/2023 10:34, Jan Beulich wrote:

On 20.03.2023 10:04, Paul Durrant wrote:

On 17/03/2023 10:32, David Woodhouse wrote:

From: David Woodhouse 

The emulated PIIX3 uses a nybble for the status of each PCI function,
so the status for e.g. slot 0 functions 0 and 1 respectively can be
read as (\_GPE.PH00 & 0x0F), and (\_GPE.PH00 >> 0x04).

The AML that Xen gives to a guest gets the operand order for the odd-
numbered functions the wrong way round, returning (0x04 >> \_GPE.PH00)
instead.

As far as I can tell, this was the wrong way round in Xen from the
moment that PCI hotplug was first introduced in commit 83d82e6f35a8:

+ShiftRight (0x4, \_GPE.PH00, Local1)
+Return (Local1) /* IN status as the _STA */

Or maybe there's bizarre AML operand ordering going on there, like
Intel's wrong-way-round assembler, and it only broke later when it was
changed to being generated?

Either way, it's definitely wrong now, and instrumenting a Linux guest
shows that it correctly sees _STA being 0x00 in function 0 of an empty
slot, but then the loop in acpiphp_glue.c::get_slot_status() goes on to
look at function 1 and sees that _STA evaluates to 0x04. Thus reporting
an adapter is present in every slot in /sys/bus/pci/slots/*

Quite why Linux wants to look for function 1 being physically present
when function 0 isn't... I don't want to think about right now.

Signed-off-by: David Woodhouse 
Fixes: 83d82e6f35a8 ("hvmloader: pass-through: multi-function PCI hot-plug")
---
Utterly untested in Xen. Tested the same change in a different
environment which is using precisely the *same* AML for guest
compatibility.



This AML only relates to the hotplug controller for qemu-trad so it's
unlikely anyone particularly cares any more. In fact I'm kind of
surprised the generation code still exists.


Why would it not exist anymore? Use of qemu-trad is deprecated and
advised against, but it's still possible to use it. Otherwise quite a
bit of cleanup in libxl could also happen, for example.



Right. I'm just surprised that is not done already... seems like a while 
since trad was deprecated; I'd have thought it could be removed in the 
next release.


  Paul

Re: Aw: Re: Re: Re: [help] Xen 4.14.5 on Devuan 4.0 Chimaera, regression from Xen 4.0.1

2023-03-20 Thread Andrew Cooper

On 19/03/2023 7:38 pm, Denis wrote:
> On 14.03.2023 16:11, Andrew Cooper wrote:
>> On 14/03/2023 2:53 pm, Denis wrote:
>>> On 14.03.2023 07:37; Jan Beulich wrote:
 On 14.03.2023 02:15, Denis wrote:
> On 13.03.2023 10:36, Jan wrote
>> On 10.03.2023 21:50, Denis wrote:
>>> Should I test something else?
>> ... there was no request for any further testing here, for the moment.
> ah...sorry, going by "Would be nice to have this confirmed for the system
> in question, i.e. without Xen underneath Linux" I thought I could test
> something which might help shed some light on all of this.
 Well, yes, that Linux-without-Xen test would still be useful to have
 results from. I didn't account for this in my earlier reply because
 I had asked for it before already, and I did take "something else"
 for meaning anything that might have turned up as useful from the new
 data you had provided.
>>> What tests could I do or what info should I provide to help?
>> Can you please rebuild Xen with this patch:
>>
>> diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c
>> b/xen/drivers/passthrough/amd/iommu_acpi.c
>> index 2fdebd2d74c9..747eae25f56c 100644
>> --- a/xen/drivers/passthrough/amd/iommu_acpi.c
>> +++ b/xen/drivers/passthrough/amd/iommu_acpi.c
>> @@ -1033,7 +1033,7 @@ static int __init parse_ivrs_table(struct
>> acpi_table_header *table)
>>  const struct acpi_ivrs_header *ivrs_block;
>>  unsigned long length;
>>  unsigned int apic;
>> -    bool_t sb_ioapic = !iommu_intremap;
>> +    bool_t sb_ioapic = 1;
>>  int error = 0;
>>  
>>  BUG_ON(!table);
>>
>> which should cause the behaviour to revert back to that of Xen 4.0.1 
>> (i.e. it will fully ignore the checks relating to the southbridge ioapic).
>>
>> Confirm that with this, and booting Xen simply with `iommu=1` that full
>> DMA remapping and interrupt remapping is considered active.
>>
>>
>> Then, can you play around with passing the soundblaster through to VMs. 
>> Looking at the LSPCI you provided, it only supports legacy line interrupts.
>>
>> Does the device work fine, or do you get a bunch of errors on `xl dmesg`
>> about IO page faults (which is a generic "IOMMU said no to something"
>> message)?
> Sorry, it took me a while to get it done.
>
> The relevant things are enabled again, passthrough works (even the PCI 
> Audigy2) 
> and the devices are recognized in the HVM domU.
>
> As you suspected, there are a few IO page faults at the end of the boot 
> process
> (from my limited understanding it's maybe related to 
> "00:14.0 SMBus: Advanced Micro Devices, Inc. [AMD/ATI] SBx00 SMBus Controller 
> (rev 41)")
>
> I'll attach the "xl dmesg" output file.

Do you have this file?

If they're only at the end of boot and not later around passthrough,
then they might be from other functionality in the Southbridge.

~Andrew

Re: [XEN PATCH v3] x86/monitor: Add new monitor event to catch I/O instructions

2023-03-20 Thread Jan Beulich

On 17.03.2023 13:01, Dmitry Isaykin wrote:
> --- a/xen/arch/x86/hvm/monitor.c
> +++ b/xen/arch/x86/hvm/monitor.c
> @@ -346,6 +346,27 @@ int hvm_monitor_vmexit(unsigned long exit_reason,
>  return monitor_traps(curr, ad->monitor.vmexit_sync, &req);
>  }
>  
> +int hvm_monitor_io(uint16_t port, unsigned int bytes,

Please avoid the use of fixed-width types where not really necessary. See
./CODING_STYLE.

> +   int dir, bool string_ins)

"ins" is an ambiguous abbreviation, even more so when anyway talking about
x86'es I/O instructions. You might consider simply omitting the suffix, or
alternatively I'd like to ask for "insn" (my preference) or "instr".

> +{
> +struct vcpu *curr = current;
> +struct arch_domain *ad = &curr->domain->arch;
> +vm_event_request_t req = {};
> +
> +if ( !ad->monitor.io_enabled )
> +return 0;
> +
> +req.reason = VM_EVENT_REASON_IO_INSTRUCTION;
> +req.u.io.data_size = bytes;
> +req.u.io.port = port;
> +req.u.io.dir = dir;
> +req.u.io.string_ins = string_ins;

Having these be the variable's initializer would probably be more in line
with what we do elsewhere, including in many cases right in this same
source file (yet sadly it's not really consistent).

> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -4560,7 +4560,24 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>  break;
>  
>  case EXIT_REASON_IO_INSTRUCTION:
> +{
> +uint16_t port;
> +int bytes, dir;

Since you move it, "bytes" wants to be "unsigned int" (together with "port").
At which point "dir" ...

> +bool string_ins;
> +int rc;

... can share a declaration with "rc".

>  __vmread(EXIT_QUALIFICATION, &exit_qualification);
> +
> +port = (exit_qualification >> 16) & 0xFFFF;
> +bytes = (exit_qualification & 0x07) + 1;
> +dir = (exit_qualification & 0x08) ? IOREQ_READ : IOREQ_WRITE;
> +string_ins = (exit_qualification & 0x10);
> +rc = hvm_monitor_io(port, bytes, dir, string_ins);
> +if ( rc < 0 )
> +goto exit_and_crash;
> +if ( rc )
> +break;
> +
>  if ( exit_qualification & 0x10 )

Please either use the new local variable here then as well, or omit it
in favor of using the same expression in the other function call.

> --- a/xen/include/public/vm_event.h
> +++ b/xen/include/public/vm_event.h
> @@ -160,6 +160,8 @@
>  #define VM_EVENT_REASON_EMUL_UNIMPLEMENTED  14
>  /* VMEXIT */
>  #define VM_EVENT_REASON_VMEXIT  15
> +/* IN/OUT Instruction executed */
> +#define VM_EVENT_REASON_IO_INSTRUCTION  16
>  
>  /* Supported values for the vm_event_write_ctrlreg index. */
>  #define VM_EVENT_X86_CR00
> @@ -388,6 +390,13 @@ struct vm_event_vmexit {
>  } arch;
>  };
>  
> +struct vm_event_io {
> +uint32_t data_size;
> +uint16_t port;
> +uint8_t  dir; /* IOREQ_READ or IOREQ_WRITE */

Are you actually sure you want to tie the vm-event interface to the ioreq
one (this is also a question to you, Tamas)? It would look slightly better
to me if this was a simple boolean named after its purpose (e.g. "write"
or "out" when it's meant to be set for OUT / OUTS and clear for IN / INS).

Jan

Re: [PATCH v3 08/10] tools: add physinfo arch_capabilities handling for Arm

2023-03-20 Thread Christian Lindig




> On 17 Mar 2023, at 13:19, Luca Fancellu  wrote:
> 
> 
>   arch_cap_flags = caml_alloc_small(1, arch_cap_flags_tag);
>   Store_field(arch_cap_flags, 0, arch_cap_list);
>   Store_field(physinfo, 10, arch_cap_flags);
> +#elif defined(__aarch64__)
> + Store_field(physinfo, 10, Val_int(c_physinfo.arch_capabilities));
> +#else
> + caml_failwith("Unhandled architecture");
> +#endif
> 

Is this code overwriting an existing entry that was computed but now isn’t 
used? If so, should the conditional compilation not avoid this?

— C

Re: [PATCH] Fix PCI hotplug AML

2023-03-20 Thread Jan Beulich

On 20.03.2023 10:04, Paul Durrant wrote:
> On 17/03/2023 10:32, David Woodhouse wrote:
>> From: David Woodhouse 
>>
>> The emulated PIIX3 uses a nybble for the status of each PCI function,
>> so the status for e.g. slot 0 functions 0 and 1 respectively can be
>> read as (\_GPE.PH00 & 0x0F), and (\_GPE.PH00 >> 0x04).
>>
>> The AML that Xen gives to a guest gets the operand order for the odd-
>> numbered functions the wrong way round, returning (0x04 >> \_GPE.PH00)
>> instead.
>>
>> As far as I can tell, this was the wrong way round in Xen from the
>> moment that PCI hotplug was first introduced in commit 83d82e6f35a8:
>>
>> +ShiftRight (0x4, \_GPE.PH00, Local1)
>> +Return (Local1) /* IN status as the _STA */
>>
>> Or maybe there's bizarre AML operand ordering going on there, like
>> Intel's wrong-way-round assembler, and it only broke later when it was
>> changed to being generated?
>>
>> Either way, it's definitely wrong now, and instrumenting a Linux guest
>> shows that it correctly sees _STA being 0x00 in function 0 of an empty
>> slot, but then the loop in acpiphp_glue.c::get_slot_status() goes on to
>> look at function 1 and sees that _STA evaluates to 0x04. Thus reporting
>> an adapter is present in every slot in /sys/bus/pci/slots/*
>>
>> Quite why Linux wants to look for function 1 being physically present
>> when function 0 isn't... I don't want to think about right now.
>>
>> Signed-off-by: David Woodhouse 
>> Fixes: 83d82e6f35a8 ("hvmloader: pass-through: multi-function PCI hot-plug")
>> ---
>> Utterly untested in Xen. Tested the same change in a different
>> environment which is using precisely the *same* AML for guest
>> compatibility.
>>
> 
> This AML only relates to the hotplug controller for qemu-trad so it's 
> unlikely anyone particularly cares any more. In fact I'm kind of 
> surprised the generation code still exists.

Why would it not exist anymore? Use of qemu-trad is deprecated and
advised against, but it's still possible to use it. Otherwise quite a
bit of cleanup in libxl could also happen, for example.

Jan

Re: [PATCH] Fix PCI hotplug AML

2023-03-20 Thread Jan Beulich

On 17.03.2023 11:32, David Woodhouse wrote:
> From: David Woodhouse 
> 
> The emulated PIIX3 uses a nybble for the status of each PCI function,
> so the status for e.g. slot 0 functions 0 and 1 respectively can be
> read as (\_GPE.PH00 & 0x0F), and (\_GPE.PH00 >> 0x04).
> 
> The AML that Xen gives to a guest gets the operand order for the odd-
> numbered functions the wrong way round, returning (0x04 >> \_GPE.PH00)
> instead.
> 
> As far as I can tell, this was the wrong way round in Xen from the
> moment that PCI hotplug was first introduced in commit 83d82e6f35a8:
> 
> +ShiftRight (0x4, \_GPE.PH00, Local1)
> +Return (Local1) /* IN status as the _STA */
> 
> Or maybe there's bizarre AML operand ordering going on there, like
> Intel's wrong-way-round assembler, and it only broke later when it was
> changed to being generated?
> 
> Either way, it's definitely wrong now, and instrumenting a Linux guest
> shows that it correctly sees _STA being 0x00 in function 0 of an empty
> slot, but then the loop in acpiphp_glue.c::get_slot_status() goes on to
> look at function 1 and sees that _STA evaluates to 0x04. Thus reporting
> an adapter is present in every slot in /sys/bus/pci/slots/*
> 
> Quite why Linux wants to look for function 1 being physically present
> when function 0 isn't... I don't want to think about right now.
> 
> Signed-off-by: David Woodhouse 
> Fixes: 83d82e6f35a8 ("hvmloader: pass-through: multi-function PCI hot-plug")

Reviewed-by: Jan Beulich

Re: [PATCH v3 08/10] tools: add physinfo arch_capabilities handling for Arm

2023-03-20 Thread George Dunlap

On Fri, Mar 17, 2023 at 1:20 PM Luca Fancellu  wrote:

> On Arm, the SVE vector length is encoded in arch_capabilities field
> of struct xen_sysctl_physinfo, make use of this field in the tools
> when building for arm.
>
> Signed-off-by: Luca Fancellu 
>

Golang bits:

Acked-by: George Dunlap

Re: [PATCH] x86/Xen: make use of IBPB controlling VM assist

2023-03-20 Thread Jan Beulich

On 17.03.2023 15:21, Andrew Cooper wrote:
> On 17/03/2023 1:56 pm, Juergen Gross wrote:
>> --- a/arch/x86/xen/enlighten_pv.c
>> +++ b/arch/x86/xen/enlighten_pv.c
>> @@ -1476,6 +1476,23 @@ static uint32_t __init xen_platform_pv(void)
>>     return 0;
>>  }
>>
>> +int __init xen_vm_assist_ibpb(bool enable)
>> +{
>> +   /*
>> +    * Note that the VM-assist is a disable, so a request to
>> enable IBPB
>> +    * on our behalf needs to turn the functionality off (and vice
>> versa).
>> +    */
>> +   return HYPERVISOR_vm_assist(enable ? VMASST_CMD_disable
>> +  : VMASST_CMD_enable,
>> +   VMASST_TYPE_mode_switch_no_ibpb);
>> +}
>> +
>> +void __init xen_pv_fix_mitigations(void)
>> +{
>> +   if (!xen_vm_assist_ibpb(true))
>> +   setup_clear_cpu_cap(X86_FEATURE_ENTRY_IBPB);
> 
> If nothing else, this needs a comment explaining what's going on.
> 
> "Xen PV guest kernels run in ring3, so share the same prediction domain
> as userspace.  Xen (since version $X) defaults to issuing an IBPB on
> guest user -> guest kernel transitions on behalf of the guest kernel. 
> If Linux isn't depending on mode based prediction separation, turn this
> behaviour off".

I would have thought the comment in the public header - saying exactly
that - is sufficient.

> But this does open the next question.  Yes, unilaterally turning
> this off restores the prior behaviour, but is this really the best thing
> to do ?

Unless this is purely a question on Jürgen's suggested version (in which
case I'd let him answer) - what alternative do you suggest, within the
present policy used in the kernel?

Jan

Re: [PATCH] x86/Xen: make use of IBPB controlling VM assist

2023-03-20 Thread Jan Beulich

On 17.03.2023 14:56, Juergen Gross wrote:
> On 15.02.23 09:31, Jan Beulich wrote:
>> Eventually yes. But I would prefer to sort the above question first
>> (which I'm sure would have been raised by them, in perhaps more
>> harsh a way), hence the initially limited exposure.
> 
> I'd rather add _one_ hook for Xen-PV in check_bugs() just before the call
> of arch_smt_update(). This can then correct any needed mitigation settings.

Doing this in single central place is what I was originally hoping I
could do. But that simply doesn't work (afaict): It is for a reason
that I apply the adjustment in the RETBLEED_MITIGATION_IBPB case, by
suppressing the setting of the feature bit. Not the least because ...

> So something like (note that due to using 
> cpu_feature_enabled(X86_FEATURE_XENPV)
> DCE is happening in case CONFIG_XEN_PV isn't defined):
> 
> --- a/arch/x86/include/asm/xen/hypervisor.h
> +++ b/arch/x86/include/asm/xen/hypervisor.h
> @@ -63,4 +63,7 @@ void __init xen_pvh_init(struct boot_params *boot_params);
>   void __init mem_map_via_hcall(struct boot_params *boot_params_p);
>   #endif
> 
> +int __init xen_vm_assist_ibpb(bool enable);
> +void __init xen_pv_fix_mitigations(void);
> +
>   #endif /* _ASM_X86_XEN_HYPERVISOR_H */
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -18,6 +18,8 @@
>   #include 
>   #include 
> 
> +#include 
> +
>   #include 
>   #include 
>   #include 
> @@ -177,6 +179,9 @@ void __init check_bugs(void)
>  srbds_select_mitigation();
>  l1d_flush_select_mitigation();
> 
> +   if (cpu_feature_enabled(X86_FEATURE_XENPV))
> +   xen_pv_fix_mitigations();
> +
>  arch_smt_update();
> 
>   #ifdef CONFIG_X86_32
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -1476,6 +1476,23 @@ static uint32_t __init xen_platform_pv(void)
>  return 0;
>   }
> 
> +int __init xen_vm_assist_ibpb(bool enable)
> +{
> +   /*
> +* Note that the VM-assist is a disable, so a request to enable IBPB
> +* on our behalf needs to turn the functionality off (and vice versa).
> +*/
> +   return HYPERVISOR_vm_assist(enable ? VMASST_CMD_disable
> +  : VMASST_CMD_enable,
> +   VMASST_TYPE_mode_switch_no_ibpb);
> +}
> +
> +void __init xen_pv_fix_mitigations(void)
> +{
> +   if (!xen_vm_assist_ibpb(true))
> +   setup_clear_cpu_cap(X86_FEATURE_ENTRY_IBPB);

... using both setup_clear_cpu_cap() (here) and setup_force_cpu_cap()
(in retbleed_select_mitigation() won't work: The latter wins, due to
how apply_forced_caps() works.

But of course calling both functions for the same feature is bogus
anyway. In fact I think it is for a good reason that in Xen we log a
message in such an event.

A new helper could be introduced (and used in
retbleed_select_mitigation()) to check whether a feature was
previously cleared, but I did conclude that it's likely for a good
reason that such doesn't exist.

As to your use of cpu_feature_enabled(X86_FEATURE_XENPV) and DCE -
I can certainly switch to using that, which then ought to allow moving
xen_vm_assist_ibpb() back to enlighten_pv.c (as you have it, and as
I first had it until noticing the build breakage with PVH=y and
PV=n).

Jan

Re: [PATCH v2] x86: use POPCNT for hweight() when available

2023-03-20 Thread Jan Beulich

On 17.03.2023 13:26, Andrew Cooper wrote:
> On 17/03/2023 11:22 am, Roger Pau Monné wrote:
>> On Mon, Jul 15, 2019 at 02:39:04PM +, Jan Beulich wrote:
>>> This is faster than using the software implementation, and the insn is
>>> available on all half-way recent hardware. Therefore convert
>>> generic_hweight() to out-of-line functions (without affecting Arm)
>>> and use alternatives patching to replace the function calls.
>>>
>>> Note that the approach doesn't work for clang, due to it not recognizing
>>> -ffixed-*.
>> I've been giving this a look, and I wonder if it would be fine to
>> simply push and pop the scratch registers in the 'call' path of the
>> alternative, as that won't require any specific compiler option.

Hmm, ...

> It's been a long while, and in that time I've learnt a lot more about
> performance, but my root objection to the approach taken here still
> stands - it is penalising the common case to optimise some pointless
> corner cases.
> 
> Yes - on the call path, an extra push/pop pair (or few) to get temp
> registers is basically free.

... what is "a few"? We'd need to push/pop all call-clobbered registers
except %rax, i.e. a total of eight. I consider this too much. Unless,
as you suggest further down, we wrote the fallback in assembly. Which I
have to admit I'm surprised you propose when we strive to reduce the
amount of assembly we have to maintain.

> Right now, the generic_hweight() helpers are static inline in
> xen/bitops.h and this is nonsense.  The absolute best they should be is
> extern inline in our new lib/ and I'll bet that the compilers stop
> inlining them there and then.

That would be an orthogonal move, wouldn't it? I'm also not really
willing to go as far as calling the present way of it working "nonsense".
I could be talked into doing such a transformation in a separate patch,
but only if it is halfway certain that this won't again be effort
invested just to then face further opposition (other maintainers may not
agree with the movement, as we've seen for other remotely similar
changes to "extern inline").

> Given new abi's like x86_64-v2 (which guarantees POPCNT as an available
> feature), it would be nice to arrange to use __builtin_popcnt() to let
> the compiler optimise to its hearts content, but outside of this case,
> it is actively damaging to try and optimise for memory operands or to
> inline the 8/16 case.
> 
> So, for x86 specifically, we want:
> 
> if ( CONFIG_POPCNT )
>     __builtin_popcnt()
> else
>     ALT( "popcnt D -> a",
>          "call arch_popcnt$N" )
> 
> and we can write arch_popcnt* in x86's lib/ and in assembly, because
> these are trivial enough and we do need to be careful with registers/etc.

How does x86_64-v2 matter here? And how does using __builtin_popcnt()
help, which would use the "popcnt" insn only if we passed -march=popcnt
(or any wider option implying this one) to the compiler?

As to the 8-/16-bit case - I've already accepted to drop that special
casing. The main reason I had it separate was because the generic code
also has them special cased. In any event I would like to make all
agreed upon changes in one go, hence why I didn't submit a new version
yet.

> I'm not sure if a "+D" vs "D" will matter at the top level.  Probably
> not, so it might be an easy way to get one temp register.  Other temp
> registers can come from push/pop.
> 
> 
> While we're at it, we should split hweight out of bitops and write the
> common header in such a way that it defaults to the generic
> implementations in lib/, and that will subsume the ARM header and also
> make this work on RISC-V for free.

Yet another independent change you're asking for. I've taken note of
both of these separate requests, but without any guarantee (yet) that
I'm going to actually carry them out.

Jan

Re: [PATCH] x86: extend coverage of HLE "bad page" workaround

2023-03-20 Thread Jan Beulich

On 17.03.2023 12:39, Roger Pau Monné wrote:
> On Tue, May 26, 2020 at 06:40:16PM +0200, Jan Beulich wrote:
>> On 26.05.2020 17:01, Andrew Cooper wrote:
>>> On 26/05/2020 14:35, Jan Beulich wrote:
 On 26.05.2020 13:17, Andrew Cooper wrote:
> On 26/05/2020 07:49, Jan Beulich wrote:
>> Respective Core Gen10 processor lines are affected, too.
>>
>> Signed-off-by: Jan Beulich 
>>
>> --- a/xen/arch/x86/mm.c
>> +++ b/xen/arch/x86/mm.c
>> @@ -6045,6 +6045,8 @@ const struct platform_bad_page *__init g
>>  case 0x000506e0: /* errata SKL167 / SKW159 */
>>  case 0x000806e0: /* erratum KBL??? */
>>  case 0x000906e0: /* errata KBL??? / KBW114 / CFW103 */
>> +case 0x000a0650: /* erratum Core Gen10 U/H/S 101 */
>> +case 0x000a0660: /* erratum Core Gen10 U/H/S 101 */
> This is marred in complexity.
>
> The enumeration of MSR_TSX_CTRL (from the TAA fix, but architectural
> moving forwards on any TSX-enabled CPU) includes a confirmation that HLE
> no longer exists/works.  This applies to IceLake systems, but possibly
> not their initial release configuration (hence, via a later microcode
> update).
>
> HLE is also disabled in microcode on all older parts for errata reasons,
> so in practice it doesn't exist anywhere now.
>
> I think it is safe to drop this workaround, and this does seem a more
> simple option than encoding which microcode turned HLE off (which sadly
> isn't covered by the spec updates, as even when turned off, HLE is still
> functioning according to its spec of "may speed things up, may do
> nothing"), or the interactions with the CPUID hiding capabilities of
> MSR_TSX_CTRL.
 I'm afraid I don't fully follow: For one, does what you say imply HLE is
 no longer enumerated in CPUID?
>>>
>>> No - sadly not.  For reasons of "not repeating the Haswell/Broadwell
>>> microcode fiasco", the HLE bit will continue to exist and be set. 
>>> (Although on CascadeLake and later, you can turn it off with MSR_TSX_CTRL.)
>>>
>>> It was always a weird CPUID bit.  You were supposed to put
>>> XACQUIRE/XRELEASE prefixes on your legacy locking, and it would be a nop
>>> on old hardware and go faster on newer hardware.
>>>
>>> There is nothing runtime code needs to look at the HLE bit for, except
>>> perhaps for UI reporting purposes.
>>
>> Do you know of some public Intel doc I could reference for all of this,
>> which I would kind of need in the description of a patch ...
>>
 But then this
 erratum does not have the usual text effectively meaning that an ucode
 update is or will be available to address the issue; instead it says
 that BIOS or VMM can reserve the respective address range.
>>>
>>> This is not surprising at all.  Turning off HLE was an unrelated
>>> activity, and I bet the link went unnoticed.
>>>
 This - assuming the alternative you describe is indeed viable - then is 
 surely
 a much more intrusive workaround than needed. Which I wouldn't assume
 they would suggest in such a case.
>>>
>>> My suggestion was to drop the workaround, not to complicate it with a
>>> microcode revision matrix.
>>
>> ... doing this? I don't think I've seen any of this in writing so far,
>> except by you. (I don't understand how this reply of yours relates to
>> what I was saying about the spec update. I understand what you are
>> suggesting. I merely tried to express that I'd have expected Intel to
>> point out the much easier workaround, rather than just a pretty involved
>> one.) Otherwise, may I suggest you make such a patch, to make sure it
>> has an adequate description?
> 
> Seeing as there seems to be some data missing to justify the commit -
> what has Linux done with those errata?

While they deal with the SNB erratum in a similar way, I'm afraid I'm
unaware of Linux having or having had a workaround for the errata here.
Which, granted, is a little surprising when we did actually even issue
an XSA for this.

In fact I find Andrew's request even more surprising with that fact (us
having issued XSA-282 for it) in mind, which originally I don't think I
had paid attention to (nor recalled).

Jan

Re: [PATCH v3 06/10] xen/arm: enable Dom0 to use SVE feature

2023-03-20 Thread Jan Beulich

On 17.03.2023 14:19, Luca Fancellu wrote:
> --- a/docs/misc/xen-command-line.pandoc
> +++ b/docs/misc/xen-command-line.pandoc
> @@ -1005,6 +1005,19 @@ restrictions set up here. Note that the values to be 
> specified here are
>  ACPI PXM ones, not Xen internal node numbers. `relaxed` sets up vCPU
>  affinities to prefer but be not limited to the specified node(s).
>  
> +### dom0_sve (arm)
> +> `= `
> +
> +> Default: `0`
> +
> +Enable arm SVE usage for Dom0 domain and sets the maximum SVE vector length.
> +Values above 0 means feature is enabled for Dom0, otherwise feature is 
> disabled.
> +Possible values are from 0 to maximum 2048, being multiple of 128, that will 
> be
> +the maximum vector length.
> +Please note that the platform can supports a lower value, if the requested 
> value
> +is above the supported one, the domain creation will fail and the system will
> +stop.
> +
>  ### dom0_vcpus_pin
>  > `= `

I'd like to raise the question of proliferation of top-level command
line options controlling Dom0. In x86 we've specifically started to use
"dom0=" as the one top-level option where almost all new controls should
be added as sub-options.

_If_ a top-level option is indeed preferred, then please avoid the use
of an underscore in its name, when a dash does fine.

Jan

Re: [PATCH v3 02/10] xen/arm: add SVE vector length field to the domain

2023-03-20 Thread Jan Beulich

On 17.03.2023 14:19, Luca Fancellu wrote:
> @@ -744,6 +773,9 @@ int arch_domain_create(struct domain *d,
>  if ( (rc = domain_vpci_init(d)) != 0 )
>  goto fail;
>  
> +/* Copy and decode sve_vl from the domain configuration */
> +d->arch.sve_vl_bits = domainconfig_decode_vl(config->arch.sve_vl);

Considering that you now "encode" and "decode" the value when coming in /
going out for a hypercall, wouldn't it make sense to also have the internally
stored value in the same more compact format?

> --- a/xen/arch/arm/include/asm/arm64/sve.h
> +++ b/xen/arch/arm/include/asm/arm64/sve.h
> @@ -13,10 +13,23 @@
>  /* Vector length must be multiple of 128 */
>  #define SVE_VL_MULTIPLE_VAL (128U)
>  
> +static inline bool is_vl_valid(uint16_t vl)
> +{
> +/* SVE vector length is multiple of 128 and maximum 2048 */
> +return ((vl % SVE_VL_MULTIPLE_VAL) == 0) && (vl <= SVE_VL_MAX_BITS);
> +}
> +
> +static inline uint16_t domainconfig_decode_vl(uint8_t sve_vl)
> +{
> +/* SVE vector length is stored as VL/128 in xen_arch_domainconfig */
> +return sve_vl * SVE_VL_MULTIPLE_VAL;
> +}
> +
>  #ifdef CONFIG_ARM64_SVE
>  
>  register_t compute_max_zcr(void);
>  register_t vl_to_zcr(uint16_t vl);
> +uint16_t get_sys_vl_len(void);
>  
>  #else /* !CONFIG_ARM64_SVE */
>  
> @@ -30,6 +43,11 @@ static inline register_t vl_to_zcr(uint16_t vl)
>  return 0;
>  }
>  
> +static inline uint16_t get_sys_vl_len(void)
> +{
> +return 0;
> +}

Throughout here: Style - please limit the use of fixed width types to
cases where they're actually necessary to use to achieve a certain
effect (see ./CODING_STYLE). None of the cases above look to match that
criterion, merely ...

> @@ -114,6 +116,9 @@ struct arch_domain
>  void *tee;
>  #endif
>  
> +/* max SVE vector length in bits */
> +uint16_t sve_vl_bits;

... this may be justified (for space efficiency), while ...

> +
>  }  __cacheline_aligned;

(nit: stray insertion of a blank line)

> --- a/xen/include/public/arch-arm.h
> +++ b/xen/include/public/arch-arm.h
> @@ -300,6 +300,8 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
>  struct xen_arch_domainconfig {
>  /* IN/OUT */
>  uint8_t gic_version;
> +/* IN - Contains SVE vector length divided by 128 */
> +uint8_t sve_vl;

... in the public interface it's of course mandatory to use.

Jan

Re: [PATCH] Fix PCI hotplug AML

2023-03-20 Thread Paul Durrant


On 17/03/2023 10:32, David Woodhouse wrote:

From: David Woodhouse 

The emulated PIIX3 uses a nybble for the status of each PCI function,
so the status for e.g. slot 0 functions 0 and 1 respectively can be
read as (\_GPE.PH00 & 0x0F), and (\_GPE.PH00 >> 0x04).

The AML that Xen gives to a guest gets the operand order for the odd-
numbered functions the wrong way round, returning (0x04 >> \_GPE.PH00)
instead.

As far as I can tell, this was the wrong way round in Xen from the
moment that PCI hotplug was first introduced in commit 83d82e6f35a8:

+ShiftRight (0x4, \_GPE.PH00, Local1)
+Return (Local1) /* IN status as the _STA */

Or maybe there's bizarre AML operand ordering going on there, like
Intel's wrong-way-round assembler, and it only broke later when it was
changed to being generated?

Either way, it's definitely wrong now, and instrumenting a Linux guest
shows that it correctly sees _STA being 0x00 in function 0 of an empty
slot, but then the loop in acpiphp_glue.c::get_slot_status() goes on to
look at function 1 and sees that _STA evaluates to 0x04. Thus reporting
an adapter is present in every slot in /sys/bus/pci/slots/*

Quite why Linux wants to look for function 1 being physically present
when function 0 isn't... I don't want to think about right now.

Signed-off-by: David Woodhouse 
Fixes: 83d82e6f35a8 ("hvmloader: pass-through: multi-function PCI hot-plug")
---
Utterly untested in Xen. Tested the same change in a different
environment which is using precisely the *same* AML for guest
compatibility.



This AML only relates to the hotplug controller for qemu-trad so it's 
unlikely anyone particularly cares any more. In fact I'm kind of 
surprised the generation code still exists.


  Paul


diff --git a/tools/libacpi/mk_dsdt.c b/tools/libacpi/mk_dsdt.c
index 1176da80ef..1d27809116 100644
--- a/tools/libacpi/mk_dsdt.c
+++ b/tools/libacpi/mk_dsdt.c
@@ -431,7 +431,7 @@ int main(int argc, char **argv)
  stmt("Store", "0x89, \\_GPE.DPT2");
  }
  if ( slot & 1 )
-stmt("ShiftRight", "0x4, \\_GPE.PH%02X, Local1", slot & ~1);
+stmt("ShiftRight", "\\_GPE.PH%02X, 0x04, Local1", slot & ~1);
  else
  stmt("And", "\\_GPE.PH%02X, 0x0f, Local1", slot & ~1);
  stmt("Return", "Local1"); /* IN status as the _STA */

Re: [PATCH v2 2/2] x86/APIC: modify error_interrupt() to output using single printk()

2023-03-20 Thread Jan Beulich

On 17.03.2023 20:53, Elliott Mitchell wrote:
> This takes care of the issue of APIC errors tending to occur on multiple
> cores at one.  In turn this tends to causes the error messages to be

Nit: "at once"?

> merged together, making understanding them difficult.
> 
> Signed-off-by: Elliott Mitchell 

Here it becomes clear why you're making the change in patch 1; as you say
in the cover letter these may better be folded (or else, as said there,
patch 1 needs better justification).

> @@ -1419,12 +1420,12 @@ static void cf_check error_interrupt(struct 
> cpu_user_regs *regs)
>  v1 = apic_read(APIC_ESR);
>  ack_APIC_irq();
>  
> -printk(XENLOG_DEBUG "APIC error on CPU%u: %02x(%02x)",
> -smp_processor_id(), v , v1);
>  for ( i = 7; i >= 0; --i )
> -if ( v1 & (1 << i) )
> -printk("%s", esr_fields[i]);
> -printk("\n");
> +entries[i] = v1 & (1 << i) ? esr_fields[i] : "";
> +printk(XENLOG_DEBUG "APIC error on CPU%u: %02x(%02x)"
> +"%s%s%s%s%s%s%s%s" "\n",
> +smp_processor_id(), v , v1, entries[0], entries[1], entries[2],
> +entries[3], entries[4], entries[5], entries[6], entries[7]);

Two style nits: Indentation wants fixing here (it was wrong in the original
code already), and the stray blank between v and the comma also wants
dropping at this occasion.

Jan

Re: [PATCH v2 1/2] x86/APIC: include full string with error_interrupt() error messages

2023-03-20 Thread Jan Beulich

On 17.03.2023 20:45, Elliott Mitchell wrote:
> Rather than adding ", " with each printf(), simply include them in the
> string initially.

Why is this better? You're now using more space in .rodata. (I haven't
looked at patch 2 yet to see whether there's a possible reason there
for the change here, but if there was it would need saying here.)

Jan

[linux-linus test] 179797: regressions - trouble: fail/pass/starved

2023-03-20 Thread osstest service owner

flight 179797 linux-linus real [real]
http://logs.test-lab.xenproject.org/osstest/logs/179797/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-xl-credit1  17 guest-saverestorefail REGR. vs. 178042
 test-amd64-amd64-freebsd12-amd64 16 guest-saverestorefail REGR. vs. 178042
 test-amd64-amd64-xl-xsm  14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-xl-pvhv2-amd 14 guest-start fail REGR. vs. 178042
 test-amd64-amd64-dom0pvh-xl-intel 14 guest-start fail REGR. vs. 178042
 test-amd64-amd64-freebsd11-amd64 16 guest-saverestorefail REGR. vs. 178042
 test-amd64-amd64-dom0pvh-xl-amd 14 guest-start   fail REGR. vs. 178042
 test-amd64-amd64-libvirt-xsm 18 guest-saverestore.2  fail REGR. vs. 178042
 test-amd64-coresched-amd64-xl 17 guest-saverestore   fail REGR. vs. 178042
 test-amd64-amd64-qemuu-nested-intel 13 nested-setup  fail REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-ovmf-amd64 17 guest-saverestore.2 fail REGR. vs. 
178042
 test-arm64-arm64-xl-thunderx 14 guest-start  fail REGR. vs. 178042
 test-arm64-arm64-xl-xsm  14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-dmrestrict-amd64-dmrestrict 13 guest-stop fail REGR. 
vs. 178042
 test-arm64-arm64-libvirt-xsm 14 guest-start  fail REGR. vs. 178042
 test-arm64-arm64-xl  14 guest-start  fail REGR. vs. 178042
 test-arm64-arm64-xl-credit2  17 guest-stop   fail REGR. vs. 178042
 test-arm64-arm64-xl-credit1 18 guest-start/debian.repeat fail REGR. vs. 178042
 test-amd64-amd64-libvirt 14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 18 guest-localmigrate/x10 fail 
REGR. vs. 178042
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 12 debian-hvm-install fail 
REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-debianhvm-amd64 12 debian-hvm-install fail REGR. vs. 
178042
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-shadow 12 debian-hvm-install fail 
REGR. vs. 178042
 test-amd64-amd64-xl-qemut-debianhvm-amd64 12 debian-hvm-install fail REGR. vs. 
178042
 build-i386-pvops  6 kernel-build fail REGR. vs. 178042
 test-amd64-amd64-xl-qemut-stubdom-debianhvm-amd64-xsm 12 debian-hvm-install 
fail REGR. vs. 178042
 test-amd64-amd64-pair25 guest-start/debian   fail REGR. vs. 178042
 test-amd64-amd64-qemuu-nested-amd 12 debian-hvm-install  fail REGR. vs. 178042
 test-amd64-amd64-xl-multivcpu 14 guest-start fail REGR. vs. 178042
 test-amd64-amd64-xl-pvshim   14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-xl-pvhv2-intel 14 guest-start   fail REGR. vs. 178042
 test-amd64-amd64-xl-vhd  12 debian-di-installfail REGR. vs. 178042
 test-amd64-amd64-pygrub  12 debian-di-installfail REGR. vs. 178042
 test-amd64-amd64-libvirt-raw 12 debian-di-installfail REGR. vs. 178042
 test-amd64-amd64-libvirt-qcow2 12 debian-di-install  fail REGR. vs. 178042
 test-arm64-arm64-xl-vhd  12 debian-di-installfail REGR. vs. 178042
 test-arm64-arm64-libvirt-raw 12 debian-di-installfail REGR. vs. 178042
 test-amd64-amd64-xl   17 guest-saverestore fail in 179791 REGR. vs. 178042
 test-amd64-amd64-xl-shadow 20 guest-localmigrate/x10 fail in 179791 REGR. vs. 
178042
 test-amd64-amd64-libvirt-pair 27 guest-migrate/dst_host/src_host fail in 
179791 REGR. vs. 178042
 test-amd64-amd64-xl-credit2 22 guest-start/debian.repeat fail in 179791 REGR. 
vs. 178042
 build-arm64-pvops 6 kernel-build   fail in 179791 REGR. vs. 178042

Tests which are failing intermittently (not blocking):
 test-amd64-amd64-freebsd11-amd64 13 guest-start  fail in 179791 pass in 179797
 test-amd64-amd64-libvirt-xsm 14 guest-start  fail in 179791 pass in 179797
 test-amd64-amd64-xl-qemut-debianhvm-i386-xsm 15 guest-saverestore fail in 
179791 pass in 179797
 test-amd64-amd64-qemuu-nested-intel 12 debian-hvm-install fail in 179791 pass 
in 179797
 test-amd64-coresched-amd64-xl 14 guest-start fail in 179791 pass in 179797
 test-amd64-amd64-xl-qemuu-ovmf-amd64 12 debian-hvm-install fail in 179791 pass 
in 179797
 test-amd64-amd64-xl-qemuu-dmrestrict-amd64-dmrestrict 12 debian-hvm-install 
fail in 179791 pass in 179797
 test-amd64-amd64-xl-shadow   17 guest-saverestore  fail pass in 179791
 test-amd64-amd64-xl  14 guest-startfail pass in 179791
 test-amd64-amd64-libvirt-pair 25 guest-start/debianfail pass in 179791
 test-amd64-amd64-xl-qemuu-debianhvm-i386-xsm 15 guest-saverestore fail pass in 
179791
 test-amd64-amd64-xl-credit2  20 guest-localmigrate/x10 fail pass in 179791

Regressions which are regarded as allowable (not blocking):
 test-amd64-amd64-xl-rtds 14 guest-start  fail REGR. vs. 178042
 test-amd64-amd64-xl-qemuu-debianhvm-i386-xsm 16 guest-localmigrate

[PATCH v4] x86: detect CMOS aliasing on ports other than 0x70/0x71

2023-03-20 Thread Jan Beulich

... in order to also intercept Dom0 accesses through the alias ports.

Also stop intercepting accesses to the CMOS ports if we won't ourselves
use the CMOS RTC.

Signed-off-by: Jan Beulich 
---
v4: Also conditionally mask top bit for guest index port accesses. Add
missing adjustments to rtc_init(). Re-work to avoid recursive
read_lock(). Also adjust guest_io_{read,write}(). Re-base.
v3: Re-base over change to earlier patch.
v2: Re-base.

--- a/xen/arch/x86/hvm/rtc.c
+++ b/xen/arch/x86/hvm/rtc.c
@@ -27,7 +27,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 
@@ -836,10 +836,18 @@ void rtc_init(struct domain *d)
 
 if ( !has_vrtc(d) )
 {
-if ( is_hardware_domain(d) )
-/* Hardware domain gets mediated access to the physical RTC. */
-register_portio_handler(d, RTC_PORT(0), 2, hw_rtc_io);
-return;
+unsigned int port;
+
+if ( !is_hardware_domain(d) )
+return;
+
+/*
+ * Hardware domain gets mediated access to the physical RTC/CMOS
+ * (of course unless we don't use it ourselves).
+ */
+for ( port = RTC_PORT(0); port < RTC_PORT(0) + 0x10; port += 2 )
+if ( is_cmos_port(port, 2, d) )
+register_portio_handler(d, port, 2, hw_rtc_io);
 }
 
 spin_lock_init(>lock);
--- a/xen/arch/x86/include/asm/mc146818rtc.h
+++ b/xen/arch/x86/include/asm/mc146818rtc.h
@@ -9,6 +9,10 @@
 
 extern spinlock_t rtc_lock; /* serialize CMOS RAM access */
 
+struct domain;
+bool is_cmos_port(unsigned int port, unsigned int bytes,
+  const struct domain *d);
+
 /**
  * register summary
  **/
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -220,7 +220,7 @@ static bool admin_io_okay(unsigned int p
 return false;
 
 /* We also never permit direct access to the RTC/CMOS registers. */
-if ( port <= RTC_PORT(1) && port + bytes > RTC_PORT(0) )
+if ( is_cmos_port(port, bytes, d) )
 return false;
 
 return ioports_access_permitted(d, port, port + bytes - 1);
@@ -290,7 +290,7 @@ static uint32_t guest_io_read(unsigned i
 {
 sub_data = pv_pit_handler(port, 0, 0);
 }
-else if ( port == RTC_PORT(0) || port == RTC_PORT(1) )
+else if ( is_cmos_port(port, 1, currd) )
 {
 sub_data = rtc_guest_read(port);
 }
@@ -436,7 +436,7 @@ static void guest_io_write(unsigned int
 {
 pv_pit_handler(port, (uint8_t)data, 1);
 }
-else if ( port == RTC_PORT(0) || port == RTC_PORT(1) )
+else if ( is_cmos_port(port, 1, currd) )
 {
 rtc_guest_write(port, data);
 }
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -2072,37 +2072,36 @@ int __hwdom_init xen_in_range(unsigned l
 static int __hwdom_init cf_check io_bitmap_cb(
 unsigned long s, unsigned long e, void *ctx)
 {
-struct domain *d = ctx;
+const struct domain *d = ctx;
 unsigned int i;
 
 ASSERT(e <= INT_MAX);
 for ( i = s; i <= e; i++ )
-__clear_bit(i, d->arch.hvm.io_bitmap);
+/*
+ * Accesses to RTC ports also need to be trapped in order to keep
+ * consistency with PV.
+ */
+if ( !is_cmos_port(i, 1, d) )
+__clear_bit(i, d->arch.hvm.io_bitmap);
 
 return 0;
 }
 
 void __hwdom_init setup_io_bitmap(struct domain *d)
 {
-int rc;
+if ( !is_hvm_domain(d) )
+return;
 
-if ( is_hvm_domain(d) )
-{
-bitmap_fill(d->arch.hvm.io_bitmap, 0x1);
-rc = rangeset_report_ranges(d->arch.ioport_caps, 0, 0x1,
-io_bitmap_cb, d);
-BUG_ON(rc);
-/*
- * NB: we need to trap accesses to 0xcf8 in order to intercept
- * 4 byte accesses, that need to be handled by Xen in order to
- * keep consistency.
- * Access to 1 byte RTC ports also needs to be trapped in order
- * to keep consistency with PV.
- */
-__set_bit(0xcf8, d->arch.hvm.io_bitmap);
-__set_bit(RTC_PORT(0), d->arch.hvm.io_bitmap);
-__set_bit(RTC_PORT(1), d->arch.hvm.io_bitmap);
-}
+bitmap_fill(d->arch.hvm.io_bitmap, 0x1);
+if ( rangeset_report_ranges(d->arch.ioport_caps, 0, 0x1,
+io_bitmap_cb, d) )
+BUG();
+
+/*
+ * We need to trap 4-byte accesses to 0xcf8 (see admin_io_okay(),
+ * guest_io_read(), and guest_io_write()).
+ */
+__set_bit(0xcf8, d->arch.hvm.io_bitmap);
 }
 
 /*
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1234,7 +1234,10 @@ static unsigned long get_cmos_time(void)
 if ( seconds < 60 )
 {
 if ( rtc.sec != seconds )
+{

Re: [BUG] x2apic broken with current AMD hardware

2023-03-20 Thread Jan Beulich

On 20.03.2023 09:14, Jan Beulich wrote:
> On 17.03.2023 18:26, Elliott Mitchell wrote:
>> On Fri, Mar 17, 2023 at 09:22:09AM +0100, Jan Beulich wrote:
>>> On 16.03.2023 23:03, Elliott Mitchell wrote:
 On Mon, Mar 13, 2023 at 08:01:02AM +0100, Jan Beulich wrote:
> On 11.03.2023 01:09, Elliott Mitchell wrote:
>> On Thu, Mar 09, 2023 at 10:03:23AM +0100, Jan Beulich wrote:
>>>
>>> In any event you will want to collect a serial log at maximum verbosity.
>>> It would also be of interest to know whether turning off the IOMMU 
>>> avoids
>>> the issue as well (on the assumption that your system has less than 255
>>> CPUs).
>>
>> I think I might have figured out the situation in a different fashion.
>>
>> I was taking a look at the BIOS manual for this motherboard and noticed
>> a mention of a "Local APIC Mode" setting.  Four values are listed
>> "Compatibility", "xAPIC", "x2APIC", and "Auto".
>>
>> That is the sort of setting I likely left at "Auto" and that may well
>> result in x2 functionality being disabled.  Perhaps the x2APIC
>> functionality on AMD is detecting whether the hardware is present, and
>> failing to test whether it has been enabled?  (could be useful to output
>> a message suggesting enabling the hardware feature)
>
> Can we please move to a little more technical terms here? What is 
> "present"
> and "enabled" in your view? I don't suppose you mean the CPUID bit (which
> we check) and the x2APIC-mode-enable one (which we drive as needed). It's
> also left unclear what the four modes of BIOS operation evaluate to. Even
> if we knew that, overriding e.g. "Compatibility" (which likely means some
> form of "disabled" / "hidden") isn't normally an appropriate thing to do.
> In "Auto" mode Xen likely should work - the only way I could interpret the
> the other modes are "xAPIC" meaning no x2APIC ACPI tables entries (and
> presumably the CPUID bit also masked), "x2APIC" meaning x2APIC mode pre-
> enabled by firmware, and "Auto" leaving it to the OS to select. Yet that's
> speculation on my part ...

 I provided the information I had discovered.  There is a setting for this
 motherboard (likely present on some similar motherboards) which /may/
 affect the issue.  I doubt I've tried "compatibility", but none of the
 values I've tried have gotten the system to boot without "x2apic=false"
 on Xen's command-line.

 When setting to "x2APIC" just after "(XEN) AMD-Vi: IOMMU Extended 
 Features:"
 I see the line "(XEN) - x2APIC".  Later is the line
 "(XEN) x2APIC mode is already enabled by BIOS."  I'll guess "Auto"
 leaves the x2APIC turned off since neither line is present.
>>>
>>> When "(XEN) - x2APIC" is absent the IOMMU can't be switched into x2APIC
>>> mode. Are you sure that's the case when using "Auto"?
>>
>> grep -eAPIC\ driver -e-\ x2APIC:
>>
>> "Auto":
>> (XEN) Using APIC driver default
>> (XEN) Overriding APIC driver with bigsmp
>> (XEN) Switched to APIC driver x2apic_cluster
>>
>> "x2APIC":
>> (XEN) Using APIC driver x2apic_cluster
>> (XEN) - x2APIC
>>
>> Yes, I'm sure.
> 
> Okay, this then means we're running in a mode we don't mean to run
> in: When the IOMMU claims to not support x2APIC mode (which is odd in
> the first place when at the same time the CPU reports x2APIC mode as
> supported), amd_iommu_prepare() is intended to switch interrupt
> remapping mode to "restricted" (which in turn would force x2APIC mode
> to "physical", not "clustered"). I notice though that there are a
> number of error paths in the function which bypass this setting. Could
> you add a couple of printk()s to understand which path is taken (each
> time; the function can be called more than once)?

I think I've spotted at least one issue. Could you give the patch below
a try please? (Patch is fine for master and 4.17 but would need context
adjustment for 4.16.)

Jan

AMD/IOMMU: without XT, x2APIC needs to be forced into physical mode

An earlier change with the same title (commit 1ba66a870eba) altered only
the path where x2apic_phys was already set to false (perhaps from the
command line). The same of course needs applying when the variable
wasn't modified yet from its initial value.

Reported-by: Elliott Mitchell 
Signed-off-by: Jan Beulich 

--- unstable.orig/xen/arch/x86/genapic/x2apic.c
+++ unstable/xen/arch/x86/genapic/x2apic.c
@@ -236,11 +236,11 @@ const struct genapic *__init apic_x2apic
 if ( x2apic_phys < 0 )
 {
 /*
- * Force physical mode if there's no interrupt remapping support: The
- * ID in clustered mode requires a 32 bit destination field due to
+ * Force physical mode if there's no (full) interrupt remapping 
support:
+ * The ID in clustered mode requires a 32 bit destination field due to
  * the usage of the high 16 bits to hold the cluster ID.
  */
-

[PATCH v3] xen/console: Skip switching serial input to non existing domains

2023-03-20 Thread Michal Orzel

At the moment, we direct serial input to hardware domain by default.
This does not make any sense when running in true dom0less mode, since
such domain does not exist. As a result, users wishing to write to
an emulated UART of a domU are always forced to execute CTRL-AAA first.
The same issue occurs when rotating among serial inputs, where we always
have to go through hardware domain case. This problem can be elaborated
further to all the domains that no longer exist.

Modify switch_serial_input() so that we skip switching serial input to
non existing domains. Take the opportunity to define and make use of
macro max_console_rx to make it clear what 'max_init_domid + 1' means
in the console code context.

For now, to minimize the required changes and to match the current
behavior with hwdom, the default input goes to the first real domain.
The choice is more or less arbitrary since dom0less domUs are supposedly
equal. This will be handled in the future by adding support in boot time
configuration for marking a specific domain preferred in terms of
directing serial input to.

Signed-off-by: Michal Orzel 
---
Changes in v3:
 - properly handle case where domain with highest ID no longer exists
 - define max_console_rx
Changes in v2:
 - was: xen/console: Handle true dom0less case when switching serial input
 - use a more generic approach to handle all non-existing domains
---
 xen/drivers/char/console.c | 39 +-
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index 51e5408f2114..86aa2b9c7165 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -473,6 +473,8 @@ static void cf_check dump_console_ring_key(unsigned char 
key)
  */
 static unsigned int __read_mostly console_rx = 0;
 
+#define max_console_rx (max_init_domid + 1)
+
 /* Make sure to rcu_unlock_domain after use */
 struct domain *console_input_domain(void)
 {
@@ -483,15 +485,34 @@ struct domain *console_input_domain(void)
 
 static void switch_serial_input(void)
 {
-if ( console_rx == max_init_domid + 1 )
-{
-console_rx = 0;
-printk("*** Serial input to Xen");
-}
-else
+unsigned int next_rx = console_rx + 1;
+
+/*
+ * Rotate among Xen, dom0 and boot-time created domUs while skipping
+ * switching serial input to non existing domains.
+ */
+while ( next_rx <= max_console_rx + 1 )
 {
-console_rx++;
-printk("*** Serial input to DOM%d", console_rx - 1);
+if ( next_rx == max_console_rx + 1 )
+{
+console_rx = 0;
+printk("*** Serial input to Xen");
+break;
+}
+else
+{
+struct domain *d = rcu_lock_domain_by_id(next_rx - 1);
+
+if ( d )
+{
+rcu_unlock_domain(d);
+console_rx = next_rx;
+printk("*** Serial input to DOM%d", console_rx - 1);
+break;
+}
+
+next_rx++;
+}
 }
 
 if ( switch_code )
@@ -1089,7 +1110,7 @@ void __init console_endboot(void)
  * a useful 'how to switch' message.
  */
 if ( opt_conswitch[1] == 'x' )
-console_rx = max_init_domid + 1;
+console_rx = max_console_rx;
 
 register_keyhandler('w', dump_console_ring_key,
 "synchronously dump console ring buffer (dmesg)", 0);
-- 
2.25.1

Re: [BUG] x2apic broken with current AMD hardware

2023-03-20 Thread Jan Beulich

On 17.03.2023 18:26, Elliott Mitchell wrote:
> On Fri, Mar 17, 2023 at 09:22:09AM +0100, Jan Beulich wrote:
>> On 16.03.2023 23:03, Elliott Mitchell wrote:
>>> On Mon, Mar 13, 2023 at 08:01:02AM +0100, Jan Beulich wrote:
 On 11.03.2023 01:09, Elliott Mitchell wrote:
> On Thu, Mar 09, 2023 at 10:03:23AM +0100, Jan Beulich wrote:
>>
>> In any event you will want to collect a serial log at maximum verbosity.
>> It would also be of interest to know whether turning off the IOMMU avoids
>> the issue as well (on the assumption that your system has less than 255
>> CPUs).
>
> I think I might have figured out the situation in a different fashion.
>
> I was taking a look at the BIOS manual for this motherboard and noticed
> a mention of a "Local APIC Mode" setting.  Four values are listed
> "Compatibility", "xAPIC", "x2APIC", and "Auto".
>
> That is the sort of setting I likely left at "Auto" and that may well
> result in x2 functionality being disabled.  Perhaps the x2APIC
> functionality on AMD is detecting whether the hardware is present, and
> failing to test whether it has been enabled?  (could be useful to output
> a message suggesting enabling the hardware feature)

 Can we please move to a little more technical terms here? What is "present"
 and "enabled" in your view? I don't suppose you mean the CPUID bit (which
 we check) and the x2APIC-mode-enable one (which we drive as needed). It's
 also left unclear what the four modes of BIOS operation evaluate to. Even
 if we knew that, overriding e.g. "Compatibility" (which likely means some
 form of "disabled" / "hidden") isn't normally an appropriate thing to do.
 In "Auto" mode Xen likely should work - the only way I could interpret the
 the other modes are "xAPIC" meaning no x2APIC ACPI tables entries (and
 presumably the CPUID bit also masked), "x2APIC" meaning x2APIC mode pre-
 enabled by firmware, and "Auto" leaving it to the OS to select. Yet that's
 speculation on my part ...
>>>
>>> I provided the information I had discovered.  There is a setting for this
>>> motherboard (likely present on some similar motherboards) which /may/
>>> affect the issue.  I doubt I've tried "compatibility", but none of the
>>> values I've tried have gotten the system to boot without "x2apic=false"
>>> on Xen's command-line.
>>>
>>> When setting to "x2APIC" just after "(XEN) AMD-Vi: IOMMU Extended Features:"
>>> I see the line "(XEN) - x2APIC".  Later is the line
>>> "(XEN) x2APIC mode is already enabled by BIOS."  I'll guess "Auto"
>>> leaves the x2APIC turned off since neither line is present.
>>
>> When "(XEN) - x2APIC" is absent the IOMMU can't be switched into x2APIC
>> mode. Are you sure that's the case when using "Auto"?
> 
> grep -eAPIC\ driver -e-\ x2APIC:
> 
> "Auto":
> (XEN) Using APIC driver default
> (XEN) Overriding APIC driver with bigsmp
> (XEN) Switched to APIC driver x2apic_cluster
> 
> "x2APIC":
> (XEN) Using APIC driver x2apic_cluster
> (XEN) - x2APIC
> 
> Yes, I'm sure.

Okay, this then means we're running in a mode we don't mean to run
in: When the IOMMU claims to not support x2APIC mode (which is odd in
the first place when at the same time the CPU reports x2APIC mode as
supported), amd_iommu_prepare() is intended to switch interrupt
remapping mode to "restricted" (which in turn would force x2APIC mode
to "physical", not "clustered"). I notice though that there are a
number of error paths in the function which bypass this setting. Could
you add a couple of printk()s to understand which path is taken (each
time; the function can be called more than once)?

>>> Both cases the line "(XEN) Switched to APIC driver x2apic_cluster" is
>>> present (so perhaps "Auto" merely doesn't activate it).
>>
>> Did you also try "x2apic_phys" on the Xen command line (just to be sure
>> this isn't a clustered-mode only issue)?
> 
> No.  In fact x2apic_cluster is mentioned in all failure cases.

Could you give physical mode a try, please?

>>> Appears error_interrupt() needs locking or some concurrency handling
>>> mechanism since the last error is jumbled.  With the setting "x2APIC"
>>> I get a bunch of:
>>> "(XEN) APIC error on CPU#: 00(08)(XEN) APIC error on CPU#: 00(08)"
>>> (apparently one for each core)
>>> Followed by "Receive accept error, Receive accept error," (again,
>>> apparently one for each core).  Then a bunch of newlines (same pattern).
>>
>> This is a known issue, but since the messages shouldn't appear in the
>> first place so far no-one has bothered to address this.
> 
> I won't claim it is the best solution, but see other message.
> 
> I'm tempted to propose allowing _Static_assert() since it is valuable
> functionality for preventing issues.

How does _Static_assert() come into play here? Also note that we already
use it when available ...

Jan

Re: [PATCH v4] x86/HVM: support emulated UMIP

2023-03-20 Thread Jan Beulich

On 17.03.2023 17:09, Roger Pau Monné wrote:
> On Fri, Mar 17, 2023 at 04:01:59PM +0100, Jan Beulich wrote:
>> On 17.03.2023 15:29, Roger Pau Monné wrote:
>>> On Thu, Apr 15, 2021 at 11:47:42AM +0200, Jan Beulich wrote:
>>>> There are three noteworthy drawbacks:
>>>> 1) The intercepts we need to enable here are CPL-independent, i.e. we
>>>>    now have to emulate certain instructions for ring 0.
>>>> 2) On VMX there's no intercept for SMSW, so the emulation isn't really
>>>>    complete there.
>>>
>>> Then I'm afraid we can't set the bit in the max CPUID policy.  What
>>> about domains being migrated from a host that has UMIP to an Intel
>>> host where UMIP is emulated?  They would see a change in behavior in
>>> SMSW, and the behavior won't match the ISA anymore.
>>
>> Right, but that's the price to pay if we want such emulation (which back
>> at the time did look at least desirable, because the other affected insns
>> are more important to deal with). Not setting the bit in the max policy
>> is as good as not having emulation on VMX at all then.
> 
> It would need some kind of justification at least on why it's deemed
> worth exposing in the max policy (and thus made available to incoming
> guests) even when not compliant to the specification.
> 
> Could the non-interception of CR0 reads and thus no #GP on SMSW on
> Intel lead to software malfunctioning as a result?

One can't exclude it of course, but I don't view this as very likely.

But as said in reply to Andrew - I guess I'll simply drop this patch
then (which also eliminates your request for further justification,
which I have to admit I don't really follow).

Jan

Re: [PATCH v4] x86/HVM: support emulated UMIP

2023-03-20 Thread Jan Beulich

On 17.03.2023 17:30, Andrew Cooper wrote:
> On 17/03/2023 2:29 pm, Roger Pau Monné wrote:
>> On Thu, Apr 15, 2021 at 11:47:42AM +0200, Jan Beulich wrote:
>>> There are three noteworthy drawbacks:
>>> 1) The intercepts we need to enable here are CPL-independent, i.e. we
>>>now have to emulate certain instructions for ring 0.
>>> 2) On VMX there's no intercept for SMSW, so the emulation isn't really
>>>complete there.
>> Then I'm afraid we can't set the bit in the max CPUID policy.  What
>> about domains being migrated from a host that has UMIP to an Intel
>> host where UMIP is emulated?  They would see a change in behavior in
>> SMSW, and the behavior won't match the ISA anymore.
> 
> There are conflicting opinions on this.  But the truth is that SMSW only
> leaks the bottom nibble(?) of CR0 and that simply isn't information that
> is of use to an attacker like SGDT/SIDT is.

No, with a register operand SMSW can read the entire CR0. But I dare to
ask what's interesting in the value for an attacker. Hardly any of the
bits will ever vary over time, once past boot.

> So from an entirely ideal point of view there is an argument to say that
> UMIP-but-can't-block-SMSW is better than no UMIP.
> 
> 
> Except, I'm not fully convinced by this argument.
> 
> SMSW aside, emulating UMIP on hardware without it involves emulating the
> guest being able to set CR4.UMIP which is reserved so we have to
> intercept #UD, and intercepting all #GP so we can find the
> S{I,LG}DT/STR/SMSW(on AMD) instructions and fail them in Ring3.
> 
> We went to a lot of effort to not intercept #UD (by default) because it
> exposed x86_emulate() to guest userspace and caused us a huge number of
> security headaches.  Similarly, #GP interception is the source of a lot
> of security bugs on other hypervisors.
> 
> So there is large security concern with this patch.  Which is not a no,
> but definitely is a "need to think about this more carefully".

You recall that this concern (not just here) is what we've introduced the
"verify" hook for? Yes, this doesn't entirely eliminate the concern, but
it reduces the risk quite significantly, I think.

> This logic isn't useful for Linux.  All versions of Linux which know
> about UMIP already put the IDT and GDT on read-only mappings to prevent
> SIDT/SGDT being useful to an attacker on hardware lacking UMIP.  I don't
> know what Windows does here, but I would be amazed if they don't do
> something similar.
> 
> Therefore, this logic is only useful for guests which do know about
> UMIP, and do not have any other defences against SIDT/SGDT.  If this
> isn't an empty set of kernels, it will be a small set.

Well, okay, I guess I'll simply drop this change then, and consider it
merely a (past) useful exercise.

Jan

[ImageBuilder][PATCH v3 1/2] uboot-script-gen: Add XEN_STATIC_HEAP

2023-03-20 Thread jiamei.xie

From: jiamei Xie 

Add a new config parameter to configure Xen static heap.
XEN_STATIC_HEAP="baseaddr1 size1 ... baseaddrN sizeN"
if specified, indicates the host physical address regions
[baseaddr, baseaddr + size) to be reserved as Xen static heap.

For instance, XEN_STATIC_HEAP="0x5000 0x3000", if specified,
indicates the host memory region starting from paddr 0x5000
with a size of 0x3000 to be reserved as static heap.

Signed-off-by: jiamei Xie 
Reviewed-by: Michal Orzel 
Acked-by: Stefano Stabellini 
---
Changes from v1 to v2:
 - add Reviewed-by and Acked-by
Changes from v1:
 - Rename STATIC_HEAP to XEN_STATIC_HEAP and move it right after
   XEN_CMD documentation.
 - Use split_value function instead of opencoding it.
---
 README.md|  4 
 scripts/uboot-script-gen | 20 
 2 files changed, 24 insertions(+)

diff --git a/README.md b/README.md
index 814a004..78b83f1 100644
--- a/README.md
+++ b/README.md
@@ -97,6 +97,10 @@ Where:
 - XEN_CMD specifies the command line arguments used for Xen.  If not
   set, the default one will be used.
 
+- XEN_STATIC_HEAP="baseaddr1 size1 ... baseaddrN sizeN"
+  if specified, indicates the host physical address regions
+  [baseaddr, baseaddr + size) to be reserved as Xen static heap.
+
 - PASSTHROUGH_DTS_REPO specifies the git repository and/or the directory
   which contains the partial device trees. This is optional. However, if
   this is specified, then DOMU_PASSTHROUGH_PATHS[number] need to be specified.
diff --git a/scripts/uboot-script-gen b/scripts/uboot-script-gen
index f07e334..cca3e59 100755
--- a/scripts/uboot-script-gen
+++ b/scripts/uboot-script-gen
@@ -189,6 +189,21 @@ function add_device_tree_static_mem()
 dt_set "$path" "xen,static-mem" "hex" "${cells[*]}"
 }
 
+function add_device_tree_xen_static_heap()
+{
+local path=$1
+local regions=$2
+local cells=()
+local val
+
+for val in ${regions[@]}
+do
+cells+=("$(split_value $val)")
+done
+
+dt_set "$path" "xen,static-heap" "hex" "${cells[*]}"
+}
+
 function add_device_tree_cpupools()
 {
 local cpu
@@ -344,6 +359,11 @@ function xen_device_tree_editing()
 then
 add_device_tree_cpupools
 fi
+
+if test "${XEN_STATIC_HEAP}"
+then
+add_device_tree_xen_static_heap "/chosen" "${XEN_STATIC_HEAP}"
+fi
 }
 
 function linux_device_tree_editing()
-- 
2.25.1

[ImageBuilder][PATCH v3 2/2] uboot-script-gen: add support for static shared memory

2023-03-20 Thread jiamei.xie

Introduce support for creating shared-mem node for dom0less domUs in
the device tree. Add the following option:
- DOMU_SHARED_MEM[number]="SHM-ID HPA GPA size"
  if specified, indicates that the shared memory region is uniquely
  identified by SHM-ID, that the host physical address HPA will get
  mapped at guest address GPA in domU, and that a region of the given
  size will be reserved as shared memory.

The static shared memory is used between two dom0less domUs.

Below is an example:
NUM_DOMUS=2
DOMU_SHARED_MEM[0]="my-shared-mem-0 0x5000 0x600 0x1000"
DOMU_SHARED_MEM[1]="my-shared-mem-0 0x5000 0x600 0x1000"

This static shared memory region is identified as "my-shared-mem-0",
host physical address starting at 0x5000 of 256MB will be reserved
to be shared between two domUs. It will get mapped at 0x600 in both
guest physical address space. Both DomUs are the borrower domain, the
owner domain is the default owner domain DOMID_IO.

Signed-off-by: jiamei.xie 
---
Changes from v2:
 - Remove "domid" parameter
 - Use lowercase letters for local variables
Changes from v1:
 - Rather than two separate properties and just use one like follows:
   Change
 DOMU_SHARED_MEM[0]="0x5000 0x600 0x1000"
 DOMU_SHARED_MEM_ID[0]="my-shared-mem-0"
   to
 DOMU_SHARED_MEM[0]="my-shared-mem-0 0x5000 0x600 0x1000"
 - Use split_value function instead of opencoding it.
---
 README.md| 17 +
 scripts/uboot-script-gen | 26 ++
 2 files changed, 43 insertions(+)

diff --git a/README.md b/README.md
index 78b83f1..fe5d205 100644
--- a/README.md
+++ b/README.md
@@ -196,6 +196,23 @@ Where:
   if specified, indicates the host physical address regions
   [baseaddr, baseaddr + size) to be reserved to the VM for static allocation.
 
+- DOMU_SHARED_MEM[number]="SHM-ID HPA GPA size"
+  if specified, indicate SHM-ID represents the unique identifier of the shared
+  memory region, the host physical address HPA will get mapped at guest
+  address GPA in domU and the memory of size will be reserved to be shared
+  memory. The shared memory is used between two dom0less domUs.
+
+  Below is an example:
+  NUM_DOMUS=2
+  DOMU_SHARED_MEM[0]="my-shared-mem-0 0x5000 0x600 0x1000"
+  DOMU_SHARED_MEM[1]="my-shared-mem-0 0x5000 0x600 0x1000"
+
+  This static shared memory region is identified as "my-shared-mem-0", host
+  physical address starting at 0x5000 of 256MB will be reserved to be
+  shared between two domUs. It will get mapped at 0x600 in both guest
+  physical address space. Both DomUs are the borrower domain, the owner
+  domain is the default owner domain DOMID_IO.
+
 - DOMU_DIRECT_MAP[number] can be set to 1 or 0.
   If set to 1, the VM is direct mapped. The default is 1.
   This is only applicable when DOMU_STATIC_MEM is specified.
diff --git a/scripts/uboot-script-gen b/scripts/uboot-script-gen
index cca3e59..9656a45 100755
--- a/scripts/uboot-script-gen
+++ b/scripts/uboot-script-gen
@@ -204,6 +204,27 @@ function add_device_tree_xen_static_heap()
 dt_set "$path" "xen,static-heap" "hex" "${cells[*]}"
 }
 
+function add_device_tree_static_shared_mem()
+{
+local path=$1
+local shared_mem=$2
+local shared_mem_id=${shared_mem%% *}
+local regions="${shared_mem#* }"
+local cells=()
+local shared_mem_host=${regions%% *}
+
+dt_mknode "${path}" "shared-mem@${shared_mem_host}"
+
+for val in ${regions[@]}
+do
+cells+=("$(split_value $val)")
+done
+
+dt_set "${path}/shared-mem@${shared_mem_host}" "compatible" "str" 
"xen,domain-shared-memory-v1"
+dt_set "${path}/shared-mem@${shared_mem_host}" "xen,shm-id" "str" 
"${shared_mem_id}"
+dt_set "${path}/shared-mem@${shared_mem_host}" "xen,shared-mem" "hex" 
"${cells[*]}"
+}
+
 function add_device_tree_cpupools()
 {
 local cpu
@@ -329,6 +350,11 @@ function xen_device_tree_editing()
 dt_set "/chosen/domU$i" "xen,enhanced" "str" "enabled"
 fi
 
+if test -n "${DOMU_SHARED_MEM[i]}"
+then
+add_device_tree_static_shared_mem "/chosen/domU${i}" 
"${DOMU_SHARED_MEM[i]}"
+fi
+
 if test "${DOMU_COLORS[$i]}"
 then
 local startcolor=$(echo "${DOMU_COLORS[$i]}"  | cut -d "-" -f 1)
-- 
2.25.1

1 2 >

1 - 100 of 101 matches

Mail list logo