from:"Sergey Dyasli"

[PATCH v5 2/3] x86/platform: introduce XENPF_get_ucode_revision

2023-04-17 Thread Sergey Dyasli

Currently it's impossible to get CPU's microcode revision from Xen after
late loading without looking into Xen logs which is not always convenient.

Add a new platform op in order to get the required data from Xen and
provide a wrapper for libxenctrl.

Signed-off-by: Sergey Dyasli 
Reviewed-by: Jan Beulich 
---
v4 --> v5:
- Added Reviewed-by

v3 --> v4:
- clarified the commit message
- Renamed "ucode version" to "ucode revision"
- Removed DECLARE_PLATFORM_OP and NULL checks
- Added a TODO comment about parked CPUs
- Renamed struct xenpf_ucode_revision fields
---
 tools/include/xenctrl.h  |  2 ++
 tools/libs/ctrl/xc_misc.c| 18 +++
 xen/arch/x86/platform_hypercall.c| 29 
 xen/arch/x86/x86_64/platform_hypercall.c |  4 
 xen/include/public/platform.h| 11 +
 xen/include/xlat.lst |  1 +
 6 files changed, 65 insertions(+)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 34b3b25289..1149f805ba 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -1187,6 +1187,8 @@ int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo);
 int xc_microcode_update(xc_interface *xch, const void *buf, size_t len);
 int xc_get_cpu_version(xc_interface *xch, struct xenpf_pcpu_version *cpu_ver);
+int xc_get_ucode_revision(xc_interface *xch,
+  struct xenpf_ucode_revision *ucode_rev);
 int xc_numainfo(xc_interface *xch, unsigned *max_nodes,
 xc_meminfo_t *meminfo, uint32_t *distance);
 int xc_pcitopoinfo(xc_interface *xch, unsigned num_devs,
diff --git a/tools/libs/ctrl/xc_misc.c b/tools/libs/ctrl/xc_misc.c
index 90d50faa4f..4159294b2e 100644
--- a/tools/libs/ctrl/xc_misc.c
+++ b/tools/libs/ctrl/xc_misc.c
@@ -243,6 +243,24 @@ int xc_get_cpu_version(xc_interface *xch, struct 
xenpf_pcpu_version *cpu_ver)
 return 0;
 }
 
+int xc_get_ucode_revision(xc_interface *xch,
+  struct xenpf_ucode_revision *ucode_rev)
+{
+int ret;
+struct xen_platform_op op = {
+.cmd = XENPF_get_ucode_revision,
+.u.ucode_revision.cpu = ucode_rev->cpu,
+};
+
+ret = do_platform_op(xch, &op);
+if ( ret != 0 )
+return ret;
+
+*ucode_rev = op.u.ucode_revision;
+
+return 0;
+}
+
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo)
 {
diff --git a/xen/arch/x86/platform_hypercall.c 
b/xen/arch/x86/platform_hypercall.c
index a2d9526355..9ff2da8fc3 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -640,6 +640,35 @@ ret_t do_platform_op(
 }
 break;
 
+case XENPF_get_ucode_revision:
+{
+struct xenpf_ucode_revision *rev = &op->u.ucode_revision;
+
+if ( !get_cpu_maps() )
+{
+ret = -EBUSY;
+break;
+}
+
+/* TODO: make it possible to know ucode revisions for parked CPUs */
+if ( (rev->cpu >= nr_cpu_ids) || !cpu_online(rev->cpu) )
+ret = -ENOENT;
+else
+{
+const struct cpu_signature *sig = &per_cpu(cpu_sig, rev->cpu);
+
+rev->signature = sig->sig;
+rev->pf = sig->pf;
+rev->revision = sig->rev;
+}
+
+put_cpu_maps();
+
+if ( __copy_field_to_guest(u_xenpf_op, op, u.ucode_revision) )
+ret = -EFAULT;
+}
+break;
+
 case XENPF_cpu_online:
 {
 int cpu = op->u.cpu_ol.cpuid;
diff --git a/xen/arch/x86/x86_64/platform_hypercall.c 
b/xen/arch/x86/x86_64/platform_hypercall.c
index 5bf6b958d2..99440f4076 100644
--- a/xen/arch/x86/x86_64/platform_hypercall.c
+++ b/xen/arch/x86/x86_64/platform_hypercall.c
@@ -28,6 +28,10 @@ CHECK_pf_pcpuinfo;
 CHECK_pf_pcpu_version;
 #undef xen_pf_pcpu_version
 
+#define xen_pf_ucode_revision xenpf_ucode_revision
+CHECK_pf_ucode_revision;
+#undef xen_pf_pucode_revision
+
 #define xen_pf_enter_acpi_sleep xenpf_enter_acpi_sleep
 CHECK_pf_enter_acpi_sleep;
 #undef xen_pf_enter_acpi_sleep
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index 60caa5ce7e..15777b5416 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -614,6 +614,16 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_symdata_t);
 typedef struct dom0_vga_console_info xenpf_dom0_console_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_dom0_console_t);
 
+#define XENPF_get_ucode_revision 65
+struct xenpf_ucode_revision {
+uint32_t cpu; /* IN:  CPU number to get the revision from.  */
+uint32_t signature;   /* OUT: CPU signature (CPUID.1.EAX).  */
+uint32_t pf;  /* OUT: Platform Flags (Intel only)   */
+uint32_t revision;/* OUT: Microcode Revision.   */
+};
+typedef struct xenpf_ucode_revision xenpf_ucode_revision_t;
+

[PATCH v5 0/3] xen-ucode: print information about currently loaded ucode

2023-04-17 Thread Sergey Dyasli

Currently it's impossible to get CPU's microcode revision from Xen after
late loading without looking into Xen logs which is not always convenient.
Add an option to xen-ucode tool to print the currently loaded ucode
revision.

Sergey Dyasli (3):
  tools/xenctrl: add xc_get_cpu_version()
  x86/platform: introduce XENPF_get_ucode_revision
  tools/xen-ucode: print information about currently loaded ucode

 tools/include/xenctrl.h  |  3 +
 tools/libs/ctrl/xc_misc.c| 35 ++
 tools/misc/xen-ucode.c   | 85 +---
 xen/arch/x86/platform_hypercall.c| 29 
 xen/arch/x86/x86_64/platform_hypercall.c |  4 ++
 xen/include/public/platform.h| 11 +++
 xen/include/xlat.lst |  1 +
 7 files changed, 158 insertions(+), 10 deletions(-)

-- 
2.17.1

[PATCH v5 3/3] tools/xen-ucode: print information about currently loaded ucode

2023-04-17 Thread Sergey Dyasli

Add an option to xen-ucode tool to print the currently loaded ucode
revision and also print it during usage info.  Print CPU signature and
platform flags as well.  The raw data comes from XENPF_get_cpu_version
and XENPF_get_ucode_revision platform ops.

Example output:
Intel: CPU signature 06-55-04 (raw 0x00050654) pf 0x1 revision 0x02006e05
  AMD: CPU signature 19-01-01 (raw 0x00a00f11) revision 0x0a0011ce

Signed-off-by: Sergey Dyasli 
---
v4 --> v5:
- Changed AMD output to be FF-MM-SS instead of famXX
- Modified usage string
- Fixed fprintf indentation
- Printing error messages always to stderr
- Use appropriate exit codes in show_curr_cpu()
---
 tools/misc/xen-ucode.c | 85 +-
 1 file changed, 75 insertions(+), 10 deletions(-)

diff --git a/tools/misc/xen-ucode.c b/tools/misc/xen-ucode.c
index ad32face2b..c6ae6498d6 100644
--- a/tools/misc/xen-ucode.c
+++ b/tools/misc/xen-ucode.c
@@ -12,22 +12,95 @@
 #include 
 #include 
 
+static xc_interface *xch;
+
+static const char intel_id[] = "GenuineIntel";
+static const char   amd_id[] = "AuthenticAMD";
+
+static void show_curr_cpu(FILE *f)
+{
+int ret;
+struct xenpf_pcpu_version cpu_ver = { .xen_cpuid = 0 };
+struct xenpf_ucode_revision ucode_rev = { .cpu = 0 };
+/* Always exit with 2 when called during usage-info */
+int exit_code = (f == stderr) ? 2 : 1;
+
+ret = xc_get_cpu_version(xch, &cpu_ver);
+if ( ret )
+{
+fprintf(stderr, "Failed to get CPU information. (err: %s)\n",
+strerror(errno));
+exit(exit_code);
+}
+
+ret = xc_get_ucode_revision(xch, &ucode_rev);
+if ( ret )
+{
+fprintf(stderr, "Failed to get microcode information. (err: %s)\n",
+strerror(errno));
+exit(exit_code);
+}
+
+/*
+ * Print signature in a form that allows to quickly identify which ucode
+ * blob to load, e.g.:
+ *
+ *  Intel:   /lib/firmware/intel-ucode/06-55-04
+ *  AMD: /lib/firmware/amd-ucode/microcode_amd_fam19h.bin
+ */
+if ( memcmp(cpu_ver.vendor_id, intel_id,
+sizeof(cpu_ver.vendor_id)) == 0 )
+{
+fprintf(f,
+"CPU signature %02x-%02x-%02x (raw 0x%08x) pf %#x revision 0x%08x\n",
+cpu_ver.family, cpu_ver.model, cpu_ver.stepping,
+ucode_rev.signature, ucode_rev.pf, ucode_rev.revision);
+}
+else if ( memcmp(cpu_ver.vendor_id, amd_id,
+ sizeof(cpu_ver.vendor_id)) == 0 )
+{
+fprintf(f,
+"CPU signature %02x-%02x-%02x (raw 0x%08x) revision 0x%08x\n",
+cpu_ver.family, cpu_ver.model, cpu_ver.stepping,
+ucode_rev.signature, ucode_rev.revision);
+}
+else
+{
+fprintf(f, "Unsupported CPU vendor: %s\n", cpu_ver.vendor_id);
+exit(exit_code);
+}
+}
+
 int main(int argc, char *argv[])
 {
 int fd, ret;
 char *filename, *buf;
 size_t len;
 struct stat st;
-xc_interface *xch;
+
+xch = xc_interface_open(NULL, NULL, 0);
+if ( xch == NULL )
+{
+fprintf(stderr, "Error opening xc interface. (err: %s)\n",
+strerror(errno));
+exit(1);
+}
 
 if ( argc < 2 )
 {
 fprintf(stderr,
 "xen-ucode: Xen microcode updating tool\n"
-"Usage: %s <microcode blob>\n", argv[0]);
+"Usage: %s [<microcode file> | show-cpu-info]\n", argv[0]);
+show_curr_cpu(stderr);
 exit(2);
 }
 
+if ( !strcmp(argv[1], "show-cpu-info") )
+{
+show_curr_cpu(stdout);
+return 0;
+}
+
 filename = argv[1];
 fd = open(filename, O_RDONLY);
 if ( fd < 0 )
@@ -52,14 +125,6 @@ int main(int argc, char *argv[])
 exit(1);
 }
 
-xch = xc_interface_open(NULL, NULL, 0);
-if ( xch == NULL )
-{
-fprintf(stderr, "Error opening xc interface. (err: %s)\n",
-strerror(errno));
-exit(1);
-}
-
 ret = xc_microcode_update(xch, buf, len);
 if ( ret )
 {
-- 
2.17.1

[PATCH v5 1/3] tools/xenctrl: add xc_get_cpu_version()

2023-04-17 Thread Sergey Dyasli

As a wrapper for XENPF_get_cpu_version platform op.

Signed-off-by: Sergey Dyasli 
Reviewed-by: Andrew Cooper 
---
v4 --> v5:
- Added Reviewed-by

v3 --> v4:
- Replaced DECLARE_PLATFORM_OP
- Removed NULL checks
---
 tools/include/xenctrl.h   |  1 +
 tools/libs/ctrl/xc_misc.c | 17 +
 2 files changed, 18 insertions(+)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 05967ecc92..34b3b25289 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -1186,6 +1186,7 @@ int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo);
 int xc_microcode_update(xc_interface *xch, const void *buf, size_t len);
+int xc_get_cpu_version(xc_interface *xch, struct xenpf_pcpu_version *cpu_ver);
 int xc_numainfo(xc_interface *xch, unsigned *max_nodes,
 xc_meminfo_t *meminfo, uint32_t *distance);
 int xc_pcitopoinfo(xc_interface *xch, unsigned num_devs,
diff --git a/tools/libs/ctrl/xc_misc.c b/tools/libs/ctrl/xc_misc.c
index 265f15ec2d..90d50faa4f 100644
--- a/tools/libs/ctrl/xc_misc.c
+++ b/tools/libs/ctrl/xc_misc.c
@@ -226,6 +226,23 @@ int xc_microcode_update(xc_interface *xch, const void 
*buf, size_t len)
 return ret;
 }
 
+int xc_get_cpu_version(xc_interface *xch, struct xenpf_pcpu_version *cpu_ver)
+{
+int ret;
+struct xen_platform_op op = {
+.cmd = XENPF_get_cpu_version,
+.u.pcpu_version.xen_cpuid = cpu_ver->xen_cpuid,
+};
+
+ret = do_platform_op(xch, &op);
+if ( ret != 0 )
+return ret;
+
+*cpu_ver = op.u.pcpu_version;
+
+return 0;
+}
+
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo)
 {
-- 
2.17.1

[PATCH v4 0/3] xen-ucode: print information about currently loaded ucode

2023-04-04 Thread Sergey Dyasli

Posting v4 with addressed review comments. Changes from v3 are available
in each patch.

Sergey Dyasli (3):
  tools/xenctrl: add xc_get_cpu_version()
  x86/platform: introduce XENPF_get_ucode_revision
  tools/xen-ucode: print information about currently loaded ucode

 tools/include/xenctrl.h  |  3 +
 tools/libs/ctrl/xc_misc.c| 35 ++
 tools/misc/xen-ucode.c   | 83 
 xen/arch/x86/platform_hypercall.c| 29 +
 xen/arch/x86/x86_64/platform_hypercall.c |  4 ++
 xen/include/public/platform.h| 11 
 xen/include/xlat.lst |  1 +
 7 files changed, 154 insertions(+), 12 deletions(-)

-- 
2.17.1

[PATCH v4 3/3] tools/xen-ucode: print information about currently loaded ucode

2023-04-04 Thread Sergey Dyasli

Add an option to xen-ucode tool to print the currently loaded ucode
revision and also print it during usage info.  Print CPU signature and
platform flags as well.  The raw data comes from XENPF_get_cpu_version
and XENPF_get_ucode_revision platform ops.

Example output:
Intel:
CPU signature 06-55-04 (raw 0x00050654) pf 0x1 revision 0x02006e05

AMD:
CPU signature fam19h (raw 0x00a00f11) revision 0x0a0011ce

Signed-off-by: Sergey Dyasli 
---
v3 --> v4:
- changed the output to be 1-line long
- made xc_interface *xch global
- added error checking to xc calls
- added error for unsupported CPU vendor
- changed printf format to 0x%08x for raw signature and revision values
---
 tools/misc/xen-ucode.c | 83 --
 1 file changed, 71 insertions(+), 12 deletions(-)

diff --git a/tools/misc/xen-ucode.c b/tools/misc/xen-ucode.c
index ad32face2b..bd0bfaaa00 100644
--- a/tools/misc/xen-ucode.c
+++ b/tools/misc/xen-ucode.c
@@ -12,22 +12,89 @@
 #include 
 #include 
 
+static xc_interface *xch;
+
+static const char intel_id[] = "GenuineIntel";
+static const char   amd_id[] = "AuthenticAMD";
+
+static void show_curr_cpu(FILE *f)
+{
+int ret;
+struct xenpf_pcpu_version cpu_ver = { .xen_cpuid = 0 };
+struct xenpf_ucode_revision ucode_rev = { .cpu = 0 };
+
+ret = xc_get_cpu_version(xch, &cpu_ver);
+if ( ret )
+{
+fprintf(f, "Failed to get CPU information. (err: %s)\n",
+strerror(errno));
+exit(1);
+}
+
+ret = xc_get_ucode_revision(xch, &ucode_rev);
+if ( ret )
+{
+fprintf(f, "Failed to get microcode information. (err: %s)\n",
+strerror(errno));
+exit(1);
+}
+
+/*
+ * Print signature in a form that allows to quickly identify which ucode
+ * blob to load, e.g.:
+ *
+ *  Intel:   /lib/firmware/intel-ucode/06-55-04
+ *  AMD: /lib/firmware/amd-ucode/microcode_amd_fam19h.bin
+ */
+if ( memcmp(cpu_ver.vendor_id, intel_id,
+sizeof(cpu_ver.vendor_id)) == 0 )
+{
+fprintf(f, "CPU signature %02x-%02x-%02x (raw 0x%08x) pf %#x revision 0x%08x\n",
+   cpu_ver.family, cpu_ver.model, cpu_ver.stepping,
+   ucode_rev.signature, ucode_rev.pf, ucode_rev.revision);
+}
+else if ( memcmp(cpu_ver.vendor_id, amd_id,
+ sizeof(cpu_ver.vendor_id)) == 0 )
+{
+fprintf(f, "CPU signature fam%xh (raw 0x%08x) revision 0x%08x\n",
+   cpu_ver.family, ucode_rev.signature, ucode_rev.revision);
+}
+else
+{
+fprintf(f, "Unsupported CPU vendor: %s\n", cpu_ver.vendor_id);
+exit(3);
+}
+}
+
 int main(int argc, char *argv[])
 {
 int fd, ret;
 char *filename, *buf;
 size_t len;
 struct stat st;
-xc_interface *xch;
+
+xch = xc_interface_open(NULL, NULL, 0);
+if ( xch == NULL )
+{
+fprintf(stderr, "Error opening xc interface. (err: %s)\n",
+strerror(errno));
+exit(1);
+}
 
 if ( argc < 2 )
 {
-fprintf(stderr,
-"xen-ucode: Xen microcode updating tool\n"
-"Usage: %s <microcode blob>\n", argv[0]);
+fprintf(stderr, "xen-ucode: Xen microcode updating tool\n");
+show_curr_cpu(stderr);
+fprintf(stderr, "Usage: %s \n", argv[0]);
 exit(2);
 }
 
+if ( !strcmp(argv[1], "show-cpu-info") )
+{
+show_curr_cpu(stdout);
+return 0;
+}
+
 filename = argv[1];
 fd = open(filename, O_RDONLY);
 if ( fd < 0 )
@@ -52,14 +119,6 @@ int main(int argc, char *argv[])
 exit(1);
 }
 
-xch = xc_interface_open(NULL, NULL, 0);
-if ( xch == NULL )
-{
-fprintf(stderr, "Error opening xc interface. (err: %s)\n",
-strerror(errno));
-exit(1);
-}
-
 ret = xc_microcode_update(xch, buf, len);
 if ( ret )
 {
-- 
2.17.1

[PATCH v4 1/3] tools/xenctrl: add xc_get_cpu_version()

2023-04-04 Thread Sergey Dyasli

As a wrapper for XENPF_get_cpu_version platform op.

Signed-off-by: Sergey Dyasli 
---
v3 --> v4:
- Replaced DECLARE_PLATFORM_OP
- Removed NULL checks
---
 tools/include/xenctrl.h   |  1 +
 tools/libs/ctrl/xc_misc.c | 17 +
 2 files changed, 18 insertions(+)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 05967ecc92..34b3b25289 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -1186,6 +1186,7 @@ int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo);
 int xc_microcode_update(xc_interface *xch, const void *buf, size_t len);
+int xc_get_cpu_version(xc_interface *xch, struct xenpf_pcpu_version *cpu_ver);
 int xc_numainfo(xc_interface *xch, unsigned *max_nodes,
 xc_meminfo_t *meminfo, uint32_t *distance);
 int xc_pcitopoinfo(xc_interface *xch, unsigned num_devs,
diff --git a/tools/libs/ctrl/xc_misc.c b/tools/libs/ctrl/xc_misc.c
index 265f15ec2d..90d50faa4f 100644
--- a/tools/libs/ctrl/xc_misc.c
+++ b/tools/libs/ctrl/xc_misc.c
@@ -226,6 +226,23 @@ int xc_microcode_update(xc_interface *xch, const void 
*buf, size_t len)
 return ret;
 }
 
+int xc_get_cpu_version(xc_interface *xch, struct xenpf_pcpu_version *cpu_ver)
+{
+int ret;
+struct xen_platform_op op = {
+.cmd = XENPF_get_cpu_version,
+.u.pcpu_version.xen_cpuid = cpu_ver->xen_cpuid,
+};
+
+ret = do_platform_op(xch, &op);
+if ( ret != 0 )
+return ret;
+
+*cpu_ver = op.u.pcpu_version;
+
+return 0;
+}
+
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo)
 {
-- 
2.17.1

[PATCH v4 2/3] x86/platform: introduce XENPF_get_ucode_revision

2023-04-04 Thread Sergey Dyasli

Currently it's impossible to get CPU's microcode revision from Xen after
late loading without looking into Xen logs which is not always convenient.

Add a new platform op in order to get the required data from Xen and
provide a wrapper for libxenctrl.

Signed-off-by: Sergey Dyasli 
---
v3 --> v4:
- clarified the commit message
- Renamed "ucode version" to "ucode revision"
- Removed DECLARE_PLATFORM_OP and NULL checks
- Added a TODO comment about parked CPUs
- Renamed struct xenpf_ucode_revision fields
---
 tools/include/xenctrl.h  |  2 ++
 tools/libs/ctrl/xc_misc.c| 18 +++
 xen/arch/x86/platform_hypercall.c| 29 
 xen/arch/x86/x86_64/platform_hypercall.c |  4 
 xen/include/public/platform.h| 11 +
 xen/include/xlat.lst |  1 +
 6 files changed, 65 insertions(+)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 34b3b25289..1149f805ba 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -1187,6 +1187,8 @@ int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo);
 int xc_microcode_update(xc_interface *xch, const void *buf, size_t len);
 int xc_get_cpu_version(xc_interface *xch, struct xenpf_pcpu_version *cpu_ver);
+int xc_get_ucode_revision(xc_interface *xch,
+  struct xenpf_ucode_revision *ucode_rev);
 int xc_numainfo(xc_interface *xch, unsigned *max_nodes,
 xc_meminfo_t *meminfo, uint32_t *distance);
 int xc_pcitopoinfo(xc_interface *xch, unsigned num_devs,
diff --git a/tools/libs/ctrl/xc_misc.c b/tools/libs/ctrl/xc_misc.c
index 90d50faa4f..4159294b2e 100644
--- a/tools/libs/ctrl/xc_misc.c
+++ b/tools/libs/ctrl/xc_misc.c
@@ -243,6 +243,24 @@ int xc_get_cpu_version(xc_interface *xch, struct 
xenpf_pcpu_version *cpu_ver)
 return 0;
 }
 
+int xc_get_ucode_revision(xc_interface *xch,
+  struct xenpf_ucode_revision *ucode_rev)
+{
+int ret;
+struct xen_platform_op op = {
+.cmd = XENPF_get_ucode_revision,
+.u.ucode_revision.cpu = ucode_rev->cpu,
+};
+
+ret = do_platform_op(xch, &op);
+if ( ret != 0 )
+return ret;
+
+*ucode_rev = op.u.ucode_revision;
+
+return 0;
+}
+
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo)
 {
diff --git a/xen/arch/x86/platform_hypercall.c 
b/xen/arch/x86/platform_hypercall.c
index a2d9526355..9ff2da8fc3 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -640,6 +640,35 @@ ret_t do_platform_op(
 }
 break;
 
+case XENPF_get_ucode_revision:
+{
+struct xenpf_ucode_revision *rev = &op->u.ucode_revision;
+
+if ( !get_cpu_maps() )
+{
+ret = -EBUSY;
+break;
+}
+
+/* TODO: make it possible to know ucode revisions for parked CPUs */
+if ( (rev->cpu >= nr_cpu_ids) || !cpu_online(rev->cpu) )
+ret = -ENOENT;
+else
+{
+const struct cpu_signature *sig = &per_cpu(cpu_sig, rev->cpu);
+
+rev->signature = sig->sig;
+rev->pf = sig->pf;
+rev->revision = sig->rev;
+}
+
+put_cpu_maps();
+
+if ( __copy_field_to_guest(u_xenpf_op, op, u.ucode_revision) )
+ret = -EFAULT;
+}
+break;
+
 case XENPF_cpu_online:
 {
 int cpu = op->u.cpu_ol.cpuid;
diff --git a/xen/arch/x86/x86_64/platform_hypercall.c 
b/xen/arch/x86/x86_64/platform_hypercall.c
index 5bf6b958d2..99440f4076 100644
--- a/xen/arch/x86/x86_64/platform_hypercall.c
+++ b/xen/arch/x86/x86_64/platform_hypercall.c
@@ -28,6 +28,10 @@ CHECK_pf_pcpuinfo;
 CHECK_pf_pcpu_version;
 #undef xen_pf_pcpu_version
 
+#define xen_pf_ucode_revision xenpf_ucode_revision
+CHECK_pf_ucode_revision;
+#undef xen_pf_pucode_revision
+
 #define xen_pf_enter_acpi_sleep xenpf_enter_acpi_sleep
 CHECK_pf_enter_acpi_sleep;
 #undef xen_pf_enter_acpi_sleep
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index 60caa5ce7e..15777b5416 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -614,6 +614,16 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_symdata_t);
 typedef struct dom0_vga_console_info xenpf_dom0_console_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_dom0_console_t);
 
+#define XENPF_get_ucode_revision 65
+struct xenpf_ucode_revision {
+uint32_t cpu; /* IN:  CPU number to get the revision from.  */
+uint32_t signature;   /* OUT: CPU signature (CPUID.1.EAX).  */
+uint32_t pf;  /* OUT: Platform Flags (Intel only)   */
+uint32_t revision;/* OUT: Microcode Revision.   */
+};
+typedef struct xenpf_ucode_revision xenpf_ucode_revision_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_ucode_revision_t);
+
 /*
  *

Re: [PATCH] x86/ucode: Fix error paths control_thread_fn()

2023-03-27 Thread Sergey Dyasli

On Fri, Mar 24, 2023 at 9:44 PM Andrew Cooper  wrote:
>
> These two early exits skipped re-enabling the watchdog, and restoring the NMI
> callback.  Always execute the tail of the function on the way out.
>
> Fixes: 8dd4dfa92d62 ("x86/microcode: Synchronize late microcode loading")
> Signed-off-by: Andrew Cooper 

Reviewed-by: Sergey Dyasli 

Thanks,
Sergey

[PATCH v3 1/3] tools/xenctrl: add xc_get_cpu_version()

2023-03-21 Thread Sergey Dyasli

As a wrapper for XENPF_get_cpu_version platform op.

Signed-off-by: Sergey Dyasli 
---
 tools/include/xenctrl.h   |  1 +
 tools/libs/ctrl/xc_misc.c | 20 
 2 files changed, 21 insertions(+)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 23037874d3..8aa747dc2e 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -1186,6 +1186,7 @@ int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo);
 int xc_microcode_update(xc_interface *xch, const void *buf, size_t len);
+int xc_get_cpu_version(xc_interface *xch, struct xenpf_pcpu_version *cpu_ver);
 int xc_numainfo(xc_interface *xch, unsigned *max_nodes,
 xc_meminfo_t *meminfo, uint32_t *distance);
 int xc_pcitopoinfo(xc_interface *xch, unsigned num_devs,
diff --git a/tools/libs/ctrl/xc_misc.c b/tools/libs/ctrl/xc_misc.c
index 265f15ec2d..f2f6e4348e 100644
--- a/tools/libs/ctrl/xc_misc.c
+++ b/tools/libs/ctrl/xc_misc.c
@@ -226,6 +226,26 @@ int xc_microcode_update(xc_interface *xch, const void 
*buf, size_t len)
 return ret;
 }
 
+int xc_get_cpu_version(xc_interface *xch, struct xenpf_pcpu_version *cpu_ver)
+{
+int ret;
+DECLARE_PLATFORM_OP;
+
+if ( !xch || !cpu_ver )
+return -1;
+
+platform_op.cmd = XENPF_get_cpu_version;
+platform_op.u.pcpu_version.xen_cpuid = cpu_ver->xen_cpuid;
+
+ret = do_platform_op(xch, &platform_op);
+if ( ret != 0 )
+return ret;
+
+*cpu_ver = platform_op.u.pcpu_version;
+
+return 0;
+}
+
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo)
 {
-- 
2.17.1

[PATCH v3 2/3] x86/platform: introduce XENPF_get_ucode_version

2023-03-21 Thread Sergey Dyasli

Currently it's impossible to get CPU's microcode revision after late
loading without looking into Xen logs which is not always convenient.

Add a new platform op in order to get the required data from Xen and
provide a wrapper for libxenctrl.

Signed-off-by: Sergey Dyasli 
---
 tools/include/xenctrl.h  |  2 ++
 tools/libs/ctrl/xc_misc.c| 21 +
 xen/arch/x86/platform_hypercall.c| 30 
 xen/arch/x86/x86_64/platform_hypercall.c |  4 
 xen/include/public/platform.h| 12 ++
 xen/include/xlat.lst |  1 +
 6 files changed, 70 insertions(+)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 8aa747dc2e..d3ef7a48a5 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -1187,6 +1187,8 @@ int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo);
 int xc_microcode_update(xc_interface *xch, const void *buf, size_t len);
 int xc_get_cpu_version(xc_interface *xch, struct xenpf_pcpu_version *cpu_ver);
+int xc_get_ucode_version(xc_interface *xch,
+ struct xenpf_ucode_version *ucode_ver);
 int xc_numainfo(xc_interface *xch, unsigned *max_nodes,
 xc_meminfo_t *meminfo, uint32_t *distance);
 int xc_pcitopoinfo(xc_interface *xch, unsigned num_devs,
diff --git a/tools/libs/ctrl/xc_misc.c b/tools/libs/ctrl/xc_misc.c
index f2f6e4348e..b93477d189 100644
--- a/tools/libs/ctrl/xc_misc.c
+++ b/tools/libs/ctrl/xc_misc.c
@@ -246,6 +246,27 @@ int xc_get_cpu_version(xc_interface *xch, struct 
xenpf_pcpu_version *cpu_ver)
 return 0;
 }
 
+int xc_get_ucode_version(xc_interface *xch,
+ struct xenpf_ucode_version *ucode_ver)
+{
+int ret;
+DECLARE_PLATFORM_OP;
+
+if ( !xch || !ucode_ver )
+return -1;
+
+platform_op.cmd = XENPF_get_ucode_version;
+platform_op.u.ucode_version.xen_cpuid = ucode_ver->xen_cpuid;
+
+ret = do_platform_op(xch, &platform_op);
+if ( ret != 0 )
+return ret;
+
+*ucode_ver = platform_op.u.ucode_version;
+
+return 0;
+}
+
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo)
 {
diff --git a/xen/arch/x86/platform_hypercall.c 
b/xen/arch/x86/platform_hypercall.c
index a2d9526355..d0818fea47 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -640,6 +640,36 @@ ret_t do_platform_op(
 }
 break;
 
+case XENPF_get_ucode_version:
+{
+struct xenpf_ucode_version *ver = &op->u.ucode_version;
+
+if ( !get_cpu_maps() )
+{
+ret = -EBUSY;
+break;
+}
+
+if ( (ver->xen_cpuid >= nr_cpu_ids) || !cpu_online(ver->xen_cpuid) )
+{
+ret = -ENOENT;
+}
+else
+{
+const struct cpu_signature *sig = &per_cpu(cpu_sig, ver->xen_cpuid);
+
+ver->cpu_signature = sig->sig;
+ver->pf = sig->pf;
+ver->ucode_revision = sig->rev;
+}
+
+put_cpu_maps();
+
+if ( __copy_field_to_guest(u_xenpf_op, op, u.ucode_version) )
+ret = -EFAULT;
+}
+break;
+
 case XENPF_cpu_online:
 {
 int cpu = op->u.cpu_ol.cpuid;
diff --git a/xen/arch/x86/x86_64/platform_hypercall.c 
b/xen/arch/x86/x86_64/platform_hypercall.c
index 5bf6b958d2..b876fd0c4a 100644
--- a/xen/arch/x86/x86_64/platform_hypercall.c
+++ b/xen/arch/x86/x86_64/platform_hypercall.c
@@ -28,6 +28,10 @@ CHECK_pf_pcpuinfo;
 CHECK_pf_pcpu_version;
 #undef xen_pf_pcpu_version
 
+#define xen_pf_ucode_version xenpf_ucode_version
+CHECK_pf_ucode_version;
+#undef xen_pf_pucode_version
+
 #define xen_pf_enter_acpi_sleep xenpf_enter_acpi_sleep
 CHECK_pf_enter_acpi_sleep;
 #undef xen_pf_enter_acpi_sleep
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index 60caa5ce7e..232df79d5f 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -614,6 +614,17 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_symdata_t);
 typedef struct dom0_vga_console_info xenpf_dom0_console_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_dom0_console_t);
 
+#define XENPF_get_ucode_version 65
+struct xenpf_ucode_version {
+uint32_t xen_cpuid;   /* IN:  CPU number to get the revision from.  */
+uint32_t cpu_signature;   /* OUT: CPU signature (CPUID.1.EAX).  */
+uint32_t pf;  /* OUT: Processor Flags.  */
+  /*  Only applicable to Intel. */
+uint32_t ucode_revision;  /* OUT: Microcode Revision.   */
+};
+typedef struct xenpf_ucode_version xenpf_ucode_version_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_ucode_version_t);
+
 /*
  * ` enum neg_errnoval
  * ` HYPERVISOR_platform_op(const struct xen_platform_op*);
@@ -645,6 +656,7 @@ struct xen_platform_op {
 xe

[PATCH v3 0/3] xen-ucode: print information about currently loaded ucode

2023-03-21 Thread Sergey Dyasli

For v3 I've removed the usage of xenhypfs and gone back to adding
a new platform op.

Sergey Dyasli (3):
  tools/xenctrl: add xc_get_cpu_version()
  x86/platform: introduce XENPF_get_ucode_version
  tools/xen-ucode: print information about currently loaded ucode

 tools/include/xenctrl.h  |  3 ++
 tools/libs/ctrl/xc_misc.c| 41 +++
 tools/misc/xen-ucode.c   | 66 
 xen/arch/x86/platform_hypercall.c| 30 +++
 xen/arch/x86/x86_64/platform_hypercall.c |  4 ++
 xen/include/public/platform.h| 12 +
 xen/include/xlat.lst |  1 +
 7 files changed, 157 insertions(+)

-- 
2.17.1

[PATCH v3 3/3] tools/xen-ucode: print information about currently loaded ucode

2023-03-21 Thread Sergey Dyasli

Add an option to xen-ucode tool to print the currently loaded ucode
version and also print it during usage info.  Print CPU signature and
processor flags as well.  The raw data comes from XENPF_get_cpu_version
and XENPF_get_ucode_version platform ops.

Example output:
Intel:
Current CPU signature is: 06-55-04 (raw 0x50654)
Current CPU microcode revision is: 0x2006e05
Current CPU processor flags are: 0x1

AMD:
Current CPU signature is: fam19h (raw 0xa00f11)
Current CPU microcode revision is: 0xa0011a8

Signed-off-by: Sergey Dyasli 
---
 tools/misc/xen-ucode.c | 66 ++
 1 file changed, 66 insertions(+)

diff --git a/tools/misc/xen-ucode.c b/tools/misc/xen-ucode.c
index ad32face2b..b9037ce6a1 100644
--- a/tools/misc/xen-ucode.c
+++ b/tools/misc/xen-ucode.c
@@ -12,6 +12,65 @@
 #include 
 #include 
 
+static const char intel_id[] = "GenuineIntel";
+static const char   amd_id[] = "AuthenticAMD";
+
+static void show_curr_cpu(FILE *f)
+{
+int ret;
+xc_interface *xch;
+struct xenpf_pcpu_version cpu_ver = { .xen_cpuid = 0 };
+struct xenpf_ucode_version ucode_ver = { .xen_cpuid = 0 };
+bool intel = false, amd = false;
+
+xch = xc_interface_open(0, 0, 0);
+if ( xch == NULL )
+return;
+
+ret = xc_get_cpu_version(xch, &cpu_ver);
+if ( ret )
+return;
+
+ret = xc_get_ucode_version(xch, &ucode_ver);
+if ( ret )
+return;
+
+if ( memcmp(cpu_ver.vendor_id, intel_id,
+sizeof(cpu_ver.vendor_id)) == 0 )
+intel = true;
+else if ( memcmp(cpu_ver.vendor_id, amd_id,
+ sizeof(cpu_ver.vendor_id)) == 0 )
+amd = true;
+
+/*
+ * Print signature in a form that allows to quickly identify which ucode
+ * blob to load, e.g.:
+ *
+ *  Intel:   /lib/firmware/intel-ucode/06-55-04
+ *  AMD: /lib/firmware/amd-ucode/microcode_amd_fam19h.bin
+ */
+if ( intel )
+{
+fprintf(f, "Current CPU signature is: %02x-%02x-%02x (raw %#x)\n",
+   cpu_ver.family, cpu_ver.model, cpu_ver.stepping,
+   ucode_ver.cpu_signature);
+}
+else if ( amd )
+{
+fprintf(f, "Current CPU signature is: fam%xh (raw %#x)\n",
+   cpu_ver.family, ucode_ver.cpu_signature);
+}
+
+if ( intel || amd )
+fprintf(f, "Current CPU microcode revision is: %#x\n",
+   ucode_ver.ucode_revision);
+
+if ( intel )
+fprintf(f, "Current CPU processor flags are: %#x\n", ucode_ver.pf);
+
+xc_interface_close(xch);
+}
+
 int main(int argc, char *argv[])
 {
 int fd, ret;
@@ -25,9 +84,16 @@ int main(int argc, char *argv[])
 fprintf(stderr,
 "xen-ucode: Xen microcode updating tool\n"
 "Usage: %s <microcode blob>\n", argv[0]);
+show_curr_cpu(stderr);
 exit(2);
 }
 
+if ( !strcmp(argv[1], "show-cpu-info") )
+{
+show_curr_cpu(stdout);
+return 0;
+}
+
 filename = argv[1];
 fd = open(filename, O_RDONLY);
 if ( fd < 0 )
-- 
2.17.1

Re: [PATCH v2 3/3] tools/xen-ucode: print information about currently loaded ucode

2023-03-01 Thread Sergey Dyasli

On Wed, Mar 1, 2023 at 11:31 AM Jan Beulich  wrote:
>
> On 28.02.2023 18:39, Sergey Dyasli wrote:
> > Add an option to xen-ucode tool to print the currently loaded ucode
> > version and also print it during usage info.  Print CPU signature and
> > processor flags as well.  The raw data comes from cpuinfo directory in
> > xenhypfs and from XENPF_get_cpu_version platform op.
>
> While I don't mind the use of the platform-op, I'm a little puzzled by the
> mix. If CPU information is to be exposed in hypfs, can't we expose there
> everything that's needed here?
>
> Then again, perhaps in a different context, Andrew pointed out that hypfs
> is an optional component, so relying on its presence in the underlying
> hypervisor will need weighing against the alternative of adding a new
> platform-op for the ucode-related data (as you had it in v1). Since I'm
> unaware of a request to switch, are there specific reasons you did?

Ideal situation would be microcode information in Dom0's /proc/cpuinfo
updated after late load, since that file already has most of the
information about the cpu. And the closest thing to /proc is xenhypfs.
It allows the user to query information directly, e.g.

# xenhypfs cat /cpuinfo/microcode-revision
33554509

Which could be used manually or in scripts, instead of relying on
xen-ucode utility. Though printing the value in hex would be nicer.
That was my motivation to go hypfs route. In general it feels like cpu
information is a good fit for hypfs, but agreement on its format and
exposed values is needed.
I can always switch back to a platform op if that would be the preference.

> > --- a/tools/misc/xen-ucode.c
> > +++ b/tools/misc/xen-ucode.c
> > @@ -11,6 +11,96 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +
> > +static const char intel_id[] = "GenuineIntel";
> > +static const char   amd_id[] = "AuthenticAMD";
> > +
> > +static const char sig_path[] = "/cpuinfo/cpu-signature";
> > +static const char rev_path[] = "/cpuinfo/microcode-revision";
> > +static const char  pf_path[] = "/cpuinfo/processor-flags";
>
> Together with the use below I conclude (without having looked at patch 1
> yet) that you only expose perhaps the BSP's data, rather than such for
> all CPUs. (And I was actually going to put up the question whether data
> like the one presented here might not also be of interest for parked
> CPUs.)

Yes, that comes from the BSP. Xen must make sure that all CPUs have
the same ucode revision for the system to work correctly.

Sergey

[PATCH v2 2/3] tools/xenctrl: add xc_get_cpu_version()

2023-02-28 Thread Sergey Dyasli

As a wrapper for XENPF_get_cpu_version platform op.

Signed-off-by: Sergey Dyasli 
---
 tools/include/xenctrl.h   |  1 +
 tools/libs/ctrl/xc_misc.c | 20 
 2 files changed, 21 insertions(+)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 23037874d3..8aa747dc2e 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -1186,6 +1186,7 @@ int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo);
 int xc_microcode_update(xc_interface *xch, const void *buf, size_t len);
+int xc_get_cpu_version(xc_interface *xch, struct xenpf_pcpu_version *cpu_ver);
 int xc_numainfo(xc_interface *xch, unsigned *max_nodes,
 xc_meminfo_t *meminfo, uint32_t *distance);
 int xc_pcitopoinfo(xc_interface *xch, unsigned num_devs,
diff --git a/tools/libs/ctrl/xc_misc.c b/tools/libs/ctrl/xc_misc.c
index 265f15ec2d..f2f6e4348e 100644
--- a/tools/libs/ctrl/xc_misc.c
+++ b/tools/libs/ctrl/xc_misc.c
@@ -226,6 +226,26 @@ int xc_microcode_update(xc_interface *xch, const void 
*buf, size_t len)
 return ret;
 }
 
+int xc_get_cpu_version(xc_interface *xch, struct xenpf_pcpu_version *cpu_ver)
+{
+int ret;
+DECLARE_PLATFORM_OP;
+
+if ( !xch || !cpu_ver )
+return -1;
+
+platform_op.cmd = XENPF_get_cpu_version;
+platform_op.u.pcpu_version.xen_cpuid = cpu_ver->xen_cpuid;
+
+ret = do_platform_op(xch, &platform_op);
+if ( ret != 0 )
+return ret;
+
+*cpu_ver = platform_op.u.pcpu_version;
+
+return 0;
+}
+
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo)
 {
-- 
2.17.1

[PATCH v2 3/3] tools/xen-ucode: print information about currently loaded ucode

2023-02-28 Thread Sergey Dyasli

Add an option to xen-ucode tool to print the currently loaded ucode
version and also print it during usage info.  Print CPU signature and
processor flags as well.  The raw data comes from cpuinfo directory in
xenhypfs and from XENPF_get_cpu_version platform op.

Example output:
Intel:
Current CPU signature is: 06-55-04 (raw 0x50654)
Current CPU microcode revision is: 0x2006e05
Current CPU processor flags are: 0x1

AMD:
Current CPU signature is: fam19h (raw 0xa00f11)
Current CPU microcode revision is: 0xa0011a8

Signed-off-by: Sergey Dyasli 
---
 tools/misc/Makefile|  2 +-
 tools/misc/xen-ucode.c | 97 ++
 2 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/tools/misc/Makefile b/tools/misc/Makefile
index 1c6e1d6a04..e345ac76db 100644
--- a/tools/misc/Makefile
+++ b/tools/misc/Makefile
@@ -136,6 +136,6 @@ xencov: xencov.o
$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
 
 xen-ucode: xen-ucode.o
-   $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
+   $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(LDLIBS_libxenhypfs) 
$(APPEND_LDFLAGS)
 
 -include $(DEPS_INCLUDE)
diff --git a/tools/misc/xen-ucode.c b/tools/misc/xen-ucode.c
index ad32face2b..7e657689f4 100644
--- a/tools/misc/xen-ucode.c
+++ b/tools/misc/xen-ucode.c
@@ -11,6 +11,96 @@
 #include 
 #include 
 #include 
+#include 
+
+static const char intel_id[] = "GenuineIntel";
+static const char   amd_id[] = "AuthenticAMD";
+
+static const char sig_path[] = "/cpuinfo/cpu-signature";
+static const char rev_path[] = "/cpuinfo/microcode-revision";
+static const char  pf_path[] = "/cpuinfo/processor-flags";
+
+static int hypfs_read_uint(struct xenhypfs_handle *hdl, const char *path,
+   unsigned int *var)
+{
+char *result;
+result = xenhypfs_read(hdl, path);
+if ( !result )
+return -1;
+
+errno = 0;
+*var = strtol(result, NULL, 10);
+if ( errno )
+return -1;
+
+return 0;
+}
+
+static void show_curr_cpu(FILE *f)
+{
+int ret;
+struct xenhypfs_handle *hdl;
+xc_interface *xch;
+struct xenpf_pcpu_version cpu_ver = {0};
+bool intel = false, amd = false;
+unsigned int cpu_signature, pf, ucode_revision;
+
+hdl = xenhypfs_open(NULL, 0);
+if ( !hdl )
+return;
+
+xch = xc_interface_open(0, 0, 0);
+if ( xch == NULL )
+return;
+
+ret = xc_get_cpu_version(xch, &cpu_ver);
+if ( ret )
+return;
+
+if ( memcmp(cpu_ver.vendor_id, intel_id,
+sizeof(cpu_ver.vendor_id)) == 0 )
+intel = true;
+else if ( memcmp(cpu_ver.vendor_id, amd_id,
+ sizeof(cpu_ver.vendor_id)) == 0 )
+amd = true;
+
+if ( hypfs_read_uint(hdl, sig_path, &cpu_signature) != 0 )
+return;
+
+if ( hypfs_read_uint(hdl, rev_path, &ucode_revision) != 0 )
+return;
+
+if ( intel && hypfs_read_uint(hdl, pf_path, &pf) != 0 )
+return;
+
+/*
+ * Print signature in a form that allows to quickly identify which ucode
+ * blob to load, e.g.:
+ *
+ *  Intel:   /lib/firmware/intel-ucode/06-55-04
+ *  AMD: /lib/firmware/amd-ucode/microcode_amd_fam19h.bin
+ */
+if ( intel )
+{
+fprintf(f, "Current CPU signature is: %02x-%02x-%02x (raw %#x)\n",
+   cpu_ver.family, cpu_ver.model, cpu_ver.stepping,
+   cpu_signature);
+}
+else if ( amd )
+{
+fprintf(f, "Current CPU signature is: fam%xh (raw %#x)\n",
+   cpu_ver.family, cpu_signature);
+}
+
+if ( intel || amd )
+fprintf(f, "Current CPU microcode revision is: %#x\n", ucode_revision);
+
+if ( intel )
+fprintf(f, "Current CPU processor flags are: %#x\n", pf);
+
+xc_interface_close(xch);
+xenhypfs_close(hdl);
+}
 
 int main(int argc, char *argv[])
 {
@@ -25,9 +115,16 @@ int main(int argc, char *argv[])
 fprintf(stderr,
 "xen-ucode: Xen microcode updating tool\n"
 "Usage: %s <microcode blob>\n", argv[0]);
+show_curr_cpu(stderr);
 exit(2);
 }
 
+if ( !strcmp(argv[1], "show-cpu-info") )
+{
+show_curr_cpu(stdout);
+return 0;
+}
+
 filename = argv[1];
 fd = open(filename, O_RDONLY);
 if ( fd < 0 )
-- 
2.17.1

[PATCH v2 0/3] xen-ucode: print information about currently loaded ucode

2023-02-28 Thread Sergey Dyasli

I've split the patch into 3 parts. And now I'm using xenhypfs instead of
introducing another platform op. That's my first attempt at xenhypfs and
the patch itself is of RFC quality. Open questions are where to put the
new code and if it's possible to come up with better hypfs functions.

Sergey Dyasli (3):
  xen/hypfs: add initial cpuinfo directory
  tools/xenctrl: add xc_get_cpu_version()
  tools/xen-ucode: print information about currently loaded ucode

 tools/include/xenctrl.h   |  1 +
 tools/libs/ctrl/xc_misc.c | 20 
 tools/misc/Makefile   |  2 +-
 tools/misc/xen-ucode.c| 97 +++
 xen/arch/x86/cpu/common.c | 58 +++
 5 files changed, 177 insertions(+), 1 deletion(-)

-- 
2.17.1

[PATCH v2/RFC 1/3] xen/hypfs: add initial cpuinfo directory

2023-02-28 Thread Sergey Dyasli

Currently it's impossible to get CPU's microcode revision after late
loading without looking into Xen logs which is not always convenient.

Leverage xenhypfs to expose struct cpu_signature in a new cpuinfo dir.
The tree structure is:

/
  cpuinfo/
cpu-signature
microcode-revision
processor-flags

The most useful bit is cpu microcode revision which will get updated
after late ucode loading.

Signed-off-by: Sergey Dyasli 
---
 xen/arch/x86/cpu/common.c | 58 +++
 1 file changed, 58 insertions(+)

diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
index 5ad347534a..aa864fdbab 100644
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -1005,3 +1005,61 @@ const struct x86_cpu_id *x86_match_cpu(const struct 
x86_cpu_id table[])
}
return NULL;
 }
+
+#ifdef CONFIG_HYPFS
+#include 
+#include 
+#include 
+
+static unsigned int cpu_signature;
+static unsigned int processor_flags;
+static unsigned int ucode_revision;
+
+int cf_check hypfs_read_cpusig(
+const struct hypfs_entry *entry, XEN_GUEST_HANDLE_PARAM(void) uaddr)
+{
+const struct hypfs_entry_leaf *l;
+unsigned int size = entry->funcs->getsize(entry);
+const struct cpu_signature *sig = &per_cpu(cpu_sig,
+   cpumask_first(&cpu_online_map));
+
+l = container_of(entry, const struct hypfs_entry_leaf, e);
+
+cpu_signature = sig->sig;
+processor_flags = sig->pf;
+ucode_revision = sig->rev;
+
+return copy_to_guest(uaddr, l->u.content, size) ?  -EFAULT : 0;
+}
+
+const struct hypfs_funcs ucode_rev_funcs = {
+.enter = hypfs_node_enter,
+.exit = hypfs_node_exit,
+.read = hypfs_read_cpusig,
+.write = hypfs_write_deny,
+.getsize = hypfs_getsize,
+.findentry = hypfs_leaf_findentry,
+};
+
+static HYPFS_DIR_INIT(cpuinfo, "cpuinfo");
+static HYPFS_FIXEDSIZE_INIT(signature, XEN_HYPFS_TYPE_UINT, "cpu-signature",
+cpu_signature, &ucode_rev_funcs, 0);
+static HYPFS_FIXEDSIZE_INIT(pf, XEN_HYPFS_TYPE_UINT, "processor-flags",
+processor_flags, &ucode_rev_funcs, 0);
+static HYPFS_FIXEDSIZE_INIT(revision, XEN_HYPFS_TYPE_UINT, 
"microcode-revision",
+ucode_revision, &ucode_rev_funcs, 0);
+
+static int __init cf_check cpuinfo_init(void)
+{
+hypfs_add_dir(&hypfs_root, &cpuinfo, true);
+hypfs_add_leaf(&cpuinfo, &signature, true);
+
+if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+hypfs_add_leaf(&cpuinfo, &pf, true);
+
+hypfs_add_leaf(&cpuinfo, &revision, true);
+
+return 0;
+}
+__initcall(cpuinfo_init);
+#endif /* CONFIG_HYPFS */
-- 
2.17.1

[PATCH v5] x86/ucode/AMD: late load the patch on every logical thread

2023-02-23 Thread Sergey Dyasli

Currently late ucode loading is performed only on the first core of CPU
siblings.  But according to the latest recommendation from AMD, late
ucode loading should happen on every logical thread/core on AMD CPUs.

To achieve that, introduce is_cpu_primary() helper which will consider
every logical cpu as "primary" when running on AMD CPUs.  Also include
Hygon in the check for future-proofing.

Signed-off-by: Sergey Dyasli 
---
v5:
- refactored the code by adding is_cpu_primary() helper
- include Hygon cpus into the check

v4:
- new patch
---
 xen/arch/x86/cpu/microcode/core.c | 24 +++-
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/cpu/microcode/core.c 
b/xen/arch/x86/cpu/microcode/core.c
index ba6e7b42c6..cfa2d5053a 100644
--- a/xen/arch/x86/cpu/microcode/core.c
+++ b/xen/arch/x86/cpu/microcode/core.c
@@ -276,6 +276,20 @@ static bool microcode_update_cache(struct microcode_patch 
*patch)
 return true;
 }
 
+/* Returns true if ucode should be loaded on a given cpu */
+static bool is_cpu_primary(unsigned int cpu)
+{
+if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
+/* Load ucode on every logical thread/core */
+return true;
+
+/* Intel CPUs should load ucode only on the first core of SMT siblings */
+if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
+return true;
+
+return false;
+}
+
 /* Wait for a condition to be met with a timeout (us). */
 static int wait_for_condition(bool (*func)(unsigned int data),
   unsigned int data, unsigned int timeout)
@@ -382,7 +396,7 @@ static int primary_thread_work(const struct microcode_patch 
*patch)
 static int cf_check microcode_nmi_callback(
 const struct cpu_user_regs *regs, int cpu)
 {
-unsigned int primary = cpumask_first(this_cpu(cpu_sibling_mask));
+bool primary_cpu = is_cpu_primary(cpu);
 int ret;
 
 /* System-generated NMI, leave to main handler */
@@ -395,10 +409,10 @@ static int cf_check microcode_nmi_callback(
  * ucode_in_nmi.
  */
 if ( cpu == cpumask_first(&cpu_online_map) ||
- (!ucode_in_nmi && cpu == primary) )
+ (!ucode_in_nmi && primary_cpu) )
 return 0;
 
-if ( cpu == primary )
+if ( primary_cpu )
 ret = primary_thread_work(nmi_patch);
 else
 ret = secondary_nmi_work();
@@ -549,7 +563,7 @@ static int cf_check do_microcode_update(void *patch)
  */
 if ( cpu == cpumask_first(&cpu_online_map) )
 ret = control_thread_fn(patch);
-else if ( cpu == cpumask_first(this_cpu(cpu_sibling_mask)) )
+else if ( is_cpu_primary(cpu) )
 ret = primary_thread_fn(patch);
 else
 ret = secondary_thread_fn();
@@ -642,7 +656,7 @@ static long cf_check microcode_update_helper(void *data)
 /* Calculate the number of online CPU core */
 nr_cores = 0;
 for_each_online_cpu(cpu)
-if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
+if ( is_cpu_primary(cpu) )
 nr_cores++;
 
 printk(XENLOG_INFO "%u cores are to update their microcode\n", nr_cores);
-- 
2.17.1

Re: [PATCH v4 2/2] x86/ucode/AMD: late load the patch on every logical thread

2023-02-21 Thread Sergey Dyasli

On Tue, Feb 21, 2023 at 2:03 PM Jan Beulich  wrote:
>
> On 15.02.2023 16:38, Sergey Dyasli wrote:
> > --- a/xen/arch/x86/cpu/microcode/core.c
> > +++ b/xen/arch/x86/cpu/microcode/core.c
> > @@ -398,10 +398,16 @@ static int cf_check microcode_nmi_callback(
> >   (!ucode_in_nmi && cpu == primary) )
> >  return 0;
> >
> > -if ( cpu == primary )
> > +if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
>
> Given their origin, I'm pretty certain Hygon wants treating the same here
> and below.

Hygon? ucode_ops is currently initialised only for AMD and Intel.
Speaking of which, I'm thinking about adding a new function
is_cpu_primary() there. This would make the core code much cleaner.
I'll see if I can make it work.

Thanks,
Sergey

[PATCH v4 2/2] x86/ucode/AMD: late load the patch on every logical thread

2023-02-15 Thread Sergey Dyasli

Currently late ucode loading is performed only on the first core of CPU
siblings.  But according to the latest recommendation from AMD, late
ucode loading should happen on every logical thread/core.

To achieve that, consider every logical cpu as "primary" when running on
AMD cpus, i.e. skip cpu_sibling_mask checks.

Signed-off-by: Sergey Dyasli 
---
v4:
- new patch
---
 xen/arch/x86/cpu/microcode/core.c | 35 ++-
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/xen/arch/x86/cpu/microcode/core.c 
b/xen/arch/x86/cpu/microcode/core.c
index ba6e7b42c6..f720030761 100644
--- a/xen/arch/x86/cpu/microcode/core.c
+++ b/xen/arch/x86/cpu/microcode/core.c
@@ -398,10 +398,16 @@ static int cf_check microcode_nmi_callback(
  (!ucode_in_nmi && cpu == primary) )
 return 0;
 
-if ( cpu == primary )
+if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+/* load ucode on every logical thread/core */
 ret = primary_thread_work(nmi_patch);
 else
-ret = secondary_nmi_work();
+{
+if ( cpu == primary )
+ret = primary_thread_work(nmi_patch);
+else
+ret = secondary_nmi_work();
+}
 this_cpu(loading_err) = ret;
 
 return 0;
@@ -540,7 +546,6 @@ static int control_thread_fn(const struct microcode_patch 
*patch)
 static int cf_check do_microcode_update(void *patch)
 {
 unsigned int cpu = smp_processor_id();
-int ret;
 
 /*
  * The control thread set state to coordinate ucode loading. Primary
@@ -548,13 +553,18 @@ static int cf_check do_microcode_update(void *patch)
  * the completion of the ucode loading process.
  */
 if ( cpu == cpumask_first(&cpu_online_map) )
-ret = control_thread_fn(patch);
-else if ( cpu == cpumask_first(this_cpu(cpu_sibling_mask)) )
-ret = primary_thread_fn(patch);
-else
-ret = secondary_thread_fn();
+return control_thread_fn(patch);
 
-return ret;
+if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+/* load ucode on every logical thread/core */
+return primary_thread_fn(patch);
+else
+{
+if ( cpu == cpumask_first(this_cpu(cpu_sibling_mask)) )
+return primary_thread_fn(patch);
+else
+return secondary_thread_fn();
+}
 }
 
 struct ucode_buf {
@@ -642,8 +652,13 @@ static long cf_check microcode_update_helper(void *data)
 /* Calculate the number of online CPU core */
 nr_cores = 0;
 for_each_online_cpu(cpu)
-if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
+{
+if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+/* load ucode on every logical thread/core */
+nr_cores++;
+else if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
 nr_cores++;
+}
 
 printk(XENLOG_INFO "%u cores are to update their microcode\n", nr_cores);
 
-- 
2.31.1

[PATCH v4 0/2] x86/ucode/AMD: load ucode on every logical thread

2023-02-15 Thread Sergey Dyasli

I've added a second patch to cover late loading as that should also
happen on every cpu, according to AMD.

Sergey Dyasli (2):
  x86/ucode/AMD: apply the patch early on every logical thread
  x86/ucode/AMD: late load the patch on every logical thread

 xen/arch/x86/cpu/microcode/amd.c | 11 +++--
 xen/arch/x86/cpu/microcode/core.c| 61 +++-
 xen/arch/x86/cpu/microcode/intel.c   | 10 +++--
 xen/arch/x86/cpu/microcode/private.h |  3 +-
 4 files changed, 59 insertions(+), 26 deletions(-)

-- 
2.31.1

[PATCH v4 1/2] x86/ucode/AMD: apply the patch early on every logical thread

2023-02-15 Thread Sergey Dyasli

The original issue has been reported on AMD Bulldozer-based CPUs where
ucode loading loses the LWP feature bit in order to gain the IBPB bit.
LWP disabling is per-SMT/CMT core modification and needs to happen on
each sibling thread despite the shared microcode engine. Otherwise,
logical CPUs will end up with different cpuid capabilities.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216211

Guests running under Xen happen to be not affected because of levelling
logic for the feature masking/override MSRs which causes the LWP bit to
fall out and hides the issue. The latest recommendation from AMD, after
discussing this bug, is to load ucode on every logical CPU.

In Linux kernel this issue has been addressed by e7ad18d1169c
("x86/microcode/AMD: Apply the patch early on every logical thread").
Follow the same approach in Xen.

Introduce SAME_UCODE match result and use it for early AMD ucode
loading. Take this opportunity and move opt_ucode_allow_same out of
compare_revisions() to the relevant callers and also modify the warning
message based on it. Intel's side of things is modified for consistency
but provides no functional change.

Signed-off-by: Sergey Dyasli 
---
v3 --> v4:
- Coding style fixes
- Removed goto
- Removed the paragraph about late loading in the commit message

v2 --> v3:
- Moved opt_ucode_allow_same out of compare_revisions() and updated
  the commit message
- Adjusted the warning message

v1 --> v2:
- Expanded the commit message with the levelling section
- Adjusted comment for OLD_UCODE
---
 xen/arch/x86/cpu/microcode/amd.c | 11 ---
 xen/arch/x86/cpu/microcode/core.c| 26 +-
 xen/arch/x86/cpu/microcode/intel.c   | 10 +++---
 xen/arch/x86/cpu/microcode/private.h |  3 ++-
 4 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c
index 4b097187a0..a9a5557835 100644
--- a/xen/arch/x86/cpu/microcode/amd.c
+++ b/xen/arch/x86/cpu/microcode/amd.c
@@ -176,8 +176,8 @@ static enum microcode_match_result compare_revisions(
 if ( new_rev > old_rev )
 return NEW_UCODE;
 
-if ( opt_ucode_allow_same && new_rev == old_rev )
-return NEW_UCODE;
+if ( new_rev == old_rev )
+return SAME_UCODE;
 
 return OLD_UCODE;
 }
@@ -220,8 +220,13 @@ static int cf_check apply_microcode(const struct 
microcode_patch *patch)
 unsigned int cpu = smp_processor_id();
 struct cpu_signature *sig = &per_cpu(cpu_sig, cpu);
 uint32_t rev, old_rev = sig->rev;
+enum microcode_match_result result = microcode_fits(patch);
 
-if ( microcode_fits(patch) != NEW_UCODE )
+/*
+ * Allow application of the same revision to pick up SMT-specific changes
+ * even if the revision of the other SMT thread is already up-to-date.
+ */
+if ( result != NEW_UCODE && result != SAME_UCODE )
 return -EINVAL;
 
 if ( check_final_patch_levels(sig) )
diff --git a/xen/arch/x86/cpu/microcode/core.c 
b/xen/arch/x86/cpu/microcode/core.c
index d14754e222..ba6e7b42c6 100644
--- a/xen/arch/x86/cpu/microcode/core.c
+++ b/xen/arch/x86/cpu/microcode/core.c
@@ -612,17 +612,25 @@ static long cf_check microcode_update_helper(void *data)
  * that ucode revision.
  */
 spin_lock(&microcode_mutex);
-if ( microcode_cache &&
- alternative_call(ucode_ops.compare_patch,
-  patch, microcode_cache) != NEW_UCODE )
+if ( microcode_cache )
 {
-spin_unlock(&microcode_mutex);
-printk(XENLOG_WARNING "microcode: couldn't find any newer revision "
-  "in the provided blob!\n");
-microcode_free_patch(patch);
-ret = -ENOENT;
+enum microcode_match_result result;
 
-goto put;
+result = alternative_call(ucode_ops.compare_patch, patch,
+  microcode_cache);
+
+if ( result != NEW_UCODE &&
+ !(opt_ucode_allow_same && result == SAME_UCODE) )
+{
+spin_unlock(&microcode_mutex);
+printk(XENLOG_WARNING
+   "microcode: couldn't find any newer%s revision in the 
provided blob!\n",
+   opt_ucode_allow_same ? " (or the same)" : "");
+microcode_free_patch(patch);
+ret = -ENOENT;
+
+goto put;
+}
 }
 spin_unlock(&microcode_mutex);
 
diff --git a/xen/arch/x86/cpu/microcode/intel.c 
b/xen/arch/x86/cpu/microcode/intel.c
index f7fec4b4ed..8d4d6574aa 100644
--- a/xen/arch/x86/cpu/microcode/intel.c
+++ b/xen/arch/x86/cpu/microcode/intel.c
@@ -232,8 +232,8 @@ static enum microcode_match_result compare_revisions(
 if ( new_rev > old_rev )
 return NEW_UCODE;
 
-if ( opt_ucode_allow_same && new_rev == old_rev )
-return NEW_UCODE;
+if ( new_rev == old_rev )
+return SAME_UCODE;

[PATCH v3] x86/ucode/AMD: apply the patch early on every logical thread

2023-01-30 Thread Sergey Dyasli

The original issue has been reported on AMD Bulldozer-based CPUs where
ucode loading loses the LWP feature bit in order to gain the IBPB bit.
LWP disabling is per-SMT/CMT core modification and needs to happen on
each sibling thread despite the shared microcode engine. Otherwise,
logical CPUs will end up with different cpuid capabilities.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216211

Guests running under Xen happen to be not affected because of levelling
logic for the feature masking/override MSRs which causes the LWP bit to
fall out and hides the issue. The latest recommendation from AMD, after
discussing this bug, is to load ucode on every logical CPU.

In Linux kernel this issue has been addressed by e7ad18d1169c
("x86/microcode/AMD: Apply the patch early on every logical thread").
Follow the same approach in Xen.

Introduce SAME_UCODE match result and use it for early AMD ucode
loading. Take this opportunity and move opt_ucode_allow_same out of
compare_revisions() to the relevant callers and also modify the warning
message based on it. Intel's side of things is modified for consistency
but provides no functional change.

Late loading is still performed only on the first of SMT/CMT
siblings and only if a newer ucode revision has been provided (unless
allow_same option is specified).

Signed-off-by: Sergey Dyasli 
---
v2 --> v3:
- Moved opt_ucode_allow_same out of compare_revisions() and updated
  the commit message
- Adjusted the warning message

v1 --> v2:
- Expanded the commit message with the levelling section
- Adjusted comment for OLD_UCODE

CC: Jan Beulich 
CC: Andrew Cooper 
CC: "Roger Pau Monné" 
CC: Wei Liu 
---
 xen/arch/x86/cpu/microcode/amd.c | 11 ---
 xen/arch/x86/cpu/microcode/core.c| 19 ++-
 xen/arch/x86/cpu/microcode/intel.c   | 10 +++---
 xen/arch/x86/cpu/microcode/private.h |  3 ++-
 4 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c
index 4b097187a0..a9a5557835 100644
--- a/xen/arch/x86/cpu/microcode/amd.c
+++ b/xen/arch/x86/cpu/microcode/amd.c
@@ -176,8 +176,8 @@ static enum microcode_match_result compare_revisions(
 if ( new_rev > old_rev )
 return NEW_UCODE;
 
-if ( opt_ucode_allow_same && new_rev == old_rev )
-return NEW_UCODE;
+if ( new_rev == old_rev )
+return SAME_UCODE;
 
 return OLD_UCODE;
 }
@@ -220,8 +220,13 @@ static int cf_check apply_microcode(const struct 
microcode_patch *patch)
 unsigned int cpu = smp_processor_id();
 struct cpu_signature *sig = &per_cpu(cpu_sig, cpu);
 uint32_t rev, old_rev = sig->rev;
+enum microcode_match_result result = microcode_fits(patch);
 
-if ( microcode_fits(patch) != NEW_UCODE )
+/*
+ * Allow application of the same revision to pick up SMT-specific changes
+ * even if the revision of the other SMT thread is already up-to-date.
+ */
+if ( result != NEW_UCODE && result != SAME_UCODE )
 return -EINVAL;
 
 if ( check_final_patch_levels(sig) )
diff --git a/xen/arch/x86/cpu/microcode/core.c 
b/xen/arch/x86/cpu/microcode/core.c
index d14754e222..912ef2c7be 100644
--- a/xen/arch/x86/cpu/microcode/core.c
+++ b/xen/arch/x86/cpu/microcode/core.c
@@ -612,13 +612,21 @@ static long cf_check microcode_update_helper(void *data)
  * that ucode revision.
  */
 spin_lock(&microcode_mutex);
-if ( microcode_cache &&
- alternative_call(ucode_ops.compare_patch,
-  patch, microcode_cache) != NEW_UCODE )
+if ( microcode_cache )
 {
+enum microcode_match_result result;
+
+result = alternative_call(ucode_ops.compare_patch, patch,
+   microcode_cache);
 spin_unlock(&microcode_mutex);
-printk(XENLOG_WARNING "microcode: couldn't find any newer revision "
-  "in the provided blob!\n");
+
+if ( result == NEW_UCODE ||
+ (opt_ucode_allow_same && result == SAME_UCODE) )
+goto apply;
+
+printk(XENLOG_WARNING "microcode: couldn't find any newer%s revision "
+  "in the provided blob!\n", opt_ucode_allow_same ?
+ " (or the same)" : 
"");
 microcode_free_patch(patch);
 ret = -ENOENT;
 
@@ -626,6 +634,7 @@ static long cf_check microcode_update_helper(void *data)
 }
 spin_unlock(&microcode_mutex);
 
+apply:
 cpumask_clear(&cpu_callin_map);
 atomic_set(&cpu_out, 0);
 atomic_set(&cpu_updated, 0);
diff --git a/xen/arch/x86/cpu/microcode/intel.c 
b/xen/arch/x86/cpu/microcode/intel.c
index f7fec4b4ed..8d4d6574aa 100644
--- a/xen/arch/x86/cpu/microcode/intel.c
+++ b/xen/arch/x86/cpu/microcode/intel.c
@@ -232,8 +232,8 @@ static enum microcode_match_result compar

Re: [PATCH v2] x86/ucode/AMD: apply the patch early on every logical thread

2023-01-23 Thread Sergey Dyasli

On Mon, Jan 16, 2023 at 2:47 PM Jan Beulich  wrote:
>
> On 11.01.2023 15:23, Sergey Dyasli wrote:
> > --- a/xen/arch/x86/cpu/microcode/amd.c
> > +++ b/xen/arch/x86/cpu/microcode/amd.c
> > @@ -176,8 +176,13 @@ static enum microcode_match_result compare_revisions(
> >  if ( new_rev > old_rev )
> >  return NEW_UCODE;
> >
> > -if ( opt_ucode_allow_same && new_rev == old_rev )
> > -return NEW_UCODE;
> > +if ( new_rev == old_rev )
> > +{
> > +if ( opt_ucode_allow_same )
> > +return NEW_UCODE;
> > +else
> > +return SAME_UCODE;
> > +}
>
> I find this misleading: "same" should not depend on the command line
> option.

The alternative diff I was considering is this:

--- a/xen/arch/x86/cpu/microcode/amd.c
+++ b/xen/arch/x86/cpu/microcode/amd.c
@@ -179,6 +179,9 @@ static enum microcode_match_result compare_revisions(
 if ( opt_ucode_allow_same && new_rev == old_rev )
 return NEW_UCODE;

+if ( new_rev == old_rev )
+return SAME_UCODE;
+
 return OLD_UCODE;
 }

Do you think the logic is clearer this way? Or should I simply remove
"else" from the first diff above?

> In fact the command line option should affect only the cases
> where ucode is actually to be loaded; it should not affect cases where
> the check is done merely to know whether the cache needs updating.
>
> With that e.g. microcode_update_helper() should then also be adjusted:
> It shouldn't say merely "newer" when "allow-same" is in effect.

I haven't tried late-loading an older ucode blob to see this
inconsistency, but you should be right. I'll test and adjust the
message.

Sergey

[PATCH] tools/xen-ucode: print information about currently loaded ucode

2023-01-13 Thread Sergey Dyasli

Currently it's impossible to get CPU's microcode revision after late
loading without looking into Xen logs which is not always convenient.
Add an option to xen-ucode tool to print the currently loaded ucode
version and also print it during usage info.

Add a new platform op in order to get the required data from Xen.
Print CPU signature and processor flags as well.

Example output:
Intel:
Current CPU signature is: 06-55-04 (raw 0x50654)
Current CPU microcode revision is: 0x2006e05
Current CPU processor flags are: 0x1

AMD:
Current CPU signature is: fam19h (raw 0xa00f11)
Current CPU microcode revision is: 0xa0011a8

Signed-off-by: Sergey Dyasli 
---
CC: Wei Liu 
CC: Anthony PERARD 
CC: Juergen Gross 
CC: Andrew Cooper 
CC: George Dunlap 
CC: Jan Beulich 
CC: Julien Grall 
CC: Stefano Stabellini 
CC: "Roger Pau Monné" 
---
 tools/include/xenctrl.h   |  1 +
 tools/libs/ctrl/xc_misc.c |  5 +++
 tools/misc/xen-ucode.c| 68 +++
 xen/arch/x86/platform_hypercall.c | 32 +++
 xen/include/public/platform.h | 14 +++
 xen/include/xlat.lst  |  1 +
 6 files changed, 121 insertions(+)

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index 23037874d3..e9911da5ea 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -1185,6 +1185,7 @@ typedef uint32_t xc_node_to_node_dist_t;
 int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo);
+int xc_platform_op(xc_interface *xch, struct xen_platform_op *op);
 int xc_microcode_update(xc_interface *xch, const void *buf, size_t len);
 int xc_numainfo(xc_interface *xch, unsigned *max_nodes,
 xc_meminfo_t *meminfo, uint32_t *distance);
diff --git a/tools/libs/ctrl/xc_misc.c b/tools/libs/ctrl/xc_misc.c
index 265f15ec2d..d03c240d14 100644
--- a/tools/libs/ctrl/xc_misc.c
+++ b/tools/libs/ctrl/xc_misc.c
@@ -226,6 +226,11 @@ int xc_microcode_update(xc_interface *xch, const void 
*buf, size_t len)
 return ret;
 }
 
+int xc_platform_op(xc_interface *xch, struct xen_platform_op *op)
+{
+return do_platform_op(xch, op);
+}
+
 int xc_cputopoinfo(xc_interface *xch, unsigned *max_cpus,
xc_cputopo_t *cputopo)
 {
diff --git a/tools/misc/xen-ucode.c b/tools/misc/xen-ucode.c
index ad32face2b..c4cb4fbb50 100644
--- a/tools/misc/xen-ucode.c
+++ b/tools/misc/xen-ucode.c
@@ -12,6 +12,67 @@
 #include 
 #include 
 
+static const char *intel_id = "GenuineIntel";
+static const char *amd_id   = "AuthenticAMD";
+
+void show_curr_cpu(FILE *f)
+{
+int ret;
+xc_interface *xch;
+struct xen_platform_op op_cpu = {0}, op_ucode = {0};
+struct xenpf_pcpu_version *cpu_ver = &op_cpu.u.pcpu_version;
+struct xenpf_ucode_version *ucode_ver = &op_ucode.u.ucode_version;
+bool intel = false, amd = false;
+
+xch = xc_interface_open(0, 0, 0);
+if ( xch == NULL )
+return;
+
+op_cpu.cmd = XENPF_get_cpu_version;
+op_cpu.interface_version = XENPF_INTERFACE_VERSION;
+op_cpu.u.pcpu_version.xen_cpuid = 0;
+
+ret = xc_platform_op(xch, &op_cpu);
+if ( ret )
+return;
+
+op_ucode.cmd = XENPF_get_ucode_version;
+op_ucode.interface_version = XENPF_INTERFACE_VERSION;
+op_ucode.u.pcpu_version.xen_cpuid = 0;
+
+ret = xc_platform_op(xch, &op_ucode);
+if ( ret )
+return;
+
+if ( memcmp(cpu_ver->vendor_id, intel_id,
+sizeof(cpu_ver->vendor_id)) == 0 )
+intel = true;
+else if ( memcmp(cpu_ver->vendor_id, amd_id,
+ sizeof(cpu_ver->vendor_id)) == 0 )
+amd = true;
+
+if ( intel )
+{
+fprintf(f, "Current CPU signature is: %02x-%02x-%02x (raw %#x)\n",
+   cpu_ver->family, cpu_ver->model, cpu_ver->stepping,
+   ucode_ver->cpu_signature);
+}
+else if ( amd )
+{
+fprintf(f, "Current CPU signature is: fam%xh (raw %#x)\n",
+   cpu_ver->family, ucode_ver->cpu_signature);
+}
+
+if ( intel || amd )
+fprintf(f, "Current CPU microcode revision is: %#x\n",
+   ucode_ver->ucode_revision);
+
+if ( intel )
+fprintf(f, "Current CPU processor flags are: %#x\n", ucode_ver->pf);
+
+xc_interface_close(xch);
+}
+
 int main(int argc, char *argv[])
 {
 int fd, ret;
@@ -20,11 +81,18 @@ int main(int argc, char *argv[])
 struct stat st;
 xc_interface *xch;
 
+if ( argc >= 2 && !strcmp(argv[1], "show-cpu-info") )
+{
+show_curr_cpu(stdout);
+return 0;
+}
+
 if ( argc < 2 )
 {
 fprintf(stderr,
 "xen-ucode: Xen microcode updating tool\n"
 "Usage: %s \n", argv[0]);
+show_curr_cpu(stder

[PATCH v2] x86/ucode/AMD: apply the patch early on every logical thread

2023-01-11 Thread Sergey Dyasli

The original issue has been reported on AMD Bulldozer-based CPUs where
ucode loading loses the LWP feature bit in order to gain the IBPB bit.
LWP disabling is a per-SMT/CMT core modification and needs to happen on
each sibling thread despite the shared microcode engine. Otherwise,
logical CPUs will end up with different cpuid capabilities.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216211

Guests running under Xen happen not to be affected because the levelling
logic for the feature masking/override MSRs causes the LWP bit to fall
out, which hides the issue. The latest recommendation from AMD, after
discussing this bug, is to load ucode on every logical CPU.

In the Linux kernel this issue has been addressed by e7ad18d1169c
("x86/microcode/AMD: Apply the patch early on every logical thread").
Follow the same approach in Xen.

Introduce SAME_UCODE match result and use it for early AMD ucode
loading. Late loading is still performed only on the first of SMT/CMT
siblings and only if a newer ucode revision has been provided (unless
allow_same option is specified).

Intel's side of things is modified for consistency but provides no
functional change.

Signed-off-by: Sergey Dyasli 
---
v1 --> v2:
- Expanded the commit message with the levelling section
- Adjusted comment for OLD_UCODE

CC: Jan Beulich 
CC: Andrew Cooper 
CC: "Roger Pau Monné" 
CC: Wei Liu 
---
 xen/arch/x86/cpu/microcode/amd.c | 16 +---
 xen/arch/x86/cpu/microcode/intel.c   |  9 +++--
 xen/arch/x86/cpu/microcode/private.h |  3 ++-
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c
index 4b097187a0..96db10a2e0 100644
--- a/xen/arch/x86/cpu/microcode/amd.c
+++ b/xen/arch/x86/cpu/microcode/amd.c
@@ -176,8 +176,13 @@ static enum microcode_match_result compare_revisions(
 if ( new_rev > old_rev )
 return NEW_UCODE;
 
-if ( opt_ucode_allow_same && new_rev == old_rev )
-return NEW_UCODE;
+if ( new_rev == old_rev )
+{
+if ( opt_ucode_allow_same )
+return NEW_UCODE;
+else
+return SAME_UCODE;
+}
 
 return OLD_UCODE;
 }
@@ -220,8 +225,13 @@ static int cf_check apply_microcode(const struct 
microcode_patch *patch)
 unsigned int cpu = smp_processor_id();
 struct cpu_signature *sig = &per_cpu(cpu_sig, cpu);
 uint32_t rev, old_rev = sig->rev;
+enum microcode_match_result result = microcode_fits(patch);
 
-if ( microcode_fits(patch) != NEW_UCODE )
+/*
+ * Allow application of the same revision to pick up SMT-specific changes
+ * even if the revision of the other SMT thread is already up-to-date.
+ */
+if ( result != NEW_UCODE && result != SAME_UCODE )
 return -EINVAL;
 
 if ( check_final_patch_levels(sig) )
diff --git a/xen/arch/x86/cpu/microcode/intel.c 
b/xen/arch/x86/cpu/microcode/intel.c
index f7fec4b4ed..59a99eee4e 100644
--- a/xen/arch/x86/cpu/microcode/intel.c
+++ b/xen/arch/x86/cpu/microcode/intel.c
@@ -232,8 +232,13 @@ static enum microcode_match_result compare_revisions(
 if ( new_rev > old_rev )
 return NEW_UCODE;
 
-if ( opt_ucode_allow_same && new_rev == old_rev )
-return NEW_UCODE;
+if ( new_rev == old_rev )
+{
+if ( opt_ucode_allow_same )
+return NEW_UCODE;
+else
+return SAME_UCODE;
+}
 
 /*
  * Treat pre-production as always applicable - anyone using pre-production
diff --git a/xen/arch/x86/cpu/microcode/private.h 
b/xen/arch/x86/cpu/microcode/private.h
index 73b095d5bf..626aeb4d08 100644
--- a/xen/arch/x86/cpu/microcode/private.h
+++ b/xen/arch/x86/cpu/microcode/private.h
@@ -6,7 +6,8 @@
 extern bool opt_ucode_allow_same;
 
 enum microcode_match_result {
-OLD_UCODE, /* signature matched, but revision id is older or equal */
+OLD_UCODE, /* signature matched, but revision id is older */
+SAME_UCODE, /* signature matched, but revision id is the same */
 NEW_UCODE, /* signature matched, but revision id is newer */
 MIS_UCODE, /* signature mismatched */
 };
-- 
2.17.1

Re: [PATCH] x86/ucode/AMD: apply the patch early on every logical thread

2023-01-09 Thread Sergey Dyasli

On Thu, Jan 5, 2023 at 10:56 PM Andrew Cooper  wrote:
> > diff --git a/xen/arch/x86/cpu/microcode/private.h 
> > b/xen/arch/x86/cpu/microcode/private.h
> > index 73b095d5bf..c4c6729f56 100644
> > --- a/xen/arch/x86/cpu/microcode/private.h
> > +++ b/xen/arch/x86/cpu/microcode/private.h
> > @@ -7,6 +7,7 @@ extern bool opt_ucode_allow_same;
> >
> >  enum microcode_match_result {
> >  OLD_UCODE, /* signature matched, but revision id is older or equal */
> > +SAME_UCODE, /* signature matched, but revision id is the same */
> >  NEW_UCODE, /* signature matched, but revision id is newer */
> >  MIS_UCODE, /* signature mismatched */
> >  };
>
> I don't think this is a clever idea.  For one, OLD and SAME are now
> ambiguous (at least as far as the comments go), and having the
> difference between the two depend on allow_same is unexpected to say the
> least.

Sorry, I missed that "equal" comment, which is easily removable. What I
don't follow is your concern about allow_same. It already changes
whether OLD or NEW is returned, and my patch makes that SAME or NEW.

> I never really liked the enum to begin with, and I think the logic would
> be cleaner without it.
>
>
> We depend entirely on there being one ucode blob which is applicable
> globally across the system, so MIS_UCODE can be expressed as returning
> NULL from the initial searches.  Everything else can then be expressed
> in a normal {mem,str}cmp() way (i.e. -1/0/+1).

This idea sounds good but in practice there are vendor-specific functions
which return enum microcode_match_result and I don't see how it could be
easily replaced with NULL/-1/0/+1 without code changes. I also find the
enum values easier to read.

Sergey

[PATCH] x86/ucode/AMD: apply the patch early on every logical thread

2023-01-05 Thread Sergey Dyasli

The original issue has been reported on AMD Bulldozer-based CPUs where
ucode loading loses the LWP feature bit in order to gain the IBPB bit.
LWP disabling is a per-SMT core modification and needs to happen on each
sibling SMT thread despite the shared microcode engine. Otherwise,
logical CPUs will end up with different cpuid capabilities.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216211

In the Linux kernel this issue has been addressed by e7ad18d1169c
("x86/microcode/AMD: Apply the patch early on every logical thread").
Follow the same approach in Xen.

Introduce SAME_UCODE match result and use it for early AMD ucode
loading. Late loading is still performed only on the first of SMT
siblings and only if a newer ucode revision has been provided (unless
allow_same option is specified).

Intel's side of things is modified for consistency but provides no
functional change.

Signed-off-by: Sergey Dyasli 
---
CC: Jan Beulich 
CC: Andrew Cooper 
CC: "Roger Pau Monné" 
CC: Wei Liu 
---
 xen/arch/x86/cpu/microcode/amd.c | 16 +---
 xen/arch/x86/cpu/microcode/intel.c   |  9 +++--
 xen/arch/x86/cpu/microcode/private.h |  1 +
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c
index 4b097187a0..96db10a2e0 100644
--- a/xen/arch/x86/cpu/microcode/amd.c
+++ b/xen/arch/x86/cpu/microcode/amd.c
@@ -176,8 +176,13 @@ static enum microcode_match_result compare_revisions(
 if ( new_rev > old_rev )
 return NEW_UCODE;
 
-if ( opt_ucode_allow_same && new_rev == old_rev )
-return NEW_UCODE;
+if ( new_rev == old_rev )
+{
+if ( opt_ucode_allow_same )
+return NEW_UCODE;
+else
+return SAME_UCODE;
+}
 
 return OLD_UCODE;
 }
@@ -220,8 +225,13 @@ static int cf_check apply_microcode(const struct 
microcode_patch *patch)
 unsigned int cpu = smp_processor_id();
 struct cpu_signature *sig = &per_cpu(cpu_sig, cpu);
 uint32_t rev, old_rev = sig->rev;
+enum microcode_match_result result = microcode_fits(patch);
 
-if ( microcode_fits(patch) != NEW_UCODE )
+/*
+ * Allow application of the same revision to pick up SMT-specific changes
+ * even if the revision of the other SMT thread is already up-to-date.
+ */
+if ( result != NEW_UCODE && result != SAME_UCODE )
 return -EINVAL;
 
 if ( check_final_patch_levels(sig) )
diff --git a/xen/arch/x86/cpu/microcode/intel.c 
b/xen/arch/x86/cpu/microcode/intel.c
index f7fec4b4ed..59a99eee4e 100644
--- a/xen/arch/x86/cpu/microcode/intel.c
+++ b/xen/arch/x86/cpu/microcode/intel.c
@@ -232,8 +232,13 @@ static enum microcode_match_result compare_revisions(
 if ( new_rev > old_rev )
 return NEW_UCODE;
 
-if ( opt_ucode_allow_same && new_rev == old_rev )
-return NEW_UCODE;
+if ( new_rev == old_rev )
+{
+if ( opt_ucode_allow_same )
+return NEW_UCODE;
+else
+return SAME_UCODE;
+}
 
 /*
  * Treat pre-production as always applicable - anyone using pre-production
diff --git a/xen/arch/x86/cpu/microcode/private.h 
b/xen/arch/x86/cpu/microcode/private.h
index 73b095d5bf..c4c6729f56 100644
--- a/xen/arch/x86/cpu/microcode/private.h
+++ b/xen/arch/x86/cpu/microcode/private.h
@@ -7,6 +7,7 @@ extern bool opt_ucode_allow_same;
 
 enum microcode_match_result {
 OLD_UCODE, /* signature matched, but revision id is older or equal */
+SAME_UCODE, /* signature matched, but revision id is the same */
 NEW_UCODE, /* signature matched, but revision id is newer */
 MIS_UCODE, /* signature mismatched */
 };
-- 
2.17.1

[PATCH v2 2/3] x86/ucode: allow cpu_request_microcode() to skip memory allocation

2022-12-19 Thread Sergey Dyasli

This is a preparatory step in order to do earlier microcode loading on
the boot CPU when the domain heap has not been initialized yet and
xmalloc is still unavailable.

Add a make_copy argument which allows loading microcode directly from
the blob, bypassing microcode_cache.

Signed-off-by: Sergey Dyasli 

---
v1 --> v2:
- Don't add extra consts
---
 xen/arch/x86/cpu/microcode/amd.c | 13 +
 xen/arch/x86/cpu/microcode/core.c|  2 +-
 xen/arch/x86/cpu/microcode/intel.c   | 13 +
 xen/arch/x86/cpu/microcode/private.h | 15 +++
 4 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c
index 8195707ee1..4b097187a0 100644
--- a/xen/arch/x86/cpu/microcode/amd.c
+++ b/xen/arch/x86/cpu/microcode/amd.c
@@ -300,7 +300,7 @@ static int scan_equiv_cpu_table(const struct 
container_equiv_table *et)
 }
 
 static struct microcode_patch *cf_check cpu_request_microcode(
-const void *buf, size_t size)
+const void *buf, size_t size, bool make_copy)
 {
 const struct microcode_patch *saved = NULL;
 struct microcode_patch *patch = NULL;
@@ -411,9 +411,14 @@ static struct microcode_patch *cf_check 
cpu_request_microcode(
 
 if ( saved )
 {
-patch = xmemdup_bytes(saved, saved_size);
-if ( !patch )
-error = -ENOMEM;
+if ( make_copy )
+{
+patch = xmemdup_bytes(saved, saved_size);
+if ( !patch )
+error = -ENOMEM;
+}
+else
+patch = (struct microcode_patch *)saved;
 }
 
 if ( error && !patch )
diff --git a/xen/arch/x86/cpu/microcode/core.c 
b/xen/arch/x86/cpu/microcode/core.c
index 452a7ca773..85c05e480d 100644
--- a/xen/arch/x86/cpu/microcode/core.c
+++ b/xen/arch/x86/cpu/microcode/core.c
@@ -244,7 +244,7 @@ static struct microcode_patch *parse_blob(const char *buf, 
size_t len)
 {
 alternative_vcall(ucode_ops.collect_cpu_info);
 
-return alternative_call(ucode_ops.cpu_request_microcode, buf, len);
+return alternative_call(ucode_ops.cpu_request_microcode, buf, len, true);
 }
 
 static void microcode_free_patch(struct microcode_patch *patch)
diff --git a/xen/arch/x86/cpu/microcode/intel.c 
b/xen/arch/x86/cpu/microcode/intel.c
index f5ba6d76d7..f7fec4b4ed 100644
--- a/xen/arch/x86/cpu/microcode/intel.c
+++ b/xen/arch/x86/cpu/microcode/intel.c
@@ -324,7 +324,7 @@ static int cf_check apply_microcode(const struct 
microcode_patch *patch)
 }
 
 static struct microcode_patch *cf_check cpu_request_microcode(
-const void *buf, size_t size)
+const void *buf, size_t size, bool make_copy)
 {
 int error = 0;
 const struct microcode_patch *saved = NULL;
@@ -364,10 +364,15 @@ static struct microcode_patch *cf_check 
cpu_request_microcode(
 
 if ( saved )
 {
-patch = xmemdup_bytes(saved, get_totalsize(saved));
+if ( make_copy )
+{
+patch = xmemdup_bytes(saved, get_totalsize(saved));
 
-if ( !patch )
-error = -ENOMEM;
+if ( !patch )
+error = -ENOMEM;
+}
+else
+patch = (struct microcode_patch *)saved;
 }
 
 if ( error && !patch )
diff --git a/xen/arch/x86/cpu/microcode/private.h 
b/xen/arch/x86/cpu/microcode/private.h
index c085a10268..58c5dffd7b 100644
--- a/xen/arch/x86/cpu/microcode/private.h
+++ b/xen/arch/x86/cpu/microcode/private.h
@@ -23,15 +23,22 @@ struct microcode_ops {
  * older that what is running in the CPU.  This is a feature, to better
  * cope with corner cases from buggy firmware.)
  *
- * If one is found, allocate and return a struct microcode_patch
- * encapsulating the appropriate microcode patch.  Does not alias the
- * original buffer.  Must be suitable to be freed with a single xfree().
+ * If one is found, behaviour depends on the make_copy argument:
+ *
+ * true: allocate and return a struct microcode_patch encapsulating
+ *   the appropriate microcode patch.  Does not alias the original
+ *   buffer.  Must be suitable to be freed with a single xfree().
+ *
+ *false: return a pointer to the patch within the original buffer.
+ *   This is useful for early microcode loading when xmalloc might
+ *   not be available yet.
  *
  * If one is not found, (nothing matches the current CPU), return NULL.
  * Also may return ERR_PTR(-err), e.g. bad container, out of memory.
  */
 struct microcode_patch *(*cpu_request_microcode)(const void *buf,
- size_t size);
+ size_t size,
+ bool make_copy);
 
 /*
  * Obtain microcode-relevant details for the current CPU.  Results in
-- 
2.17.1

[PATCH v2 0/3] x86: load microcode earlier on boot CPU

2022-12-19 Thread Sergey Dyasli

The second version of patches. Changelog is available in each patch.

Sergey Dyasli (3):
  xen/multiboot: add proper struct definitions to typedefs
  x86/ucode: allow cpu_request_microcode() to skip memory allocation
  x86/ucode: load microcode earlier on boot CPU

 xen/arch/x86/cpu/microcode/amd.c | 13 --
 xen/arch/x86/cpu/microcode/core.c| 70 +++-
 xen/arch/x86/cpu/microcode/intel.c   | 13 --
 xen/arch/x86/cpu/microcode/private.h | 15 --
 xen/arch/x86/include/asm/microcode.h |  7 ++-
 xen/arch/x86/include/asm/setup.h |  3 --
 xen/arch/x86/setup.c | 10 ++--
 xen/include/xen/multiboot.h  | 25 ++
 8 files changed, 115 insertions(+), 41 deletions(-)

-- 
2.17.1

[PATCH v2 3/3] x86/ucode: load microcode earlier on boot CPU

2022-12-19 Thread Sergey Dyasli

Call early_microcode_init() straight after multiboot modules become
accessible. Modify it to load the ucode directly from the blob bypassing
populating microcode_cache because xmalloc is still not available at
that point during Xen boot.

Introduce early_microcode_init_cache() for populating microcode_cache.
It needs to rescan the modules in order to find the new virtual address
of the ucode blob because it changes during the boot process, e.g.
from 0x010802fc to 0x83204dac52fc.

While at it, drop alternative_vcall() from early_microcode_init() since
it's not useful in an __init function.

Signed-off-by: Sergey Dyasli 

---
v1 --> v2:
- Don't call microcode_grab_module() the second time, use
  microcode_scan_module() instead
- Use forward declaration of struct multiboot_info
- Don't use alternative calls
- Rename early_microcode_update_cache() to early_update_cache() and
  move it around a bit
---
 xen/arch/x86/cpu/microcode/core.c| 66 +++-
 xen/arch/x86/include/asm/microcode.h |  7 ++-
 xen/arch/x86/include/asm/setup.h |  3 --
 xen/arch/x86/setup.c | 10 +++--
 4 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/xen/arch/x86/cpu/microcode/core.c 
b/xen/arch/x86/cpu/microcode/core.c
index 85c05e480d..04b5d346ab 100644
--- a/xen/arch/x86/cpu/microcode/core.c
+++ b/xen/arch/x86/cpu/microcode/core.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -198,7 +199,8 @@ void __init microcode_scan_module(
 bootstrap_map(NULL);
 }
 }
-void __init microcode_grab_module(
+
+static void __init microcode_grab_module(
 unsigned long *module_map,
 const multiboot_info_t *mbi)
 {
@@ -732,10 +734,54 @@ int microcode_update_one(void)
 return microcode_update_cpu(NULL);
 }
 
+static int __init early_update_cache(const void *data, size_t len)
+{
+int rc = 0;
+struct microcode_patch *patch;
+
+if ( !data )
+return -ENOMEM;
+
+patch = parse_blob(data, len);
+if ( IS_ERR(patch) )
+{
+printk(XENLOG_WARNING "Parsing microcode blob error %ld\n",
+   PTR_ERR(patch));
+return PTR_ERR(patch);
+}
+
+if ( !patch )
+return -ENOENT;
+
+spin_lock(&microcode_mutex);
+rc = microcode_update_cache(patch);
+spin_unlock(&microcode_mutex);
+ASSERT(rc);
+
+return rc;
+}
+
+int __init early_microcode_init_cache(unsigned long *module_map,
+  const struct multiboot_info *mbi)
+{
+int rc = 0;
+
+if ( ucode_scan )
+/* Need to rescan the modules because they might have been relocated */
+microcode_scan_module(module_map, mbi);
+
+if ( ucode_mod.mod_end )
+rc = early_update_cache(bootstrap_map(&ucode_mod),
+ucode_mod.mod_end);
+else if ( ucode_blob.size )
+rc = early_update_cache(ucode_blob.data, ucode_blob.size);
+
+return rc;
+}
+
 /* BSP calls this function to parse ucode blob and then apply an update. */
 static int __init early_microcode_update_cpu(void)
 {
-int rc = 0;
 const void *data = NULL;
 size_t len;
 struct microcode_patch *patch;
@@ -754,7 +800,7 @@ static int __init early_microcode_update_cpu(void)
 if ( !data )
 return -ENOMEM;
 
-patch = parse_blob(data, len);
+patch = ucode_ops.cpu_request_microcode(data, len, false);
 if ( IS_ERR(patch) )
 {
 printk(XENLOG_WARNING "Parsing microcode blob error %ld\n",
@@ -765,15 +811,11 @@ static int __init early_microcode_update_cpu(void)
 if ( !patch )
 return -ENOENT;
 
-spin_lock(&microcode_mutex);
-rc = microcode_update_cache(patch);
-spin_unlock(&microcode_mutex);
-ASSERT(rc);
-
-return microcode_update_one();
+return microcode_update_cpu(patch);
 }
 
-int __init early_microcode_init(void)
+int __init early_microcode_init(unsigned long *module_map,
+const struct multiboot_info *mbi)
 {
 const struct cpuinfo_x86 *c = &boot_cpu_data;
 int rc = 0;
@@ -797,7 +839,9 @@ int __init early_microcode_init(void)
 return -ENODEV;
 }
 
-alternative_vcall(ucode_ops.collect_cpu_info);
+microcode_grab_module(module_map, mbi);
+
+ucode_ops.collect_cpu_info();
 
 if ( ucode_mod.mod_end || ucode_blob.size )
 rc = early_microcode_update_cpu();
diff --git a/xen/arch/x86/include/asm/microcode.h 
b/xen/arch/x86/include/asm/microcode.h
index 3b0234e9fa..170481d257 100644
--- a/xen/arch/x86/include/asm/microcode.h
+++ b/xen/arch/x86/include/asm/microcode.h
@@ -6,6 +6,8 @@
 
 #include 
 
+struct multiboot_info;
+
 struct cpu_signature {
 /* CPU signature (CPUID.1.EAX). */
 unsigned int sig;
@@ -21,7 +23,10 @@ DECLARE_PER_CPU(struct cpu_signature, cpu_sig);
 
 void microcode_set_module(unsigned int idx);
 int microcode_update(XEN_GUEST_HANDLE(const_void), unsigned long len);
-int early_microcode_init(void);
+int early_micro

[PATCH v2 1/3] xen/multiboot: add proper struct definitions to typedefs

2022-12-19 Thread Sergey Dyasli

This allows them to be used for forward declarations in other headers.

Signed-off-by: Sergey Dyasli 

---
CC: George Dunlap 
CC: Julien Grall 
CC: Stefano Stabellini 

v1 --> v2:
- New patch
---
 xen/include/xen/multiboot.h | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/xen/include/xen/multiboot.h b/xen/include/xen/multiboot.h
index d1b43e1183..a541bdf8a8 100644
--- a/xen/include/xen/multiboot.h
+++ b/xen/include/xen/multiboot.h
@@ -46,23 +46,25 @@
 #ifndef __ASSEMBLY__
 
 /* The symbol table for a.out.  */
-typedef struct {
+struct aout_symbol_table {
 u32 tabsize;
 u32 strsize;
 u32 addr;
 u32 reserved;
-} aout_symbol_table_t;
+};
+typedef struct aout_symbol_table aout_symbol_table_t;
 
 /* The section header table for ELF.  */
-typedef struct {
+struct elf_section_header_table{
 u32 num;
 u32 size;
 u32 addr;
 u32 shndx;
-} elf_section_header_table_t;
+};
+typedef struct elf_section_header_table elf_section_header_table_t;
 
 /* The Multiboot information.  */
-typedef struct {
+struct multiboot_info {
 u32 flags;
 
 /* Valid if flags sets MBI_MEMLIMITS */
@@ -101,26 +103,29 @@ typedef struct {
 
 /* Valid if flags sets MBI_APM */
 u32 apm_table;
-} multiboot_info_t;
+};
+typedef struct multiboot_info multiboot_info_t;
 
 /* The module structure.  */
-typedef struct {
+struct module {
 u32 mod_start;
 u32 mod_end;
 u32 string;
 u32 reserved;
-} module_t;
+};
+typedef struct module module_t;
 
 /* The memory map. Be careful that the offset 0 is base_addr_low
but no size.  */
-typedef struct {
+struct memory_map {
 u32 size;
 u32 base_addr_low;
 u32 base_addr_high;
 u32 length_low;
 u32 length_high;
 u32 type;
-} memory_map_t;
+};
+typedef struct memory_map memory_map_t;
 
 
 #endif /* __ASSEMBLY__ */
-- 
2.17.1

Re: [PATCH 1/2] x86/ucode: allow cpu_request_microcode() to skip memory allocation

2022-12-12 Thread Sergey Dyasli

On Thu, Dec 8, 2022 at 3:34 PM Jan Beulich  wrote:
>
> On 08.12.2022 14:59, Andrew Cooper wrote:
> > On 08/12/2022 13:26, Sergey Dyasli wrote:
> >> @@ -240,20 +240,20 @@ static const struct microcode_patch *nmi_patch = 
> >> ZERO_BLOCK_PTR;
> >>   * patch is found and an error occurs during the parsing process. 
> >> Otherwise
> >>   * return NULL.
> >>   */
> >> -static struct microcode_patch *parse_blob(const char *buf, size_t len)
> >> +static const struct microcode_patch *parse_blob(const char *buf, size_t 
> >> len)
> >>  {
> >>  alternative_vcall(ucode_ops.collect_cpu_info);
> >>
> >> -return alternative_call(ucode_ops.cpu_request_microcode, buf, len);
> >> +return alternative_call(ucode_ops.cpu_request_microcode, buf, len, 
> >> true);
> >>  }
> >>
> >> -static void microcode_free_patch(struct microcode_patch *patch)
> >> +static void microcode_free_patch(const struct microcode_patch *patch)
> >>  {
> >> -xfree(patch);
> >> +xfree((void *)patch);
> >
> > This hunk demonstrates why the hook wants to return a non-const
> > pointer.  Keeping it non-const will shrink this patch quite a bit.
>
> Alternatively it demonstrates why xfree() should take const void *,
> just like e.g. unmap_domain_page() or vunmap() already do. We've
> talked about this before, and the argument hasn't changed: Neither
> unmapping nor freeing really alters the contents of the pointed to
> area from the perspective of the caller, as the contents simply
> disappears altogether.

Despite my love of const, const correctness in C is quite a pain. I've
tried to make xfree() take a const pointer but then issues began with
add/strip_padding() functions and I couldn't overcome those without
further (void *) casts which just take the problem to a different
layer.

I think I'll have to go with Andrew's suggestion and continue to return
non-const pointers from cpu_request_microcode(). This will include
a cast though:

patch = (struct microcode_patch *)saved;

Sergey

[PATCH 2/2] x86/ucode: load microcode earlier on boot CPU

2022-12-08 Thread Sergey Dyasli

Call early_microcode_init() straight after multiboot modules become
accessible. Modify it to load the ucode directly from the blob bypassing
populating microcode_cache because xmalloc is still not available at
that point during Xen boot.

Introduce early_microcode_init_cache() for populating microcode_cache.
It needs to find the new virtual address of the ucode blob because it
changes during boot, e.g. from 0x010802fc to 0x83204dac52fc.

Signed-off-by: Sergey Dyasli 
---
 xen/arch/x86/cpu/microcode/core.c| 61 
 xen/arch/x86/include/asm/microcode.h |  6 ++-
 xen/arch/x86/include/asm/setup.h |  3 --
 xen/arch/x86/setup.c | 10 +++--
 4 files changed, 64 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/cpu/microcode/core.c 
b/xen/arch/x86/cpu/microcode/core.c
index 924a2bd7b5..b04b30ce5e 100644
--- a/xen/arch/x86/cpu/microcode/core.c
+++ b/xen/arch/x86/cpu/microcode/core.c
@@ -198,7 +198,7 @@ void __init microcode_scan_module(
 bootstrap_map(NULL);
 }
 }
-void __init microcode_grab_module(
+static void __init microcode_grab_module(
 unsigned long *module_map,
 const multiboot_info_t *mbi)
 {
@@ -733,9 +733,35 @@ int microcode_update_one(void)
 }
 
 /* BSP calls this function to parse ucode blob and then apply an update. */
-static int __init early_microcode_update_cpu(void)
+static int __init early_microcode_update_cache(const void *data, size_t len)
 {
 int rc = 0;
+const struct microcode_patch *patch;
+
+if ( !data )
+return -ENOMEM;
+
+patch = parse_blob(data, len);
+if ( IS_ERR(patch) )
+{
+printk(XENLOG_WARNING "Parsing microcode blob error %ld\n",
+   PTR_ERR(patch));
+return PTR_ERR(patch);
+}
+
+if ( !patch )
+return -ENOENT;
+
+spin_lock(&microcode_mutex);
+rc = microcode_update_cache(patch);
+spin_unlock(&microcode_mutex);
+ASSERT(rc);
+
+return rc;
+}
+
+static int __init early_microcode_update_cpu(void)
+{
 const void *data = NULL;
 size_t len;
 const struct microcode_patch *patch;
@@ -754,7 +780,9 @@ static int __init early_microcode_update_cpu(void)
 if ( !data )
 return -ENOMEM;
 
-patch = parse_blob(data, len);
+alternative_vcall(ucode_ops.collect_cpu_info);
+
+patch = alternative_call(ucode_ops.cpu_request_microcode, data, len, 
false);
 if ( IS_ERR(patch) )
 {
 printk(XENLOG_WARNING "Parsing microcode blob error %ld\n",
@@ -765,15 +793,28 @@ static int __init early_microcode_update_cpu(void)
 if ( !patch )
 return -ENOENT;
 
-spin_lock(&microcode_mutex);
-rc = microcode_update_cache(patch);
-spin_unlock(&microcode_mutex);
-ASSERT(rc);
+return microcode_update_cpu(patch);
+}
+
+int __init early_microcode_init_cache(unsigned long *module_map,
+  const multiboot_info_t *mbi)
+{
+int rc = 0;
+
+/* Need to rescan the modules because they might have been relocated */
+microcode_grab_module(module_map, mbi);
+
+if ( ucode_mod.mod_end )
+rc = early_microcode_update_cache(bootstrap_map(&ucode_mod),
+  ucode_mod.mod_end);
+else if ( ucode_blob.size )
+rc = early_microcode_update_cache(ucode_blob.data, ucode_blob.size);
 
-return microcode_update_one();
+return rc;
 }
 
-int __init early_microcode_init(void)
+int __init early_microcode_init(unsigned long *module_map,
+const multiboot_info_t *mbi)
 {
 const struct cpuinfo_x86 *c = &boot_cpu_data;
 int rc = 0;
@@ -797,6 +838,8 @@ int __init early_microcode_init(void)
 return -ENODEV;
 }
 
+microcode_grab_module(module_map, mbi);
+
 alternative_vcall(ucode_ops.collect_cpu_info);
 
 if ( ucode_mod.mod_end || ucode_blob.size )
diff --git a/xen/arch/x86/include/asm/microcode.h 
b/xen/arch/x86/include/asm/microcode.h
index 3b0234e9fa..c5f9897535 100644
--- a/xen/arch/x86/include/asm/microcode.h
+++ b/xen/arch/x86/include/asm/microcode.h
@@ -3,6 +3,7 @@
 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -21,7 +22,10 @@ DECLARE_PER_CPU(struct cpu_signature, cpu_sig);
 
 void microcode_set_module(unsigned int idx);
 int microcode_update(XEN_GUEST_HANDLE(const_void), unsigned long len);
-int early_microcode_init(void);
+int early_microcode_init(unsigned long *module_map,
+ const multiboot_info_t *mbi);
+int early_microcode_init_cache(unsigned long *module_map,
+   const multiboot_info_t *mbi);
 int microcode_update_one(void);
 
 #endif /* ASM_X86__MICROCODE_H */
diff --git a/xen/arch/x86/include/asm/setup.h b/xen/arch/x86/include/asm/setup.h
index 21037b7f31..82ee51c2dc 100644
--- a/xen/arch/x86/include/asm/setup.h
+++ b/xen/arch/x86/include/asm/setup.h
@@ -45,9 +45,6 @@ void *bootstrap_map(const module_t *mod);
 
 int xen_in_range(unsigned long mfn);
 
-void microcode_grab_module(
-un

[PATCH 1/2] x86/ucode: allow cpu_request_microcode() to skip memory allocation

2022-12-08 Thread Sergey Dyasli

This is a preparatory step in order to do earlier microcode loading on
the boot CPU when the domain heap has not been initialized yet and
xmalloc is still unavailable.

Add a make_copy argument which allows loading microcode directly from
the blob, bypassing microcode_cache. Add const qualifiers where required.

Signed-off-by: Sergey Dyasli 
---
 xen/arch/x86/cpu/microcode/amd.c | 17 +++--
 xen/arch/x86/cpu/microcode/core.c| 18 +-
 xen/arch/x86/cpu/microcode/intel.c   | 17 +++--
 xen/arch/x86/cpu/microcode/private.h | 18 --
 4 files changed, 43 insertions(+), 27 deletions(-)

diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c
index 8195707ee1..d4df3c4806 100644
--- a/xen/arch/x86/cpu/microcode/amd.c
+++ b/xen/arch/x86/cpu/microcode/amd.c
@@ -299,11 +299,11 @@ static int scan_equiv_cpu_table(const struct 
container_equiv_table *et)
 return -ESRCH;
 }
 
-static struct microcode_patch *cf_check cpu_request_microcode(
-const void *buf, size_t size)
+static const struct microcode_patch *cf_check cpu_request_microcode(
+const void *buf, size_t size, bool make_copy)
 {
 const struct microcode_patch *saved = NULL;
-struct microcode_patch *patch = NULL;
+const struct microcode_patch *patch = NULL;
 size_t saved_size = 0;
 int error = 0;
 
@@ -411,9 +411,14 @@ static struct microcode_patch *cf_check 
cpu_request_microcode(
 
 if ( saved )
 {
-patch = xmemdup_bytes(saved, saved_size);
-if ( !patch )
-error = -ENOMEM;
+if ( make_copy )
+{
+patch = xmemdup_bytes(saved, saved_size);
+if ( !patch )
+error = -ENOMEM;
+}
+else
+patch = saved;
 }
 
 if ( error && !patch )
diff --git a/xen/arch/x86/cpu/microcode/core.c 
b/xen/arch/x86/cpu/microcode/core.c
index 452a7ca773..924a2bd7b5 100644
--- a/xen/arch/x86/cpu/microcode/core.c
+++ b/xen/arch/x86/cpu/microcode/core.c
@@ -99,7 +99,7 @@ static bool ucode_in_nmi = true;
 bool __read_mostly opt_ucode_allow_same;
 
 /* Protected by microcode_mutex */
-static struct microcode_patch *microcode_cache;
+static const struct microcode_patch *microcode_cache;
 
 void __init microcode_set_module(unsigned int idx)
 {
@@ -240,20 +240,20 @@ static const struct microcode_patch *nmi_patch = 
ZERO_BLOCK_PTR;
  * patch is found and an error occurs during the parsing process. Otherwise
  * return NULL.
  */
-static struct microcode_patch *parse_blob(const char *buf, size_t len)
+static const struct microcode_patch *parse_blob(const char *buf, size_t len)
 {
 alternative_vcall(ucode_ops.collect_cpu_info);
 
-return alternative_call(ucode_ops.cpu_request_microcode, buf, len);
+return alternative_call(ucode_ops.cpu_request_microcode, buf, len, true);
 }
 
-static void microcode_free_patch(struct microcode_patch *patch)
+static void microcode_free_patch(const struct microcode_patch *patch)
 {
-xfree(patch);
+xfree((void *)patch);
 }
 
 /* Return true if cache gets updated. Otherwise, return false */
-static bool microcode_update_cache(struct microcode_patch *patch)
+static bool microcode_update_cache(const struct microcode_patch *patch)
 {
 ASSERT(spin_is_locked(&microcode_mutex));
 
@@ -565,7 +565,7 @@ static long cf_check microcode_update_helper(void *data)
 int ret;
 struct ucode_buf *buffer = data;
 unsigned int cpu, updated;
-struct microcode_patch *patch;
+const struct microcode_patch *patch;
 
 /* cpu_online_map must not change during update */
 if ( !get_cpu_maps() )
@@ -648,7 +648,7 @@ static long cf_check microcode_update_helper(void *data)
  *   this requirement can be relaxed in the future. Right now, this is
  *   conservative and good.
  */
-ret = stop_machine_run(do_microcode_update, patch, NR_CPUS);
+ret = stop_machine_run(do_microcode_update, (void *)patch, NR_CPUS);
 
 updated = atomic_read(&cpu_updated);
 if ( updated > 0 )
@@ -738,7 +738,7 @@ static int __init early_microcode_update_cpu(void)
 int rc = 0;
 const void *data = NULL;
 size_t len;
-struct microcode_patch *patch;
+const struct microcode_patch *patch;
 
 if ( ucode_blob.size )
 {
diff --git a/xen/arch/x86/cpu/microcode/intel.c 
b/xen/arch/x86/cpu/microcode/intel.c
index f5ba6d76d7..017f37e43d 100644
--- a/xen/arch/x86/cpu/microcode/intel.c
+++ b/xen/arch/x86/cpu/microcode/intel.c
@@ -323,12 +323,12 @@ static int cf_check apply_microcode(const struct 
microcode_patch *patch)
 return 0;
 }
 
-static struct microcode_patch *cf_check cpu_request_microcode(
-const void *buf, size_t size)
+static const struct microcode_patch *cf_check cpu_request_microcode(
+const void *buf, size_t size, bool make_copy)
 {
 int error = 0;
 const struct microcode_patch *saved = NULL;
-struct microcode_patch *patch = NULL;
+const struct microcode_patch *p

[PATCH v4] sched: print information about scheduling granularity

2020-05-06 Thread Sergey Dyasli

Currently it might not be obvious which scheduling mode (e.g. core-
scheduling) is being used by the scheduler. Alleviate this by printing
additional information about the selected granularity per-cpupool.

Note: per-cpupool granularity selection is not implemented yet. Every
  cpupool gets its granularity from the single global value.

Take this opportunity to introduce struct sched_gran_name array and
refactor sched_select_granularity().

Signed-off-by: Sergey Dyasli 
---
v4:
- use char[8]

v3:
- use const char*
- use sched_gran_name array instead of switch
- updated commit message

v2:
- print information on a separate line
- use per-cpupool granularity
- updated commit message

CC: Juergen Gross 
CC: Dario Faggioli 
CC: George Dunlap 
CC: Jan Beulich 
---
 xen/common/sched/cpupool.c | 51 +++---
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index d40345b585..97c2d5b3c1 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -40,19 +40,50 @@ static DEFINE_SPINLOCK(cpupool_lock);
 static enum sched_gran __read_mostly opt_sched_granularity = SCHED_GRAN_cpu;
 static unsigned int __read_mostly sched_granularity = 1;
 
+struct sched_gran_name {
+enum sched_gran mode;
+char name[8];
+};
+
+static const struct sched_gran_name sg_name[] = {
+{SCHED_GRAN_cpu, "cpu"},
+{SCHED_GRAN_core, "core"},
+{SCHED_GRAN_socket, "socket"},
+};
+
+static void sched_gran_print(enum sched_gran mode, unsigned int gran)
+{
+const char *name = "";
+unsigned int i;
+
+for ( i = 0; i < ARRAY_SIZE(sg_name); i++ )
+{
+if ( mode == sg_name[i].mode )
+{
+name = sg_name[i].name;
+break;
+}
+}
+
+printk("Scheduling granularity: %s, %u CPU%s per sched-resource\n",
+   name, gran, gran == 1 ? "" : "s");
+}
+
 #ifdef CONFIG_HAS_SCHED_GRANULARITY
 static int __init sched_select_granularity(const char *str)
 {
-if ( strcmp("cpu", str) == 0 )
-opt_sched_granularity = SCHED_GRAN_cpu;
-else if ( strcmp("core", str) == 0 )
-opt_sched_granularity = SCHED_GRAN_core;
-else if ( strcmp("socket", str) == 0 )
-opt_sched_granularity = SCHED_GRAN_socket;
-else
-return -EINVAL;
+unsigned int i;
 
-return 0;
+for ( i = 0; i < ARRAY_SIZE(sg_name); i++ )
+{
+if ( strcmp(sg_name[i].name, str) == 0 )
+{
+opt_sched_granularity = sg_name[i].mode;
+return 0;
+}
+}
+
+return -EINVAL;
 }
 custom_param("sched-gran", sched_select_granularity);
 #endif
@@ -115,6 +146,7 @@ static void __init cpupool_gran_init(void)
 warning_add(fallback);
 
 sched_granularity = gran;
+sched_gran_print(opt_sched_granularity, sched_granularity);
 }
 
 unsigned int cpupool_get_granularity(const struct cpupool *c)
@@ -911,6 +943,7 @@ void dump_runq(unsigned char key)
 {
 printk("Cpupool %d:\n", (*c)->cpupool_id);
 printk("Cpus: %*pbl\n", CPUMASK_PR((*c)->cpu_valid));
+sched_gran_print((*c)->gran, cpupool_get_granularity(*c));
 schedule_dump(*c);
 }
 
-- 
2.17.1

Re: Cpu on/offlining crash with core scheduling

2020-04-29 Thread Sergey Dyasli

On 29/04/2020 09:09, Jürgen Groß wrote:
> On 27.04.20 15:49, Sergey Dyasli wrote:
>> Hi Juergen,
>>
>> When I'm testing vcpu pinning with something like:
>>
>>   # xl vcpu-pin 0 0 2
>>   # xen-hptool cpu-offline 3
>>
>>   (offline / online CPUs {2,3} if the above is successful)
>>
>> I'm reliably seeing the following crash on the latest staging:
>>
>> (XEN) Watchdog timer detects that CPU1 is stuck!
>> (XEN) [ Xen-4.14-unstable  x86_64  debug=y   Not tainted ]
>> (XEN) CPU:    1
>> (XEN) RIP:    e008:[] 
>> common/sched/core.c#sched_wait_rendezvous_in+0x16c/0x385
>> (XEN) RFLAGS: 0002   CONTEXT: hypervisor
>> (XEN) rax: f001   rbx: 82d0805c9118   rcx: 83085e750301
>> (XEN) rdx: 0001   rsi: 83086499b972   rdi: 83085e7503a6
>> (XEN) rbp: 83085e7dfe28   rsp: 83085e7dfdd8   r8:  830864985440
>> (XEN) r9:  83085e714068   r10: 0014   r11: 0056b6a1aab2
>> (XEN) r12: 83086499e490   r13: 82d0805f26e0   r14: 83085e7503a0
>> (XEN) r15: 0001   cr0: 80050033   cr4: 00362660
>> (XEN) cr3: 000823a8e000   cr2: 6026000f6fc0
>> (XEN) fsb:    gsb: 888138dc   gss: 
>> (XEN) ds: 002b   es: 002b   fs:    gs:    ss: e010   cs: e008
>> (XEN) Xen code around  
>> (common/sched/core.c#sched_wait_rendezvous_in+0x16c/0x385):
>> (XEN)  4c 89 f7 e8 dc a5 fd ff <4b> 8b 44 fd 00 48 8b 04 18 4c 3b 70 10 0f 
>> 85 3f
>> (XEN) Xen stack trace from rsp=83085e7dfdd8:
>> (XEN)    0056b42128a6 83086499ff30 83086498a000 83085e7dfe48
>> (XEN)    00010001 0056b42128a6 83086499e490 
>> (XEN)    0001 0001 83085e7dfe78 82d080252ae8
>> (XEN)    83086498a000 000180230434 83085e7503a0 82d0805ceb00
>> (XEN)     82d0805cea80  82d0805dea80
>> (XEN)    83085e7dfeb0 82d08022c232 0001 82d0805ceb00
>> (XEN)    0001 0001 0001 83085e7dfec0
>> (XEN)    82d08022c2cd 83085e7dfef0 82d08031cae9 83086498a000
>> (XEN)    83086498a000 0001 0001 83085e7dfde8
>> (XEN)    88813021d700 88813021d700  
>> (XEN)    0007 88813021d700 0246 7ff0
>> (XEN)     0001ca00  810013aa
>> (XEN)    8203d210 deadbeefdeadf00d deadbeefdeadf00d 0100
>> (XEN)    810013aa e033 0246 c900400dfeb0
>> (XEN)    e02b   
>> (XEN)     e011 83086498a000 0037e43bd000
>> (XEN)    00362660  800864980002 0601
>> (XEN)    
>> (XEN) Xen call trace:
>> (XEN)    [] R 
>> common/sched/core.c#sched_wait_rendezvous_in+0x16c/0x385
>> (XEN)    [] F common/sched/core.c#sched_slave+0x262/0x31e
>> (XEN)    [] F common/softirq.c#__do_softirq+0x8a/0xbc
>> (XEN)    [] F do_softirq+0x13/0x15
>> (XEN)    [] F arch/x86/domain.c#idle_loop+0x57/0xa7
>> (XEN)
>> (XEN) CPU0 @ e008:82d08022c2b7 (process_pending_softirqs+0x53/0x56)
>> (XEN) CPU4 @ e008:82d08022bc40 
>> (common/rcupdate.c#rcu_process_callbacks+0x22e/0x24b)
>> (XEN) CPU2 @ e008:82d08022c26f (process_pending_softirqs+0xb/0x56)
>> (XEN) CPU7 @ e008:82d08022bc40 
>> (common/rcupdate.c#rcu_process_callbacks+0x22e/0x24b)
>> (XEN) CPU3 @ e008:82d08022bc40 
>> (common/rcupdate.c#rcu_process_callbacks+0x22e/0x24b)
>> (XEN) CPU5 @ e008:82d08022cc34 (_spin_lock+0x4d/0x62)
>> (XEN) CPU6 @ e008:82d08022c264 (process_pending_softirqs+0/0x56)
>> (XEN)
>> (XEN) 
>> (XEN) Panic on CPU 1:
>> (XEN) FATAL TRAP: vector = 2 (nmi)
>> (XEN) [error_code=] , IN INTERRUPT CONTEXT
>> (XEN) 
>> (XEN)
>> (XEN) Reboot in five seconds...
>> (XEN) Executing kexec image on cpu1
>> (XEN) Shot down all CPUs
>>
>>
>> Is this something you can reproduce?
> 
> Yes, I was able to hit this.
> 
> Attached patch is fixing it for me. Could you give it a try?

The patch fixes the immediate issue:

Tested-by: Sergey Dyasli 

Thanks!

However, when running the following script:

while :; do xen-hptool cpu-offlin

Cpu on/offlining crash with core scheduling

2020-04-27 Thread Sergey Dyasli

Hi Juergen,

When I'm testing vcpu pinning with something like:

 # xl vcpu-pin 0 0 2
 # xen-hptool cpu-offline 3

 (offline / online CPUs {2,3} if the above is successful)

I'm reliably seeing the following crash on the latest staging:

(XEN) Watchdog timer detects that CPU1 is stuck!
(XEN) [ Xen-4.14-unstable  x86_64  debug=y   Not tainted ]
(XEN) CPU:1
(XEN) RIP:e008:[] 
common/sched/core.c#sched_wait_rendezvous_in+0x16c/0x385
(XEN) RFLAGS: 0002   CONTEXT: hypervisor
(XEN) rax: f001   rbx: 82d0805c9118   rcx: 83085e750301
(XEN) rdx: 0001   rsi: 83086499b972   rdi: 83085e7503a6
(XEN) rbp: 83085e7dfe28   rsp: 83085e7dfdd8   r8:  830864985440
(XEN) r9:  83085e714068   r10: 0014   r11: 0056b6a1aab2
(XEN) r12: 83086499e490   r13: 82d0805f26e0   r14: 83085e7503a0
(XEN) r15: 0001   cr0: 80050033   cr4: 00362660
(XEN) cr3: 000823a8e000   cr2: 6026000f6fc0
(XEN) fsb:    gsb: 888138dc   gss: 
(XEN) ds: 002b   es: 002b   fs:    gs:    ss: e010   cs: e008
(XEN) Xen code around  
(common/sched/core.c#sched_wait_rendezvous_in+0x16c/0x385):
(XEN)  4c 89 f7 e8 dc a5 fd ff <4b> 8b 44 fd 00 48 8b 04 18 4c 3b 70 10 0f 85 3f
(XEN) Xen stack trace from rsp=83085e7dfdd8:
(XEN)0056b42128a6 83086499ff30 83086498a000 83085e7dfe48
(XEN)00010001 0056b42128a6 83086499e490 
(XEN)0001 0001 83085e7dfe78 82d080252ae8
(XEN)83086498a000 000180230434 83085e7503a0 82d0805ceb00
(XEN) 82d0805cea80  82d0805dea80
(XEN)83085e7dfeb0 82d08022c232 0001 82d0805ceb00
(XEN)0001 0001 0001 83085e7dfec0
(XEN)82d08022c2cd 83085e7dfef0 82d08031cae9 83086498a000
(XEN)83086498a000 0001 0001 83085e7dfde8
(XEN)88813021d700 88813021d700  
(XEN)0007 88813021d700 0246 7ff0
(XEN) 0001ca00  810013aa
(XEN)8203d210 deadbeefdeadf00d deadbeefdeadf00d 0100
(XEN)810013aa e033 0246 c900400dfeb0
(XEN)e02b   
(XEN) e011 83086498a000 0037e43bd000
(XEN)00362660  800864980002 0601
(XEN)
(XEN) Xen call trace:
(XEN)[] R 
common/sched/core.c#sched_wait_rendezvous_in+0x16c/0x385
(XEN)[] F common/sched/core.c#sched_slave+0x262/0x31e
(XEN)[] F common/softirq.c#__do_softirq+0x8a/0xbc
(XEN)[] F do_softirq+0x13/0x15
(XEN)[] F arch/x86/domain.c#idle_loop+0x57/0xa7
(XEN)
(XEN) CPU0 @ e008:82d08022c2b7 (process_pending_softirqs+0x53/0x56)
(XEN) CPU4 @ e008:82d08022bc40 
(common/rcupdate.c#rcu_process_callbacks+0x22e/0x24b)
(XEN) CPU2 @ e008:82d08022c26f (process_pending_softirqs+0xb/0x56)
(XEN) CPU7 @ e008:82d08022bc40 
(common/rcupdate.c#rcu_process_callbacks+0x22e/0x24b)
(XEN) CPU3 @ e008:82d08022bc40 
(common/rcupdate.c#rcu_process_callbacks+0x22e/0x24b)
(XEN) CPU5 @ e008:82d08022cc34 (_spin_lock+0x4d/0x62)
(XEN) CPU6 @ e008:82d08022c264 (process_pending_softirqs+0/0x56)
(XEN)
(XEN) 
(XEN) Panic on CPU 1:
(XEN) FATAL TRAP: vector = 2 (nmi)
(XEN) [error_code=] , IN INTERRUPT CONTEXT
(XEN) 
(XEN)
(XEN) Reboot in five seconds...
(XEN) Executing kexec image on cpu1
(XEN) Shot down all CPUs


Is this something you can reproduce?

--
Thanks,
Sergey

[PATCH v3] sched: print information about scheduling granularity

2020-04-22 Thread Sergey Dyasli

Currently it might not be obvious which scheduling mode (e.g. core-
scheduling) is being used by the scheduler. Alleviate this by printing
additional information about the selected granularity per-cpupool.

Note: per-cpupool granularity selection is not implemented yet. Every
  cpupool gets its granularity from the single global value.

Take this opportunity to introduce struct sched_gran_name array and
refactor sched_select_granularity().

Signed-off-by: Sergey Dyasli 
---
v3:
- use const char*
- use sched_gran_name array instead of switch
- updated commit message

v2:
- print information on a separate line
- use per-cpupool granularity
- updated commit message

CC: Juergen Gross 
CC: Dario Faggioli 
CC: George Dunlap 
CC: Jan Beulich 
---
 xen/common/sched/cpupool.c | 51 +++---
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index d40345b585..b60799a558 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -40,19 +40,50 @@ static DEFINE_SPINLOCK(cpupool_lock);
 static enum sched_gran __read_mostly opt_sched_granularity = SCHED_GRAN_cpu;
 static unsigned int __read_mostly sched_granularity = 1;
 
+struct sched_gran_name {
+enum sched_gran mode;
+const char *name;
+};
+
+static const struct sched_gran_name sg_name[] = {
+{SCHED_GRAN_cpu, "cpu"},
+{SCHED_GRAN_core, "core"},
+{SCHED_GRAN_socket, "socket"},
+};
+
+static void sched_gran_print(enum sched_gran mode, unsigned int gran)
+{
+const char *name = "";
+unsigned int i;
+
+for ( i = 0; i < ARRAY_SIZE(sg_name); i++ )
+{
+if ( mode == sg_name[i].mode )
+{
+name = sg_name[i].name;
+break;
+}
+}
+
+printk("Scheduling granularity: %s, %u CPU%s per sched-resource\n",
+   name, gran, gran == 1 ? "" : "s");
+}
+
 #ifdef CONFIG_HAS_SCHED_GRANULARITY
 static int __init sched_select_granularity(const char *str)
 {
-if ( strcmp("cpu", str) == 0 )
-opt_sched_granularity = SCHED_GRAN_cpu;
-else if ( strcmp("core", str) == 0 )
-opt_sched_granularity = SCHED_GRAN_core;
-else if ( strcmp("socket", str) == 0 )
-opt_sched_granularity = SCHED_GRAN_socket;
-else
-return -EINVAL;
+unsigned int i;
 
-return 0;
+for ( i = 0; i < ARRAY_SIZE(sg_name); i++ )
+{
+if ( strcmp(sg_name[i].name, str) == 0 )
+{
+opt_sched_granularity = sg_name[i].mode;
+return 0;
+}
+}
+
+return -EINVAL;
 }
 custom_param("sched-gran", sched_select_granularity);
 #endif
@@ -115,6 +146,7 @@ static void __init cpupool_gran_init(void)
 warning_add(fallback);
 
 sched_granularity = gran;
+sched_gran_print(opt_sched_granularity, sched_granularity);
 }
 
 unsigned int cpupool_get_granularity(const struct cpupool *c)
@@ -911,6 +943,7 @@ void dump_runq(unsigned char key)
 {
 printk("Cpupool %d:\n", (*c)->cpupool_id);
 printk("Cpus: %*pbl\n", CPUMASK_PR((*c)->cpu_valid));
+sched_gran_print((*c)->gran, cpupool_get_granularity(*c));
 schedule_dump(*c);
 }
 
-- 
2.17.1

Re: [PATCH v2] sched: print information about scheduling granularity

2020-04-21 Thread Sergey Dyasli

On 20/04/2020 14:45, Jürgen Groß wrote:
> On 20.04.20 15:06, Sergey Dyasli wrote:
>> Currently it might be not obvious which scheduling mode (e.g. core-
>> scheduling) is being used by the scheduler. Alleviate this by printing
>> additional information about the selected granularity per-cpupool.
>>
>> Note: per-cpupool granularity selection is not implemented yet.
>>    The single global value is being used for each cpupool.
> 
> This is misleading. You are using the per-cpupool values, but they
> are all the same right now.

This is what I meant by my note, but I might need to improve the wording
since the current one looks ambiguous to you.

> 
>>
>> Signed-off-by: Sergey Dyasli 
>> ---
>> v2:
>> - print information on a separate line
>> - use per-cpupool granularity
>> - updated commit message
>>
>> CC: Juergen Gross 
>> CC: Dario Faggioli 
>> CC: George Dunlap 
>> CC: Jan Beulich 
>> ---
>>   xen/common/sched/cpupool.c | 26 ++
>>   1 file changed, 26 insertions(+)
>>
>> diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
>> index d40345b585..68106f6c15 100644
>> --- a/xen/common/sched/cpupool.c
>> +++ b/xen/common/sched/cpupool.c
>> @@ -40,6 +40,30 @@ static DEFINE_SPINLOCK(cpupool_lock);
>>   static enum sched_gran __read_mostly opt_sched_granularity = 
>> SCHED_GRAN_cpu;
>>   static unsigned int __read_mostly sched_granularity = 1;
>> +static void sched_gran_print(enum sched_gran mode, unsigned int gran)
>> +{
>> +    char *str = "";
>> +
>> +    switch ( mode )
>> +    {
>> +    case SCHED_GRAN_cpu:
>> +    str = "cpu";
>> +    break;
>> +    case SCHED_GRAN_core:
>> +    str = "core";
>> +    break;
>> +    case SCHED_GRAN_socket:
>> +    str = "socket";
>> +    break;
>> +    default:
>> +    ASSERT_UNREACHABLE();
>> +    break;
>> +    }
> 
> With this addition it might make sense to have an array indexed by
> mode to get the string. This array could then be used in
> sched_select_granularity(), too.

I had thoughts about that, and with your suggestion it looks like I need
to go and do it.

> 
>> +
>> +    printk("Scheduling granularity: %s, %u CPU%s per sched-resource\n",
>> +   str, gran, gran == 1 ? "" : "s");
>> +}
>> +
>>   #ifdef CONFIG_HAS_SCHED_GRANULARITY
>>   static int __init sched_select_granularity(const char *str)
>>   {
>> @@ -115,6 +139,7 @@ static void __init cpupool_gran_init(void)
>>   warning_add(fallback);
>>   sched_granularity = gran;
>> +    sched_gran_print(opt_sched_granularity, sched_granularity);
>>   }
>>   unsigned int cpupool_get_granularity(const struct cpupool *c)
>> @@ -911,6 +936,7 @@ void dump_runq(unsigned char key)
>>   {
>>   printk("Cpupool %d:\n", (*c)->cpupool_id);
>>   printk("Cpus: %*pbl\n", CPUMASK_PR((*c)->cpu_valid));
>> +    sched_gran_print((*c)->gran, cpupool_get_granularity(*c));
>>   schedule_dump(*c);
>>   }
> 

--
Thanks,
Sergey

[PATCH v2] sched: print information about scheduling granularity

2020-04-20 Thread Sergey Dyasli

Currently it might not be obvious which scheduling mode (e.g. core-
scheduling) is being used by the scheduler. Alleviate this by printing
additional information about the selected granularity per-cpupool.

Note: per-cpupool granularity selection is not implemented yet.
  The single global value is being used for each cpupool.

Signed-off-by: Sergey Dyasli 
---
v2:
- print information on a separate line
- use per-cpupool granularity
- updated commit message

CC: Juergen Gross 
CC: Dario Faggioli 
CC: George Dunlap 
CC: Jan Beulich 
---
 xen/common/sched/cpupool.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index d40345b585..68106f6c15 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -40,6 +40,30 @@ static DEFINE_SPINLOCK(cpupool_lock);
 static enum sched_gran __read_mostly opt_sched_granularity = SCHED_GRAN_cpu;
 static unsigned int __read_mostly sched_granularity = 1;
 
+static void sched_gran_print(enum sched_gran mode, unsigned int gran)
+{
+char *str = "";
+
+switch ( mode )
+{
+case SCHED_GRAN_cpu:
+str = "cpu";
+break;
+case SCHED_GRAN_core:
+str = "core";
+break;
+case SCHED_GRAN_socket:
+str = "socket";
+break;
+default:
+ASSERT_UNREACHABLE();
+break;
+}
+
+printk("Scheduling granularity: %s, %u CPU%s per sched-resource\n",
+   str, gran, gran == 1 ? "" : "s");
+}
+
 #ifdef CONFIG_HAS_SCHED_GRANULARITY
 static int __init sched_select_granularity(const char *str)
 {
@@ -115,6 +139,7 @@ static void __init cpupool_gran_init(void)
 warning_add(fallback);
 
 sched_granularity = gran;
+sched_gran_print(opt_sched_granularity, sched_granularity);
 }
 
 unsigned int cpupool_get_granularity(const struct cpupool *c)
@@ -911,6 +936,7 @@ void dump_runq(unsigned char key)
 {
 printk("Cpupool %d:\n", (*c)->cpupool_id);
 printk("Cpus: %*pbl\n", CPUMASK_PR((*c)->cpu_valid));
+sched_gran_print((*c)->gran, cpupool_get_granularity(*c));
 schedule_dump(*c);
 }
 
-- 
2.17.1

Re: [PATCH] sched: print information about scheduler granularity

2020-04-17 Thread Sergey Dyasli

On 17/04/2020 08:57, Jürgen Groß wrote:
> On 16.04.20 18:43, Dario Faggioli wrote:
>> On Thu, 2020-04-16 at 09:33 +0100, Sergey Dyasli wrote:
>>> Currently it might be not obvious which scheduling mode is being used
>>> by the scheduler. Alleviate this by printing additional information
>>> about the selected granularity.
>>>
>> I like the idea. However, I don't like how verbose and long that line
>> becomes.
>>
>>>   Messages now look like these:
>>>
>>> 1. boot
>>> (XEN) [00089808f0ea7496] Using scheduler: SMP Credit Scheduler
>>> (credit) in core-scheduling mode
>>>
>>> 2. xl debug-keys r
>>> (XEN) [   45.914314] Scheduler: SMP Credit Scheduler (credit) in 2-
>>> way core-scheduling mode
>>>
>> What about adding an entry, just below these ones. Something looking
>> like, for instance (both at boot and in the debug-key dump):
>>
>> "Scheduling granularity: cpu"
>>
>> (or "core", or "socket")

I agree that the line becomes too long. I'll print the new information
on a separate line as you suggest in v2.

>>
>> Also
>>
>>> --- a/xen/common/sched/cpupool.c
>>> +++ b/xen/common/sched/cpupool.c
>>> @@ -38,7 +38,35 @@ static cpumask_t cpupool_locked_cpus;
>>>   static DEFINE_SPINLOCK(cpupool_lock);
>>>   static enum sched_gran __read_mostly opt_sched_granularity =
>>> SCHED_GRAN_cpu;
>>> -static unsigned int __read_mostly sched_granularity = 1;
>>> +static unsigned int __read_mostly sched_granularity;
>>> +
>>> +char *sched_gran_str(char *str, size_t size)
>>> +{
>>> +    char *mode = "";
>>> +
>>> +    switch ( opt_sched_granularity )
>>> +    {
>>> +    case SCHED_GRAN_cpu:
>>> +    mode = "cpu";
>>> +    break;
>>> +    case SCHED_GRAN_core:
>>> +    mode = "core";
>>> +    break;
>>> +    case SCHED_GRAN_socket:
>>> +    mode = "socket";
>>> +    break;
>>> +    default:
>>> +    ASSERT_UNREACHABLE();
>>> +    break;
>>> +    }
>>> +
>>> +    if ( sched_granularity )
>>> +    snprintf(str, size, "%u-way %s", sched_granularity, mode);
>>>
>> I'm not sure about using the value of the enum like this.
> 
> enum? sched_granularity holds the number of cpus per scheduling
> resource. opt_sched_granularity is the enum.
> 
>>
>> E.g., in a system with 4 threads per core, enabling core scheduling
>> granularity would mean having 4 vCPUs in the scheduling units. But this
>> will still print "2-way core-scheduling", which I think would sound
>> confusing.
> 
> It would print "4-way", of course.
> 
>>
>> So I'd just go with "cpu", "core" and "socket" strings.
> 
> No, this is not a good idea. With e.g. smt=0 you'll be able to have
> "1-way core" which is much more informative than "core".

Can confirm the above. "sched-gran=core" on a Knights Mill produces:
(XEN) [  232.018648] Scheduler: SMP Credit Scheduler (credit) in 4-way 
core-scheduling mode

While "sched-gran=core smt=0" gives:
(XEN) [  259.337588] Scheduler: SMP Credit Scheduler (credit) in 1-way 
core-scheduling mode

--
Thanks,
Sergey

Re: [PATCH] sched: print information about scheduler granularity

2020-04-16 Thread Sergey Dyasli

On 16/04/2020 10:25, Jürgen Groß wrote:
> On 16.04.20 11:20, Sergey Dyasli wrote:
>> On 16/04/2020 09:57, Jürgen Groß wrote:
>>> On 16.04.20 10:33, Sergey Dyasli wrote:
>>>> Currently it might be not obvious which scheduling mode is being used
>>>> by the scheduler. Alleviate this by printing additional information
>>>> about the selected granularity. Messages now look like these:
>>>>
>>>> 1. boot
>>>> (XEN) [00089808f0ea7496] Using scheduler: SMP Credit Scheduler (credit) in 
>>>> core-scheduling mode
>>>>
>>>> 2. xl debug-keys r
>>>> (XEN) [   45.914314] Scheduler: SMP Credit Scheduler (credit) in 2-way 
>>>> core-scheduling mode
>>>>
>>>> Signed-off-by: Sergey Dyasli 
>>>
>>> Hmm, do we need that?
>>>
>>> The xen commandline is part of the boot messages and is contained
>>> in the "xl info" output.
>>
>> It's true that you can see "sched-gran=core" in "xl info" output. But that's
>> just the switch - not the end result. A user might want to verify that he did
>> everything correctly and core-scheduling mode has indeed been enabled.
> 
> I'm planning to add this information in the pending hypfs (per cpupool).

hypfs is certainly nice, but I doubt it'll be available for Xen 4.13.

> I'm not opposed to your patch, but as soon as we have per-cpupool
> granularity it should be reverted again.

"xl debug-keys r" already prints the granularity information per cpupool.
It's just that opt_sched_granularity is currently a single global variable. Once
per-cpupool granularity is implemented, sched_gran_str() should simply gain
granularity as a parameter.

--
Thanks,
Sergey

Re: [PATCH] sched: print information about scheduler granularity

2020-04-16 Thread Sergey Dyasli

On 16/04/2020 09:57, Jürgen Groß wrote:
> On 16.04.20 10:33, Sergey Dyasli wrote:
>> Currently it might be not obvious which scheduling mode is being used
>> by the scheduler. Alleviate this by printing additional information
>> about the selected granularity. Messages now look like these:
>>
>> 1. boot
>> (XEN) [00089808f0ea7496] Using scheduler: SMP Credit Scheduler (credit) in 
>> core-scheduling mode
>>
>> 2. xl debug-keys r
>> (XEN) [   45.914314] Scheduler: SMP Credit Scheduler (credit) in 2-way 
>> core-scheduling mode
>>
>> Signed-off-by: Sergey Dyasli 
> 
> Hmm, do we need that?
> 
> The xen commandline is part of the boot messages and is contained
> in the "xl info" output.

It's true that you can see "sched-gran=core" in "xl info" output. But that's
just the switch - not the end result. A user might want to verify that he did
everything correctly and core-scheduling mode has indeed been enabled.

--
Thanks,
Sergey

[PATCH] sched: print information about scheduler granularity

2020-04-16 Thread Sergey Dyasli

Currently it might not be obvious which scheduling mode is being used
by the scheduler. Alleviate this by printing additional information
about the selected granularity. Messages now look like these:

1. boot
(XEN) [00089808f0ea7496] Using scheduler: SMP Credit Scheduler (credit) in 
core-scheduling mode

2. xl debug-keys r
(XEN) [   45.914314] Scheduler: SMP Credit Scheduler (credit) in 2-way 
core-scheduling mode

Signed-off-by: Sergey Dyasli 
---
CC: Juergen Gross 
CC: Dario Faggioli 
CC: George Dunlap 
CC: Jan Beulich 
---
 xen/common/sched/core.c| 10 --
 xen/common/sched/cpupool.c | 30 +-
 xen/common/sched/private.h |  2 ++
 3 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index d4a6489929..b1b09a159b 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -2883,6 +2883,7 @@ void scheduler_enable(void)
 void __init scheduler_init(void)
 {
 struct domain *idle_domain;
+char sched_gran[20];
 int i;
 
 scheduler_enable();
@@ -2937,7 +2938,9 @@ void __init scheduler_init(void)
 BUG();
 register_cpu_notifier(&cpu_schedule_nfb);
 
-printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
+printk("Using scheduler: %s (%s) in %s-scheduling mode\n",
+   ops.name, ops.opt_name,
+   sched_gran_str(sched_gran, sizeof(sched_gran)));
 if ( sched_init() )
 panic("scheduler returned error on init\n");
 
@@ -3267,6 +3270,7 @@ void schedule_dump(struct cpupool *c)
 unsigned int  i, j;
 struct scheduler *sched;
 cpumask_t*cpus;
+char  sched_gran[20];
 
 /* Locking, if necessary, must be handled withing each scheduler */
 
@@ -3276,7 +3280,9 @@ void schedule_dump(struct cpupool *c)
 {
 sched = c->sched;
 cpus = c->res_valid;
-printk("Scheduler: %s (%s)\n", sched->name, sched->opt_name);
+printk("Scheduler: %s (%s) in %s-scheduling mode\n",
+   sched->name, sched->opt_name,
+   sched_gran_str(sched_gran, sizeof(sched_gran)));
 sched_dump_settings(sched);
 }
 else
diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index d40345b585..a37b97f4c2 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -38,7 +38,35 @@ static cpumask_t cpupool_locked_cpus;
 static DEFINE_SPINLOCK(cpupool_lock);
 
 static enum sched_gran __read_mostly opt_sched_granularity = SCHED_GRAN_cpu;
-static unsigned int __read_mostly sched_granularity = 1;
+static unsigned int __read_mostly sched_granularity;
+
+char *sched_gran_str(char *str, size_t size)
+{
+char *mode = "";
+
+switch ( opt_sched_granularity )
+{
+case SCHED_GRAN_cpu:
+mode = "cpu";
+break;
+case SCHED_GRAN_core:
+mode = "core";
+break;
+case SCHED_GRAN_socket:
+mode = "socket";
+break;
+default:
+ASSERT_UNREACHABLE();
+break;
+}
+
+if ( sched_granularity )
+snprintf(str, size, "%u-way %s", sched_granularity, mode);
+else
+snprintf(str, size, "%s", mode);
+
+return str;
+}
 
 #ifdef CONFIG_HAS_SCHED_GRANULARITY
 static int __init sched_select_granularity(const char *str)
diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
index 367811a12f..fd49f545cb 100644
--- a/xen/common/sched/private.h
+++ b/xen/common/sched/private.h
@@ -30,6 +30,8 @@ enum sched_gran {
 SCHED_GRAN_socket
 };
 
+char *sched_gran_str(char *str, size_t size);
+
 /*
  * In order to allow a scheduler to remap the lock->cpu mapping,
  * we have a per-cpu pointer, along with a pre-allocated set of
-- 
2.17.1

Re: [PATCH] sched: fix scheduler_disable() with core scheduling

2020-04-14 Thread Sergey Dyasli

(CC Igor)

On 09/04/2020 13:50, Jürgen Groß wrote:
> On 09.04.20 11:41, Sergey Dyasli wrote:
>> In core-scheduling mode, Xen might crash when entering ACPI S5 state.
>> This happens in sched_slave() during is_idle_unit(next) check because
>> next->vcpu_list is stale and points to an already freed memory.
>>
>> This situation happens shortly after scheduler_disable() is called if
>> some CPU is still inside sched_slave() softirq. Current logic simply
>> returns prev->next_task from sched_wait_rendezvous_in() which causes
>> the described crash because next_task->vcpu_list has become invalid.
>>
>> Fix the crash by returning NULL from sched_wait_rendezvous_in() in
>> the case when scheduler_disable() has been called.
>>
>> Signed-off-by: Sergey Dyasli 
> 
> Good catch!
> 
> Have you seen any further problems (e.g. with cpu on/offlining) with
> this patch applied?

This patch shouldn't affect cpu on/offlining AFAICS. Igor was the one testing
cpu on/offlining and I think he came to the conclusion that it's broken even
without core-scheduling enabled.

> Reviewed-by: Juergen Gross 

Thanks!

--
Sergey

[PATCH] sched: fix scheduler_disable() with core scheduling

2020-04-09 Thread Sergey Dyasli

In core-scheduling mode, Xen might crash when entering ACPI S5 state.
This happens in sched_slave() during is_idle_unit(next) check because
next->vcpu_list is stale and points to already freed memory.

This situation happens shortly after scheduler_disable() is called if
some CPU is still inside sched_slave() softirq. Current logic simply
returns prev->next_task from sched_wait_rendezvous_in() which causes
the described crash because next_task->vcpu_list has become invalid.

Fix the crash by returning NULL from sched_wait_rendezvous_in() in
the case when scheduler_disable() has been called.

Signed-off-by: Sergey Dyasli 
---
CC: Juergen Gross 
CC: Dario Faggioli 
CC: George Dunlap 
CC: Jan Beulich 
---
 xen/common/sched/core.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index 626861a3fe..d4a6489929 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -2484,19 +2484,15 @@ static struct sched_unit 
*sched_wait_rendezvous_in(struct sched_unit *prev,
 
 *lock = pcpu_schedule_lock_irq(cpu);
 
-if ( unlikely(!scheduler_active) )
-{
-ASSERT(is_idle_unit(prev));
-atomic_set(>next_task->rendezvous_out_cnt, 0);
-prev->rendezvous_in_cnt = 0;
-}
-
 /*
  * Check for scheduling resource switched. This happens when we are
  * moved away from our cpupool and cpus are subject of the idle
  * scheduler now.
+ *
+ * This is also a bail out case when scheduler_disable() has been
+ * called.
  */
-if ( unlikely(sr != get_sched_res(cpu)) )
+if ( unlikely(sr != get_sched_res(cpu) || !scheduler_active) )
 {
 ASSERT(is_idle_unit(prev));
 atomic_set(>next_task->rendezvous_out_cnt, 0);
-- 
2.17.1

[Xen-devel] xl vcpu-pin peculiarities in core scheduling mode

2020-03-24 Thread Sergey Dyasli

Hi Juergen,

I've noticed there is no documentation about how vcpu-pin is supposed to work
with core scheduling enabled. I did some experiments and noticed the following
inconsistencies:

  1. xl vcpu-pin 5 0 0
 Windows 10 (64-bit) (1)  5 00   -b-1644.0  0 / all
 Windows 10 (64-bit) (1)  5 11   -b-1650.1  0 / all
 ^  ^
 CPU 1 doesn't match reported hard-affinity of 0. Should this command set
 hard-affinity of vCPU 1 to 1? Or should it be 0-1 for both vCPUs instead?


  2. xl vcpu-pin 5 0 1
 libxl: error: libxl_sched.c:62:libxl__set_vcpuaffinity: Domain 5:Setting 
vcpu affinity: Invalid argument
 This is expected but perhaps needs documenting somewhere?


  3. xl vcpu-pin 5 0 1-2
 Windows 10 (64-bit) (1)  5 02   -b-1646.7  1-2 / 
all
 Windows 10 (64-bit) (1)  5 13   -b-1651.6  1-2 / 
all
 ^  ^^^
 Here is a CPU / affinity mismatch again, but the more interesting fact
 is that setting 1-2 is allowed at all, I'd expect CPU would never be set
 to 1 with such settings.

Please let me know what you think about the above cases.

--
Thanks,
Sergey

Re: [Xen-devel] Core Scheduling "lock == schedule_lock" assertion failure

2020-02-13 Thread Sergey Dyasli

On 12/02/2020 12:24, Jürgen Groß wrote:
> On 12.02.20 12:21, Sergey Dyasli wrote:
>> Hi Juergen,
>>
>> Recently our testing has found a host crash which is reproducible.
>> Do you have any idea what might be going on here?
>
> Oh, nice catch!
>
> The problem is that get_cpu_idle_time() is calling vcpu_runstate_get()
> for an idle vcpu. This is fragile as idle vcpus are sometimes assigned
> temporarily to normal scheduling units, thus the ASSERT() in the unlock
> function is failing when the assignment of the idle vcpu is modified
> under the feet of vcpu_runstate_get() and the unit it has been assigned
> to before is already scheduled on another cpu.
>
> The patch is rather easy, though. Can you try it, please?

Thank you for the patch! I put it into testing yesterday and it looks
good so far. It also seems that the issue is well understood and the
patch should go into the main tree.

--
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v4 1/2] xsm: add Kconfig option for denied string

2020-02-12 Thread Sergey Dyasli

On 12/02/2020 09:32, Jan Beulich wrote:
> On 11.02.2020 14:42, Sergey Dyasli wrote:
>> --- a/xen/common/Kconfig
>> +++ b/xen/common/Kconfig
>> @@ -228,6 +228,14 @@ choice
>>  bool "SILO" if XSM_SILO
>>  endchoice
>>
>> +config XSM_DENIED_STRING
>> +string "xen_version hypercall denied information replacement string"
>> +default ""
>> +depends on XSM
>
> Why would this string want to be configurable only in XSM-
> enabled builds?

For some reason I assumed that xsm_xen_version() is a no-op when
CONFIG_XSM is undefined. I can now see that it doesn't depend on any
config in which case the dependency (and #ifdef) should indeed be
removed.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] Core Scheduling "lock == schedule_lock" assertion failure

2020-02-12 Thread Sergey Dyasli

Hi Juergen,

Recently our testing has found a host crash which is reproducible.
Do you have any idea what might be going on here?

(XEN) [175654.165126] Assertion 'lock == 
get_sched_res(i->res->master_cpu)->schedule_lock' failed at 
...ild/BUILD/xen-4.13.1/xen/include/xen/sched-if.h:269
(XEN) [175654.165133] [ Xen-4.13.1-9.0.3-d  x86_64  debug=y   Not tainted 
]
(XEN) [175654.165136] CPU:28
(XEN) [175654.165138] RIP:e008:[] 
vcpu_runstate_get+0x11e/0x14f
(XEN) [175654.165146] RFLAGS: 00010083   CONTEXT: hypervisor (d0v4)
(XEN) [175654.165151] rax: 83403ff0d340   rbx: 83807cc97ac8   rcx: 
0006
(XEN) [175654.165154] rdx: 006fbf942000   rsi: 83400f8e1cd8   rdi: 
107898e2
(XEN) [175654.165158] rbp: 83807cc97ab8   rsp: 83807cc97a88   r8:  
deadbeefdeadf00d
(XEN) [175654.165160] r9:  deadbeefdeadf00d   r10:    r11: 

(XEN) [175654.165164] r12: 83400fa6f000   r13: 83400f8c9778   r14: 
82d0805c8008
(XEN) [175654.165167] r15: 832e30854ae0   cr0: 80050033   cr4: 
00362660
(XEN) [175654.165170] cr3: 002130811000   cr2: 88817f50b728
(XEN) [175654.165172] fsb: 7f40a40da740   gsb: 88831d30   gss: 

(XEN) [175654.165175] ds:    es:    fs:    gs:    ss: e010   
cs: e008
(XEN) [175654.165179] Xen code around  
(vcpu_runstate_get+0x11e/0x14f):
(XEN) [175654.165181]  04 10 4c 3b 68 10 74 02 <0f> 0b 4c 89 ef e8 7e 5d 00 00 
48 8d 05 41 9d 38
(XEN) [175654.165192] Xen stack trace from rsp=83807cc97a88:
(XEN) [175654.165194]83807cc97aa8 83400fa75a60  
83807cc97da0
(XEN) [175654.165199]0230 83807cc97fff 83807cc97af8 
82d08023d41f
(XEN) [175654.165204]0001 9fc1ac1cb2f4 4840c423acdc 
5780e7f9735a
(XEN) [175654.165207]  83807cc97c98 
82d0802ea9f7
(XEN) [175654.165211] 9fc1ac1c6b99 00050007 
83807cc97c10
(XEN) [175654.165215]83807cc97bb0 0020  

(XEN) [175654.165251]   

(XEN) [175654.165254]   

(XEN) [175654.165258]82d0805c8038 82d0805c74a0  
aa00
(XEN) [175654.165263]   

(XEN) [175654.165266]   

(XEN) [175654.165269]   

(XEN) [175654.165273]   

(XEN) [175654.165276]   

(XEN) [175654.165279]   

(XEN) [175654.165283]83400f813000 83807cc97d98  
82d0805cda80
(XEN) [175654.165287]0230 83807cc97fff 83807cc97cc8 
82d08026d99b
(XEN) [175654.165291]83807cc97ef8 83400f813000 82d0805cda80 
0230
(XEN) [175654.165295]83807cc97e48 82d080244573 7f40a40e6000 
0206
(XEN) [175654.165300]82004006c000   
82e08a815e80
(XEN) [175654.165304] Xen call trace:
(XEN) [175654.165306][] R vcpu_runstate_get+0x11e/0x14f
(XEN) [175654.165310][] F get_cpu_idle_time+0x4d/0x53
(XEN) [175654.165315][] F pmstat_get_cx_stat+0x82/0x8e7
(XEN) [175654.165319][] F do_get_pm_info+0x27b/0x2d4
(XEN) [175654.165322][] F do_sysctl+0x633/0x14e0
(XEN) [175654.165327][] F pv_hypercall+0x1f5/0x567
(XEN) [175654.165330][] F lstar_enter+0x112/0x120
(XEN) [175654.165332]
(XEN) [175654.550916]
(XEN) [175654.553243] 
(XEN) [175654.559449] Panic on CPU 28:
(XEN) [175654.563328] Assertion 'lock == 
get_sched_res(i->res->master_cpu)->schedule_lock' failed at 
...ild/BUILD/xen-4.13.1/xen/include/xen/sched-if
(XEN) [175654.581847]
(XEN) [175654.584173] Reboot in five seconds...
(XEN) [175654.588925] Executing kexec image on cpu28
(XEN) [175654.594987] Shot down all CPUs


The state of the sibling was:


  PCPU 29 Host state:
RIP:e008:[] Ring 0
RFLAGS: 00040002  AC IOPL0

rax: 83400f8c91e4   rbx: 001d   rcx: 83400f8c91f4
rdx: 83400f8c9104   rsi: 83400f8c9094   rdi: 0004
rbp: 83807cc89f28   rsp: 83807cc89f28   r8:  
r9:     r10:    r11: 
r12:    r13:    r14: 83807cc8
r15: 

cr0: 80050033   PG

[Xen-devel] [PATCH v4 2/2] xsm: hide detailed Xen version from unprivileged guests

2020-02-11 Thread Sergey Dyasli

Hide the following information that can help identify the running Xen
binary version: XENVER_[extraversion|compile_info|changeset]
This makes it harder for malicious guests to fingerprint Xen to identify
exploitable systems.

Introduce xsm_filter_denied() to hvmloader to remove "<denied>" string
from guest's DMI tables that otherwise would be shown in tools like
dmidecode.

While at it, add explicit cases for XENVER_[commandline|build_id]
for better code readability. Add a default case with an ASSERT to make
sure that every case is explicitly listed as well.

Signed-off-by: Sergey Dyasli 
---
v3 --> v4:
- Updated commit message
- Re-add hvmloader filtering

v2 --> v3:
- Remove hvmloader filtering
- Add ASSERT_UNREACHABLE

v1 --> v2:
- Added xsm_filter_denied() to hvmloader instead of modifying xen_deny()
- Made behaviour the same for both Release and Debug builds
- XENVER_capabilities is no longer hidden

---
 tools/firmware/hvmloader/hvmloader.c |  1 +
 tools/firmware/hvmloader/smbios.c|  1 +
 tools/firmware/hvmloader/util.c  | 11 +++
 tools/firmware/hvmloader/util.h  |  2 ++
 xen/include/xsm/dummy.h  | 15 +++
 5 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/tools/firmware/hvmloader/hvmloader.c 
b/tools/firmware/hvmloader/hvmloader.c
index 598a226278..b35899f2fb 100644
--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -147,6 +147,7 @@ static void init_hypercalls(void)
 /* Print version information. */
 cpuid(base + 1, , , , );
 hypercall_xen_version(XENVER_extraversion, extraversion);
+xsm_filter_denied(extraversion);
 printf("Detected Xen v%u.%u%s\n", eax >> 16, eax & 0x, extraversion);
 }
 
diff --git a/tools/firmware/hvmloader/smbios.c 
b/tools/firmware/hvmloader/smbios.c
index 97a054e9e3..a71bfe8392 100644
--- a/tools/firmware/hvmloader/smbios.c
+++ b/tools/firmware/hvmloader/smbios.c
@@ -275,6 +275,7 @@ hvm_write_smbios_tables(
 xen_minor_version = (uint16_t) xen_version;
 
 hypercall_xen_version(XENVER_extraversion, xen_extra_version);
+xsm_filter_denied(xen_extra_version);
 
 /* build up human-readable Xen version string */
 p = xen_version_str;
diff --git a/tools/firmware/hvmloader/util.c b/tools/firmware/hvmloader/util.c
index 0c3f2d24cd..49b4b321e3 100644
--- a/tools/firmware/hvmloader/util.c
+++ b/tools/firmware/hvmloader/util.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -995,6 +996,16 @@ void hvmloader_acpi_build_tables(struct acpi_config 
*config,
 hvm_param_set(HVM_PARAM_VM_GENERATION_ID_ADDR, config->vm_gid_addr);
 }
 
+void xsm_filter_denied(char *str)
+{
+xen_denied_string_t deny_str = "";
+
+hypercall_xen_version(XENVER_denied_string, deny_str);
+
+if ( strcmp(str, deny_str) == 0 )
+*str = '\0';
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/firmware/hvmloader/util.h b/tools/firmware/hvmloader/util.h
index 7bca6418d2..e4fd26de9d 100644
--- a/tools/firmware/hvmloader/util.h
+++ b/tools/firmware/hvmloader/util.h
@@ -286,6 +286,8 @@ struct acpi_config;
 void hvmloader_acpi_build_tables(struct acpi_config *config,
  unsigned int physical);
 
+void xsm_filter_denied(char *str);
+
 #endif /* __HVMLOADER_UTIL_H__ */
 
 /*
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index 72a101b106..2567ccaa0a 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -751,16 +751,23 @@ static XSM_INLINE int xsm_xen_version (XSM_DEFAULT_ARG 
uint32_t op)
 case XENVER_denied_string:
 /* These sub-ops ignore the permission checks and return data. */
 return 0;
-case XENVER_extraversion:
-case XENVER_compile_info:
+
 case XENVER_capabilities:
-case XENVER_changeset:
 case XENVER_pagesize:
 case XENVER_guest_handle:
 /* These MUST always be accessible to any guest by default. */
 return xsm_default_action(XSM_HOOK, current->domain, NULL);
-default:
+
+case XENVER_extraversion:
+case XENVER_compile_info:
+case XENVER_changeset:
+case XENVER_commandline:
+case XENVER_build_id:
 return xsm_default_action(XSM_PRIV, current->domain, NULL);
+
+default:
+ASSERT_UNREACHABLE();
+return -EPERM;
 }
 }
 
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v4 1/2] xsm: add Kconfig option for denied string

2020-02-11 Thread Sergey Dyasli

Add Kconfig option to make it possible to configure the string returned
to non-privileged guests instead of the default "<denied>" which could
propagate to UI / logs after the subsequent patch that hides detailed
Xen version information from unprivileged guests.

Introduce XENVER_denied_string to allow guests to set up UI / logs
filtering which depends on the new CONFIG_XSM_DENIED_STRING.

Signed-off-by: Sergey Dyasli 
---
v3 --> v4:
- Updated kconfig prompt description
- Added XENVER_denied_string
- Added #ifdef to fix build when CONFIG_XSM is not set

v2 --> v3:
- new patch

---
 xen/common/Kconfig   |  8 
 xen/common/kernel.c  | 11 +++
 xen/common/version.c |  4 
 xen/include/public/version.h |  5 +
 xen/include/xsm/dummy.h  |  1 +
 5 files changed, 29 insertions(+)

diff --git a/xen/common/Kconfig b/xen/common/Kconfig
index a6914fcae9..4a1a9398cd 100644
--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -228,6 +228,14 @@ choice
bool "SILO" if XSM_SILO
 endchoice
 
+config XSM_DENIED_STRING
+   string "xen_version hypercall denied information replacement string"
+   default ""
+   depends on XSM
+   ---help---
+ A string which substitutes sensitive information returned via
+ xen_version hypercall to non-privileged guests
+
 config LATE_HWDOM
bool "Dedicated hardware domain"
default n
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index 22941cec94..1c22e5d167 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -561,6 +561,17 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
 return sz;
 }
+
+case XENVER_denied_string:
+{
+xen_denied_string_t str;
+
+safe_strcpy(str, xen_deny());
+if ( copy_to_guest(arg, str, XEN_DENIED_STRING_LEN) )
+return -EFAULT;
+
+return 0;
+}
 }
 
 return -ENOSYS;
diff --git a/xen/common/version.c b/xen/common/version.c
index 937eb1281c..fbd0ef4668 100644
--- a/xen/common/version.c
+++ b/xen/common/version.c
@@ -67,7 +67,11 @@ const char *xen_banner(void)
 
 const char *xen_deny(void)
 {
+#ifdef CONFIG_XSM_DENIED_STRING
+return CONFIG_XSM_DENIED_STRING;
+#else
 return "";
+#endif
 }
 
 static const void *build_id_p __read_mostly;
diff --git a/xen/include/public/version.h b/xen/include/public/version.h
index 17a81e23cd..f65001d2d9 100644
--- a/xen/include/public/version.h
+++ b/xen/include/public/version.h
@@ -100,6 +100,11 @@ struct xen_build_id {
 };
 typedef struct xen_build_id xen_build_id_t;
 
+/* arg == xen_denied_string_t. */
+#define XENVER_denied_string 11
+typedef char xen_denied_string_t[64];
+#define XEN_DENIED_STRING_LEN (sizeof(xen_denied_string_t))
+
 #endif /* __XEN_PUBLIC_VERSION_H__ */
 
 /*
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index b8e185e6fa..72a101b106 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -748,6 +748,7 @@ static XSM_INLINE int xsm_xen_version (XSM_DEFAULT_ARG 
uint32_t op)
 case XENVER_version:
 case XENVER_platform_parameters:
 case XENVER_get_features:
+case XENVER_denied_string:
 /* These sub-ops ignore the permission checks and return data. */
 return 0;
 case XENVER_extraversion:
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v4 0/2] xsm: hide detailed Xen version

2020-02-11 Thread Sergey Dyasli

Now a proper 2-patch series.

Sergey Dyasli (2):
  xsm: add Kconfig option for denied string
  xsm: hide detailed Xen version from unprivileged guests

 tools/firmware/hvmloader/hvmloader.c |  1 +
 tools/firmware/hvmloader/smbios.c|  1 +
 tools/firmware/hvmloader/util.c  | 11 +++
 tools/firmware/hvmloader/util.h  |  2 ++
 xen/common/Kconfig   |  8 
 xen/common/kernel.c  | 11 +++
 xen/common/version.c |  4 
 xen/include/public/version.h |  5 +
 xen/include/xsm/dummy.h  | 16 
 9 files changed, 55 insertions(+), 4 deletions(-)

-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] Live-Patch application failure in core-scheduling mode

2020-02-11 Thread Sergey Dyasli

On 07/02/2020 08:04, Jürgen Groß wrote:
> On 06.02.20 15:02, Sergey Dyasli wrote:
>> On 06/02/2020 11:05, Sergey Dyasli wrote:
>>> On 06/02/2020 09:57, Jürgen Groß wrote:
>>>> On 05.02.20 17:03, Sergey Dyasli wrote:
>>>>> Hello,
>>>>>
>>>>> I'm currently investigating a Live-Patch application failure in core-
>>>>> scheduling mode and this is an example of what I usually get:
>>>>> (it's easily reproducible)
>>>>>
>>>>>   (XEN) [  342.528305] livepatch: lp: CPU8 - IPIing the other 15 CPUs
>>>>>   (XEN) [  342.558340] livepatch: lp: Timed out on semaphore in CPU 
>>>>> quiesce phase 13/15
>>>>>   (XEN) [  342.558343] bad cpus: 6 9
>>>>>
>>>>>   (XEN) [  342.559293] CPU:6
>>>>>   (XEN) [  342.559562] Xen call trace:
>>>>>   (XEN) [  342.559565][] R 
>>>>> common/schedule.c#sched_wait_rendezvous_in+0xa4/0x270
>>>>>   (XEN) [  342.559568][] F 
>>>>> common/schedule.c#schedule+0x17a/0x260
>>>>>   (XEN) [  342.559571][] F 
>>>>> common/softirq.c#__do_softirq+0x5a/0x90
>>>>>   (XEN) [  342.559574][] F 
>>>>> arch/x86/domain.c#guest_idle_loop+0x35/0x60
>>>>>
>>>>>   (XEN) [  342.559761] CPU:9
>>>>>   (XEN) [  342.560026] Xen call trace:
>>>>>   (XEN) [  342.560029][] R 
>>>>> _spin_lock_irq+0x11/0x40
>>>>>   (XEN) [  342.560032][] F 
>>>>> common/schedule.c#sched_wait_rendezvous_in+0xc3/0x270
>>>>>   (XEN) [  342.560036][] F 
>>>>> common/schedule.c#schedule+0x17a/0x260
>>>>>   (XEN) [  342.560039][] F 
>>>>> common/softirq.c#__do_softirq+0x5a/0x90
>>>>>   (XEN) [  342.560042][] F 
>>>>> arch/x86/domain.c#idle_loop+0x55/0xb0
>>>>>
>>>>> The first HT sibling is waiting for the second in the LP-application
>>>>> context while the second waits for the first in the scheduler context.
>>>>>
>>>>> Any suggestions on how to improve this situation are welcome.
>>>>
>>>> Can you test the attached patch, please? It is only tested to boot, so
>>>> I did no livepatch tests with it.
>>>
>>> Thank you for the patch! It seems to fix the issue in my manual testing.
>>> I'm going to submit automatic LP testing for both thread/core modes.
>>
>> Andrew suggested to test late ucode loading as well and so I did.
>> It uses stop_machine() to rendezvous cpus and it failed with a similar
>> backtrace for a problematic CPU. But in this case the system crashed
>> since there is no timeout involved:
>>
>>  (XEN) [  155.025168] Xen call trace:
>>  (XEN) [  155.040095][] R 
>> _spin_unlock_irq+0x22/0x30
>>  (XEN) [  155.069549][] S 
>> common/schedule.c#sched_wait_rendezvous_in+0xa2/0x270
>>  (XEN) [  155.109696][] F 
>> common/schedule.c#sched_slave+0x198/0x260
>>  (XEN) [  155.145521][] F 
>> common/softirq.c#__do_softirq+0x5a/0x90
>>  (XEN) [  155.180223][] F 
>> x86_64/entry.S#process_softirqs+0x6/0x20
>>
>> It looks like your patch provides a workaround for LP case, but other
>> cases like stop_machine() remain broken since the underlying issue with
>> the scheduler is still there.
>
> And here is the fix for ucode loading (that was in fact the only case
> where stop_machine_run() wasn't already called in a tasklet).
>
> I have done a manual test loading new ucode with core scheduling
> active.

The patch seems to fix the issue, thanks!
Do you plan to post the 2 patches to the ML now for proper review?

--
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v3 4/4] xen/netback: fix grant copy across page boundary

2020-02-10 Thread Sergey Dyasli

On 07/02/2020 14:36, David Miller wrote:
> From: Sergey Dyasli 
> Date: Fri, 7 Feb 2020 14:26:52 +
>
>> From: Ross Lagerwall 
>>
>> When KASAN (or SLUB_DEBUG) is turned on, there is a higher chance that
>> non-power-of-two allocations are not aligned to the next power of 2 of
>> the size. Therefore, handle grant copies that cross page boundaries.
>>
>> Signed-off-by: Ross Lagerwall 
>> Signed-off-by: Sergey Dyasli 
>> Acked-by: Paul Durrant 
>
> This is part of a larger patch series to which netdev was not CC:'d
>
> Where is this patch targetted to be applied?
>
> Do you expect a networking ACK on this?
>
> Please do not submit patches in such an ambiguous manner like this
> in the future, thank you.

Please see the following for more context:

https://lore.kernel.org/linux-mm/20200122140512.zxtld5sanohpmgt2@debian/

Sorry for not providing enough context with this submission.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v3 0/4] basic KASAN support for Xen PV domains

2020-02-07 Thread Sergey Dyasli

This series allows booting and running Xen PV kernels (Dom0 and DomU) with
CONFIG_KASAN=y. It has been used internally for some time now with good
results for finding memory corruption issues in Dom0 kernel.

Only Outline instrumentation is supported at the moment.

Sergey Dyasli (2):
  kasan: introduce set_pmd_early_shadow()
  x86/xen: add basic KASAN support for PV kernel

Ross Lagerwall (2):
  xen: teach KASAN about grant tables
  xen/netback: fix grant copy across page boundary

 arch/x86/mm/kasan_init_64.c   | 10 +-
 arch/x86/xen/Makefile |  7 
 arch/x86/xen/enlighten_pv.c   |  3 ++
 arch/x86/xen/mmu_pv.c | 43 ++
 drivers/net/xen-netback/common.h  |  2 +-
 drivers/net/xen-netback/netback.c | 60 +--
 drivers/xen/Makefile  |  2 ++
 drivers/xen/grant-table.c |  5 ++-
 include/linux/kasan.h |  2 ++
 include/xen/xen-ops.h | 10 ++
 lib/Kconfig.kasan |  3 +-
 mm/kasan/init.c   | 32 -
 12 files changed, 156 insertions(+), 23 deletions(-)

-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v3 4/4] xen/netback: fix grant copy across page boundary

2020-02-07 Thread Sergey Dyasli

From: Ross Lagerwall 

When KASAN (or SLUB_DEBUG) is turned on, there is a higher chance that
non-power-of-two allocations are not aligned to the next power of 2 of
the size. Therefore, handle grant copies that cross page boundaries.

Signed-off-by: Ross Lagerwall 
Signed-off-by: Sergey Dyasli 
Acked-by: Paul Durrant 
---
v2 --> v3:
- Added Acked-by: Paul Durrant 
CC: "David S. Miller" 
CC: net...@vger.kernel.org

v1 --> v2:
- Use sizeof_field(struct sk_buff, cb)) instead of magic number 48
- Slightly update commit message

RFC --> v1:
- Added BUILD_BUG_ON to the netback patch
- xenvif_idx_release() now located outside the loop

CC: Wei Liu 
CC: Paul Durrant 
---
 drivers/net/xen-netback/common.h  |  2 +-
 drivers/net/xen-netback/netback.c | 60 +--
 2 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 05847eb91a1b..e57684415edd 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -155,7 +155,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
 
-   struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+   struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS * 2];
struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
/* passed to gnttab_[un]map_refs with pages under (un)mapping */
diff --git a/drivers/net/xen-netback/netback.c 
b/drivers/net/xen-netback/netback.c
index 315dfc6ea297..41054de38a62 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -320,6 +320,7 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
 
 struct xenvif_tx_cb {
u16 pending_idx;
+   u8 copies;
 };
 
 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
@@ -439,6 +440,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
 {
struct gnttab_map_grant_ref *gop_map = *gopp_map;
u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+   u8 copies = XENVIF_TX_CB(skb)->copies;
/* This always points to the shinfo of the skb being checked, which
 * could be either the first or the one on the frag_list
 */
@@ -450,23 +452,26 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
int nr_frags = shinfo->nr_frags;
const bool sharedslot = nr_frags &&
frag_get_pending_idx(>frags[0]) == 
pending_idx;
-   int i, err;
+   int i, err = 0;
 
-   /* Check status of header. */
-   err = (*gopp_copy)->status;
-   if (unlikely(err)) {
-   if (net_ratelimit())
-   netdev_dbg(queue->vif->dev,
+   while (copies) {
+   /* Check status of header. */
+   int newerr = (*gopp_copy)->status;
+   if (unlikely(newerr)) {
+   if (net_ratelimit())
+   netdev_dbg(queue->vif->dev,
   "Grant copy of header failed! status: %d 
pending_idx: %u ref: %u\n",
   (*gopp_copy)->status,
   pending_idx,
   (*gopp_copy)->source.u.ref);
-   /* The first frag might still have this slot mapped */
-   if (!sharedslot)
-   xenvif_idx_release(queue, pending_idx,
-  XEN_NETIF_RSP_ERROR);
+   err = newerr;
+   }
+   (*gopp_copy)++;
+   copies--;
}
-   (*gopp_copy)++;
+   /* The first frag might still have this slot mapped */
+   if (unlikely(err) && !sharedslot)
+   xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
 
 check_frags:
for (i = 0; i < nr_frags; i++, gop_map++) {
@@ -910,6 +915,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
xenvif_tx_err(queue, , extra_count, idx);
break;
}
+   XENVIF_TX_CB(skb)->copies = 0;
 
skb_shinfo(skb)->nr_frags = ret;
if (data_len < txreq.size)
@@ -933,6 +939,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
   "Can't allocate the 
frag_list skb.\n");
break;
}
+   XENVIF_TX_CB(nskb)->copies = 0;
}
 
if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
@@ -990,6 +997,31 @@ static void xenvif_tx_build_gops(struct xenvif_queue 
*queue,
 
queue->

[Xen-devel] [PATCH v3 2/4] x86/xen: add basic KASAN support for PV kernel

2020-02-07 Thread Sergey Dyasli

Introduce and use xen_kasan_* functions that are needed to properly
initialise KASAN for Xen PV domains. Disable instrumentation for files
that are used by xen_start_kernel() before kasan_early_init() could
be called.

This enables the use of Outline instrumentation for Xen PV kernels.
KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
and are hence disabled.

Signed-off-by: Sergey Dyasli 
---
v2 --> v3:
- Fix compilation without CONFIG_KASAN
- Dropped _pv prefixes from new functions
- Made xen_kasan_early_init() call kasan_map_early_shadow() directly
- Updated description

v1 --> v2:
- Fix compilation without CONFIG_XEN_PV
- Use macros for KASAN_SHADOW_START

RFC --> v1:
- New functions with declarations in xen/xen-ops.h
- Fixed the issue with free_kernel_image_pages() with the help of
  xen_pv_kasan_unpin_pgd()
---
 arch/x86/mm/kasan_init_64.c | 10 -
 arch/x86/xen/Makefile   |  7 ++
 arch/x86/xen/enlighten_pv.c |  3 +++
 arch/x86/xen/mmu_pv.c   | 43 +
 drivers/xen/Makefile|  2 ++
 include/linux/kasan.h   |  2 ++
 include/xen/xen-ops.h   | 10 +
 lib/Kconfig.kasan   |  3 ++-
 8 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 763e71abc0fe..b862c03a2019 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -13,6 +13,8 @@
 #include 
 #include 
 
+#include 
+
 #include 
 #include 
 #include 
@@ -231,7 +233,7 @@ static void __init kasan_early_p4d_populate(pgd_t *pgd,
} while (p4d++, addr = next, addr != end && p4d_none(*p4d));
 }
 
-static void __init kasan_map_early_shadow(pgd_t *pgd)
+void __init kasan_map_early_shadow(pgd_t *pgd)
 {
/* See comment in kasan_init() */
unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
@@ -317,6 +319,8 @@ void __init kasan_early_init(void)
 
kasan_map_early_shadow(early_top_pgt);
kasan_map_early_shadow(init_top_pgt);
+
+   xen_kasan_early_init();
 }
 
 void __init kasan_init(void)
@@ -348,6 +352,8 @@ void __init kasan_init(void)
__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
}
 
+   xen_kasan_pin_pgd(early_top_pgt);
+
load_cr3(early_top_pgt);
__flush_tlb_all();
 
@@ -412,6 +418,8 @@ void __init kasan_init(void)
load_cr3(init_top_pgt);
__flush_tlb_all();
 
+   xen_kasan_unpin_pgd(early_top_pgt);
+
/*
 * kasan_early_shadow_page has been used as early shadow memory, thus
 * it may contain some garbage. Now we can clear and write protect it,
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 084de77a109e..102fad0b0bca 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,3 +1,10 @@
+KASAN_SANITIZE_enlighten_pv.o := n
+KASAN_SANITIZE_enlighten.o := n
+KASAN_SANITIZE_irq.o := n
+KASAN_SANITIZE_mmu_pv.o := n
+KASAN_SANITIZE_p2m.o := n
+KASAN_SANITIZE_multicalls.o := n
+
 # SPDX-License-Identifier: GPL-2.0
 OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
 
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae4a41ca19f6..27de55699f24 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -72,6 +72,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_ACPI
 #include 
@@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Get mfn list */
xen_build_dynamic_phys_to_machine();
 
+   kasan_early_init();
+
/*
 * Set up kernel GDT and segment registers, mainly so that
 * -fstack-protector code can be executed.
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index bbba8b17829a..a9a47f0bf22e 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1771,6 +1771,41 @@ static void __init set_page_prot(void *addr, pgprot_t 
prot)
 {
return set_page_prot_flags(addr, prot, UVMF_NONE);
 }
+
+#ifdef CONFIG_KASAN
+void __init xen_kasan_early_init(void)
+{
+   if (!xen_pv_domain())
+   return;
+
+   /* PV page tables must be read-only */
+   set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
+   set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
+   set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);
+
+   /* Add KASAN mappings into initial PV page tables */
+   kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base);
+}
+
+void __init xen_kasan_pin_pgd(pgd_t *pgd)
+{
+   if (!xen_pv_domain())
+   return;
+
+   set_page_prot(pgd, PAGE_KERNEL_RO);
+   pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa_symbol(pgd)));
+}
+
+void __init xen_kasan_unpin_pgd(pgd_t *pgd)
+{
+   if (!xen_pv_domain())
+   return;
+
+   pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa_symbol(pgd)));
+   set_page_prot(pgd, PAGE_KERNEL);
+}
+#endif /* ifdef CONFIG_KASAN */

[Xen-devel] [PATCH v3 3/4] xen: teach KASAN about grant tables

2020-02-07 Thread Sergey Dyasli

From: Ross Lagerwall 

Otherwise it produces lots of false positives when a guest starts using
PV I/O devices.

Signed-off-by: Ross Lagerwall 
Signed-off-by: Sergey Dyasli 
---
v2 --> v3: no changes

v1 --> v2: no changes

RFC --> v1:
- Slightly clarified the commit message
---
 drivers/xen/grant-table.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7b36b51cdb9f..ce95f7232de6 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1048,6 +1048,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
foreign = xen_page_foreign(pages[i]);
foreign->domid = map_ops[i].dom;
foreign->gref = map_ops[i].ref;
+   kasan_alloc_pages(pages[i], 0);
break;
}
 
@@ -1084,8 +1085,10 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref 
*unmap_ops,
if (ret)
return ret;
 
-   for (i = 0; i < count; i++)
+   for (i = 0; i < count; i++) {
ClearPageForeign(pages[i]);
+   kasan_free_pages(pages[i], 0);
+   }
 
return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
 }
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v3 1/4] kasan: introduce set_pmd_early_shadow()

2020-02-07 Thread Sergey Dyasli

It is incorrect to call pmd_populate_kernel() multiple times for the
same page table from inside Xen PV domains. Xen notices it during
kasan_populate_early_shadow():

(XEN) mm.c:3222:d155v0 mfn 3704b already pinned

This happens for kasan_early_shadow_pte when USE_SPLIT_PTE_PTLOCKS is
enabled. Fix this by introducing set_pmd_early_shadow() which calls
pmd_populate_kernel() only once and uses set_pmd() afterwards.

Signed-off-by: Sergey Dyasli 
---
v2 --> v3: no changes

v1 --> v2:
- Fix compilation without CONFIG_XEN_PV
- Slightly updated description

RFC --> v1:
- New patch
---
 mm/kasan/init.c | 32 
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index ce45c491ebcd..7791fe0a7704 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -81,6 +81,26 @@ static inline bool kasan_early_shadow_page_entry(pte_t pte)
return pte_page(pte) == virt_to_page(lm_alias(kasan_early_shadow_page));
 }
 
+#ifdef CONFIG_XEN_PV
+static inline void set_pmd_early_shadow(pmd_t *pmd)
+{
+   static bool pmd_populated = false;
+   pte_t *early_shadow = lm_alias(kasan_early_shadow_pte);
+
+   if (likely(pmd_populated)) {
+   set_pmd(pmd, __pmd(__pa(early_shadow) | _PAGE_TABLE));
+   } else {
+   pmd_populate_kernel(&init_mm, pmd, early_shadow);
+   pmd_populated = true;
+   }
+}
+#else
+static inline void set_pmd_early_shadow(pmd_t *pmd)
+{
+   pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_early_shadow_pte));
+}
+#endif /* ifdef CONFIG_XEN_PV */
+
 static __init void *early_alloc(size_t size, int node)
 {
void *ptr = memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
@@ -120,8 +140,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned 
long addr,
next = pmd_addr_end(addr, end);
 
if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
-   pmd_populate_kernel(&init_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
@@ -157,8 +176,7 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned 
long addr,
pud_populate(&init_mm, pud,
lm_alias(kasan_early_shadow_pmd));
pmd = pmd_offset(pud, addr);
-   pmd_populate_kernel(&init_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
@@ -198,8 +216,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned 
long addr,
pud_populate(&init_mm, pud,
lm_alias(kasan_early_shadow_pmd));
pmd = pmd_offset(pud, addr);
-   pmd_populate_kernel(&init_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
@@ -271,8 +288,7 @@ int __ref kasan_populate_early_shadow(const void 
*shadow_start,
pud_populate(&init_mm, pud,
lm_alias(kasan_early_shadow_pmd));
pmd = pmd_offset(pud, addr);
-   pmd_populate_kernel(&init_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] Live-Patch application failure in core-scheduling mode

2020-02-06 Thread Sergey Dyasli

On 06/02/2020 11:05, Sergey Dyasli wrote:
> On 06/02/2020 09:57, Jürgen Groß wrote:
>> On 05.02.20 17:03, Sergey Dyasli wrote:
>>> Hello,
>>>
>>> I'm currently investigating a Live-Patch application failure in core-
>>> scheduling mode and this is an example of what I usually get:
>>> (it's easily reproducible)
>>>
>>>  (XEN) [  342.528305] livepatch: lp: CPU8 - IPIing the other 15 CPUs
>>>  (XEN) [  342.558340] livepatch: lp: Timed out on semaphore in CPU 
>>> quiesce phase 13/15
>>>  (XEN) [  342.558343] bad cpus: 6 9
>>>
>>>  (XEN) [  342.559293] CPU:6
>>>  (XEN) [  342.559562] Xen call trace:
>>>  (XEN) [  342.559565][] R 
>>> common/schedule.c#sched_wait_rendezvous_in+0xa4/0x270
>>>  (XEN) [  342.559568][] F 
>>> common/schedule.c#schedule+0x17a/0x260
>>>  (XEN) [  342.559571][] F 
>>> common/softirq.c#__do_softirq+0x5a/0x90
>>>  (XEN) [  342.559574][] F 
>>> arch/x86/domain.c#guest_idle_loop+0x35/0x60
>>>
>>>  (XEN) [  342.559761] CPU:9
>>>  (XEN) [  342.560026] Xen call trace:
>>>  (XEN) [  342.560029][] R _spin_lock_irq+0x11/0x40
>>>  (XEN) [  342.560032][] F 
>>> common/schedule.c#sched_wait_rendezvous_in+0xc3/0x270
>>>  (XEN) [  342.560036][] F 
>>> common/schedule.c#schedule+0x17a/0x260
>>>  (XEN) [  342.560039][] F 
>>> common/softirq.c#__do_softirq+0x5a/0x90
>>>  (XEN) [  342.560042][] F 
>>> arch/x86/domain.c#idle_loop+0x55/0xb0
>>>
>>> The first HT sibling is waiting for the second in the LP-application
>>> context while the second waits for the first in the scheduler context.
>>>
>>> Any suggestions on how to improve this situation are welcome.
>>
>> Can you test the attached patch, please? It is only tested to boot, so
>> I did no livepatch tests with it.
>
> Thank you for the patch! It seems to fix the issue in my manual testing.
> I'm going to submit automatic LP testing for both thread/core modes.

Andrew suggested to test late ucode loading as well and so I did.
It uses stop_machine() to rendezvous cpus and it failed with a similar
backtrace for a problematic CPU. But in this case the system crashed
since there is no timeout involved:

(XEN) [  155.025168] Xen call trace:
(XEN) [  155.040095][] R _spin_unlock_irq+0x22/0x30
(XEN) [  155.069549][] S 
common/schedule.c#sched_wait_rendezvous_in+0xa2/0x270
(XEN) [  155.109696][] F 
common/schedule.c#sched_slave+0x198/0x260
(XEN) [  155.145521][] F 
common/softirq.c#__do_softirq+0x5a/0x90
(XEN) [  155.180223][] F 
x86_64/entry.S#process_softirqs+0x6/0x20

It looks like your patch provides a workaround for LP case, but other
cases like stop_machine() remain broken since the underlying issue with
the scheduler is still there.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] Live-Patch application failure in core-scheduling mode

2020-02-06 Thread Sergey Dyasli

On 06/02/2020 09:57, Jürgen Groß wrote:
> On 05.02.20 17:03, Sergey Dyasli wrote:
>> Hello,
>>
>> I'm currently investigating a Live-Patch application failure in core-
>> scheduling mode and this is an example of what I usually get:
>> (it's easily reproducible)
>>
>>  (XEN) [  342.528305] livepatch: lp: CPU8 - IPIing the other 15 CPUs
>>  (XEN) [  342.558340] livepatch: lp: Timed out on semaphore in CPU 
>> quiesce phase 13/15
>>  (XEN) [  342.558343] bad cpus: 6 9
>>
>>  (XEN) [  342.559293] CPU:6
>>  (XEN) [  342.559562] Xen call trace:
>>  (XEN) [  342.559565][] R 
>> common/schedule.c#sched_wait_rendezvous_in+0xa4/0x270
>>  (XEN) [  342.559568][] F 
>> common/schedule.c#schedule+0x17a/0x260
>>  (XEN) [  342.559571][] F 
>> common/softirq.c#__do_softirq+0x5a/0x90
>>  (XEN) [  342.559574][] F 
>> arch/x86/domain.c#guest_idle_loop+0x35/0x60
>>
>>  (XEN) [  342.559761] CPU:9
>>  (XEN) [  342.560026] Xen call trace:
>>  (XEN) [  342.560029][] R _spin_lock_irq+0x11/0x40
>>  (XEN) [  342.560032][] F 
>> common/schedule.c#sched_wait_rendezvous_in+0xc3/0x270
>>  (XEN) [  342.560036][] F 
>> common/schedule.c#schedule+0x17a/0x260
>>  (XEN) [  342.560039][] F 
>> common/softirq.c#__do_softirq+0x5a/0x90
>>  (XEN) [  342.560042][] F 
>> arch/x86/domain.c#idle_loop+0x55/0xb0
>>
>> The first HT sibling is waiting for the second in the LP-application
>> context while the second waits for the first in the scheduler context.
>>
>> Any suggestions on how to improve this situation are welcome.
>
> Can you test the attached patch, please? It is only tested to boot, so
> I did no livepatch tests with it.

Thank you for the patch! It seems to fix the issue in my manual testing.
I'm going to submit automatic LP testing for both thread/core modes.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] Live-Patch application failure in core-scheduling mode

2020-02-05 Thread Sergey Dyasli

Hello,

I'm currently investigating a Live-Patch application failure in core-
scheduling mode and this is an example of what I usually get:
(it's easily reproducible)

(XEN) [  342.528305] livepatch: lp: CPU8 - IPIing the other 15 CPUs
(XEN) [  342.558340] livepatch: lp: Timed out on semaphore in CPU quiesce 
phase 13/15
(XEN) [  342.558343] bad cpus: 6 9

(XEN) [  342.559293] CPU:6
(XEN) [  342.559562] Xen call trace:
(XEN) [  342.559565][] R 
common/schedule.c#sched_wait_rendezvous_in+0xa4/0x270
(XEN) [  342.559568][] F 
common/schedule.c#schedule+0x17a/0x260
(XEN) [  342.559571][] F 
common/softirq.c#__do_softirq+0x5a/0x90
(XEN) [  342.559574][] F 
arch/x86/domain.c#guest_idle_loop+0x35/0x60

(XEN) [  342.559761] CPU:9
(XEN) [  342.560026] Xen call trace:
(XEN) [  342.560029][] R _spin_lock_irq+0x11/0x40
(XEN) [  342.560032][] F 
common/schedule.c#sched_wait_rendezvous_in+0xc3/0x270
(XEN) [  342.560036][] F 
common/schedule.c#schedule+0x17a/0x260
(XEN) [  342.560039][] F 
common/softirq.c#__do_softirq+0x5a/0x90
(XEN) [  342.560042][] F 
arch/x86/domain.c#idle_loop+0x55/0xb0

The first HT sibling is waiting for the second in the LP-application
context while the second waits for the first in the scheduler context.

Any suggestions on how to improve this situation are welcome.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v3 2/2] xsm: hide detailed Xen version from unprivileged guests

2020-01-23 Thread Sergey Dyasli

On 22/01/2020 11:25, Julien Grall wrote:
>
>
> On 22/01/2020 11:19, Sergey Dyasli wrote:
>> On 22/01/2020 10:14, Julien Grall wrote:
>>>
>>>
>>> On 22/01/2020 10:01, Sergey Dyasli wrote:
>>>> On 20/01/2020 10:01, Jan Beulich wrote:
>>>>> On 17.01.2020 17:44, Sergey Dyasli wrote:
>>>>>> v2 --> v3:
>>>>>> - Remove hvmloader filtering
>>>>>
>>>>> Why? Seeing the prior discussion, how about adding XENVER_denied to
>>>>> return the "denied" string, allowing components which want to filter
>>>>> to know exactly what to look for? And then re-add the filtering you
>>>>> had? (The help text of the config option should then perhaps be
>>>>> extended to make very clear that the chosen string should not match
>>>>> anything that could potentially be returned by any of the XENVER_
>>>>> sub-ops.)
>>>>
>>>> I had the following reasoning:
>>>>
>>>> 1. Most real-world users would set CONFIG_XSM_DENIED_STRING="" anyway.
>>>>
>>>> 2. Filtering in DMI tables is not a complete solution, since denied
>>>> string leaks elsewhere through the hypercall (PV guests, sysfs, driver
>>>> logs) as Andrew has pointed out in the previous discussion.
>>>>
>>>> On the other hand, SMBios filtering slightly improves the situation for
>>>> HVM domains, so I can return it if maintainers find it worthy.
>>>
>>> While I am not a maintainer of this code, my concern is you impose the 
>>> conversion from "denied" to "" to all the users (include those who wants to 
>>> keep "denied").
>>
>> This is not what's happening here: the default is still "<denied>" (as
>> per patch 1); but patch 2 makes XENVER_extraversion, XENVER_compile_info
>> and XENVER_changeset to return "<denied>" instead of the real values
>> which causes the UI / logs issues.
>
> I was referring the SMBios filtering... I don't think doing a blank filtering 
> is the right thing to do in the hvmloader for the reason explained above.

Apologies for misunderstanding the context. But I disagree about
hvmloader. Returning "denied" from xen_version hypercall to guests is
one thing, but hvmloader and SMBios tables are parts of the hypervisor
and putting "denied" there is simply a terrible user experience.

>
> Regarding CONFIG_XSM_DENIED_STRING, I think this is a good step as it allows 
> the vendor to configure it.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v3 2/2] xsm: hide detailed Xen version from unprivileged guests

2020-01-22 Thread Sergey Dyasli

On 22/01/2020 10:57, George Dunlap wrote:
> On 1/22/20 10:14 AM, Julien Grall wrote:
>>
>>
>> On 22/01/2020 10:01, Sergey Dyasli wrote:
>>> On 20/01/2020 10:01, Jan Beulich wrote:
>>>> On 17.01.2020 17:44, Sergey Dyasli wrote:
>>>>> v2 --> v3:
>>>>> - Remove hvmloader filtering
>>>>
>>>> Why? Seeing the prior discussion, how about adding XENVER_denied to
>>>> return the "denied" string, allowing components which want to filter
>>>> to know exactly what to look for? And then re-add the filtering you
>>>> had? (The help text of the config option should then perhaps be
>>>> extended to make very clear that the chosen string should not match
>>>> anything that could potentially be returned by any of the XENVER_
>>>> sub-ops.)
>>>
>>> I had the following reasoning:
>>>
>>> 1. Most real-world users would set CONFIG_XSM_DENIED_STRING="" anyway.
>>>
>>> 2. Filtering in DMI tables is not a complete solution, since denied
>>> string leaks elsewhere through the hypercall (PV guests, sysfs, driver
>>> logs) as Andrew has pointed out in the previous discussion.
>>>
>>> On the other hand, SMBios filtering slightly improves the situation for
>>> HVM domains, so I can return it if maintainers find it worthy.
>>
>> While I am not a maintainer of this code, my concern is you impose the
>> conversion from "denied" to "" to all the users (include those who wants
>> to keep "denied").
>>
>> If you were doing any filtering in hvmloader, then it would be best if
>> this is configurable. But this is a bit pointless if you already allow
>> the user to configure the string at the hypervisor level :).
>
> So there are two things we're concerned about:
> - Some people don't want to scare users with a "<denied>" string
> - Some people don't want to "silently fail" with a "" string
>
> The fact is, in *both cases*, this is a UI problem.  EVERY caller of
> this interface should figure out independently what a graceful way of
> handling failure is for their target UI.  Any caller who does not think
> carefully about what to do in the failure case is buggy -- which
> includes every single caller today.  The CONFIG_XSM_DENIED_STRING is a
> gross hack fallback for buggy UIs.
>
> Now, I don't like to tell other people to do work, and I certainly don't
> plan on fixing hvmloader at the moment, because it's low-priority for
> me.  But I do think that having hvmloader detect failure and explicitly
> make a sensible decision is the right thing to do, regardless of the
> availability of CONFIG_XSM_DENIED_STRING to work around buggy callers.

It's not entirely clear to me what you suggest to do with hvmloader.
Could you elaborate a bit?

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v3 2/2] xsm: hide detailed Xen version from unprivileged guests

2020-01-22 Thread Sergey Dyasli

On 22/01/2020 10:14, Julien Grall wrote:
>
>
> On 22/01/2020 10:01, Sergey Dyasli wrote:
>> On 20/01/2020 10:01, Jan Beulich wrote:
>>> On 17.01.2020 17:44, Sergey Dyasli wrote:
>>>> v2 --> v3:
>>>> - Remove hvmloader filtering
>>>
>>> Why? Seeing the prior discussion, how about adding XENVER_denied to
>>> return the "denied" string, allowing components which want to filter
>>> to know exactly what to look for? And then re-add the filtering you
>>> had? (The help text of the config option should then perhaps be
>>> extended to make very clear that the chosen string should not match
>>> anything that could potentially be returned by any of the XENVER_
>>> sub-ops.)
>>
>> I had the following reasoning:
>>
>> 1. Most real-world users would set CONFIG_XSM_DENIED_STRING="" anyway.
>>
>> 2. Filtering in DMI tables is not a complete solution, since denied
>> string leaks elsewhere through the hypercall (PV guests, sysfs, driver
>> logs) as Andrew has pointed out in the previous discussion.
>>
>> On the other hand, SMBios filtering slightly improves the situation for
>> HVM domains, so I can return it if maintainers find it worthy.
>
> While I am not a maintainer of this code, my concern is you impose the 
> conversion from "denied" to "" to all the users (include those who wants to 
> keep "denied").

This is not what's happening here: the default is still "<denied>" (as
per patch 1); but patch 2 makes XENVER_extraversion, XENVER_compile_info
and XENVER_changeset return "<denied>" instead of the real values,
which causes the UI / logs issues.

>
> If you were doing any filtering in hvmloader, then it would be best if this 
> is configurable. But this is a bit pointless if you already allow the user to 
> configure the string at the hypervisor level :).

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v2 2/4] x86/xen: add basic KASAN support for PV kernel

2020-01-22 Thread Sergey Dyasli

On 17/01/2020 14:56, Boris Ostrovsky wrote:
>
>
> On 1/17/20 7:58 AM, Sergey Dyasli wrote:
>> --- a/arch/x86/mm/kasan_init_64.c
>> +++ b/arch/x86/mm/kasan_init_64.c
>> @@ -13,6 +13,9 @@
>>   #include 
>>   #include 
>>   +#include 
>> +#include 
>> +
>>   #include 
>>   #include 
>>   #include 
>> @@ -332,6 +335,11 @@ void __init kasan_early_init(void)
>>   for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
>>   kasan_early_shadow_p4d[i] = __p4d(p4d_val);
>>   +if (xen_pv_domain()) {
>> +pgd_t *pv_top_pgt = xen_pv_kasan_early_init();
>> +kasan_map_early_shadow(pv_top_pgt);
>> +}
>> +
>
>
> I'd suggest replacing this with xen_kasan_early_init() and doing everything, 
> including PV check, there. This way non-Xen code won't need to be aware of 
> Xen-specific details such as guest types.

This would require exporting kasan_map_early_shadow() via kasan.h.
I'm fine with either approach.

>>   kasan_map_early_shadow(early_top_pgt);
>>   kasan_map_early_shadow(init_top_pgt);
>>   }
>> @@ -369,6 +377,8 @@ void __init kasan_init(void)
>>   __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
>>   }
>>   +xen_pv_kasan_pin_pgd(early_top_pgt);
>> +
>
> And drop "_pv" here (and below) for the same reason.

This is a reasonable suggestion.

>>   load_cr3(early_top_pgt);
>>   __flush_tlb_all();
>>   @@ -433,6 +443,8 @@ void __init kasan_init(void)
>>   load_cr3(init_top_pgt);
>>   __flush_tlb_all();
>>   +xen_pv_kasan_unpin_pgd(early_top_pgt);
>> +
>>

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v2 4/4] xen/netback: fix grant copy across page boundary

2020-01-22 Thread Sergey Dyasli

On 20/01/2020 08:58, Paul Durrant wrote:
> On Fri, 17 Jan 2020 at 12:59, Sergey Dyasli  wrote:
>>
>> From: Ross Lagerwall 
>>
>> When KASAN (or SLUB_DEBUG) is turned on, there is a higher chance that
>> non-power-of-two allocations are not aligned to the next power of 2 of
>> the size. Therefore, handle grant copies that cross page boundaries.
>>
>> Signed-off-by: Ross Lagerwall 
>> Signed-off-by: Sergey Dyasli 
>> ---
>> v1 --> v2:
>> - Use sizeof_field(struct sk_buff, cb)) instead of magic number 48
>> - Slightly update commit message
>>
>> RFC --> v1:
>> - Added BUILD_BUG_ON to the netback patch
>> - xenvif_idx_release() now located outside the loop
>>
>> CC: Wei Liu 
>> CC: Paul Durrant 
>
> Acked-by: Paul Durrant 

Thanks! I believe this patch can go in independently from the other
patches in the series. What else is required for this?

--
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v3 2/2] xsm: hide detailed Xen version from unprivileged guests

2020-01-22 Thread Sergey Dyasli

On 20/01/2020 10:01, Jan Beulich wrote:
> On 17.01.2020 17:44, Sergey Dyasli wrote:
>> v2 --> v3:
>> - Remove hvmloader filtering
>
> Why? Seeing the prior discussion, how about adding XENVER_denied to
> return the "denied" string, allowing components which want to filter
> to know exactly what to look for? And then re-add the filtering you
> had? (The help text of the config option should then perhaps be
> extended to make very clear that the chosen string should not match
> anything that could potentially be returned by any of the XENVER_
> sub-ops.)

I had the following reasoning:

1. Most real-world users would set CONFIG_XSM_DENIED_STRING="" anyway.

2. Filtering in DMI tables is not a complete solution, since denied
string leaks elsewhere through the hypercall (PV guests, sysfs, driver
logs) as Andrew has pointed out in the previous discussion.

On the other hand, SMBios filtering slightly improves the situation for
HVM domains, so I can return it if maintainers find it worthy.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v3 2/2] xsm: hide detailed Xen version from unprivileged guests

2020-01-17 Thread Sergey Dyasli

Hide the following information that can help identify the running Xen
binary version: XENVER_extraversion, XENVER_compile_info, XENVER_changeset.
This makes it harder for malicious guests to fingerprint Xen to identify
exploitable systems.

Add explicit cases for XENVER_commandline and XENVER_build_id as well
for better code readability.

Signed-off-by: Sergey Dyasli 
---
v2 --> v3:
- Remove hvmloader filtering
- Add ASSERT_UNREACHABLE

v1 --> v2:
- Added xsm_filter_denied() to hvmloader instead of modifying xen_deny()
- Made behaviour the same for both Release and Debug builds
- XENVER_capabilities is no longer hidden

CC: Andrew Cooper 
CC: George Dunlap 
CC: Ian Jackson 
CC: Jan Beulich 
CC: Julien Grall 
CC: Konrad Rzeszutek Wilk 
CC: Stefano Stabellini 
CC: Wei Liu 
CC: Daniel De Graaf 
CC: Doug Goldstein 
---
 xen/include/xsm/dummy.h | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index b8e185e6fa..c00186d7b6 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -750,16 +750,23 @@ static XSM_INLINE int xsm_xen_version (XSM_DEFAULT_ARG 
uint32_t op)
 case XENVER_get_features:
 /* These sub-ops ignore the permission checks and return data. */
 return 0;
-case XENVER_extraversion:
-case XENVER_compile_info:
+
 case XENVER_capabilities:
-case XENVER_changeset:
 case XENVER_pagesize:
 case XENVER_guest_handle:
 /* These MUST always be accessible to any guest by default. */
 return xsm_default_action(XSM_HOOK, current->domain, NULL);
-default:
+
+case XENVER_extraversion:
+case XENVER_compile_info:
+case XENVER_changeset:
+case XENVER_commandline:
+case XENVER_build_id:
 return xsm_default_action(XSM_PRIV, current->domain, NULL);
+
+default:
+ASSERT_UNREACHABLE();
+return -EPERM;
 }
 }
 
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v3 1/2] xsm: add config option for denied string

2020-01-17 Thread Sergey Dyasli

Signed-off-by: Sergey Dyasli 
---
v2 --> v3:
- new patch

CC: Andrew Cooper 
CC: George Dunlap 
CC: Ian Jackson 
CC: Jan Beulich 
CC: Julien Grall 
CC: Konrad Rzeszutek Wilk 
CC: Stefano Stabellini 
CC: Wei Liu 
CC: Daniel De Graaf 
CC: Doug Goldstein 
---
 xen/common/Kconfig   | 8 
 xen/common/version.c | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/xen/common/Kconfig b/xen/common/Kconfig
index b3d161d057..f0a3f0da0f 100644
--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -236,6 +236,14 @@ choice
bool "SILO" if XSM_SILO
 endchoice
 
+config XSM_DENIED_STRING
+   string "xen_version denied string"
+   default "<denied>"
+   depends on XSM
+   ---help---
+ A string which substitutes sensitive information returned via
+ xen_version hypercall to non-privileged guests
+
 config LATE_HWDOM
bool "Dedicated hardware domain"
default n
diff --git a/xen/common/version.c b/xen/common/version.c
index 937eb1281c..14b205af48 100644
--- a/xen/common/version.c
+++ b/xen/common/version.c
@@ -67,7 +67,7 @@ const char *xen_banner(void)
 
 const char *xen_deny(void)
 {
-return "<denied>";
+return CONFIG_XSM_DENIED_STRING;
 }
 
 static const void *build_id_p __read_mostly;
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v2 4/4] xen/netback: fix grant copy across page boundary

2020-01-17 Thread Sergey Dyasli

From: Ross Lagerwall 

When KASAN (or SLUB_DEBUG) is turned on, there is a higher chance that
non-power-of-two allocations are not aligned to the next power of 2 of
the size. Therefore, handle grant copies that cross page boundaries.

Signed-off-by: Ross Lagerwall 
Signed-off-by: Sergey Dyasli 
---
v1 --> v2:
- Use sizeof_field(struct sk_buff, cb)) instead of magic number 48
- Slightly update commit message

RFC --> v1:
- Added BUILD_BUG_ON to the netback patch
- xenvif_idx_release() now located outside the loop

CC: Wei Liu 
CC: Paul Durrant 
---
 drivers/net/xen-netback/common.h  |  2 +-
 drivers/net/xen-netback/netback.c | 60 +--
 2 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 05847eb91a1b..e57684415edd 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -155,7 +155,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
 
-   struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+   struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS * 2];
struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
/* passed to gnttab_[un]map_refs with pages under (un)mapping */
diff --git a/drivers/net/xen-netback/netback.c 
b/drivers/net/xen-netback/netback.c
index 0020b2e8c279..f8774ede9f0e 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -320,6 +320,7 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
 
 struct xenvif_tx_cb {
u16 pending_idx;
+   u8 copies;
 };
 
 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
@@ -439,6 +440,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
 {
struct gnttab_map_grant_ref *gop_map = *gopp_map;
u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+   u8 copies = XENVIF_TX_CB(skb)->copies;
/* This always points to the shinfo of the skb being checked, which
 * could be either the first or the one on the frag_list
 */
@@ -450,23 +452,26 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
int nr_frags = shinfo->nr_frags;
const bool sharedslot = nr_frags &&
frag_get_pending_idx(&shinfo->frags[0]) == 
pending_idx;
-   int i, err;
+   int i, err = 0;
 
-   /* Check status of header. */
-   err = (*gopp_copy)->status;
-   if (unlikely(err)) {
-   if (net_ratelimit())
-   netdev_dbg(queue->vif->dev,
+   while (copies) {
+   /* Check status of header. */
+   int newerr = (*gopp_copy)->status;
+   if (unlikely(newerr)) {
+   if (net_ratelimit())
+   netdev_dbg(queue->vif->dev,
   "Grant copy of header failed! status: %d 
pending_idx: %u ref: %u\n",
   (*gopp_copy)->status,
   pending_idx,
   (*gopp_copy)->source.u.ref);
-   /* The first frag might still have this slot mapped */
-   if (!sharedslot)
-   xenvif_idx_release(queue, pending_idx,
-  XEN_NETIF_RSP_ERROR);
+   err = newerr;
+   }
+   (*gopp_copy)++;
+   copies--;
}
-   (*gopp_copy)++;
+   /* The first frag might still have this slot mapped */
+   if (unlikely(err) && !sharedslot)
+   xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
 
 check_frags:
for (i = 0; i < nr_frags; i++, gop_map++) {
@@ -910,6 +915,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
xenvif_tx_err(queue, &txreq, extra_count, idx);
break;
}
+   XENVIF_TX_CB(skb)->copies = 0;
 
skb_shinfo(skb)->nr_frags = ret;
if (data_len < txreq.size)
@@ -933,6 +939,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
   "Can't allocate the 
frag_list skb.\n");
break;
}
+   XENVIF_TX_CB(nskb)->copies = 0;
}
 
if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
@@ -990,6 +997,31 @@ static void xenvif_tx_build_gops(struct xenvif_queue 
*queue,
 
queue->tx_copy_ops[*copy_ops].len = data_len;
queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
+   X

[Xen-devel] [PATCH v2 0/4] basic KASAN support for Xen PV domains

2020-01-17 Thread Sergey Dyasli

This series allows booting and running Xen PV kernels (Dom0 and DomU) with
CONFIG_KASAN=y. It has been used internally for some time now with good
results for finding memory corruption issues in Dom0 kernel.

Only Outline instrumentation is supported at the moment.

Sergey Dyasli (2):
  kasan: introduce set_pmd_early_shadow()
  x86/xen: add basic KASAN support for PV kernel

Ross Lagerwall (2):
  xen: teach KASAN about grant tables
  xen/netback: fix grant copy across page boundary

 arch/x86/mm/kasan_init_64.c   | 12 +++
 arch/x86/xen/Makefile |  7 
 arch/x86/xen/enlighten_pv.c   |  3 ++
 arch/x86/xen/mmu_pv.c | 38 
 drivers/net/xen-netback/common.h  |  2 +-
 drivers/net/xen-netback/netback.c | 60 +--
 drivers/xen/Makefile  |  2 ++
 drivers/xen/grant-table.c |  5 ++-
 include/xen/xen-ops.h | 10 ++
 kernel/Makefile   |  2 ++
 lib/Kconfig.kasan |  3 +-
 mm/kasan/init.c   | 32 -
 12 files changed, 154 insertions(+), 22 deletions(-)

-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v2 1/4] kasan: introduce set_pmd_early_shadow()

2020-01-17 Thread Sergey Dyasli

It is incorrect to call pmd_populate_kernel() multiple times for the
same page table from inside Xen PV domains. Xen notices it during
kasan_populate_early_shadow():

(XEN) mm.c:3222:d155v0 mfn 3704b already pinned

This happens for kasan_early_shadow_pte when USE_SPLIT_PTE_PTLOCKS is
enabled. Fix this by introducing set_pmd_early_shadow() which calls
pmd_populate_kernel() only once and uses set_pmd() afterwards.

Signed-off-by: Sergey Dyasli 
---
v1 --> v2:
- Fix compilation without CONFIG_XEN_PV
- Slightly updated description

RFC --> v1:
- New patch
---
 mm/kasan/init.c | 32 
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index ce45c491ebcd..7791fe0a7704 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -81,6 +81,26 @@ static inline bool kasan_early_shadow_page_entry(pte_t pte)
return pte_page(pte) == virt_to_page(lm_alias(kasan_early_shadow_page));
 }
 
+#ifdef CONFIG_XEN_PV
+static inline void set_pmd_early_shadow(pmd_t *pmd)
+{
+   static bool pmd_populated = false;
+   pte_t *early_shadow = lm_alias(kasan_early_shadow_pte);
+
+   if (likely(pmd_populated)) {
+   set_pmd(pmd, __pmd(__pa(early_shadow) | _PAGE_TABLE));
+   } else {
+   pmd_populate_kernel(&init_mm, pmd, early_shadow);
+   pmd_populated = true;
+   }
+}
+#else
+static inline void set_pmd_early_shadow(pmd_t *pmd)
+{
+   pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_early_shadow_pte));
+}
+#endif /* ifdef CONFIG_XEN_PV */
+
 static __init void *early_alloc(size_t size, int node)
 {
void *ptr = memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
@@ -120,8 +140,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned 
long addr,
next = pmd_addr_end(addr, end);
 
if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
-   pmd_populate_kernel(&init_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
@@ -157,8 +176,7 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned 
long addr,
pud_populate(&init_mm, pud,
lm_alias(kasan_early_shadow_pmd));
pmd = pmd_offset(pud, addr);
-   pmd_populate_kernel(&init_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
@@ -198,8 +216,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned 
long addr,
pud_populate(&init_mm, pud,
lm_alias(kasan_early_shadow_pmd));
pmd = pmd_offset(pud, addr);
-   pmd_populate_kernel(&init_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
@@ -271,8 +288,7 @@ int __ref kasan_populate_early_shadow(const void 
*shadow_start,
pud_populate(&init_mm, pud,
lm_alias(kasan_early_shadow_pmd));
pmd = pmd_offset(pud, addr);
-   pmd_populate_kernel(&init_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v2 2/4] x86/xen: add basic KASAN support for PV kernel

2020-01-17 Thread Sergey Dyasli

This enables the use of Outline instrumentation for Xen PV kernels.

The KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
and are hence disabled.

Signed-off-by: Sergey Dyasli 
---
v1 --> v2:
- Fix compilation without CONFIG_XEN_PV
- Use macros for KASAN_SHADOW_START

RFC --> v1:
- New functions with declarations in xen/xen-ops.h
- Fixed the issue with free_kernel_image_pages() with the help of
  xen_pv_kasan_unpin_pgd()
---
 arch/x86/mm/kasan_init_64.c | 12 
 arch/x86/xen/Makefile   |  7 +++
 arch/x86/xen/enlighten_pv.c |  3 +++
 arch/x86/xen/mmu_pv.c   | 38 +
 drivers/xen/Makefile|  2 ++
 include/xen/xen-ops.h   | 10 ++
 kernel/Makefile |  2 ++
 lib/Kconfig.kasan   |  3 ++-
 8 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index cf5bc37c90ac..902a6a152d33 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -13,6 +13,9 @@
 #include 
 #include 
 
+#include 
+#include 
+
 #include 
 #include 
 #include 
@@ -332,6 +335,11 @@ void __init kasan_early_init(void)
for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
kasan_early_shadow_p4d[i] = __p4d(p4d_val);
 
+   if (xen_pv_domain()) {
+   pgd_t *pv_top_pgt = xen_pv_kasan_early_init();
+   kasan_map_early_shadow(pv_top_pgt);
+   }
+
kasan_map_early_shadow(early_top_pgt);
kasan_map_early_shadow(init_top_pgt);
 }
@@ -369,6 +377,8 @@ void __init kasan_init(void)
__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
}
 
+   xen_pv_kasan_pin_pgd(early_top_pgt);
+
load_cr3(early_top_pgt);
__flush_tlb_all();
 
@@ -433,6 +443,8 @@ void __init kasan_init(void)
load_cr3(init_top_pgt);
__flush_tlb_all();
 
+   xen_pv_kasan_unpin_pgd(early_top_pgt);
+
/*
 * kasan_early_shadow_page has been used as early shadow memory, thus
 * it may contain some garbage. Now we can clear and write protect it,
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 084de77a109e..102fad0b0bca 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,3 +1,10 @@
+KASAN_SANITIZE_enlighten_pv.o := n
+KASAN_SANITIZE_enlighten.o := n
+KASAN_SANITIZE_irq.o := n
+KASAN_SANITIZE_mmu_pv.o := n
+KASAN_SANITIZE_p2m.o := n
+KASAN_SANITIZE_multicalls.o := n
+
 # SPDX-License-Identifier: GPL-2.0
 OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
 
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae4a41ca19f6..27de55699f24 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -72,6 +72,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_ACPI
 #include 
@@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Get mfn list */
xen_build_dynamic_phys_to_machine();
 
+   kasan_early_init();
+
/*
 * Set up kernel GDT and segment registers, mainly so that
 * -fstack-protector code can be executed.
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index c8dbee62ec2a..5cd63e37a2db 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1771,6 +1771,36 @@ static void __init set_page_prot(void *addr, pgprot_t 
prot)
 {
return set_page_prot_flags(addr, prot, UVMF_NONE);
 }
+
+pgd_t * __init xen_pv_kasan_early_init(void)
+{
+   /* PV page tables must be read-only */
+   set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
+   set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
+   set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);
+
+   /* Return a pointer to the initial PV page tables */
+   return (pgd_t *)xen_start_info->pt_base;
+}
+
+void __init xen_pv_kasan_pin_pgd(pgd_t *pgd)
+{
+   if (!xen_pv_domain())
+   return;
+
+   set_page_prot(pgd, PAGE_KERNEL_RO);
+   pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa_symbol(pgd)));
+}
+
+void __init xen_pv_kasan_unpin_pgd(pgd_t *pgd)
+{
+   if (!xen_pv_domain())
+   return;
+
+   pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa_symbol(pgd)));
+   set_page_prot(pgd, PAGE_KERNEL);
+}
+
 #ifdef CONFIG_X86_32
 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
@@ -1943,6 +1973,14 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, 
unsigned long max_pfn)
if (i && i < pgd_index(__START_KERNEL_map))
init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
 
+#ifdef CONFIG_KASAN
+   /* Copy KASAN mappings */
+   for (i = pgd_index(KASAN_SHADOW_START);
+i < pgd_index(KASAN_SHADOW_END);
+i++)
+   init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
+#endif /* ifdef CONFIG_KASAN

[Xen-devel] [PATCH v2 3/4] xen: teach KASAN about grant tables

2020-01-17 Thread Sergey Dyasli

From: Ross Lagerwall 

Otherwise it produces lots of false positives when a guest starts using
PV I/O devices.

Signed-off-by: Ross Lagerwall 
Signed-off-by: Sergey Dyasli 
---
v1 --> v2:
- no changes

RFC --> v1:
- Slightly clarified the commit message
---
 drivers/xen/grant-table.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7b36b51cdb9f..ce95f7232de6 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1048,6 +1048,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
foreign = xen_page_foreign(pages[i]);
foreign->domid = map_ops[i].dom;
foreign->gref = map_ops[i].ref;
+   kasan_alloc_pages(pages[i], 0);
break;
}
 
@@ -1084,8 +1085,10 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref 
*unmap_ops,
if (ret)
return ret;
 
-   for (i = 0; i < count; i++)
+   for (i = 0; i < count; i++) {
ClearPageForeign(pages[i]);
+   kasan_free_pages(pages[i], 0);
+   }
 
return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
 }
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v1 1/4] kasan: introduce set_pmd_early_shadow()

2020-01-15 Thread Sergey Dyasli

On 15/01/2020 11:09, Jürgen Groß wrote:
> On 15.01.20 11:54, Sergey Dyasli wrote:
>> Hi Juergen,
>>
>> On 08/01/2020 15:20, Sergey Dyasli wrote:
>>> It is incorrect to call pmd_populate_kernel() multiple times for the
>>> same page table. Xen notices it during kasan_populate_early_shadow():
>>>
>>>  (XEN) mm.c:3222:d155v0 mfn 3704b already pinned
>>>
>>> This happens for kasan_early_shadow_pte when USE_SPLIT_PTE_PTLOCKS is
>>> enabled. Fix this by introducing set_pmd_early_shadow() which calls
>>> pmd_populate_kernel() only once and uses set_pmd() afterwards.
>>>
>>> Signed-off-by: Sergey Dyasli 
>>
>> Looks like the plan to use set_pmd() directly has failed: it's an
>> arch-specific function and can't be used in arch-independent code
>> (as kbuild test robot has proven).
>>
>> Do you see any way out of this other than disabling SPLIT_PTE_PTLOCKS
>> for PV KASAN?
>
> Change set_pmd_early_shadow() like the following:
>
> #ifdef CONFIG_XEN_PV
> static inline void set_pmd_early_shadow(pmd_t *pmd, pte_t *early_shadow)
> {
> static bool pmd_populated = false;
>
> if (likely(pmd_populated)) {
> set_pmd(pmd, __pmd(__pa(early_shadow) | _PAGE_TABLE));
> } else {
> pmd_populate_kernel(_mm, pmd, early_shadow);
> pmd_populated = true;
> }
> }
> #else
> static inline void set_pmd_early_shadow(pmd_t *pmd, pte_t *early_shadow)
> {
> pmd_populate_kernel(_mm, pmd, early_shadow);
> }
> #endif
>
> ... and move it to include/xen/xen-ops.h and call it with
> lm_alias(kasan_early_shadow_pte) as the second parameter.

Your suggestion to use ifdef is really good, especially now when I
figured out that CONFIG_XEN_PV implies X86. But I don't like the idea
of kasan code calling a non-empty function from xen-ops.h when
CONFIG_XEN_PV is not defined. I'd prefer to keep set_pmd_early_shadow()
in mm/kasan/init.c with the suggested ifdef.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v1 4/4] xen/netback: Fix grant copy across page boundary with KASAN

2020-01-15 Thread Sergey Dyasli

On 09/01/2020 10:33, Vlastimil Babka wrote:
> On 1/8/20 4:21 PM, Sergey Dyasli wrote:
>> From: Ross Lagerwall 
>>
>> When KASAN (or SLUB_DEBUG) is turned on, the normal expectation that
>> allocations are aligned to the next power of 2 of the size does not
>> hold.
>
> Hmm, really? They should after 59bb47985c1d ("mm, sl[aou]b: guarantee
> natural alignment for kmalloc(power-of-two)"), i.e. since 5.4.
>
> But actually the guarantee is only for precise power of two sizes given
> to kmalloc(). Allocations of sizes that end up using the 96 or 192 bytes
> kmalloc cache have no such guarantee. But those might then cross page
> boundary also without SLUB_DEBUG.

That's interesting to know. It's certainly not the case for the 4.19 kernel,
for which PV KASAN was initially developed. But I guess this means that
only the patch description needs updating.

>
>> Therefore, handle grant copies that cross page boundaries.
>>
>> Signed-off-by: Ross Lagerwall 
>> Signed-off-by: Sergey Dyasli 

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v1 1/4] kasan: introduce set_pmd_early_shadow()

2020-01-15 Thread Sergey Dyasli

Hi Juergen,

On 08/01/2020 15:20, Sergey Dyasli wrote:
> It is incorrect to call pmd_populate_kernel() multiple times for the
> same page table. Xen notices it during kasan_populate_early_shadow():
>
> (XEN) mm.c:3222:d155v0 mfn 3704b already pinned
>
> This happens for kasan_early_shadow_pte when USE_SPLIT_PTE_PTLOCKS is
> enabled. Fix this by introducing set_pmd_early_shadow() which calls
> pmd_populate_kernel() only once and uses set_pmd() afterwards.
>
> Signed-off-by: Sergey Dyasli 

Looks like the plan to use set_pmd() directly has failed: it's an
arch-specific function and can't be used in arch-independent code
(as kbuild test robot has proven).

Do you see any way out of this other than disabling SPLIT_PTE_PTLOCKS
for PV KASAN?

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] Recent cores-scheduling failures

2020-01-15 Thread Sergey Dyasli

On 19/12/2019 16:14, Jürgen Groß wrote:
> On 19.12.19 13:45, Sergey Dyasli wrote:
>> Hi Juergen,
>>
>> We recently did another quick test of core scheduling mode, and the following
>> failures were found:
>>
>> 1. live-patch apply failures:
>>
>>  (XEN) [ 1058.751974] livepatch: lp_1_1: Timed out on semaphore in CPU 
>> quiesce phase 30/31
>>  (XEN) [ 1058.751982] livepatch: lp_1_1 finished REPLACE with rc=-16

Have you been able to look into this one?

>>
>> 2. ACPI S5 crash:
>>
>>  https://paste.debian.net/1121748/
>
> So in sched_slave() *vprev is already scrubbed.
>
> I have currently no idea how that could happen, as vprev->is_running
> should be cleared only a little bit later.

Have you been able to identify the place in code where this happens?
I can try adding some debug messages.

In some good news, we did more XenRT testing with core scheduling mode
and there were no other issues found so far.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v2] xsm: hide detailed Xen version from unprivileged guests

2020-01-14 Thread Sergey Dyasli

On 13/01/2020 14:40, Andrew Cooper wrote:
> On 13/01/2020 12:51, George Dunlap wrote:
>>   So Sergey's second patch:
>>  - Still denies XENVER_extraversion at the hypervisor level
>>  - Leaves the value returned by the hypervisor as "<denied>"
>>  - Filters the "<denied>" string at the hvmloader level, to prevent it
>> leaking into a GUI and scaring customers.
>
> The SMBios table isn't the only way XENVER_extraversion leaks up into
> the UI.
>
> XENVER_extraversion isn't the only source of redacted information
> leaking up into the UI.
>
> Linux for example exports it all via sysfs.  The windows drivers put
> XENVER_extraversion into several other logs.

I've found that /sys/hypervisor/version/extra returns "<denied>".
"" would have looked better there.

>> Now we get to Andy's objection on the 10th:
>>
>> ---
>> The reason for this (which ought to be obvious, but I guess only to
>> those who actually do customer support) is basic human physiology.
>> "denied" means something has gone wrong.  It scares people, and causes
>> them to seek help to change fix whatever is broken.
>>
>> It is not appropriate for it to find its way into the guest in the first
>> place, and that includes turning up in `dmesg` and other logs, and
>> expecting guest runtime to filter for it is complete nonsense.
>> ---
>>
>> Basically, Andy says that *anywhere* it might show up is way too scary,
>> even a guest dmesg log.
>>
>> Well, I disagree; I look in "dmesg" and I see loads of "scary" things.
>
> Just because dmesg is not an example of a good UI, doesn't mean its ok
> for us to make:
>
> Xen version: 4.14 (preserve-AD)

And the above is indeed found in dmesg of PV domains (they have no SMbios).
"" is not appropriate here indeed. It should be either "" or
generic ".0" IMHO.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v2] xsm: hide detailed Xen version from unprivileged guests

2020-01-13 Thread Sergey Dyasli

On 10/01/2020 11:02, Andrew Cooper wrote:
> On 10/01/2020 10:37, Sergey Dyasli wrote:
>> Hide the following information that can help identify the running Xen
>> binary version: XENVER_extraversion, XENVER_compile_info, XENVER_changeset.
>> Add explicit cases for XENVER_commandline and XENVER_build_id as well.
>>
>> Introduce xsm_filter_denied() to hvmloader to remove "<denied>" string
>> from guest's DMI tables that otherwise would be shown in tools like
>> dmidecode.
>>
>> Signed-off-by: Sergey Dyasli 
>> ---
>> v1 --> v2:
>> - Added xsm_filter_denied() to hvmloader instead of modifying xen_deny()
>> - Made behaviour the same for both Release and Debug builds
>> - XENVER_capabilities is no longer hidden
>>
>> CC: Andrew Cooper 
>> CC: George Dunlap 
>> CC: Ian Jackson 
>> CC: Jan Beulich 
>> CC: Julien Grall 
>> CC: Konrad Rzeszutek Wilk 
>> CC: Stefano Stabellini 
>> CC: Wei Liu 
>> CC: Daniel De Graaf 
>
> I realise there are arguments over how to fix this, but we (the Xen
> community) have already f*cked up once here, and this is doing so a
> second time.
>
> Nack.
>
> Fixing it anywhere other than Xen is simply not appropriate.
>
> The reason for this (which ought to be obvious, but I guess only to
> those who actually do customer support) is basic human physiology.
> "denied" means something has gone wrong.  It scares people, and causes
> them to seek help to change fix whatever is broken.

But the patch takes care of that by removing "denied" from DMI tables.
Functionally it should have the same effect as v1 to ordinary guests.

> It is not appropriate for it to find its way into the guest in the first
> place, and that includes turning up in `dmesg` and other logs, and
> expecting guest runtime to filter for it is complete nonsense.

`dmesg` will have only Xen major version (e.g. Xen 4.13) with this patch
applied. Even if there exists a tool which uses xen_version hypercall
for information gathering, it would show you "<denied>" for fields like
commandline and build_id already (without the patch). So extending this
behaviour for other sensitive fields is not a regression IMHO.

> As said several times before, the empty string is completely fine ABI
> wise, doesn't confuse customers, and really really does work in practice.

I agree with the other opinion that returning an empty string is too
ambiguous. I'd prefer to retain the current behaviour with (whatever)
non-empty descriptive string.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v1 4/4] xen/netback: Fix grant copy across page boundary with KASAN

2020-01-10 Thread Sergey Dyasli

On 09/01/2020 13:36, Paul Durrant wrote:
> On Wed, 8 Jan 2020 at 15:21, Sergey Dyasli  wrote:
>>
>> From: Ross Lagerwall 
>>
>> When KASAN (or SLUB_DEBUG) is turned on, the normal expectation that
>> allocations are aligned to the next power of 2 of the size does not
>> hold. Therefore, handle grant copies that cross page boundaries.
>>
>> Signed-off-by: Ross Lagerwall 
>> Signed-off-by: Sergey Dyasli 
>> ---
>> RFC --> v1:
>> - Added BUILD_BUG_ON to the netback patch
>> - xenvif_idx_release() now located outside the loop
>>
>> CC: Wei Liu 
>> CC: Paul Durrant 
> [snip]
>>
>> +static void __init __maybe_unused build_assertions(void)
>> +{
>> +   BUILD_BUG_ON(sizeof(struct xenvif_tx_cb) > 48);
>
> FIELD_SIZEOF(struct sk_buff, cb) rather than a magic '48' I think.

The macro got renamed recently, so now it should be:

sizeof_field(struct sk_buff, cb)

Thanks for the suggestion.

--
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v1 2/4] x86/xen: add basic KASAN support for PV kernel

2020-01-10 Thread Sergey Dyasli

On 09/01/2020 23:27, Boris Ostrovsky wrote:
> 
> 
> On 1/8/20 10:20 AM, Sergey Dyasli wrote:
>> @@ -1943,6 +1973,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, 
>> unsigned long max_pfn)
>>   if (i && i < pgd_index(__START_KERNEL_map))
>>   init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
>>   +#ifdef CONFIG_KASAN
>> +    /*
>> + * Copy KASAN mappings
>> + * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory 
>> (16TB)
>> + */
>> +    for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++)
> 
> Are you referring here to  KASAN_SHADOW_START and KASAN_SHADOW_END? If so, 
> can you use them instead?

Indeed, the following macros make the code neater:

#ifdef CONFIG_KASAN
/* Copy KASAN mappings */
for (i = pgd_index(KASAN_SHADOW_START);
 i < pgd_index(KASAN_SHADOW_END);
 i++)
init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
#endif /* ifdef CONFIG_KASAN */

--
Thanks,
Sergey


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v1 2/4] x86/xen: add basic KASAN support for PV kernel

2020-01-10 Thread Sergey Dyasli

On 09/01/2020 09:15, Jürgen Groß wrote:
> On 08.01.20 16:20, Sergey Dyasli wrote:
>> This enables to use Outline instrumentation for Xen PV kernels.
>>
>> KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
>> and hence disabled.
>>
>> Signed-off-by: Sergey Dyasli 
>> ---
>> RFC --> v1:
>> - New functions with declarations in xen/xen-ops.h
>> - Fixed the issue with free_kernel_image_pages() with the help of
>>xen_pv_kasan_unpin_pgd()
>> ---
>>   arch/x86/mm/kasan_init_64.c | 12 
>>   arch/x86/xen/Makefile   |  7 +++
>>   arch/x86/xen/enlighten_pv.c |  3 +++
>>   arch/x86/xen/mmu_pv.c   | 39 +
>>   drivers/xen/Makefile|  2 ++
>>   include/xen/xen-ops.h   |  4 
>>   kernel/Makefile |  2 ++
>>   lib/Kconfig.kasan   |  3 ++-
>>   8 files changed, 71 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
>> index cf5bc37c90ac..902a6a152d33 100644
>> --- a/arch/x86/mm/kasan_init_64.c
>> +++ b/arch/x86/mm/kasan_init_64.c
>> @@ -13,6 +13,9 @@
>>   #include 
>>   #include 
>>   +#include 
>> +#include 
>> +
>>   #include 
>>   #include 
>>   #include 
>> @@ -332,6 +335,11 @@ void __init kasan_early_init(void)
>>   for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
>>   kasan_early_shadow_p4d[i] = __p4d(p4d_val);
>>   +if (xen_pv_domain()) {
>> +pgd_t *pv_top_pgt = xen_pv_kasan_early_init();
>
> You are breaking the build with CONFIG_XEN_PV undefined here.

Right, the following is needed:

diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 91d66520f0a3..3d20f000af12 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -241,8 +241,14 @@ static inline void xen_preemptible_hcall_end(void)

 #endif /* CONFIG_PREEMPT */

+#if defined(CONFIG_XEN_PV)
 pgd_t *xen_pv_kasan_early_init(void);
 void xen_pv_kasan_pin_pgd(pgd_t *pgd);
 void xen_pv_kasan_unpin_pgd(pgd_t *pgd);
+#else
+static inline pgd_t *xen_pv_kasan_early_init(void) { return NULL; }
+static inline void xen_pv_kasan_pin_pgd(pgd_t *pgd) { }
+static inline void xen_pv_kasan_unpin_pgd(pgd_t *pgd) { }
+#endif /* defined(CONFIG_XEN_PV) */

 #endif /* INCLUDE_XEN_OPS_H */

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v2] xsm: hide detailed Xen version from unprivileged guests

2020-01-10 Thread Sergey Dyasli

Hide the following information that can help identify the running Xen
binary version: XENVER_extraversion, XENVER_compile_info, XENVER_changeset.
Add explicit cases for XENVER_commandline and XENVER_build_id as well.

Introduce xsm_filter_denied() to hvmloader to remove "<denied>" string
from guest's DMI tables that otherwise would be shown in tools like
dmidecode.

Signed-off-by: Sergey Dyasli 
---
v1 --> v2:
- Added xsm_filter_denied() to hvmloader instead of modifying xen_deny()
- Made behaviour the same for both Release and Debug builds
- XENVER_capabilities is no longer hidden

CC: Andrew Cooper 
CC: George Dunlap 
CC: Ian Jackson 
CC: Jan Beulich 
CC: Julien Grall 
CC: Konrad Rzeszutek Wilk 
CC: Stefano Stabellini 
CC: Wei Liu 
CC: Daniel De Graaf 
---
 tools/firmware/hvmloader/hvmloader.c | 1 +
 tools/firmware/hvmloader/smbios.c| 1 +
 tools/firmware/hvmloader/util.c  | 6 ++
 tools/firmware/hvmloader/util.h  | 2 ++
 xen/include/xsm/dummy.h  | 9 ++---
 5 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/tools/firmware/hvmloader/hvmloader.c 
b/tools/firmware/hvmloader/hvmloader.c
index 598a226278..e760ed5fa6 100644
--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -147,6 +147,7 @@ static void init_hypercalls(void)
 /* Print version information. */
 cpuid(base + 1, , , , );
 hypercall_xen_version(XENVER_extraversion, extraversion);
+xsm_filter_denied(extraversion, sizeof(extraversion));
 printf("Detected Xen v%u.%u%s\n", eax >> 16, eax & 0x, extraversion);
 }
 
diff --git a/tools/firmware/hvmloader/smbios.c 
b/tools/firmware/hvmloader/smbios.c
index 97a054e9e3..1ba352ed2c 100644
--- a/tools/firmware/hvmloader/smbios.c
+++ b/tools/firmware/hvmloader/smbios.c
@@ -275,6 +275,7 @@ hvm_write_smbios_tables(
 xen_minor_version = (uint16_t) xen_version;
 
 hypercall_xen_version(XENVER_extraversion, xen_extra_version);
+xsm_filter_denied(xen_extra_version, sizeof(xen_extra_version));
 
 /* build up human-readable Xen version string */
 p = xen_version_str;
diff --git a/tools/firmware/hvmloader/util.c b/tools/firmware/hvmloader/util.c
index 0c3f2d24cd..09e355fa3d 100644
--- a/tools/firmware/hvmloader/util.c
+++ b/tools/firmware/hvmloader/util.c
@@ -995,6 +995,12 @@ void hvmloader_acpi_build_tables(struct acpi_config 
*config,
 hvm_param_set(HVM_PARAM_VM_GENERATION_ID_ADDR, config->vm_gid_addr);
 }
 
+void xsm_filter_denied(char *str, size_t len)
+{
+if ( strcmp(str, "<denied>") == 0 )
+memset(str, 0, len);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/firmware/hvmloader/util.h b/tools/firmware/hvmloader/util.h
index 7bca6418d2..f7d907ca00 100644
--- a/tools/firmware/hvmloader/util.h
+++ b/tools/firmware/hvmloader/util.h
@@ -286,6 +286,8 @@ struct acpi_config;
 void hvmloader_acpi_build_tables(struct acpi_config *config,
  unsigned int physical);
 
+void xsm_filter_denied(char *str, size_t len);
+
 #endif /* __HVMLOADER_UTIL_H__ */
 
 /*
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index b8e185e6fa..d15b078f10 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -750,14 +750,17 @@ static XSM_INLINE int xsm_xen_version (XSM_DEFAULT_ARG 
uint32_t op)
 case XENVER_get_features:
 /* These sub-ops ignore the permission checks and return data. */
 return 0;
-case XENVER_extraversion:
-case XENVER_compile_info:
 case XENVER_capabilities:
-case XENVER_changeset:
 case XENVER_pagesize:
 case XENVER_guest_handle:
 /* These MUST always be accessible to any guest by default. */
 return xsm_default_action(XSM_HOOK, current->domain, NULL);
+
+case XENVER_extraversion:
+case XENVER_compile_info:
+case XENVER_changeset:
+case XENVER_commandline:
+case XENVER_build_id:
 default:
 return xsm_default_action(XSM_PRIV, current->domain, NULL);
 }
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v1 0/4] basic KASAN support for Xen PV domains

2020-01-08 Thread Sergey Dyasli

This series allows booting and running Xen PV kernels (Dom0 and DomU) with
CONFIG_KASAN=y. It has been used internally for some time now with good
results for finding memory corruption issues in the Dom0 kernel.

Only Outline instrumentation is supported at the moment.

Sergey Dyasli (2):
  kasan: introduce set_pmd_early_shadow()
  x86/xen: add basic KASAN support for PV kernel

Ross Lagerwall (2):
  xen: teach KASAN about grant tables
  xen/netback: Fix grant copy across page boundary with KASAN

 arch/x86/mm/kasan_init_64.c   | 12 +++
 arch/x86/xen/Makefile |  7 
 arch/x86/xen/enlighten_pv.c   |  3 ++
 arch/x86/xen/mmu_pv.c | 39 
 drivers/net/xen-netback/common.h  |  2 +-
 drivers/net/xen-netback/netback.c | 59 +--
 drivers/xen/Makefile  |  2 ++
 drivers/xen/grant-table.c |  5 ++-
 include/xen/xen-ops.h |  4 +++
 kernel/Makefile   |  2 ++
 lib/Kconfig.kasan |  3 +-
 mm/kasan/init.c   | 25 -
 12 files changed, 141 insertions(+), 22 deletions(-)

-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v1 3/4] xen: teach KASAN about grant tables

2020-01-08 Thread Sergey Dyasli

From: Ross Lagerwall 

Otherwise it produces lots of false positives when a guest starts using
PV I/O devices.

Signed-off-by: Ross Lagerwall 
Signed-off-by: Sergey Dyasli 
---
RFC --> v1:
- Slightly clarified the commit message
---
 drivers/xen/grant-table.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7b36b51cdb9f..ce95f7232de6 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1048,6 +1048,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
foreign = xen_page_foreign(pages[i]);
foreign->domid = map_ops[i].dom;
foreign->gref = map_ops[i].ref;
+   kasan_alloc_pages(pages[i], 0);
break;
}
 
@@ -1084,8 +1085,10 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref 
*unmap_ops,
if (ret)
return ret;
 
-   for (i = 0; i < count; i++)
+   for (i = 0; i < count; i++) {
ClearPageForeign(pages[i]);
+   kasan_free_pages(pages[i], 0);
+   }
 
return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
 }
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

[Xen-devel] [PATCH v1 4/4] xen/netback: Fix grant copy across page boundary with KASAN

2020-01-08 Thread Sergey Dyasli

From: Ross Lagerwall 

When KASAN (or SLUB_DEBUG) is turned on, the normal expectation that
allocations are aligned to the next power of 2 of the size does not
hold. Therefore, handle grant copies that cross page boundaries.

Signed-off-by: Ross Lagerwall 
Signed-off-by: Sergey Dyasli 
---
RFC --> v1:
- Added BUILD_BUG_ON to the netback patch
- xenvif_idx_release() now located outside the loop

CC: Wei Liu 
CC: Paul Durrant 
---
 drivers/net/xen-netback/common.h  |  2 +-
 drivers/net/xen-netback/netback.c | 59 +--
 2 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 05847eb91a1b..e57684415edd 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -155,7 +155,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
 
-   struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+   struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS * 2];
struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
/* passed to gnttab_[un]map_refs with pages under (un)mapping */
diff --git a/drivers/net/xen-netback/netback.c 
b/drivers/net/xen-netback/netback.c
index 0020b2e8c279..33b8f8d043e6 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -320,6 +320,7 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
 
 struct xenvif_tx_cb {
u16 pending_idx;
+   u8 copies;
 };
 
 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
@@ -439,6 +440,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
 {
struct gnttab_map_grant_ref *gop_map = *gopp_map;
u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+   u8 copies = XENVIF_TX_CB(skb)->copies;
/* This always points to the shinfo of the skb being checked, which
 * could be either the first or the one on the frag_list
 */
@@ -450,23 +452,26 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
int nr_frags = shinfo->nr_frags;
const bool sharedslot = nr_frags &&
frag_get_pending_idx(>frags[0]) == 
pending_idx;
-   int i, err;
+   int i, err = 0;
 
-   /* Check status of header. */
-   err = (*gopp_copy)->status;
-   if (unlikely(err)) {
-   if (net_ratelimit())
-   netdev_dbg(queue->vif->dev,
+   while (copies) {
+   /* Check status of header. */
+   int newerr = (*gopp_copy)->status;
+   if (unlikely(newerr)) {
+   if (net_ratelimit())
+   netdev_dbg(queue->vif->dev,
   "Grant copy of header failed! status: %d 
pending_idx: %u ref: %u\n",
   (*gopp_copy)->status,
   pending_idx,
   (*gopp_copy)->source.u.ref);
-   /* The first frag might still have this slot mapped */
-   if (!sharedslot)
-   xenvif_idx_release(queue, pending_idx,
-  XEN_NETIF_RSP_ERROR);
+   err = newerr;
+   }
+   (*gopp_copy)++;
+   copies--;
}
-   (*gopp_copy)++;
+   /* The first frag might still have this slot mapped */
+   if (unlikely(err) && !sharedslot)
+   xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
 
 check_frags:
for (i = 0; i < nr_frags; i++, gop_map++) {
@@ -910,6 +915,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
xenvif_tx_err(queue, , extra_count, idx);
break;
}
+   XENVIF_TX_CB(skb)->copies = 0;
 
skb_shinfo(skb)->nr_frags = ret;
if (data_len < txreq.size)
@@ -933,6 +939,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
   "Can't allocate the 
frag_list skb.\n");
break;
}
+   XENVIF_TX_CB(nskb)->copies = 0;
}
 
if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
@@ -990,6 +997,31 @@ static void xenvif_tx_build_gops(struct xenvif_queue 
*queue,
 
queue->tx_copy_ops[*copy_ops].len = data_len;
queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
+   XENVIF_TX_CB(skb)->copies++;
+
+   if (offset_in_page(skb->data) + data_len > XEN_PAGE_SIZE) {
+

[Xen-devel] [PATCH v1 2/4] x86/xen: add basic KASAN support for PV kernel

2020-01-08 Thread Sergey Dyasli

This enables the use of Outline instrumentation for Xen PV kernels.

The KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
and are hence disabled.

Signed-off-by: Sergey Dyasli 
---
RFC --> v1:
- New functions with declarations in xen/xen-ops.h
- Fixed the issue with free_kernel_image_pages() with the help of
  xen_pv_kasan_unpin_pgd()
---
 arch/x86/mm/kasan_init_64.c | 12 
 arch/x86/xen/Makefile   |  7 +++
 arch/x86/xen/enlighten_pv.c |  3 +++
 arch/x86/xen/mmu_pv.c   | 39 +
 drivers/xen/Makefile|  2 ++
 include/xen/xen-ops.h   |  4 
 kernel/Makefile |  2 ++
 lib/Kconfig.kasan   |  3 ++-
 8 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index cf5bc37c90ac..902a6a152d33 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -13,6 +13,9 @@
 #include 
 #include 
 
+#include 
+#include 
+
 #include 
 #include 
 #include 
@@ -332,6 +335,11 @@ void __init kasan_early_init(void)
for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
kasan_early_shadow_p4d[i] = __p4d(p4d_val);
 
+   if (xen_pv_domain()) {
+   pgd_t *pv_top_pgt = xen_pv_kasan_early_init();
+   kasan_map_early_shadow(pv_top_pgt);
+   }
+
kasan_map_early_shadow(early_top_pgt);
kasan_map_early_shadow(init_top_pgt);
 }
@@ -369,6 +377,8 @@ void __init kasan_init(void)
__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
}
 
+   xen_pv_kasan_pin_pgd(early_top_pgt);
+
load_cr3(early_top_pgt);
__flush_tlb_all();
 
@@ -433,6 +443,8 @@ void __init kasan_init(void)
load_cr3(init_top_pgt);
__flush_tlb_all();
 
+   xen_pv_kasan_unpin_pgd(early_top_pgt);
+
/*
 * kasan_early_shadow_page has been used as early shadow memory, thus
 * it may contain some garbage. Now we can clear and write protect it,
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 084de77a109e..102fad0b0bca 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,3 +1,10 @@
+KASAN_SANITIZE_enlighten_pv.o := n
+KASAN_SANITIZE_enlighten.o := n
+KASAN_SANITIZE_irq.o := n
+KASAN_SANITIZE_mmu_pv.o := n
+KASAN_SANITIZE_p2m.o := n
+KASAN_SANITIZE_multicalls.o := n
+
 # SPDX-License-Identifier: GPL-2.0
 OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
 
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae4a41ca19f6..27de55699f24 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -72,6 +72,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_ACPI
 #include 
@@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Get mfn list */
xen_build_dynamic_phys_to_machine();
 
+   kasan_early_init();
+
/*
 * Set up kernel GDT and segment registers, mainly so that
 * -fstack-protector code can be executed.
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index c8dbee62ec2a..cf6ff214d9ea 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1771,6 +1771,36 @@ static void __init set_page_prot(void *addr, pgprot_t 
prot)
 {
return set_page_prot_flags(addr, prot, UVMF_NONE);
 }
+
+pgd_t * __init xen_pv_kasan_early_init(void)
+{
+   /* PV page tables must be read-only */
+   set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
+   set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
+   set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);
+
+   /* Return a pointer to the initial PV page tables */
+   return (pgd_t *)xen_start_info->pt_base;
+}
+
+void __init xen_pv_kasan_pin_pgd(pgd_t *pgd)
+{
+   if (!xen_pv_domain())
+   return;
+
+   set_page_prot(pgd, PAGE_KERNEL_RO);
+   pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa_symbol(pgd)));
+}
+
+void __init xen_pv_kasan_unpin_pgd(pgd_t *pgd)
+{
+   if (!xen_pv_domain())
+   return;
+
+   pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa_symbol(pgd)));
+   set_page_prot(pgd, PAGE_KERNEL);
+}
+
 #ifdef CONFIG_X86_32
 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
@@ -1943,6 +1973,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, 
unsigned long max_pfn)
if (i && i < pgd_index(__START_KERNEL_map))
init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
 
+#ifdef CONFIG_KASAN
+   /*
+* Copy KASAN mappings
+* ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
+*/
+   for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++)
+   init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
+#endif
+
/* Make pagetable pieces RO */
set_page_prot(init_top_pgt, PAGE_K

[Xen-devel] [PATCH v1 1/4] kasan: introduce set_pmd_early_shadow()

2020-01-08 Thread Sergey Dyasli

It is incorrect to call pmd_populate_kernel() multiple times for the
same page table. Xen notices it during kasan_populate_early_shadow():

(XEN) mm.c:3222:d155v0 mfn 3704b already pinned

This happens for kasan_early_shadow_pte when USE_SPLIT_PTE_PTLOCKS is
enabled. Fix this by introducing set_pmd_early_shadow() which calls
pmd_populate_kernel() only once and uses set_pmd() afterwards.

Signed-off-by: Sergey Dyasli 
---
RFC --> v1:
- New patch
---
 mm/kasan/init.c | 25 +
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index ce45c491ebcd..a4077320777f 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -81,6 +81,19 @@ static inline bool kasan_early_shadow_page_entry(pte_t pte)
return pte_page(pte) == virt_to_page(lm_alias(kasan_early_shadow_page));
 }
 
+static inline void set_pmd_early_shadow(pmd_t *pmd)
+{
+   static bool pmd_populated = false;
+   pte_t *early_shadow = lm_alias(kasan_early_shadow_pte);
+
+   if (likely(pmd_populated)) {
+   set_pmd(pmd, __pmd(__pa(early_shadow) | _PAGE_TABLE));
+   } else {
+   pmd_populate_kernel(_mm, pmd, early_shadow);
+   pmd_populated = true;
+   }
+}
+
 static __init void *early_alloc(size_t size, int node)
 {
void *ptr = memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
@@ -120,8 +133,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned 
long addr,
next = pmd_addr_end(addr, end);
 
if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
-   pmd_populate_kernel(_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
@@ -157,8 +169,7 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned 
long addr,
pud_populate(_mm, pud,
lm_alias(kasan_early_shadow_pmd));
pmd = pmd_offset(pud, addr);
-   pmd_populate_kernel(_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
@@ -198,8 +209,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned 
long addr,
pud_populate(_mm, pud,
lm_alias(kasan_early_shadow_pmd));
pmd = pmd_offset(pud, addr);
-   pmd_populate_kernel(_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
@@ -271,8 +281,7 @@ int __ref kasan_populate_early_shadow(const void 
*shadow_start,
pud_populate(_mm, pud,
lm_alias(kasan_early_shadow_pmd));
pmd = pmd_offset(pud, addr);
-   pmd_populate_kernel(_mm, pmd,
-   lm_alias(kasan_early_shadow_pte));
+   set_pmd_early_shadow(pmd);
continue;
}
 
-- 
2.17.1


___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] Recent cores-scheduling failures

2020-01-07 Thread Sergey Dyasli

On 20/12/2019 06:26, Jürgen Groß wrote:
> On 19.12.19 13:45, Sergey Dyasli wrote:
>> Hi Juergen,
>>
>> We recently did another quick test of core scheduling mode, and the following
>> failures were found:
>>
>> 1. live-patch apply failures:
>>
>>  (XEN) [ 1058.751974] livepatch: lp_1_1: Timed out on semaphore in CPU 
>> quiesce phase 30/31
>>  (XEN) [ 1058.751982] livepatch: lp_1_1 finished REPLACE with rc=-16
>>
>> 2. ACPI S5 crash:
>>
>>  https://paste.debian.net/1121748/
>
> Are there any XenServer patches in your hypervisor?
>
> I'm asking because I don't see why a vcpu would be freed when shutting
> down the host (other than by any shutdown scripts, but those should be
> long finished when trying to enter S5).

While we have the patch-queue applied in our testing, there is nothing
there that would affect the scheduler directly.

The S5 crash reproduces reliably in automated testing, but I still don't
know how to trigger the issue manually.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] xsm: hide detailed Xen version from unprivileged guests

2020-01-07 Thread Sergey Dyasli

On 06/01/2020 14:40, Jan Beulich wrote:
> On 06.01.2020 15:35, Sergey Dyasli wrote:
>> On 06/01/2020 11:28, George Dunlap wrote:
>>> On 12/19/19 11:15 PM, Andrew Cooper wrote:
>>>> On 19/12/2019 11:35, Jan Beulich wrote:
>>>>>>>> XENVER_changeset
>>>>>>>> XENVER_commandline
>>>>>>>> XENVER_build_id
>>>>>>>>
>>>>>>>> Return a more customer friendly empty string instead of "<denied>"
>>>>>>>> which would be shown in tools like dmidecode.>
>>>>>>> I think "<denied>" is quite fine for many of the original purposes.
>>>>>>> Maybe it would be better to filter for this when populating guest
>>>>>>> DMI tables?
>>>>>> I don't know how DMI tables are populated, but nothing stops a guest
>>>>>> from using these hypercalls directly.
>>>>> And this is precisely the case where I think "<denied>" is better
>>>>> than an empty string.
>>>>
>>>> "<denied>" was a terrible choice back when it was introduced, and its
>>>> still a terrible choice today.
>>>>
>>>> These are ASCII string fields, and the empty string is a perfectly good
>>>> string.  Nothing is going to break, because it would have broken the
>>>> first time around.
>>>>
>>>> The end result without denied sprayed all over this interface is much
>>>> cleaner overall.
>>>
>>> Unfortunately this mail doesn't contain any facts or arguments, just
>>> unsubstantiated value judgements.  What's so terrible about "<denied>"
>>> -- what bad effect does it have?  Why is "" better / cleaner?
>>
>> It can be explained with a picture (attached) ;)
>
> But that's something better addressed at or close to the presentation
> layer, not deep down in Xen.

I agree with that. And it looks like the following diff does the trick:

diff --git a/tools/firmware/hvmloader/smbios.c 
b/tools/firmware/hvmloader/smbios.c
index 97a054e9e3..b4d72c375f 100644
--- a/tools/firmware/hvmloader/smbios.c
+++ b/tools/firmware/hvmloader/smbios.c
@@ -275,6 +275,8 @@ hvm_write_smbios_tables(
 xen_minor_version = (uint16_t) xen_version;

 hypercall_xen_version(XENVER_extraversion, xen_extra_version);
+if ( strcmp(xen_extra_version, "<denied>") == 0 )
+memset(xen_extra_version, 0, sizeof(xen_extra_version));

 /* build up human-readable Xen version string */
 p = xen_version_str;

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [RFC PATCH 3/3] xen/netback: Fix grant copy across page boundary with KASAN

2020-01-07 Thread Sergey Dyasli

On 17/12/2019 15:14, Durrant, Paul wrote:
>> -Original Message-
>> From: Xen-devel  On Behalf Of
>> Sergey Dyasli
>> Sent: 17 December 2019 14:08
>> To: xen-de...@lists.xen.org; kasan-...@googlegroups.com; linux-
>> ker...@vger.kernel.org
>> Cc: Juergen Gross ; Sergey Dyasli
>> ; Stefano Stabellini ;
>> George Dunlap ; Ross Lagerwall
>> ; Alexander Potapenko ;
>> Andrey Ryabinin ; Boris Ostrovsky
>> ; Dmitry Vyukov 
>> Subject: [Xen-devel] [RFC PATCH 3/3] xen/netback: Fix grant copy across
>> page boundary with KASAN
>>
>> From: Ross Lagerwall 
>>
>> When KASAN (or SLUB_DEBUG) is turned on, the normal expectation that
>> allocations are aligned to the next power of 2 of the size does not
>> hold. Therefore, handle grant copies that cross page boundaries.
>>
>> Signed-off-by: Ross Lagerwall 
>> Signed-off-by: Sergey Dyasli 
>
> Would have been nice to cc netback maintainers...

Sorry, I'll try to be more careful next time.

>
>> ---
>>  drivers/net/xen-netback/common.h  |  2 +-
>>  drivers/net/xen-netback/netback.c | 55 ---
>>  2 files changed, 45 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-
>> netback/common.h
>> index 05847eb91a1b..e57684415edd 100644
>> --- a/drivers/net/xen-netback/common.h
>> +++ b/drivers/net/xen-netback/common.h
>> @@ -155,7 +155,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
>>  struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
>>  grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
>>
>> -struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
>> +struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS * 2];
>>  struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
>>  struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
>>  /* passed to gnttab_[un]map_refs with pages under (un)mapping */
>> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-
>> netback/netback.c
>> index 0020b2e8c279..1541b6e0cc62 100644
>> --- a/drivers/net/xen-netback/netback.c
>> +++ b/drivers/net/xen-netback/netback.c
>> @@ -320,6 +320,7 @@ static int xenvif_count_requests(struct xenvif_queue
>> *queue,
>>
>>  struct xenvif_tx_cb {
>>  u16 pending_idx;
>> +u8 copies;
>>  };
>
> I know we're a way off the limit (48 bytes) but I wonder if we ought to have 
> a compile time check here that we're not overflowing skb->cb.

I will add a BUILD_BUG_ON()

>
>>
>>  #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
>> @@ -439,6 +440,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue
>> *queue,
>>  {
>>  struct gnttab_map_grant_ref *gop_map = *gopp_map;
>>  u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
>> +u8 copies = XENVIF_TX_CB(skb)->copies;
>>  /* This always points to the shinfo of the skb being checked, which
>>   * could be either the first or the one on the frag_list
>>   */
>> @@ -450,23 +452,27 @@ static int xenvif_tx_check_gop(struct xenvif_queue
>> *queue,
>>  int nr_frags = shinfo->nr_frags;
>>  const bool sharedslot = nr_frags &&
>>  frag_get_pending_idx(>frags[0]) ==
>> pending_idx;
>> -int i, err;
>> +int i, err = 0;
>>
>> -/* Check status of header. */
>> -err = (*gopp_copy)->status;
>> -if (unlikely(err)) {
>> -if (net_ratelimit())
>> -netdev_dbg(queue->vif->dev,
>> +while (copies) {
>> +/* Check status of header. */
>> +int newerr = (*gopp_copy)->status;
>> +if (unlikely(newerr)) {
>> +if (net_ratelimit())
>> +netdev_dbg(queue->vif->dev,
>> "Grant copy of header failed! status: %d
>> pending_idx: %u ref: %u\n",
>> (*gopp_copy)->status,
>> pending_idx,
>> (*gopp_copy)->source.u.ref);
>> -/* The first frag might still have this slot mapped */
>> -if (!sharedslot)
>> -xenvif_idx_release(queue, pending_idx,
>> -   XEN_NETIF_RSP_ERROR);
>> +/* The first frag might still have this slot mapped */
>> +if (!sharedslot

Re: [Xen-devel] [PATCH] x86/shim: Short circuit control/hardware checks in PV_SHIM_EXCLUSIVE builds

2020-01-07 Thread Sergey Dyasli

On 03/01/2020 20:07, Andrew Cooper wrote:
> The net diffstat is:
>   add/remove: 0/13 grow/shrink: 25/129 up/down: 6297/-20469 (-14172)
> 
> With the following objects/functions removed entirely:
>   iommu_hwdom_none   1   -  -1
>   hwdom_max_order4   -  -4
>   extra_hwdom_irqs   4   -  -4
>   ctldom_max_order   4   -  -4
>   acpi_c1e_quirk43   - -43
>   hvm_pirq_eoi  62   - -62
>   max_order 94   - -94
>   conring_puts 104   --104
>   propagate_node   119   --119
>   mmio_ro_emulate_ops  224   --224
>   mmcfg_intercept_ops  224   --224
>   pci_cfg_ok   295   --295
>   p2m_lock 546   --546
> 
> And the following reduced to stubs:
>   arch_iommu_hwdom_init852   2-850
>   p2m_add_foreign  880  16-864
> 
> Signed-off-by: Andrew Cooper 

I tested this patch some time ago on a private branch, so

Tested-by: Sergey Dyasli 

Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] xsm: hide detailed Xen version from unprivileged guests

2020-01-06 Thread Sergey Dyasli

On 06/01/2020 11:28, George Dunlap wrote:
> On 12/19/19 11:15 PM, Andrew Cooper wrote:
>> On 19/12/2019 11:35, Jan Beulich wrote:
>> XENVER_changeset
>> XENVER_commandline
>> XENVER_build_id
>>
>> Return a more customer friendly empty string instead of "<denied>"
>> which would be shown in tools like dmidecode.>
> I think "<denied>" is quite fine for many of the original purposes.
> Maybe it would be better to filter for this when populating guest
> DMI tables?
 I don't know how DMI tables are populated, but nothing stops a guest
 from using these hypercalls directly.
>>> And this is precisely the case where I think "<denied>" is better
>>> than an empty string.
>>
>> "<denied>" was a terrible choice back when it was introduced, and its
>> still a terrible choice today.
>>
>> These are ASCII string fields, and the empty string is a perfectly good
>> string.  Nothing is going to break, because it would have broken the
>> first time around.
>>
>> The end result without denied sprayed all over this interface is much
>> cleaner overall.
> 
> Unfortunately this mail doesn't contain any facts or arguments, just
> unsubstantiated value judgements.  What's so terrible about "<denied>"
> -- what bad effect does it have?  Why is "" better / cleaner?

It can be explained with a picture (attached) ;)

> 
> One negative effect of returning "" is that if you have a tool which
> doesn't check the value but just dumps it into a log somewhere, then the
> log just contains nothing at all.  A log which contains "<denied>" makes
> it clear to the person reading it that something has been hidden on
> purpose.  You can totally imagine someone wasting several hours trying
> to figure out why their logging isn't working, only to discover that it
> is working, but that it was just logging an empty string.
> 
> And is it so bad for dmidecode to return something like "<denied>" in
> that case?
> 
>  -George
> 
___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [RFC PATCH 0/3] basic KASAN support for Xen PV domains

2019-12-20 Thread Sergey Dyasli

On 17/12/2019 18:06, Boris Ostrovsky wrote:
> 
> 
>> On Dec 17, 2019, at 9:08 AM, Sergey Dyasli  wrote:
>>
>> This series allows booting and running Xen PV kernels (Dom0 and DomU) with
>> CONFIG_KASAN=y. It has been used internally for some time now with good
>> results for finding memory corruption issues in Dom0 kernel.
>>
>> Only Outline instrumentation is supported at the moment.
>>
>> Patch 1 is of RFC quality
>> Patches 2-3 are independent and quite self-contained.
> 
> 
> Don’t you need to initialize kasan before, for example, calling 
> kasan_alloc_pages() in patch 2?

Patch 1 is enough to correctly initialise PV Kasan. But without patch 2, lots
of false positive out-of-bounds accesses are reported once a guest starts using
PV I/O devices.

--
Thanks,
Sergey

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [RFC PATCH 1/3] x86/xen: add basic KASAN support for PV kernel

2019-12-19 Thread Sergey Dyasli

On 18/12/2019 09:24, Jürgen Groß wrote:
> On 17.12.19 15:08, Sergey Dyasli wrote:
>> This enables the use of Outline instrumentation for Xen PV kernels.
>>
>> KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
>> and hence are disabled.
>>
>> Rough edges in the patch are marked with XXX.
>>
>> Signed-off-by: Sergey Dyasli 
>> ---
>>   arch/x86/mm/init.c  | 14 ++
>>   arch/x86/mm/kasan_init_64.c | 28 
>>   arch/x86/xen/Makefile   |  7 +++
>>   arch/x86/xen/enlighten_pv.c |  3 +++
>>   arch/x86/xen/mmu_pv.c   | 13 +++--
>>   arch/x86/xen/multicalls.c   | 10 ++
>>   drivers/xen/Makefile|  2 ++
>>   kernel/Makefile |  2 ++
>>   lib/Kconfig.kasan   |  3 ++-
>>   9 files changed, 79 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
>> index e7bb483557c9..0c98a45eec6c 100644
>> --- a/arch/x86/mm/init.c
>> +++ b/arch/x86/mm/init.c
>> @@ -8,6 +8,8 @@
>>   #include 
>>   #include 
>>   +#include 
>> +
>>   #include 
>>   #include 
>>   #include 
>> @@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void 
>> *begin, void *end)
>>   unsigned long end_ul = (unsigned long)end;
>>   unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
>>   +/*
>> + * XXX: skip this for now. Otherwise it leads to:
>> + *
>> + * (XEN) mm.c:2713:d157v0 Bad type (saw 8c01 != exp 
>> e000) for mfn 36f40 (pfn 02f40)
>> + * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page
>> + * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 
>> entry 801036f40067 for l1e_owner d157, pg_owner d157
>> + *
>> + * and further #PF error: [PROT] [WRITE] in the kernel.
>> + */
>> +if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN))
>> +return;
>> +
> 
> I guess this is related to freeing some kasan page tables without
> unpinning them?

Your guess was correct. It turned out that early_top_pgt, which I pinned and
made RO, is located in the .init section, and that was causing issues. Unpinning
it and making it RW again right after kasan_init() switches to init_top_pgt
seems to fix this issue.

> 
>>   free_init_pages(what, begin_ul, end_ul);
>> /*
>> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
>> index cf5bc37c90ac..caee2022f8b0 100644
>> --- a/arch/x86/mm/kasan_init_64.c
>> +++ b/arch/x86/mm/kasan_init_64.c
>> @@ -13,6 +13,8 @@
>>   #include 
>>   #include 
>>   +#include 
>> +
>>   #include 
>>   #include 
>>   #include 
>> @@ -20,6 +22,9 @@
>>   #include 
>>   #include 
>>   +#include 
>> +#include 
>> +
>>   extern struct range pfn_mapped[E820_MAX_ENTRIES];
>> static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata 
>> __aligned(PAGE_SIZE);
>> @@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = {
>>   };
>>   #endif
>>   +#ifdef CONFIG_XEN
>> +/* XXX: this should go to some header */
>> +void __init set_page_prot(void *addr, pgprot_t prot);
>> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn);
>> +#endif
>> +
> 
> Instead of exporting those, why don't you ...
> 
>>   void __init kasan_early_init(void)
>>   {
>>   int i;
>> @@ -332,6 +343,16 @@ void __init kasan_early_init(void)
>>   for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
>>   kasan_early_shadow_p4d[i] = __p4d(p4d_val);
>>   +if (xen_pv_domain()) {
>> +/* PV page tables must have PAGE_KERNEL_RO */
>> +set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
>> +set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
>> +set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);
> 
> add a function doing that to mmu_pv.c (e.g. xen_pv_kasan_early_init())?

Sounds like a good suggestion, but new functions still need some header for
declarations (xen/xen.h?). And kasan_map_early_shadow() will need exporting
through kasan.h as well, but that's probably not an issue.

> 
>> +
>> +/* Add mappings to the initial PV page tables */
>> +kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base);
>> +}
>> +
>>   kasan_map_early_shadow(early_top_pgt);
>>   kasan_map_early_shadow(init_top_pgt);
>>   }
>> @@ -369,6 +390,13 @@

1 2 3 >

1 - 100 of 259 matches

Mail list logo