Hello community,

here is the log from the commit of package xen for openSUSE:Factory checked in 
at 2019-04-18 13:57:59
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/xen (Old)
 and      /work/SRC/openSUSE:Factory/.xen.new.5536 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "xen"

Thu Apr 18 13:57:59 2019 rev:265 rq:695298 version:4.12.0_10

Changes:
--------
--- /work/SRC/openSUSE:Factory/xen/xen.changes  2019-04-04 12:05:40.969434536 +0200
+++ /work/SRC/openSUSE:Factory/.xen.new.5536/xen.changes        2019-04-18 13:58:17.887992681 +0200
@@ -1,0 +2,45 @@
+Wed Apr 17 08:28:50 MDT 2019 - [email protected]
+
+- bsc#1131811 - [XEN] internal error: libxenlight failed to create
+  new domain. This patch is a workaround for a systemd issue. See
+  patch header for additional comments.
+  xenstore-launch.patch
+
+-------------------------------------------------------------------
+Thu Apr 11 16:29:39 MDT 2019 - [email protected]
+
+- bsc#1125378 - [xen][pygrub] Can not restore sle11sp4 pv guest
+  after upgrading host from sle11sp4 to sle15sp1
+  pygrub-python3-conversion.patch
+- Fix "TypeError: virDomainDefineXML() argument 2 must be str or
+  None, not bytes" when converting VMs from using the xm/xend
+  toolstack to the libxl/libvirt toolstack. (bsc#1123378)
+  xen2libvirt.py
+
+-------------------------------------------------------------------
+Mon Apr  8 08:13:04 MDT 2019 - [email protected]
+
+- bsc#1124560 - Fully virtualized guests crash on boot
+  5cac6cba-vmx-Fixup-removals-of-MSR-load-save-list-entries.patch
+- bsc#1121391 - GCC 9: xen build fails
+  5c8f752c-x86-e820-build-with-gcc9.patch
+- Upstream bug fixes (bsc#1027519)
+  5c87b644-IOMMU-leave-enabled-for-kexec-crash.patch
+  5c87b6a2-x86-HVM-dont-crash-guest-in-find_mmio_cache.patch
+  5c87e6d1-x86-TSX-controls-for-RTM-force-abort-mode.patch
+  5c8fb92d-x86-HVM-split-linear-reads-and-writes.patch
+  5c8fb951-x86-HVM-finish-IOREQs-correctly-on-completion.patch
+  5c8fc6c0-x86-MSR-shorten-ARCH_CAPABILITIES.patch
+  5c8fc6c0-x86-SC-retpoline-safety-calculations-for-eIBRS.patch
+  5c9e63c5-credit2-SMT-idle-handling.patch
+  5ca46b68-x86emul-no-GPR-update-upon-AVX-gather-failures.patch
+  5ca773d1-x86emul-dont-read-mask-reg-without-AVX512F.patch
+  5cab1f66-timers-fix-memory-leak-with-cpu-plug.patch
+  5cac6219-xen-cpu-Fix-ARM-build-following-cs-597fbb8.patch
+
+-------------------------------------------------------------------
+Thu Apr  4 08:53:02 UTC 2019 - [email protected]
+
+- Install pkgconfig files into libdir instead of datadir
+
+-------------------------------------------------------------------

New:
----
  5c87b644-IOMMU-leave-enabled-for-kexec-crash.patch
  5c87b6a2-x86-HVM-dont-crash-guest-in-find_mmio_cache.patch
  5c87e6d1-x86-TSX-controls-for-RTM-force-abort-mode.patch
  5c8f752c-x86-e820-build-with-gcc9.patch
  5c8fb92d-x86-HVM-split-linear-reads-and-writes.patch
  5c8fb951-x86-HVM-finish-IOREQs-correctly-on-completion.patch
  5c8fc6c0-x86-MSR-shorten-ARCH_CAPABILITIES.patch
  5c8fc6c0-x86-SC-retpoline-safety-calculations-for-eIBRS.patch
  5c9e63c5-credit2-SMT-idle-handling.patch
  5ca46b68-x86emul-no-GPR-update-upon-AVX-gather-failures.patch
  5ca773d1-x86emul-dont-read-mask-reg-without-AVX512F.patch
  5cab1f66-timers-fix-memory-leak-with-cpu-plug.patch
  5cac6219-xen-cpu-Fix-ARM-build-following-cs-597fbb8.patch
  5cac6cba-vmx-Fixup-removals-of-MSR-load-save-list-entries.patch
  xenstore-launch.patch

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ xen.spec ++++++
--- /var/tmp/diff_new_pack.U92DbZ/_old  2019-04-18 13:58:20.875993677 +0200
+++ /var/tmp/diff_new_pack.U92DbZ/_new  2019-04-18 13:58:20.875993677 +0200
@@ -127,7 +127,7 @@
 BuildRequires:  pesign-obs-integration
 %endif
 
-Version:        4.12.0_08
+Version:        4.12.0_10
 Release:        0
 Summary:        Xen Virtualization: Hypervisor (aka VMM aka Microkernel)
 License:        GPL-2.0-only
@@ -162,6 +162,20 @@
 # For xen-libs
 Source99:       baselibs.conf
 # Upstream patches
+Patch1:         5c87b644-IOMMU-leave-enabled-for-kexec-crash.patch
+Patch2:         5c87b6a2-x86-HVM-dont-crash-guest-in-find_mmio_cache.patch
+Patch3:         5c87e6d1-x86-TSX-controls-for-RTM-force-abort-mode.patch
+Patch4:         5c8f752c-x86-e820-build-with-gcc9.patch
+Patch5:         5c8fb92d-x86-HVM-split-linear-reads-and-writes.patch
+Patch6:         5c8fb951-x86-HVM-finish-IOREQs-correctly-on-completion.patch
+Patch7:         5c8fc6c0-x86-MSR-shorten-ARCH_CAPABILITIES.patch
+Patch8:         5c8fc6c0-x86-SC-retpoline-safety-calculations-for-eIBRS.patch
+Patch9:         5c9e63c5-credit2-SMT-idle-handling.patch
+Patch10:        5ca46b68-x86emul-no-GPR-update-upon-AVX-gather-failures.patch
+Patch11:        5ca773d1-x86emul-dont-read-mask-reg-without-AVX512F.patch
+Patch12:        5cab1f66-timers-fix-memory-leak-with-cpu-plug.patch
+Patch13:        5cac6219-xen-cpu-Fix-ARM-build-following-cs-597fbb8.patch
+Patch14:        5cac6cba-vmx-Fixup-removals-of-MSR-load-save-list-entries.patch
 # Our platform specific patches
 Patch400:       xen-destdir.patch
 Patch401:       vif-bridge-no-iptables.patch
@@ -172,6 +186,7 @@
 Patch406:       suse-xendomains-service.patch
 Patch407:       replace-obsolete-network-configuration-commands-in-s.patch
 Patch408:       disable-building-pv-shim.patch
+Patch409:       xenstore-launch.patch
 # Needs to go upstream
 Patch420:       suspend_evtchn_lock.patch
 Patch421:       xenpaging.doc.patch
@@ -359,6 +374,20 @@
 %prep
 %setup -q -n %xen_build_dir -a 1 -a 5 -a 6 -a 57
 # Upstream patches
+%patch1 -p1
+%patch2 -p1
+%patch3 -p1
+%patch4 -p1
+%patch5 -p1
+%patch6 -p1
+%patch7 -p1
+%patch8 -p1
+%patch9 -p1
+%patch10 -p1
+%patch11 -p1
+%patch12 -p1
+%patch13 -p1
+%patch14 -p1
 # Our platform specific patches
 %patch400 -p1
 %patch401 -p1
@@ -369,6 +398,7 @@
 %patch406 -p1
 %patch407 -p1
 %patch408 -p1
+%patch409 -p1
 # Needs to go upstream
 %patch420 -p1
 %patch421 -p1
@@ -520,6 +550,7 @@
 make \
        DESTDIR=%{buildroot} \
        SYSCONFIG_DIR=%{_fillupdir} \
+       PKG_INSTALLDIR=%{_libdir}/pkgconfig \
        %{?_smp_mflags} \
        install
 find %{buildroot} -ls
@@ -1127,20 +1158,20 @@
 %endif
 %endif
 /usr/include/*
-%{_datadir}/pkgconfig/xenlight.pc
-%{_datadir}/pkgconfig/xlutil.pc
-%{_datadir}/pkgconfig/xencall.pc
-%{_datadir}/pkgconfig/xencontrol.pc
-%{_datadir}/pkgconfig/xendevicemodel.pc
-%{_datadir}/pkgconfig/xenevtchn.pc
-%{_datadir}/pkgconfig/xenforeignmemory.pc
-%{_datadir}/pkgconfig/xengnttab.pc
-%{_datadir}/pkgconfig/xenguest.pc
-%{_datadir}/pkgconfig/xenstat.pc
-%{_datadir}/pkgconfig/xenstore.pc
-%{_datadir}/pkgconfig/xentoolcore.pc
-%{_datadir}/pkgconfig/xentoollog.pc
-%{_datadir}/pkgconfig/xenvchan.pc
+%{_libdir}/pkgconfig/xenlight.pc
+%{_libdir}/pkgconfig/xlutil.pc
+%{_libdir}/pkgconfig/xencall.pc
+%{_libdir}/pkgconfig/xencontrol.pc
+%{_libdir}/pkgconfig/xendevicemodel.pc
+%{_libdir}/pkgconfig/xenevtchn.pc
+%{_libdir}/pkgconfig/xenforeignmemory.pc
+%{_libdir}/pkgconfig/xengnttab.pc
+%{_libdir}/pkgconfig/xenguest.pc
+%{_libdir}/pkgconfig/xenstat.pc
+%{_libdir}/pkgconfig/xenstore.pc
+%{_libdir}/pkgconfig/xentoolcore.pc
+%{_libdir}/pkgconfig/xentoollog.pc
+%{_libdir}/pkgconfig/xenvchan.pc
 
 %if %{?with_dom0_support}0
 

++++++ 5c87b644-IOMMU-leave-enabled-for-kexec-crash.patch ++++++
# Commit 12c36f577d454996c882ecdc5da8113ca2613646
# Date 2019-03-12 14:38:12 +0100
# Author Igor Druzhinin <[email protected]>
# Committer Jan Beulich <[email protected]>
iommu: leave IOMMU enabled by default during kexec crash transition

It's unsafe to disable IOMMU on a live system which is the case
if we're crashing since remapping hardware doesn't usually know what
to do with ongoing bus transactions and frequently raises NMI/MCE/SMI,
etc. (depends on the firmware configuration) to signal these abnormalities.
This, in turn, doesn't play well with kexec transition process as there is
no handling available at the moment for this kind of events resulting
in failures to enter the kernel.

Modern Linux kernels are taught to copy all the necessary DMAR/IR tables
following kexec from the previous kernel (Xen in our case) - so it's
currently normal to keep IOMMU enabled. It might require minor changes to
kdump command line that enables IOMMU drivers (e.g. intel_iommu=on /
intremap=on) but recent kernels don't require any additional changes for
the transition to be transparent.

A fallback option is still left for compatibility with ancient crash
kernels which didn't like to have IOMMU active under their feet on boot.

Signed-off-by: Igor Druzhinin <[email protected]>
Acked-by: Jan Beulich <[email protected]>
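
As a minimal illustration only (simplified, hypothetical names -- not part of the patch below), the gate this change introduces behaves roughly like this: the crash path leaves the IOMMU alone unless the admin opted back into the old behaviour with `iommu=crash-disable`.

    #include <stdbool.h>

    static bool iommu_enabled = true;
    static bool iommu_crash_disable;              /* default: keep IOMMU on */

    static void iommu_hw_shutdown(void) { iommu_enabled = false; }

    /* Called on the crash path; a no-op unless "iommu=crash-disable" was given. */
    static void crash_shutdown_iommu(void)
    {
        if ( !iommu_crash_disable )
            return;                               /* leave DMAR/IR live for kexec */

        if ( iommu_enabled )
            iommu_hw_shutdown();
    }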

--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1172,7 +1172,7 @@ detection of systems known to misbehave
 
 ### iommu
     = List of [ <bool>, verbose, debug, force, required,
-                sharept, intremap, intpost,
+                sharept, intremap, intpost, crash-disable,
                 snoop, qinval, igfx, amd-iommu-perdev-intremap,
                 dom0-{passthrough,strict} ]
 
@@ -1234,6 +1234,12 @@ boolean (e.g. `iommu=no`) can override t
     This option depends on `intremap`, and is disabled by default due to some
     corner cases in the implementation which have yet to be resolved.
 
+*   The `crash-disable` boolean controls disabling IOMMU functionality (DMAR/IR/QI)
+    before switching to a crash kernel. This option is inactive by default and
+    is for compatibility with older kdump kernels only. Modern kernels copy
+    all the necessary tables from the previous one following kexec which makes
+    the transition transparent for them with IOMMU functions still on.
+
 The following options are specific to Intel VT-d hardware:
 
 *   The `snoop` boolean controls the Snoop Control sub-feature, and is active
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -162,8 +162,11 @@ static void nmi_shootdown_cpus(void)
         printk("Failed to shoot down CPUs {%*pbl}\n",
                nr_cpu_ids, cpumask_bits(&waiting_to_crash));
 
-    /* Crash shutdown any IOMMU functionality as the crashdump kernel is not
-     * happy when booting if interrupt/dma remapping is still enabled */
+    /*
+     * Try to crash shutdown IOMMU functionality as some old crashdump
+     * kernels are not happy when booting if interrupt/dma remapping
+     * is still enabled.
+     */
     iommu_crash_shutdown();
 
     __stop_this_cpu();
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -34,6 +34,7 @@ bool_t __read_mostly iommu_igfx = 1;
 bool_t __read_mostly iommu_snoop = 1;
 bool_t __read_mostly iommu_qinval = 1;
 bool_t __read_mostly iommu_intremap = 1;
+bool_t __read_mostly iommu_crash_disable;
 
 static bool __hwdom_initdata iommu_hwdom_none;
 bool __hwdom_initdata iommu_hwdom_strict;
@@ -85,6 +86,10 @@ static int __init parse_iommu_param(cons
             iommu_intremap = val;
         else if ( (val = parse_boolean("intpost", s, ss)) >= 0 )
             iommu_intpost = val;
+#ifdef CONFIG_KEXEC
+        else if ( (val = parse_boolean("crash-disable", s, ss)) >= 0 )
+            iommu_crash_disable = val;
+#endif
         else if ( (val = parse_boolean("debug", s, ss)) >= 0 )
         {
             iommu_debug = val;
@@ -576,6 +581,9 @@ void iommu_share_p2m_table(struct domain
 
 void iommu_crash_shutdown(void)
 {
+    if ( !iommu_crash_disable )
+        return;
+
     if ( iommu_enabled )
         iommu_get_ops()->crash_shutdown();
     iommu_enabled = iommu_intremap = iommu_intpost = 0;
++++++ 5c87b6a2-x86-HVM-dont-crash-guest-in-find_mmio_cache.patch ++++++
# Commit a43c1dec246bdee484e6a3de001cc6850a107abe
# Date 2019-03-12 14:39:46 +0100
# Author Jan Beulich <[email protected]>
# Committer Jan Beulich <[email protected]>
x86/HVM: don't crash guest in hvmemul_find_mmio_cache()

Commit 35a61c05ea ("x86emul: adjust handling of AVX2 gathers") builds
upon the fact that the domain will actually survive running out of MMIO
result buffer space. Drop the domain_crash() invocation. Also delay
incrementing of the usage counter, such that the function can't possibly
use/return an out-of-bounds slot/pointer in case execution subsequently
makes it into the function again without a prior reset of state.

Signed-off-by: Jan Beulich <[email protected]>
Reviewed-by: Paul Durrant <[email protected]>
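
Purely as an illustration of the pattern adopted here (made-up names, not the Xen ones): reserve an array slot only after the bounds check has passed, so a failed lookup can simply return NULL instead of crashing the domain or handing out an out-of-bounds slot.

    #include <stddef.h>
    #include <string.h>

    #define CACHE_SLOTS 4                     /* hypothetical capacity */

    struct cache_entry { unsigned long gla; unsigned char dir; };

    struct cache_state {
        unsigned int count;
        struct cache_entry slots[CACHE_SLOTS];
    };

    static struct cache_entry *cache_reserve(struct cache_state *s)
    {
        unsigned int i = s->count;            /* read the index first ...        */

        if ( i == CACHE_SLOTS )
            return NULL;                      /* ... and bail out before bumping */

        ++s->count;                           /* only now is the slot claimed    */
        memset(&s->slots[i], 0, sizeof(s->slots[i]));
        return &s->slots[i];
    }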

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -966,12 +966,11 @@ static struct hvm_mmio_cache *hvmemul_fi
             return cache;
     }
 
-    i = vio->mmio_cache_count++;
+    i = vio->mmio_cache_count;
     if( i == ARRAY_SIZE(vio->mmio_cache) )
-    {
-        domain_crash(current->domain);
         return NULL;
-    }
+
+    ++vio->mmio_cache_count;
 
     cache = &vio->mmio_cache[i];
     memset(cache, 0, sizeof (*cache));
++++++ 5c87e6d1-x86-TSX-controls-for-RTM-force-abort-mode.patch ++++++
# Commit 6be613f29b4205349275d24367bd4c82fb2960dd
# Date 2019-03-12 17:05:21 +0000
# Author Andrew Cooper <[email protected]>
# Committer Andrew Cooper <[email protected]>
x86/tsx: Implement controls for RTM force-abort mode

The CPUID bit and MSR are deliberately not exposed to guests, because they
won't exist on newer processors.  As vPMU isn't security supported, the
misbehaviour of PCR3 isn't expected to impact production deployments.

Signed-off-by: Andrew Cooper <[email protected]>
Reviewed-by: Jan Beulich <[email protected]>

--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -2109,7 +2109,7 @@ Use Virtual Processor ID support if avai
 flushes on VM entry and exit, increasing performance.
 
 ### vpmu (x86)
-    = List of [ <bool>, bts, ipc, arch ]
+    = List of [ <bool>, bts, ipc, arch, rtm-abort=<bool> ]
 
     Applicability: x86.  Default: false
 
@@ -2142,6 +2142,21 @@ provide access to a wealth of low level
 
 *   The `arch` option allows access to the pre-defined architectural events.
 
+*   The `rtm-abort` boolean controls a trade-off between working Restricted
+    Transactional Memory, and working performance counters.
+
+    All processors released to date (Q1 2019) supporting Transactional Memory
+    Extensions suffer an erratum which has been addressed in microcode.
+
+    Processors based on the Skylake microarchitecture with up-to-date
+    microcode internally use performance counter 3 to work around the erratum.
+    A consequence is that the counter gets reprogrammed whenever an `XBEGIN`
+    instruction is executed.
+
+    An alternative mode exists where PCR3 behaves as before, at the cost of
+    `XBEGIN` unconditionally aborting.  Enabling `rtm-abort` mode will
+    activate this alternative mode.
+
 *Warning:*
 As the virtualisation is not 100% safe, don't use the vpmu flag on
 production systems (see http://xenbits.xen.org/xsa/advisory-163.html)!
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -146,6 +146,8 @@ static const char *str_7d0[32] =
 {
     [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps",
 
+    /* 12 */                [13] = "tsx-force-abort",
+
     [26] = "ibrsb",         [27] = "stibp",
     [28] = "l1d_flush",     [29] = "arch_caps",
     /* 30 */                [31] = "ssbd",
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -286,6 +286,9 @@ static void Intel_errata_workarounds(str
        if (c->x86 == 6 && cpu_has_clflush &&
            (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
                __set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);
+
+       if (cpu_has_tsx_force_abort && opt_rtm_abort)
+               wrmsrl(MSR_TSX_FORCE_ABORT, TSX_FORCE_ABORT_RTM);
 }
 
 
--- a/xen/arch/x86/cpu/vpmu.c
+++ b/xen/arch/x86/cpu/vpmu.c
@@ -45,6 +45,7 @@ CHECK_pmu_params;
 static unsigned int __read_mostly opt_vpmu_enabled;
 unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
 unsigned int __read_mostly vpmu_features = 0;
+bool __read_mostly opt_rtm_abort;
 
 static DEFINE_SPINLOCK(vpmu_lock);
 static unsigned vpmu_count;
@@ -73,6 +74,8 @@ static int __init parse_vpmu_params(cons
             vpmu_features |= XENPMU_FEATURE_IPC_ONLY;
         else if ( !cmdline_strcmp(s, "arch") )
             vpmu_features |= XENPMU_FEATURE_ARCH_ONLY;
+        else if ( (val = parse_boolean("rtm-abort", s, ss)) >= 0 )
+            opt_rtm_abort = val;
         else
             rc = -EINVAL;
 
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -131,6 +131,8 @@ int guest_rdmsr(const struct vcpu *v, ui
     case MSR_PRED_CMD:
     case MSR_FLUSH_CMD:
         /* Write-only */
+    case MSR_TSX_FORCE_ABORT:
+        /* Not offered to guests. */
         goto gp_fault;
 
     case MSR_SPEC_CTRL:
@@ -230,6 +232,8 @@ int guest_wrmsr(struct vcpu *v, uint32_t
     case MSR_INTEL_PLATFORM_INFO:
     case MSR_ARCH_CAPABILITIES:
         /* Read-only */
+    case MSR_TSX_FORCE_ABORT:
+        /* Not offered to guests. */
         goto gp_fault;
 
     case MSR_AMD_PATCHLOADER:
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -112,6 +112,9 @@
 /* CPUID level 0x80000007.edx */
 #define cpu_has_itsc            boot_cpu_has(X86_FEATURE_ITSC)
 
+/* CPUID level 0x00000007:0.edx */
+#define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
+
 /* Synthesized. */
 #define cpu_has_arch_perfmon    boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
 #define cpu_has_cpuid_faulting  boot_cpu_has(X86_FEATURE_CPUID_FAULTING)
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -51,6 +51,9 @@
 #define MSR_FLUSH_CMD                  0x0000010b
 #define FLUSH_CMD_L1D                  (_AC(1, ULL) << 0)
 
+#define MSR_TSX_FORCE_ABORT             0x0000010f
+#define TSX_FORCE_ABORT_RTM             (_AC(1, ULL) <<  0)
+
 /* Intel MSRs. Some also available on other CPUs */
 #define MSR_IA32_PERFCTR0              0x000000c1
 #define MSR_IA32_A_PERFCTR0            0x000004c1
--- a/xen/include/asm-x86/vpmu.h
+++ b/xen/include/asm-x86/vpmu.h
@@ -125,6 +125,7 @@ static inline int vpmu_do_rdmsr(unsigned
 
 extern unsigned int vpmu_mode;
 extern unsigned int vpmu_features;
+extern bool opt_rtm_abort;
 
 /* Context switch */
 static inline void vpmu_switch_from(struct vcpu *prev)
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -242,6 +242,7 @@ XEN_CPUFEATURE(IBPB,          8*32+12) /
 /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
 XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A  AVX512 Neural Network Instructions */
 XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A  AVX512 Multiply Accumulation Single Precision */
+XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
 XEN_CPUFEATURE(IBRSB,         9*32+26) /*A  IBRS and IBPB support (used by Intel) */
 XEN_CPUFEATURE(STIBP,         9*32+27) /*A  STIBP */
 XEN_CPUFEATURE(L1D_FLUSH,     9*32+28) /*S  MSR_FLUSH_CMD and L1D flush. */
++++++ 5c8f752c-x86-e820-build-with-gcc9.patch ++++++
References: bsc#1121391

# Commit 22e2f8dddf5fbed885b5e4db3ffc9e1101be9ec0
# Date 2019-03-18 11:38:36 +0100
# Author Jan Beulich <[email protected]>
# Committer Jan Beulich <[email protected]>
x86/e820: fix build with gcc9

e820.c: In function ‘clip_to_limit’:
.../xen/include/asm/string.h:10:26: error: ‘__builtin_memmove’ offset [-16, -36] is out of the bounds [0, 20484] of object ‘e820’ with type ‘struct e820map’ [-Werror=array-bounds]
   10 | #define memmove(d, s, n) __builtin_memmove(d, s, n)
      |                          ^~~~~~~~~~~~~~~~~~~~~~~~~~
e820.c:404:13: note: in expansion of macro ‘memmove’
  404 |             memmove(&e820.map[i], &e820.map[i+1],
      |             ^~~~~~~
e820.c:36:16: note: ‘e820’ declared here
   36 | struct e820map e820;
      |                ^~~~

While I can't see where the negative offsets would come from, converting
the loop index to unsigned type helps. Take the opportunity and also
convert several other local variables and copy_e820_map()'s second
parameter to unsigned int (and bool in one case).

Reported-by: Charles Arnold <[email protected]>
Signed-off-by: Jan Beulich <[email protected]>
Reviewed-by: Roger Pau Monné <[email protected]>
Reviewed-by: Wei Liu <[email protected]>
Acked-by: Andrew Cooper <[email protected]>
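
For illustration (hypothetical names, not the Xen e820 code): the pattern gcc 9 complained about is a signed loop index feeding memmove() offsets; using an unsigned index, as the patch does, removes the negative-offset case from the compiler's analysis.

    #include <string.h>

    #define MAP_SIZE 128

    struct entry { unsigned long long addr, size; unsigned int type; };

    static struct entry map[MAP_SIZE];
    static unsigned int nr_map;

    /* Remove entry i (assumes i < nr_map) by shifting the tail down; the
     * unsigned index keeps gcc 9's -Warray-bounds from considering negative
     * offsets. */
    static void drop_entry(unsigned int i)
    {
        if ( i + 1 < nr_map )
            memmove(&map[i], &map[i + 1], (nr_map - i - 1) * sizeof(map[0]));
        --nr_map;
    }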

--- a/xen/arch/x86/e820.c
+++ b/xen/arch/x86/e820.c
@@ -44,7 +44,7 @@ struct e820map __initdata e820_raw;
  */
 int __init e820_all_mapped(u64 start, u64 end, unsigned type)
 {
-       int i;
+       unsigned int i;
 
        for (i = 0; i < e820.nr_map; i++) {
                struct e820entry *ei = &e820.map[i];
@@ -73,9 +73,7 @@ int __init e820_all_mapped(u64 start, u6
 static void __init add_memory_region(unsigned long long start,
                                      unsigned long long size, int type)
 {
-    int x;
-
-    x = e820.nr_map;
+    unsigned int x = e820.nr_map;
 
     if (x == ARRAY_SIZE(e820.map)) {
         printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
@@ -140,11 +138,9 @@ int __init sanitize_e820_map(struct e820
     struct change_member *change_tmp;
     unsigned long current_type, last_type;
     unsigned long long last_addr;
-    int chgidx, still_changing;
-    int overlap_entries;
-    int new_bios_entry;
-    int old_nr, new_nr, chg_nr;
-    int i;
+    bool still_changing;
+    unsigned int i, chgidx, overlap_entries, new_bios_entry;
+    unsigned int old_nr, new_nr, chg_nr;
 
     /*
       Visually we're performing the following (1,2,3,4 = memory types)...
@@ -211,9 +207,9 @@ int __init sanitize_e820_map(struct e820
     chg_nr = chgidx;           /* true number of change-points */
 
     /* sort change-point list by memory addresses (low -> high) */
-    still_changing = 1;
+    still_changing = true;
     while (still_changing)     {
-        still_changing = 0;
+        still_changing = false;
         for (i=1; i < chg_nr; i++)  {
             /* if <current_addr> > <last_addr>, swap */
             /* or, if current=<start_addr> & last=<end_addr>, swap */
@@ -226,7 +222,7 @@ int __init sanitize_e820_map(struct e820
                 change_tmp = change_point[i];
                 change_point[i] = change_point[i-1];
                 change_point[i-1] = change_tmp;
-                still_changing=1;
+                still_changing = true;
             }
         }
     }
@@ -304,9 +300,9 @@ int __init sanitize_e820_map(struct e820
  * thinkpad 560x, for example, does not cooperate with the memory
  * detection code.)
  */
-static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+static int __init copy_e820_map(struct e820entry * biosmap, unsigned int nr_map)
 {
-    /* Only one memory region (or negative)? Ignore it */
+    /* Only one memory region? Ignore it */
     if (nr_map < 2)
         return -1;
 
@@ -345,7 +341,7 @@ static int __init copy_e820_map(struct e
  */
 static unsigned long __init find_max_pfn(void)
 {
-    int i;
+    unsigned int i;
     unsigned long max_pfn = 0;
 
     for (i = 0; i < e820.nr_map; i++) {
@@ -366,7 +362,7 @@ static unsigned long __init find_max_pfn
 
 static void __init clip_to_limit(uint64_t limit, char *warnmsg)
 {
-    int i;
+    unsigned int i;
     char _warnmsg[160];
     uint64_t old_limit = 0;
 
@@ -514,7 +510,7 @@ static void __init machine_specific_memo
 {
     unsigned long mpt_limit, ro_mpt_limit;
     uint64_t top_of_ram, size;
-    int i;
+    unsigned int i;
 
     sanitize_e820_map(raw->map, &raw->nr_map);
     copy_e820_map(raw->map, raw->nr_map);
@@ -604,7 +600,7 @@ int __init e820_change_range_type(
     uint32_t orig_type, uint32_t new_type)
 {
     uint64_t rs = 0, re = 0;
-    int i;
+    unsigned int i;
 
     for ( i = 0; i < e820->nr_map; i++ )
     {
++++++ 5c8fb92d-x86-HVM-split-linear-reads-and-writes.patch ++++++
# Commit 2d527ba310dc6695bba2df118ff9e053f7e40c82
# Date 2019-03-18 16:28:45 +0100
# Author Igor Druzhinin <[email protected]>
# Committer Jan Beulich <[email protected]>
x86/hvm: split all linear reads and writes at page boundary

Ruling out page straddling at linear level makes it easier to
distinguish chunks that require proper handling as MMIO access
and not complete them as page straddling memory transactions
prematurely. This doesn't change the general behavior.

Signed-off-by: Igor Druzhinin <[email protected]>
Reviewed-by: Paul Durrant <[email protected]>
Reviewed-by: Jan Beulich <[email protected]>
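
A self-contained sketch of the split-at-page-boundary technique (hypothetical names; the real code is linear_read()/linear_write() in the hunks below): any access crossing a page boundary is broken into per-page chunks before being handled.

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    /* Stand-in for the real per-page handler (hvm_copy_*, MMIO path, ...). */
    static int access_one_page(unsigned long addr, unsigned int bytes, void *buf)
    {
        (void)addr; (void)bytes; (void)buf;
        return 0;
    }

    static int linear_access(unsigned long addr, unsigned int bytes, void *buf)
    {
        unsigned int offset = addr & ~PAGE_MASK;

        if ( offset + bytes > PAGE_SIZE )
        {
            /* Split the access at the page boundary. */
            unsigned int part1 = PAGE_SIZE - offset;
            int rc = linear_access(addr, part1, buf);

            if ( rc == 0 )
                rc = linear_access(addr + part1, bytes - part1,
                                   (char *)buf + part1);
            return rc;
        }

        return access_one_page(addr, bytes, buf);
    }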

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1089,12 +1089,25 @@ static int linear_read(unsigned long add
                        uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt)
 {
     pagefault_info_t pfinfo;
-    int rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo);
+    unsigned int offset = addr & ~PAGE_MASK;
+    int rc;
 
-    switch ( rc )
+    if ( offset + bytes > PAGE_SIZE )
     {
-        unsigned int offset, part1;
+        unsigned int part1 = PAGE_SIZE - offset;
+
+        /* Split the access at the page boundary. */
+        rc = linear_read(addr, part1, p_data, pfec, hvmemul_ctxt);
+        if ( rc == X86EMUL_OKAY )
+            rc = linear_read(addr + part1, bytes - part1, p_data + part1,
+                             pfec, hvmemul_ctxt);
+        return rc;
+    }
+
+    rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo);
 
+    switch ( rc )
+    {
     case HVMTRANS_okay:
         return X86EMUL_OKAY;
 
@@ -1106,19 +1119,9 @@ static int linear_read(unsigned long add
         if ( pfec & PFEC_insn_fetch )
             return X86EMUL_UNHANDLEABLE;
 
-        offset = addr & ~PAGE_MASK;
-        if ( offset + bytes <= PAGE_SIZE )
-            return hvmemul_linear_mmio_read(addr, bytes, p_data, pfec,
-                                            hvmemul_ctxt,
-                                            known_gla(addr, bytes, pfec));
-
-        /* Split the access at the page boundary. */
-        part1 = PAGE_SIZE - offset;
-        rc = linear_read(addr, part1, p_data, pfec, hvmemul_ctxt);
-        if ( rc == X86EMUL_OKAY )
-            rc = linear_read(addr + part1, bytes - part1, p_data + part1,
-                             pfec, hvmemul_ctxt);
-        return rc;
+        return hvmemul_linear_mmio_read(addr, bytes, p_data, pfec,
+                                        hvmemul_ctxt,
+                                        known_gla(addr, bytes, pfec));
 
     case HVMTRANS_gfn_paged_out:
     case HVMTRANS_gfn_shared:
@@ -1132,12 +1135,25 @@ static int linear_write(unsigned long ad
                         uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt)
 {
     pagefault_info_t pfinfo;
-    int rc = hvm_copy_to_guest_linear(addr, p_data, bytes, pfec, &pfinfo);
+    unsigned int offset = addr & ~PAGE_MASK;
+    int rc;
 
-    switch ( rc )
+    if ( offset + bytes > PAGE_SIZE )
     {
-        unsigned int offset, part1;
+        unsigned int part1 = PAGE_SIZE - offset;
+
+        /* Split the access at the page boundary. */
+        rc = linear_write(addr, part1, p_data, pfec, hvmemul_ctxt);
+        if ( rc == X86EMUL_OKAY )
+            rc = linear_write(addr + part1, bytes - part1, p_data + part1,
+                              pfec, hvmemul_ctxt);
+        return rc;
+    }
+
+    rc = hvm_copy_to_guest_linear(addr, p_data, bytes, pfec, &pfinfo);
 
+    switch ( rc )
+    {
     case HVMTRANS_okay:
         return X86EMUL_OKAY;
 
@@ -1146,19 +1162,9 @@ static int linear_write(unsigned long ad
         return X86EMUL_EXCEPTION;
 
     case HVMTRANS_bad_gfn_to_mfn:
-        offset = addr & ~PAGE_MASK;
-        if ( offset + bytes <= PAGE_SIZE )
-            return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec,
-                                             hvmemul_ctxt,
-                                             known_gla(addr, bytes, pfec));
-
-        /* Split the access at the page boundary. */
-        part1 = PAGE_SIZE - offset;
-        rc = linear_write(addr, part1, p_data, pfec, hvmemul_ctxt);
-        if ( rc == X86EMUL_OKAY )
-            rc = linear_write(addr + part1, bytes - part1, p_data + part1,
-                              pfec, hvmemul_ctxt);
-        return rc;
+        return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec,
+                                         hvmemul_ctxt,
+                                         known_gla(addr, bytes, pfec));
 
     case HVMTRANS_gfn_paged_out:
     case HVMTRANS_gfn_shared:
++++++ 5c8fb951-x86-HVM-finish-IOREQs-correctly-on-completion.patch ++++++
# Commit 522a2f3c5c89cc78c0e2b05af924b76cef7d4bff
# Date 2019-03-18 16:29:21 +0100
# Author Igor Druzhinin <[email protected]>
# Committer Jan Beulich <[email protected]>
x86/hvm: finish IOREQs correctly on completion path

Since the introduction of linear_{read,write}() helpers in 3bdec530a5
(x86/HVM: split page straddling emulated accesses in more cases) the
completion path for IOREQs has been broken: if there is an IOREQ in
progress but hvm_copy_{to,from}_guest_linear() returns HVMTRANS_okay
(e.g. when P2M type of source/destination has been changed by IOREQ
handler) the execution will never re-enter hvmemul_do_io() where
IOREQs are completed. This usually results in a domain crash upon
the execution of the next IOREQ entering hvmemul_do_io() and finding
the remnants of the previous IOREQ in the state machine.

This particular issue has been discovered in relation to p2m_ioreq_server
type where an emulator changed the memory type between p2m_ioreq_server
and p2m_ram_rw in the process of responding to an IOREQ, which made
hvm_copy_..() behave differently on the way back.

Fix it for now by checking if IOREQ completion is required (which
can be identified by querying MMIO cache) before trying to finish
a memory access immediately through hvm_copy_..(), re-enter
hvmemul_do_io() otherwise. This change alone only addresses IOREQ
completion issue for P2M type changing from MMIO to RAM in the
middle of emulation but leaves a case where new IOREQs might be
introduced by P2M changes from RAM to MMIO (which is less likely
to find in practice) that requires more substantial changes in
MMIO emulation code.

Signed-off-by: Igor Druzhinin <[email protected]>
Reviewed-by: Paul Durrant <[email protected]>

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -952,7 +952,7 @@ static int hvmemul_phys_mmio_access(
  * cache indexed by linear MMIO address.
  */
 static struct hvm_mmio_cache *hvmemul_find_mmio_cache(
-    struct hvm_vcpu_io *vio, unsigned long gla, uint8_t dir)
+    struct hvm_vcpu_io *vio, unsigned long gla, uint8_t dir, bool create)
 {
     unsigned int i;
     struct hvm_mmio_cache *cache;
@@ -966,6 +966,9 @@ static struct hvm_mmio_cache *hvmemul_fi
             return cache;
     }
 
+    if ( !create )
+        return NULL;
+
     i = vio->mmio_cache_count;
     if( i == ARRAY_SIZE(vio->mmio_cache) )
         return NULL;
@@ -1000,7 +1003,7 @@ static int hvmemul_linear_mmio_access(
 {
     struct hvm_vcpu_io *vio = &current->arch.hvm.hvm_io;
     unsigned long offset = gla & ~PAGE_MASK;
-    struct hvm_mmio_cache *cache = hvmemul_find_mmio_cache(vio, gla, dir);
+    struct hvm_mmio_cache *cache = hvmemul_find_mmio_cache(vio, gla, dir, true);
     unsigned int chunk, buffer_offset = 0;
     paddr_t gpa;
     unsigned long one_rep = 1;
@@ -1089,8 +1092,9 @@ static int linear_read(unsigned long add
                        uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt)
 {
     pagefault_info_t pfinfo;
+    struct hvm_vcpu_io *vio = &current->arch.hvm.hvm_io;
     unsigned int offset = addr & ~PAGE_MASK;
-    int rc;
+    int rc = HVMTRANS_bad_gfn_to_mfn;
 
     if ( offset + bytes > PAGE_SIZE )
     {
@@ -1104,7 +1108,14 @@ static int linear_read(unsigned long add
         return rc;
     }
 
-    rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo);
+    /*
+     * If there is an MMIO cache entry for the access then we must be re-issuing
+     * an access that was previously handled as MMIO. Thus it is imperative that
+     * we handle this access in the same way to guarantee completion and hence
+     * clean up any interim state.
+     */
+    if ( !hvmemul_find_mmio_cache(vio, addr, IOREQ_READ, false) )
+        rc = hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo);
 
     switch ( rc )
     {
@@ -1135,8 +1146,9 @@ static int linear_write(unsigned long ad
                         uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt)
 {
     pagefault_info_t pfinfo;
+    struct hvm_vcpu_io *vio = &current->arch.hvm.hvm_io;
     unsigned int offset = addr & ~PAGE_MASK;
-    int rc;
+    int rc = HVMTRANS_bad_gfn_to_mfn;
 
     if ( offset + bytes > PAGE_SIZE )
     {
@@ -1150,7 +1162,14 @@ static int linear_write(unsigned long ad
         return rc;
     }
 
-    rc = hvm_copy_to_guest_linear(addr, p_data, bytes, pfec, &pfinfo);
+    /*
+     * If there is an MMIO cache entry for the access then we must be re-issuing
+     * an access that was previously handled as MMIO. Thus it is imperative that
+     * we handle this access in the same way to guarantee completion and hence
+     * clean up any interim state.
+     */
+    if ( !hvmemul_find_mmio_cache(vio, addr, IOREQ_WRITE, false) )
+        rc = hvm_copy_to_guest_linear(addr, p_data, bytes, pfec, &pfinfo);
 
     switch ( rc )
     {
++++++ 5c8fc6c0-x86-MSR-shorten-ARCH_CAPABILITIES.patch ++++++
# Commit ba27aaa88548c824a47dcf5609288ee1c05d2946
# Date 2019-03-18 16:26:40 +0000
# Author Andrew Cooper <[email protected]>
# Committer Andrew Cooper <[email protected]>
x86/msr: Shorten ARCH_CAPABILITIES_* constants

They are unnecessarily verbose, and ARCH_CAPS_* is already the more common
version.

Signed-off-by: Andrew Cooper <[email protected]>
Acked-by: Jan Beulich <[email protected]>

--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -230,8 +230,8 @@ static void __init print_details(enum in
            (_7d0 & cpufeat_mask(X86_FEATURE_L1D_FLUSH)) ? " L1D_FLUSH" : "",
            (_7d0 & cpufeat_mask(X86_FEATURE_SSBD))  ? " SSBD"      : "",
            (e8b  & cpufeat_mask(X86_FEATURE_IBPB))  ? " IBPB"      : "",
-           (caps & ARCH_CAPABILITIES_IBRS_ALL)      ? " IBRS_ALL"  : "",
-           (caps & ARCH_CAPABILITIES_RDCL_NO)       ? " RDCL_NO"   : "",
+           (caps & ARCH_CAPS_IBRS_ALL)              ? " IBRS_ALL"  : "",
+           (caps & ARCH_CAPS_RDCL_NO)               ? " RDCL_NO"   : "",
            (caps & ARCH_CAPS_RSBA)                  ? " RSBA"      : "",
            (caps & ARCH_CAPS_SKIP_L1DFL)            ? " SKIP_L1DFL": "",
            (caps & ARCH_CAPS_SSB_NO)                ? " SSB_NO"    : "");
@@ -549,7 +549,7 @@ static __init void l1tf_calculations(uin
     }
 
     /* Any processor advertising RDCL_NO should be not vulnerable to L1TF. */
-    if ( caps & ARCH_CAPABILITIES_RDCL_NO )
+    if ( caps & ARCH_CAPS_RDCL_NO )
         cpu_has_bug_l1tf = false;
 
     if ( cpu_has_bug_l1tf && hit_default )
@@ -613,9 +613,9 @@ int8_t __read_mostly opt_xpti_domu = -1;
 static __init void xpti_init_default(uint64_t caps)
 {
     if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
-        caps = ARCH_CAPABILITIES_RDCL_NO;
+        caps = ARCH_CAPS_RDCL_NO;
 
-    if ( caps & ARCH_CAPABILITIES_RDCL_NO )
+    if ( caps & ARCH_CAPS_RDCL_NO )
     {
         if ( opt_xpti_hwdom < 0 )
             opt_xpti_hwdom = 0;
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -42,8 +42,8 @@
 #define PRED_CMD_IBPB                  (_AC(1, ULL) << 0)
 
 #define MSR_ARCH_CAPABILITIES          0x0000010a
-#define ARCH_CAPABILITIES_RDCL_NO      (_AC(1, ULL) << 0)
-#define ARCH_CAPABILITIES_IBRS_ALL     (_AC(1, ULL) << 1)
+#define ARCH_CAPS_RDCL_NO              (_AC(1, ULL) << 0)
+#define ARCH_CAPS_IBRS_ALL             (_AC(1, ULL) << 1)
 #define ARCH_CAPS_RSBA                 (_AC(1, ULL) << 2)
 #define ARCH_CAPS_SKIP_L1DFL           (_AC(1, ULL) << 3)
 #define ARCH_CAPS_SSB_NO               (_AC(1, ULL) << 4)
++++++ 5c8fc6c0-x86-SC-retpoline-safety-calculations-for-eIBRS.patch ++++++
# Commit 17f74242ccf0ce6e51c03a5860947865c0ef0dc2
# Date 2019-03-18 16:26:40 +0000
# Author Andrew Cooper <[email protected]>
# Committer Andrew Cooper <[email protected]>
x86/spec-ctrl: Extend retpoline safety calculations for eIBRS and Atom parts

All currently-released Atom processors are in practice retpoline-safe, because
they don't fall back to a BTB prediction on RSB underflow.

However, an additional meaning of Enhanced IBRS is that the processor may not
be retpoline-safe.  The Gemini Lake platform, based on the Goldmont Plus
microarchitecture is the first Atom processor to support eIBRS.

Until Xen gets full eIBRS support, Gemini Lake will still be safe using
regular IBRS.

Signed-off-by: Andrew Cooper <[email protected]>
Acked-by: Jan Beulich <[email protected]>

--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -316,8 +316,11 @@ static bool __init retpoline_safe(uint64
     /*
      * RSBA may be set by a hypervisor to indicate that we may move to a
      * processor which isn't retpoline-safe.
+     *
+     * Processors offering Enhanced IBRS are not guarenteed to be
+     * repoline-safe.
      */
-    if ( caps & ARCH_CAPS_RSBA )
+    if ( caps & (ARCH_CAPS_RSBA | ARCH_CAPS_IBRS_ALL) )
         return false;
 
     switch ( boot_cpu_data.x86_model )
@@ -377,6 +380,23 @@ static bool __init retpoline_safe(uint64
     case 0x9e:
         return false;
 
+        /*
+         * Atom processors before Goldmont Plus/Gemini Lake are retpoline-safe.
+         */
+    case 0x1c: /* Pineview */
+    case 0x26: /* Lincroft */
+    case 0x27: /* Penwell */
+    case 0x35: /* Cloverview */
+    case 0x36: /* Cedarview */
+    case 0x37: /* Baytrail / Valleyview (Silvermont) */
+    case 0x4d: /* Avaton / Rangely (Silvermont) */
+    case 0x4c: /* Cherrytrail / Brasswell */
+    case 0x4a: /* Merrifield */
+    case 0x5a: /* Moorefield */
+    case 0x5c: /* Goldmont */
+    case 0x5f: /* Denverton */
+        return true;
+
     default:
         printk("Unrecognised CPU model %#x - assuming not reptpoline safe\n",
                boot_cpu_data.x86_model);
++++++ 5c9e63c5-credit2-SMT-idle-handling.patch ++++++
# Commit 753ba43d6d16e688f688e01e1c77463ea2c6ec9f
# Date 2019-03-29 18:28:21 +0000
# Author Juergen Gross <[email protected]>
# Committer Andrew Cooper <[email protected]>
xen/sched: fix credit2 smt idle handling

Credit2's smt_idle_mask_set() and smt_idle_mask_clear() are used to
identify idle cores where vcpus can be moved to. A core is thought to
be idle when all siblings are known to have the idle vcpu running on
them.

Unfortunately the information of a vcpu running on a cpu is per
runqueue. So in case not all siblings are in the same runqueue a core
will never be regarded to be idle, as the sibling not in the runqueue
is never known to run the idle vcpu.

Use a credit2 specific cpumask of siblings with only those cpus
being marked which are in the same runqueue as the cpu in question.

Signed-off-by: Juergen Gross <[email protected]>
Reviewed-by: Dario Faggioli <[email protected]>
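
To illustrate the core-idle test involved (toy types, not the scheduler's): a core counts as fully idle only when every sibling recorded in the per-runqueue sibling mask is in the idlers mask, which is why siblings outside the runqueue must not appear in that mask.

    #include <stdbool.h>

    typedef unsigned long cpumask_t;          /* toy mask: one bit per CPU */

    static bool mask_subset(cpumask_t sub, cpumask_t super)
    {
        return (sub & ~super) == 0;
    }

    /* Mark the whole core idle only if all runqueue-local siblings are idle. */
    static void smt_idle_set(cpumask_t siblings_in_runq, cpumask_t idlers,
                             cpumask_t *smt_idle)
    {
        if ( mask_subset(siblings_in_runq, idlers) )
            *smt_idle |= siblings_in_runq;
    }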

--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -504,6 +504,7 @@ struct csched2_private {
  * Physical CPU
  */
 struct csched2_pcpu {
+    cpumask_t sibling_mask;            /* Siblings in the same runqueue      */
     int runq_id;
 };
 
@@ -656,7 +657,7 @@ static inline
 void smt_idle_mask_set(unsigned int cpu, const cpumask_t *idlers,
                        cpumask_t *mask)
 {
-    const cpumask_t *cpu_siblings = per_cpu(cpu_sibling_mask, cpu);
+    const cpumask_t *cpu_siblings = &csched2_pcpu(cpu)->sibling_mask;
 
     if ( cpumask_subset(cpu_siblings, idlers) )
         cpumask_or(mask, mask, cpu_siblings);
@@ -668,10 +669,10 @@ void smt_idle_mask_set(unsigned int cpu,
 static inline
 void smt_idle_mask_clear(unsigned int cpu, cpumask_t *mask)
 {
-    const cpumask_t *cpu_siblings = per_cpu(cpu_sibling_mask, cpu);
+    const cpumask_t *cpu_siblings = &csched2_pcpu(cpu)->sibling_mask;
 
     if ( cpumask_subset(cpu_siblings, mask) )
-        cpumask_andnot(mask, mask, per_cpu(cpu_sibling_mask, cpu));
+        cpumask_andnot(mask, mask, cpu_siblings);
 }
 
 /*
@@ -3793,6 +3794,7 @@ init_pdata(struct csched2_private *prv,
            unsigned int cpu)
 {
     struct csched2_runqueue_data *rqd;
+    unsigned int rcpu;
 
     ASSERT(rw_is_write_locked(&prv->lock));
     ASSERT(!cpumask_test_cpu(cpu, &prv->initialized));
@@ -3810,12 +3812,23 @@ init_pdata(struct csched2_private *prv,
         printk(XENLOG_INFO " First cpu on runqueue, activating\n");
         activate_runqueue(prv, spc->runq_id);
     }
-    
+
     __cpumask_set_cpu(cpu, &rqd->idle);
     __cpumask_set_cpu(cpu, &rqd->active);
     __cpumask_set_cpu(cpu, &prv->initialized);
     __cpumask_set_cpu(cpu, &rqd->smt_idle);
 
+    /* On the boot cpu we are called before cpu_sibling_mask has been set up. */
+    if ( cpu == 0 && system_state < SYS_STATE_active )
+        __cpumask_set_cpu(cpu, &csched2_pcpu(cpu)->sibling_mask);
+    else
+        for_each_cpu ( rcpu, per_cpu(cpu_sibling_mask, cpu) )
+            if ( cpumask_test_cpu(rcpu, &rqd->active) )
+            {
+                __cpumask_set_cpu(cpu, &csched2_pcpu(rcpu)->sibling_mask);
+                __cpumask_set_cpu(rcpu, &csched2_pcpu(cpu)->sibling_mask);
+            }
+
     if ( cpumask_weight(&rqd->active) == 1 )
         rqd->pick_bias = cpu;
 
@@ -3897,6 +3910,7 @@ csched2_deinit_pdata(const struct schedu
     struct csched2_private *prv = csched2_priv(ops);
     struct csched2_runqueue_data *rqd;
     struct csched2_pcpu *spc = pcpu;
+    unsigned int rcpu;
 
     write_lock_irqsave(&prv->lock, flags);
 
@@ -3923,6 +3937,9 @@ csched2_deinit_pdata(const struct schedu
 
     printk(XENLOG_INFO "Removing cpu %d from runqueue %d\n", cpu, spc->runq_id);
 
+    for_each_cpu ( rcpu, &rqd->active )
+        __cpumask_clear_cpu(cpu, &csched2_pcpu(rcpu)->sibling_mask);
+
     __cpumask_clear_cpu(cpu, &rqd->idle);
     __cpumask_clear_cpu(cpu, &rqd->smt_idle);
     __cpumask_clear_cpu(cpu, &rqd->active);
++++++ 5ca46b68-x86emul-no-GPR-update-upon-AVX-gather-failures.patch ++++++
# Commit 74f299bbd7d5cc52325b5866c17b44dd0bd1c5a2
# Date 2019-04-03 10:14:32 +0200
# Author Jan Beulich <[email protected]>
# Committer Jan Beulich <[email protected]>
x86emul: suppress general register update upon AVX gather failures

While destination and mask registers may indeed need updating in this
case, the rIP update in particular needs to be avoided, as well as e.g.
raising a single step trap.

Reported-by: George Dunlap <[email protected]>
Signed-off-by: Jan Beulich <[email protected]>
Acked-by: Andrew Cooper <[email protected]>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -8549,6 +8549,9 @@ x86_emulate(
         invoke_stub("", "", "+m" (mask) : "a" (&mask));
         put_stub(stub);
 
+        if ( rc != X86EMUL_OKAY )
+            goto done;
+
         state->simd_size = simd_none;
         break;
     }
++++++ 5ca773d1-x86emul-dont-read-mask-reg-without-AVX512F.patch ++++++
# Commit 6cb7e52edf823fd89fe14da94f9bf3e5cf99d1ff
# Date 2019-04-05 17:27:13 +0200
# Author Jan Beulich <[email protected]>
# Committer Jan Beulich <[email protected]>
x86emul: don't read mask register on AVX512F-incapable platforms

Nor when register state isn't sufficiently enabled.

Reported-by: George Dunlap <[email protected]>
Signed-off-by: Jan Beulich <[email protected]>
Reviewed-by: Andrew Cooper <[email protected]>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -3511,7 +3511,8 @@ x86_emulate(
     }
 
     /* With a memory operand, fetch the mask register in use (if any). */
-    if ( ea.type == OP_MEM && evex.opmsk )
+    if ( ea.type == OP_MEM && evex.opmsk &&
+         _get_fpu(fpu_type = X86EMUL_FPU_opmask, ctxt, ops) == X86EMUL_OKAY )
     {
         uint8_t *stb = get_stub(stub);
 
@@ -3532,6 +3533,14 @@ x86_emulate(
         fault_suppression = true;
     }
 
+    if ( fpu_type == X86EMUL_FPU_opmask )
+    {
+        /* Squash (side) effects of the _get_fpu() above. */
+        x86_emul_reset_event(ctxt);
+        put_fpu(X86EMUL_FPU_opmask, false, state, ctxt, ops);
+        fpu_type = X86EMUL_FPU_none;
+    }
+
     /* Decode (but don't fetch) the destination operand: register or memory. */
     switch ( d & DstMask )
     {
++++++ 5cab1f66-timers-fix-memory-leak-with-cpu-plug.patch ++++++

WARNING: Breaks Arm build! Fix is being discussed upstream.

# Commit 597fbb8be6021440cd53493c14201c32671bade1
# Date 2019-04-08 11:16:06 +0100
# Author Andrew Cooper <[email protected]>
# Committer Andrew Cooper <[email protected]>
xen/timers: Fix memory leak with cpu unplug/plug

timer_softirq_action() realloc's itself a larger timer heap whenever
necessary, which includes bootstrapping from the empty dummy_heap.  Nothing
ever freed this allocation.

CPU plug and unplug has the side effect of zeroing the percpu data area, which
clears ts->heap.  This in turn causes new timers to be put on the list rather
than the heap, and for timer_softirq_action() to bootstrap itself again.

This in practice leaks ts->heap every time a CPU is unplugged and replugged.

Implement free_percpu_timers() which includes freeing ts->heap when
appropriate, and update the notifier callback with the recent cpu parking
logic and free-avoidance across suspend.

Signed-off-by: Andrew Cooper <[email protected]>
Reviewed-by: Jan Beulich <[email protected]>
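
A minimal sketch of the cleanup pattern (simplified names, standard free() standing in for xfree()): only a heap that was really allocated gets freed, and the pointer is reset to the static sentinel so a replugged CPU bootstraps itself cleanly again.

    #include <stdlib.h>

    struct timer;
    static struct timer *dummy_heap;          /* empty sentinel heap */

    struct timers_state {
        struct timer **heap;
    };

    static void free_heap(struct timers_state *ts)
    {
        if ( ts->heap != &dummy_heap )        /* was allocated on demand */
        {
            free(ts->heap);
            ts->heap = &dummy_heap;           /* back to the sentinel */
        }
    }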

--- a/xen/common/timer.c
+++ b/xen/common/timer.c
@@ -601,6 +601,22 @@ static void migrate_timers_from_cpu(unsi
 
 static struct timer *dummy_heap;
 
+static void free_percpu_timers(unsigned int cpu)
+{
+    struct timers *ts = &per_cpu(timers, cpu);
+
+    migrate_timers_from_cpu(cpu);
+
+    ASSERT(GET_HEAP_SIZE(ts->heap) == 0);
+    if ( GET_HEAP_LIMIT(ts->heap) )
+    {
+        xfree(ts->heap);
+        ts->heap = &dummy_heap;
+    }
+    else
+        ASSERT(ts->heap == &dummy_heap);
+}
+
 static int cpu_callback(
     struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
@@ -614,10 +630,18 @@ static int cpu_callback(
         spin_lock_init(&ts->lock);
         ts->heap = &dummy_heap;
         break;
+
     case CPU_UP_CANCELED:
     case CPU_DEAD:
-        migrate_timers_from_cpu(cpu);
+        if ( !park_offline_cpus && system_state != SYS_STATE_suspend )
+            free_percpu_timers(cpu);
         break;
+
+    case CPU_REMOVE:
+        if ( park_offline_cpus )
+            free_percpu_timers(cpu);
+        break;
+
     default:
         break;
     }
++++++ 5cac6219-xen-cpu-Fix-ARM-build-following-cs-597fbb8.patch ++++++
Subject: xen/cpu: Fix ARM build following c/s 597fbb8
From: Andrew Cooper [email protected] Mon Apr 8 18:20:07 2019 +0100
Date: Tue Apr 9 10:12:57 2019 +0100:
Git: a6448adfd3d537aacbbd784e5bf1777ab3ff5f85

c/s 597fbb8 "xen/timers: Fix memory leak with cpu unplug/plug" broke the ARM
build by being the first patch to add park_offline_cpus to common code.

While it is currently specific to Intel hardware (for reasons of being able to
handle machine check exceptions without an immediate system reset), it isn't
inherently architecture specific, so define it to be false on ARM for now.

Add a comment in both smp.h headers explaining the intended behaviour of the
option.

Signed-off-by: Andrew Cooper <[email protected]>
Reviewed-by: Julien Grall <[email protected]>
Reviewed-by: Wei Liu <[email protected]>

diff --git a/xen/include/asm-arm/smp.h b/xen/include/asm-arm/smp.h
index 3c122681d7..fdbcefa241 100644
--- a/xen/include/asm-arm/smp.h
+++ b/xen/include/asm-arm/smp.h
@@ -14,6 +14,12 @@ DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
 
 #define raw_smp_processor_id() (get_processor_id())
 
+/*
+ * Do we, for platform reasons, need to actually keep CPUs online when we
+ * would otherwise prefer them to be off?
+ */
+#define park_offline_cpus false
+
 extern void noreturn stop_cpu(void);
 
 extern int arch_smp_init(void);
diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index 09c55458df..9f533f9072 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -26,6 +26,10 @@ DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_mask);
 DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
 DECLARE_PER_CPU(cpumask_var_t, scratch_cpumask);
 
+/*
+ * Do we, for platform reasons, need to actually keep CPUs online when we
+ * would otherwise prefer them to be off?
+ */
 extern bool park_offline_cpus;
 
 void smp_send_nmi_allbutself(void);
++++++ 5cac6cba-vmx-Fixup-removals-of-MSR-load-save-list-entries.patch ++++++
Subject: x86/vmx: Fixup removals of MSR load/save list entries
From: Igor Druzhinin [email protected] Thu Apr 4 17:25:10 2019 +0100
Date: Tue Apr 9 10:58:18 2019 +0100:
Git: e28c0ee3356f52f589bbae54e89aaed25c1f599d

Commit 540d5422 ("x86/vmx: Support removing MSRs from the host/guest
load/save lists") introduced infrastructure finally exposed by
commit fd32dcfe ("x86/vmx: Don't leak EFER.NXE into guest context")
that led to a functional regression on Harpertown and earlier cores
(Gen 1 VT-x) due to MSR count being incorrectly set in VMCS.
As a result, as soon as guest EFER becomes equal to Xen EFER
(which eventually happens in almost every 64-bit VM) and its MSR
entry is supposed to be removed, a stale version of EFER is loaded
into a guest instead causing almost immediate guest failure.

Signed-off-by: Igor Druzhinin <[email protected]>
Reviewed-by: Jan Beulich <[email protected]>
Reviewed-by: Andrew Cooper <[email protected]>
Acked-by: Kevin Tian <[email protected]>
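
A runnable toy example of the off-by-one being fixed (hypothetical helper, not the VMX code): a post-decrement hands the old count to the VMCS write, so the removed MSR entry is still loaded; the pre-decrement form stores the reduced count.

    #include <stdio.h>

    static unsigned int vmcs_count;           /* stand-in for the VMCS field */

    static void vmwrite_count(unsigned int v) { vmcs_count = v; }

    int main(void)
    {
        unsigned int msr_count = 3;

        vmwrite_count(msr_count--);           /* buggy: VMCS still says 3 */
        printf("post-decrement: VMCS=%u list=%u\n", vmcs_count, msr_count);

        msr_count = 3;
        vmwrite_count(--msr_count);           /* fixed: VMCS says 2 */
        printf("pre-decrement:  VMCS=%u list=%u\n", vmcs_count, msr_count);

        return 0;
    }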

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 74f2a08cfd..45d18493df 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1490,15 +1490,15 @@ int vmx_del_msr(struct vcpu *v, uint32_t msr, enum vmx_msr_list_type type)
     switch ( type )
     {
     case VMX_MSR_HOST:
-        __vmwrite(VM_EXIT_MSR_LOAD_COUNT, vmx->host_msr_count--);
+        __vmwrite(VM_EXIT_MSR_LOAD_COUNT, --vmx->host_msr_count);
         break;
 
     case VMX_MSR_GUEST:
-        __vmwrite(VM_EXIT_MSR_STORE_COUNT, vmx->msr_save_count--);
+        __vmwrite(VM_EXIT_MSR_STORE_COUNT, --vmx->msr_save_count);
 
         /* Fallthrough */
     case VMX_MSR_GUEST_LOADONLY:
-        __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_load_count--);
+        __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, --vmx->msr_load_count);
         break;
     }
 
++++++ gcc9-ignore-warnings.patch ++++++
--- /var/tmp/diff_new_pack.U92DbZ/_old  2019-04-18 13:58:21.039993732 +0200
+++ /var/tmp/diff_new_pack.U92DbZ/_new  2019-04-18 13:58:21.039993732 +0200
@@ -4,13 +4,6 @@
     trace.c: In function '__trace_hypercall':
    trace.c:826:19: error: taking address of packed member of 'struct <anonymous>' may result in an unaligned pointer value [-Werror=address-of-packed-member]
       826 |     uint32_t *a = d.args;
-Error in e820.c:
-    e820.c: In function 'clip_to_limit':
-    /home/abuild/rpmbuild/BUILD/xen-4.11.1-testing/xen/include/asm/string.h:10:26: error: '__builtin_memmove' offset [-16, -36] is out of the bounds [0, 20484] of object 'e820' with type 'struct e820map' [-Werror=array-bounds]
-       10 | #define memmove(d, s, n) __builtin_memmove(d, s, n)
-          |                          ^~~~~~~~~~~~~~~~~~~~~~~~~~
-    e820.c:404:13: note: in expansion of macro 'memmove'
-      404 |             memmove(&e820.map[i], &e820.map[i+1],
 Error in generic.c:
     generic.c: In function 'print_mtrr_state':
    generic.c:177:11: error: '%0*lx' directive output between 1 and 1073741823 bytes may cause result to exceed 'INT_MAX' [-Werror=format-overflow=]
@@ -26,10 +19,8 @@
       338 |     remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
           |                         ^~~~~~~~~~~~~~~~~~~~~~~~~
    
-Index: xen-4.12.0-testing/xen/common/trace.c
-===================================================================
---- xen-4.12.0-testing.orig/xen/common/trace.c
-+++ xen-4.12.0-testing/xen/common/trace.c
+--- a/xen/common/trace.c
++++ b/xen/common/trace.c
 @@ -816,6 +816,9 @@ unlock:
          tasklet_schedule(&trace_notify_dom0_tasklet);
  }
@@ -48,37 +39,19 @@
  
  #define APPEND_ARG32(i)                         \
      do {                                        \
-Index: xen-4.12.0-testing/xen/arch/x86/e820.c
-===================================================================
---- xen-4.12.0-testing.orig/xen/arch/x86/e820.c
-+++ xen-4.12.0-testing/xen/arch/x86/e820.c
-@@ -366,7 +366,7 @@ static unsigned long __init find_max_pfn
- 
- static void __init clip_to_limit(uint64_t limit, char *warnmsg)
- {
--    int i;
-+    unsigned int i;
-     char _warnmsg[160];
-     uint64_t old_limit = 0;
- 
-Index: xen-4.12.0-testing/xen/arch/x86/cpu/mtrr/generic.c
-===================================================================
---- xen-4.12.0-testing.orig/xen/arch/x86/cpu/mtrr/generic.c
-+++ xen-4.12.0-testing/xen/arch/x86/cpu/mtrr/generic.c
-@@ -179,6 +179,9 @@ static void __init print_fixed(unsigned
-       }
- }
- 
-+#if __GNUC__ > 8
-+#pragma GCC diagnostic ignored "-Wformat-overflow"
-+#endif
+--- a/xen/arch/x86/cpu/mtrr/generic.c
++++ b/xen/arch/x86/cpu/mtrr/generic.c
+@@ -182,7 +182,7 @@ static void __init print_fixed(unsigned
  static void __init print_mtrr_state(const char *level)
  {
        unsigned int i;
-Index: xen-4.12.0-testing/xen/drivers/passthrough/vtd/utils.c
-===================================================================
---- xen-4.12.0-testing.orig/xen/drivers/passthrough/vtd/utils.c
-+++ xen-4.12.0-testing/xen/drivers/passthrough/vtd/utils.c
+-      int width;
++      unsigned char width; /* gcc9 doesn't like plain "int" here */
+ 
+       printk("%sMTRR default type: %s\n", level,
+              mtrr_attrib_to_str(mtrr_state.def_type));
+--- a/xen/drivers/passthrough/vtd/utils.c
++++ b/xen/drivers/passthrough/vtd/utils.c
 @@ -172,6 +172,9 @@ void print_vtd_entries(struct iommu *iom
      } while ( --level );
  }
@@ -89,10 +62,8 @@
  void vtd_dump_iommu_info(unsigned char key)
  {
      struct acpi_drhd_unit *drhd;
-Index: xen-4.12.0-testing/xen/drivers/passthrough/vtd/intremap.c
-===================================================================
---- xen-4.12.0-testing.orig/xen/drivers/passthrough/vtd/intremap.c
-+++ xen-4.12.0-testing/xen/drivers/passthrough/vtd/intremap.c
+--- a/xen/drivers/passthrough/vtd/intremap.c
++++ b/xen/drivers/passthrough/vtd/intremap.c
 @@ -322,6 +322,9 @@ static int remap_entry_to_ioapic_rte(
      return 0;
  }

++++++ pygrub-python3-conversion.patch ++++++
--- /var/tmp/diff_new_pack.U92DbZ/_old  2019-04-18 13:58:21.095993751 +0200
+++ /var/tmp/diff_new_pack.U92DbZ/_new  2019-04-18 13:58:21.095993751 +0200
@@ -657,7 +657,7 @@
 +
  PyMODINIT_FUNC
 -initxenfsimage(void)
-+PyInit_fsimage(void)
++PyInit_xenfsimage(void)
  {
 -      Py_InitModule("xenfsimage", fsimage_module_methods);
 +      return PyModule_Create(&fsimage_module);

++++++ xen2libvirt.py ++++++
--- /var/tmp/diff_new_pack.U92DbZ/_old  2019-04-18 13:58:21.171993776 +0200
+++ /var/tmp/diff_new_pack.U92DbZ/_new  2019-04-18 13:58:21.171993776 +0200
@@ -107,7 +107,7 @@
         print(xml)
     else:
         print_verbose('Importing converted libvirt domXML into libvirt...')
-        dom = conn.defineXML(xml)
+        dom = conn.defineXML(xml.decode("utf-8"))
         if dom is None:
             print('Failed to define domain from converted domXML')
             sys.exit(1)

++++++ xenstore-launch.patch ++++++
References: bsc#1131811

When the xenstored service is started it exits successfully, but systemd seems to
lose track of the service and reports an error, causing other xen services to fail.
This patch is a workaround giving systemd time to acknowledge a successful start
of xenstored. The real fix is believed to be needed in systemd.

diff --git a/tools/hotplug/Linux/launch-xenstore.in b/tools/hotplug/Linux/launch-xenstore.in
index 991dec8d25..eb3d7c964c 100644
--- a/tools/hotplug/Linux/launch-xenstore.in
+++ b/tools/hotplug/Linux/launch-xenstore.in
@@ -79,6 +79,7 @@ test -f @CONFIG_DIR@/@CONFIG_LEAF_DIR@/xencommons && . @CONFIG_DIR@/@CONFIG_LEAF
        echo -n Starting $XENSTORE_DOMAIN_KERNEL...
        ${LIBEXEC_BIN}/init-xenstore-domain $XENSTORE_DOMAIN_ARGS || exit 1
        systemd-notify --ready 2>/dev/null
+       systemd-notify --booted 2>/dev/null && sleep 60
 
        exit 0
 }
