Hello community,

here is the log from the commit of package xen for openSUSE:Factory checked in 
at 2016-09-17 14:34:14
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/xen (Old)
 and      /work/SRC/openSUSE:Factory/.xen.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "xen"

Changes:
--------
--- /work/SRC/openSUSE:Factory/xen/xen.changes  2016-08-26 23:14:41.000000000 +0200
+++ /work/SRC/openSUSE:Factory/.xen.new/xen.changes     2016-09-17 14:34:17.000000000 +0200
@@ -1,0 +2,45 @@
+Mon Sep 12 08:44:11 MDT 2016 - carn...@suse.com
+
+- bsc#995785 - VUL-0: CVE-2016-7092: xen: x86: Disallow L3
+  recursive pagetable for 32-bit PV guests (XSA-185)
+  57d1563d-x86-32on64-don-t-allow-recursive-page-tables-from-L3.patch
+- bsc#995789 - VUL-0: CVE-2016-7093: xen: x86: Mishandling of
+  instruction pointer truncation during emulation (XSA-186)
+  57d15679-x86-emulate-Correct-boundary-interactions-of-emulated-insns.patch
+  57d18642-hvm-fep-Allow-test-insns-crossing-1-0-boundary.patch
+- bsc#995792 - VUL-0: CVE-2016-7094: xen: x86 HVM: Overflow of
+  sh_ctxt->seg_reg[] (XSA-187)
+  57d1569a-x86-shadow-Avoid-overflowing-sh_ctxt-seg_reg.patch
+  57d18642-x86-segment-Bounds-check-accesses-to-emulation-ctxt-seg_reg.patch
+- bsc#991934 - xen hypervisor crash in csched_acct
+  57c96df3-credit1-fix-a-race-when-picking-initial-pCPU.patch
+- Upstream patches from Jan
+  57c4412b-x86-HVM-add-guarding-logic-for-VMX-specific-code.patch
+  57c57f73-libxc-correct-max_pfn-calculation-for-saving-domain.patch
+  57c805bf-x86-levelling-restrict-non-architectural-OSXSAVE-handling.patch
+  57c805c1-x86-levelling-pass-vcpu-to-ctxt_switch_levelling.patch
+  57c805c3-x86-levelling-provide-architectural-OSXSAVE-handling.patch
+  57c82be2-x86-32on64-adjust-call-gate-emulation.patch
+  57c96e2c-x86-correct-PT_NOTE-file-position.patch
+  57cfed43-VMX-correct-feature-checks-for-MPX-and-XSAVES.patch
+
+-------------------------------------------------------------------
+Mon Sep 12 13:10:21 UTC 2016 - oher...@suse.de
+
+- bsc#979002 - add 60-persistent-xvd.rules and helper script
+  also to initrd, add the relevant dracut helper
+
+-------------------------------------------------------------------
+Mon Sep  5 11:39:21 UTC 2016 - oher...@suse.de
+
+- bnc#953518 - unplug also SCSI disks in qemu-xen-traditional for
+  upstream unplug protocol
+
+-------------------------------------------------------------------
+Fri Sep  2 08:32:44 MDT 2016 - carn...@suse.com
+
+- bsc#989679 - [pvusb feature] USB device not found when
+  'virsh detach-device guest usb.xml'
+  57c93e52-fix-error-in-libxl_device_usbdev_list.patch
+
+-------------------------------------------------------------------

New:
----
  57c4412b-x86-HVM-add-guarding-logic-for-VMX-specific-code.patch
  57c57f73-libxc-correct-max_pfn-calculation-for-saving-domain.patch
  57c805bf-x86-levelling-restrict-non-architectural-OSXSAVE-handling.patch
  57c805c1-x86-levelling-pass-vcpu-to-ctxt_switch_levelling.patch
  57c805c3-x86-levelling-provide-architectural-OSXSAVE-handling.patch
  57c82be2-x86-32on64-adjust-call-gate-emulation.patch
  57c93e52-fix-error-in-libxl_device_usbdev_list.patch
  57c96df3-credit1-fix-a-race-when-picking-initial-pCPU.patch
  57c96e2c-x86-correct-PT_NOTE-file-position.patch
  57cfed43-VMX-correct-feature-checks-for-MPX-and-XSAVES.patch
  57d1563d-x86-32on64-don-t-allow-recursive-page-tables-from-L3.patch
  57d15679-x86-emulate-Correct-boundary-interactions-of-emulated-insns.patch
  57d1569a-x86-shadow-Avoid-overflowing-sh_ctxt-seg_reg.patch
  57d18642-hvm-fep-Allow-test-insns-crossing-1-0-boundary.patch
  57d18642-x86-segment-Bounds-check-accesses-to-emulation-ctxt-seg_reg.patch

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ xen.spec ++++++
--- /var/tmp/diff_new_pack.3aw7sj/_old  2016-09-17 14:34:20.000000000 +0200
+++ /var/tmp/diff_new_pack.3aw7sj/_new  2016-09-17 14:34:20.000000000 +0200
@@ -165,7 +165,7 @@
 %endif
 %endif
 
-Version:        4.7.0_10
+Version:        4.7.0_12
 Release:        0
 Summary:        Xen Virtualization: Hypervisor (aka VMM aka Microkernel)
 License:        GPL-2.0
@@ -219,6 +219,21 @@
 Patch14:        57ac6316-don-t-restrict-DMA-heap-to-node-0.patch
 Patch15:        57b71fc5-x86-EFI-don-t-apply-relocations-to-l-2-3-_bootmap.patch
 Patch16:        57b7447b-dont-permit-guest-to-populate-PoD-pages-for-itself.patch
+Patch17:        57c4412b-x86-HVM-add-guarding-logic-for-VMX-specific-code.patch
+Patch18:        57c57f73-libxc-correct-max_pfn-calculation-for-saving-domain.patch
+Patch19:        57c805bf-x86-levelling-restrict-non-architectural-OSXSAVE-handling.patch
+Patch20:        57c805c1-x86-levelling-pass-vcpu-to-ctxt_switch_levelling.patch
+Patch21:        57c805c3-x86-levelling-provide-architectural-OSXSAVE-handling.patch
+Patch22:        57c82be2-x86-32on64-adjust-call-gate-emulation.patch
+Patch23:        57c93e52-fix-error-in-libxl_device_usbdev_list.patch
+Patch24:        57c96df3-credit1-fix-a-race-when-picking-initial-pCPU.patch
+Patch25:        57c96e2c-x86-correct-PT_NOTE-file-position.patch
+Patch26:        57cfed43-VMX-correct-feature-checks-for-MPX-and-XSAVES.patch
+Patch27:        57d1563d-x86-32on64-don-t-allow-recursive-page-tables-from-L3.patch
+Patch28:        57d15679-x86-emulate-Correct-boundary-interactions-of-emulated-insns.patch
+Patch29:        57d1569a-x86-shadow-Avoid-overflowing-sh_ctxt-seg_reg.patch
+Patch30:        57d18642-hvm-fep-Allow-test-insns-crossing-1-0-boundary.patch
+Patch31:        57d18642-x86-segment-Bounds-check-accesses-to-emulation-ctxt-seg_reg.patch
 # Upstream qemu-traditional patches
 Patch250:       VNC-Support-for-ExtendedKeyEvent-client-message.patch
 Patch251:       0001-net-move-the-tap-buffer-into-TAPState.patch
@@ -418,7 +433,8 @@
 %if %suse_version >= 1315
 Requires:       grub2-x86_64-xen
 %endif
-Requires:       qemu-ovmf-x86_64
+# Uncomment when ovmf is supported
+#Requires:       qemu-ovmf-x86_64
 Requires:       qemu-x86
 %endif
 %ifarch %arm aarch64
@@ -556,6 +572,21 @@
 %patch14 -p1
 %patch15 -p1
 %patch16 -p1
+%patch17 -p1
+%patch18 -p1
+%patch19 -p1
+%patch20 -p1
+%patch21 -p1
+%patch22 -p1
+%patch23 -p1
+%patch24 -p1
+%patch25 -p1
+%patch26 -p1
+%patch27 -p1
+%patch28 -p1
+%patch29 -p1
+%patch30 -p1
+%patch31 -p1
 # Upstream qemu patches
 %patch250 -p1
 %patch251 -p1
@@ -820,6 +851,25 @@
 LABEL="xvd_aliases_end"
 _EOR_
 #
+dracut_moduledir=$RPM_BUILD_ROOT/usr/lib/dracut/modules.d/50%{name}-tools-domU
+mkdir -p ${dracut_moduledir}
+tee ${dracut_moduledir}/module-setup.sh <<'_EOS_'
+#!/bin/bash
+check() {
+  require_binaries xenstore-read || return 1
+  return 0
+}
+
+depends() {
+  return 0
+}
+install() {
+  inst_multiple xenstore-read
+  inst_multiple ${udevdir}/%{name}-tools-domU.sh
+  inst_rules 60-persistent-xvd.rules
+}
+_EOS_
+#
 udev_programdir=$RPM_BUILD_ROOT/usr/lib/udev
 mkdir -p ${udev_programdir}
 tee ${udev_programdir}/%{name}-tools-domU.sh <<'_EOS_'
@@ -1289,6 +1339,7 @@
 /bin/xenstore-*
 %if %{?with_systemd}0
 /usr/lib/udev
+/usr/lib/dracut
 %endif
 
 %files devel

++++++ 57c4412b-x86-HVM-add-guarding-logic-for-VMX-specific-code.patch ++++++
# Commit 81caac0cd0f56b0052a7884e6bd99e3a652ddd59
# Date 2016-08-29 16:05:31 +0200
# Author Suravee Suthikulpanit <suravee.suthikulpa...@amd.com>
# Committer Jan Beulich <jbeul...@suse.com>
x86/HVM: add guarding logic for VMX specific code

The struct hvm_domain.vmx is defined in a union along with the svm.
This can cause issues for SVM since this code is used in the common
scheduling code for x86. The logic must check for cpu_has_vmx before
accessing the hvm_domain.vmx structure.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpa...@amd.com>
Acked-by: Jan Beulich <jbeul...@suse.com>

--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -611,7 +611,7 @@ unsigned long hvm_cr4_guest_reserved_bit
     struct vcpu *v_ = (v);                                      \
     struct domain *d_ = v_->domain;                             \
     if ( has_hvm_container_domain(d_) &&                        \
-         d_->arch.hvm_domain.vmx.vcpu_block )                   \
+         (cpu_has_vmx && d_->arch.hvm_domain.vmx.vcpu_block) )  \
         d_->arch.hvm_domain.vmx.vcpu_block(v_);                 \
 })
 
++++++ 57c57f73-libxc-correct-max_pfn-calculation-for-saving-domain.patch ++++++
# Commit 9daed8321b44c3ca82e412eb130f84e6b6c17dc5
# Date 2016-08-30 13:43:31 +0100
# Author Juergen Gross <jgr...@suse.com>
# Committer Wei Liu <wei.l...@citrix.com>
libxc: correct max_pfn calculation for saving domain

Commit 91e204d37f44913913776d0a89279721694f8b32 ("libxc: try to find
last used pfn when migrating") introduced a bug for the case of a
domain supporting the virtual mapped linear p2m list: the maximum pfn
of the domain calculated from the p2m memory allocation might be too
low.

Correct this.

Reported-by: Stefan Bader <stefan.ba...@canonical.com>
Signed-off-by: Juergen Gross <jgr...@suse.com>
Tested-by: Stefan Bader <stefan.ba...@canonical.com>
Acked-by: Wei Liu <wei.l...@citrix.com>

--- a/tools/libxc/xc_sr_save_x86_pv.c
+++ b/tools/libxc/xc_sr_save_x86_pv.c
@@ -430,6 +430,8 @@ static int map_p2m_list(struct xc_sr_con
 
         if ( level == 2 )
         {
+            if ( saved_idx == idx_end )
+                saved_idx++;
             max_pfn = ((xen_pfn_t)saved_idx << 9) * fpp - 1;
             if ( max_pfn < ctx->x86_pv.max_pfn )
             {
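
The arithmetic behind the hunk can be sketched in isolation (standalone C; the fpp value, p2m entries per page, is an assumption for 4k pages with 8-byte entries, not read from the patch): each level-2 slot covers 512 level-1 pages holding fpp pfns each, so the end index itself still maps pfns and has to be counted once, which is what the added saved_idx bump ensures.

/* Standalone sketch of the max_pfn derivation in map_p2m_list();
 * fpp below is an assumed value, not taken from the hypervisor. */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t xen_pfn_t;

static xen_pfn_t max_pfn_for(unsigned int saved_idx, unsigned int idx_end,
                             unsigned int fpp)
{
    /* Mirror the fix: the end index itself still covers pfns. */
    if (saved_idx == idx_end)
        saved_idx++;
    /* Each L2 slot maps 512 L1 pages of fpp entries each. */
    return ((xen_pfn_t)saved_idx << 9) * fpp - 1;
}

int main(void)
{
    const unsigned int fpp = 4096 / 8;   /* assumed: 4k pages, 8-byte entries */
    unsigned int idx_end = 3;

    printf("without bump: %#llx\n",
           (unsigned long long)(((xen_pfn_t)idx_end << 9) * fpp - 1));
    printf("with bump:    %#llx\n",
           (unsigned long long)max_pfn_for(idx_end, idx_end, fpp));
    return 0;
}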
++++++ 57c805bf-x86-levelling-restrict-non-architectural-OSXSAVE-handling.patch ++++++
# Commit 3b7cac5232012e167b284aba738fef1eceda33f8
# Date 2016-09-01 11:41:03 +0100
# Author Andrew Cooper <andrew.coop...@citrix.com>
# Committer Andrew Cooper <andrew.coop...@citrix.com>
x86/levelling: Restrict non-architectural OSXSAVE handling to emulated CPUID

There is no need to extend the workaround to the faulted CPUID view, as
Linux's dependence on the workaround is strictly via the emulated view.

This causes a guest kernel faulted CPUID to observe architectural behaviour
with respect to its CR4.OSXSAVE setting.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
Reviewed-by: Jan Beulich <jbeul...@suse.com>

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -972,6 +972,8 @@ void pv_cpuid(struct cpu_user_regs *regs
              *
              * Therefore, the leaking of Xen's OSXSAVE setting has become a
              * defacto part of the PV ABI and can't reasonably be corrected.
+             * It can however be restricted to only the enlightened CPUID
+             * view, as seen by the guest kernel.
              *
              * The following situations and logic now applies:
              *
@@ -985,14 +987,18 @@ void pv_cpuid(struct cpu_user_regs *regs
              *
              * - Enlightened CPUID or CPUID faulting available:
              *    Xen can fully control what is seen here.  Guest kernels need
-             *    to see the leaked OSXSAVE, but guest userspace is given
-             *    architectural behaviour, to reflect the guest kernels
-             *    intentions.
+             *    to see the leaked OSXSAVE via the enlightened path, but
+             *    guest userspace and the native is given architectural
+             *    behaviour.
+             *
+             *    Emulated vs Faulted CPUID is distinguised based on whether a
+             *    #UD or #GP is currently being serviced.
              */
             /* OSXSAVE cleared by pv_featureset.  Fast-forward CR4 back in. */
-            if ( (guest_kernel_mode(curr, regs) &&
-                  (read_cr4() & X86_CR4_OSXSAVE)) ||
-                 (curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) )
+            if ( (curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) ||
+                 (regs->entry_vector == TRAP_invalid_op &&
+                  guest_kernel_mode(curr, regs) &&
+                  (read_cr4() & X86_CR4_OSXSAVE)) )
                 c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
 
             /*
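
The reworked condition can be read as a small predicate; the standalone sketch below restates it with plain booleans (illustrative names, not the hypervisor's types): the leaked OSXSAVE bit is now shown only when the guest has set its own virtualised CR4.OSXSAVE, or on the emulated (#UD) path to a guest kernel while Xen's real CR4.OSXSAVE is set.

/* Standalone restatement of the condition pv_cpuid() uses after the patch
 * for the non-architectural OSXSAVE leak; booleans stand in for real state. */
#include <stdio.h>
#include <stdbool.h>

static bool show_leaked_osxsave(bool guest_cr4_osxsave,  /* guest's own CR4 */
                                bool servicing_ud,       /* emulated CPUID (#UD) */
                                bool guest_kernel_mode,
                                bool xen_cr4_osxsave)    /* Xen's real CR4 */
{
    /* Guest explicitly enabled OSXSAVE: architectural behaviour. */
    if (guest_cr4_osxsave)
        return true;
    /* Legacy leak: only via the enlightened (#UD-emulated) path, only for
     * the kernel, and only because Xen itself runs with OSXSAVE set. */
    return servicing_ud && guest_kernel_mode && xen_cr4_osxsave;
}

int main(void)
{
    /* Faulted CPUID (#GP path) from a kernel no longer sees the leak. */
    printf("faulted, kernel:  %d\n", show_leaked_osxsave(false, false, true, true));
    /* Emulated CPUID from the kernel still does, preserving the PV ABI. */
    printf("emulated, kernel: %d\n", show_leaked_osxsave(false, true, true, true));
    /* Userspace always gets architectural behaviour. */
    printf("emulated, user:   %d\n", show_leaked_osxsave(false, true, false, true));
    return 0;
}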
++++++ 57c805c1-x86-levelling-pass-vcpu-to-ctxt_switch_levelling.patch ++++++
# Commit 33b23e5ab319a6bf9bfd38c4d9268fa6d9d072c6
# Date 2016-09-01 11:41:05 +0100
# Author Andrew Cooper <andrew.coop...@citrix.com>
# Committer Andrew Cooper <andrew.coop...@citrix.com>
x86/levelling: Pass a vcpu rather than a domain to ctxt_switch_levelling()

A subsequent change needs to special-case OSXSAVE handling, which is per-vcpu
rather than per-domain.

No functional change.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
Reviewed-by: Jan Beulich <jbeul...@suse.com>

--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -203,9 +203,10 @@ static void __init noinline probe_maskin
  * used to context switch to the default host state (by the cpu bringup-code,
  * crash path, etc).
  */
-static void amd_ctxt_switch_levelling(const struct domain *nextd)
+static void amd_ctxt_switch_levelling(const struct vcpu *next)
 {
        struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
+       const struct domain *nextd = next ? next->domain : NULL;
        const struct cpuidmasks *masks =
                (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
                ? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -90,11 +90,11 @@ static const struct cpu_dev default_cpu
 };
 static const struct cpu_dev *this_cpu = &default_cpu;
 
-static void default_ctxt_switch_levelling(const struct domain *nextd)
+static void default_ctxt_switch_levelling(const struct vcpu *next)
 {
        /* Nop */
 }
-void (* __read_mostly ctxt_switch_levelling)(const struct domain *nextd) =
+void (* __read_mostly ctxt_switch_levelling)(const struct vcpu *next) =
        default_ctxt_switch_levelling;
 
 bool_t opt_cpu_info;
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -151,9 +151,10 @@ static void __init probe_masking_msrs(vo
  * used to context switch to the default host state (by the cpu bringup-code,
  * crash path, etc).
  */
-static void intel_ctxt_switch_levelling(const struct domain *nextd)
+static void intel_ctxt_switch_levelling(const struct vcpu *next)
 {
        struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
+       const struct domain *nextd = next ? next->domain : NULL;
        const struct cpuidmasks *masks;
 
        if (cpu_has_cpuid_faulting) {
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2107,7 +2107,7 @@ void context_switch(struct vcpu *prev, s
             load_segments(next);
         }
 
-        ctxt_switch_levelling(nextd);
+        ctxt_switch_levelling(next);
     }
 
     context_saved(prev);
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -211,7 +211,7 @@ extern struct cpuinfo_x86 boot_cpu_data;
 extern struct cpuinfo_x86 cpu_data[];
 #define current_cpu_data cpu_data[smp_processor_id()]
 
-extern void (*ctxt_switch_levelling)(const struct domain *nextd);
+extern void (*ctxt_switch_levelling)(const struct vcpu *next);
 
 extern u64 host_pat;
 extern bool_t opt_cpu_info;
++++++ 57c805c3-x86-levelling-provide-architectural-OSXSAVE-handling.patch ++++++
# Commit 08e7738ec3644350fbac0325085baac6b3c7cd11
# Date 2016-09-01 11:41:07 +0100
# Author Andrew Cooper <andrew.coop...@citrix.com>
# Committer Andrew Cooper <andrew.coop...@citrix.com>
x86/levelling: Provide architectural OSXSAVE handling to masked native CPUID

Contrary to c/s b2507fe7 "x86/domctl: Update PV domain cpumasks when setting
cpuid policy", Intel CPUID masks are applied after fast forwarding hardware
state, rather than before.  (All behaviour in this regard appears completely
undocumented by both Intel and AMD).

Therefore, a set bit in the MSR causes hardware to be fast-forwarded, while a
clear bit forces the guests view to 0, even if Xen's CR4.OSXSAVE is actually
set.

This allows Xen to provide an architectural view of a guest kernel's
CR4.OSXSAVE setting to any native CPUID instruction issued by guest kernel or
userspace, even when masking is used.

The masking value defaults to 1 (if the guest has XSAVE available) to cause
fast-forwarding to occur for the HVM and idle vcpus.

When setting the MSRs, a PV guest kernel's choice of OSXSAVE is taken into
account, and clobbered from the MSR if not set.  This causes the
fast-forwarding of Xen's CR4 state not to happen.

As a side effect, however, levelling potentially needs updating on all PV CR4
changes.

Reported-by: Jan Beulich <jbeul...@suse.com>
Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
Reviewed-by: Jan Beulich <jbeul...@suse.com>

# Commit 1461504ce3c414fc5dc717ce16f039d0742b455a
# Date 2016-09-02 08:12:29 +0200
# Author Andrew Cooper <andrew.coop...@citrix.com>
# Committer Jan Beulich <jbeul...@suse.com>
x86/levelling: fix breakage on older Intel boxes from c/s 08e7738

cpufeat_mask() yields an unsigned integer constant.  As a result, taking its
complement causes zero extension rather than sign extension.

The result is that, when a guest OS has OSXSAVE disabled, all features in 1d
are hidden from native CPUID.  Amongst other things, this causes the early
code in Linux to find no LAPIC, but for everything to appear fine later when
userspace is up and running.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
Tested-by: Jan Beulich <jbeul...@suse.com>

--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -211,6 +211,24 @@ static void amd_ctxt_switch_levelling(co
                (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
                ? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;
 
+       if ((levelling_caps & LCAP_1cd) == LCAP_1cd) {
+               uint64_t val = masks->_1cd;
+
+               /*
+                * OSXSAVE defaults to 1, which causes fast-forwarding of
+                * Xen's real setting.  Clobber it if disabled by the guest
+                * kernel.
+                */
+               if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) &&
+                   !(next->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE))
+                       val &= ~((uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE) << 32);
+
+               if (unlikely(these_masks->_1cd != val)) {
+                       wrmsr_amd(MSR_K8_FEATURE_MASK, val);
+                       these_masks->_1cd = val;
+               }
+       }
+
 #define LAZY(cap, msr, field)                                          \
        ({                                                              \
                if (unlikely(these_masks->field != masks->field) &&     \
@@ -221,7 +239,6 @@ static void amd_ctxt_switch_levelling(co
                }                                                       \
        })
 
-       LAZY(LCAP_1cd,  MSR_K8_FEATURE_MASK,       _1cd);
        LAZY(LCAP_e1cd, MSR_K8_EXT_FEATURE_MASK,   e1cd);
        LAZY(LCAP_7ab0, MSR_AMD_L7S0_FEATURE_MASK, _7ab0);
        LAZY(LCAP_6c,   MSR_AMD_THRM_FEATURE_MASK, _6c);
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -182,6 +182,24 @@ static void intel_ctxt_switch_levelling(
        masks = (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
                ? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;
 
+        if (msr_basic) {
+               uint64_t val = masks->_1cd;
+
+               /*
+                * OSXSAVE defaults to 1, which causes fast-forwarding of
+                * Xen's real setting.  Clobber it if disabled by the guest
+                * kernel.
+                */
+               if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) &&
+                   !(next->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE))
+                       val &= ~(uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE);
+
+               if (unlikely(these_masks->_1cd != val)) {
+                       wrmsrl(msr_basic, val);
+                       these_masks->_1cd = val;
+               }
+        }
+
 #define LAZY(msr, field)                                               \
        ({                                                              \
                if (unlikely(these_masks->field != masks->field) &&     \
@@ -192,7 +210,6 @@ static void intel_ctxt_switch_levelling(
                }                                                       \
        })
 
-       LAZY(msr_basic, _1cd);
        LAZY(msr_ext,   e1cd);
        LAZY(msr_xsave, Da1);
 
@@ -218,6 +235,11 @@ static void __init noinline intel_init_l
                ecx &= opt_cpuid_mask_ecx;
                edx &= opt_cpuid_mask_edx;
 
+               /* Fast-forward bits - Must be set. */
+               if (ecx & cpufeat_mask(X86_FEATURE_XSAVE))
+                       ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE);
+               edx |= cpufeat_mask(X86_FEATURE_APIC);
+
                cpuidmask_defaults._1cd &= ((u64)edx << 32) | ecx;
        }
 
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -110,10 +110,18 @@ static void update_domain_cpuid_info(str
             case X86_VENDOR_INTEL:
                 /*
                  * Intel masking MSRs are documented as AND masks.
-                 * Experimentally, they are applied before OSXSAVE and APIC
+                 * Experimentally, they are applied after OSXSAVE and APIC
                  * are fast-forwarded from real hardware state.
                  */
                 mask &= ((uint64_t)edx << 32) | ecx;
+
+                if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) )
+                    ecx = cpufeat_mask(X86_FEATURE_OSXSAVE);
+                else
+                    ecx = 0;
+                edx = cpufeat_mask(X86_FEATURE_APIC);
+
+                mask |= ((uint64_t)edx << 32) | ecx;
                 break;
 
             case X86_VENDOR_AMD:
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2696,6 +2696,7 @@ static int emulate_privileged_op(struct
         case 4: /* Write CR4 */
             v->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(v, *reg);
             write_cr4(pv_guest_cr4_to_real_cr4(v));
+            ctxt_switch_levelling(v);
             break;
 
         default:
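
The follow-up fix described in the second commit message above comes down to where the widening to 64 bits happens relative to the complement; a minimal standalone demonstration (generic C, not the Xen macros themselves):

/* Complementing a 32-bit unsigned constant before widening zero-extends,
 * so the resulting AND mask has 0 in bits 63:32 and hides every leaf-1
 * edx feature; widening first keeps them intact. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t feat = 1u << 27;   /* stand-in for cpufeat_mask(X86_FEATURE_OSXSAVE) */
    uint64_t all  = ~0ull;      /* stand-in for masks->_1cd (edx in bits 63:32) */

    /* Broken: ~feat is computed in 32 bits, then implicitly zero-extended. */
    uint64_t bad  = all & ~feat;

    /* Intended: widen first, then complement in 64 bits. */
    uint64_t good = all & ~(uint64_t)feat;

    printf("bad  = %#018llx\n", (unsigned long long)bad);
    printf("good = %#018llx\n", (unsigned long long)good);
    return 0;
}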
++++++ 57c82be2-x86-32on64-adjust-call-gate-emulation.patch ++++++
# Commit ee1cc4bfdca84d526805c4c72302c026f5e9cd94
# Date 2016-09-01 15:23:46 +0200
# Author Jan Beulich <jbeul...@suse.com>
# Committer Jan Beulich <jbeul...@suse.com>
x86/32on64: misc adjustments to call gate emulation

- There's no 32-bit displacement in 16-bit addressing mode.
- It is wrong to ASSERT() anything on parts of an instruction fetched
  from guest memory.
- The two scaling bits of a SIB byte don't affect whether there is a
  scaled index register or not.

Signed-off-by: Jan Beulich <jbeul...@suse.com>
Reviewed-by: Andrew Cooper <andrew.coop...@citrix.com>

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -3176,7 +3176,7 @@ static void emulate_gate_op(struct cpu_u
                             sib = insn_fetch(u8, base, eip, limit);
 
                             modrm = (modrm & ~7) | (sib & 7);
-                            if ( (sib >>= 3) != 4 )
+                            if ( ((sib >>= 3) & 7) != 4 )
                                 opnd_off = *(unsigned long *)
                                     decode_register(sib & 7, regs, 0);
                             opnd_off <<= sib >> 3;
@@ -3236,7 +3236,10 @@ static void emulate_gate_op(struct cpu_u
                         opnd_off += insn_fetch(s8, base, eip, limit);
                         break;
                     case 0x80:
-                        opnd_off += insn_fetch(s32, base, eip, limit);
+                        if ( ad_bytes > 2 )
+                            opnd_off += insn_fetch(s32, base, eip, limit);
+                        else
+                            opnd_off += insn_fetch(s16, base, eip, limit);
                         break;
                     }
                     if ( ad_bytes == 4 )
@@ -3273,8 +3276,7 @@ static void emulate_gate_op(struct cpu_u
 #define ad_default ad_bytes
     opnd_sel = insn_fetch(u16, base, opnd_off, limit);
 #undef ad_default
-    ASSERT((opnd_sel & ~3) == regs->error_code);
-    if ( dpl < (opnd_sel & 3) )
+    if ( (opnd_sel & ~3) != regs->error_code || dpl < (opnd_sel & 3) )
     {
         do_guest_trap(TRAP_gp_fault, regs, 1);
         return;
++++++ 57c93e52-fix-error-in-libxl_device_usbdev_list.patch ++++++
References: bsc#989679

Subject: libxl: fix libxl_device_usbdev_list()
From: Juergen Gross jgr...@suse.com Fri Sep 2 10:16:14 2016 +0200
Date: Fri Sep 2 09:54:42 2016 +0100:
Git: 74157a2f9886b55cd45714e58c80035bfe3e080c

Commit 03814de1d2ecdabedabceb8e728d934a632a43b9 ("libxl: Do not trust
frontend for vusb") introduced an error in libxl_device_usbdev_list().
Fix it.

Signed-off-by: Juergen Gross <jgr...@suse.com>
Acked-by: Wei Liu <wei.l...@citrix.com>

Index: xen-4.7.0-testing/tools/libxl/libxl_pvusb.c
===================================================================
--- xen-4.7.0-testing.orig/tools/libxl/libxl_pvusb.c
+++ xen-4.7.0-testing/tools/libxl/libxl_pvusb.c
@@ -732,7 +732,7 @@ libxl_device_usbdev_list(libxl_ctx *ctx,
     *num = 0;
 
     libxl_vusbs_path = GCSPRINTF("%s/device/vusb",
-                                 libxl__xs_libxl_path(gc, !domid));
+                                 libxl__xs_libxl_path(gc, domid));
     usbctrls = libxl__xs_directory(gc, XBT_NULL, libxl_vusbs_path, &nc);
 
     for (i = 0; i < nc; i++) {
++++++ 57c96df3-credit1-fix-a-race-when-picking-initial-pCPU.patch ++++++
References: bsc#991934

# Commit 9109bf55084398c4547b8956906410c158eb9a17
# Date 2016-09-02 14:17:55 +0200
# Author Dario Faggioli <dario.faggi...@citrix.com>
# Committer Jan Beulich <jbeul...@suse.com>
credit1: fix a race when picking initial pCPU for a vCPU

In the Credit1 hunk of 9f358ddd69463 ("xen: Have
schedulers revise initial placement") csched_cpu_pick()
is called without taking the runqueue lock of the
(temporary) pCPU that the vCPU has been assigned to
(e.g., in XEN_DOMCTL_max_vcpus).

However, although 'hidden' in the IS_RUNQ_IDLE() macro,
that function does access the runq (for doing load
balancing calculations). Two scenarios are possible:
 1) we are on cpu X, and IS_RUNQ_IDLE() peeks at cpu's
    X own runq;
 2) we are on cpu X, but IS_RUNQ_IDLE() peeks at some
    other cpu's runq.

Scenario 2) absolutely requires that the appropriate
runq lock is taken. Scenario 1) works even without
taking the cpu's own runq lock. That is actually what
happens when _csched_cpu_pick() is called from
csched_vcpu_acct() (in turn, called by csched_tick()).

Races have been observed and reported (by both XenServer
own testing and OSSTest [1]), in the form of
IS_RUNQ_IDLE() falling over LIST_POISON, because we're
not currently holding the proper lock, in
csched_vcpu_insert(), when scenario 1) occurs.

However, for better robustness, from now on we always
ask for the proper runq lock to be held when calling
IS_RUNQ_IDLE() (which is also becoming a static inline
function instead of a macro).

In order to comply with that, we take the lock around
the call to _csched_cpu_pick() in csched_vcpu_acct().

[1] https://lists.xen.org/archives/html/xen-devel/2016-08/msg02144.html

Reported-by: Andrew Cooper <andrew.coop...@citrix.com>
Signed-off-by: Dario Faggioli <dario.faggi...@citrix.com>
Reviewed-by: George Dunlap <george.dun...@citrix.com>

--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -84,9 +84,6 @@
 #define CSCHED_VCPU(_vcpu)  ((struct csched_vcpu *) (_vcpu)->sched_priv)
 #define CSCHED_DOM(_dom)    ((struct csched_dom *) (_dom)->sched_priv)
 #define RUNQ(_cpu)          (&(CSCHED_PCPU(_cpu)->runq))
-/* Is the first element of _cpu's runq its idle vcpu? */
-#define IS_RUNQ_IDLE(_cpu)  (list_empty(RUNQ(_cpu)) || \
-                             is_idle_vcpu(__runq_elem(RUNQ(_cpu)->next)->vcpu))
 
 
 /*
@@ -248,6 +245,18 @@ __runq_elem(struct list_head *elem)
     return list_entry(elem, struct csched_vcpu, runq_elem);
 }
 
+/* Is the first element of cpu's runq (if any) cpu's idle vcpu? */
+static inline bool_t is_runq_idle(unsigned int cpu)
+{
+    /*
+     * We're peeking at cpu's runq, we must hold the proper lock.
+     */
+    ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
+
+    return list_empty(RUNQ(cpu)) ||
+           is_idle_vcpu(__runq_elem(RUNQ(cpu)->next)->vcpu);
+}
+
 static inline void
 __runq_insert(struct csched_vcpu *svc)
 {
@@ -767,7 +776,7 @@ _csched_cpu_pick(const struct scheduler
          * runnable vcpu on cpu, we add cpu to the idlers.
          */
         cpumask_and(&idlers, &cpu_online_map, CSCHED_PRIV(ops)->idlers);
-        if ( vc->processor == cpu && IS_RUNQ_IDLE(cpu) )
+        if ( vc->processor == cpu && is_runq_idle(cpu) )
             __cpumask_set_cpu(cpu, &idlers);
         cpumask_and(&cpus, &cpus, &idlers);
 
@@ -947,21 +956,33 @@ csched_vcpu_acct(struct csched_private *
     /*
      * Put this VCPU and domain back on the active list if it was
      * idling.
-     *
-     * If it's been active a while, check if we'd be better off
-     * migrating it to run elsewhere (see multi-core and multi-thread
-     * support in csched_cpu_pick()).
      */
     if ( list_empty(&svc->active_vcpu_elem) )
     {
         __csched_vcpu_acct_start(prv, svc);
     }
-    else if ( _csched_cpu_pick(ops, current, 0) != cpu )
+    else
     {
-        SCHED_VCPU_STAT_CRANK(svc, migrate_r);
-        SCHED_STAT_CRANK(migrate_running);
-        set_bit(_VPF_migrating, &current->pause_flags);
-        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+        unsigned int new_cpu;
+        unsigned long flags;
+        spinlock_t *lock = vcpu_schedule_lock_irqsave(current, &flags);
+
+        /*
+         * If it's been active a while, check if we'd be better off
+         * migrating it to run elsewhere (see multi-core and multi-thread
+         * support in csched_cpu_pick()).
+         */
+        new_cpu = _csched_cpu_pick(ops, current, 0);
+
+        vcpu_schedule_unlock_irqrestore(lock, flags, current);
+
+        if ( new_cpu != cpu )
+        {
+            SCHED_VCPU_STAT_CRANK(svc, migrate_r);
+            SCHED_STAT_CRANK(migrate_running);
+            set_bit(_VPF_migrating, &current->pause_flags);
+            cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+        }
     }
 }
 
@@ -994,9 +1015,13 @@ csched_vcpu_insert(const struct schedule
 
     BUG_ON( is_idle_vcpu(vc) );
 
-    /* This is safe because vc isn't yet being scheduled */
+    /* csched_cpu_pick() looks in vc->processor's runq, so we need the lock. */
+    lock = vcpu_schedule_lock_irq(vc);
+
     vc->processor = csched_cpu_pick(ops, vc);
 
+    spin_unlock_irq(lock);
+
     lock = vcpu_schedule_lock_irq(vc);
 
     if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
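
The shape of the fix, taking the queue's own lock around the helper that peeks at it, can be sketched with ordinary pthreads (illustrative structures, nothing from the scheduler itself):

/* Standalone pthread sketch of the locking pattern the patch enforces:
 * a helper that peeks at a run queue is only called with that queue's
 * lock held, as csched_vcpu_acct() now does around _csched_cpu_pick(). */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct runq {
    pthread_mutex_t lock;
    int nr_entries;            /* stand-in for the actual runq list */
};

/* Caller must hold rq->lock (the analogue of is_runq_idle()'s ASSERT). */
static bool runq_idle_locked(const struct runq *rq)
{
    return rq->nr_entries == 0;
}

static int pick_cpu(struct runq *rq, int current_cpu)
{
    bool idle;

    pthread_mutex_lock(&rq->lock);     /* taken around the peek, as in the fix */
    idle = runq_idle_locked(rq);
    pthread_mutex_unlock(&rq->lock);

    /* Toy placement decision: stay put if the queue is idle, else move on. */
    return idle ? current_cpu : current_cpu + 1;
}

int main(void)
{
    struct runq rq = { .lock = PTHREAD_MUTEX_INITIALIZER, .nr_entries = 0 };

    printf("picked cpu %d (idle queue)\n", pick_cpu(&rq, 0));
    rq.nr_entries = 2;
    printf("picked cpu %d (busy queue)\n", pick_cpu(&rq, 0));
    return 0;
}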
++++++ 57c96e2c-x86-correct-PT_NOTE-file-position.patch ++++++
# Commit f8f185dc4359a1cd8e7896dfbcacb54b473436c8
# Date 2016-09-02 14:18:52 +0200
# Author Jan Beulich <jbeul...@suse.com>
# Committer Jan Beulich <jbeul...@suse.com>
x86: correct PT_NOTE file position

Program and section headers disagreed about the file offset at which
the build ID note lives.

Reported-by: Sylvain Munaut <s.mun...@whatever-company.com>
Signed-off-by: Jan Beulich <jbeul...@suse.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.w...@oracle.com>
Reviewed-by: Andrew Cooper <andrew.coop...@citrix.com>

--- a/xen/arch/x86/boot/mkelf32.c
+++ b/xen/arch/x86/boot/mkelf32.c
@@ -394,7 +394,7 @@ int main(int argc, char **argv)
         note_phdr.p_paddr   = note_base;
         note_phdr.p_filesz  = note_sz;
         note_phdr.p_memsz   = note_sz;
-        note_phdr.p_offset  = offset;
+        note_phdr.p_offset  = RAW_OFFSET + offset;
 
         /* Tack on the .note\0 */
         out_shdr[2].sh_size += sizeof(out_shstrtab_extra);
++++++ 57cfed43-VMX-correct-feature-checks-for-MPX-and-XSAVES.patch ++++++
# Commit 68eb1a4d92be58e26bd11d02b8e0317bd56294ac
# Date 2016-09-07 12:34:43 +0200
# Author Jan Beulich <jbeul...@suse.com>
# Committer Jan Beulich <jbeul...@suse.com>
VMX: correct feature checks for MPX and XSAVES

Their VMCS fields aren't tied to the respective base CPU feature flags
but instead to VMX specific ones.

Note that while the VMCS GUEST_BNDCFGS field exists if either of the
two respective features is available, MPX continues to get exposed to
guests only with both features present.

Also add the so far missing handling of
- GUEST_BNDCFGS in construct_vmcs()
- MSR_IA32_BNDCFGS in vmx_msr_{read,write}_intercept()
and mirror the extra correctness checks during MSR write to
vmx_load_msr().

Reported-by: "Rockosov, Dmitry" <dmitry.rocko...@intel.com>
Signed-off-by: Jan Beulich <jbeul...@suse.com>
Tested-by: "Rockosov, Dmitry" <dmitry.rocko...@intel.com>
Reviewed-by: Andrew Cooper <andrew.coop...@citrix.com>

--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -168,8 +168,7 @@ static void __init calculate_hvm_feature
      */
     if ( cpu_has_vmx )
     {
-        if ( !(vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) ||
-             !(vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS) )
+        if ( !cpu_has_vmx_mpx )
             __clear_bit(X86_FEATURE_MPX, hvm_featureset);
 
         if ( !cpu_has_vmx_xsaves )
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1281,6 +1281,8 @@ static int construct_vmcs(struct vcpu *v
         __vmwrite(HOST_PAT, host_pat);
         __vmwrite(GUEST_PAT, guest_pat);
     }
+    if ( cpu_has_vmx_mpx )
+        __vmwrite(GUEST_BNDCFGS, 0);
     if ( cpu_has_vmx_xsaves )
         __vmwrite(XSS_EXIT_BITMAP, 0);
 
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -786,14 +786,15 @@ static int vmx_load_vmcs_ctxt(struct vcp
 
 static unsigned int __init vmx_init_msr(void)
 {
-    return !!cpu_has_mpx + !!cpu_has_xsaves;
+    return (cpu_has_mpx && cpu_has_vmx_mpx) +
+           (cpu_has_xsaves && cpu_has_vmx_xsaves);
 }
 
 static void vmx_save_msr(struct vcpu *v, struct hvm_msr *ctxt)
 {
     vmx_vmcs_enter(v);
 
-    if ( cpu_has_mpx )
+    if ( cpu_has_mpx && cpu_has_vmx_mpx )
     {
         __vmread(GUEST_BNDCFGS, &ctxt->msr[ctxt->count].val);
         if ( ctxt->msr[ctxt->count].val )
@@ -802,7 +803,7 @@ static void vmx_save_msr(struct vcpu *v,
 
     vmx_vmcs_exit(v);
 
-    if ( cpu_has_xsaves )
+    if ( cpu_has_xsaves && cpu_has_vmx_xsaves )
     {
         ctxt->msr[ctxt->count].val = v->arch.hvm_vcpu.msr_xss;
         if ( ctxt->msr[ctxt->count].val )
@@ -822,13 +823,15 @@ static int vmx_load_msr(struct vcpu *v,
         switch ( ctxt->msr[i].index )
         {
         case MSR_IA32_BNDCFGS:
-            if ( cpu_has_mpx )
+            if ( cpu_has_mpx && cpu_has_vmx_mpx &&
+                 is_canonical_address(ctxt->msr[i].val) &&
+                 !(ctxt->msr[i].val & IA32_BNDCFGS_RESERVED) )
                 __vmwrite(GUEST_BNDCFGS, ctxt->msr[i].val);
             else if ( ctxt->msr[i].val )
                 err = -ENXIO;
             break;
         case MSR_IA32_XSS:
-            if ( cpu_has_xsaves )
+            if ( cpu_has_xsaves && cpu_has_vmx_xsaves )
                 v->arch.hvm_vcpu.msr_xss = ctxt->msr[i].val;
             else
                 err = -ENXIO;
@@ -2640,6 +2643,11 @@ static int vmx_msr_read_intercept(unsign
     case MSR_IA32_DEBUGCTLMSR:
         __vmread(GUEST_IA32_DEBUGCTL, msr_content);
         break;
+    case MSR_IA32_BNDCFGS:
+        if ( !cpu_has_mpx || !cpu_has_vmx_mpx )
+            goto gp_fault;
+        __vmread(GUEST_BNDCFGS, msr_content);
+        break;
     case IA32_FEATURE_CONTROL_MSR:
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC:
         if ( !nvmx_msr_read_intercept(msr, msr_content) )
@@ -2866,6 +2874,13 @@ static int vmx_msr_write_intercept(unsig
 
         break;
     }
+    case MSR_IA32_BNDCFGS:
+        if ( !cpu_has_mpx || !cpu_has_vmx_mpx ||
+             !is_canonical_address(msr_content) ||
+             (msr_content & IA32_BNDCFGS_RESERVED) )
+            goto gp_fault;
+        __vmwrite(GUEST_BNDCFGS, msr_content);
+        break;
     case IA32_FEATURE_CONTROL_MSR:
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_TRUE_ENTRY_CTLS:
         if ( !nvmx_msr_write_intercept(msr, msr_content) )
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -375,6 +375,9 @@ extern u64 vmx_ept_vpid_cap;
     (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS)
 #define cpu_has_vmx_pml \
     (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_PML)
+#define cpu_has_vmx_mpx \
+    ((vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) && \
+     (vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS))
 #define cpu_has_vmx_xsaves \
     (vmx_secondary_exec_control & SECONDARY_EXEC_XSAVES)
 #define cpu_has_vmx_tsc_scaling \
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -56,7 +56,10 @@
 #define MSR_IA32_DS_AREA               0x00000600
 #define MSR_IA32_PERF_CAPABILITIES     0x00000345
 
-#define MSR_IA32_BNDCFGS               0x00000D90
+#define MSR_IA32_BNDCFGS               0x00000d90
+#define IA32_BNDCFGS_ENABLE            0x00000001
+#define IA32_BNDCFGS_PRESERVE          0x00000002
+#define IA32_BNDCFGS_RESERVED          0x00000ffc
 
 #define MSR_IA32_XSS                   0x00000da0
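
The extra checks added to the BNDCFGS write paths reduce to two tests on the value; a standalone sketch follows (the canonical check is written for a 48-bit virtual address width, which is an assumption for the example rather than something read from CPUID):

/* Standalone sketch of the MSR_IA32_BNDCFGS write checks the patch adds:
 * the base must be canonical and the reserved bits must be clear. */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define IA32_BNDCFGS_ENABLE    0x00000001ull
#define IA32_BNDCFGS_PRESERVE  0x00000002ull
#define IA32_BNDCFGS_RESERVED  0x00000ffcull

static bool is_canonical_48(uint64_t addr)
{
    /* Bits 63:47 must all equal bit 47 (assumed 48-bit address width). */
    return ((int64_t)(addr << 16) >> 16) == (int64_t)addr;
}

static bool bndcfgs_write_ok(uint64_t val)
{
    return is_canonical_48(val) && !(val & IA32_BNDCFGS_RESERVED);
}

int main(void)
{
    printf("enable, canonical base: %d\n",
           bndcfgs_write_ok(0x00007f0000000000ull | IA32_BNDCFGS_ENABLE));
    printf("non-canonical base:     %d\n",
           bndcfgs_write_ok(0x0100000000000000ull));
    printf("reserved bit set:       %d\n",
           bndcfgs_write_ok(IA32_BNDCFGS_ENABLE | 0x800ull));
    return 0;
}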
 
++++++ 57d1563d-x86-32on64-don-t-allow-recursive-page-tables-from-L3.patch ++++++
References: bsc#995785 CVE-2016-7092 XSA-185

# Commit c844d637d92a75854ea5c8d4e5ca34302a9f623c
# Date 2016-09-08 14:14:53 +0200
# Author Jan Beulich <jbeul...@suse.com>
# Committer Jan Beulich <jbeul...@suse.com>
x86/32on64: don't allow recursive page tables from L3

L3 entries are special in PAE mode, and hence can't reasonably be used
for setting up recursive (and hence linear) page table mappings. Since
abuse is possible when the guest in fact gets run on 4-level page
tables, this needs to be excluded explicitly.

This is XSA-185 / CVE-2016-7092.

Reported-by: Jérémie Boutoille <jboutoi...@ext.quarkslab.com>
Reported-by: "栾尚聪(好风)" <shangcong....@alibaba-inc.com>
Signed-off-by: Jan Beulich <jbeul...@suse.com>
Reviewed-by: Andrew Cooper <andrew.coop...@citrix.com>

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1123,7 +1123,9 @@ get_page_from_l3e(
 
     rc = get_page_and_type_from_pagenr(
         l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, 1);
-    if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
+    if ( unlikely(rc == -EINVAL) &&
+         !is_pv_32bit_domain(d) &&
+         get_l3_linear_pagetable(l3e, pfn, d) )
         rc = 0;
 
     return rc;
++++++ 57d15679-x86-emulate-Correct-boundary-interactions-of-emulated-insns.patch ++++++
References: bsc#995789 CVE-2016-7093 XSA-186

# Commit e9575f980df81aeb0e5b6139f485fd6f7bb7f5b6
# Date 2016-09-08 14:15:53 +0200
# Author Andrew Cooper <andrew.coop...@citrix.com>
# Committer Jan Beulich <jbeul...@suse.com>
x86/emulate: Correct boundary interactions of emulated instructions

This reverts most of c/s 0640ffb6 "x86emul: fix rIP handling".

Experimentally, in long mode processors will execute an instruction stream
which crosses the 64bit -1 -> 0 virtual boundary, whether the instruction
boundary is aligned on the virtual boundary, or is misaligned.

In compatibility mode, Intel processors will execute an instruction stream
which crosses the 32bit -1 -> 0 virtual boundary, while AMD processors raise a
segmentation fault.  Xen's segmentation behaviour matches AMD.

For 16bit code, hardware does not ever truncate %ip.  %eip is always used and
behaves normally as a 32bit register, including in 16bit protected mode
segments, as well as in Real and Unreal mode.

This is XSA-186 / CVE-2016-7093.

Reported-by: Brian Marcotte <marco...@panix.com>
Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
Reviewed-by: Jan Beulich <jbeul...@suse.com>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1538,10 +1538,6 @@ x86_emulate(
 #endif
     }
 
-    /* Truncate rIP to def_ad_bytes (2 or 4) if necessary. */
-    if ( def_ad_bytes < sizeof(_regs.eip) )
-        _regs.eip &= (1UL << (def_ad_bytes * 8)) - 1;
-
     /* Prefix bytes. */
     for ( ; ; )
     {
@@ -3843,21 +3839,11 @@ x86_emulate(
 
     /* Commit shadow register state. */
     _regs.eflags &= ~EFLG_RF;
-    switch ( __builtin_expect(def_ad_bytes, sizeof(_regs.eip)) )
-    {
-        uint16_t ip;
 
-    case 2:
-        ip = _regs.eip;
-        _regs.eip = ctxt->regs->eip;
-        *(uint16_t *)&_regs.eip = ip;
-        break;
-#ifdef __x86_64__
-    case 4:
-        _regs.rip = _regs._eip;
-        break;
-#endif
-    }
+    /* Zero the upper 32 bits of %rip if not in long mode. */
+    if ( def_ad_bytes < sizeof(_regs.eip) )
+        _regs.eip = (uint32_t)_regs.eip;
+
     *ctxt->regs = _regs;
 
  done:
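
The replacement commit logic is just a 32-bit truncation applied outside long mode; a short standalone illustration of what that keeps and discards:

/* Standalone illustration of the rIP commit step after the patch: outside
 * long mode the upper 32 bits of %rip are zeroed, while %ip is never
 * truncated to 16 bits on its own. */
#include <stdio.h>
#include <stdint.h>

static uint64_t commit_rip(uint64_t rip, int long_mode)
{
    if (!long_mode)
        rip = (uint32_t)rip;   /* the "_regs.eip = (uint32_t)_regs.eip;" step */
    return rip;
}

int main(void)
{
    uint64_t rip = 0x0000000100001234ull;   /* stale bits above bit 31 */

    printf("long mode: %#018llx\n", (unsigned long long)commit_rip(rip, 1));
    printf("compat:    %#018llx\n", (unsigned long long)commit_rip(rip, 0));

    /* 16-bit code: %eip still behaves as a 32-bit register, so an
     * instruction ending past 0xffff is not wrapped back to 16 bits. */
    printf("16-bit flow: %#llx\n", (unsigned long long)commit_rip(0xfffeull + 4, 0));
    return 0;
}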
++++++ 57d1569a-x86-shadow-Avoid-overflowing-sh_ctxt-seg_reg.patch ++++++
References: bsc#995792 CVE-2016-7094 XSA-187

# Commit a9f3b3bad17d91e2067fc00d51b0302349570d08
# Date 2016-09-08 14:16:26 +0200
# Author Andrew Cooper <andrew.coop...@citrix.com>
# Committer Jan Beulich <jbeul...@suse.com>
x86/shadow: Avoid overflowing sh_ctxt->seg_reg[]

hvm_get_seg_reg() does not perform a range check on its input segment, calls
hvm_get_segment_register() and writes straight into sh_ctxt->seg_reg[].

x86_seg_none is outside the bounds of sh_ctxt->seg_reg[], and will hit a BUG()
in {vmx,svm}_get_segment_register().

HVM guests running with shadow paging can end up performing a virtual to
linear translation with x86_seg_none.  This is used for addresses which are
already linear.  However, none of this is a legitimate pagetable update, so
fail the emulation in such a case.

This is XSA-187 / CVE-2016-7094.

Reported-by: Andrew Cooper <andrew.coop...@citrix.com>
Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
Reviewed-by: Tim Deegan <t...@xen.org>

--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -140,9 +140,18 @@ static int hvm_translate_linear_addr(
     struct sh_emulate_ctxt *sh_ctxt,
     unsigned long *paddr)
 {
-    struct segment_register *reg = hvm_get_seg_reg(seg, sh_ctxt);
+    struct segment_register *reg;
     int okay;
 
+    /*
+     * Can arrive here with non-user segments.  However, no such cirucmstance
+     * is part of a legitimate pagetable update, so fail the emulation.
+     */
+    if ( !is_x86_user_segment(seg) )
+        return X86EMUL_UNHANDLEABLE;
+
+    reg = hvm_get_seg_reg(seg, sh_ctxt);
+
     okay = hvm_virtual_to_linear_addr(
         seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr);
 
++++++ 57d18642-hvm-fep-Allow-test-insns-crossing-1-0-boundary.patch ++++++
References: bsc#995789

# Commit 7b5cee79dad24e7006059667b02bd7de685d8ee5
# Date 2016-09-08 16:39:46 +0100
# Author Andrew Cooper <andrew.coop...@citrix.com>
# Committer Andrew Cooper <andrew.coop...@citrix.com>
hvm/fep: Allow testing of instructions crossing the -1 -> 0 virtual boundary

The Force Emulation Prefix is named to follow its PV counterpart for cpuid or
rdtsc, but isn't really an instruction prefix.  It behaves as a break-out into
Xen, with the purpose of emulating the next instruction in the current state.

It is important to be able to test legal situations which occur in real
hardware, including instructions which cross certain boundaries, and
instructions starting at 0.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
Reviewed-by: Jan Beulich <jbeul...@suse.com>

--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3905,6 +3905,10 @@ void hvm_ud_intercept(struct cpu_user_re
         {
             regs->eip += sizeof(sig);
             regs->eflags &= ~X86_EFLAGS_RF;
+
+            /* Zero the upper 32 bits of %rip if not in long mode. */
+            if ( !(hvm_long_mode_enabled(cur) && cs.attr.fields.l) )
+                regs->eip = regs->_eip;
         }
     }
 
++++++ 57d18642-x86-segment-Bounds-check-accesses-to-emulation-ctxt-seg_reg.patch ++++++
References: bsc#995792

# Commit 4fa0105d95be6e7145a1f6fd1036ccd43976228c
# Date 2016-09-08 16:39:46 +0100
# Author Andrew Cooper <andrew.coop...@citrix.com>
# Committer Andrew Cooper <andrew.coop...@citrix.com>
x86/segment: Bounds check accesses to emulation ctxt->seg_reg[]

HVM HAP codepaths have space for all segment registers in the seg_reg[]
cache (with x86_seg_none still risking an array overrun), while the shadow
codepaths only have space for the user segments.

Range check the input segment of *_get_seg_reg() against the size of the array
used to cache the results, to avoid overruns in the case that the callers
don't filter their input suitably.

Subsume the is_x86_user_segment(seg) checks from the shadow code, which were
an incomplete attempt at range checking, and are now superseded.  Make
hvm_get_seg_reg() static, as it is not used outside of shadow/common.c.

No functional change, but far easier to reason that no overflow is possible.

Reported-by: Andrew Cooper <andrew.coop...@citrix.com>
Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
Acked-by: Tim Deegan <t...@xen.org>
Acked-by: Jan Beulich <jbeul...@suse.com>

# Commit 4c47c47938ea24c73d9459f9f0b6923513772b5d
# Date 2016-09-09 15:31:01 +0100
# Author Andrew Cooper <andrew.coop...@citrix.com>
# Committer Andrew Cooper <andrew.coop...@citrix.com>
xen/x86: Fix build with clang following c/s 4fa0105

https://travis-ci.org/xen-project/xen/jobs/158494027#L2344

Clang complains:

  emulate.c:2016:14: error: comparison of unsigned enum expression < 0
  is always false [-Werror,-Wtautological-compare]
      if ( seg < 0 || seg >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
           ~~~ ^ ~

Clang is wrong to raise a warning like this.  The signed-ness of an enum is
implementation defined in C, and robust code must not assume the choices made
by the compiler.

In this case, dropping the < 0 check creates a latent bug which would result
in an array underflow when compiled with a compiler which chooses a signed
enum.

Work around the bug by explicitly pulling seg into an unsigned integer, and
only perform the upper bounds check.

No functional change.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
Reviewed-by: Jan Beulich <jbeul...@suse.com>
Reviewed-by: George Dunlap <george.dun...@citrix.com>

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -534,6 +534,8 @@ static int hvmemul_virtual_to_linear(
     *reps = min_t(unsigned long, *reps, max_reps);
 
     reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+    if ( IS_ERR(reg) )
+        return -PTR_ERR(reg);
 
     if ( (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1) )
     {
@@ -1369,6 +1371,10 @@ static int hvmemul_read_segment(
     struct hvm_emulate_ctxt *hvmemul_ctxt =
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
     struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+
+    if ( IS_ERR(sreg) )
+         return -PTR_ERR(sreg);
+
     memcpy(reg, sreg, sizeof(struct segment_register));
     return X86EMUL_OKAY;
 }
@@ -1382,6 +1388,9 @@ static int hvmemul_write_segment(
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
     struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
 
+    if ( IS_ERR(sreg) )
+         return -PTR_ERR(sreg);
+
     memcpy(sreg, reg, sizeof(struct segment_register));
     __set_bit(seg, &hvmemul_ctxt->seg_reg_dirty);
 
@@ -1934,13 +1943,22 @@ void hvm_emulate_writeback(
     }
 }
 
+/*
+ * Callers which pass a known in-range x86_segment can rely on the return
+ * pointer being valid.  Other callers must explicitly check for errors.
+ */
 struct segment_register *hvmemul_get_seg_reg(
     enum x86_segment seg,
     struct hvm_emulate_ctxt *hvmemul_ctxt)
 {
-    if ( !__test_and_set_bit(seg, &hvmemul_ctxt->seg_reg_accessed) )
-        hvm_get_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]);
-    return &hvmemul_ctxt->seg_reg[seg];
+    unsigned int idx = seg;
+
+    if ( idx >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
+        return ERR_PTR(-X86EMUL_UNHANDLEABLE);
+
+    if ( !__test_and_set_bit(idx, &hvmemul_ctxt->seg_reg_accessed) )
+        hvm_get_segment_register(current, idx, &hvmemul_ctxt->seg_reg[idx]);
+    return &hvmemul_ctxt->seg_reg[idx];
 }
 
 static const char *guest_x86_mode_to_str(int mode)
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -123,12 +123,22 @@ __initcall(shadow_audit_key_init);
 /* x86 emulator support for the shadow code
  */
 
-struct segment_register *hvm_get_seg_reg(
+/*
+ * Callers which pass a known in-range x86_segment can rely on the return
+ * pointer being valid.  Other callers must explicitly check for errors.
+ */
+static struct segment_register *hvm_get_seg_reg(
     enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt)
 {
-    struct segment_register *seg_reg = &sh_ctxt->seg_reg[seg];
-    if ( !__test_and_set_bit(seg, &sh_ctxt->valid_seg_regs) )
-        hvm_get_segment_register(current, seg, seg_reg);
+    unsigned int idx = seg;
+    struct segment_register *seg_reg;
+
+    if ( idx >= ARRAY_SIZE(sh_ctxt->seg_reg) )
+        return ERR_PTR(-X86EMUL_UNHANDLEABLE);
+
+    seg_reg = &sh_ctxt->seg_reg[idx];
+    if ( !__test_and_set_bit(idx, &sh_ctxt->valid_seg_regs) )
+        hvm_get_segment_register(current, idx, seg_reg);
     return seg_reg;
 }
 
@@ -143,14 +153,9 @@ static int hvm_translate_linear_addr(
     struct segment_register *reg;
     int okay;
 
-    /*
-     * Can arrive here with non-user segments.  However, no such cirucmstance
-     * is part of a legitimate pagetable update, so fail the emulation.
-     */
-    if ( !is_x86_user_segment(seg) )
-        return X86EMUL_UNHANDLEABLE;
-
     reg = hvm_get_seg_reg(seg, sh_ctxt);
+    if ( IS_ERR(reg) )
+        return -PTR_ERR(reg);
 
     okay = hvm_virtual_to_linear_addr(
         seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr);
@@ -253,9 +258,6 @@ hvm_emulate_write(enum x86_segment seg,
     unsigned long addr;
     int rc;
 
-    if ( !is_x86_user_segment(seg) )
-        return X86EMUL_UNHANDLEABLE;
-
     /* How many emulations could we save if we unshadowed on stack writes? */
     if ( seg == x86_seg_ss )
         perfc_incr(shadow_fault_emulate_stack);
@@ -283,7 +285,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg
     unsigned long addr, old, new;
     int rc;
 
-    if ( !is_x86_user_segment(seg) || bytes > sizeof(long) )
+    if ( bytes > sizeof(long) )
         return X86EMUL_UNHANDLEABLE;
 
     rc = hvm_translate_linear_addr(
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -740,8 +740,6 @@ const struct x86_emulate_ops *shadow_ini
     struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
 void shadow_continue_emulation(
     struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
-struct segment_register *hvm_get_seg_reg(
-    enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
 /**************************************************************************/
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -13,6 +13,7 @@
 #define __ASM_X86_HVM_EMULATE_H__
 
 #include <xen/config.h>
+#include <xen/err.h>
 #include <asm/hvm/hvm.h>
 #include <asm/x86_emulate.h>
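
The pattern both hunks introduce, and the reason the follow-up drops the "< 0" comparison, can be shown in isolation (ERR_PTR/IS_ERR below are simplified stand-ins, not the xen/err.h implementations):

/* Standalone sketch of a bounds-checked seg_reg[] accessor: the enum is
 * copied into an unsigned int so a single upper-bound check suffices,
 * which also avoids clang's "comparison < 0 is always false" warning. */
#include <stdio.h>
#include <stdint.h>

enum x86_segment { x86_seg_cs, x86_seg_ss, x86_seg_ds, x86_seg_es,
                   x86_seg_fs, x86_seg_gs, x86_seg_none };

struct segment_register { uint16_t sel; };

#define ARRAY_SIZE(a)  (sizeof(a) / sizeof((a)[0]))
#define ERR_PTR(e)     ((void *)(long)(e))
#define IS_ERR(p)      ((unsigned long)(p) >= (unsigned long)-4095)

static struct segment_register seg_reg[6];   /* user segments only, as in shadow */

static struct segment_register *get_seg_reg(enum x86_segment seg)
{
    unsigned int idx = seg;                  /* explicit unsigned copy */

    if (idx >= ARRAY_SIZE(seg_reg))          /* single upper-bound check */
        return ERR_PTR(-1);
    return &seg_reg[idx];
}

int main(void)
{
    printf("cs in range:   %d\n", !IS_ERR(get_seg_reg(x86_seg_cs)));
    printf("none rejected: %d\n", IS_ERR(get_seg_reg(x86_seg_none)) ? 1 : 0);
    return 0;
}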
 
++++++ ioemu-disable-scsi.patch ++++++
--- /var/tmp/diff_new_pack.3aw7sj/_old  2016-09-17 14:34:21.000000000 +0200
+++ /var/tmp/diff_new_pack.3aw7sj/_new  2016-09-17 14:34:21.000000000 +0200
@@ -63,6 +63,18 @@
 ===================================================================
 --- xen-4.4.0-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
 +++ xen-4.4.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
+@@ -156,8 +156,10 @@ static void platform_fixed_ioport_write2
+         /* Unplug devices.  Value is a bitmask of which devices to
+            unplug, with bit 0 the IDE devices, bit 1 the network
+            devices, and bit 2 the non-primary-master IDE devices. */
+-        if (val & UNPLUG_ALL_IDE_DISKS)
++        if (val & UNPLUG_ALL_IDE_DISKS) {
+             ide_unplug_harddisks();
++            pci_unplug_scsi();
++        }
+         if (val & UNPLUG_ALL_NICS) {
+             pci_unplug_netifs();
+             net_tap_shutdown_all();
 @@ -364,6 +364,8 @@ static void suse_platform_ioport_write(v
         * If it controlled just disk or just LAN, it would use 8 below. */
          fprintf(logfile, "Disconnect IDE hard disk...\n");
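
For context, the unplug protocol value that hunk reacts to is a plain bitmask; a standalone sketch of how such a write is decoded (bit assignments taken from the comment in the patch, constant names restated here rather than copied from qemu):

/* Standalone sketch of decoding the unplug bitmask described in the patch
 * comment: bit 0 = IDE disks, bit 1 = NICs, bit 2 = non-primary-master
 * IDE disks. */
#include <stdio.h>
#include <stdint.h>

#define UNPLUG_ALL_IDE_DISKS  (1u << 0)
#define UNPLUG_ALL_NICS       (1u << 1)
#define UNPLUG_AUX_IDE_DISKS  (1u << 2)

static void handle_unplug(uint16_t val)
{
    if (val & UNPLUG_ALL_IDE_DISKS) {
        printf("unplug emulated IDE disks\n");
        /* The patch above additionally unplugs emulated SCSI disks at
         * this point. */
        printf("unplug emulated SCSI disks\n");
    }
    if (val & UNPLUG_ALL_NICS)
        printf("unplug emulated NICs\n");
    if (val & UNPLUG_AUX_IDE_DISKS)
        printf("unplug non-primary-master IDE disks\n");
}

int main(void)
{
    handle_unplug(UNPLUG_ALL_IDE_DISKS | UNPLUG_ALL_NICS);
    return 0;
}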


