Hello community,

here is the log from the commit of package xen for openSUSE:Factory checked in at 2017-09-22 21:31:52
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/xen (Old)
 and      /work/SRC/openSUSE:Factory/.xen.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "xen"

Fri Sep 22 21:31:52 2017 rev:234 rq:527360 version:4.9.0_50

Changes:
--------
--- /work/SRC/openSUSE:Factory/xen/xen.changes  2017-08-24 18:21:48.419959788 +0200
+++ /work/SRC/openSUSE:Factory/.xen.new/xen.changes     2017-09-22 21:31:58.884976319 +0200
@@ -1,0 +2,56 @@
+Thu Sep  7 04:58:12 MDT 2017 - [email protected]
+
+- bsc#1057358 - Cannot Boot into SLES12.3 with Xen hypervisor when
+  Secure Boot is Enabled
+  xen.spec
+
+-------------------------------------------------------------------
+Tue Sep  5 12:00:59 UTC 2017 - [email protected]
+
+- bsc#1055695 - XEN: 11SP4 and 12SP3 HVM guests can not be restored
+  update from v6 to v9 to cover more cases for ballooned domUs
+  libxc.sr.superpage.patch
+
+-------------------------------------------------------------------
+Mon Aug 28 14:51:54 UTC 2017 - [email protected]
+
+- bsc#1026236 - remove suse_vtsc_tolerance= cmdline option for Xen
+  drop the patch because it is not acceptable upstream
+  remove xen.suse_vtsc_tolerance.patch
+
+-------------------------------------------------------------------
+Sat Aug 26 10:52:46 UTC 2017 - [email protected]
+
+- bsc#1055695 - XEN: 11SP4 and 12SP3 HVM guests can not be restored
+  after the save using xl stack
+  libxc.sr.superpage.patch
+
+-------------------------------------------------------------------
+Tue Aug 22 13:25:33 UTC 2017 - [email protected]
+
+- Unignore gcc-PIE
+  the toolstack disables PIE for firmware builds as needed
+
+-------------------------------------------------------------------
+Mon Aug 21 10:42:46 MDT 2017 - [email protected]
+
+- Upstream patches from Jan (bsc#1027519)
+  592fd5f0-stop_machine-fill-result-only-in-case-of-error.patch
+  596f257e-x86-fix-hvmemul_insn_fetch.patch
+  5982fd99-VT-d-don-t-panic-warn-on-iommu-no-igfx.patch
+  598c3630-VT-d-PI-disable-when-CPU-side-PI-is-off.patch
+  598c3706-cpufreq-only-stop-ondemand-governor-if-started.patch
+  5992f1e5-x86-grant-disallow-misaligned-PTEs.patch (Replaces xsa227.patch)
+  5992f20d-gnttab-split-maptrack-lock-to-make-it-useful-again.patch (Replaces xsa228.patch)
+  5992f233-gnttab-correct-pin-status-fixup-for-copy.patch (Replaces xsa230.patch)
+  59958e76-gnttab-dont-use-possibly-unbounded-tail-calls.patch (Replaces xsa226-1.patch)
+  59958ebf-gnttab-fix-transitive-grant-handling.patch (Replaces xsa226-2.patch)
+  59958edd-gnttab-avoid-spurious-maptrack-handle-alloc-failures.patch
+
+-------------------------------------------------------------------
+Wed Aug 16 15:03:46 MDT 2017 - [email protected]
+
+- bsc#1044974 - xen-tools require python-pam
+  xen.spec
+
+-------------------------------------------------------------------
@@ -27,0 +84,7 @@
+Tue Aug  8 08:20:41 MDT 2017 - [email protected]
+
+- bsc#1052686 - VUL-0: xen: grant_table: possibly premature
+  clearing of GTF_writing / GTF_reading (XSA-230)
+  xsa230.patch
+
+-------------------------------------------------------------------
@@ -32,0 +96,14 @@
+
+-------------------------------------------------------------------
+Thu Aug  3 11:51:11 MDT 2017 - [email protected]
+
+- bsc#1051787 - VUL-0: CVE-2017-12135: xen: possibly unbounded
+  recursion in grant table code (XSA-226)
+  xsa226-1.patch
+  xsa226-2.patch
+- bsc#1051788 - VUL-0: CVE-2017-12137: xen: x86: PV privilege
+  escalation via map_grant_ref (XSA-227)
+  xsa227.patch
+- bsc#1051789 - VUL-0: CVE-2017-12136: xen: grant_table: Race
+  conditions with maptrack free list handling (XSA-228)
+  xsa228.patch

Old:
----
  xen.suse_vtsc_tolerance.patch

New:
----
  592fd5f0-stop_machine-fill-result-only-in-case-of-error.patch
  596f257e-x86-fix-hvmemul_insn_fetch.patch
  5982fd99-VT-d-don-t-panic-warn-on-iommu-no-igfx.patch
  598c3630-VT-d-PI-disable-when-CPU-side-PI-is-off.patch
  598c3706-cpufreq-only-stop-ondemand-governor-if-started.patch
  5992f1e5-x86-grant-disallow-misaligned-PTEs.patch
  5992f20d-gnttab-split-maptrack-lock-to-make-it-useful-again.patch
  5992f233-gnttab-correct-pin-status-fixup-for-copy.patch
  59958e76-gnttab-dont-use-possibly-unbounded-tail-calls.patch
  59958ebf-gnttab-fix-transitive-grant-handling.patch
  59958edd-gnttab-avoid-spurious-maptrack-handle-alloc-failures.patch

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ xen.spec ++++++
--- /var/tmp/diff_new_pack.QNfxlD/_old  2017-09-22 21:32:00.788708337 +0200
+++ /var/tmp/diff_new_pack.QNfxlD/_new  2017-09-22 21:32:00.792707775 +0200
@@ -16,6 +16,8 @@
 #
 
 
+# needssslcertforbuild
+
 Name:           xen
 ExclusiveArch:  %ix86 x86_64 %arm aarch64
 %define changeset 35349
@@ -68,14 +70,15 @@
 BuildRequires:  libfdt1-devel
 %endif
 %endif
-#!BuildIgnore:  gcc-PIE
 # JWF: Until Anthony's series to load BIOS via toolstack is merged,
 # autoconf is needed by autogen.sh.
 # http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg01626.html
 BuildRequires:  autoconf >= 2.67
 BuildRequires:  bison
 BuildRequires:  fdupes
+%if 0%{?suse_version} > 1315
 BuildRequires:  figlet
+%endif
 BuildRequires:  flex
 BuildRequires:  glib2-devel
 BuildRequires:  libaio-devel
@@ -119,6 +122,9 @@
 BuildRequires:  glibc-devel-32bit
 BuildRequires:  makeinfo
 %endif
+%ifarch x86_64
+BuildRequires:  pesign-obs-integration
+%endif
 
 Version:        4.9.0_50
 Release:        0
@@ -155,6 +161,17 @@
 # For xen-libs
 Source99:       baselibs.conf
 # Upstream patches
+Patch1:         592fd5f0-stop_machine-fill-result-only-in-case-of-error.patch
+Patch2:         596f257e-x86-fix-hvmemul_insn_fetch.patch
+Patch3:         5982fd99-VT-d-don-t-panic-warn-on-iommu-no-igfx.patch
+Patch4:         598c3630-VT-d-PI-disable-when-CPU-side-PI-is-off.patch
+Patch5:         598c3706-cpufreq-only-stop-ondemand-governor-if-started.patch
+Patch6:         5992f1e5-x86-grant-disallow-misaligned-PTEs.patch
+Patch7:         5992f20d-gnttab-split-maptrack-lock-to-make-it-useful-again.patch
+Patch8:         5992f233-gnttab-correct-pin-status-fixup-for-copy.patch
+Patch9:         59958e76-gnttab-dont-use-possibly-unbounded-tail-calls.patch
+Patch10:        59958ebf-gnttab-fix-transitive-grant-handling.patch
+Patch11:        59958edd-gnttab-avoid-spurious-maptrack-handle-alloc-failures.patch
 # Our platform specific patches
 Patch400:       xen-destdir.patch
 Patch401:       vif-bridge-no-iptables.patch
@@ -187,7 +204,6 @@
 Patch463:       libxl.add-option-to-disable-disk-cache-flushes-in-qdisk.patch
 Patch464:       blktap2-no-uninit.patch
 Patch465:       libxl.set-migration-constraints-from-cmdline.patch
-Patch466:       xen.suse_vtsc_tolerance.patch
 Patch467:       libxc.sr.superpage.patch
 # Hypervisor and PV driver Patches
 Patch601:       x86-ioapic-ack-default.patch
@@ -252,10 +268,6 @@
 Requires:       multipath-tools
 Requires:       python
 Requires:       python-curses
-Requires:       python-lxml
-Requires:       python-openssl
-Requires:       python-pam
-Requires:       python-xml
 Requires:       qemu-seabios
 Requires:       xen-libs = %{version}
 # subpackage existed in 10.3
@@ -345,6 +357,17 @@
 %prep
 %setup -q -n %xen_build_dir -a 1 -a 5 -a 6 -a 57
 # Upstream patches
+%patch1 -p1
+%patch2 -p1
+%patch3 -p1
+%patch4 -p1
+%patch5 -p1
+%patch6 -p1
+%patch7 -p1
+%patch8 -p1
+%patch9 -p1
+%patch10 -p1
+%patch11 -p1
 # Our platform specific patches
 %patch400 -p1
 %patch401 -p1
@@ -378,7 +401,6 @@
 %patch463 -p1
 %patch464 -p1
 %patch465 -p1
-%patch466 -p1
 %patch467 -p1
 # Hypervisor and PV driver Patches
 %patch601 -p1
@@ -598,7 +620,7 @@
     fi
     find $RPM_BUILD_ROOT/boot -ls
 }
-export BRP_PESIGN_FILES="*.ko *.efi /lib/firmware"
+export BRP_PESIGN_FILES="*.efi /lib/firmware"
 CC=gcc
 %if %{?with_gcc47}0
 CC=gcc-4.7
@@ -687,7 +709,7 @@
 ln -s /var/lib/xen/images $RPM_BUILD_ROOT/etc/xen/images
 
 # Bootloader
-install -m755 %SOURCE36 $RPM_BUILD_ROOT/%{_libdir}/python%{pyver}/site-packages
+install -m644 %SOURCE36 $RPM_BUILD_ROOT/%{_libdir}/python%{pyver}/site-packages
 
 # Systemd
 %if %{?include_systemd_preset}0
@@ -711,7 +733,7 @@
        echo "ExecStart=-/usr/bin/env modprobe $mod" >> $RPM_BUILD_ROOT%{_unitdir}/${bn}
 done
 rm -rfv $RPM_BUILD_ROOT%{_initddir}
-install %SOURCE35 $RPM_BUILD_ROOT/var/adm/fillup-templates/sysconfig.pciback
+install -m644 %SOURCE35 $RPM_BUILD_ROOT/var/adm/fillup-templates/sysconfig.pciback
 
 # Clean up unpackaged files
 find $RPM_BUILD_ROOT \( \

++++++ 592fd5f0-stop_machine-fill-result-only-in-case-of-error.patch ++++++
# Commit d8b833d78f6bfde9855a949b5e6d3790d78c0fb7
# Date 2017-06-01 10:53:04 +0200
# Author Gregory Herrero <[email protected]>
# Committer Jan Beulich <[email protected]>
stop_machine: fill fn_result only in case of error

When stop_machine_run() is called with NR_CPUS as last argument,
fn_result member must be filled only if an error happens since it is
shared across all cpus.

Assume CPU1 detects an error and sets fn_result to -1, then CPU2 detects
no error and sets fn_result to 0: the error detected by CPU1 will be
ignored.

Note that in case multiple failures occur on different CPUs, only the
last error will be reported.

Signed-off-by: Gregory Herrero <[email protected]>
Reviewed-by: Wei Liu <[email protected]>

--- a/xen/common/stop_machine.c
+++ b/xen/common/stop_machine.c
@@ -94,6 +94,7 @@ int stop_machine_run(int (*fn)(void *),
     stopmachine_data.fn_data = data;
     stopmachine_data.nr_cpus = nr_cpus;
     stopmachine_data.fn_cpu = cpu;
+    stopmachine_data.fn_result = 0;
     atomic_set(&stopmachine_data.done, 0);
     stopmachine_data.state = STOPMACHINE_START;
 
@@ -112,7 +113,11 @@ int stop_machine_run(int (*fn)(void *),
 
     stopmachine_set_state(STOPMACHINE_INVOKE);
     if ( (cpu == smp_processor_id()) || (cpu == NR_CPUS) )
-        stopmachine_data.fn_result = (*fn)(data);
+    {
+        ret = (*fn)(data);
+        if ( ret )
+            write_atomic(&stopmachine_data.fn_result, ret);
+    }
     stopmachine_wait_state();
     ret = stopmachine_data.fn_result;
 
@@ -150,8 +155,12 @@ static void stopmachine_action(unsigned
         case STOPMACHINE_INVOKE:
             if ( (stopmachine_data.fn_cpu == smp_processor_id()) ||
                  (stopmachine_data.fn_cpu == NR_CPUS) )
-                stopmachine_data.fn_result =
-                    stopmachine_data.fn(stopmachine_data.fn_data);
+            {
+                int ret = stopmachine_data.fn(stopmachine_data.fn_data);
+
+                if ( ret )
+                    write_atomic(&stopmachine_data.fn_result, ret);
+            }
             break;
         default:
             break;
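
The race described in the commit message (a later success overwriting an
earlier failure) is easy to reproduce outside Xen. Below is a minimal sketch,
with pthread workers standing in for the stopped CPUs, a mutex standing in for
write_atomic(), and purely illustrative names (fn(), worker(), shared_result):

/* Not Xen code: several workers run fn() and must aggregate errors into one
 * shared slot.  Writing only on error means a later successful worker can
 * never overwrite an earlier failure. */
#include <pthread.h>
#include <stdio.h>

static int shared_result;              /* 0 = success, last error otherwise */
static pthread_mutex_t result_lock = PTHREAD_MUTEX_INITIALIZER;

static int fn(long cpu)
{
    return (cpu == 1) ? -1 : 0;        /* pretend only CPU1 fails */
}

static void *worker(void *arg)
{
    int ret = fn((long)arg);

    if ( ret )                         /* only errors touch the shared slot */
    {
        pthread_mutex_lock(&result_lock);
        shared_result = ret;
        pthread_mutex_unlock(&result_lock);
    }
    return NULL;
}

int main(void)
{
    pthread_t t[4];

    for ( long i = 0; i < 4; i++ )
        pthread_create(&t[i], NULL, worker, (void *)i);
    for ( long i = 0; i < 4; i++ )
        pthread_join(t[i], NULL);

    printf("aggregated result: %d\n", shared_result);   /* -1, never 0 */
    return 0;
}
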
++++++ 596f257e-x86-fix-hvmemul_insn_fetch.patch ++++++
# Commit 0943a03037418e6e40cdd420f2472bbf9afae7a2
# Date 2017-07-19 10:25:18 +0100
# Author Andrew Cooper <[email protected]>
# Committer Andrew Cooper <[email protected]>
x86/hvm: Fixes to hvmemul_insn_fetch()

Force insn_off to a single byte, as offset can wrap around or truncate with
respect to sh_ctxt->insn_buf_eip under a number of normal circumstances.

Furthermore, don't use an ASSERT() for bounds checking the write into
hvmemul_ctxt->insn_buf[].

Signed-off-by: Andrew Cooper <[email protected]>
Reviewed-by: Paul Durrant <[email protected]>
Reviewed-by: Jan Beulich <[email protected]>

# Commit 5a992b670bff697c40b513c9e037598ba35ca7d4
# Date 2017-07-27 11:39:57 +0100
# Author Andrew Cooper <[email protected]>
# Committer Andrew Cooper <[email protected]>
x86/hvm: Fix boundary check in hvmemul_insn_fetch()

c/s 0943a03037 added some extra protection for overflowing the emulation
instruction cache, but Coverity points out that boundary condition is off by
one when memcpy()'ing out of the buffer.

Signed-off-by: Andrew Cooper <[email protected]>
Reviewed-by: Paul Durrant <[email protected]>

# Commit 58e8986267d976b00c60e0089baa2e5f66f16d3e
# Date 2017-08-10 12:37:24 +0200
# Author Jan Beulich <[email protected]>
# Committer Jan Beulich <[email protected]>
x86/HVM: fix boundary check in hvmemul_insn_fetch() (again)

Commit 5a992b670b ("x86/hvm: Fix boundary check in
hvmemul_insn_fetch()") went a little too far in its correction to
commit 0943a03037 ("x86/hvm: Fixes to hvmemul_insn_fetch()"): Keep the
start offset check, but restore the original end offset one.

Signed-off-by: Jan Beulich <[email protected]>
Reviewed-by: Paul Durrant <[email protected]>

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -939,7 +939,8 @@ int hvmemul_insn_fetch(
 {
     struct hvm_emulate_ctxt *hvmemul_ctxt =
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
-    unsigned int insn_off = offset - hvmemul_ctxt->insn_buf_eip;
+    /* Careful, as offset can wrap or truncate WRT insn_buf_eip. */
+    uint8_t insn_off = offset - hvmemul_ctxt->insn_buf_eip;
 
     /*
      * Fall back if requested bytes are not in the prefetch cache.
@@ -953,7 +954,17 @@ int hvmemul_insn_fetch(
 
         if ( rc == X86EMUL_OKAY && bytes )
         {
-            ASSERT(insn_off + bytes <= sizeof(hvmemul_ctxt->insn_buf));
+            /*
+             * Will we overflow insn_buf[]?  This shouldn't be able to happen,
+             * which means something went wrong with instruction decoding...
+             */
+            if ( insn_off >= sizeof(hvmemul_ctxt->insn_buf) ||
+                 insn_off + bytes > sizeof(hvmemul_ctxt->insn_buf) )
+            {
+                ASSERT_UNREACHABLE();
+                return X86EMUL_UNHANDLEABLE;
+            }
+
             memcpy(&hvmemul_ctxt->insn_buf[insn_off], p_data, bytes);
             hvmemul_ctxt->insn_buf_bytes = insn_off + bytes;
         }
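
The same pattern can be shown as a small stand-alone sketch, assuming a
16-byte stand-in for insn_buf[] and illustrative names (cache_fetch(),
INSN_BUF_SIZE): the offset difference is deliberately truncated to uint8_t,
and a copy that would overflow the cache is refused rather than merely
asserted against:

/* Not the Xen function: copy fetched bytes into a small instruction cache
 * only after an explicit bounds check, rather than trusting an ASSERT(). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define INSN_BUF_SIZE 16               /* stand-in for sizeof(insn_buf) */

static uint8_t insn_buf[INSN_BUF_SIZE];

static int cache_fetch(uint32_t offset, uint32_t insn_buf_eip,
                       const void *p_data, unsigned int bytes)
{
    /* offset may lie below or far beyond insn_buf_eip; truncating the
     * difference to one byte mirrors the uint8_t insn_off in the patch. */
    uint8_t insn_off = offset - insn_buf_eip;

    if ( insn_off >= sizeof(insn_buf) ||
         insn_off + bytes > sizeof(insn_buf) )
        return -1;                     /* would overflow the cache: refuse */

    memcpy(&insn_buf[insn_off], p_data, bytes);
    return 0;
}

int main(void)
{
    const uint8_t data[4] = { 0x0f, 0x01, 0xc1, 0x90 };

    printf("in range: %d\n", cache_fetch(0x1004, 0x1000, data, 4));   /*  0 */
    printf("wrapped:  %d\n", cache_fetch(0x0ffc, 0x1000, data, 4));   /* -1 */
    return 0;
}
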
++++++ 5982fd99-VT-d-don-t-panic-warn-on-iommu-no-igfx.patch ++++++
# Commit dbf2a768565d8b79c65471a3d3b982b2874d6492
# Date 2017-08-03 12:40:25 +0200
# Author Rusty Bird <[email protected]>
# Committer Jan Beulich <[email protected]>
VT-d: don't panic/warn on iommu=no-igfx

When operating on an Intel graphics device, iommu_enable_translation()
panicked (force_iommu==1) or warned (force_iommu==0) about the BIOS if
is_igd_vt_enabled_quirk() returned 0. That's good if the actual BIOS
problem has been detected. But since commit 1463411, returning 0 could
also happen if the user simply passed "iommu=no-igfx", in which case
bailing out with an info message (instead of a panic/warning) would be
more appropriate.

The panic broke the combination "iommu=force,no-igfx", and also the case
where "iommu=no-igfx" is passed but force_iommu=1 is set automatically
by x2apic_bsp_setup().

Move the iommu_igfx check from is_igd_vt_enabled_quirk() into its only
caller iommu_enable_translation(), and tweak the logic.

Signed-off-by: Rusty Bird <[email protected]>
Acked-by: Kevin Tian <[email protected]>

--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -747,14 +747,24 @@ static void iommu_enable_translation(str
     unsigned long flags;
     struct iommu *iommu = drhd->iommu;
 
-    if ( is_igd_drhd(drhd) && !is_igd_vt_enabled_quirk() ) 
+    if ( is_igd_drhd(drhd) )
     {
-        if ( force_iommu )
-            panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose");
+        if ( !iommu_igfx )
+        {
+            printk(XENLOG_INFO VTDPREFIX
+                   "Passed iommu=no-igfx option.  Disabling IGD VT-d engine.\n");
+            return;
+        }
 
-        printk(XENLOG_WARNING VTDPREFIX
-               "BIOS did not enable IGD for VT properly.  Disabling IGD VT-d engine.\n");
-        return;
+        if ( !is_igd_vt_enabled_quirk() )
+        {
+            if ( force_iommu )
+                panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose");
+
+            printk(XENLOG_WARNING VTDPREFIX
+                   "BIOS did not enable IGD for VT properly.  Disabling IGD VT-d engine.\n");
+            return;
+        }
     }
 
     /* apply platform specific errata workarounds */
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -70,9 +70,6 @@ int is_igd_vt_enabled_quirk(void)
 {
     u16 ggc;
 
-    if ( !iommu_igfx )
-        return 0;
-
     if ( !IS_ILK(ioh_id) )
         return 1;
 
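
A minimal sketch of the resulting check order (ordinary userspace C, not Xen
code; exit(1) stands in for panic() and the helper names are invented): the
explicit iommu=no-igfx request is honoured with an informational message
before the BIOS quirk check that may panic or warn:

/* Not Xen code: a user option short-circuits with an info message; only the
 * BIOS quirk path may panic (when forced) or warn. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static bool iommu_igfx;                 /* false: user passed iommu=no-igfx */
static bool force_iommu = true;         /* e.g. set automatically by x2apic setup */

static bool igd_vt_enabled_quirk(void)
{
    return false;                       /* pretend the BIOS got it wrong */
}

static void enable_igd_translation(void)
{
    if ( !iommu_igfx )
    {
        printf("info: iommu=no-igfx given, disabling the IGD VT-d engine\n");
        return;                         /* user choice, not a BIOS problem */
    }

    if ( !igd_vt_enabled_quirk() )
    {
        if ( force_iommu )
        {
            fprintf(stderr, "BIOS did not enable IGD for VT properly\n");
            exit(1);                    /* stands in for panic() */
        }
        printf("warning: disabling the IGD VT-d engine\n");
        return;
    }

    /* ... proceed with enabling translation for this IOMMU ... */
}

int main(void)
{
    enable_igd_translation();
    return 0;
}
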
++++++ 598c3630-VT-d-PI-disable-when-CPU-side-PI-is-off.patch ++++++
# Commit e489eb6138e7efe4214a7e9ba0d21f54fc5b7d35
# Date 2017-08-10 12:32:16 +0200
# Author Chao Gao <[email protected]>
# Committer Jan Beulich <[email protected]>
VT-d PI: disable VT-d PI when CPU-side PI isn't enabled

From the context calling pi_desc_init(), we can conclude the current
implementation of VT-d PI depends on CPU-side PI. If we enable VT-d PI
and disable CPU-side PI by disabling APICv explicitly in xen boot
command line, we would get an assertion failure.

This patch clears iommu_intpost once finding CPU-side PI won't be enabled.
This is safe because it is done before the flag takes effect. Also take
this chance to remove the useless check of "acknowledge interrupt on
exit", a minimal requirement which has already been checked earlier.

Signed-off-by: Chao Gao <[email protected]>
Reviewed-by: Jan Beulich <[email protected]>
Acked-by: Kevin Tian <[email protected]>

--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -345,11 +345,19 @@ static int vmx_init_vmcs_config(void)
 
     /*
      * "Process posted interrupt" can be set only when "virtual-interrupt
-     * delivery" and "acknowledge interrupt on exit" is set
+     * delivery" and "acknowledge interrupt on exit" is set. For the latter
+     * is a minimal requirement, only check the former, which is optional.
      */
-    if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
-          || !(_vmx_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT) )
-        _vmx_pin_based_exec_control  &= ~ PIN_BASED_POSTED_INTERRUPT;
+    if ( !(_vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) )
+        _vmx_pin_based_exec_control &= ~PIN_BASED_POSTED_INTERRUPT;
+
+    if ( iommu_intpost &&
+         !(_vmx_pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) )
+    {
+        printk("Intel VT-d Posted Interrupt is disabled for CPU-side Posted "
+               "Interrupt is not enabled\n");
+        iommu_intpost = 0;
+    }
 
     /* The IA32_VMX_VMFUNC MSR exists only when VMFUNC is available */
     if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS )
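
A minimal sketch of the dependency handling (not Xen code; the bit value and
names are illustrative): a requested feature is cleared, with a message, once
its prerequisite turns out to be off:

/* Not Xen code: switch off a dependent feature when its prerequisite is
 * unavailable, mirroring how iommu_intpost is cleared when CPU-side posted
 * interrupts end up disabled. */
#include <stdbool.h>
#include <stdio.h>

#define PIN_BASED_POSTED_INTERRUPT (1u << 7)    /* illustrative bit only */

static bool iommu_intpost = true;               /* requested on the cmdline */

static void resolve_feature_deps(unsigned int pin_based_exec_control)
{
    if ( iommu_intpost &&
         !(pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) )
    {
        printf("VT-d posted interrupts disabled: CPU-side PI is off\n");
        iommu_intpost = false;
    }
}

int main(void)
{
    resolve_feature_deps(0);                    /* APICv / CPU-side PI is off */
    printf("iommu_intpost = %d\n", iommu_intpost);
    return 0;
}
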
++++++ 598c3706-cpufreq-only-stop-ondemand-governor-if-started.patch ++++++
# Commit e7ec6f5f32cd2d0f723083cde3d7761c4e675f2c
# Date 2017-08-10 12:35:50 +0200
# Author Christopher Clark <[email protected]>
# Committer Jan Beulich <[email protected]>
cpufreq: only stop ondemand governor if already started

On CPUFREQ_GOV_STOP in cpufreq_governor_dbs, shortcut to
return success if the governor is already stopped.

Avoid executing dbs_timer_exit, to prevent tripping an assertion
within a call to kill_timer on a timer that has not been prepared
with init_timer, if the CPUFREQ_GOV_START case has not
run beforehand.

kill_timer validates timer state:
 * itself, via BUG_ON(this_cpu(timers).running == timer);
 * within active_timer, ASSERTing timer->status is within bounds;
 * within list_del, which ASSERTs timer inactive list membership.

Patch is synonymous to an OpenXT patch produced at Citrix prior to
June 2014.

Signed-off-by: Christopher Clark <[email protected]>
Reviewed-by: Jan Beulich <[email protected]>

--- a/xen/drivers/cpufreq/cpufreq_ondemand.c
+++ b/xen/drivers/cpufreq/cpufreq_ondemand.c
@@ -273,6 +273,10 @@ int cpufreq_governor_dbs(struct cpufreq_
         break;
 
     case CPUFREQ_GOV_STOP:
+        if ( !this_dbs_info->enable )
+            /* Already not enabled */
+            break;
+
         dbs_timer_exit(this_dbs_info);
         dbs_enable--;
 
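
A minimal sketch of the guard (not Xen code; a small struct and an assert()
stand in for the real dbs_info and kill_timer()'s sanity checks): the stop
path becomes a no-op when start never ran, so teardown never touches an
uninitialized timer:

/* Not Xen code: make the stop path idempotent by keying it off the flag the
 * start path sets. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct dbs_info {
    bool enable;            /* set by the start path, cleared by stop */
    bool timer_initialized; /* stands in for init_timer() having run */
};

static void timer_exit(struct dbs_info *i)
{
    assert(i->timer_initialized);   /* mirrors kill_timer()'s checks */
    i->timer_initialized = false;
}

static void governor_stop(struct dbs_info *i)
{
    if ( !i->enable )
        return;                     /* already stopped: nothing to tear down */

    timer_exit(i);
    i->enable = false;
}

int main(void)
{
    struct dbs_info info = { 0 };

    governor_stop(&info);           /* safe even though start never ran */
    printf("enable=%d\n", info.enable);
    return 0;
}
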
++++++ 5992f1e5-x86-grant-disallow-misaligned-PTEs.patch ++++++
# Commit ce442926c2530da9376199dcc769436376ad2386
# Date 2017-08-15 15:06:45 +0200
# Author Andrew Cooper <[email protected]>
# Committer Jan Beulich <[email protected]>
x86/grant: disallow misaligned PTEs

Pagetable entries must be aligned to function correctly.  Disallow attempts
from the guest to have a grant PTE created at a misaligned address, which
would result in corruption of the L1 table with largely-guest-controlled
values.

This is CVE-2017-12137 / XSA-227.

Signed-off-by: Andrew Cooper <[email protected]>
Reviewed-by: Jan Beulich <[email protected]>

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4006,6 +4006,9 @@ static int create_grant_pte_mapping(
     l1_pgentry_t ol1e;
     struct domain *d = v->domain;
 
+    if ( !IS_ALIGNED(pte_addr, sizeof(nl1e)) )
+        return GNTST_general_error;
+
     adjust_guest_l1e(nl1e, d);
 
     gmfn = pte_addr >> PAGE_SHIFT;
@@ -4063,6 +4066,16 @@ static int destroy_grant_pte_mapping(
     struct page_info *page;
     l1_pgentry_t ol1e;
 
+    /*
+     * addr comes from Xen's active_entry tracking so isn't guest controlled,
+     * but it had still better be PTE-aligned.
+     */
+    if ( !IS_ALIGNED(addr, sizeof(ol1e)) )
+    {
+        ASSERT_UNREACHABLE();
+        return GNTST_general_error;
+    }
+
     gmfn = addr >> PAGE_SHIFT;
     page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
 
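
A minimal sketch of the alignment check (not Xen code; l1_pgentry_t is
replaced by a plain uint64_t and IS_ALIGNED is written out locally): a
guest-supplied address is rejected before it is ever treated as a PTE slot,
since an unaligned write would straddle two entries of the L1 table:

/* Not Xen code: refuse unaligned PTE addresses up front. */
#include <stdint.h>
#include <stdio.h>

#define IS_ALIGNED(val, align) (((val) & ((align) - 1)) == 0)

typedef uint64_t l1_pgentry_t;     /* stand-in for Xen's PTE type */

static int create_grant_pte_mapping(uint64_t pte_addr)
{
    if ( !IS_ALIGNED(pte_addr, sizeof(l1_pgentry_t)) )
        return -1;                 /* GNTST_general_error in the real code */

    /* ... locate the page, validate and update the entry ... */
    return 0;
}

int main(void)
{
    printf("aligned:    %d\n", create_grant_pte_mapping(0x1000));  /*  0 */
    printf("misaligned: %d\n", create_grant_pte_mapping(0x1003));  /* -1 */
    return 0;
}
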
++++++ 5992f20d-gnttab-split-maptrack-lock-to-make-it-useful-again.patch ++++++
# Commit 02cbeeb6207508b0f04a2c6181445c8eb3f1e117
# Date 2017-08-15 15:07:25 +0200
# Author Jan Beulich <[email protected]>
# Committer Jan Beulich <[email protected]>
gnttab: split maptrack lock to make it fulfill its purpose again

The way the lock is currently being used in get_maptrack_handle(), it
protects only the maptrack limit: The function acts on current's list
only, so races on list accesses are impossible even without the lock.

Otoh list access races are possible between __get_maptrack_handle() and
put_maptrack_handle(), due to the invocation of the former for other
than current from steal_maptrack_handle(). Introduce a per-vCPU lock
for list accesses to become race free again. This lock will be
uncontended except when it becomes necessary to take the steal path,
i.e. in the common case there should be no meaningful performance
impact.

When get_maptrack_handle() adds a stolen entry to a fresh, empty
freelist, there is probably no concurrency.  However, this is not a
fast path, and adding the locking there makes the code clearly correct.

Also, while we are here: the stolen maptrack_entry's tail pointer was
not properly set.  Set it.

This is CVE-2017-12136 / XSA-228.

Reported-by: Ian Jackson <[email protected]>
Signed-off-by: Jan Beulich <[email protected]>
Signed-off-by: Ian Jackson <[email protected]>

--- a/docs/misc/grant-tables.txt
+++ b/docs/misc/grant-tables.txt
@@ -87,7 +87,8 @@ is complete.
                                inconsistent grant table state such as current
                               version, partially initialized active table pages,
                                etc.
-  grant_table->maptrack_lock : spinlock used to protect the maptrack free list
+  grant_table->maptrack_lock : spinlock used to protect the maptrack limit
+  v->maptrack_freelist_lock  : spinlock used to protect the maptrack free list
   active_grant_entry->lock   : spinlock used to serialize modifications to
                                active entries
 
@@ -102,6 +103,10 @@ is complete.
  The maptrack free list is protected by its own spinlock. The maptrack
  lock may be locked while holding the grant table lock.
 
+ The maptrack_freelist_lock is an innermost lock.  It may be locked
+ while holding other locks, but no other locks may be acquired within
+ it.
+
  Active entries are obtained by calling active_entry_acquire(gt, ref).
  This function returns a pointer to the active entry after locking its
  spinlock. The caller must hold the grant table read lock before
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -304,11 +304,16 @@ __get_maptrack_handle(
 {
     unsigned int head, next, prev_head;
 
+    spin_lock(&v->maptrack_freelist_lock);
+
     do {
         /* No maptrack pages allocated for this VCPU yet? */
         head = read_atomic(&v->maptrack_head);
         if ( unlikely(head == MAPTRACK_TAIL) )
+        {
+            spin_unlock(&v->maptrack_freelist_lock);
             return -1;
+        }
 
         /*
          * Always keep one entry in the free list to make it easier to
@@ -316,12 +321,17 @@ __get_maptrack_handle(
          */
         next = read_atomic(&maptrack_entry(t, head).ref);
         if ( unlikely(next == MAPTRACK_TAIL) )
+        {
+            spin_unlock(&v->maptrack_freelist_lock);
             return -1;
+        }
 
         prev_head = head;
         head = cmpxchg(&v->maptrack_head, prev_head, next);
     } while ( head != prev_head );
 
+    spin_unlock(&v->maptrack_freelist_lock);
+
     return head;
 }
 
@@ -380,6 +390,8 @@ put_maptrack_handle(
     /* 2. Add entry to the tail of the list on the original VCPU. */
     v = currd->vcpu[maptrack_entry(t, handle).vcpu];
 
+    spin_lock(&v->maptrack_freelist_lock);
+
     cur_tail = read_atomic(&v->maptrack_tail);
     do {
         prev_tail = cur_tail;
@@ -388,6 +400,8 @@ put_maptrack_handle(
 
     /* 3. Update the old tail entry to point to the new entry. */
     write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
+
+    spin_unlock(&v->maptrack_freelist_lock);
 }
 
 static inline int
@@ -411,10 +425,6 @@ get_maptrack_handle(
      */
     if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
     {
-        /*
-         * Can drop the lock since no other VCPU can be adding a new
-         * frame once they've run out.
-         */
         spin_unlock(&lgt->maptrack_lock);
 
         /*
@@ -426,8 +436,12 @@ get_maptrack_handle(
             handle = steal_maptrack_handle(lgt, curr);
             if ( handle == -1 )
                 return -1;
+            spin_lock(&curr->maptrack_freelist_lock);
+            maptrack_entry(lgt, handle).ref = MAPTRACK_TAIL;
             curr->maptrack_tail = handle;
-            write_atomic(&curr->maptrack_head, handle);
+            if ( curr->maptrack_head == MAPTRACK_TAIL )
+                write_atomic(&curr->maptrack_head, handle);
+            spin_unlock(&curr->maptrack_freelist_lock);
         }
         return steal_maptrack_handle(lgt, curr);
     }
@@ -460,12 +474,15 @@ get_maptrack_handle(
     smp_wmb();
     lgt->maptrack_limit += MAPTRACK_PER_PAGE;
 
+    spin_unlock(&lgt->maptrack_lock);
+    spin_lock(&curr->maptrack_freelist_lock);
+
     do {
         new_mt[i - 1].ref = read_atomic(&curr->maptrack_head);
         head = cmpxchg(&curr->maptrack_head, new_mt[i - 1].ref, handle + 1);
     } while ( head != new_mt[i - 1].ref );
 
-    spin_unlock(&lgt->maptrack_lock);
+    spin_unlock(&curr->maptrack_freelist_lock);
 
     return handle;
 }
@@ -3473,6 +3490,7 @@ grant_table_destroy(
 
 void grant_table_init_vcpu(struct vcpu *v)
 {
+    spin_lock_init(&v->maptrack_freelist_lock);
     v->maptrack_head = MAPTRACK_TAIL;
     v->maptrack_tail = MAPTRACK_TAIL;
 }
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -78,7 +78,7 @@ struct grant_table {
     /* Mapping tracking table per vcpu. */
     struct grant_mapping **maptrack;
     unsigned int          maptrack_limit;
-    /* Lock protecting the maptrack page list, head, and limit */
+    /* Lock protecting the maptrack limit */
     spinlock_t            maptrack_lock;
     /* The defined versions are 1 and 2.  Set to 0 if we don't know
        what version to use yet. */
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -230,6 +230,7 @@ struct vcpu
     int              controller_pause_count;
 
     /* Grant table map tracking. */
+    spinlock_t       maptrack_freelist_lock;
     unsigned int     maptrack_head;
     unsigned int     maptrack_tail;
 
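
A minimal sketch of a per-vCPU free list guarded by its own innermost lock
(not Xen code; pthread mutexes stand in for spinlocks and an array-backed
list keeps it short; names and sizes are illustrative). No other lock is
taken while freelist_lock is held, matching the documented lock ordering:

/* Not Xen code: the free-list lock only covers list manipulation. */
#include <pthread.h>
#include <stdio.h>

#define TAIL -1

struct vcpu_freelist {
    pthread_mutex_t freelist_lock;   /* innermost: take no other lock inside */
    int head;                        /* index of first free handle, or TAIL */
    int next[8];                     /* next[i]: handle after i on the list */
};

static int get_handle(struct vcpu_freelist *v)
{
    int h;

    pthread_mutex_lock(&v->freelist_lock);
    h = v->head;
    if ( h != TAIL )
        v->head = v->next[h];
    pthread_mutex_unlock(&v->freelist_lock);
    return h;                        /* TAIL means "steal from another vCPU" */
}

static void put_handle(struct vcpu_freelist *v, int h)
{
    pthread_mutex_lock(&v->freelist_lock);
    v->next[h] = v->head;            /* push back onto this vCPU's list */
    v->head = h;
    pthread_mutex_unlock(&v->freelist_lock);
}

int main(void)
{
    struct vcpu_freelist v = { PTHREAD_MUTEX_INITIALIZER, 0,
                               { 1, 2, 3, TAIL } };
    int h = get_handle(&v);

    printf("got handle %d\n", h);
    put_handle(&v, h);
    return 0;
}
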
++++++ 5992f233-gnttab-correct-pin-status-fixup-for-copy.patch ++++++
# Commit 6e2a4c73564ab907b732059adb317d6ca2d138a2
# Date 2017-08-15 15:08:03 +0200
# Author Jan Beulich <[email protected]>
# Committer Jan Beulich <[email protected]>
gnttab: correct pin status fixup for copy

Regardless of copy operations only setting GNTPIN_hst*, GNTPIN_dev*
also need to be taken into account when deciding whether to clear
_GTF_{read,writ}ing. At least for consistency with code elsewhere the
read part better doesn't use any mask at all.

This is XSA-230.

Signed-off-by: Jan Beulich <[email protected]>
Reviewed-by: Andrew Cooper <[email protected]>

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2122,10 +2122,10 @@ __release_grant_for_copy(
 static void __fixup_status_for_copy_pin(const struct active_grant_entry *act,
                                    uint16_t *status)
 {
-    if ( !(act->pin & GNTPIN_hstw_mask) )
+    if ( !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
         gnttab_clear_flag(_GTF_writing, status);
 
-    if ( !(act->pin & GNTPIN_hstr_mask) )
+    if ( !act->pin )
         gnttab_clear_flag(_GTF_reading, status);
 }
 
@@ -2333,7 +2333,7 @@ __acquire_grant_for_copy(
  
  unlock_out_clear:
     if ( !(readonly) &&
-         !(act->pin & GNTPIN_hstw_mask) )
+         !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
         gnttab_clear_flag(_GTF_writing, status);
 
     if ( !act->pin )
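
A minimal sketch of the corrected fixup (not Xen code; the mask and flag
values are illustrative): the writing flag may only be cleared once no host
or device write pin remains, and the reading flag only once no pin of any
kind remains:

/* Not Xen code: clear status flags based on the combined pin counts. */
#include <stdint.h>
#include <stdio.h>

#define GNTPIN_hstw_mask 0x000000ffu   /* illustrative mask layout */
#define GNTPIN_devw_mask 0x0000ff00u
#define GTF_writing      (1u << 0)     /* illustrative flag bits */
#define GTF_reading      (1u << 1)

static void fixup_status_for_copy_pin(uint32_t pin, uint16_t *status)
{
    if ( !(pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
        *status &= ~GTF_writing;       /* no writable pin of any kind left */

    if ( !pin )
        *status &= ~GTF_reading;       /* no pin at all left */
}

int main(void)
{
    uint16_t status = GTF_writing | GTF_reading;

    fixup_status_for_copy_pin(0x00000100, &status);  /* one device-write pin */
    printf("status after fixup: %#x\n", status);     /* both flags survive */
    return 0;
}
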
++++++ 59958e76-gnttab-dont-use-possibly-unbounded-tail-calls.patch ++++++
# Commit 999d2ccb7f73408aa22656e1ba2f98b077eaa1c2
# Date 2017-08-17 14:39:18 +0200
# Author Jan Beulich <[email protected]>
# Committer Jan Beulich <[email protected]>
gnttab: don't use possibly unbounded tail calls

There is no guarantee that the compiler would actually translate them
to branches instead of calls, so only ones with a known recursion limit
are okay:
- __release_grant_for_copy() can call itself only once, as
  __acquire_grant_for_copy() won't permit use of multi-level transitive
  grants,
- __acquire_grant_for_copy() is fine to call itself with the last
  argument false, as that prevents further recursion,
- __acquire_grant_for_copy() must not call itself to recover from an
  observed change to the active entry's pin count

This is part of XSA-226.

Signed-off-by: Jan Beulich <[email protected]>
Reviewed-by: Andrew Cooper <[email protected]>

# Commit ca617570542e1d7d8de636d5396959bbf1dabab7
# Date 2017-08-21 15:43:36 +0200
# Author Jan Beulich <[email protected]>
# Committer Jan Beulich <[email protected]>
gnttab: fix "don't use possibly unbounded tail calls"

The compat mode code also needs adjustment to deal with the changed
return value from gnttab_copy().

This is part of XSA-226.

Signed-off-by: Jan Beulich <[email protected]>
Reviewed-by: Andrew Cooper <[email protected]>

--- a/xen/common/compat/grant_table.c
+++ b/xen/common/compat/grant_table.c
@@ -258,9 +258,9 @@ int compat_grant_table_op(unsigned int c
                 rc = gnttab_copy(guest_handle_cast(nat.uop, gnttab_copy_t), n);
             if ( rc > 0 )
             {
-                ASSERT(rc < n);
-                i -= n - rc;
-                n = rc;
+                ASSERT(rc <= n);
+                i -= rc;
+                n -= rc;
             }
             if ( rc >= 0 )
             {
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2103,8 +2103,10 @@ __release_grant_for_copy(
 
     if ( td != rd )
     {
-        /* Recursive calls, but they're tail calls, so it's
-           okay. */
+        /*
+         * Recursive calls, but they're bounded (acquire permits only a single
+         * level of transitivity), so it's okay.
+         */
         if ( released_write )
             __release_grant_for_copy(td, trans_gref, 0);
         else if ( released_read )
@@ -2255,10 +2257,11 @@ __acquire_grant_for_copy(
                 return rc;
             }
 
-            /* We dropped the lock, so we have to check that nobody
-               else tried to pin (or, for that matter, unpin) the
-               reference in *this* domain.  If they did, just give up
-               and try again. */
+            /*
+             * We dropped the lock, so we have to check that nobody else tried
+             * to pin (or, for that matter, unpin) the reference in *this*
+             * domain.  If they did, just give up and tell the caller to retry.
+             */
             if ( act->pin != old_pin )
             {
                 __fixup_status_for_copy_pin(act, status);
@@ -2266,9 +2269,8 @@ __acquire_grant_for_copy(
                 active_entry_release(act);
                 grant_read_unlock(rgt);
                 put_page(*page);
-                return __acquire_grant_for_copy(rd, gref, ldom, readonly,
-                                                frame, page, page_off, length,
-                                                allow_transitive);
+                *page = NULL;
+                return ERESTART;
             }
 
             /* The actual remote remote grant may or may not be a
@@ -2574,7 +2576,7 @@ static int gnttab_copy_one(const struct
     {
         gnttab_copy_release_buf(src);
         rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref);
-        if ( rc < 0 )
+        if ( rc )
             goto out;
     }
 
@@ -2584,7 +2586,7 @@ static int gnttab_copy_one(const struct
     {
         gnttab_copy_release_buf(dest);
         rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref);
-        if ( rc < 0 )
+        if ( rc )
             goto out;
     }
 
@@ -2593,6 +2595,14 @@ static int gnttab_copy_one(const struct
     return rc;
 }
 
+/*
+ * gnttab_copy(), other than the various other helpers of
+ * do_grant_table_op(), returns (besides possible error indicators)
+ * "count - i" rather than "i" to ensure that even if no progress
+ * was made at all (perhaps due to gnttab_copy_one() returning a
+ * positive value) a non-zero value is being handed back (zero needs
+ * to be avoided, as that means "success, all done").
+ */
 static long gnttab_copy(
     XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count)
 {
@@ -2606,7 +2616,7 @@ static long gnttab_copy(
     {
         if ( i && hypercall_preempt_check() )
         {
-            rc = i;
+            rc = count - i;
             break;
         }
 
@@ -2616,13 +2626,20 @@ static long gnttab_copy(
             break;
         }
 
-        op.status = gnttab_copy_one(&op, &dest, &src);
-        if ( op.status != GNTST_okay )
+        rc = gnttab_copy_one(&op, &dest, &src);
+        if ( rc > 0 )
+        {
+            rc = count - i;
+            break;
+        }
+        if ( rc != GNTST_okay )
         {
             gnttab_copy_release_buf(&src);
             gnttab_copy_release_buf(&dest);
         }
 
+        op.status = rc;
+        rc = 0;
         if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
         {
             rc = -EFAULT;
@@ -3160,6 +3177,7 @@ do_grant_table_op(
         rc = gnttab_copy(copy, count);
         if ( rc > 0 )
         {
+            rc = count - rc;
             guest_handle_add_offset(copy, rc);
             uop = guest_handle_cast(copy, void);
         }
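
A minimal sketch of the return-value convention spelled out in the new
gnttab_copy() comment (not Xen code; names are illustrative): a preempted
batch reports how much work is left, so a partial batch can never be mistaken
for full completion (0):

/* Not Xen code: a preemptible batch loop that hands back the number of
 * remaining items instead of the number done. */
#include <stdio.h>

static int copy_one(int idx)
{
    (void)idx;
    return 0;                          /* pretend every element succeeds */
}

static long process_batch(unsigned int count, unsigned int preempt_after)
{
    unsigned int i;

    for ( i = 0; i < count; i++ )
    {
        if ( i && i == preempt_after ) /* stand-in for hypercall_preempt_check() */
            return count - i;          /* non-zero: "this much is left" */

        if ( copy_one(i) )
            return -1;
    }
    return 0;                          /* all done */
}

int main(void)
{
    long rc = process_batch(10, 4);

    printf("remaining after preemption: %ld\n", rc);   /* 6 */
    if ( rc > 0 )
        printf("caller continues at index %ld\n", 10 - rc);
    return 0;
}
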
++++++ 59958ebf-gnttab-fix-transitive-grant-handling.patch ++++++
# Commit ad48fb963dbff02762d2db5396fa655ac0c432c7
# Date 2017-08-17 14:40:31 +0200
# Author Jan Beulich <[email protected]>
# Committer Jan Beulich <[email protected]>
gnttab: fix transitive grant handling

Processing of transitive grants must not use the fast path, or else
reference counting breaks due to the skipped recursive call to
__acquire_grant_for_copy() (its __release_grant_for_copy()
counterpart occurs independent of original pin count). Furthermore
after re-acquiring temporarily dropped locks we need to verify no grant
properties changed if the original pin count was non-zero; checking
just the pin counts is sufficient only for well-behaved guests. As a
result, __release_grant_for_copy() needs to mirror that new behavior.

Furthermore a __release_grant_for_copy() invocation was missing on the
retry path of __acquire_grant_for_copy(), and gnttab_set_version() also
needs to bail out upon encountering a transitive grant.

This is part of XSA-226.

Reported-by: Andrew Cooper <[email protected]>
Signed-off-by: Jan Beulich <[email protected]>
Reviewed-by: Andrew Cooper <[email protected]>

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -2050,13 +2050,8 @@ __release_grant_for_copy(
     unsigned long r_frame;
     uint16_t *status;
     grant_ref_t trans_gref;
-    int released_read;
-    int released_write;
     struct domain *td;
 
-    released_read = 0;
-    released_write = 0;
-
     grant_read_lock(rgt);
 
     act = active_entry_acquire(rgt, gref);
@@ -2086,17 +2081,11 @@ __release_grant_for_copy(
 
         act->pin -= GNTPIN_hstw_inc;
         if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) )
-        {
-            released_write = 1;
             gnttab_clear_flag(_GTF_writing, status);
-        }
     }
 
     if ( !act->pin )
-    {
         gnttab_clear_flag(_GTF_reading, status);
-        released_read = 1;
-    }
 
     active_entry_release(act);
     grant_read_unlock(rgt);
@@ -2104,13 +2093,10 @@ __release_grant_for_copy(
     if ( td != rd )
     {
         /*
-         * Recursive calls, but they're bounded (acquire permits only a single
+         * Recursive call, but it is bounded (acquire permits only a single
          * level of transitivity), so it's okay.
          */
-        if ( released_write )
-            __release_grant_for_copy(td, trans_gref, 0);
-        else if ( released_read )
-            __release_grant_for_copy(td, trans_gref, 1);
+        __release_grant_for_copy(td, trans_gref, readonly);
 
         rcu_unlock_domain(td);
     }
@@ -2184,8 +2170,108 @@ __acquire_grant_for_copy(
                  act->domid, ldom, act->pin);
 
     old_pin = act->pin;
-    if ( !act->pin ||
-         (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
+    if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive )
+    {
+        if ( (!old_pin || (!readonly &&
+                           !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)))) &&
+             (rc = _set_status_v2(ldom, readonly, 0, shah, act,
+                                  status)) != GNTST_okay )
+            goto unlock_out;
+
+        if ( !allow_transitive )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grant when transitivity not allowed\n");
+
+        trans_domid = sha2->transitive.trans_domid;
+        trans_gref = sha2->transitive.gref;
+        barrier(); /* Stop the compiler from re-loading
+                      trans_domid from shared memory */
+        if ( trans_domid == rd->domain_id )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grants cannot be self-referential\n");
+
+        /*
+         * We allow the trans_domid == ldom case, which corresponds to a
+         * grant being issued by one domain, sent to another one, and then
+         * transitively granted back to the original domain.  Allowing it
+         * is easy, and means that you don't need to go out of your way to
+         * avoid it in the guest.
+         */
+
+        /* We need to leave the rrd locked during the grant copy. */
+        td = rcu_lock_domain_by_id(trans_domid);
+        if ( td == NULL )
+            PIN_FAIL(unlock_out_clear, GNTST_general_error,
+                     "transitive grant referenced bad domain %d\n",
+                     trans_domid);
+
+        /*
+         * __acquire_grant_for_copy() could take the lock on the
+         * remote table (if rd == td), so we have to drop the lock
+         * here and reacquire.
+         */
+        active_entry_release(act);
+        grant_read_unlock(rgt);
+
+        rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
+                                      readonly, &grant_frame, page,
+                                      &trans_page_off, &trans_length, 0);
+
+        grant_read_lock(rgt);
+        act = active_entry_acquire(rgt, gref);
+
+        if ( rc != GNTST_okay )
+        {
+            __fixup_status_for_copy_pin(act, status);
+            rcu_unlock_domain(td);
+            active_entry_release(act);
+            grant_read_unlock(rgt);
+            return rc;
+        }
+
+        /*
+         * We dropped the lock, so we have to check that the grant didn't
+         * change, and that nobody else tried to pin/unpin it. If anything
+         * changed, just give up and tell the caller to retry.
+         */
+        if ( rgt->gt_version != 2 ||
+             act->pin != old_pin ||
+             (old_pin && (act->domid != ldom || act->frame != grant_frame ||
+                          act->start != trans_page_off ||
+                          act->length != trans_length ||
+                          act->trans_domain != td ||
+                          act->trans_gref != trans_gref ||
+                          !act->is_sub_page)) )
+        {
+            __release_grant_for_copy(td, trans_gref, readonly);
+            __fixup_status_for_copy_pin(act, status);
+            rcu_unlock_domain(td);
+            active_entry_release(act);
+            grant_read_unlock(rgt);
+            put_page(*page);
+            *page = NULL;
+            return ERESTART;
+        }
+
+        if ( !old_pin )
+        {
+            act->domid = ldom;
+            act->start = trans_page_off;
+            act->length = trans_length;
+            act->trans_domain = td;
+            act->trans_gref = trans_gref;
+            act->frame = grant_frame;
+            act->gfn = -1ul;
+            /*
+             * The actual remote remote grant may or may not be a sub-page,
+             * but we always treat it as one because that blocks mappings of
+             * transitive grants.
+             */
+            act->is_sub_page = 1;
+        }
+    }
+    else if ( !old_pin ||
+              (!readonly && !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
     {
         if ( (rc = _set_status(rgt->gt_version, ldom,
                                readonly, 0, shah, act,
@@ -2206,79 +2292,6 @@ __acquire_grant_for_copy(
             trans_page_off = 0;
             trans_length = PAGE_SIZE;
         }
-        else if ( (shah->flags & GTF_type_mask) == GTF_transitive )
-        {
-            if ( !allow_transitive )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grant when transitivity not allowed\n");
-
-            trans_domid = sha2->transitive.trans_domid;
-            trans_gref = sha2->transitive.gref;
-            barrier(); /* Stop the compiler from re-loading
-                          trans_domid from shared memory */
-            if ( trans_domid == rd->domain_id )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grants cannot be self-referential\n");
-
-            /* We allow the trans_domid == ldom case, which
-               corresponds to a grant being issued by one domain, sent
-               to another one, and then transitively granted back to
-               the original domain.  Allowing it is easy, and means
-               that you don't need to go out of your way to avoid it
-               in the guest. */
-
-            /* We need to leave the rrd locked during the grant copy */
-            td = rcu_lock_domain_by_id(trans_domid);
-            if ( td == NULL )
-                PIN_FAIL(unlock_out_clear, GNTST_general_error,
-                         "transitive grant referenced bad domain %d\n",
-                         trans_domid);
-
-            /*
-             * __acquire_grant_for_copy() could take the lock on the
-             * remote table (if rd == td), so we have to drop the lock
-             * here and reacquire
-             */
-            active_entry_release(act);
-            grant_read_unlock(rgt);
-
-            rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
-                                          readonly, &grant_frame, page,
-                                          &trans_page_off, &trans_length, 0);
-
-            grant_read_lock(rgt);
-            act = active_entry_acquire(rgt, gref);
-
-            if ( rc != GNTST_okay ) {
-                __fixup_status_for_copy_pin(act, status);
-                rcu_unlock_domain(td);
-                active_entry_release(act);
-                grant_read_unlock(rgt);
-                return rc;
-            }
-
-            /*
-             * We dropped the lock, so we have to check that nobody else tried
-             * to pin (or, for that matter, unpin) the reference in *this*
-             * domain.  If they did, just give up and tell the caller to retry.
-             */
-            if ( act->pin != old_pin )
-            {
-                __fixup_status_for_copy_pin(act, status);
-                rcu_unlock_domain(td);
-                active_entry_release(act);
-                grant_read_unlock(rgt);
-                put_page(*page);
-                *page = NULL;
-                return ERESTART;
-            }
-
-            /* The actual remote remote grant may or may not be a
-               sub-page, but we always treat it as one because that
-               blocks mappings of transitive grants. */
-            is_sub_page = 1;
-            act->gfn = -1ul;
-        }
         else if ( !(sha2->hdr.flags & GTF_sub_page) )
         {
             rc = __get_paged_frame(sha2->full_page.frame, &grant_frame, page, readonly, rd);
@@ -2710,10 +2723,13 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA
     case 2:
         for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ )
         {
-            if ( ((shared_entry_v2(gt, i).hdr.flags & GTF_type_mask) ==
-                  GTF_permit_access) &&
-                 (shared_entry_v2(gt, i).full_page.frame >> 32) )
+            switch ( shared_entry_v2(gt, i).hdr.flags & GTF_type_mask )
             {
+            case GTF_permit_access:
+                 if ( !(shared_entry_v2(gt, i).full_page.frame >> 32) )
+                     break;
+                 /* fall through */
+            case GTF_transitive:
                 gdprintk(XENLOG_WARNING,
                          "tried to change grant table version to 1 with non-representable entries\n");
                 res = -ERANGE;
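
A minimal sketch of the drop/reacquire/revalidate pattern the patch applies
to transitive grants (not Xen code; pthread mutexes stand in for the
grant-table locks and ERESTART is defined locally just for the example):
state snapshotted before dropping the lock is rechecked afterwards, and any
change makes the caller retry instead of recursing:

/* Not Xen code: revalidate after temporarily dropping a lock. */
#include <pthread.h>
#include <stdio.h>

#define ERESTART 85                    /* illustrative value for the example */

struct entry {
    pthread_mutex_t lock;
    unsigned int pin;
    unsigned int frame;
};

static int acquire_with_drop(struct entry *e, void (*slow_path)(void))
{
    unsigned int old_pin, old_frame;

    pthread_mutex_lock(&e->lock);
    old_pin = e->pin;
    old_frame = e->frame;
    pthread_mutex_unlock(&e->lock);    /* must drop: slow path may relock */

    slow_path();                       /* e.g. acquiring the other table */

    pthread_mutex_lock(&e->lock);
    if ( e->pin != old_pin || e->frame != old_frame )
    {
        pthread_mutex_unlock(&e->lock);
        return ERESTART;               /* state changed underneath us: retry */
    }
    e->pin++;                          /* safe to commit the acquisition */
    pthread_mutex_unlock(&e->lock);
    return 0;
}

static void noop(void) { }

int main(void)
{
    struct entry e = { PTHREAD_MUTEX_INITIALIZER, 0, 42 };

    printf("rc = %d\n", acquire_with_drop(&e, noop));   /* 0 */
    return 0;
}
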
++++++ 59958edd-gnttab-avoid-spurious-maptrack-handle-alloc-failures.patch ++++++
# Commit d02f1a0b7576bafb2fba903c7e6e7221ab0d2847
# Date 2017-08-17 14:41:01 +0200
# Author Jan Beulich <[email protected]>
# Committer Jan Beulich <[email protected]>
gnttab: avoid spurious maptrack handle allocation failures

When no memory is available in the hypervisor, rather than immediately
failing the request, try to steal a handle from another vCPU.

Reported-by: George Dunlap <[email protected]>
Signed-off-by: Jan Beulich <[email protected]>
Reviewed-by: Andrew Cooper <[email protected]>

--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -411,7 +411,7 @@ get_maptrack_handle(
     struct vcpu          *curr = current;
     unsigned int          i, head;
     grant_handle_t        handle;
-    struct grant_mapping *new_mt;
+    struct grant_mapping *new_mt = NULL;
 
     handle = __get_maptrack_handle(lgt, curr);
     if ( likely(handle != -1) )
@@ -420,10 +420,15 @@ get_maptrack_handle(
     spin_lock(&lgt->maptrack_lock);
 
     /*
-     * If we've run out of frames, try stealing an entry from another
-     * VCPU (in case the guest isn't mapping across its VCPUs evenly).
+     * If we've run out of handles and still have frame headroom, try
+     * allocating a new maptrack frame.  If there is no headroom, or we're
+     * out of memory, try stealing an entry from another VCPU (in case the
+     * guest isn't mapping across its VCPUs evenly).
      */
-    if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
+    if ( nr_maptrack_frames(lgt) < max_maptrack_frames )
+        new_mt = alloc_xenheap_page();
+
+    if ( !new_mt )
     {
         spin_unlock(&lgt->maptrack_lock);
 
@@ -446,12 +451,6 @@ get_maptrack_handle(
         return steal_maptrack_handle(lgt, curr);
     }
 
-    new_mt = alloc_xenheap_page();
-    if ( !new_mt )
-    {
-        spin_unlock(&lgt->maptrack_lock);
-        return -1;
-    }
     clear_page(new_mt);
 
     /*
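
A minimal sketch of the reordered allocation logic (not Xen code; malloc()
stands in for alloc_xenheap_page() and the names are illustrative): growing
the local pool is attempted first while headroom remains, and stealing is the
fallback for both "no headroom" and "allocation failed":

/* Not Xen code: allocate-or-steal, instead of failing on allocation error. */
#include <stdio.h>
#include <stdlib.h>

#define MAX_FRAMES 4

static unsigned int nr_frames;

static int steal_handle(void)
{
    return 99;                         /* pretend another vCPU donated one */
}

static int get_handle(void)
{
    void *new_frame = NULL;

    if ( nr_frames < MAX_FRAMES )      /* headroom left: try to allocate */
        new_frame = malloc(4096);      /* stands in for alloc_xenheap_page() */

    if ( !new_frame )                  /* no headroom, or out of memory */
        return steal_handle();

    nr_frames++;
    /* ... thread the new frame's entries onto the local free list ... */
    free(new_frame);                   /* sketch only: real code keeps it */
    return 0;
}

int main(void)
{
    printf("handle = %d\n", get_handle());
    return 0;
}
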
++++++ gcc7-mini-os.patch ++++++
--- /var/tmp/diff_new_pack.QNfxlD/_old  2017-09-22 21:32:01.152657106 +0200
+++ /var/tmp/diff_new_pack.QNfxlD/_new  2017-09-22 21:32:01.152657106 +0200
@@ -1,28 +1,41 @@
-ld -nostdlib -L/home/abuild/rpmbuild/BUILD/xen-4.9.0-testing/stubdom/cross-root-i686/i686-xen-elf/lib -m elf_i386 -T /home/abuild/rpmbuild/BUILD/xen-4.9.0-testing/stubdom/mini-os-x86_32-grub/arch/x86/minios-x86_32.lds /home/abuild/rpmbuild/BUILD/xen-4.9.0-testing/stubdom/mini-os-x86_32-grub/mini-os.o -o /home/abuild/rpmbuild/BUILD/xen-4.9.0-testing/stubdom/mini-os-x86_32-grub/mini-os
-/home/abuild/rpmbuild/BUILD/xen-4.9.0-testing/stubdom/mini-os-x86_32-grub/mini-os.o: In function `_strtoll_r':
-/home/abuild/rpmbuild/BUILD/xen-4.9.0-testing/stubdom/newlib-x86_32/i686-xen-elf/newlib/libc/stdlib/../../../../../newlib-1.16.0/newlib/libc/stdlib/strtoll_r.c:110: undefined reference to `__udivmoddi4'
-make[2]: *** [Makefile:167: /home/abuild/rpmbuild/BUILD/xen-4.9.0-testing/stubdom/mini-os-x86_32-grub/mini-os] Error 1
-make[2]: Leaving directory '/home/abuild/rpmbuild/BUILD/xen-4.9.0-testing/extras/mini-os-remote'
-make[1]: *** [Makefile:566: pv-grub] Error 2
-make[1]: Leaving directory '/home/abuild/rpmbuild/BUILD/xen-4.9.0-testing/stubdom'
-make: *** [Makefile:106: install-stubdom] Error 2
+From d991bdbc062248221511ecb795617c36b37e1d2e Mon Sep 17 00:00:00 2001
+From: Wei Liu <[email protected]>
+Date: Wed, 9 Aug 2017 13:15:48 +0100
+Subject: [PATCH] lib/math.c: implement __udivmoddi4
 
-GCC 6.x used to generate __umoddi3 and __udivdi3, now __udivmoddi4 is generated.
+Some code compiled by gcc 7 requires this.
 
-Index: xen-4.9.0-testing/extras/mini-os-remote/Makefile
-===================================================================
---- xen-4.9.0-testing.orig/extras/mini-os-remote/Makefile
-+++ xen-4.9.0-testing/extras/mini-os-remote/Makefile
-@@ -162,7 +162,11 @@ $(OBJ_DIR)/arch/x86/minios-x86%.lds:  ar
-       $(CPP) $(ASFLAGS) -P $< -o $@
+Signed-off-by: Wei Liu <[email protected]>
+Reviewed-by: Samuel Thibault <[email protected]>
+---
+ lib/math.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git xen-4.9.0-testing.orig/extras/mini-os-remote/lib/math.c xen-4.9.0-testing/extras/mini-os-remote/lib/math.c
+index 561393e..b98cc1d 100644
+--- xen-4.9.0-testing.orig/extras/mini-os-remote/lib/math.c
++++ xen-4.9.0-testing/extras/mini-os-remote/lib/math.c
+@@ -6,6 +6,7 @@
+  *        File: math.c
+  *      Author: Rolf Neugebauer ([email protected])
+  *     Changes: 
++ *        Implement __udivmoddi4 (Wei Liu <[email protected]>)
+  *              
+  *        Date: Aug 2003
+  * 
+@@ -397,6 +398,15 @@ __umoddi3(u_quad_t a, u_quad_t b)
+ }
  
- $(OBJ_DIR)/$(TARGET): $(OBJS) $(APP_O) arch_lib $(OBJ_DIR)/$(TARGET_ARCH_DIR)/minios-$(MINIOS_TARGET_ARCH).lds
--      $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(APP_O) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o [email protected]
-+      if [ "$(MINIOS_TARGET_ARCH)" = "x86_32" -a -f /usr/lib64/gcc/x86_64-suse-linux/7/32/libgcc.a ] ; then \
-+              $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(APP_O) $(OBJS) $(LDARCHLIB) -L/usr/lib64/gcc/x86_64-suse-linux/7/32 $(LDLIBS) -lgcc -o [email protected] ; \
-+      else \
-+              $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(APP_O) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o [email protected] ; \
-+      fi
-       $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start [email protected] [email protected]
-       $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) [email protected] $(EXTRA_OBJS) -o $@
-       gzip -f -9 -c $@ >[email protected]
+ /*
++ * Returns the quotient and places remainder in r
++ */
++u_quad_t
++__udivmoddi4(u_quad_t a, u_quad_t b, u_quad_t *r)
++{
++      return __qdivrem(a, b, r);
++}
++
++/*
+  * From
+  * moddi3.c
+  */

++++++ libxc.sr.superpage.patch ++++++
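For reference, a minimal sketch of the helper gcc 7 expects here (ordinary C,
using plain 64-bit division instead of mini-os's __qdivrem(), and a stand-in
name to avoid clashing with libgcc): the quotient is returned and the
remainder is stored through the third argument:

/* Not the mini-os patch: same contract as __udivmoddi4 on 32-bit targets. */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u_quad_t;

static u_quad_t my_udivmoddi4(u_quad_t a, u_quad_t b, u_quad_t *r)
{
    if ( r )
        *r = a % b;
    return a / b;
}

int main(void)
{
    u_quad_t rem;
    u_quad_t quot = my_udivmoddi4(1000000007ULL, 10ULL, &rem);

    printf("quot=%llu rem=%llu\n",
           (unsigned long long)quot, (unsigned long long)rem);
    return 0;
}
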
++++ 739 lines (skipped)
++++ between /work/SRC/openSUSE:Factory/xen/libxc.sr.superpage.patch
++++ and /work/SRC/openSUSE:Factory/.xen.new/libxc.sr.superpage.patch

++++++ vif-route.patch ++++++
--- /var/tmp/diff_new_pack.QNfxlD/_old  2017-09-22 21:32:01.300636276 +0200
+++ /var/tmp/diff_new_pack.QNfxlD/_new  2017-09-22 21:32:01.300636276 +0200
@@ -1,10 +1,10 @@
 References: bsc#985503
 
-Index: xen-4.7.0-testing/tools/hotplug/Linux/vif-route
+Index: xen-4.9.0-testing/tools/hotplug/Linux/vif-route
 ===================================================================
---- xen-4.7.0-testing.orig/tools/hotplug/Linux/vif-route
-+++ xen-4.7.0-testing/tools/hotplug/Linux/vif-route
-@@ -35,7 +35,7 @@ case "${command}" in
+--- xen-4.9.0-testing.orig/tools/hotplug/Linux/vif-route
++++ xen-4.9.0-testing/tools/hotplug/Linux/vif-route
+@@ -37,7 +37,7 @@ case "${command}" in
          ;;
  esac
  

++++++ xen-supportconfig ++++++
--- /var/tmp/diff_new_pack.QNfxlD/_old  2017-09-22 21:32:01.364627268 +0200
+++ /var/tmp/diff_new_pack.QNfxlD/_new  2017-09-22 21:32:01.368626704 +0200
@@ -90,11 +90,8 @@
 fi
 plugin_command "route -n"
 plugin_command "arp -v"
-if plugin_command "brctl show"; then
-    for BRIDGE in `brctl show | grep -v ^bridge | egrep "^[a-z]|^[A-Z]" | awk '{print $1}'`; do
-        plugin_command "brctl showmacs $BRIDGE"
-    done
-fi
+plugin_command "ip link show type bridge"
+plugin_command "bridge link show"
 # list contents of common config and image directories
 plugin_command "ls -alR /etc/xen/vm/"
 plugin_command "ls -alR /etc/xen/auto/"

++++++ xnloader.py ++++++
--- /var/tmp/diff_new_pack.QNfxlD/_old  2017-09-22 21:32:01.488609815 +0200
+++ /var/tmp/diff_new_pack.QNfxlD/_new  2017-09-22 21:32:01.492609252 +0200
@@ -8,7 +8,7 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
-# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+# 51 Franklin St, Boston, MA 02110
 
 # Binary patching of xnloader.sys
 # For launching NetWare on Xen 4.2 and newer

