Re: [Xen-devel] [PATCH v2 1/3] x86: suppress SMEP and SMAP while running 32-bit PV guest code

2016-05-13 Thread Andrew Cooper
On 10/03/16 09:53, Jan Beulich wrote:
> Since such guests' kernel code runs in ring 1, their memory accesses,
> at the paging layer, are supervisor mode ones, and hence subject to
> SMAP/SMEP checks. Such guests cannot be expected to be aware of those
> two features though (and so far we also don't expose the respective
> feature flags), and hence may suffer page faults they cannot deal with.
>
> While the placement of the re-enabling slightly weakens the intended
> protection, it was selected such that 64-bit paths would remain
> unaffected where possible. At the expense of a further performance hit
> the re-enabling could be put right next to the CLACs.
>
> Note that this introduces a number of extra TLB flushes - CR4.SMEP
> transitioning from 0 to 1 always causes a flush, and it transitioning
> from 1 to 0 may also do.
>
> Signed-off-by: Jan Beulich 

Reviewed-by: Andrew Cooper 



[Xen-devel] [PATCH v2 1/3] x86: suppress SMEP and SMAP while running 32-bit PV guest code

2016-03-10 Thread Jan Beulich
Since such guests' kernel code runs in ring 1, their memory accesses,
at the paging layer, are supervisor mode ones, and hence subject to
SMAP/SMEP checks. Such guests cannot be expected to be aware of those
two features though (and so far we also don't expose the respective
feature flags), and hence may suffer page faults they cannot deal with.
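
As a purely illustrative C sketch of the architectural rule at work (not part
of this patch; the helper names are invented):

#include <stdbool.h>

/*
 * SMEP/SMAP key off the paging-layer notion of a "supervisor-mode access",
 * which covers every access made at CPL < 3.  A 32-bit PV kernel runs in
 * ring 1, so all of its accesses are supervisor-mode ones, and any
 * user-accessible (U/S=1) mapping it touches is policed by SMEP/SMAP.
 */
bool is_supervisor_access(unsigned int cpl)
{
    return cpl < 3;                     /* rings 0, 1 and 2 all qualify */
}

/*
 * SMAP: a supervisor-mode data access to a user-accessible page faults
 * unless EFLAGS.AC is set (implicit supervisor accesses fault regardless).
 */
bool smap_would_fault(unsigned int cpl, bool page_user, bool eflags_ac)
{
    return is_supervisor_access(cpl) && page_user && !eflags_ac;
}

/* SMEP: a supervisor-mode instruction fetch from a user page always faults. */
bool smep_would_fault(unsigned int cpl, bool page_user)
{
    return is_supervisor_access(cpl) && page_user;
}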

While the placement of the re-enabling slightly weakens the intended
protection, it was selected such that 64-bit paths would remain
unaffected where possible. At the expense of a further performance hit
the re-enabling could be put right next to the CLACs.

Note that this introduces a number of extra TLB flushes - CR4.SMEP
transitioning from 0 to 1 always causes a flush, and it transitioning
from 1 to 0 may also do.
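
For illustration, the exit-path suppression added in assembly below amounts
to roughly the following C (sketch only, not part of the patch;
read_cr4()/write_cr4() are the usual accessors):

#define X86_CR4_SMEP (1UL << 20)
#define X86_CR4_SMAP (1UL << 21)

extern unsigned long read_cr4(void);
extern void write_cr4(unsigned long cr4);

/*
 * What the alternatives-patched sequence in compat_restore_all_guest
 * amounts to: drop SMEP/SMAP just before returning to 32-bit PV guest
 * code.  Clearing SMEP (1 -> 0) here is the transition that may flush
 * the TLB; the matching 0 -> 1 flush happens on re-entry to Xen.
 */
void cr4_pv32_suppress(void)
{
    write_cr4(read_cr4() & ~(X86_CR4_SMEP | X86_CR4_SMAP));
}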

Signed-off-by: Jan Beulich 
---
v2: Use more generic symbol/label names. Comment the BUG in assembly
code and restrict it to debug builds. Add C equivalent to #PF
re-execution condition in a comment. Use .skip instead of .org in
handle_exception to avoid gas bug (and its slightly ugly
workaround). Use a properly named label instead of a numeric one
in handle_exception.

--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -67,6 +67,8 @@ boolean_param("smep", opt_smep);
 static bool_t __initdata opt_smap = 1;
 boolean_param("smap", opt_smap);
 
+unsigned long __read_mostly cr4_pv32_mask;
+
 /* Boot dom0 in pvh mode */
 static bool_t __initdata opt_dom0pvh;
 boolean_param("dom0pvh", opt_dom0pvh);
@@ -1335,6 +1337,8 @@ void __init noreturn __start_xen(unsigne
 if ( cpu_has_smap )
 set_in_cr4(X86_CR4_SMAP);
 
+cr4_pv32_mask = mmu_cr4_features & (X86_CR4_SMEP | X86_CR4_SMAP);
+
 if ( cpu_has_fsgsbase )
 set_in_cr4(X86_CR4_FSGSBASE);
 
@@ -1471,7 +1475,10 @@ void __init noreturn __start_xen(unsigne
  * copy_from_user().
  */
 if ( cpu_has_smap )
+{
+cr4_pv32_mask &= ~X86_CR4_SMAP;
 write_cr4(read_cr4() & ~X86_CR4_SMAP);
+}
 
 printk("%sNX (Execute Disable) protection %sactive\n",
cpu_has_nx ? XENLOG_INFO : XENLOG_WARNING "Warning: ",
@@ -1488,7 +1495,10 @@ void __init noreturn __start_xen(unsigne
 panic("Could not set up DOM0 guest OS");
 
 if ( cpu_has_smap )
+{
 write_cr4(read_cr4() | X86_CR4_SMAP);
+cr4_pv32_mask |= X86_CR4_SMAP;
+}
 
 /* Scrub RAM that is still free and so may go to an unprivileged domain. */
 scrub_heap_pages();
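
In C terms, the cr4_pv32_restore stub added further down consumes the mask
set up above roughly as follows (sketch only, reusing the CR4 bit
definitions from the earlier sketch; the real implementation is the
assembly in compat/entry.S):

extern unsigned long read_cr4(void);
extern void write_cr4(unsigned long cr4);
extern unsigned long cr4_pv32_mask;

/*
 * Entry paths from 32-bit PV guest context: if neither SMEP nor SMAP is
 * currently set in CR4, they were suppressed on the way out to the guest,
 * so turn back on whichever of the two the host actually enabled.
 */
void cr4_pv32_restore_sketch(void)
{
    unsigned long cr4 = read_cr4();

    if ( !(cr4 & (X86_CR4_SMEP | X86_CR4_SMAP)) )
        write_cr4(cr4 | cr4_pv32_mask);
}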
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -16,14 +16,16 @@ ENTRY(compat_hypercall)
 ASM_CLAC
 pushq $0
 SAVE_VOLATILE type=TRAP_syscall compat=1
+CR4_PV32_RESTORE
 
 cmpb  $0,untrusted_msi(%rip)
 UNLIKELY_START(ne, msi_check)
 movl  $HYPERCALL_VECTOR,%edi
 call  check_for_unexpected_msi
-LOAD_C_CLOBBERED
+LOAD_C_CLOBBERED compat=1 ax=0
 UNLIKELY_END(msi_check)
 
+movl  UREGS_rax(%rsp),%eax
 GET_CURRENT(%rbx)
 
 cmpl  $NR_hypercalls,%eax
@@ -33,7 +35,6 @@ UNLIKELY_END(msi_check)
 pushq UREGS_rbx(%rsp); pushq %rcx; pushq %rdx; pushq %rsi; pushq %rdi
 pushq UREGS_rbp+5*8(%rsp)
 leaq  compat_hypercall_args_table(%rip),%r10
-movl  %eax,%eax
 movl  $6,%ecx
 subb  (%r10,%rax,1),%cl
 movq  %rsp,%rdi
@@ -48,7 +49,6 @@ UNLIKELY_END(msi_check)
 #define SHADOW_BYTES 16 /* Shadow EIP + shadow hypercall # */
 #else
 /* Relocate argument registers and zero-extend to 64 bits. */
-movl  %eax,%eax  /* Hypercall #  */
 xchgl %ecx,%esi  /* Arg 2, Arg 4 */
 movl  %edx,%edx  /* Arg 3*/
 movl  %edi,%r8d  /* Arg 5*/
@@ -174,10 +174,46 @@ compat_bad_hypercall:
 /* %rbx: struct vcpu, interrupts disabled */
 ENTRY(compat_restore_all_guest)
 ASSERT_INTERRUPTS_DISABLED
+.Lcr4_orig:
+ASM_NOP3 /* mov   %cr4, %rax */
+ASM_NOP6 /* and   $..., %rax */
+ASM_NOP3 /* mov   %rax, %cr4 */
+.pushsection .altinstr_replacement, "ax"
+.Lcr4_alt:
+mov   %cr4, %rax
+and   $~(X86_CR4_SMEP|X86_CR4_SMAP), %rax
+mov   %rax, %cr4
+.Lcr4_alt_end:
+.section .altinstructions, "a"
+altinstruction_entry .Lcr4_orig, .Lcr4_alt, X86_FEATURE_SMEP, 12, \
+ (.Lcr4_alt_end - .Lcr4_alt)
+altinstruction_entry .Lcr4_orig, .Lcr4_alt, X86_FEATURE_SMAP, 12, \
+ (.Lcr4_alt_end - .Lcr4_alt)
+.popsection
 RESTORE_ALL adj=8 compat=1
 .Lft0:  iretq
 _ASM_PRE_EXTABLE(.Lft0, handle_exception)
 
+/* This mustn't modify registers other than %rax. */
+ENTRY(cr4_pv32_restore)
+mov   %cr4, %rax
+test  $X86_CR4_SMEP|X86_CR4_SMAP,%eax
+jnz   0f
+or    cr4_pv32_mask(%rip), %rax
+mov   %rax, %cr4
+ret
+0:
+#ifndef NDEBUG
+/* Check