On Wed, Jul 10, 2019 at 09:42:46PM +0200, Thomas Gleixner wrote:
> The pinning of sensitive CR0 and CR4 bits caused a boot crash when loading
> the kvm_intel module on a kernel compiled with CONFIG_PARAVIRT=n.
> 
> The reason is that the static key which controls the pinning is marked RO
> after init. The kvm_intel module contains a CR4 write which requires
> updating the static key entry list. That obviously does not work when the
> key is in an RO section.
> 
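For anyone following along, this is the generic static-key-in-a-module
failure mode. A rough sketch (simplified, not the exact kernel code) of
what the old inline version compiled into every caller, including
kvm_intel.ko:

	/* built in; the key itself sits in an __ro_after_init section */
	DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);

	/* inlined into the module, so the jump entry is emitted there */
	static inline void native_write_cr4(unsigned long val)
	{
		asm volatile("mov %0,%%cr4" : "+r" (val));
		if (static_branch_likely(&cr_pinning))
			WARN_ON((val & cr4_pinned_bits) != cr4_pinned_bits);
	}

At module load, jump_label has to link the module's jump entries into
the key, i.e. write to the key structure itself, and that write faults
because the key lives in a read-only section.
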
> With CONFIG_PARAVIRT enabled this does not happen because the CR4 write
> uses the paravirt indirection and the actual write function is built in.
> 
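And for completeness, the reason CONFIG_PARAVIRT=y papers over this:
the CR4 write then goes through the pv_ops indirection, so the static
branch site ends up in the built-in copy of native_write_cr4() rather
than in the module. Roughly:

	static inline void __write_cr4(unsigned long val)
	{
		/* indirect call into the built-in native function */
		PVOP_VCALL1(cpu.write_cr4, val);
	}

so no module ever carries a jump entry for cr_pinning.
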
> As the key is intended to be immutable after init, move
> native_write_cr0/3() out of line.
> 
> While at it, consolidate the update of the cr4 shadow variable and store
> the value right away when the pinning is initialized on a booting CPU.
> There is no point in reading it back 20 instructions later. This allows
> confining the static key and the pinning variable to cpu/common.c and
> marking them static.
> 
> Fixes: 8dbec27a242c ("x86/asm: Pin sensitive CR0 bits")
> Fixes: 873d50d58f67 ("x86/asm: Pin sensitive CR4 bits")
> Reported-by: Linus Torvalds <torva...@linux-foundation.org>
> Reported-by: Xi Ruoyao <xry...@mengyan1223.wang>
> Signed-off-by: Thomas Gleixner <t...@linutronix.de>
> Tested-by: Xi Ruoyao <xry...@mengyan1223.wang>

Thank you for tracking this down and solving it!

Nit: should be "cr0/4()" in Subject and in paragraph 4.

Acked-by: Kees Cook <keesc...@chromium.org>

-Kees

> ---
>  arch/x86/include/asm/processor.h     |    1 
>  arch/x86/include/asm/special_insns.h |   41 -------------------
>  arch/x86/kernel/cpu/common.c         |   72 +++++++++++++++++++++++++++--------
>  arch/x86/kernel/smpboot.c            |   14 ------
>  arch/x86/xen/smp_pv.c                |    1 
>  5 files changed, 61 insertions(+), 68 deletions(-)
> 
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -741,6 +741,7 @@ extern void load_direct_gdt(int);
>  extern void load_fixmap_gdt(int);
>  extern void load_percpu_segment(int);
>  extern void cpu_init(void);
> +extern void cr4_init(void);
>  
>  static inline unsigned long get_debugctlmsr(void)
>  {
> --- a/arch/x86/include/asm/special_insns.h
> +++ b/arch/x86/include/asm/special_insns.h
> @@ -18,9 +18,7 @@
>   */
>  extern unsigned long __force_order;
>  
> -/* Starts false and gets enabled once CPU feature detection is done. */
> -DECLARE_STATIC_KEY_FALSE(cr_pinning);
> -extern unsigned long cr4_pinned_bits;
> +void native_write_cr0(unsigned long val);
>  
>  static inline unsigned long native_read_cr0(void)
>  {
> @@ -29,24 +27,6 @@ static inline unsigned long native_read_
>       return val;
>  }
>  
> -static inline void native_write_cr0(unsigned long val)
> -{
> -     unsigned long bits_missing = 0;
> -
> -set_register:
> -     asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
> -
> -     if (static_branch_likely(&cr_pinning)) {
> -             if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
> -                     bits_missing = X86_CR0_WP;
> -                     val |= bits_missing;
> -                     goto set_register;
> -             }
> -             /* Warn after we've set the missing bits. */
> -             WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
> -     }
> -}
> -
>  static inline unsigned long native_read_cr2(void)
>  {
>       unsigned long val;
> @@ -91,24 +71,7 @@ static inline unsigned long native_read_
>       return val;
>  }
>  
> -static inline void native_write_cr4(unsigned long val)
> -{
> -     unsigned long bits_missing = 0;
> -
> -set_register:
> -     asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
> -
> -     if (static_branch_likely(&cr_pinning)) {
> -             if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
> -                     bits_missing = ~val & cr4_pinned_bits;
> -                     val |= bits_missing;
> -                     goto set_register;
> -             }
> -             /* Warn after we've set the missing bits. */
> -             WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
> -                       bits_missing);
> -     }
> -}
> +void native_write_cr4(unsigned long val);
>  
>  #ifdef CONFIG_X86_64
>  static inline unsigned long native_read_cr8(void)
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -366,10 +366,62 @@ static __always_inline void setup_umip(s
>       cr4_clear_bits(X86_CR4_UMIP);
>  }
>  
> -DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
> -EXPORT_SYMBOL(cr_pinning);
> -unsigned long cr4_pinned_bits __ro_after_init;
> -EXPORT_SYMBOL(cr4_pinned_bits);
> +static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
> +static unsigned long cr4_pinned_bits __ro_after_init;
> +
> +void native_write_cr0(unsigned long val)
> +{
> +     unsigned long bits_missing = 0;
> +
> +set_register:
> +     asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
> +
> +     if (static_branch_likely(&cr_pinning)) {
> +             if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
> +                     bits_missing = X86_CR0_WP;
> +                     val |= bits_missing;
> +                     goto set_register;
> +             }
> +             /* Warn after we've set the missing bits. */
> +             WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
> +     }
> +}
> +EXPORT_SYMBOL(native_write_cr0);
> +
> +void native_write_cr4(unsigned long val)
> +{
> +     unsigned long bits_missing = 0;
> +
> +set_register:
> +     asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
> +
> +     if (static_branch_likely(&cr_pinning)) {
> +             if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
> +                     bits_missing = ~val & cr4_pinned_bits;
> +                     val |= bits_missing;
> +                     goto set_register;
> +             }
> +             /* Warn after we've set the missing bits. */
> +             WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
> +                       bits_missing);
> +     }
> +}
> +EXPORT_SYMBOL(native_write_cr4);
> +
> +void cr4_init(void)
> +{
> +     unsigned long cr4 = __read_cr4();
> +
> +     if (boot_cpu_has(X86_FEATURE_PCID))
> +             cr4 |= X86_CR4_PCIDE;
> +     if (static_branch_likely(&cr_pinning))
> +             cr4 |= cr4_pinned_bits;
> +
> +     __write_cr4(cr4);
> +
> +     /* Initialize cr4 shadow for this CPU. */
> +     this_cpu_write(cpu_tlbstate.cr4, cr4);
> +}
>  
>  /*
>   * Once CPU feature detection is finished (and boot params have been
> @@ -1723,12 +1775,6 @@ void cpu_init(void)
>  
>       wait_for_master_cpu(cpu);
>  
> -     /*
> -      * Initialize the CR4 shadow before doing anything that could
> -      * try to read it.
> -      */
> -     cr4_init_shadow();
> -
>       if (cpu)
>               load_ucode_ap();
>  
> @@ -1823,12 +1869,6 @@ void cpu_init(void)
>  
>       wait_for_master_cpu(cpu);
>  
> -     /*
> -      * Initialize the CR4 shadow before doing anything that could
> -      * try to read it.
> -      */
> -     cr4_init_shadow();
> -
>       show_ucode_info_early();
>  
>       pr_info("Initializing CPU#%d\n", cpu);
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -210,28 +210,16 @@ static int enable_start_cpu0;
>   */
>  static void notrace start_secondary(void *unused)
>  {
> -     unsigned long cr4 = __read_cr4();
> -
>       /*
>        * Don't put *anything* except direct CPU state initialization
>        * before cpu_init(), SMP booting is too fragile that we want to
>        * limit the things done here to the most necessary things.
>        */
> -     if (boot_cpu_has(X86_FEATURE_PCID))
> -             cr4 |= X86_CR4_PCIDE;
> -     if (static_branch_likely(&cr_pinning))
> -             cr4 |= cr4_pinned_bits;
> -
> -     __write_cr4(cr4);
> +     cr4_init();
>  
>  #ifdef CONFIG_X86_32
>       /* switch away from the initial page table */
>       load_cr3(swapper_pg_dir);
> -     /*
> -      * Initialize the CR4 shadow before doing anything that could
> -      * try to read it.
> -      */
> -     cr4_init_shadow();
>       __flush_tlb_all();
>  #endif
>       load_current_idt();
> --- a/arch/x86/xen/smp_pv.c
> +++ b/arch/x86/xen/smp_pv.c
> @@ -58,6 +58,7 @@ static void cpu_bringup(void)
>  {
>       int cpu;
>  
> +     cr4_init();
>       cpu_init();
>       touch_softlockup_watchdog();
>       preempt_disable();

-- 
Kees Cook
