Re: [PATCH v6 46/76] x86/sev-es: Adjust #VC IST Stack on entering NMI handler

2020-08-31 Thread Borislav Petkov
On Mon, Aug 24, 2020 at 10:54:41AM +0200, Joerg Roedel wrote:
> diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
> index 4fc9954a9560..951f098a4bf5 100644
> --- a/arch/x86/kernel/nmi.c
> +++ b/arch/x86/kernel/nmi.c
> @@ -33,6 +33,7 @@
>  #include 
>  #include 
>  #include 
> +#include <asm/sev-es.h>
>  
>  #define CREATE_TRACE_POINTS
>  #include 
> @@ -488,6 +489,9 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
>   this_cpu_write(nmi_cr2, read_cr2());
>  nmi_restart:
>  
> + /* Needs to happen before DR7 is accessed */

... because? Let's explain why.

> + sev_es_ist_enter(regs);
> +
>   this_cpu_write(nmi_dr7, local_db_save());
>  
>   irq_state = idtentry_enter_nmi(regs);
> @@ -501,6 +505,8 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
>  
>   local_db_restore(this_cpu_read(nmi_dr7));
>  
> + sev_es_ist_exit();
> +
>   if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
>   write_cr2(this_cpu_read(nmi_cr2));
>   if (this_cpu_dec_return(nmi_state))
> diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
> index 64002d86a237..95831d103418 100644
> --- a/arch/x86/kernel/sev-es.c
> +++ b/arch/x86/kernel/sev-es.c
> @@ -52,6 +52,9 @@ struct sev_es_runtime_data {
>  
>  static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
>  
> +DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
> +EXPORT_SYMBOL_GPL(sev_es_enable_key);

So the GPL export is not needed. The key itself can be made static when
you uninline the sev_es_ist_enter/exit accessors as they're called only
in nmi.c but I guess Peter would object to that in high-NMI-load perf
scenarios...

The export looks unneeded tho.
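
For illustration only, a rough, untested sketch of that uninlined variant
(key static in sev-es.c, wrappers out of line) could look like:

	static DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);

	void noinstr sev_es_ist_enter(struct pt_regs *regs)
	{
		/* Nothing to do unless SEV-ES handling is enabled */
		if (static_branch_unlikely(&sev_es_enable_key))
			__sev_es_ist_enter(regs);
	}

	void noinstr sev_es_ist_exit(void)
	{
		if (static_branch_unlikely(&sev_es_enable_key))
			__sev_es_ist_exit();
	}

with only the two function declarations left in sev-es.h, at the cost of
an extra call on every NMI.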

> +
>  static void __init sev_es_setup_vc_stacks(int cpu)
>  {
>   struct sev_es_runtime_data *data;
> @@ -73,6 +76,59 @@ static void __init sev_es_setup_vc_stacks(int cpu)
>   cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
>  }
>  
> +static __always_inline bool on_vc_stack(unsigned long sp)
> +{
> + return ((sp >= __this_cpu_ist_bot_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
> +}
> +
> +/*
> + * This function handles the case when an NM is raised in the #VC exception

 NMI

> + * handler entry code. In this case the IST entry for VC must be adjusted, so

"VC" or "#VC"? Choose one pls.

> + * that any subsequent VC exception will not overwrite the stack contents of the
> + * interrupted VC handler.
> + *
> + * The IST entry is adjusted unconditionally so that it can be also be
> + * unconditionally back-adjusted in sev_es_ist_exit(). Otherwise a nested
  ^

"adjusted back"

> + * sev_es_ist_exit() call may back-adjust the IST entry too early.

Ditto.

> + */
> +void noinstr __sev_es_ist_enter(struct pt_regs *regs)
> +{
> + unsigned long old_ist, new_ist;
> + unsigned long *p;
> +
> + /* Read old IST entry */
> + old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
> +
> + /* Make room on the IST stack */
> + if (on_vc_stack(regs->sp))
> + new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
> + else
> + new_ist = old_ist - sizeof(old_ist);
> +
> + /* Store old IST entry */
> + p   = (unsigned long *)new_ist;
> + *p  = old_ist;

What's wrong with:

*(unsigned long *)new_ist = old_ist;

?

> +
> + /* Set new IST entry */
> + this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
> +}
> +
> +void noinstr __sev_es_ist_exit(void)
> +{
> + unsigned long ist;
> + unsigned long *p;
> +
> + /* Read IST entry */
> + ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
> +
> + if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
> + return;
> +
> + /* Read back old IST entry and write it to the TSS */
> + p = (unsigned long *)ist;
> + this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *p);

And

this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);

?
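
Just to illustrate, the two helpers with both casts folded in could shrink
to something like this (untested sketch, same logic as above):

	void noinstr __sev_es_ist_enter(struct pt_regs *regs)
	{
		unsigned long old_ist, new_ist;

		/* Read old IST entry */
		old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

		/* Make room on the IST stack */
		if (on_vc_stack(regs->sp))
			new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
		else
			new_ist = old_ist - sizeof(old_ist);

		/* Store old IST entry at the top of the new stack */
		*(unsigned long *)new_ist = old_ist;

		/* Set new IST entry */
		this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
	}

	void noinstr __sev_es_ist_exit(void)
	{
		unsigned long ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

		if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
			return;

		/* Restore the old IST entry saved by __sev_es_ist_enter() */
		this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
	}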

> +}
> +
>  /* Needed in vc_early_forward_exception */
>  void do_early_exception(struct pt_regs *regs, int trapnr);
>  
> @@ -277,6 +333,9 @@ void __init sev_es_init_vc_handling(void)
>   if (!sev_es_active())
>   return;
>  
> + /* Enable SEV-ES special handling */
> + static_branch_enable(&sev_es_enable_key);
> +
>   /* Initialize per-cpu GHCB pages */
>   for_each_possible_cpu(cpu) {
>   sev_es_alloc_runtime_data(cpu);
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 74cfe6eb7ebb..030d882eaad1 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -59,6 +59,7 @@
>  #include 
>  #include 
>  #include 
> +#include <asm/sev-es.h>
>  
>  #ifdef CONFIG_X86_64
>  #include 
> @@ -731,6 +732,7 @@ static bool is_sysenter_singlestep(struct pt_regs *regs)
>  
>  static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7)
>  {
> +
>   /*
>* Disable breakpoints during exception handling; recursive 

[PATCH v6 46/76] x86/sev-es: Adjust #VC IST Stack on entering NMI handler

2020-08-24 Thread Joerg Roedel
From: Joerg Roedel 

When an NMI hits in the #VC handler entry code before it has switched to
another stack, any subsequent #VC exception in the NMI code path will
overwrite the interrupted #VC handler's stack.

Make sure this doesn't happen by explicitly adjusting the #VC IST entry
in the NMI handler for the time it can cause #VC exceptions.

Signed-off-by: Joerg Roedel 
Link: https://lore.kernel.org/r/20200724160336.5435-46-j...@8bytes.org
---
 arch/x86/include/asm/sev-es.h | 19 +++
 arch/x86/kernel/nmi.c |  6 
 arch/x86/kernel/sev-es.c  | 59 +++
 arch/x86/kernel/traps.c   |  2 ++
 4 files changed, 86 insertions(+)

diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev-es.h
index 824e9e6b067c..2dd19932a60d 100644
--- a/arch/x86/include/asm/sev-es.h
+++ b/arch/x86/include/asm/sev-es.h
@@ -77,4 +77,23 @@ static inline u64 lower_bits(u64 val, unsigned int bits)
 extern void vc_no_ghcb(void);
 extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+extern struct static_key_false sev_es_enable_key;
+extern void __sev_es_ist_enter(struct pt_regs *regs);
+extern void __sev_es_ist_exit(void);
+static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
+{
+   if (static_branch_unlikely(&sev_es_enable_key))
+   __sev_es_ist_enter(regs);
+}
+static __always_inline void sev_es_ist_exit(void)
+{
+   if (static_branch_unlikely(&sev_es_enable_key))
+   __sev_es_ist_exit();
+}
+#else
+static inline void sev_es_ist_enter(struct pt_regs *regs) { }
+static inline void sev_es_ist_exit(void) { }
+#endif
+
 #endif
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 4fc9954a9560..951f098a4bf5 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include <asm/sev-es.h>
 
 #define CREATE_TRACE_POINTS
 #include 
@@ -488,6 +489,9 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
this_cpu_write(nmi_cr2, read_cr2());
 nmi_restart:
 
+   /* Needs to happen before DR7 is accessed */
+   sev_es_ist_enter(regs);
+
this_cpu_write(nmi_dr7, local_db_save());
 
irq_state = idtentry_enter_nmi(regs);
@@ -501,6 +505,8 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
 
local_db_restore(this_cpu_read(nmi_dr7));
 
+   sev_es_ist_exit();
+
if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
write_cr2(this_cpu_read(nmi_cr2));
if (this_cpu_dec_return(nmi_state))
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 64002d86a237..95831d103418 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -52,6 +52,9 @@ struct sev_es_runtime_data {
 
 static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
 
+DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
+EXPORT_SYMBOL_GPL(sev_es_enable_key);
+
 static void __init sev_es_setup_vc_stacks(int cpu)
 {
struct sev_es_runtime_data *data;
@@ -73,6 +76,59 @@ static void __init sev_es_setup_vc_stacks(int cpu)
cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
 }
 
+static __always_inline bool on_vc_stack(unsigned long sp)
+{
+   return ((sp >= __this_cpu_ist_bot_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
+}
+
+/*
+ * This function handles the case when an NM is raised in the #VC exception
+ * handler entry code. In this case the IST entry for VC must be adjusted, so
+ * that any subsequent VC exception will not overwrite the stack contents of the
+ * interrupted VC handler.
+ *
+ * The IST entry is adjusted unconditionally so that it can be also be
+ * unconditionally back-adjusted in sev_es_ist_exit(). Otherwise a nested
+ * sev_es_ist_exit() call may back-adjust the IST entry too early.
+ */
+void noinstr __sev_es_ist_enter(struct pt_regs *regs)
+{
+   unsigned long old_ist, new_ist;
+   unsigned long *p;
+
+   /* Read old IST entry */
+   old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+
+   /* Make room on the IST stack */
+   if (on_vc_stack(regs->sp))
+   new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
+   else
+   new_ist = old_ist - sizeof(old_ist);
+
+   /* Store old IST entry */
+   p   = (unsigned long *)new_ist;
+   *p  = old_ist;
+
+   /* Set new IST entry */
+   this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
+}
+
+void noinstr __sev_es_ist_exit(void)
+{
+   unsigned long ist;
+   unsigned long *p;
+
+   /* Read IST entry */
+   ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+
+   if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
+   return;
+
+   /* Read back old IST entry and write it to the TSS */
+   p = (unsigned long *)ist;
+   this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *p);
+}
+
 /* Needed in vc_early_forward_exception */
 void do_early_exception(struct pt_regs *regs, int trapnr);
 
@@ -277,6 +333,9 @@ void __init