On 04/19/2017 at 05:21 AM, Tom Lendacky wrote:
> Provide support so that kexec can be used to boot a kernel when SME is
> enabled.
>
> Support is needed to allocate pages for kexec without encryption.  This
> is needed in order to be able to reboot in the kernel in the same manner
> as originally booted.

Hi Tom,

It looks like kdump will break; I didn't see any similar handling for the
kdump cases. See, in the kernel:
    kimage_alloc_crash_control_pages(), kimage_load_crash_segment(), etc.

We need to support kdump with SME. The kdump
kernel/initramfs/purgatory/elfcorehdr/etc. are all loaded into the reserved
memory (see crashkernel=X) by the userspace kexec-tools.
I think a straightforward way would be to mark the whole reserved memory
range as unencrypted before loading all the kexec segments for kdump. I guess
we can handle this easily in arch_kexec_unprotect_crashkres().

Moreover, now that "elfcorehdr=X" is left decrypted, it needs to be remapped
to the encrypted data.

Regards,
Xunlei

>
> Additionally, when shutting down all of the CPUs we need to be sure to
> flush the caches and then halt. This is needed when booting from a state
> where SME was not active into a state where SME is active (or vice-versa).
> Without these steps, it is possible for cache lines to exist for the same
> physical location but tagged both with and without the encryption bit. This
> can cause random memory corruption when caches are flushed depending on
> which cacheline is written last.
>
> Signed-off-by: Tom Lendacky <thomas.lenda...@amd.com>
> ---
>  arch/x86/include/asm/init.h          |    1 +
>  arch/x86/include/asm/irqflags.h      |    5 +++++
>  arch/x86/include/asm/kexec.h         |    8 ++++++++
>  arch/x86/include/asm/pgtable_types.h |    1 +
>  arch/x86/kernel/machine_kexec_64.c   |   35 
> +++++++++++++++++++++++++++++++++-
>  arch/x86/kernel/process.c            |   26 +++++++++++++++++++++++--
>  arch/x86/mm/ident_map.c              |   11 +++++++----
>  include/linux/kexec.h                |   14 ++++++++++++++
>  kernel/kexec_core.c                  |    7 +++++++
>  9 files changed, 101 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
> index 737da62..b2ec511 100644
> --- a/arch/x86/include/asm/init.h
> +++ b/arch/x86/include/asm/init.h
> @@ -6,6 +6,7 @@ struct x86_mapping_info {
>       void *context;                   /* context for alloc_pgt_page */
>       unsigned long pmd_flag;          /* page flag for PMD entry */
>       unsigned long offset;            /* ident mapping offset */
> +     unsigned long kernpg_flag;       /* kernel pagetable flag override */
>  };
>  
>  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
> diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
> index ac7692d..38b5920 100644
> --- a/arch/x86/include/asm/irqflags.h
> +++ b/arch/x86/include/asm/irqflags.h
> @@ -58,6 +58,11 @@ static inline __cpuidle void native_halt(void)
>       asm volatile("hlt": : :"memory");
>  }
>  
> +static inline __cpuidle void native_wbinvd_halt(void)
> +{
> +     asm volatile("wbinvd; hlt" : : : "memory");
> +}
> +
>  #endif
>  
>  #ifdef CONFIG_PARAVIRT
> diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
> index 70ef205..e8183ac 100644
> --- a/arch/x86/include/asm/kexec.h
> +++ b/arch/x86/include/asm/kexec.h
> @@ -207,6 +207,14 @@ struct kexec_entry64_regs {
>       uint64_t r15;
>       uint64_t rip;
>  };
> +
> +extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
> +                                    gfp_t gfp);
> +#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
> +
> +extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
> +#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
> +
>  #endif
>  
>  typedef void crash_vmclear_fn(void);
> diff --git a/arch/x86/include/asm/pgtable_types.h 
> b/arch/x86/include/asm/pgtable_types.h
> index ce8cb1c..0f326f4 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -213,6 +213,7 @@ enum page_cache_mode {
>  #define PAGE_KERNEL          __pgprot(__PAGE_KERNEL | _PAGE_ENC)
>  #define PAGE_KERNEL_RO               __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
>  #define PAGE_KERNEL_EXEC     __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
> +#define PAGE_KERNEL_EXEC_NOENC       __pgprot(__PAGE_KERNEL_EXEC)
>  #define PAGE_KERNEL_RX               __pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
>  #define PAGE_KERNEL_NOCACHE  __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
>  #define PAGE_KERNEL_LARGE    __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
> diff --git a/arch/x86/kernel/machine_kexec_64.c 
> b/arch/x86/kernel/machine_kexec_64.c
> index 085c3b3..11c0ca9 100644
> --- a/arch/x86/kernel/machine_kexec_64.c
> +++ b/arch/x86/kernel/machine_kexec_64.c
> @@ -86,7 +86,7 @@ static int init_transition_pgtable(struct kimage *image, 
> pgd_t *pgd)
>               set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
>       }
>       pte = pte_offset_kernel(pmd, vaddr);
> -     set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
> +     set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
>       return 0;
>  err:
>       free_transition_pgtable(image);
> @@ -114,6 +114,7 @@ static int init_pgtable(struct kimage *image, unsigned 
> long start_pgtable)
>               .alloc_pgt_page = alloc_pgt_page,
>               .context        = image,
>               .pmd_flag       = __PAGE_KERNEL_LARGE_EXEC,
> +             .kernpg_flag    = _KERNPG_TABLE_NOENC,
>       };
>       unsigned long mstart, mend;
>       pgd_t *level4p;
> @@ -597,3 +598,35 @@ void arch_kexec_unprotect_crashkres(void)
>  {
>       kexec_mark_crashkres(false);
>  }
> +
> +int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
> +{
> +     int ret;
> +
> +     if (sme_active()) {
> +             /*
> +              * If SME is active we need to be sure that kexec pages are
> +              * not encrypted because when we boot to the new kernel the
> +              * pages won't be accessed encrypted (initially).
> +              */
> +             ret = set_memory_decrypted((unsigned long)vaddr, pages);
> +             if (ret)
> +                     return ret;
> +
> +             if (gfp & __GFP_ZERO)
> +                     memset(vaddr, 0, pages * PAGE_SIZE);
> +     }
> +
> +     return 0;
> +}
> +
> +void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
> +{
> +     if (sme_active()) {
> +             /*
> +              * If SME is active we need to reset the pages back to being
> +              * an encrypted mapping before freeing them.
> +              */
> +             set_memory_encrypted((unsigned long)vaddr, pages);
> +     }
> +}
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 0bb8842..f4e5de6 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -24,6 +24,7 @@
>  #include <linux/cpuidle.h>
>  #include <trace/events/power.h>
>  #include <linux/hw_breakpoint.h>
> +#include <linux/kexec.h>
>  #include <asm/cpu.h>
>  #include <asm/apic.h>
>  #include <asm/syscalls.h>
> @@ -355,8 +356,25 @@ bool xen_set_default_idle(void)
>       return ret;
>  }
>  #endif
> +
>  void stop_this_cpu(void *dummy)
>  {
> +     bool do_wbinvd_halt = false;
> +
> +     if (kexec_in_progress && boot_cpu_has(X86_FEATURE_SME)) {
> +             /*
> +              * If we are performing a kexec and the processor supports
> +              * SME then we need to clear out cache information before
> +              * halting. With kexec, going from SME inactive to SME active
> +              * requires clearing cache entries so that addresses without
> +              * the encryption bit set don't corrupt the same physical
> +              * address that has the encryption bit set when caches are
> +              * flushed. Perform a wbinvd followed by a halt to achieve
> +              * this.
> +              */
> +             do_wbinvd_halt = true;
> +     }
> +
>       local_irq_disable();
>       /*
>        * Remove this CPU:
> @@ -365,8 +383,12 @@ void stop_this_cpu(void *dummy)
>       disable_local_APIC();
>       mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
>  
> -     for (;;)
> -             halt();
> +     for (;;) {
> +             if (do_wbinvd_halt)
> +                     native_wbinvd_halt();
> +             else
> +                     halt();
> +     }
>  }
>  
>  /*
> diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
> index 04210a2..2c9fd3e 100644
> --- a/arch/x86/mm/ident_map.c
> +++ b/arch/x86/mm/ident_map.c
> @@ -20,6 +20,7 @@ static void ident_pmd_init(struct x86_mapping_info *info, 
> pmd_t *pmd_page,
>  static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
>                         unsigned long addr, unsigned long end)
>  {
> +     unsigned long kernpg_flag = info->kernpg_flag ? : _KERNPG_TABLE;
>       unsigned long next;
>  
>       for (; addr < end; addr = next) {
> @@ -39,7 +40,7 @@ static int ident_pud_init(struct x86_mapping_info *info, 
> pud_t *pud_page,
>               if (!pmd)
>                       return -ENOMEM;
>               ident_pmd_init(info, pmd, addr, next);
> -             set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
> +             set_pud(pud, __pud(__pa(pmd) | kernpg_flag));
>       }
>  
>       return 0;
> @@ -48,6 +49,7 @@ static int ident_pud_init(struct x86_mapping_info *info, 
> pud_t *pud_page,
>  static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
>                         unsigned long addr, unsigned long end)
>  {
> +     unsigned long kernpg_flag = info->kernpg_flag ? : _KERNPG_TABLE;
>       unsigned long next;
>  
>       for (; addr < end; addr = next) {
> @@ -67,7 +69,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, 
> p4d_t *p4d_page,
>               if (!pud)
>                       return -ENOMEM;
>               ident_pud_init(info, pud, addr, next);
> -             set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
> +             set_p4d(p4d, __p4d(__pa(pud) | kernpg_flag));
>       }
>  
>       return 0;
> @@ -76,6 +78,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, 
> p4d_t *p4d_page,
>  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
>                             unsigned long pstart, unsigned long pend)
>  {
> +     unsigned long kernpg_flag = info->kernpg_flag ? : _KERNPG_TABLE;
>       unsigned long addr = pstart + info->offset;
>       unsigned long end = pend + info->offset;
>       unsigned long next;
> @@ -104,14 +107,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info 
> *info, pgd_t *pgd_page,
>               if (result)
>                       return result;
>               if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
> -                     set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
> +                     set_pgd(pgd, __pgd(__pa(p4d) | kernpg_flag));
>               } else {
>                       /*
>                        * With p4d folded, pgd is equal to p4d.
>                        * The pgd entry has to point to the pud page table in 
> this case.
>                        */
>                       pud_t *pud = pud_offset(p4d, 0);
> -                     set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
> +                     set_pgd(pgd, __pgd(__pa(pud) | kernpg_flag));
>               }
>       }
>  
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index d419d0e..1c76e3b 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -383,6 +383,20 @@ static inline void *boot_phys_to_virt(unsigned long 
> entry)
>       return phys_to_virt(boot_phys_to_phys(entry));
>  }
>  
> +#ifndef arch_kexec_post_alloc_pages
> +static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int 
> pages,
> +                                           gfp_t gfp)
> +{
> +     return 0;
> +}
> +#endif
> +
> +#ifndef arch_kexec_pre_free_pages
> +static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
> +{
> +}
> +#endif
> +
>  #else /* !CONFIG_KEXEC_CORE */
>  struct pt_regs;
>  struct task_struct;
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index bfe62d5..bb5e7e3 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -38,6 +38,7 @@
>  #include <linux/syscore_ops.h>
>  #include <linux/compiler.h>
>  #include <linux/hugetlb.h>
> +#include <linux/mem_encrypt.h>
>  
>  #include <asm/page.h>
>  #include <asm/sections.h>
> @@ -315,6 +316,9 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, 
> unsigned int order)
>               count = 1 << order;
>               for (i = 0; i < count; i++)
>                       SetPageReserved(pages + i);
> +
> +             arch_kexec_post_alloc_pages(page_address(pages), count,
> +                                         gfp_mask);
>       }
>  
>       return pages;
> @@ -326,6 +330,9 @@ static void kimage_free_pages(struct page *page)
>  
>       order = page_private(page);
>       count = 1 << order;
> +
> +     arch_kexec_pre_free_pages(page_address(page), count);
> +
>       for (i = 0; i < count; i++)
>               ClearPageReserved(page + i);
>       __free_pages(page, order);
>
>
> _______________________________________________
> kexec mailing list
> ke...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to