Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-31 Thread Borislav Petkov
On Wed, May 31, 2017 at 11:03:52PM +0800, Xunlei Pang wrote:
> For the kdump case, it will be put in some reserved crash memory
> allocated by kexec-tools, and the start address of that reserved crash
> memory is passed to the kdump kernel via "elfcorehdr="; please see the
> kernel functions setup_elfcorehdr() and vmcore_init() for how it is
> parsed by the kdump kernel.

... which could be a great way to pass the SME status to the second
kernel without any funky sysfs games.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.


Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-31 Thread Xunlei Pang
On 05/31/2017 at 01:46 AM, Tom Lendacky wrote:
> On 5/25/2017 11:17 PM, Xunlei Pang wrote:
>> On 04/19/2017 at 05:21 AM, Tom Lendacky wrote:
>>> Provide support so that kexec can be used to boot a kernel when SME is
>>> enabled.
>>>
>>> Support is needed to allocate pages for kexec without encryption.  This
>>> is needed in order to be able to reboot in the kernel in the same manner
>>> as originally booted.
>>
>> Hi Tom,
>>
>> Looks like kdump will break; I didn't see similar handling for the
>> kdump cases, see these kernel functions:
>>  kimage_alloc_crash_control_pages(), kimage_load_crash_segment(), etc.
>>
>> We need to support kdump with SME: the kdump
>> kernel/initramfs/purgatory/elfcorehdr/etc. are all loaded into the
>> reserved memory (see crashkernel=X) by userspace kexec-tools. I think a
>> straightforward way would be to mark the whole reserved memory range as
>> unencrypted before loading all the kexec segments for kdump; I guess we
>> can handle this easily in arch_kexec_unprotect_crashkres().
>
> Yes, that would work.
>
>>
>> Moreover, now that "elfcorehdr=X" is left decrypted, it needs to be
>> remapped to the encrypted data.
>
> This is an area that I'm not familiar with, so I don't completely
> understand the flow in regards to where/when/how the ELF headers are
> copied and what needs to be done.
>
> Can you elaborate a bit on this?

"elfcorehdr" is generated by userspace 
kexec-tools(git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git), 
it's
actually ELF CORE header data(elf header, PT_LOAD/PT_NOTE program header), see 
kexec/crashdump-elf.c::FUNC().

For kdump case, it will be put in some reserved crash memory allocated by 
kexec-tools, and passed the corresponding
start address of the allocated reserved crash memory to kdump kernel via 
"elfcorehdr=", please see kernel functions
setup_elfcorehdr() and vmcore_init() for how it is parsed by kdump kernel.
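
For reference, a simplified sketch of how the kdump kernel picks that
address up, modeled on the kernel's "elfcorehdr=" early_param handler
(abridged for illustration, not the verbatim source):

/* Parse "elfcorehdr=[size[KMG]@]addr[KMG]" from the command line. */
static int __init setup_elfcorehdr(char *arg)
{
	char *end;

	if (!arg)
		return -EINVAL;

	elfcorehdr_addr = memparse(arg, &end);
	if (*end == '@') {
		/* "size@addr" form: the first number was the size. */
		elfcorehdr_size = elfcorehdr_addr;
		elfcorehdr_addr = memparse(end + 1, &end);
	}
	return end > arg ? 0 : -EINVAL;
}
early_param("elfcorehdr", setup_elfcorehdr);

vmcore_init() then reads the ELF CORE headers from elfcorehdr_addr to
build /proc/vmcore.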

Regards,
Xunlei

>>
>>>
>>> Additionally, when shutting down all of the CPUs we need to be sure to
>>> flush the caches and then halt. This is needed when booting from a state
>>> where SME was not active into a state where SME is active (or vice-versa).
>>> Without these steps, it is possible for cache lines to exist for the same
>>> physical location but tagged both with and without the encryption bit. This
>>> can cause random memory corruption when caches are flushed depending on
>>> which cacheline is written last.
>>>
>>> Signed-off-by: Tom Lendacky 
>>> ---
>>>   arch/x86/include/asm/init.h  |1 +
>>>   arch/x86/include/asm/irqflags.h  |5 +
>>>   arch/x86/include/asm/kexec.h |8 
>>>   arch/x86/include/asm/pgtable_types.h |1 +
>>>   arch/x86/kernel/machine_kexec_64.c   |   35 +-
>>>   arch/x86/kernel/process.c|   26 +++--
>>>   arch/x86/mm/ident_map.c  |   11 +++
>>>   include/linux/kexec.h|   14 ++
>>>   kernel/kexec_core.c  |7 +++
>>>   9 files changed, 101 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
>>> index 737da62..b2ec511 100644
>>> --- a/arch/x86/include/asm/init.h
>>> +++ b/arch/x86/include/asm/init.h
>>> @@ -6,6 +6,7 @@ struct x86_mapping_info {
>>>   void *context; /* context for alloc_pgt_page */
>>>   unsigned long pmd_flag; /* page flag for PMD entry */
>>>   unsigned long offset; /* ident mapping offset */
>>> +unsigned long kernpg_flag; /* kernel pagetable flag override */
>>>   };
>>> int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
>>> diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
>>> index ac7692d..38b5920 100644
>>> --- a/arch/x86/include/asm/irqflags.h
>>> +++ b/arch/x86/include/asm/irqflags.h
>>> @@ -58,6 +58,11 @@ static inline __cpuidle void native_halt(void)
>>>   asm volatile("hlt": : :"memory");
>>>   }
>>>   +static inline __cpuidle void native_wbinvd_halt(void)
>>> +{
>>> +asm volatile("wbinvd; hlt" : : : "memory");
>>> +}
>>> +
>>>   #endif
>>> #ifdef CONFIG_PARAVIRT
>>> diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
>>> index 70ef205..e8183ac 100644
>>> --- a/arch/x86/include/asm/kexec.h
>>> +++ b/arch/x86/include/asm/kexec.h
>>> @@ -207,6 +207,14 @@ struct kexec_entry64_regs {
>>>   uint64_t r15;
>>>   uint64_t rip;
>>>   };
>>> +
>>> +extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
>>> +   gfp_t gfp);
>>> +#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
>>> +
>>> +extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
>>> +#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
>>> +
>>>   #endif
>>> typedef void crash_vmclear_fn(void);
>>> diff --git 

Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-31 Thread Borislav Petkov
On Tue, May 30, 2017 at 12:46:14PM -0500, Tom Lendacky wrote:
> This is an area that I'm not familiar with, so I don't completely
> understand the flow in regards to where/when/how the ELF headers are
> copied and what needs to be done.

So my suggestion is still to put kexec/kdump on the backburner for now
and concentrate on the 30-ish patchset first. Once they're done, we can
start dealing with it. Ditto with the IOMMU side of things. One thing at
a time.

Thanks.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.


Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-30 Thread Tom Lendacky

On 5/25/2017 11:17 PM, Xunlei Pang wrote:

On 04/19/2017 at 05:21 AM, Tom Lendacky wrote:

Provide support so that kexec can be used to boot a kernel when SME is
enabled.

Support is needed to allocate pages for kexec without encryption.  This
is needed in order to be able to reboot in the kernel in the same manner
as originally booted.


Hi Tom,

Looks like kdump will break; I didn't see similar handling for the kdump
cases, see these kernel functions:
 kimage_alloc_crash_control_pages(), kimage_load_crash_segment(), etc.

We need to support kdump with SME: the kdump
kernel/initramfs/purgatory/elfcorehdr/etc. are all loaded into the
reserved memory (see crashkernel=X) by userspace kexec-tools. I think a
straightforward way would be to mark the whole reserved memory range as
unencrypted before loading all the kexec segments for kdump; I guess we
can handle this easily in arch_kexec_unprotect_crashkres().


Yes, that would work.



Moreover, now that "elfcorehdr=X" is left decrypted, it needs to be
remapped to the encrypted data.


This is an area that I'm not familiar with, so I don't completely
understand the flow in regards to where/when/how the ELF headers are
copied and what needs to be done.

Can you elaborate a bit on this?

Thanks,
Tom



Regards,
Xunlei



Additionally, when shutting down all of the CPUs we need to be sure to
flush the caches and then halt. This is needed when booting from a state
where SME was not active into a state where SME is active (or vice-versa).
Without these steps, it is possible for cache lines to exist for the same
physical location but tagged both with and without the encryption bit. This
can cause random memory corruption when caches are flushed depending on
which cacheline is written last.

Signed-off-by: Tom Lendacky 
---
  arch/x86/include/asm/init.h  |1 +
  arch/x86/include/asm/irqflags.h  |5 +
  arch/x86/include/asm/kexec.h |8 
  arch/x86/include/asm/pgtable_types.h |1 +
  arch/x86/kernel/machine_kexec_64.c   |   35 +-
  arch/x86/kernel/process.c|   26 +++--
  arch/x86/mm/ident_map.c  |   11 +++
  include/linux/kexec.h|   14 ++
  kernel/kexec_core.c  |7 +++
  9 files changed, 101 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 737da62..b2ec511 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -6,6 +6,7 @@ struct x86_mapping_info {
void *context;   /* context for alloc_pgt_page */
unsigned long pmd_flag;  /* page flag for PMD entry */
unsigned long offset;/* ident mapping offset */
+   unsigned long kernpg_flag;   /* kernel pagetable flag override */
  };
  
  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index ac7692d..38b5920 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -58,6 +58,11 @@ static inline __cpuidle void native_halt(void)
asm volatile("hlt": : :"memory");
  }
  
+static inline __cpuidle void native_wbinvd_halt(void)

+{
+   asm volatile("wbinvd; hlt" : : : "memory");
+}
+
  #endif
  
  #ifdef CONFIG_PARAVIRT

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 70ef205..e8183ac 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -207,6 +207,14 @@ struct kexec_entry64_regs {
uint64_t r15;
uint64_t rip;
  };
+
+extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
+  gfp_t gfp);
+#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
+
+extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
+#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+
  #endif
  
  typedef void crash_vmclear_fn(void);

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index ce8cb1c..0f326f4 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -213,6 +213,7 @@ enum page_cache_mode {
  #define PAGE_KERNEL   __pgprot(__PAGE_KERNEL | _PAGE_ENC)
  #define PAGE_KERNEL_RO__pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
  #define PAGE_KERNEL_EXEC  __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC __pgprot(__PAGE_KERNEL_EXEC)
  #define PAGE_KERNEL_RX__pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
  #define PAGE_KERNEL_NOCACHE   __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
  #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 085c3b3..11c0ca9 100644
--- 

Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-25 Thread Xunlei Pang
On 04/19/2017 at 05:21 AM, Tom Lendacky wrote:
> Provide support so that kexec can be used to boot a kernel when SME is
> enabled.
>
> Support is needed to allocate pages for kexec without encryption.  This
> is needed in order to be able to reboot in the kernel in the same manner
> as originally booted.

Hi Tom,

Looks like kdump will break; I didn't see similar handling for the kdump
cases, see these kernel functions:
kimage_alloc_crash_control_pages(), kimage_load_crash_segment(), etc.

We need to support kdump with SME: the kdump
kernel/initramfs/purgatory/elfcorehdr/etc. are all loaded into the
reserved memory (see crashkernel=X) by userspace kexec-tools. I think a
straightforward way would be to mark the whole reserved memory range as
unencrypted before loading all the kexec segments for kdump; I guess we
can handle this easily in arch_kexec_unprotect_crashkres().
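
A minimal sketch of that idea (hypothetical, not part of the posted
series; it assumes the crashk_res resource and the sme_active()/
set_memory_decrypted() helpers this patch set introduces):

/*
 * Hypothetical: clear the encryption attribute on the whole
 * crashkernel reservation before the kdump segments are loaded.
 */
void arch_kexec_unprotect_crashkres(void)
{
	kexec_mark_crashkres(false);

	if (sme_active()) {
		unsigned long vaddr = (unsigned long)__va(crashk_res.start);
		unsigned int pages = resource_size(&crashk_res) >> PAGE_SHIFT;

		/* Mark the reserved crash memory range as unencrypted. */
		set_memory_decrypted(vaddr, pages);
	}
}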

Moreover, now that "elfcorehdr=X" is left decrypted, it needs to be
remapped to the encrypted data.

Regards,
Xunlei

>
> Additionally, when shutting down all of the CPUs we need to be sure to
> flush the caches and then halt. This is needed when booting from a state
> where SME was not active into a state where SME is active (or vice-versa).
> Without these steps, it is possible for cache lines to exist for the same
> physical location but tagged both with and without the encryption bit. This
> can cause random memory corruption when caches are flushed depending on
> which cacheline is written last.
>
> Signed-off-by: Tom Lendacky 
> ---
>  arch/x86/include/asm/init.h  |1 +
>  arch/x86/include/asm/irqflags.h  |5 +
>  arch/x86/include/asm/kexec.h |8 
>  arch/x86/include/asm/pgtable_types.h |1 +
>  arch/x86/kernel/machine_kexec_64.c   |   35 +-
>  arch/x86/kernel/process.c|   26 +++--
>  arch/x86/mm/ident_map.c  |   11 +++
>  include/linux/kexec.h|   14 ++
>  kernel/kexec_core.c  |7 +++
>  9 files changed, 101 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
> index 737da62..b2ec511 100644
> --- a/arch/x86/include/asm/init.h
> +++ b/arch/x86/include/asm/init.h
> @@ -6,6 +6,7 @@ struct x86_mapping_info {
>   void *context;   /* context for alloc_pgt_page */
>   unsigned long pmd_flag;  /* page flag for PMD entry */
>   unsigned long offset;/* ident mapping offset */
> + unsigned long kernpg_flag;   /* kernel pagetable flag override */
>  };
>  
>  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
> diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
> index ac7692d..38b5920 100644
> --- a/arch/x86/include/asm/irqflags.h
> +++ b/arch/x86/include/asm/irqflags.h
> @@ -58,6 +58,11 @@ static inline __cpuidle void native_halt(void)
>   asm volatile("hlt": : :"memory");
>  }
>  
> +static inline __cpuidle void native_wbinvd_halt(void)
> +{
> + asm volatile("wbinvd; hlt" : : : "memory");
> +}
> +
>  #endif
>  
>  #ifdef CONFIG_PARAVIRT
> diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
> index 70ef205..e8183ac 100644
> --- a/arch/x86/include/asm/kexec.h
> +++ b/arch/x86/include/asm/kexec.h
> @@ -207,6 +207,14 @@ struct kexec_entry64_regs {
>   uint64_t r15;
>   uint64_t rip;
>  };
> +
> +extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
> +gfp_t gfp);
> +#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
> +
> +extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
> +#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
> +
>  #endif
>  
>  typedef void crash_vmclear_fn(void);
> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
> index ce8cb1c..0f326f4 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -213,6 +213,7 @@ enum page_cache_mode {
>  #define PAGE_KERNEL  __pgprot(__PAGE_KERNEL | _PAGE_ENC)
>  #define PAGE_KERNEL_RO   __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
>  #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
> +#define PAGE_KERNEL_EXEC_NOENC   __pgprot(__PAGE_KERNEL_EXEC)
>  #define PAGE_KERNEL_RX   __pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
>  #define PAGE_KERNEL_NOCACHE  __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
>  #define PAGE_KERNEL_LARGE__pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
> index 085c3b3..11c0ca9 100644
> --- a/arch/x86/kernel/machine_kexec_64.c
> +++ b/arch/x86/kernel/machine_kexec_64.c
> @@ -86,7 +86,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
>  

Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-19 Thread Tom Lendacky

On 5/19/2017 4:28 PM, Borislav Petkov wrote:

On Fri, May 19, 2017 at 04:07:24PM -0500, Tom Lendacky wrote:

As long as those never change from static inline everything will be
fine. I can change it, but I really like how it explicitly indicates


I know what you want to do. But you're practically defining a helper
which contains two arbitrary instructions which probably no one else
will need.

So how about we simplify this function even more. We don't need to pay
attention to kexec being in progress because we're halting anyway so who
cares how fast we halt.

Might have to state that in the comment below though, instead of what's
there now.

And for the exact same moot reason, we don't need to look at SME CPUID
feature - we can just as well WBINVD unconditionally.

void stop_this_cpu(void *dummy)
{
local_irq_disable();
/*
 * Remove this CPU:
 */
set_cpu_online(smp_processor_id(), false);
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));

for (;;) {
/*
 * If we are performing a kexec and the processor supports
 * SME then we need to clear out cache information before
 * halting. With kexec, going from SME inactive to SME active
 * requires clearing cache entries so that addresses without
 * the encryption bit set don't corrupt the same physical
 * address that has the encryption bit set when caches are
 * flushed. Perform a wbinvd followed by a halt to achieve
 * this.
 */
asm volatile("wbinvd; hlt" ::: "memory");
}
}

How's that?


I can live with that!

Thanks,
Tom






Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-19 Thread Borislav Petkov
On Fri, May 19, 2017 at 04:07:24PM -0500, Tom Lendacky wrote:
> As long as those never change from static inline everything will be
> fine. I can change it, but I really like how it explicitly indicates

I know what you want to do. But you're practically defining a helper
which contains two arbitrary instructions which probably no one else
will need.

So how about we simplify this function even more. We don't need to pay
attention to kexec being in progress because we're halting anyway so who
cares how fast we halt.

Might have to state that in the comment below though, instead of what's
there now.

And for the exact same moot reason, we don't need to look at SME CPUID
feature - we can just as well WBINVD unconditionally.

void stop_this_cpu(void *dummy)
{
local_irq_disable();
/*
 * Remove this CPU:
 */
set_cpu_online(smp_processor_id(), false);
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));

for (;;) {
/*
 * If we are performing a kexec and the processor supports
 * SME then we need to clear out cache information before
 * halting. With kexec, going from SME inactive to SME active
 * requires clearing cache entries so that addresses without
 * the encryption bit set don't corrupt the same physical
 * address that has the encryption bit set when caches are
 * flushed. Perform a wbinvd followed by a halt to achieve
 * this.
 */
asm volatile("wbinvd; hlt" ::: "memory");
}
}

How's that?

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.


Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-19 Thread Tom Lendacky

On 5/19/2017 3:58 PM, Borislav Petkov wrote:

On Fri, May 19, 2017 at 03:45:28PM -0500, Tom Lendacky wrote:

Actually there is.  The above will result in data in the cache because
halt() turns into a function call if CONFIG_PARAVIRT is defined (refer
to the comment above where do_wbinvd_halt is set to true). I could make
this a native_wbinvd() and native_halt()


That's why we have the native_* versions - to bypass paravirt crap.


As long as those never change from static inline everything will be
fine. I can change it, but I really like how it explicitly indicates
what is needed in this case. Even if the function gets changed from
static inline the fact that the instructions are sequential in the
function covers that case.

Thanks,
Tom






Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-19 Thread Borislav Petkov
On Fri, May 19, 2017 at 03:45:28PM -0500, Tom Lendacky wrote:
> Actually there is.  The above will result in data in the cache because
> halt() turns into a function call if CONFIG_PARAVIRT is defined (refer
> to the comment above where do_wbinvd_halt is set to true). I could make
> this a native_wbinvd() and native_halt()

That's why we have the native_* versions - to bypass paravirt crap.
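
To illustrate the difference: with CONFIG_PARAVIRT enabled, halt()
compiles to an indirect pv-ops call, while the native_* variant is the
bare instruction (an abridged sketch of the 2017-era definitions, for
illustration only):

/* arch/x86/include/asm/irqflags.h -- native variant, raw instruction: */
static inline __cpuidle void native_halt(void)
{
	asm volatile("hlt": : :"memory");
}

/*
 * arch/x86/include/asm/paravirt.h (abridged) -- halt() becomes an
 * indirect call, which can touch the stack and repopulate cache lines
 * after the wbinvd:
 */
static inline void halt(void)
{
	PVOP_VCALL0(pv_irq_ops.halt);
}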

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.


Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-19 Thread Tom Lendacky

On 5/17/2017 2:17 PM, Borislav Petkov wrote:

On Tue, Apr 18, 2017 at 04:21:21PM -0500, Tom Lendacky wrote:

Provide support so that kexec can be used to boot a kernel when SME is
enabled.

Support is needed to allocate pages for kexec without encryption.  This
is needed in order to be able to reboot in the kernel in the same manner
as originally booted.

Additionally, when shutting down all of the CPUs we need to be sure to
flush the caches and then halt. This is needed when booting from a state
where SME was not active into a state where SME is active (or vice-versa).
Without these steps, it is possible for cache lines to exist for the same
physical location but tagged both with and without the encryption bit. This
can cause random memory corruption when caches are flushed depending on
which cacheline is written last.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/init.h  |1 +
 arch/x86/include/asm/irqflags.h  |5 +
 arch/x86/include/asm/kexec.h |8 
 arch/x86/include/asm/pgtable_types.h |1 +
 arch/x86/kernel/machine_kexec_64.c   |   35 +-
 arch/x86/kernel/process.c|   26 +++--
 arch/x86/mm/ident_map.c  |   11 +++
 include/linux/kexec.h|   14 ++
 kernel/kexec_core.c  |7 +++
 9 files changed, 101 insertions(+), 7 deletions(-)


...


@@ -86,7 +86,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
}
pte = pte_offset_kernel(pmd, vaddr);
-   set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+   set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
return 0;
 err:
free_transition_pgtable(image);
@@ -114,6 +114,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
.alloc_pgt_page = alloc_pgt_page,
.context= image,
.pmd_flag   = __PAGE_KERNEL_LARGE_EXEC,
+   .kernpg_flag= _KERNPG_TABLE_NOENC,
};
unsigned long mstart, mend;
pgd_t *level4p;
@@ -597,3 +598,35 @@ void arch_kexec_unprotect_crashkres(void)
 {
kexec_mark_crashkres(false);
 }
+
+int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
+{
+   int ret;
+
+   if (sme_active()) {


if (!sme_active())
return 0;

/*
 * If SME...



Ok.




+   /*
+* If SME is active we need to be sure that kexec pages are
+* not encrypted because when we boot to the new kernel the
+* pages won't be accessed encrypted (initially).
+*/
+   ret = set_memory_decrypted((unsigned long)vaddr, pages);
+   if (ret)
+   return ret;
+
+   if (gfp & __GFP_ZERO)
+   memset(vaddr, 0, pages * PAGE_SIZE);


This function is called after alloc_pages() which already zeroes memory
when __GFP_ZERO is supplied.

If you need to clear the memory *after* set_memory_encrypted() happens,
then you should probably mask out __GFP_ZERO before the alloc_pages()
call so as not to do it twice.


I'll look into that.  I could put the memset() at the end of this
function so that it is done here no matter what.  And update the
default arch_kexec_post_alloc_pages() to also do the memset(). It
just hides the clearing of the pages a bit though by doing that.
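
A sketch of the rearrangement being discussed (hypothetical, assuming
the kexec core's kimage_alloc_pages() allocator; the reserved-page
bookkeeping of the real function is omitted for brevity):

/*
 * Hypothetical: strip __GFP_ZERO from the allocation, let the arch
 * hook decrypt the pages, then zero them exactly once through the
 * now-unencrypted mapping.
 */
static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *pages;

	pages = alloc_pages(gfp_mask & ~__GFP_ZERO, order);
	if (!pages)
		return NULL;

	/* May call set_memory_decrypted() when SME is active. */
	if (arch_kexec_post_alloc_pages(page_address(pages),
					1 << order, gfp_mask)) {
		__free_pages(pages, order);
		return NULL;
	}

	if (gfp_mask & __GFP_ZERO)
		memset(page_address(pages), 0, PAGE_SIZE << order);

	return pages;
}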




+   }
+
+   return 0;
+}
+
+void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
+{
+   if (sme_active()) {
+   /*
+* If SME is active we need to reset the pages back to being
+* an encrypted mapping before freeing them.
+*/
+   set_memory_encrypted((unsigned long)vaddr, pages);
+   }
+}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 0bb8842..f4e5de6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -355,8 +356,25 @@ bool xen_set_default_idle(void)
return ret;
 }
 #endif
+
 void stop_this_cpu(void *dummy)
 {
+   bool do_wbinvd_halt = false;
+
+   if (kexec_in_progress && boot_cpu_has(X86_FEATURE_SME)) {
+   /*
+* If we are performing a kexec and the processor supports
+* SME then we need to clear out cache information before
+* halting. With kexec, going from SME inactive to SME active
+* requires clearing cache entries so that addresses without
+* the encryption bit set don't corrupt the same physical
+* address that has the encryption bit set when caches are
+* 

Re: [PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-05-17 Thread Borislav Petkov
On Tue, Apr 18, 2017 at 04:21:21PM -0500, Tom Lendacky wrote:
> Provide support so that kexec can be used to boot a kernel when SME is
> enabled.
> 
> Support is needed to allocate pages for kexec without encryption.  This
> is needed in order to be able to reboot in the kernel in the same manner
> as originally booted.
> 
> Additionally, when shutting down all of the CPUs we need to be sure to
> flush the caches and then halt. This is needed when booting from a state
> where SME was not active into a state where SME is active (or vice-versa).
> Without these steps, it is possible for cache lines to exist for the same
> physical location but tagged both with and without the encryption bit. This
> can cause random memory corruption when caches are flushed depending on
> which cacheline is written last.
> 
> Signed-off-by: Tom Lendacky 
> ---
>  arch/x86/include/asm/init.h  |1 +
>  arch/x86/include/asm/irqflags.h  |5 +
>  arch/x86/include/asm/kexec.h |8 
>  arch/x86/include/asm/pgtable_types.h |1 +
>  arch/x86/kernel/machine_kexec_64.c   |   35 +-
>  arch/x86/kernel/process.c|   26 +++--
>  arch/x86/mm/ident_map.c  |   11 +++
>  include/linux/kexec.h|   14 ++
>  kernel/kexec_core.c  |7 +++
>  9 files changed, 101 insertions(+), 7 deletions(-)

...

> @@ -86,7 +86,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
>   set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
>   }
>   pte = pte_offset_kernel(pmd, vaddr);
> - set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
> + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
>   return 0;
>  err:
>   free_transition_pgtable(image);
> @@ -114,6 +114,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
>   .alloc_pgt_page = alloc_pgt_page,
>   .context= image,
>   .pmd_flag   = __PAGE_KERNEL_LARGE_EXEC,
> + .kernpg_flag= _KERNPG_TABLE_NOENC,
>   };
>   unsigned long mstart, mend;
>   pgd_t *level4p;
> @@ -597,3 +598,35 @@ void arch_kexec_unprotect_crashkres(void)
>  {
>   kexec_mark_crashkres(false);
>  }
> +
> +int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
> +{
> + int ret;
> +
> + if (sme_active()) {

if (!sme_active())
return 0;

/*
 * If SME...


> + /*
> +  * If SME is active we need to be sure that kexec pages are
> +  * not encrypted because when we boot to the new kernel the
> +  * pages won't be accessed encrypted (initially).
> +  */
> + ret = set_memory_decrypted((unsigned long)vaddr, pages);
> + if (ret)
> + return ret;
> +
> + if (gfp & __GFP_ZERO)
> + memset(vaddr, 0, pages * PAGE_SIZE);

This function is called after alloc_pages() which already zeroes memory
when __GFP_ZERO is supplied.

If you need to clear the memory *after* set_memory_encrypted() happens,
then you should probably mask out __GFP_ZERO before the alloc_pages()
call so as not to do it twice.

> + }
> +
> + return 0;
> +}
> +
> +void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
> +{
> + if (sme_active()) {
> + /*
> +  * If SME is active we need to reset the pages back to being
> +  * an encrypted mapping before freeing them.
> +  */
> + set_memory_encrypted((unsigned long)vaddr, pages);
> + }
> +}
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 0bb8842..f4e5de6 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -24,6 +24,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -355,8 +356,25 @@ bool xen_set_default_idle(void)
>   return ret;
>  }
>  #endif
> +
>  void stop_this_cpu(void *dummy)
>  {
> + bool do_wbinvd_halt = false;
> +
> + if (kexec_in_progress && boot_cpu_has(X86_FEATURE_SME)) {
> + /*
> +  * If we are performing a kexec and the processor supports
> +  * SME then we need to clear out cache information before
> +  * halting. With kexec, going from SME inactive to SME active
> +  * requires clearing cache entries so that addresses without
> +  * the encryption bit set don't corrupt the same physical
> +  * address that has the encryption bit set when caches are
> +  * flushed. Perform a wbinvd followed by a halt to achieve
> +  * this.
> +  */
> + do_wbinvd_halt = true;
> + }
> +
>   local_irq_disable();
>   /*
>

[PATCH v5 28/32] x86/mm, kexec: Allow kexec to be used with SME

2017-04-18 Thread Tom Lendacky
Provide support so that kexec can be used to boot a kernel when SME is
enabled.

Support is needed to allocate pages for kexec without encryption.  This
is needed in order to be able to reboot in the kernel in the same manner
as originally booted.

Additionally, when shutting down all of the CPUs we need to be sure to
flush the caches and then halt. This is needed when booting from a state
where SME was not active into a state where SME is active (or vice-versa).
Without these steps, it is possible for cache lines to exist for the same
physical location but tagged both with and without the encryption bit. This
can cause random memory corruption when caches are flushed depending on
which cacheline is written last.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/init.h  |1 +
 arch/x86/include/asm/irqflags.h  |5 +
 arch/x86/include/asm/kexec.h |8 
 arch/x86/include/asm/pgtable_types.h |1 +
 arch/x86/kernel/machine_kexec_64.c   |   35 +-
 arch/x86/kernel/process.c|   26 +++--
 arch/x86/mm/ident_map.c  |   11 +++
 include/linux/kexec.h|   14 ++
 kernel/kexec_core.c  |7 +++
 9 files changed, 101 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 737da62..b2ec511 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -6,6 +6,7 @@ struct x86_mapping_info {
void *context;   /* context for alloc_pgt_page */
unsigned long pmd_flag;  /* page flag for PMD entry */
unsigned long offset;/* ident mapping offset */
+   unsigned long kernpg_flag;   /* kernel pagetable flag override */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index ac7692d..38b5920 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -58,6 +58,11 @@ static inline __cpuidle void native_halt(void)
asm volatile("hlt": : :"memory");
 }
 
+static inline __cpuidle void native_wbinvd_halt(void)
+{
+   asm volatile("wbinvd; hlt" : : : "memory");
+}
+
 #endif
 
 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 70ef205..e8183ac 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -207,6 +207,14 @@ struct kexec_entry64_regs {
uint64_t r15;
uint64_t rip;
 };
+
+extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
+  gfp_t gfp);
+#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
+
+extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
+#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+
 #endif
 
 typedef void crash_vmclear_fn(void);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index ce8cb1c..0f326f4 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -213,6 +213,7 @@ enum page_cache_mode {
 #define PAGE_KERNEL__pgprot(__PAGE_KERNEL | _PAGE_ENC)
 #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
 #define PAGE_KERNEL_EXEC   __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC __pgprot(__PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
 #define PAGE_KERNEL_NOCACHE__pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE  __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 085c3b3..11c0ca9 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -86,7 +86,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
}
pte = pte_offset_kernel(pmd, vaddr);
-   set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+   set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
return 0;
 err:
free_transition_pgtable(image);
@@ -114,6 +114,7 @@ static int init_pgtable(struct kimage *image, unsigned long 
start_pgtable)
.alloc_pgt_page = alloc_pgt_page,
.context= image,
.pmd_flag   = __PAGE_KERNEL_LARGE_EXEC,
+   .kernpg_flag= _KERNPG_TABLE_NOENC,
};
unsigned long mstart, mend;
pgd_t *level4p;
@@ -597,3 +598,35 @@ void arch_kexec_unprotect_crashkres(void)
 {
kexec_mark_crashkres(false);
 }
+
+int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
+{
+   int ret;
+
+   if