[PATCH v2] Remove the memory encryption mask to obtain the true physical address

2019-01-27 Thread Lianbo Jiang
For AMD machine with SME feature, if SME is enabled in the first
kernel, the crashed kernel's page table(pgd/pud/pmd/pte) contains
the memory encryption mask, so makedumpfile needs to remove the
memory encryption mask to obtain the true physical address.

Signed-off-by: Lianbo Jiang 
---
Changes since v1:
1. Merge them into a patch.
2. The sme_mask is not an enum number, remove it.
3. Sanity check whether the sme_mask is in vmcoreinfo.
4. Deal with the huge pages case.
5. Cover the 5-level path.

 arch/x86_64.c  | 30 +-
 makedumpfile.c |  4 
 makedumpfile.h |  1 +
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/arch/x86_64.c b/arch/x86_64.c
index 537fb78..7b3ed10 100644
--- a/arch/x86_64.c
+++ b/arch/x86_64.c
@@ -291,6 +291,7 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long pagetable)
unsigned long page_dir, pgd, pud_paddr, pud_pte, pmd_paddr, pmd_pte;
unsigned long pte_paddr, pte;
unsigned long p4d_paddr, p4d_pte;
+   unsigned long sme_me_mask = ~0UL;
 
/*
 * Get PGD.
@@ -302,6 +303,9 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long pagetable)
return NOT_PADDR;
}
 
+   if (NUMBER(sme_mask) != NOT_FOUND_NUMBER)
+   sme_me_mask = ~(NUMBER(sme_mask));
+
if (check_5level_paging()) {
page_dir += pgd5_index(vaddr) * sizeof(unsigned long);
if (!readmem(PADDR, page_dir, &pgd, sizeof pgd)) {
@@ -309,7 +313,7 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long pagetable)
return NOT_PADDR;
}
if (info->vaddr_for_vtop == vaddr)
-   MSG("  PGD : %16lx => %16lx\n", page_dir, pgd);
+   MSG("  PGD : %16lx => %16lx\n", page_dir, (pgd & 
sme_me_mask));
 
if (!(pgd & _PAGE_PRESENT)) {
ERRMSG("Can't get a valid pgd.\n");
@@ -318,20 +322,20 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long pagetable)
/*
 * Get P4D.
 */
-   p4d_paddr  = pgd & ENTRY_MASK;
+   p4d_paddr  = pgd & ENTRY_MASK & sme_me_mask;
p4d_paddr += p4d_index(vaddr) * sizeof(unsigned long);
if (!readmem(PADDR, p4d_paddr, &p4d_pte, sizeof p4d_pte)) {
ERRMSG("Can't get p4d_pte (p4d_paddr:%lx).\n", 
p4d_paddr);
return NOT_PADDR;
}
if (info->vaddr_for_vtop == vaddr)
-   MSG("  P4D : %16lx => %16lx\n", p4d_paddr, p4d_pte);
+   MSG("  P4D : %16lx => %16lx\n", p4d_paddr, (p4d_pte & 
sme_me_mask));
 
if (!(p4d_pte & _PAGE_PRESENT)) {
ERRMSG("Can't get a valid p4d_pte.\n");
return NOT_PADDR;
}
-   pud_paddr  = p4d_pte & ENTRY_MASK;
+   pud_paddr  = p4d_pte & ENTRY_MASK & sme_me_mask;
}else {
page_dir += pgd_index(vaddr) * sizeof(unsigned long);
if (!readmem(PADDR, page_dir, &pgd, sizeof pgd)) {
@@ -339,13 +343,13 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long pagetable)
return NOT_PADDR;
}
if (info->vaddr_for_vtop == vaddr)
-   MSG("  PGD : %16lx => %16lx\n", page_dir, pgd);
+   MSG("  PGD : %16lx => %16lx\n", page_dir, (pgd & 
sme_me_mask));
 
if (!(pgd & _PAGE_PRESENT)) {
ERRMSG("Can't get a valid pgd.\n");
return NOT_PADDR;
}
-   pud_paddr  = pgd & ENTRY_MASK;
+   pud_paddr  = pgd & ENTRY_MASK & sme_me_mask;
}
 
/*
@@ -357,47 +361,47 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long pagetable)
return NOT_PADDR;
}
if (info->vaddr_for_vtop == vaddr)
-   MSG("  PUD : %16lx => %16lx\n", pud_paddr, pud_pte);
+   MSG("  PUD : %16lx => %16lx\n", pud_paddr, (pud_pte & 
sme_me_mask));
 
if (!(pud_pte & _PAGE_PRESENT)) {
ERRMSG("Can't get a valid pud_pte.\n");
return NOT_PADDR;
}
if (pud_pte & _PAGE_PSE)/* 1GB pages */
-   return (pud_pte & ENTRY_MASK & PUD_MASK) +
+   return (pud_pte & ENTRY_MASK & PUD_MASK & sme_me_mask) +
(vaddr & ~PUD_MASK);
 
/*
 * Get PMD.
 */
-   pmd_paddr  = pud_pte & ENTRY_MASK;
+   pmd_paddr  = pud_pte & ENTRY_MASK & sme_me_mask;
pmd_paddr += pmd_index(vaddr) * sizeof(unsigned long);
if (!readmem(PADDR, pmd_paddr, &pmd_pte, sizeof pmd_pte)) {
ERRMSG("Can't get pmd_pte (pmd_paddr:%lx).\n", pmd_paddr);
return NOT_PADDR;
}
if (info->vaddr_for_vtop == vaddr)
-   MSG("  PMD : %

Re: [PATCH 2/2] Remove the memory encryption mask to obtain the true physical address

2019-01-27 Thread lijiang
在 2019年01月28日 09:55, lijiang 写道:
> 在 2019年01月25日 22:32, Lendacky, Thomas 写道:
>> On 1/24/19 9:55 PM, dyo...@redhat.com wrote:
>>> + Tom
>>> On 01/25/19 at 11:06am, lijiang wrote:
 在 2019年01月24日 06:16, Kazuhito Hagio 写道:
> On 1/22/2019 3:03 AM, Lianbo Jiang wrote:
>> For AMD machine with SME feature, if SME is enabled in the first
>> kernel, the crashed kernel's page table(pgd/pud/pmd/pte) contains
>> the memory encryption mask, so makedumpfile needs to remove the
>> memory encryption mask to obtain the true physical address.
>>
>> Signed-off-by: Lianbo Jiang 
>> ---
>>  arch/x86_64.c  | 3 +++
>>  makedumpfile.c | 1 +
>>  2 files changed, 4 insertions(+)
>>
>> diff --git a/arch/x86_64.c b/arch/x86_64.c
>> index 537fb78..7651d36 100644
>> --- a/arch/x86_64.c
>> +++ b/arch/x86_64.c
>> @@ -346,6 +346,7 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long 
>> pagetable)
>>  return NOT_PADDR;
>>  }
>>  pud_paddr  = pgd & ENTRY_MASK;
>> +pud_paddr = pud_paddr & ~(NUMBER(sme_mask));
>>  }
>>
>>  /*
>> @@ -371,6 +372,7 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long 
>> pagetable)
>>   * Get PMD.
>>   */
>>  pmd_paddr  = pud_pte & ENTRY_MASK;
>> +pmd_paddr = pmd_paddr & ~(NUMBER(sme_mask));
>>  pmd_paddr += pmd_index(vaddr) * sizeof(unsigned long);
>>  if (!readmem(PADDR, pmd_paddr, &pmd_pte, sizeof pmd_pte)) {
>>  ERRMSG("Can't get pmd_pte (pmd_paddr:%lx).\n", 
>> pmd_paddr);
>> @@ -391,6 +393,7 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long 
>> pagetable)
>>   * Get PTE.
>>   */
>>  pte_paddr  = pmd_pte & ENTRY_MASK;
>> +pte_paddr = pte_paddr & ~(NUMBER(sme_mask));
>>  pte_paddr += pte_index(vaddr) * sizeof(unsigned long);
>>  if (!readmem(PADDR, pte_paddr, &pte, sizeof pte)) {
>>  ERRMSG("Can't get pte (pte_paddr:%lx).\n", pte_paddr);
>> diff --git a/makedumpfile.c b/makedumpfile.c
>> index a03aaa1..81c7bb4 100644
>> --- a/makedumpfile.c
>> +++ b/makedumpfile.c
>> @@ -977,6 +977,7 @@ next_page:
>>  read_size = MIN(info->page_size - PAGEOFFSET(paddr), size);
>>
>>  pgaddr = PAGEBASE(paddr);
>> +pgaddr = pgaddr & ~(NUMBER(sme_mask));
>
> Since NUMBER(sme_mask) is initialized with -1 (NOT_FOUND_NUMBER),
> if the sme_mask is not in vmcoreinfo, ~(NUMBER(sme_mask)) will be 0.
> So the four lines added above need
>
>   if (NUMBER(sme_mask) != NOT_FOUND_NUMBER)
> ...
>

 Thank you very much for pointing out my mistake.

 I will improve it and post again.
>>
>> Might be worth creating a local variable that includes ENTRY_MASK and
>> NUMBER(sme_mask) so that you make the check just once. Then use that
>> variable in place of ENTRY_MASK in the remainder of the function so
>> that the correct value is used throughout.
>>

Ok.

>> This would also cover the 5-level path which would make this future
>> proof should AMD someday support 5-level paging.
>>
> 
> Thank you, Tom. Makedumpfile will cover the 5-level path in next post,
> though AMD does not support 5-level paging yet.
> 

I mean that I will improve this patch and cover the 5-level path in patch v2.

Thanks.

> Thanks.
> Lianbo
> 

> and, what I'm wondering is whether it doesn't need to take hugepages
> into account such as this
>
> 392 if (pmd_pte & _PAGE_PSE)/* 2MB pages */
> 393 return (pmd_pte & ENTRY_MASK & PMD_MASK) +
> 394 (vaddr & ~PMD_MASK);
> "arch/x86_64.c"
>

 This is a good question. Theoretically, it should be modified accordingly 
 for
 huge pages case.
>>
>> Yes, this should also have the ~(NUMBER(sme_mask)) applied to it. You
>> can probably add some debugging to see if you're hitting this case and
>> whether the encryption bit (sme_mask) is set just to help understand what
>> is occurring. This also goes for the 1GB page check above.  However, if
>> you use my suggestion of a local variable then you should be covered.
>>

Thank you, Tom.

I will modify this patch and cover the huge pages case in patch v2. 

Thanks.
Lianbo

>> Thanks,
>> Tom
>>

 But makedumpfile still works well without this change. And i'm sure that 
 the
 huge pages are enabled in crashed kernel. This is very strange.

 Thanks.
 Lianbo

> Thanks,
> Kazu
>
>
>>  pgbuf = cache_search(pgaddr, read_size);
>>  if (!pgbuf) {
>>  ++cache_miss;
>> --
>> 2.17.1
>>
>
>

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

Re: [PATCH 2/2] Remove the memory encryption mask to obtain the true physical address

2019-01-27 Thread lijiang
在 2019年01月25日 22:32, Lendacky, Thomas 写道:
> On 1/24/19 9:55 PM, dyo...@redhat.com wrote:
>> + Tom
>> On 01/25/19 at 11:06am, lijiang wrote:
>>> 在 2019年01月24日 06:16, Kazuhito Hagio 写道:
 On 1/22/2019 3:03 AM, Lianbo Jiang wrote:
> For AMD machine with SME feature, if SME is enabled in the first
> kernel, the crashed kernel's page table(pgd/pud/pmd/pte) contains
> the memory encryption mask, so makedumpfile needs to remove the
> memory encryption mask to obtain the true physical address.
>
> Signed-off-by: Lianbo Jiang 
> ---
>  arch/x86_64.c  | 3 +++
>  makedumpfile.c | 1 +
>  2 files changed, 4 insertions(+)
>
> diff --git a/arch/x86_64.c b/arch/x86_64.c
> index 537fb78..7651d36 100644
> --- a/arch/x86_64.c
> +++ b/arch/x86_64.c
> @@ -346,6 +346,7 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long 
> pagetable)
>   return NOT_PADDR;
>   }
>   pud_paddr  = pgd & ENTRY_MASK;
> + pud_paddr = pud_paddr & ~(NUMBER(sme_mask));
>   }
>
>   /*
> @@ -371,6 +372,7 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long 
> pagetable)
>* Get PMD.
>*/
>   pmd_paddr  = pud_pte & ENTRY_MASK;
> + pmd_paddr = pmd_paddr & ~(NUMBER(sme_mask));
>   pmd_paddr += pmd_index(vaddr) * sizeof(unsigned long);
>   if (!readmem(PADDR, pmd_paddr, &pmd_pte, sizeof pmd_pte)) {
>   ERRMSG("Can't get pmd_pte (pmd_paddr:%lx).\n", pmd_paddr);
> @@ -391,6 +393,7 @@ __vtop4_x86_64(unsigned long vaddr, unsigned long 
> pagetable)
>* Get PTE.
>*/
>   pte_paddr  = pmd_pte & ENTRY_MASK;
> + pte_paddr = pte_paddr & ~(NUMBER(sme_mask));
>   pte_paddr += pte_index(vaddr) * sizeof(unsigned long);
>   if (!readmem(PADDR, pte_paddr, &pte, sizeof pte)) {
>   ERRMSG("Can't get pte (pte_paddr:%lx).\n", pte_paddr);
> diff --git a/makedumpfile.c b/makedumpfile.c
> index a03aaa1..81c7bb4 100644
> --- a/makedumpfile.c
> +++ b/makedumpfile.c
> @@ -977,6 +977,7 @@ next_page:
>   read_size = MIN(info->page_size - PAGEOFFSET(paddr), size);
>
>   pgaddr = PAGEBASE(paddr);
> + pgaddr = pgaddr & ~(NUMBER(sme_mask));

 Since NUMBER(sme_mask) is initialized with -1 (NOT_FOUND_NUMBER),
 if the sme_mask is not in vmcoreinfo, ~(NUMBER(sme_mask)) will be 0.
 So the four lines added above need

   if (NUMBER(sme_mask) != NOT_FOUND_NUMBER)
 ...

>>>
>>> Thank you very much for pointing out my mistake.
>>>
>>> I will improve it and post again.
> 
> Might be worth creating a local variable that includes ENTRY_MASK and
> NUMBER(sme_mask) so that you make the check just once. Then use that
> variable in place of ENTRY_MASK in the remainder of the function so
> that the correct value is used throughout.
> 
> This would also cover the 5-level path which would make this future
> proof should AMD someday support 5-level paging.
> 

Thank you, Tom. Makedumpfile will cover the 5-level path in next post,
though AMD does not support 5-level paging yet.

Thanks.
Lianbo

>>>
 and, what I'm wondering is whether it doesn't need to take hugepages
 into account such as this

 392 if (pmd_pte & _PAGE_PSE)/* 2MB pages */
 393 return (pmd_pte & ENTRY_MASK & PMD_MASK) +
 394 (vaddr & ~PMD_MASK);
 "arch/x86_64.c"

>>>
>>> This is a good question. Theoretically, it should be modified accordingly 
>>> for
>>> huge pages case.
> 
> Yes, this should also have the ~(NUMBER(sme_mask)) applied to it. You
> can probably add some debugging to see if you're hitting this case and
> whether the encryption bit (sme_mask) is set just to help understand what
> is occurring. This also goes for the 1GB page check above.  However, if
> you use my suggestion of a local variable then you should be covered.
> 
> Thanks,
> Tom
> 
>>>
>>> But makedumpfile still works well without this change. And i'm sure that the
>>> huge pages are enabled in crashed kernel. This is very strange.
>>>
>>> Thanks.
>>> Lianbo
>>>
 Thanks,
 Kazu


>   pgbuf = cache_search(pgaddr, read_size);
>   if (!pgbuf) {
>   ++cache_miss;
> --
> 2.17.1
>



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec