Re: [PATCH v3 1/3] kexec: Move vmcoreinfo out of the kernel's .bss section

2017-03-20 Thread Eric W. Biederman
Xunlei Pang  writes:

> As Eric said,
> "what we need to do is move the variable vmcoreinfo_note out
> of the kernel's .bss section.  And modify the code to regenerate
> and keep this information in something like the control page.
>
> Definitely something like this needs a page all to itself, and ideally
> far away from any other kernel data structures.  I clearly was not
> watching closely the data someone decided to keep this silly thing
> in the kernel's .bss section."
>
> This patch allocates extra pages for these vmcoreinfo_XXX variables,
> one advantage is that it enhances some safety of vmcoreinfo, because
> vmcoreinfo now is kept far away from other kernel data structures.

Can you preceed this patch with a patch that removes CRASHTIME from
vmcoreinfo?  If someone actually cares we can add a separate note that holds
a 64bit crashtime in the per cpu notes.  

As we are looking at reliability concerns removing CRASHTIME should make
everything in vmcoreinfo a boot time constant.  Which should simplify
everything considerably.

Which means we only need to worry abou the per-cpu notes being written
at the time of a crash.

> Suggested-by: Eric Biederman 
> Signed-off-by: Xunlei Pang 
> ---
>  arch/ia64/kernel/machine_kexec.c |  5 -
>  arch/x86/kernel/crash.c  |  2 +-
>  include/linux/kexec.h|  2 +-
>  kernel/kexec_core.c  | 29 -
>  kernel/ksysfs.c  |  2 +-
>  5 files changed, 27 insertions(+), 13 deletions(-)
>
> diff --git a/arch/ia64/kernel/machine_kexec.c 
> b/arch/ia64/kernel/machine_kexec.c
> index 599507b..c14815d 100644
> --- a/arch/ia64/kernel/machine_kexec.c
> +++ b/arch/ia64/kernel/machine_kexec.c
> @@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
>  #endif
>  }
>  
> -phys_addr_t paddr_vmcoreinfo_note(void)
> -{
> - return ia64_tpa((unsigned long)(char *)_note);
> -}
> -
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index 3741461..4d35fbb 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -456,7 +456,7 @@ static int prepare_elf64_headers(struct crash_elf_data 
> *ced,
>   bufp += sizeof(Elf64_Phdr);
>   phdr->p_type = PT_NOTE;
>   phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
> - phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
> + phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
>   (ehdr->e_phnum)++;
>  
>  #ifdef CONFIG_X86_64
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index e98e546..f1c601b 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -317,7 +317,7 @@ extern void *kexec_purgatory_get_symbol_addr(struct 
> kimage *image,
>  extern struct resource crashk_low_res;
>  typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4];
>  extern note_buf_t __percpu *crash_notes;
> -extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> +extern u32 *vmcoreinfo_note;
>  extern size_t vmcoreinfo_size;
>  extern size_t vmcoreinfo_max_size;
>  
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index bfe62d5..e3a4bda 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -52,10 +52,10 @@
>  note_buf_t __percpu *crash_notes;
>  
>  /* vmcoreinfo stuff */
> -static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> +static unsigned char *vmcoreinfo_data;
>  size_t vmcoreinfo_size;
> -size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
> +size_t vmcoreinfo_max_size = VMCOREINFO_BYTES;
> +u32 *vmcoreinfo_note;
>  
>  /* Flag to indicate we are going to kexec a new kernel */
>  bool kexec_in_progress = false;
> @@ -1369,6 +1369,9 @@ static void update_vmcoreinfo_note(void)
>  
>  void crash_save_vmcoreinfo(void)
>  {
> + if (!vmcoreinfo_note)
> + return;
> +
>   vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>   update_vmcoreinfo_note();
>  }
> @@ -1397,13 +1400,29 @@ void vmcoreinfo_append_str(const char *fmt, ...)
>  void __weak arch_crash_save_vmcoreinfo(void)
>  {}
>  
> -phys_addr_t __weak paddr_vmcoreinfo_note(void)
> +phys_addr_t paddr_vmcoreinfo_note(void)
>  {
> - return __pa_symbol((unsigned long)(char *)_note);
> + return __pa(vmcoreinfo_note);
>  }
>  
>  static int __init crash_save_vmcoreinfo_init(void)
>  {
> + /* One page should be enough for VMCOREINFO_BYTES under all archs */
> + vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
> + if (!vmcoreinfo_data) {
> + pr_warn("Memory allocation for vmcoreinfo_data failed\n");
> + return -ENOMEM;
> + }
> +
> + vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
> + GFP_KERNEL | __GFP_ZERO);
> + if (!vmcoreinfo_note) {
> + free_page((unsigned long)vmcoreinfo_data);
> + vmcoreinfo_data = NULL;
> + pr_warn("Memory 

[PATCH] elf_info.c: fix memory leak in get_kcore_dump_loads()

2017-03-20 Thread Pingfan Liu
Signed-off-by: Pingfan Liu 
---
 elf_info.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/elf_info.c b/elf_info.c
index d84553a..35e754b 100644
--- a/elf_info.c
+++ b/elf_info.c
@@ -893,12 +893,14 @@ int get_kcore_dump_loads(void)
|| !is_phys_addr(p->virt_start))
continue;
if (j >= loads)
+   free(pls)
return FALSE;
 
if (j == 0) {
offset_pt_load_memory = p->file_offset;
if (offset_pt_load_memory == 0) {
ERRMSG("Can't get the offset of page data.\n");
+   free(pls)
return FALSE;
}
}
-- 
2.7.4


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec: Update vmcoreinfo after crash happened

2017-03-20 Thread Xunlei Pang
On 03/20/2017 at 09:04 PM, Petr Tesarik wrote:
> On Mon, 20 Mar 2017 10:17:42 +0800
> Xunlei Pang  wrote:
>
>> On 03/19/2017 at 02:23 AM, Petr Tesarik wrote:
>>> On Thu, 16 Mar 2017 21:40:58 +0800
>>> Xunlei Pang  wrote:
>>>
 On 03/16/2017 at 09:18 PM, Baoquan He wrote:
> On 03/16/17 at 08:36pm, Xunlei Pang wrote:
>> On 03/16/2017 at 08:27 PM, Baoquan He wrote:
>>> Hi Xunlei,
>>>
>>> Did you really see this ever happened? Because the vmcore size estimate
>>> feature, namely --mem-usage option of makedumpfile, depends on the
>>> vmcoreinfo in 1st kernel, your change will break it.
>> Hi Baoquan,
>>
>> I can reproduce it using a kernel module which modifies the vmcoreinfo,
>> so it's a problem can actually happen.
>>
>>> If not, it could be not good to change that.
>> That's a good point, then I guess we can keep the 
>> crash_save_vmcoreinfo_init(),
>> and store again all the vmcoreinfo after crash. What do you think?
> Well, then it will make makedumpfile segfault happen too when execute
> below command in 1st kernel if it existed:
>   makedumpfile --mem-usage /proc/kcore
 Yes, if the initial vmcoreinfo data was modified before "makedumpfile 
 --mem-usage", it might happen,
 after all the system is going something wrong. And that's why we deploy 
 kdump service at the very
 beginning when the system has a low possibility of going wrong.

 But we have to guarantee kdump vmcore can be generated correctly as 
 possible as it can.

> So we still need to face that problem and need fix it. vmcoreinfo_note
> is in kernel data area, how does module intrude into this area? And can
> we fix the module code?
>
 Bugs always exist in products, we can't know what will happen and fix all 
 the errors,
 that's why we need kdump.

 I think the following update should guarantee the correct vmcoreinfo for 
 kdump.
>>> I'm still not convinced. I would probably have more trust in a clean
>>> kernel (after boot) than a kernel that has already crashed (presumably
>>> because of a serious bug). How can be reliability improved by running
>>> more code in unsafe environment?
>> Correct, I realized that, so used crc32 to protect the original data,
>> but since Eric left a more reasonable idea, I will try that later.
>>
>>> If some code overwrites reserved areas (such as vmcoreinfo), then it's
>>> seriously buggy. And in my opinion, it is more difficult to identify
>>> such bugs if they are masked by re-initializing vmcoreinfo after crash.
>>> In fact, if makedumpfile in the kexec'ed kernel complains that it
>>> didn't find valid VMCOREINFO content, that's already a hint.
>>>
>>> As a side note, if you're debugging a vmcoreinfo corruption, it's
>>> possible to use a standalone VMCOREINFO file with makedumpfile, so you
>>> can pre-generate it and save it in the kdump initrd.
>>>
>>> In short, I don't see a compelling case for this change.
>> E.g. 1) wrong code overwrites vmcoreinfo_data; 2) further crashes the
>> system; 3) trigger kdump, then we obviously will fail to recognize the
>> crash context correctly due to the corrupted vmcoreinfo.  Everyone
>> will get confused if met such unfortunate customer-side issue.
>>
>> Although it's corner case, if it's easy to fix, then I think we better do it.
>>
>> Now except for vmcoreinfo, all the crash data is well protected (including
>> cpu note which is fully updated in the crash path, thus its correctness is
>> guaranteed).
> Hm, I think we shouldn't combine the two things.
>
> Protecting VMCOREINFO with SHA (just as the other information passed to
> the secondary kernel) sounds right to me. Re-creating the info while
> the kernel is already crashing does not sound particularly good.
>
> Yes, your patch may help in some scenarios, but in general it also
> increases the amount of code that must reliably work in a crashed
> environment. I can still recall why the LKCD approach (save the dump
> directly from the crashed kernel) was abandoned...

Agree on this point, there is nearly no extra code added to the crash path in 
v3,
maybe you can have a quick look.

>
> Apart, there's a lot of other information that might be corrupted (e.g.
> the purgatory code, elfcorehdr, secondary kernel, or the initrd).

Those are located at the crash memory, they can be protected by either
SHA or the arch_kexec_protect_crashkres() mechanism(if implemented).

>
> Why is this VMCOREINFO so special?

It is also a chunk passed to 2nd kernel like the above-mentioned information,
we better treat it like them as well.

Regards,
Xunlei

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec: Update vmcoreinfo after crash happened

2017-03-20 Thread Petr Tesarik
On Mon, 20 Mar 2017 10:17:42 +0800
Xunlei Pang  wrote:

> On 03/19/2017 at 02:23 AM, Petr Tesarik wrote:
> > On Thu, 16 Mar 2017 21:40:58 +0800
> > Xunlei Pang  wrote:
> >
> >> On 03/16/2017 at 09:18 PM, Baoquan He wrote:
> >>> On 03/16/17 at 08:36pm, Xunlei Pang wrote:
>  On 03/16/2017 at 08:27 PM, Baoquan He wrote:
> > Hi Xunlei,
> >
> > Did you really see this ever happened? Because the vmcore size estimate
> > feature, namely --mem-usage option of makedumpfile, depends on the
> > vmcoreinfo in 1st kernel, your change will break it.
>  Hi Baoquan,
> 
>  I can reproduce it using a kernel module which modifies the vmcoreinfo,
>  so it's a problem can actually happen.
> 
> > If not, it could be not good to change that.
>  That's a good point, then I guess we can keep the 
>  crash_save_vmcoreinfo_init(),
>  and store again all the vmcoreinfo after crash. What do you think?
> >>> Well, then it will make makedumpfile segfault happen too when execute
> >>> below command in 1st kernel if it existed:
> >>>   makedumpfile --mem-usage /proc/kcore
> >> Yes, if the initial vmcoreinfo data was modified before "makedumpfile 
> >> --mem-usage", it might happen,
> >> after all the system is going something wrong. And that's why we deploy 
> >> kdump service at the very
> >> beginning when the system has a low possibility of going wrong.
> >>
> >> But we have to guarantee kdump vmcore can be generated correctly as 
> >> possible as it can.
> >>
> >>> So we still need to face that problem and need fix it. vmcoreinfo_note
> >>> is in kernel data area, how does module intrude into this area? And can
> >>> we fix the module code?
> >>>
> >> Bugs always exist in products, we can't know what will happen and fix all 
> >> the errors,
> >> that's why we need kdump.
> >>
> >> I think the following update should guarantee the correct vmcoreinfo for 
> >> kdump.
> > I'm still not convinced. I would probably have more trust in a clean
> > kernel (after boot) than a kernel that has already crashed (presumably
> > because of a serious bug). How can be reliability improved by running
> > more code in unsafe environment?
> 
> Correct, I realized that, so used crc32 to protect the original data,
> but since Eric left a more reasonable idea, I will try that later.
> 
> >
> > If some code overwrites reserved areas (such as vmcoreinfo), then it's
> > seriously buggy. And in my opinion, it is more difficult to identify
> > such bugs if they are masked by re-initializing vmcoreinfo after crash.
> > In fact, if makedumpfile in the kexec'ed kernel complains that it
> > didn't find valid VMCOREINFO content, that's already a hint.
> >
> > As a side note, if you're debugging a vmcoreinfo corruption, it's
> > possible to use a standalone VMCOREINFO file with makedumpfile, so you
> > can pre-generate it and save it in the kdump initrd.
> >
> > In short, I don't see a compelling case for this change.
> 
> E.g. 1) wrong code overwrites vmcoreinfo_data; 2) further crashes the
> system; 3) trigger kdump, then we obviously will fail to recognize the
> crash context correctly due to the corrupted vmcoreinfo.  Everyone
> will get confused if met such unfortunate customer-side issue.
> 
> Although it's corner case, if it's easy to fix, then I think we better do it.
> 
> Now except for vmcoreinfo, all the crash data is well protected (including
> cpu note which is fully updated in the crash path, thus its correctness is
> guaranteed).

Hm, I think we shouldn't combine the two things.

Protecting VMCOREINFO with SHA (just as the other information passed to
the secondary kernel) sounds right to me. Re-creating the info while
the kernel is already crashing does not sound particularly good.

Yes, your patch may help in some scenarios, but in general it also
increases the amount of code that must reliably work in a crashed
environment. I can still recall why the LKCD approach (save the dump
directly from the crashed kernel) was abandoned...

Apart, there's a lot of other information that might be corrupted (e.g.
the purgatory code, elfcorehdr, secondary kernel, or the initrd).

Why is this VMCOREINFO so special?

Regards,
Petr Tesarik

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v33 00/14] add kdump support

2017-03-20 Thread Pratyush Anand



On Wednesday 15 March 2017 03:26 PM, AKASHI Takahiro wrote:

This patch series adds kdump support on arm64.

To load a crash-dump kernel to the systems, a series of patches to
kexec-tools[1] are also needed. Please use the latest one, v6 [2].
For your convinience, you can pick them up from:
   https://git.linaro.org/people/takahiro.akashi/linux-aarch64.git arm64/kdump
   https://git.linaro.org/people/takahiro.akashi/kexec-tools.git arm64/kdump

To examine vmcore (/proc/vmcore) on a crash-dump kernel, you can use
  - crash utility (v7.1.8 or later) [3]

I tested this patchset on fast model and hikey.

The previous versions were also:
Tested-by: Pratyush Anand  (v32, mustang and seattle)


You have it for this version as well.

Thanks for the patchset.

~Pratyush

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] crashdump: Remove stray get_crashkernel_region() declaration

2017-03-20 Thread Simon Horman
On Thu, Mar 16, 2017 at 11:12:24AM +0100, Daniel Kiper wrote:
> Signed-off-by: Daniel Kiper 

Thanks, applied.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH][kexec-tools] ppc: Fix format warning with die()

2017-03-20 Thread Simon Horman
On Thu, Mar 16, 2017 at 04:11:18PM +0200, Jussi Kukkonen wrote:
> Enable compiling kexec-tools for ppc with -Werror=format-security.
> 
> Signed-off-by: Jussi Kukkonen 

Thanks, applied.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[RFC PATCH] x86_64/mm/boot: Fix kernel_ident_mapping_init() failure for kexec

2017-03-20 Thread Xunlei Pang
I found that the kdump is broken on linux-4.11.0-rc2+, probably
due to the 5level-paging feature that "#define p4d_present(p4d) 1",
as a result in ident_p4d_init(), it will go into ident_pud_init()
directly without allocating the new pud.

Looks like this patch can make it work again.

Cc: Kirill A. Shutemov 
Signed-off-by: Xunlei Pang 
---
 arch/x86/mm/ident_map.c| 22 ++
 include/asm-generic/5level-fixup.h |  6 +++---
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 04210a2..d14bb52 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -97,22 +97,20 @@ int kernel_ident_mapping_init(struct x86_mapping_info 
*info, pgd_t *pgd_page,
continue;
}
 
-   p4d = (p4d_t *)info->alloc_pgt_page(info->context);
-   if (!p4d)
-   return -ENOMEM;
+   if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+   p4d = (p4d_t *)info->alloc_pgt_page(info->context);
+   if (!p4d)
+   return -ENOMEM;
+   } else {
+   p4d = pgd;
+   }
+
result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
-   if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+
+   if (IS_ENABLED(CONFIG_X86_5LEVEL))
set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
-   } else {
-   /*
-* With p4d folded, pgd is equal to p4d.
-* The pgd entry has to point to the pud page table in 
this case.
-*/
-   pud_t *pud = pud_offset(p4d, 0);
-   set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
-   }
}
 
return 0;
diff --git a/include/asm-generic/5level-fixup.h 
b/include/asm-generic/5level-fixup.h
index b5ca82d..3131ada 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -17,9 +17,9 @@
 
 #define p4d_alloc(mm, pgd, address)(pgd)
 #define p4d_offset(pgd, start) (pgd)
-#define p4d_none(p4d)  0
-#define p4d_bad(p4d)   0
-#define p4d_present(p4d)   1
+#define p4d_none(p4d)  pgd_none(p4d)
+#define p4d_bad(p4d)   pgd_bad(p4d)
+#define p4d_present(p4d)   pgd_present(p4d)
 #define p4d_ERROR(p4d) do { } while (0)
 #define p4d_clear(p4d) pgd_clear(p4d)
 #define p4d_val(p4d)   pgd_val(p4d)
-- 
1.8.3.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec