RE: [PATCH 3/3] close_dump_bitmap: simplify logic
>> > The boolean expression replicates the logic of open_dump_bitmap(). >> > It's simpler and less error-prone to simply check if fd_bitmap is >> > valid. >> > >> > Signed-off-by: Martin Wilck>> > --- >> > makedumpfile.c | 3 +-- >> > 1 file changed, 1 insertion(+), 2 deletions(-) >> > >> > diff --git a/makedumpfile.c b/makedumpfile.c >> > index 43278f1..771ab7c 100644 >> > --- a/makedumpfile.c >> > +++ b/makedumpfile.c >> > @@ -8579,8 +8579,7 @@ close_dump_file(void) >> > void >> > close_dump_bitmap(void) >> > { >> > - if (!info->working_dir && !info->flag_reassemble && !info- >> > >flag_refiltering >> > - && !info->flag_sadump && !info->flag_mem_usage) >> > + if (!info->fd_bitmap) >> >> Strictly speaking, zero is a valid FD. I can see that it is highly >> unlikely to be the bitmap FD, but it would be a nice cleanup to >> initialize fd_bitmap to a negative number and check info->fd_bitmap < >> 0. >> I'm just not sure where to put the initializition... > > >> > OTOH I know I'm asking you to fix something that you didn't break. > >I had the same thought, and the same excuse not to address it in this >patch set. If you grep makedumpfile.c for "fd_bitmap", you'll see many >checks like "if (info->fd_bitmap)". I just followed that pattern for >now. I see, it would be better to make the checks strict on this occasion. So, could you work for that cleanup before your three patches as an additional cleanup patch ? Thanks, Atsushi Kumagai ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v2 2/2] kexec: extend kexec_file_load system call
Here is a new version implementing your suggestions. I also changed it to kmalloc fdset instead of using the stack. What do you think? From: AKASHI TakahiroDevice tree blob must be passed to a second kernel on DTB-capable archs, like powerpc and arm64, but the current kernel interface lacks this support. This patch extends kexec_file_load system call by adding an extra argument to this syscall so that an arbitrary number of file descriptors can be handed out from user space to the kernel. long sys_kexec_file_load(int kernel_fd, int initrd_fd, unsigned long cmdline_len, const char __user *cmdline_ptr, unsigned long flags, const struct kexec_fdset __user *ufdset); If KEXEC_FILE_EXTRA_FDS is set to the "flags" argument, the "ufdset" argument points to the following struct buffer: struct kexec_fdset { int nr_fds; struct kexec_file_fd fds[0]; } Signed-off-by: AKASHI Takahiro Signed-off-by: Thiago Jung Bauermann --- include/linux/fs.h | 1 + include/linux/kexec.h | 7 +++- include/linux/syscalls.h | 4 +- include/uapi/linux/kexec.h | 22 +++ kernel/kexec_file.c| 92 +++--- 5 files changed, 117 insertions(+), 9 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 3523bf62f328..2eb0674392d1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2656,6 +2656,7 @@ extern int do_pipe_flags(int *, int); id(MODULE, kernel-module) \ id(KEXEC_IMAGE, kexec-image)\ id(KEXEC_INITRAMFS, kexec-initramfs)\ + id(KEXEC_PARTIAL_DTB, kexec-partial-dtb)\ id(POLICY, security-policy) \ id(MAX_ID, ) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 4f85d284ed0b..29202935055d 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -148,7 +148,10 @@ struct kexec_file_ops { kexec_verify_sig_t *verify_sig; #endif }; -#endif + +int __weak arch_kexec_verify_buffer(enum kexec_file_type type, const void *buf, + unsigned long size); +#endif /* CONFIG_KEXEC_FILE */ struct kimage { kimage_entry_t head; @@ -280,7 +283,7 @@ extern int kexec_load_disabled; /* 
List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ -KEXEC_FILE_NO_INITRAMFS) +KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_EXTRA_FDS) #define VMCOREINFO_BYTES (4096) #define VMCOREINFO_NOTE_NAME "VMCOREINFO" diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d02239022bd0..fc072bdb74e3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -66,6 +66,7 @@ struct perf_event_attr; struct file_handle; struct sigaltstack; union bpf_attr; +struct kexec_fdset; #include #include @@ -321,7 +322,8 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd, unsigned long cmdline_len, const char __user *cmdline_ptr, - unsigned long flags); + unsigned long flags, + const struct kexec_fdset __user *ufdset); asmlinkage long sys_exit(int error_code); asmlinkage long sys_exit_group(int error_code); diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index aae5ebf2022b..6279be79efba 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -23,6 +23,28 @@ #define KEXEC_FILE_UNLOAD 0x0001 #define KEXEC_FILE_ON_CRASH0x0002 #define KEXEC_FILE_NO_INITRAMFS0x0004 +#define KEXEC_FILE_EXTRA_FDS 0x0008 + +enum kexec_file_type { + KEXEC_FILE_TYPE_KERNEL, + KEXEC_FILE_TYPE_INITRAMFS, + + /* +* Device Tree Blob containing just the nodes and properties that +* the kexec_file_load caller wants to add or modify. +*/ + KEXEC_FILE_TYPE_PARTIAL_DTB, +}; + +struct kexec_file_fd { + enum kexec_file_type type; + int fd; +}; + +struct kexec_fdset { + int nr_fds; + struct kexec_file_fd fds[0]; +}; /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. 
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 113af2f219b9..302427e5ee71 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -116,6 +116,22 @@ void kimage_file_post_load_cleanup(struct kimage *image) image->image_loader_data = NULL; } +/** +
Re: [PATCH v5 04/13] powerpc: Factor out relocation code from module_64.c to elf_util_64.c.
Am Montag, 15 August 2016, 17:46:34 schrieb Balbir Singh: > On Thu, Aug 11, 2016 at 08:08:09PM -0300, Thiago Jung Bauermann wrote: > > +/** > > + * elf64_apply_relocate_add - apply 64 bit RELA relocations > > + * @elf_info: Support information for the ELF binary being relocated. > > + * @strtab:String table for the associated symbol table. > > + * @symindex: Section header index for the associated symbol table. > > + * @relsec:Section header index for the relocations to apply. > > + * @obj_name: The name of the ELF binary, for information messages. > > + */ > > +int elf64_apply_relocate_add(const struct elf_info *elf_info, > > +const char *strtab, unsigned int symindex, > > +unsigned int relsec, const char *obj_name) > > +{ > > + unsigned int i; > > + Elf64_Shdr *sechdrs = elf_info->sechdrs; > > + Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr; > > + Elf64_Sym *sym; > > + unsigned long *location; > > + unsigned long value; > > + > > For the relocatable kernel we expect only > > R_PPC64_RELATIVE > R_PPC64_NONE > R_PPC64_ADDR64 > > In the future we can use this to check/assert the usage of this > for the core kernel (vmlinux) when loaded. > > Did we check elf64_apply_relocate_add with zImage and vmlinux? kexec_file_load doesn't call call elf64_apply_relocate_add on the kernel image, it only uses it to relocate the purgatory. So whether it is loading a zImage or a vmlinux file, the function will work in the same way since the purgatory binary is the same regardless of the kernel image format. For the same reason, as it currently stands kexec_file_load can't check the relocation types used in the kernel image. But it is possible to add such a check/assertion in kexec_elf_64.c:build_elf_exec_info if we want. I tested kexec_file_load on both relocatable and non-relocatable vmlinux and it works correctly. I hadn't tested with zImage yet. I just did, and I had two problems: 1. For some reason, it has an INTERP segment. 
This patch series doesn't support loading program interpreters for ELF binaries, so kexec_elf_64.c:build_elf_exec_info refuses to load them. 2. If I disable the check for the INTERP segment, the zImage file loads correctly, but then I get an exception during reboot when loading the kexec image, right before jumping into the purgatory. I suspect this is because the LOAD segment has a virtual address of 0, and the first kernel is not coping well with that. But I still have to debug it further. Is there a reason for the zImage ELF header to request an interpreter and to have a virtual address of 0? -- []'s Thiago Jung Bauermann IBM Linux Technology Center ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [V4 PATCH 2/2] mips/panic: Replace smp_send_stop() with kdump friendly version in panic path
On 08/15/2016 12:06 PM, Corey Minyard wrote: On 08/15/2016 06:35 AM, 河合英宏 / KAWAI,HIDEHIRO wrote: Hi Corey, From: Corey Minyard [mailto:cminy...@mvista.com] Sent: Friday, August 12, 2016 10:56 PM I'll try to test this, but I have one comment inline... Thank you very much! On 08/11/2016 10:17 PM, Dave Young wrote: On 08/10/16 at 05:09pm, Hidehiro Kawai wrote: [snip] diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index 610f0f3..1723b17 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -47,9 +47,14 @@ static void crash_shutdown_secondary(void *passed_regs) static void crash_kexec_prepare_cpus(void) { +static int cpus_stopped; unsigned int msecs; +unsigned int ncpus; -unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ +if (cpus_stopped) +return; Wouldn't you want an atomic operation and some special handling here to ensure that only one CPU does this? So if a CPU comes in here and another CPU is already in the process stopping the CPUs it won't result in a deadlock. Because this function can be called only one panicking CPU, there is no problem. There are two paths which crash_kexec_prepare_cpus is called. Path 1 (panic path): panic() crash_smp_send_stop() crash_kexec_prepare_cpus() Path 2 (oops path): crash_kexec() __crash_kexec() machine_crash_shutdown() default_machine_crash_shutdown() // for MIPS crash_kexec_prepare_cpus() Here, panic() and crash_kexec() run exclusively via panic_cpu atomic variable. So we can use cpus_stopped as normal variable. Ok, if the code can only be entered once, what's the purpose of cpus_stopped? I guess that's what confused me. You are right, the panic_cpu atomic should keep this on a single CPU. Never mind, I see the path through panic() where that is required. My question below still remains, though. -corey Also, panic() will call panic_smp_self_stop() if it finds another CPU has already called panic, which will just spin with interrupts off by default. 
I didn't see a definition for it in MIPS, wouldn't it need to be overridden to avoid a deadlock? -corey Best regards, Hidehiro Kawai ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [V4 PATCH 2/2] mips/panic: Replace smp_send_stop() with kdump friendly version in panic path
On 08/15/2016 06:35 AM, 河合英宏 / KAWAI,HIDEHIRO wrote: Hi Corey, From: Corey Minyard [mailto:cminy...@mvista.com] Sent: Friday, August 12, 2016 10:56 PM I'll try to test this, but I have one comment inline... Thank you very much! On 08/11/2016 10:17 PM, Dave Young wrote: On 08/10/16 at 05:09pm, Hidehiro Kawai wrote: [snip] diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index 610f0f3..1723b17 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -47,9 +47,14 @@ static void crash_shutdown_secondary(void *passed_regs) static void crash_kexec_prepare_cpus(void) { + static int cpus_stopped; unsigned int msecs; + unsigned int ncpus; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + if (cpus_stopped) + return; Wouldn't you want an atomic operation and some special handling here to ensure that only one CPU does this? So if a CPU comes in here and another CPU is already in the process stopping the CPUs it won't result in a deadlock. Because this function can be called only one panicking CPU, there is no problem. There are two paths which crash_kexec_prepare_cpus is called. Path 1 (panic path): panic() crash_smp_send_stop() crash_kexec_prepare_cpus() Path 2 (oops path): crash_kexec() __crash_kexec() machine_crash_shutdown() default_machine_crash_shutdown() // for MIPS crash_kexec_prepare_cpus() Here, panic() and crash_kexec() run exclusively via panic_cpu atomic variable. So we can use cpus_stopped as normal variable. Ok, if the code can only be entered once, what's the purpose of cpus_stopped? I guess that's what confused me. You are right, the panic_cpu atomic should keep this on a single CPU. Also, panic() will call panic_smp_self_stop() if it finds another CPU has already called panic, which will just spin with interrupts off by default. I didn't see a definition for it in MIPS, wouldn't it need to be overridden to avoid a deadlock? 
-corey Best regards, Hidehiro Kawai ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH v5 02/13] kexec_file: Change kexec_add_buffer to take kexec_buf as argument.
Am Montag, 15 August 2016, 17:30:49 schrieb Balbir Singh: > On Thu, Aug 11, 2016 at 08:08:07PM -0300, Thiago Jung Bauermann wrote: > > Adapt all callers to the new function prototype. > > Could you please expand on this? Is the following better? Adapt all callers to set up a kexec_buf to pass to kexec_add_buffer. > Looks good otherwise > > Acked-by: Balbir SinghThank you for reviewing this series! -- []'s Thiago Jung Bauermann IBM Linux Technology Center ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
RE: Re: [V4 PATCH 1/2] x86/panic: Replace smp_send_stop() with kdump friendly version in panic path
Hi Dave, Thank you for the review. > From: Dave Young [mailto:dyo...@redhat.com] > Sent: Friday, August 12, 2016 12:17 PM > > Thanks for the update. > On 08/10/16 at 05:09pm, Hidehiro Kawai wrote: > > Daniel Walker reported problems which happens when > > crash_kexec_post_notifiers kernel option is enabled > > (https://lkml.org/lkml/2015/6/24/44). > > > > In that case, smp_send_stop() is called before entering kdump routines > > which assume other CPUs are still online. As the result, for x86, > > kdump routines fail to save other CPUs' registers and disable > > virtualization extensions. > > Seems you simplified the changelog, but I think a little more details > will be helpful to understand the patch. You know sometimes lkml.org > does not work well. So, I'll try another archives when I post patch set next time. > > To fix this problem, call a new kdump friendly function, > > crash_smp_send_stop(), instead of the smp_send_stop() when > > crash_kexec_post_notifiers is enabled. crash_smp_send_stop() is a > > weak function, and it just call smp_send_stop(). Architecture > > codes should override it so that kdump can work appropriately. > > This patch only provides x86-specific version. > > > > For Xen's PV kernel, just keep the current behavior. > > Could you explain a bit about above Xen PV kernel behavior? > > BTW, this version looks better, I think I'm fine with this version > besides of the questions about changelog. As for Dom0 kernel, it doesn't use crash_kexec routines, and it relies on panic notifier chain. At the end of the chain, xen_panic_event is called, and it issues a hypercall which requests Hypervisor to execute kdump. This means whether crash_kexec_panic_notifiers is set or not, panic notifiers are called after smp_send_stop. Even if we save registers in Dom0 kernel, they seem to be ignored (Hypervisor is responsible for that). This is why I kept the current behavior for Xen. For PV DomU kernel, kdump is not supported. 
For PV HVM DomU, I'm not sure what will happen on panic because I couldn't boot PV HVM DomU and test it. But I think it will work similarly to baremetal kernels with extra cleanups for Hypervisor. Best regards, Hidehiro Kawai > > Changes in V4: > > - Keep to use smp_send_stop if crash_kexec_post_notifiers is not set > > - Rename panic_smp_send_stop to crash_smp_send_stop > > - Don't change the behavior for Xen's PV kernel > > > > Changes in V3: > > - Revise comments, description, and symbol names > > > > Changes in V2: > > - Replace smp_send_stop() call with crash_kexec version which > > saves cpu states and cleans up VMX/SVM > > - Drop a fix for Problem 1 at this moment > > > > Reported-by: Daniel Walker> > Fixes: f06e5153f4ae (kernel/panic.c: add "crash_kexec_post_notifiers" > > option) > > Signed-off-by: Hidehiro Kawai > > Cc: Dave Young > > Cc: Baoquan He > > Cc: Vivek Goyal > > Cc: Eric Biederman > > Cc: Masami Hiramatsu > > Cc: Daniel Walker > > Cc: Xunlei Pang > > Cc: Thomas Gleixner > > Cc: Ingo Molnar > > Cc: "H. 
Peter Anvin" > > Cc: Borislav Petkov > > Cc: David Vrabel > > Cc: Toshi Kani > > Cc: Andrew Morton > > --- > > arch/x86/include/asm/kexec.h |1 + > > arch/x86/include/asm/smp.h |1 + > > arch/x86/kernel/crash.c | 22 +--- > > arch/x86/kernel/smp.c|5 > > kernel/panic.c | 47 > > -- > > 5 files changed, 66 insertions(+), 10 deletions(-) > > > > diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h > > index d2434c1..282630e 100644 > > --- a/arch/x86/include/asm/kexec.h > > +++ b/arch/x86/include/asm/kexec.h > > @@ -210,6 +210,7 @@ struct kexec_entry64_regs { > > > > typedef void crash_vmclear_fn(void); > > extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; > > +extern void kdump_nmi_shootdown_cpus(void); > > > > #endif /* __ASSEMBLY__ */ > > > > diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h > > index ebd0c16..f70989c 100644 > > --- a/arch/x86/include/asm/smp.h > > +++ b/arch/x86/include/asm/smp.h > > @@ -50,6 +50,7 @@ struct smp_ops { > > void (*smp_cpus_done)(unsigned max_cpus); > > > > void (*stop_other_cpus)(int wait); > > + void (*crash_stop_other_cpus)(void); > > void (*smp_send_reschedule)(int cpu); > > > > int (*cpu_up)(unsigned cpu, struct task_struct *tidle); > > diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c > > index 9616cf7..650830e 100644 > > --- a/arch/x86/kernel/crash.c > > +++ b/arch/x86/kernel/crash.c > > @@ -133,15 +133,31 @@ static void kdump_nmi_callback(int cpu, struct > > pt_regs
RE: Re: [V4 PATCH 2/2] mips/panic: Replace smp_send_stop() with kdump friendly version in panic path
Hi Corey, > From: Corey Minyard [mailto:cminy...@mvista.com] > Sent: Friday, August 12, 2016 10:56 PM > I'll try to test this, but I have one comment inline... Thank you very much! > On 08/11/2016 10:17 PM, Dave Young wrote: > > On 08/10/16 at 05:09pm, Hidehiro Kawai wrote: [snip] > >> diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c > >> index 610f0f3..1723b17 100644 > >> --- a/arch/mips/kernel/crash.c > >> +++ b/arch/mips/kernel/crash.c > >> @@ -47,9 +47,14 @@ static void crash_shutdown_secondary(void *passed_regs) > >> > >> static void crash_kexec_prepare_cpus(void) > >> { > >> + static int cpus_stopped; > >>unsigned int msecs; > >> + unsigned int ncpus; > >> > >> - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ > >> + if (cpus_stopped) > >> + return; > > Wouldn't you want an atomic operation and some special handling here to > ensure that only one CPU does this? So if a CPU comes in here and > another CPU is already in the process stopping the CPUs it won't result in a > deadlock. Because this function can be called only one panicking CPU, there is no problem. There are two paths which crash_kexec_prepare_cpus is called. Path 1 (panic path): panic() crash_smp_send_stop() crash_kexec_prepare_cpus() Path 2 (oops path): crash_kexec() __crash_kexec() machine_crash_shutdown() default_machine_crash_shutdown() // for MIPS crash_kexec_prepare_cpus() Here, panic() and crash_kexec() run exclusively via panic_cpu atomic variable. So we can use cpus_stopped as normal variable. Best regards, Hidehiro Kawai ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH] kexec: Account crashk_low_res to kexec_crash_size
On 2016/08/15 at 15:17, Dave Young wrote: > Hi Xunlei, > > On 08/13/16 at 04:26pm, Xunlei Pang wrote: >> "/sys/kernel/kexec_crash_size" only includes crashk_res, it >> is fine in most cases, but sometimes we have crashk_low_res. >> For example, when "crashkernel=size[KMG],high" combined with >> "crashkernel=size[KMG],low" is used for 64-bit x86. >> >> Let "/sys/kernel/kexec_crash_size" reflect all the reserved >> memory including crashk_low_res, this is more understandable >> from its naming. > Maybe export another file for the kexec_crash_low_size so that > we can clearly get how much the low area is. I'm fine with it. >> Although we can get all the crash memory from "/proc/iomem" >> by filtering all "Crash kernel" keyword, it is more convenient >> to utilize this file, and the two ways should stay consistent. > Shrink low area does not make much sense, one may either use it or > shrink it to 0. > > Actually think more about it, the crashk_low is only for x86, > it might be even better to move it to x86 code instead of in > common code. > > Opinion? crashk_low is defined in kernel/kexec_core.c, it's an architecture independent definition though it's only used by x86 currently, maybe it can be used by others in the future. It's why I'm not handling it specifically for x86. I just tested the original proc interface further, and it can be shrinked to be zero. So I guess we can ease the restriction on shrinking the low area as well. What do you think? Regards, Xunlei > > Thanks > Dave >> Note that write to "/sys/kernel/kexec_crash_size" is to shrink >> the reserved memory, and we want to shrink crashk_res only. >> So we add some additional check in crash_shrink_memory() since >> crashk_low_res now is involved. 
>> >> Signed-off-by: Xunlei Pang>> --- >> kernel/kexec_core.c | 15 ++- >> 1 file changed, 14 insertions(+), 1 deletion(-) >> >> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c >> index 5616755..d5ae780 100644 >> --- a/kernel/kexec_core.c >> +++ b/kernel/kexec_core.c >> @@ -932,6 +932,8 @@ size_t crash_get_memory_size(void) >> mutex_lock(_mutex); >> if (crashk_res.end != crashk_res.start) >> size = resource_size(_res); >> +if (crashk_low_res.end != crashk_low_res.start) >> +size += resource_size(_low_res); >> mutex_unlock(_mutex); >> return size; >> } >> @@ -949,7 +951,7 @@ int crash_shrink_memory(unsigned long new_size) >> { >> int ret = 0; >> unsigned long start, end; >> -unsigned long old_size; >> +unsigned long low_size, old_size; >> struct resource *ram_res; >> >> mutex_lock(_mutex); >> @@ -958,6 +960,17 @@ int crash_shrink_memory(unsigned long new_size) >> ret = -ENOENT; >> goto unlock; >> } >> + >> +start = crashk_low_res.start; >> +end = crashk_low_res.end; >> +low_size = (end == 0) ? 0 : end - start + 1; >> +/* Do not shrink crashk_low_res. */ >> +if (new_size <= low_size) { >> +ret = -EINVAL; >> +goto unlock; >> +} >> + >> +new_size -= low_size; >> start = crashk_res.start; >> end = crashk_res.end; >> old_size = (end == 0) ? 0 : end - start + 1; >> -- >> 1.8.3.1 >> >> >> ___ >> kexec mailing list >> kexec@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/kexec > ___ > kexec mailing list > kexec@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/kexec ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH v5 04/13] powerpc: Factor out relocation code from module_64.c to elf_util_64.c.
On Thu, Aug 11, 2016 at 08:08:09PM -0300, Thiago Jung Bauermann wrote: > The kexec_file_load system call needs to relocate the purgatory, so > factor out the module relocation code so that it can be shared. > > This patch's purpose is to move the ELF relocation logic from > apply_relocate_add to elf_util_64.c with as few changes as > possible. The following changes were needed: > > To avoid having module-specific code in a general purpose utility > function, struct elf_info was created to contain the information > needed for ELF binaries manipulation. > > my_r2, stub_for_addr and create_stub were changed to use it instead of > having to receive a struct module, since they are called from > elf64_apply_relocate_add. > > local_entry_offset and squash_toc_save_inst were only used by > apply_rellocate_add, so they were moved to elf_util_64.c as well. > > Signed-off-by: Thiago Jung Bauermann> --- > arch/powerpc/include/asm/elf_util.h | 70 > arch/powerpc/include/asm/module.h | 14 +- > arch/powerpc/kernel/Makefile| 4 + > arch/powerpc/kernel/elf_util_64.c | 269 +++ > arch/powerpc/kernel/module_64.c | 312 > > 5 files changed, 386 insertions(+), 283 deletions(-) > > diff --git a/arch/powerpc/include/asm/elf_util.h > b/arch/powerpc/include/asm/elf_util.h > new file mode 100644 > index ..37372559fe62 > --- /dev/null > +++ b/arch/powerpc/include/asm/elf_util.h > @@ -0,0 +1,70 @@ > +/* > + * Utility functions to work with ELF files. > + * > + * Copyright (C) 2016, IBM Corporation > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2, or (at your option) > + * any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the > + * GNU General Public License for more details. > + */ > + > +#ifndef _ASM_POWERPC_ELF_UTIL_H > +#define _ASM_POWERPC_ELF_UTIL_H > + > +#include > + > +struct elf_info { > + struct elf_shdr *sechdrs; > + > + /* Index of stubs section. */ > + unsigned int stubs_section; > + /* Index of TOC section. */ > + unsigned int toc_section; > +}; > + > +#ifdef __powerpc64__ > +#ifdef PPC64_ELF_ABI_v2 > + > +/* An address is simply the address of the function. */ > +typedef unsigned long func_desc_t; > +#else > + > +/* An address is address of the OPD entry, which contains address of fn. */ > +typedef struct ppc64_opd_entry func_desc_t; > +#endif /* PPC64_ELF_ABI_v2 */ > + > +/* Like PPC32, we need little trampolines to do > 24-bit jumps (into > + the kernel itself). But on PPC64, these need to be used for every > + jump, actually, to reset r2 (TOC+0x8000). */ > +struct ppc64_stub_entry > +{ > + /* 28 byte jump instruction sequence (7 instructions). We only > + * need 6 instructions on ABIv2 but we always allocate 7 so > + * so we don't have to modify the trampoline load instruction. 
*/ > + u32 jump[7]; > + /* Used by ftrace to identify stubs */ > + u32 magic; > + /* Data for the above code */ > + func_desc_t funcdata; > +}; > +#endif > + > +/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this > + gives the value maximum span in an instruction which uses a signed > + offset) */ > +static inline unsigned long my_r2(const struct elf_info *elf_info) > +{ > + return elf_info->sechdrs[elf_info->toc_section].sh_addr + 0x8000; > +} > + > +int elf64_apply_relocate_add(const struct elf_info *elf_info, > + const char *strtab, unsigned int symindex, > + unsigned int relsec, const char *obj_name); > + > +#endif /* _ASM_POWERPC_ELF_UTIL_H */ > diff --git a/arch/powerpc/include/asm/module.h > b/arch/powerpc/include/asm/module.h > index cd4ffd86765f..f2073115d518 100644 > --- a/arch/powerpc/include/asm/module.h > +++ b/arch/powerpc/include/asm/module.h > @@ -12,7 +12,14 @@ > #include > #include > #include > +#include > > +/* Both low and high 16 bits are added as SIGNED additions, so if low > + 16 bits has high bit set, high 16 bits must be adjusted. These > + macros do that (stolen from binutils). */ > +#define PPC_LO(v) ((v) & 0x) > +#define PPC_HI(v) (((v) >> 16) & 0x) > +#define PPC_HA(v) PPC_HI ((v) + 0x8000) > > #ifndef __powerpc64__ > /* > @@ -33,8 +40,7 @@ struct ppc_plt_entry { > > struct mod_arch_specific { > #ifdef __powerpc64__ > - unsigned int stubs_section; /* Index of stubs section in module */ > - unsigned int toc_section; /* What section is the TOC? */ > + struct elf_info elf_info; > bool toc_fixed; /* Have we fixed up .TOC.? */ >
Re: [PATCH v5 02/13] kexec_file: Change kexec_add_buffer to take kexec_buf as argument.
On Thu, Aug 11, 2016 at 08:08:07PM -0300, Thiago Jung Bauermann wrote: > Adapt all callers to the new function prototype. > Could you please expand on this? > In addition, change the type of kexec_buf.buffer from char * to void *. > There is no particular reason for it to be a char *, and the change > allows us to get rid of 3 existing casts to char * in the code. > > Signed-off-by: Thiago Jung Bauermann> Acked-by: Dave Young > --- Looks good otherwise Acked-by: Balbir Singh ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v2 3/6] kexec_file: Allow skipping checksum calculation for some segments.
Adds checksum argument to kexec_add_buffer specifying whether the given segment should be part of the checksum calculation. The next patch will add a way to update segments after a kimage is loaded. Segments that will be updated in this way should not be checksummed, otherwise they will cause the purgatory checksum verification to fail when the machine is rebooted. As a bonus, we don't need to special-case the purgatory segment anymore to avoid checksumming it. Adjust call sites for the new argument. Signed-off-by: Thiago Jung Bauermann--- arch/powerpc/kernel/kexec_elf_64.c | 6 +++--- arch/x86/kernel/crash.c| 4 ++-- arch/x86/kernel/kexec-bzimage64.c | 6 +++--- include/linux/kexec.h | 10 +++--- kernel/kexec_file.c| 23 --- 5 files changed, 27 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c index 22afc7b5ee73..4c528c81b076 100644 --- a/arch/powerpc/kernel/kexec_elf_64.c +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -128,7 +128,7 @@ static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr, kbuf.memsz = phdr->p_memsz; kbuf.buf_align = phdr->p_align; kbuf.buf_min = phdr->p_paddr + base; - ret = kexec_add_buffer(); + ret = kexec_add_buffer(, true); if (ret) goto out; load_addr = kbuf.mem; @@ -188,7 +188,7 @@ void *elf64_load(struct kimage *image, char *kernel_buf, kbuf.bufsz = kbuf.memsz = initrd_len; kbuf.buf_align = PAGE_SIZE; kbuf.top_down = false; - ret = kexec_add_buffer(); + ret = kexec_add_buffer(, true); if (ret) goto out; initrd_load_addr = kbuf.mem; @@ -245,7 +245,7 @@ void *elf64_load(struct kimage *image, char *kernel_buf, kbuf.bufsz = kbuf.memsz = fdt_size; kbuf.buf_align = PAGE_SIZE; kbuf.top_down = true; - ret = kexec_add_buffer(); + ret = kexec_add_buffer(, true); if (ret) goto out; fdt_load_addr = kbuf.mem; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 38a1cdf6aa05..634ab16377b1 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -642,7 
+642,7 @@ int crash_load_segments(struct kimage *image) * copied in purgatory after crash. Just add a zero filled * segment for now to make sure checksum logic works fine. */ - ret = kexec_add_buffer(); + ret = kexec_add_buffer(, true); if (ret) return ret; image->arch.backup_load_addr = kbuf.mem; @@ -661,7 +661,7 @@ int crash_load_segments(struct kimage *image) kbuf.memsz = kbuf.bufsz; kbuf.buf_align = ELF_CORE_HEADER_ALIGN; - ret = kexec_add_buffer(); + ret = kexec_add_buffer(, true); if (ret) { vfree((void *)image->arch.elf_headers); return ret; diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 4b3a75329fb6..a46e3fbb0639 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -422,7 +422,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, kbuf.memsz = kbuf.bufsz; kbuf.buf_align = 16; kbuf.buf_min = MIN_BOOTPARAM_ADDR; - ret = kexec_add_buffer(); + ret = kexec_add_buffer(, true); if (ret) goto out_free_params; bootparam_load_addr = kbuf.mem; @@ -435,7 +435,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, kbuf.memsz = PAGE_ALIGN(header->init_size); kbuf.buf_align = header->kernel_alignment; kbuf.buf_min = MIN_KERNEL_LOAD_ADDR; - ret = kexec_add_buffer(); + ret = kexec_add_buffer(, true); if (ret) goto out_free_params; kernel_load_addr = kbuf.mem; @@ -449,7 +449,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, kbuf.bufsz = kbuf.memsz = initrd_len; kbuf.buf_align = PAGE_SIZE; kbuf.buf_min = MIN_INITRD_LOAD_ADDR; - ret = kexec_add_buffer(); + ret = kexec_add_buffer(, true); if (ret) goto out_free_params; initrd_load_addr = kbuf.mem; diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 4559a1a01b0a..37eea32fdff1 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -100,6 +100,9 @@ struct kexec_segment { size_t bufsz; unsigned long mem; size_t memsz; + + /* Whether this segment is part of the checksum calculation. 
*/ + bool do_checksum; }; #ifdef
[PATCH v2 5/6] kexec: Share logic to copy segment page contents.
Make kimage_load_normal_segment and kexec_update_segment share code which they currently duplicate. Signed-off-by: Thiago Jung Bauermann--- kernel/kexec_core.c | 159 +++- 1 file changed, 95 insertions(+), 64 deletions(-) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 806735201de6..68b5b245e457 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -721,6 +721,65 @@ static struct page *kimage_alloc_page(struct kimage *image, return page; } +struct kimage_update_buffer_state { + /* Destination memory address currently being copied to. */ + unsigned long maddr; + + /* Bytes in buffer still left to copy. */ + size_t ubytes; + + /* Bytes in memory still left to copy. */ + size_t mbytes; + + /* If true, copy from kbuf. */ + bool from_kernel; + + /* Clear pages before copying? */ + bool clear_pages; + + /* Buffer position to continue copying from. */ + const unsigned char *kbuf; + const unsigned char __user *buf; +}; + +static int kimage_update_page(struct page *page, + struct kimage_update_buffer_state *state) +{ + char *ptr; + int result = 0; + size_t uchunk, mchunk; + + ptr = kmap(page); + + /* Start with a clear page */ + if (state->clear_pages) + clear_page(ptr); + + ptr += state->maddr & ~PAGE_MASK; + mchunk = min_t(size_t, state->mbytes, + PAGE_SIZE - (state->maddr & ~PAGE_MASK)); + uchunk = min(state->ubytes, mchunk); + + if (state->from_kernel) + memcpy(ptr, state->kbuf, uchunk); + else + result = copy_from_user(ptr, state->buf, uchunk); + + kunmap(page); + if (result) + return -EFAULT; + + state->ubytes -= uchunk; + state->maddr += mchunk; + if (state->from_kernel) + state->kbuf += mchunk; + else + state->buf += mchunk; + state->mbytes -= mchunk; + + return 0; +} + /** * kexec_update_segment - update the contents of a kimage segment * @buffer:New contents of the segment. 
@@ -739,6 +798,7 @@ int kexec_update_segment(const char *buffer, unsigned long bufsz, unsigned long entry; unsigned long *ptr = NULL; void *dest = NULL; + struct kimage_update_buffer_state state; if (kexec_image == NULL) { pr_err("Can't update segment: no kexec image loaded.\n"); @@ -768,8 +828,15 @@ int kexec_update_segment(const char *buffer, unsigned long bufsz, return -EINVAL; } - for (entry = kexec_image->head; !(entry & IND_DONE) && memsz; -entry = *ptr++) { + state.maddr = load_addr; + state.ubytes = bufsz; + state.mbytes = memsz; + state.kbuf = buffer; + state.from_kernel = true; + state.clear_pages = false; + + for (entry = kexec_image->head; !(entry & IND_DONE) && + state.mbytes; entry = *ptr++) { void *addr = (void *) (entry & PAGE_MASK); switch (entry & IND_FLAGS) { @@ -786,26 +853,13 @@ int kexec_update_segment(const char *buffer, unsigned long bufsz, return -EINVAL; } - if (dest == (void *) load_addr) { - struct page *page; - char *ptr; - size_t uchunk, mchunk; - - page = kmap_to_page(addr); - - ptr = kmap(page); - ptr += load_addr & ~PAGE_MASK; - mchunk = min_t(size_t, memsz, - PAGE_SIZE - (load_addr & ~PAGE_MASK)); - uchunk = min(bufsz, mchunk); - memcpy(ptr, buffer, uchunk); - - kunmap(page); + if (dest == (void *) state.maddr) { + int ret; - bufsz -= uchunk; - load_addr += mchunk; - buffer += mchunk; - memsz -= mchunk; + ret = kimage_update_page(kmap_to_page(addr), +); + if (ret) + return ret; } dest += PAGE_SIZE; } @@ -823,31 +877,30 @@ int kexec_update_segment(const char *buffer, unsigned long bufsz, static int kimage_load_normal_segment(struct kimage *image, struct kexec_segment *segment) { - unsigned long
[PATCH v2 6/6] IMA: Demonstration code for kexec buffer passing.
This patch is not intended to be committed. It shows how kernel code can use the kexec buffer passing mechanism to pass information to the next kernel. Signed-off-by: Thiago Jung Bauermann--- include/linux/ima.h | 11 + kernel/kexec_file.c | 4 ++ security/integrity/ima/ima.h | 5 +++ security/integrity/ima/ima_init.c | 26 +++ security/integrity/ima/ima_template.c | 85 +++ 5 files changed, 131 insertions(+) diff --git a/include/linux/ima.h b/include/linux/ima.h index 0eb7c2e7f0d6..96528d007139 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -11,6 +11,7 @@ #define _LINUX_IMA_H #include +#include struct linux_binprm; #ifdef CONFIG_IMA @@ -23,6 +24,10 @@ extern int ima_post_read_file(struct file *file, void *buf, loff_t size, enum kernel_read_file_id id); extern void ima_post_path_mknod(struct dentry *dentry); +#ifdef CONFIG_KEXEC_FILE +extern void ima_add_kexec_buffer(struct kimage *image); +#endif + #else static inline int ima_bprm_check(struct linux_binprm *bprm) { @@ -60,6 +65,12 @@ static inline void ima_post_path_mknod(struct dentry *dentry) return; } +#ifdef CONFIG_KEXEC_FILE +static inline void ima_add_kexec_buffer(struct kimage *image) +{ +} +#endif + #endif /* CONFIG_IMA */ #ifdef CONFIG_IMA_APPRAISE diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index aed51175915f..bf8f61c20c11 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -321,6 +322,9 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, } } + /* IMA needs to pass the measurement list to the next kernel. 
*/ + ima_add_kexec_buffer(image); + /* Call arch image load handlers */ ldata = arch_kexec_kernel_image_load(image); diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index db25f54a04fe..0334001055d7 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -102,6 +102,11 @@ struct ima_queue_entry { }; extern struct list_head ima_measurements; /* list of all measurements */ +#ifdef CONFIG_KEXEC_FILE +extern void *kexec_buffer; +extern size_t kexec_buffer_size; +#endif + /* Internal IMA function definitions */ int ima_init(void); int ima_fs_init(void); diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c index 32912bd54ead..a1924d0f3b2b 100644 --- a/security/integrity/ima/ima_init.c +++ b/security/integrity/ima/ima_init.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "ima.h" @@ -104,6 +105,29 @@ void __init ima_load_x509(void) } #endif +#ifdef CONFIG_KEXEC_FILE +static void ima_load_kexec_buffer(void) +{ + int rc; + + /* Fetch the buffer from the previous kernel, if any. */ + rc = kexec_get_handover_buffer(_buffer, _buffer_size); + if (rc == 0) { + /* Demonstrate that buffer handover works. 
*/ + pr_err("kexec buffer contents: %s\n", (char *) kexec_buffer); + pr_err("kexec buffer contents after update: %s\n", + (char *) kexec_buffer + 4 * PAGE_SIZE + 10); + + kexec_free_handover_buffer(); + } else if (rc == -ENOENT) + pr_debug("No kexec buffer from the previous kernel.\n"); + else + pr_debug("Error restoring kexec buffer: %d\n", rc); +} +#else +static void ima_load_kexec_buffer(void) { } +#endif + int __init ima_init(void) { u8 pcr_i[TPM_DIGEST_SIZE]; @@ -134,5 +158,7 @@ int __init ima_init(void) ima_init_policy(); + ima_load_kexec_buffer(); + return ima_fs_init(); } diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c index febd12ed9b55..a8609f3a13d2 100644 --- a/security/integrity/ima/ima_template.c +++ b/security/integrity/ima/ima_template.c @@ -15,6 +15,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include #include "ima.h" #include "ima_template_lib.h" @@ -182,6 +184,89 @@ static int template_desc_init_fields(const char *template_fmt, return 0; } +#ifdef CONFIG_KEXEC_FILE +void *kexec_buffer = NULL; +size_t kexec_buffer_size = 0; + +/* Physical address of the measurement buffer in the next kernel. */ +unsigned long kexec_buffer_load_addr = 0; + +/* + * Called during reboot. IMA can add here new events that were generated after + * the kexec image was loaded. + */ +static int ima_update_kexec_buffer(struct notifier_block *self, + unsigned long action, void *data) +{ + int ret; + + if (!kexec_in_progress) + return NOTIFY_OK; + + /* +* Add content deep in the buffer to show that we can update +
[PATCH v2 2/6] powerpc: kexec_file: Add buffer hand-over support for the next kernel
The buffer hand-over mechanism allows the currently running kernel to pass data to kernel that will be kexec'd via a kexec segment. The second kernel can check whether the previous kernel sent data and retrieve it. This is the architecture-specific part. Signed-off-by: Thiago Jung Bauermann--- arch/powerpc/include/asm/kexec.h | 12 +++- arch/powerpc/kernel/kexec_elf_64.c | 2 +- arch/powerpc/kernel/machine_kexec_64.c | 114 +++-- 3 files changed, 120 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 31bc64e07c8f..b20738df26f8 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -92,12 +92,20 @@ static inline bool kdump_in_progress(void) } #ifdef CONFIG_KEXEC_FILE +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + phys_addr_t handover_buffer_addr; + unsigned long handover_buffer_size; +}; + int setup_purgatory(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, unsigned long fdt_load_addr, unsigned long stack_top, int debug); -int setup_new_fdt(void *fdt, unsigned long initrd_load_addr, - unsigned long initrd_len, const char *cmdline); +int setup_new_fdt(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, unsigned long initrd_len, + const char *cmdline); bool find_debug_console(const void *fdt, int chosen_node); int merge_partial_dtb(void *to, const void *from); #endif /* CONFIG_KEXEC_FILE */ diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c index 1b902ad66e2a..22afc7b5ee73 100644 --- a/arch/powerpc/kernel/kexec_elf_64.c +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -219,7 +219,7 @@ void *elf64_load(struct kimage *image, char *kernel_buf, } } - ret = setup_new_fdt(fdt, initrd_load_addr, initrd_len, cmdline); + ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); if (ret) goto out; diff --git a/arch/powerpc/kernel/machine_kexec_64.c 
b/arch/powerpc/kernel/machine_kexec_64.c index a484a6346146..190c652e49b7 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -490,6 +490,60 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) return image->fops->cleanup(image->image_loader_data); } +bool kexec_can_hand_over_buffer(void) +{ + return true; +} + +int arch_kexec_add_handover_buffer(struct kimage *image, + unsigned long load_addr, unsigned long size) +{ + image->arch.handover_buffer_addr = load_addr; + image->arch.handover_buffer_size = size; + + return 0; +} + +int kexec_get_handover_buffer(void **addr, unsigned long *size) +{ + int ret; + u64 start_addr, end_addr; + + ret = of_property_read_u64(of_chosen, + "linux,kexec-handover-buffer-start", + _addr); + if (ret == -EINVAL) + return -ENOENT; + else if (ret) + return -EINVAL; + + ret = of_property_read_u64(of_chosen, "linux,kexec-handover-buffer-end", + _addr); + if (ret == -EINVAL) + return -ENOENT; + else if (ret) + return -EINVAL; + + *addr = __va(start_addr); + /* -end is the first address after the buffer. */ + *size = end_addr - start_addr; + + return 0; +} + +int kexec_free_handover_buffer(void) +{ + int ret; + void *addr; + unsigned long size; + + ret = kexec_get_handover_buffer(, ); + if (ret) + return ret; + + return memblock_free((phys_addr_t) addr, size); +} + /** * arch_kexec_walk_mem() - call func(data) for each unreserved memory block * @kbuf: Context info for the search. Also passed to @func. @@ -687,9 +741,52 @@ int setup_purgatory(struct kimage *image, const void *slave_code, return 0; } -/* - * setup_new_fdt() - modify /chosen and memory reservation for the next kernel - * @fdt: +/** + * setup_handover_buffer() - add properties and reservation for the handover buffer + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @chosen_node: Offset to the chosen node. + * + * Return: 0 on success, negative errno on error. 
+ */ +static int setup_handover_buffer(const struct kimage *image, void *fdt, +int chosen_node) +{ + int ret; + + if (image->arch.handover_buffer_addr == 0) + return 0; + + ret = fdt_setprop_u64(fdt, chosen_node, + "linux,kexec-handover-buffer-start", +
[PATCH v2 4/6] kexec_file: Add mechanism to update kexec segments.
kexec_update_segment allows a given segment in kexec_image to have its contents updated. This is useful if the current kernel wants to send information to the next kernel that is up-to-date at the time of reboot. Signed-off-by: Thiago Jung Bauermann--- include/linux/kexec.h | 2 ++ kernel/kexec_core.c | 99 +++ 2 files changed, 101 insertions(+) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 37eea32fdff1..14dda81e3e01 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -259,6 +259,8 @@ extern int kexec_purgatory_get_set_symbol(struct kimage *image, unsigned int size, bool get_value); extern void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); +int kexec_update_segment(const char *buffer, unsigned long bufsz, +unsigned long load_addr, unsigned long memsz); extern void __crash_kexec(struct pt_regs *); extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 561675589511..806735201de6 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -721,6 +721,105 @@ static struct page *kimage_alloc_page(struct kimage *image, return page; } +/** + * kexec_update_segment - update the contents of a kimage segment + * @buffer:New contents of the segment. + * @bufsz: @buffer size. + * @load_addr: Segment's physical address in the next kernel. + * @memsz: Segment size. + * + * This function assumes kexec_mutex is held. + * + * Return: 0 on success, negative errno on error. + */ +int kexec_update_segment(const char *buffer, unsigned long bufsz, +unsigned long load_addr, unsigned long memsz) +{ + int i; + unsigned long entry; + unsigned long *ptr = NULL; + void *dest = NULL; + + if (kexec_image == NULL) { + pr_err("Can't update segment: no kexec image loaded.\n"); + return -EINVAL; + } + + /* +* kexec_add_buffer rounds up segment sizes to PAGE_SIZE, so +* we have to do it here as well. 
+*/ + memsz = ALIGN(memsz, PAGE_SIZE); + + for (i = 0; i < kexec_image->nr_segments; i++) + /* We only support updating whole segments. */ + if (load_addr == kexec_image->segment[i].mem && + memsz == kexec_image->segment[i].memsz) { + if (kexec_image->segment[i].do_checksum) { + pr_err("Trying to update non-modifiable segment.\n"); + return -EINVAL; + } + + break; + } + if (i == kexec_image->nr_segments) { + pr_err("Couldn't find segment to update: 0x%lx, size 0x%lx\n", + load_addr, memsz); + return -EINVAL; + } + + for (entry = kexec_image->head; !(entry & IND_DONE) && memsz; +entry = *ptr++) { + void *addr = (void *) (entry & PAGE_MASK); + + switch (entry & IND_FLAGS) { + case IND_DESTINATION: + dest = addr; + break; + case IND_INDIRECTION: + ptr = __va(addr); + break; + case IND_SOURCE: + /* Shouldn't happen, but verify just to be safe. */ + if (dest == NULL) { + pr_err("Invalid kexec entries list."); + return -EINVAL; + } + + if (dest == (void *) load_addr) { + struct page *page; + char *ptr; + size_t uchunk, mchunk; + + page = kmap_to_page(addr); + + ptr = kmap(page); + ptr += load_addr & ~PAGE_MASK; + mchunk = min_t(size_t, memsz, + PAGE_SIZE - (load_addr & ~PAGE_MASK)); + uchunk = min(bufsz, mchunk); + memcpy(ptr, buffer, uchunk); + + kunmap(page); + + bufsz -= uchunk; + load_addr += mchunk; + buffer += mchunk; + memsz -= mchunk; + } + dest += PAGE_SIZE; + } + + /* Shouldn't happen, but verify just to be safe. */ + if (ptr == NULL) { + pr_err("Invalid kexec entries list."); + return
[PATCH v2 0/6] kexec_file: Add buffer hand-over for the next kernel
Hello, This patch series implements a mechanism which allows the kernel to pass on a buffer to the kernel that will be kexec'd. This buffer is passed as a segment which is added to the kimage when it is being prepared by kexec_file_load. How the second kernel is informed of this buffer is architecture-specific. On powerpc, this is done via the device tree, by checking the properties /chosen/linux,kexec-handover-buffer-start and /chosen/linux,kexec-handover-buffer-end, which is analogous to how the kernel finds the initrd. This is needed because the Integrity Measurement Architecture subsystem needs to preserve its measurement list across the kexec reboot. The following patch series for the IMA subsystem uses this feature for that purpose: https://lists.infradead.org/pipermail/kexec/2016-August/016745.html This is so that IMA can implement trusted boot support on the OpenPower platform, because on such systems an intermediary Linux instance running as part of the firmware is used to boot the target operating system via kexec. Using this mechanism, IMA on this intermediary instance can hand over to the target OS the measurements of the components that were used to boot it. Because there could be additional measurement events between the kexec_file_load call and the actual reboot, IMA needs a way to update the buffer with those additional events before rebooting. One can minimize the interval between the kexec_file_load and the reboot syscalls, but as small as it can be, there is always the possibility that the measurement list will be out of date at the time of reboot. To address this issue, this patch series also introduces kexec_update_segment, which allows a reboot notifier to change the contents of the image segment during the reboot process. Patch 5 makes kimage_load_normal_segment and kexec_update_segment share code. It's not much code that they can share though, so I'm not sure if the result is actually better. 
The last patch is not intended to be merged, it just demonstrates how this feature can be used. This series applies on top of v5 of the "kexec_file_load implementation for PowerPC" patch series (which applies on top of v4.8-rc1): https://lists.infradead.org/pipermail/kexec/2016-August/016843.html Changes for v2: - Rebased on v5 of kexec_file_load implementation for PowerPC patch series. - Patch "kexec_file: Add buffer hand-over support for the next kernel" - Changed kexec_add_handover_buffer to receive a struct kexec_buf, as suggested by Dave Young. - Patch "powerpc: kexec_file: Add buffer hand-over support for the next kernel" - Moved setup_handover_buffer from kexec_elf_64.c to machine_kexec_64.c. - Call setup_handover_buffer from setup_new_fdt instead of elf64_load. - Changed kexec_get_handover_buffer to read from the expanded device tree instead of the flattened device tree. - Patch "kexec_file: Add mechanism to update kexec segments.": - Removed unnecessary "#include " in kexec_file.c. - Round up memsz argument to PAGE_SIZE. - Check if kexec_image is NULL in kexec_update_segment. - Patch "IMA: Demonstration code for kexec buffer passing." - Avoid registering reboot notifier again if kexec_file_load is called more than once. Thiago Jung Bauermann (6): kexec_file: Add buffer hand-over support for the next kernel powerpc: kexec_file: Add buffer hand-over support for the next kernel kexec_file: Allow skipping checksum calculation for some segments. kexec_file: Add mechanism to update kexec segments. kexec: Share logic to copy segment page contents. IMA: Demonstration code for kexec buffer passing. 
arch/powerpc/include/asm/kexec.h | 12 +- arch/powerpc/kernel/kexec_elf_64.c | 8 +- arch/powerpc/kernel/machine_kexec_64.c | 114 - arch/x86/kernel/crash.c| 4 +- arch/x86/kernel/kexec-bzimage64.c | 6 +- include/linux/ima.h| 11 ++ include/linux/kexec.h | 37 +- kernel/kexec_core.c| 216 ++--- kernel/kexec_file.c| 91 -- security/integrity/ima/ima.h | 5 + security/integrity/ima/ima_init.c | 26 security/integrity/ima/ima_template.c | 85 + 12 files changed, 546 insertions(+), 69 deletions(-) -- 1.9.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH v2 1/6] kexec_file: Add buffer hand-over support for the next kernel
The buffer hand-over mechanism allows the currently running kernel to pass data to kernel that will be kexec'd via a kexec segment. The second kernel can check whether the previous kernel sent data and retrieve it. This is the architecture-independent part of the feature. Signed-off-by: Thiago Jung Bauermann--- include/linux/kexec.h | 29 ++ kernel/kexec_file.c | 68 +++ 2 files changed, 97 insertions(+) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index ceccc5856aab..4559a1a01b0a 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -388,6 +388,35 @@ static inline void *boot_phys_to_virt(unsigned long entry) return phys_to_virt(boot_phys_to_phys(entry)); } +#ifdef CONFIG_KEXEC_FILE +bool __weak kexec_can_hand_over_buffer(void); +int __weak arch_kexec_add_handover_buffer(struct kimage *image, + unsigned long load_addr, + unsigned long size); +int kexec_add_handover_buffer(struct kexec_buf *kbuf); +int __weak kexec_get_handover_buffer(void **addr, unsigned long *size); +int __weak kexec_free_handover_buffer(void); +#else +static inline bool kexec_can_hand_over_buffer(void) +{ + return false; +} + +static inline int kexec_add_handover_buffer(struct kexec_buf *kbuf) +{ + return -ENOTSUPP; +} + +static inline int kexec_get_handover_buffer(void **addr, unsigned long *size) +{ + return -ENOTSUPP; +} + +static inline int kexec_free_handover_buffer(void) +{ + return -ENOTSUPP; +} +#endif /* CONFIG_KEXEC_FILE */ #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 772cb491715e..c8418d62e2fc 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -135,6 +135,74 @@ int __weak arch_kexec_verify_buffer(enum kexec_file_type type, const void *buf, return -EINVAL; } +/** + * kexec_can_hand_over_buffer - can we pass data to the kexec'd kernel? 
+ */ +bool __weak kexec_can_hand_over_buffer(void) +{ + return false; +} + +/** + * arch_kexec_add_handover_buffer - do arch-specific steps to handover buffer + * + * Architectures should use this function to pass on the handover buffer + * information to the next kernel. + * + * Return: 0 on success, negative errno on error. + */ +int __weak arch_kexec_add_handover_buffer(struct kimage *image, + unsigned long load_addr, + unsigned long size) +{ + return -ENOTSUPP; +} + +/** + * kexec_add_handover_buffer - add buffer to be used by the next kernel + * @kbuf: Buffer contents and memory parameters. + * + * This function assumes that kexec_mutex is held. + * On successful return, @kbuf->mem will have the physical address of + * the buffer in the next kernel. + * + * Return: 0 on success, negative errno on error. + */ +int kexec_add_handover_buffer(struct kexec_buf *kbuf) +{ + int ret; + + if (!kexec_can_hand_over_buffer()) + return -ENOTSUPP; + + ret = kexec_add_buffer(kbuf); + if (ret) + return ret; + + return arch_kexec_add_handover_buffer(kbuf->image, kbuf->mem, + kbuf->memsz); +} + +/** + * kexec_get_handover_buffer - get the handover buffer from the previous kernel + * @addr: On successful return, set to point to the buffer contents. + * @size: On successful return, set to the buffer size. + * + * Return: 0 on success, negative errno on error. + */ +int __weak kexec_get_handover_buffer(void **addr, unsigned long *size) +{ + return -ENOTSUPP; +} + +/** + * kexec_free_handover_buffer - free memory used by the handover buffer + */ +int __weak kexec_free_handover_buffer(void) +{ + return -ENOTSUPP; +} + /* * In file mode list of segments is prepared by kernel. Copy relevant * data from user space, do error checking, prepare segment list -- 1.9.1 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH] kexec: Account crashk_low_res to kexec_crash_size
Hi Xunlei, On 08/13/16 at 04:26pm, Xunlei Pang wrote: > "/sys/kernel/kexec_crash_size" only includes crashk_res, it > is fine in most cases, but sometimes we have crashk_low_res. > For example, when "crashkernel=size[KMG],high" combined with > "crashkernel=size[KMG],low" is used for 64-bit x86. > > Let "/sys/kernel/kexec_crash_size" reflect all the reserved > memory including crashk_low_res, this is more understandable > from its naming. Maybe export another file for the kexec_crash_low_size so that we can clearly get how much the low area is. > > Although we can get all the crash memory from "/proc/iomem" > by filtering all "Crash kernel" keyword, it is more convenient > to utilize this file, and the two ways should stay consistent. Shrink low area does not make much sense, one may either use it or shrink it to 0. Actually think more about it, the crashk_low is only for x86, it might be even better to move it to x86 code instead of in common code. Opinion? Thanks Dave > > Note that write to "/sys/kernel/kexec_crash_size" is to shrink > the reserved memory, and we want to shrink crashk_res only. > So we add some additional check in crash_shrink_memory() since > crashk_low_res now is involved. 
> > Signed-off-by: Xunlei Pang> --- > kernel/kexec_core.c | 15 ++- > 1 file changed, 14 insertions(+), 1 deletion(-) > > diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c > index 5616755..d5ae780 100644 > --- a/kernel/kexec_core.c > +++ b/kernel/kexec_core.c > @@ -932,6 +932,8 @@ size_t crash_get_memory_size(void) > mutex_lock(_mutex); > if (crashk_res.end != crashk_res.start) > size = resource_size(_res); > + if (crashk_low_res.end != crashk_low_res.start) > + size += resource_size(_low_res); > mutex_unlock(_mutex); > return size; > } > @@ -949,7 +951,7 @@ int crash_shrink_memory(unsigned long new_size) > { > int ret = 0; > unsigned long start, end; > - unsigned long old_size; > + unsigned long low_size, old_size; > struct resource *ram_res; > > mutex_lock(_mutex); > @@ -958,6 +960,17 @@ int crash_shrink_memory(unsigned long new_size) > ret = -ENOENT; > goto unlock; > } > + > + start = crashk_low_res.start; > + end = crashk_low_res.end; > + low_size = (end == 0) ? 0 : end - start + 1; > + /* Do not shrink crashk_low_res. */ > + if (new_size <= low_size) { > + ret = -EINVAL; > + goto unlock; > + } > + > + new_size -= low_size; > start = crashk_res.start; > end = crashk_res.end; > old_size = (end == 0) ? 0 : end - start + 1; > -- > 1.8.3.1 > > > ___ > kexec mailing list > kexec@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/kexec ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec