RE: [PATCH 3/3] close_dump_bitmap: simplify logic

2016-08-15 Thread Atsushi Kumagai
>> > The boolean expression replicates the logic of open_dump_bitmap().
>> > It's simpler and less error-prone to simply check if fd_bitmap is
>> > valid.
>> >
>> > Signed-off-by: Martin Wilck 
>> > ---
>> >  makedumpfile.c | 3 +--
>> >  1 file changed, 1 insertion(+), 2 deletions(-)
>> >
>> > diff --git a/makedumpfile.c b/makedumpfile.c
>> > index 43278f1..771ab7c 100644
>> > --- a/makedumpfile.c
>> > +++ b/makedumpfile.c
>> > @@ -8579,8 +8579,7 @@ close_dump_file(void)
>> >  void
>> >  close_dump_bitmap(void)
>> >  {
>> > -  if (!info->working_dir && !info->flag_reassemble && !info-
>> > >flag_refiltering
>> > -  && !info->flag_sadump && !info->flag_mem_usage)
>> > +  if (!info->fd_bitmap)
>>
>> Strictly speaking, zero is a valid FD. I can see that it is highly
>> unlikely to be the bitmap FD, but it would be a nice cleanup to
>> initialize fd_bitmap to a negative number and check info->fd_bitmap <
>> 0.
>> I'm just not sure where to put the initialization...
>
>
>> > OTOH I know I'm asking you to fix something that you didn't break.
>
>I had the same thought, and the same excuse not to address it in this
>patch set. If you grep makedumpfile.c for "fd_bitmap", you'll see many
>checks like "if (info->fd_bitmap)". I just followed that pattern for
>now.

I see, it would be better to make the checks strict on this occasion.
So, could you work on that cleanup and post it as an additional patch
ahead of your three patches?


Thanks,
Atsushi Kumagai
___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v2 2/2] kexec: extend kexec_file_load system call

2016-08-15 Thread Thiago Jung Bauermann
Here is a new version implementing your suggestions.
I also changed it to kmalloc fdset instead of using the stack.

What do you think?

From: AKASHI Takahiro 

Device tree blob must be passed to a second kernel on DTB-capable
archs, like powerpc and arm64, but the current kernel interface
lacks this support.

This patch extends kexec_file_load system call by adding an extra
argument to this syscall so that an arbitrary number of file descriptors
can be handed out from user space to the kernel.

long sys_kexec_file_load(int kernel_fd, int initrd_fd,
 unsigned long cmdline_len,
 const char __user *cmdline_ptr,
 unsigned long flags,
 const struct kexec_fdset __user *ufdset);

If KEXEC_FILE_EXTRA_FDS is set to the "flags" argument, the "ufdset"
argument points to the following struct buffer:

struct kexec_fdset {
int nr_fds;
struct kexec_file_fd fds[0];
}

Signed-off-by: AKASHI Takahiro 
Signed-off-by: Thiago Jung Bauermann 
---
 include/linux/fs.h |  1 +
 include/linux/kexec.h  |  7 +++-
 include/linux/syscalls.h   |  4 +-
 include/uapi/linux/kexec.h | 22 +++
 kernel/kexec_file.c| 92 +++---
 5 files changed, 117 insertions(+), 9 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3523bf62f328..2eb0674392d1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2656,6 +2656,7 @@ extern int do_pipe_flags(int *, int);
id(MODULE, kernel-module)   \
id(KEXEC_IMAGE, kexec-image)\
id(KEXEC_INITRAMFS, kexec-initramfs)\
+   id(KEXEC_PARTIAL_DTB, kexec-partial-dtb)\
id(POLICY, security-policy) \
id(MAX_ID, )
 
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 4f85d284ed0b..29202935055d 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -148,7 +148,10 @@ struct kexec_file_ops {
kexec_verify_sig_t *verify_sig;
 #endif
 };
-#endif
+
+int __weak arch_kexec_verify_buffer(enum kexec_file_type type, const void *buf,
+   unsigned long size);
+#endif /* CONFIG_KEXEC_FILE */
 
 struct kimage {
kimage_entry_t head;
@@ -280,7 +283,7 @@ extern int kexec_load_disabled;
 
 /* List of defined/legal kexec file flags */
 #define KEXEC_FILE_FLAGS   (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
-KEXEC_FILE_NO_INITRAMFS)
+KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_EXTRA_FDS)
 
 #define VMCOREINFO_BYTES   (4096)
 #define VMCOREINFO_NOTE_NAME   "VMCOREINFO"
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d02239022bd0..fc072bdb74e3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -66,6 +66,7 @@ struct perf_event_attr;
 struct file_handle;
 struct sigaltstack;
 union bpf_attr;
+struct kexec_fdset;
 
 #include 
 #include 
@@ -321,7 +322,8 @@ asmlinkage long sys_kexec_load(unsigned long entry, 
unsigned long nr_segments,
 asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd,
unsigned long cmdline_len,
const char __user *cmdline_ptr,
-   unsigned long flags);
+   unsigned long flags,
+   const struct kexec_fdset __user *ufdset);
 
 asmlinkage long sys_exit(int error_code);
 asmlinkage long sys_exit_group(int error_code);
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index aae5ebf2022b..6279be79efba 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -23,6 +23,28 @@
 #define KEXEC_FILE_UNLOAD  0x0001
 #define KEXEC_FILE_ON_CRASH0x0002
 #define KEXEC_FILE_NO_INITRAMFS0x0004
+#define KEXEC_FILE_EXTRA_FDS   0x0008
+
+enum kexec_file_type {
+   KEXEC_FILE_TYPE_KERNEL,
+   KEXEC_FILE_TYPE_INITRAMFS,
+
+   /*
+* Device Tree Blob containing just the nodes and properties that
+* the kexec_file_load caller wants to add or modify.
+*/
+   KEXEC_FILE_TYPE_PARTIAL_DTB,
+};
+
+struct kexec_file_fd {
+   enum kexec_file_type type;
+   int fd;
+};
+
+struct kexec_fdset {
+   int nr_fds;
+   struct kexec_file_fd fds[0];
+};
 
 /* These values match the ELF architecture values.
  * Unless there is a good reason that should continue to be the case.
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 113af2f219b9..302427e5ee71 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -116,6 +116,22 @@ void kimage_file_post_load_cleanup(struct kimage *image)
image->image_loader_data = NULL;
 }
 
+/**
+ 

Re: [PATCH v5 04/13] powerpc: Factor out relocation code from module_64.c to elf_util_64.c.

2016-08-15 Thread Thiago Jung Bauermann
Am Montag, 15 August 2016, 17:46:34 schrieb Balbir Singh:
> On Thu, Aug 11, 2016 at 08:08:09PM -0300, Thiago Jung Bauermann wrote:
> > +/**
> > + * elf64_apply_relocate_add - apply 64 bit RELA relocations
> > + * @elf_info:  Support information for the ELF binary being 
relocated.
> > + * @strtab:String table for the associated symbol 
table.
> > + * @symindex:  Section header index for the associated 
symbol table.
> > + * @relsec:Section header index for the relocations to 
apply.
> > + * @obj_name:  The name of the ELF binary, for information 
messages.
> > + */
> > +int elf64_apply_relocate_add(const struct elf_info *elf_info,
> > +const char *strtab, unsigned int symindex,
> > +unsigned int relsec, const char *obj_name)
> > +{
> > +   unsigned int i;
> > +   Elf64_Shdr *sechdrs = elf_info->sechdrs;
> > +   Elf64_Rela *rela = (void *)sechdrs[relsec].sh_addr;
> > +   Elf64_Sym *sym;
> > +   unsigned long *location;
> > +   unsigned long value;
> > +
> 
> For the relocatable kernel we expect only
> 
> R_PPC64_RELATIVE
> R_PPC64_NONE
> R_PPC64_ADDR64
> 
> In the future we can use this to check/assert the usage of this
> for the core kernel (vmlinux) when loaded.
> 
> Did we check elf64_apply_relocate_add with zImage and vmlinux?

kexec_file_load doesn't call elf64_apply_relocate_add on the kernel 
image, it only uses it to relocate the purgatory. So whether it is loading a 
zImage or a vmlinux file, the function will work in the same way since the 
purgatory binary is the same regardless of the kernel image format.

For the same reason, as it currently stands kexec_file_load can't check the 
relocation types used in the kernel image. But it is possible to add such a 
check/assertion in kexec_elf_64.c:build_elf_exec_info if we want.

I tested kexec_file_load on both relocatable and non-relocatable vmlinux and 
it works correctly.

I hadn't tested with zImage yet. I just did, and I had two problems:

1. For some reason, it has an INTERP segment. This patch series doesn't 
support loading program interpreters for ELF binaries, so 
kexec_elf_64.c:build_elf_exec_info refuses to load them.

2. If I disable the check for the INTERP segment, the zImage file loads 
correctly, but then I get an exception during reboot when loading the kexec 
image, right before jumping into the purgatory. I suspect this is because 
the LOAD segment has a virtual address of 0, and the first kernel is not 
coping well with that. But I still have to debug it further.

Is there a reason for the zImage ELF header to request an interpreter and to 
have a virtual address of 0?

-- 
[]'s
Thiago Jung Bauermann
IBM Linux Technology Center


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [V4 PATCH 2/2] mips/panic: Replace smp_send_stop() with kdump friendly version in panic path

2016-08-15 Thread Corey Minyard

On 08/15/2016 12:06 PM, Corey Minyard wrote:

On 08/15/2016 06:35 AM, 河合英宏 / KAWAI,HIDEHIRO wrote:

Hi Corey,


From: Corey Minyard [mailto:cminy...@mvista.com]
Sent: Friday, August 12, 2016 10:56 PM
I'll try to test this, but I have one comment inline...

Thank you very much!


On 08/11/2016 10:17 PM, Dave Young wrote:

On 08/10/16 at 05:09pm, Hidehiro Kawai wrote:

[snip]

diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c
index 610f0f3..1723b17 100644
--- a/arch/mips/kernel/crash.c
+++ b/arch/mips/kernel/crash.c
@@ -47,9 +47,14 @@ static void crash_shutdown_secondary(void 
*passed_regs)


   static void crash_kexec_prepare_cpus(void)
   {
+static int cpus_stopped;
   unsigned int msecs;
+unsigned int ncpus;

-unsigned int ncpus = num_online_cpus() - 1;/* Excluding the 
panic cpu */

+if (cpus_stopped)
+return;

Wouldn't you want an atomic operation and some special handling here to
ensure that only one CPU does this?  So if a CPU comes in here and
another CPU is already in the process stopping the CPUs it won't 
result in a

deadlock.

Because this function can be called by only one panicking CPU,
there is no problem.

There are two paths through which crash_kexec_prepare_cpus is called.

Path 1 (panic path):
panic()
   crash_smp_send_stop()
 crash_kexec_prepare_cpus()

Path 2 (oops path):
crash_kexec()
   __crash_kexec()
 machine_crash_shutdown()
   default_machine_crash_shutdown() // for MIPS
 crash_kexec_prepare_cpus()

Here, panic() and crash_kexec() run exclusively via the
panic_cpu atomic variable.  So we can use cpus_stopped as a
normal variable.


Ok, if the code can only be entered once, what's the purpose of
cpus_stopped?
I guess that's what confused me.  You are right, the panic_cpu atomic
should keep this on a single CPU.


Never mind, I see the path through panic() where that is required. My 
question

below still remains, though.

-corey



Also, panic() will call panic_smp_self_stop() if it finds another CPU 
has already
called panic, which will just spin with interrupts off by default. I 
didn't see a
definition for it in MIPS, wouldn't it need to be overridden to avoid 
a deadlock?


-corey



Best regards,

Hidehiro Kawai






___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [V4 PATCH 2/2] mips/panic: Replace smp_send_stop() with kdump friendly version in panic path

2016-08-15 Thread Corey Minyard

On 08/15/2016 06:35 AM, 河合英宏 / KAWAI,HIDEHIRO wrote:

Hi Corey,


From: Corey Minyard [mailto:cminy...@mvista.com]
Sent: Friday, August 12, 2016 10:56 PM
I'll try to test this, but I have one comment inline...

Thank you very much!


On 08/11/2016 10:17 PM, Dave Young wrote:

On 08/10/16 at 05:09pm, Hidehiro Kawai wrote:

[snip]

diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c
index 610f0f3..1723b17 100644
--- a/arch/mips/kernel/crash.c
+++ b/arch/mips/kernel/crash.c
@@ -47,9 +47,14 @@ static void crash_shutdown_secondary(void *passed_regs)

   static void crash_kexec_prepare_cpus(void)
   {
+   static int cpus_stopped;
unsigned int msecs;
+   unsigned int ncpus;

-   unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+   if (cpus_stopped)
+   return;

Wouldn't you want an atomic operation and some special handling here to
ensure that only one CPU does this?  So if a CPU comes in here and
another CPU is already in the process stopping the CPUs it won't result in a
deadlock.

Because this function can be called by only one panicking CPU,
there is no problem.

There are two paths through which crash_kexec_prepare_cpus is called.

Path 1 (panic path):
panic()
   crash_smp_send_stop()
 crash_kexec_prepare_cpus()

Path 2 (oops path):
crash_kexec()
   __crash_kexec()
 machine_crash_shutdown()
   default_machine_crash_shutdown() // for MIPS
 crash_kexec_prepare_cpus()

Here, panic() and crash_kexec() run exclusively via the
panic_cpu atomic variable.  So we can use cpus_stopped as a
normal variable.


Ok, if the code can only be entered once, what's the purpose of 
cpus_stopped?

I guess that's what confused me.  You are right, the panic_cpu atomic should
keep this on a single CPU.

Also, panic() will call panic_smp_self_stop() if it finds another CPU 
has already
called panic, which will just spin with interrupts off by default. I 
didn't see a
definition for it in MIPS, wouldn't it need to be overridden to avoid a 
deadlock?


-corey



Best regards,

Hidehiro Kawai




___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v5 02/13] kexec_file: Change kexec_add_buffer to take kexec_buf as argument.

2016-08-15 Thread Thiago Jung Bauermann
Am Montag, 15 August 2016, 17:30:49 schrieb Balbir Singh:
> On Thu, Aug 11, 2016 at 08:08:07PM -0300, Thiago Jung Bauermann wrote:
> > Adapt all callers to the new function prototype.
> 
> Could you please expand on this?

Is the following better?

Adapt all callers to set up a kexec_buf to pass to kexec_add_buffer.

> Looks good otherwise
> 
> Acked-by: Balbir Singh 

Thank you for reviewing this series!

-- 
[]'s
Thiago Jung Bauermann
IBM Linux Technology Center


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


RE: Re: [V4 PATCH 1/2] x86/panic: Replace smp_send_stop() with kdump friendly version in panic path

2016-08-15 Thread 河合英宏 / KAWAI,HIDEHIRO
Hi Dave,

Thank you for the review.

> From: Dave Young [mailto:dyo...@redhat.com]
> Sent: Friday, August 12, 2016 12:17 PM
> 
> Thanks for the update.
> On 08/10/16 at 05:09pm, Hidehiro Kawai wrote:
> > Daniel Walker reported problems which happen when
> > crash_kexec_post_notifiers kernel option is enabled
> > (https://lkml.org/lkml/2015/6/24/44).
> >
> > In that case, smp_send_stop() is called before entering kdump routines
> > which assume other CPUs are still online.  As a result, for x86,
> > kdump routines fail to save other CPUs' registers and disable
> > virtualization extensions.
> 
> Seems you simplified the changelog, but I think a little more details
> will be helpful to understand the patch. You know sometimes lkml.org
> does not work well.

So, I'll try another archive when I post the patch set next time.

> > To fix this problem, call a new kdump friendly function,
> > crash_smp_send_stop(), instead of the smp_send_stop() when
> > crash_kexec_post_notifiers is enabled.  crash_smp_send_stop() is a
> > weak function, and it just calls smp_send_stop().  Architecture
> > codes should override it so that kdump can work appropriately.
> > This patch only provides x86-specific version.
> >
> > For Xen's PV kernel, just keep the current behavior.
> 
> Could you explain a bit about above Xen PV kernel behavior?
> 
> BTW, this version looks better,  I think I'm fine with this version
> besides of the questions about changelog.

As for Dom0 kernel, it doesn't use crash_kexec routines, and
it relies on panic notifier chain.  At the end of the chain,
xen_panic_event is called, and it issues a hypercall which
requests the Hypervisor to execute kdump.  This means that whether
crash_kexec_post_notifiers is set or not, panic notifiers
are called after smp_send_stop.  Even if we save registers
in Dom0 kernel, they seem to be ignored (Hypervisor is responsible
for that).  This is why I kept the current behavior for Xen.

For PV DomU kernel, kdump is not supported.  For PV HVM
DomU, I'm not sure what will happen on panic because I
couldn't boot PV HVM DomU and test it.  But I think it will
work similarly to baremetal kernels with extra cleanups
for Hypervisor.

Best regards,

Hidehiro Kawai

> > Changes in V4:
> > - Keep to use smp_send_stop if crash_kexec_post_notifiers is not set
> > - Rename panic_smp_send_stop to crash_smp_send_stop
> > - Don't change the behavior for Xen's PV kernel
> >
> > Changes in V3:
> > - Revise comments, description, and symbol names
> >
> > Changes in V2:
> > - Replace smp_send_stop() call with crash_kexec version which
> >   saves cpu states and cleans up VMX/SVM
> > - Drop a fix for Problem 1 at this moment
> >
> > Reported-by: Daniel Walker 
> > Fixes: f06e5153f4ae (kernel/panic.c: add "crash_kexec_post_notifiers" 
> > option)
> > Signed-off-by: Hidehiro Kawai 
> > Cc: Dave Young 
> > Cc: Baoquan He 
> > Cc: Vivek Goyal 
> > Cc: Eric Biederman 
> > Cc: Masami Hiramatsu 
> > Cc: Daniel Walker 
> > Cc: Xunlei Pang 
> > Cc: Thomas Gleixner 
> > Cc: Ingo Molnar 
> > Cc: "H. Peter Anvin" 
> > Cc: Borislav Petkov 
> > Cc: David Vrabel 
> > Cc: Toshi Kani 
> > Cc: Andrew Morton 
> > ---
> >  arch/x86/include/asm/kexec.h |1 +
> >  arch/x86/include/asm/smp.h   |1 +
> >  arch/x86/kernel/crash.c  |   22 +---
> >  arch/x86/kernel/smp.c|5 
> >  kernel/panic.c   |   47 
> > --
> >  5 files changed, 66 insertions(+), 10 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
> > index d2434c1..282630e 100644
> > --- a/arch/x86/include/asm/kexec.h
> > +++ b/arch/x86/include/asm/kexec.h
> > @@ -210,6 +210,7 @@ struct kexec_entry64_regs {
> >
> >  typedef void crash_vmclear_fn(void);
> >  extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
> > +extern void kdump_nmi_shootdown_cpus(void);
> >
> >  #endif /* __ASSEMBLY__ */
> >
> > diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
> > index ebd0c16..f70989c 100644
> > --- a/arch/x86/include/asm/smp.h
> > +++ b/arch/x86/include/asm/smp.h
> > @@ -50,6 +50,7 @@ struct smp_ops {
> > void (*smp_cpus_done)(unsigned max_cpus);
> >
> > void (*stop_other_cpus)(int wait);
> > +   void (*crash_stop_other_cpus)(void);
> > void (*smp_send_reschedule)(int cpu);
> >
> > int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
> > diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> > index 9616cf7..650830e 100644
> > --- a/arch/x86/kernel/crash.c
> > +++ b/arch/x86/kernel/crash.c
> > @@ -133,15 +133,31 @@ static void kdump_nmi_callback(int cpu, struct 
> > pt_regs 

RE: Re: [V4 PATCH 2/2] mips/panic: Replace smp_send_stop() with kdump friendly version in panic path

2016-08-15 Thread 河合英宏 / KAWAI,HIDEHIRO
Hi Corey,

> From: Corey Minyard [mailto:cminy...@mvista.com]
> Sent: Friday, August 12, 2016 10:56 PM
> I'll try to test this, but I have one comment inline...

Thank you very much!

> On 08/11/2016 10:17 PM, Dave Young wrote:
> > On 08/10/16 at 05:09pm, Hidehiro Kawai wrote:
[snip]
> >> diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c
> >> index 610f0f3..1723b17 100644
> >> --- a/arch/mips/kernel/crash.c
> >> +++ b/arch/mips/kernel/crash.c
> >> @@ -47,9 +47,14 @@ static void crash_shutdown_secondary(void *passed_regs)
> >>
> >>   static void crash_kexec_prepare_cpus(void)
> >>   {
> >> +  static int cpus_stopped;
> >>unsigned int msecs;
> >> +  unsigned int ncpus;
> >>
> >> -  unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
> >> +  if (cpus_stopped)
> >> +  return;
> 
> Wouldn't you want an atomic operation and some special handling here to
> ensure that only one CPU does this?  So if a CPU comes in here and
> another CPU is already in the process stopping the CPUs it won't result in a
> deadlock.

Because this function can be called by only one panicking CPU,
there is no problem.

There are two paths through which crash_kexec_prepare_cpus is called.

Path 1 (panic path):
panic()
  crash_smp_send_stop()
crash_kexec_prepare_cpus()

Path 2 (oops path):
crash_kexec()
  __crash_kexec()
machine_crash_shutdown()
  default_machine_crash_shutdown() // for MIPS
crash_kexec_prepare_cpus()

Here, panic() and crash_kexec() run exclusively via the
panic_cpu atomic variable.  So we can use cpus_stopped as a
normal variable.

Best regards,

Hidehiro Kawai

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec: Account crashk_low_res to kexec_crash_size

2016-08-15 Thread Xunlei Pang
On 2016/08/15 at 15:17, Dave Young wrote:
> Hi Xunlei,
>
> On 08/13/16 at 04:26pm, Xunlei Pang wrote:
>> "/sys/kernel/kexec_crash_size" only includes crashk_res, it
>> is fine in most cases, but sometimes we have crashk_low_res.
>> For example, when "crashkernel=size[KMG],high" combined with
>> "crashkernel=size[KMG],low" is used for 64-bit x86.
>>
>> Let "/sys/kernel/kexec_crash_size" reflect all the reserved
>> memory including crashk_low_res, this is more understandable
>> from its naming.
> Maybe export another file for the kexec_crash_low_size so that
> we can clearly get how much the low area is.

I'm fine with it.

>> Although we can get all the crash memory from "/proc/iomem"
>> by filtering all "Crash kernel" keyword, it is more convenient
>> to utilize this file, and the two ways should stay consistent.
> Shrink low area does not make much sense, one may either use it or
> shrink it to 0.
>
> Actually think more about it, the crashk_low is only for x86,
> it might be even better to move it to x86 code instead of in
> common code.
>
> Opinion?

crashk_low is defined in kernel/kexec_core.c, so it's an
architecture-independent definition. Although it's only used by x86
currently, it may be used by other architectures in the future.
That's why I'm not handling it specifically for x86.

I just tested the original proc interface further, and it can be shrunk
to zero.
So I guess we can ease the restriction on shrinking the low area as well.

What do you think?

Regards,
Xunlei

>
> Thanks
> Dave
>> Note that write to "/sys/kernel/kexec_crash_size" is to shrink
>> the reserved memory, and we want to shrink crashk_res only.
>> So we add some additional check in crash_shrink_memory() since
>> crashk_low_res now is involved.
>>
>> Signed-off-by: Xunlei Pang 
>> ---
>>  kernel/kexec_core.c | 15 ++-
>>  1 file changed, 14 insertions(+), 1 deletion(-)
>>
>> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
>> index 5616755..d5ae780 100644
>> --- a/kernel/kexec_core.c
>> +++ b/kernel/kexec_core.c
>> @@ -932,6 +932,8 @@ size_t crash_get_memory_size(void)
>>  mutex_lock(_mutex);
>>  if (crashk_res.end != crashk_res.start)
>>  size = resource_size(_res);
>> +if (crashk_low_res.end != crashk_low_res.start)
>> +size += resource_size(_low_res);
>>  mutex_unlock(_mutex);
>>  return size;
>>  }
>> @@ -949,7 +951,7 @@ int crash_shrink_memory(unsigned long new_size)
>>  {
>>  int ret = 0;
>>  unsigned long start, end;
>> -unsigned long old_size;
>> +unsigned long low_size, old_size;
>>  struct resource *ram_res;
>>  
>>  mutex_lock(_mutex);
>> @@ -958,6 +960,17 @@ int crash_shrink_memory(unsigned long new_size)
>>  ret = -ENOENT;
>>  goto unlock;
>>  }
>> +
>> +start = crashk_low_res.start;
>> +end = crashk_low_res.end;
>> +low_size = (end == 0) ? 0 : end - start + 1;
>> +/* Do not shrink crashk_low_res. */
>> +if (new_size <= low_size) {
>> +ret = -EINVAL;
>> +goto unlock;
>> +}
>> +
>> +new_size -= low_size;
>>  start = crashk_res.start;
>>  end = crashk_res.end;
>>  old_size = (end == 0) ? 0 : end - start + 1;
>> -- 
>> 1.8.3.1
>>
>>
>> ___
>> kexec mailing list
>> kexec@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec
> ___
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH v5 04/13] powerpc: Factor out relocation code from module_64.c to elf_util_64.c.

2016-08-15 Thread Balbir Singh
On Thu, Aug 11, 2016 at 08:08:09PM -0300, Thiago Jung Bauermann wrote:
> The kexec_file_load system call needs to relocate the purgatory, so
> factor out the module relocation code so that it can be shared.
> 
> This patch's purpose is to move the ELF relocation logic from
> apply_relocate_add to elf_util_64.c with as few changes as
> possible. The following changes were needed:
> 
> To avoid having module-specific code in a general purpose utility
> function, struct elf_info was created to contain the information
> needed for ELF binaries manipulation.
> 
> my_r2, stub_for_addr and create_stub were changed to use it instead of
> having to receive a struct module, since they are called from
> elf64_apply_relocate_add.
> 
> local_entry_offset and squash_toc_save_inst were only used by
> apply_relocate_add, so they were moved to elf_util_64.c as well.
> 
> Signed-off-by: Thiago Jung Bauermann 
> ---
>  arch/powerpc/include/asm/elf_util.h |  70 
>  arch/powerpc/include/asm/module.h   |  14 +-
>  arch/powerpc/kernel/Makefile|   4 +
>  arch/powerpc/kernel/elf_util_64.c   | 269 +++
>  arch/powerpc/kernel/module_64.c | 312 
> 
>  5 files changed, 386 insertions(+), 283 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/elf_util.h 
> b/arch/powerpc/include/asm/elf_util.h
> new file mode 100644
> index ..37372559fe62
> --- /dev/null
> +++ b/arch/powerpc/include/asm/elf_util.h
> @@ -0,0 +1,70 @@
> +/*
> + * Utility functions to work with ELF files.
> + *
> + * Copyright (C) 2016, IBM Corporation
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2, or (at your option)
> + * any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#ifndef _ASM_POWERPC_ELF_UTIL_H
> +#define _ASM_POWERPC_ELF_UTIL_H
> +
> +#include 
> +
> +struct elf_info {
> + struct elf_shdr *sechdrs;
> +
> + /* Index of stubs section. */
> + unsigned int stubs_section;
> + /* Index of TOC section. */
> + unsigned int toc_section;
> +};
> +
> +#ifdef __powerpc64__
> +#ifdef PPC64_ELF_ABI_v2
> +
> +/* An address is simply the address of the function. */
> +typedef unsigned long func_desc_t;
> +#else
> +
> +/* An address is address of the OPD entry, which contains address of fn. */
> +typedef struct ppc64_opd_entry func_desc_t;
> +#endif /* PPC64_ELF_ABI_v2 */
> +
> +/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
> +   the kernel itself).  But on PPC64, these need to be used for every
> +   jump, actually, to reset r2 (TOC+0x8000). */
> +struct ppc64_stub_entry
> +{
> + /* 28 byte jump instruction sequence (7 instructions). We only
> +  * need 6 instructions on ABIv2 but we always allocate 7 so
> +  * so we don't have to modify the trampoline load instruction. */
> + u32 jump[7];
> + /* Used by ftrace to identify stubs */
> + u32 magic;
> + /* Data for the above code */
> + func_desc_t funcdata;
> +};
> +#endif
> +
> +/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
> +   gives the value maximum span in an instruction which uses a signed
> +   offset) */
> +static inline unsigned long my_r2(const struct elf_info *elf_info)
> +{
> + return elf_info->sechdrs[elf_info->toc_section].sh_addr + 0x8000;
> +}
> +
> +int elf64_apply_relocate_add(const struct elf_info *elf_info,
> +  const char *strtab, unsigned int symindex,
> +  unsigned int relsec, const char *obj_name);
> +
> +#endif /* _ASM_POWERPC_ELF_UTIL_H */
> diff --git a/arch/powerpc/include/asm/module.h 
> b/arch/powerpc/include/asm/module.h
> index cd4ffd86765f..f2073115d518 100644
> --- a/arch/powerpc/include/asm/module.h
> +++ b/arch/powerpc/include/asm/module.h
> @@ -12,7 +12,14 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
> +/* Both low and high 16 bits are added as SIGNED additions, so if low
> +   16 bits has high bit set, high 16 bits must be adjusted.  These
> +   macros do that (stolen from binutils). */
> +#define PPC_LO(v) ((v) & 0x)
> +#define PPC_HI(v) (((v) >> 16) & 0x)
> +#define PPC_HA(v) PPC_HI ((v) + 0x8000)
>  
>  #ifndef __powerpc64__
>  /*
> @@ -33,8 +40,7 @@ struct ppc_plt_entry {
>  
>  struct mod_arch_specific {
>  #ifdef __powerpc64__
> - unsigned int stubs_section; /* Index of stubs section in module */
> - unsigned int toc_section;   /* What section is the TOC? */
> + struct elf_info elf_info;
>   bool toc_fixed; /* Have we fixed up .TOC.? */
>  

Re: [PATCH v5 02/13] kexec_file: Change kexec_add_buffer to take kexec_buf as argument.

2016-08-15 Thread Balbir Singh
On Thu, Aug 11, 2016 at 08:08:07PM -0300, Thiago Jung Bauermann wrote:
> Adapt all callers to the new function prototype.
>

Could you please expand on this?
 
> In addition, change the type of kexec_buf.buffer from char * to void *.
> There is no particular reason for it to be a char *, and the change
> allows us to get rid of 3 existing casts to char * in the code.
> 
> Signed-off-by: Thiago Jung Bauermann 
> Acked-by: Dave Young 
> ---

Looks good otherwise

Acked-by: Balbir Singh 

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v2 3/6] kexec_file: Allow skipping checksum calculation for some segments.

2016-08-15 Thread Thiago Jung Bauermann
Adds checksum argument to kexec_add_buffer specifying whether the given
segment should be part of the checksum calculation.

The next patch will add a way to update segments after a kimage is loaded.
Segments that will be updated in this way should not be checksummed,
otherwise they will cause the purgatory checksum verification to fail
when the machine is rebooted.

As a bonus, we don't need to special-case the purgatory segment anymore
to avoid checksumming it.

Adjust call sites for the new argument.

Signed-off-by: Thiago Jung Bauermann 
---
 arch/powerpc/kernel/kexec_elf_64.c |  6 +++---
 arch/x86/kernel/crash.c|  4 ++--
 arch/x86/kernel/kexec-bzimage64.c  |  6 +++---
 include/linux/kexec.h  | 10 +++---
 kernel/kexec_file.c| 23 ---
 5 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/kexec_elf_64.c 
b/arch/powerpc/kernel/kexec_elf_64.c
index 22afc7b5ee73..4c528c81b076 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -128,7 +128,7 @@ static int elf_exec_load(struct kimage *image, struct 
elfhdr *ehdr,
kbuf.memsz = phdr->p_memsz;
kbuf.buf_align = phdr->p_align;
kbuf.buf_min = phdr->p_paddr + base;
-   ret = kexec_add_buffer();
+   ret = kexec_add_buffer(, true);
if (ret)
goto out;
load_addr = kbuf.mem;
@@ -188,7 +188,7 @@ void *elf64_load(struct kimage *image, char *kernel_buf,
kbuf.bufsz = kbuf.memsz = initrd_len;
kbuf.buf_align = PAGE_SIZE;
kbuf.top_down = false;
-   ret = kexec_add_buffer();
+   ret = kexec_add_buffer(, true);
if (ret)
goto out;
initrd_load_addr = kbuf.mem;
@@ -245,7 +245,7 @@ void *elf64_load(struct kimage *image, char *kernel_buf,
kbuf.bufsz = kbuf.memsz = fdt_size;
kbuf.buf_align = PAGE_SIZE;
kbuf.top_down = true;
-   ret = kexec_add_buffer();
+   ret = kexec_add_buffer(, true);
if (ret)
goto out;
fdt_load_addr = kbuf.mem;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 38a1cdf6aa05..634ab16377b1 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -642,7 +642,7 @@ int crash_load_segments(struct kimage *image)
 * copied in purgatory after crash. Just add a zero filled
 * segment for now to make sure checksum logic works fine.
 */
-   ret = kexec_add_buffer(&kbuf);
+   ret = kexec_add_buffer(&kbuf, true);
if (ret)
return ret;
image->arch.backup_load_addr = kbuf.mem;
@@ -661,7 +661,7 @@ int crash_load_segments(struct kimage *image)
 
kbuf.memsz = kbuf.bufsz;
kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
-   ret = kexec_add_buffer(&kbuf);
+   ret = kexec_add_buffer(&kbuf, true);
if (ret) {
vfree((void *)image->arch.elf_headers);
return ret;
diff --git a/arch/x86/kernel/kexec-bzimage64.c 
b/arch/x86/kernel/kexec-bzimage64.c
index 4b3a75329fb6..a46e3fbb0639 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -422,7 +422,7 @@ static void *bzImage64_load(struct kimage *image, char 
*kernel,
kbuf.memsz = kbuf.bufsz;
kbuf.buf_align = 16;
kbuf.buf_min = MIN_BOOTPARAM_ADDR;
-   ret = kexec_add_buffer(&kbuf);
+   ret = kexec_add_buffer(&kbuf, true);
if (ret)
goto out_free_params;
bootparam_load_addr = kbuf.mem;
@@ -435,7 +435,7 @@ static void *bzImage64_load(struct kimage *image, char 
*kernel,
kbuf.memsz = PAGE_ALIGN(header->init_size);
kbuf.buf_align = header->kernel_alignment;
kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
-   ret = kexec_add_buffer(&kbuf);
+   ret = kexec_add_buffer(&kbuf, true);
if (ret)
goto out_free_params;
kernel_load_addr = kbuf.mem;
@@ -449,7 +449,7 @@ static void *bzImage64_load(struct kimage *image, char 
*kernel,
kbuf.bufsz = kbuf.memsz = initrd_len;
kbuf.buf_align = PAGE_SIZE;
kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
-   ret = kexec_add_buffer(&kbuf);
+   ret = kexec_add_buffer(&kbuf, true);
if (ret)
goto out_free_params;
initrd_load_addr = kbuf.mem;
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 4559a1a01b0a..37eea32fdff1 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -100,6 +100,9 @@ struct kexec_segment {
size_t bufsz;
unsigned long mem;
size_t memsz;
+
+   /* Whether this segment is part of the checksum calculation. */
+   bool do_checksum;
 };
 
 #ifdef 

[PATCH v2 5/6] kexec: Share logic to copy segment page contents.

2016-08-15 Thread Thiago Jung Bauermann
Make kimage_load_normal_segment and kexec_update_segment share code
which they currently duplicate.

Signed-off-by: Thiago Jung Bauermann 
---
 kernel/kexec_core.c | 159 +++-
 1 file changed, 95 insertions(+), 64 deletions(-)

diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 806735201de6..68b5b245e457 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -721,6 +721,65 @@ static struct page *kimage_alloc_page(struct kimage *image,
return page;
 }
 
+struct kimage_update_buffer_state {
+   /* Destination memory address currently being copied to. */
+   unsigned long maddr;
+
+   /* Bytes in buffer still left to copy. */
+   size_t ubytes;
+
+   /* Bytes in memory still left to copy. */
+   size_t mbytes;
+
+   /* If true, copy from kbuf. */
+   bool from_kernel;
+
+   /* Clear pages before copying? */
+   bool clear_pages;
+
+   /* Buffer position to continue copying from. */
+   const unsigned char *kbuf;
+   const unsigned char __user *buf;
+};
+
+static int kimage_update_page(struct page *page,
+ struct kimage_update_buffer_state *state)
+{
+   char *ptr;
+   int result = 0;
+   size_t uchunk, mchunk;
+
+   ptr = kmap(page);
+
+   /* Start with a clear page */
+   if (state->clear_pages)
+   clear_page(ptr);
+
+   ptr += state->maddr & ~PAGE_MASK;
+   mchunk = min_t(size_t, state->mbytes,
+  PAGE_SIZE - (state->maddr & ~PAGE_MASK));
+   uchunk = min(state->ubytes, mchunk);
+
+   if (state->from_kernel)
+   memcpy(ptr, state->kbuf, uchunk);
+   else
+   result = copy_from_user(ptr, state->buf, uchunk);
+
+   kunmap(page);
+   if (result)
+   return -EFAULT;
+
+   state->ubytes -= uchunk;
+   state->maddr += mchunk;
+   if (state->from_kernel)
+   state->kbuf += mchunk;
+   else
+   state->buf += mchunk;
+   state->mbytes -= mchunk;
+
+   return 0;
+}
+
 /**
  * kexec_update_segment - update the contents of a kimage segment
  * @buffer:New contents of the segment.
@@ -739,6 +798,7 @@ int kexec_update_segment(const char *buffer, unsigned long 
bufsz,
unsigned long entry;
unsigned long *ptr = NULL;
void *dest = NULL;
+   struct kimage_update_buffer_state state;
 
if (kexec_image == NULL) {
pr_err("Can't update segment: no kexec image loaded.\n");
@@ -768,8 +828,15 @@ int kexec_update_segment(const char *buffer, unsigned long 
bufsz,
return -EINVAL;
}
 
-   for (entry = kexec_image->head; !(entry & IND_DONE) && memsz;
-entry = *ptr++) {
+   state.maddr = load_addr;
+   state.ubytes = bufsz;
+   state.mbytes = memsz;
+   state.kbuf = buffer;
+   state.from_kernel = true;
+   state.clear_pages = false;
+
+   for (entry = kexec_image->head; !(entry & IND_DONE) &&
+   state.mbytes; entry = *ptr++) {
void *addr = (void *) (entry & PAGE_MASK);
 
switch (entry & IND_FLAGS) {
@@ -786,26 +853,13 @@ int kexec_update_segment(const char *buffer, unsigned 
long bufsz,
return -EINVAL;
}
 
-   if (dest == (void *) load_addr) {
-   struct page *page;
-   char *ptr;
-   size_t uchunk, mchunk;
-
-   page = kmap_to_page(addr);
-
-   ptr = kmap(page);
-   ptr += load_addr & ~PAGE_MASK;
-   mchunk = min_t(size_t, memsz,
-  PAGE_SIZE - (load_addr & 
~PAGE_MASK));
-   uchunk = min(bufsz, mchunk);
-   memcpy(ptr, buffer, uchunk);
-
-   kunmap(page);
+   if (dest == (void *) state.maddr) {
+   int ret;
 
-   bufsz -= uchunk;
-   load_addr += mchunk;
-   buffer += mchunk;
-   memsz -= mchunk;
+   ret = kimage_update_page(kmap_to_page(addr),
+&state);
+   if (ret)
+   return ret;
}
dest += PAGE_SIZE;
}
@@ -823,31 +877,30 @@ int kexec_update_segment(const char *buffer, unsigned 
long bufsz,
 static int kimage_load_normal_segment(struct kimage *image,
 struct kexec_segment *segment)
 {
-   unsigned long 

[PATCH v2 6/6] IMA: Demonstration code for kexec buffer passing.

2016-08-15 Thread Thiago Jung Bauermann
This patch is not intended to be committed.

It shows how kernel code can use the kexec buffer passing mechanism
to pass information to the next kernel.

Signed-off-by: Thiago Jung Bauermann 
---
 include/linux/ima.h   | 11 +
 kernel/kexec_file.c   |  4 ++
 security/integrity/ima/ima.h  |  5 +++
 security/integrity/ima/ima_init.c | 26 +++
 security/integrity/ima/ima_template.c | 85 +++
 5 files changed, 131 insertions(+)

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0eb7c2e7f0d6..96528d007139 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -11,6 +11,7 @@
 #define _LINUX_IMA_H
 
 #include 
+#include 
 struct linux_binprm;
 
 #ifdef CONFIG_IMA
@@ -23,6 +24,10 @@ extern int ima_post_read_file(struct file *file, void *buf, 
loff_t size,
  enum kernel_read_file_id id);
 extern void ima_post_path_mknod(struct dentry *dentry);
 
+#ifdef CONFIG_KEXEC_FILE
+extern void ima_add_kexec_buffer(struct kimage *image);
+#endif
+
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
 {
@@ -60,6 +65,12 @@ static inline void ima_post_path_mknod(struct dentry *dentry)
return;
 }
 
+#ifdef CONFIG_KEXEC_FILE
+static inline void ima_add_kexec_buffer(struct kimage *image)
+{
+}
+#endif
+
 #endif /* CONFIG_IMA */
 
 #ifdef CONFIG_IMA_APPRAISE
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index aed51175915f..bf8f61c20c11 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -321,6 +322,9 @@ kimage_file_prepare_segments(struct kimage *image, int 
kernel_fd, int initrd_fd,
}
}
 
+   /* IMA needs to pass the measurement list to the next kernel. */
+   ima_add_kexec_buffer(image);
+
/* Call arch image load handlers */
ldata = arch_kexec_kernel_image_load(image);
 
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index db25f54a04fe..0334001055d7 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -102,6 +102,11 @@ struct ima_queue_entry {
 };
 extern struct list_head ima_measurements;  /* list of all measurements */
 
+#ifdef CONFIG_KEXEC_FILE
+extern void *kexec_buffer;
+extern size_t kexec_buffer_size;
+#endif
+
 /* Internal IMA function definitions */
 int ima_init(void);
 int ima_fs_init(void);
diff --git a/security/integrity/ima/ima_init.c 
b/security/integrity/ima/ima_init.c
index 32912bd54ead..a1924d0f3b2b 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "ima.h"
 
@@ -104,6 +105,29 @@ void __init ima_load_x509(void)
 }
 #endif
 
+#ifdef CONFIG_KEXEC_FILE
+static void ima_load_kexec_buffer(void)
+{
+   int rc;
+
+   /* Fetch the buffer from the previous kernel, if any. */
+   rc = kexec_get_handover_buffer(&kexec_buffer, &kexec_buffer_size);
+   if (rc == 0) {
+   /* Demonstrate that buffer handover works. */
+   pr_err("kexec buffer contents: %s\n", (char *) kexec_buffer);
+   pr_err("kexec buffer contents after update: %s\n",
+  (char *) kexec_buffer + 4 * PAGE_SIZE + 10);
+
+   kexec_free_handover_buffer();
+   } else if (rc == -ENOENT)
+   pr_debug("No kexec buffer from the previous kernel.\n");
+   else
+   pr_debug("Error restoring kexec buffer: %d\n", rc);
+}
+#else
+static void ima_load_kexec_buffer(void) { }
+#endif
+
 int __init ima_init(void)
 {
u8 pcr_i[TPM_DIGEST_SIZE];
@@ -134,5 +158,7 @@ int __init ima_init(void)
 
ima_init_policy();
 
+   ima_load_kexec_buffer();
+
return ima_fs_init();
 }
diff --git a/security/integrity/ima/ima_template.c 
b/security/integrity/ima/ima_template.c
index febd12ed9b55..a8609f3a13d2 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -15,6 +15,8 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include 
+#include 
 #include "ima.h"
 #include "ima_template_lib.h"
 
@@ -182,6 +184,89 @@ static int template_desc_init_fields(const char 
*template_fmt,
return 0;
 }
 
+#ifdef CONFIG_KEXEC_FILE
+void *kexec_buffer = NULL;
+size_t kexec_buffer_size = 0;
+
+/* Physical address of the measurement buffer in the next kernel. */
+unsigned long kexec_buffer_load_addr = 0;
+
+/*
+ * Called during reboot. IMA can add here new events that were generated after
+ * the kexec image was loaded.
+ */
+static int ima_update_kexec_buffer(struct notifier_block *self,
+  unsigned long action, void *data)
+{
+   int ret;
+
+   if (!kexec_in_progress)
+   return NOTIFY_OK;
+
+   /*
+* Add content deep in the buffer to show that we can update
+  

[PATCH v2 2/6] powerpc: kexec_file: Add buffer hand-over support for the next kernel

2016-08-15 Thread Thiago Jung Bauermann
The buffer hand-over mechanism allows the currently running kernel to pass
data to the kernel that will be kexec'd via a kexec segment. The second kernel
can check whether the previous kernel sent data and retrieve it.

This is the architecture-specific part.

Signed-off-by: Thiago Jung Bauermann 
---
 arch/powerpc/include/asm/kexec.h   |  12 +++-
 arch/powerpc/kernel/kexec_elf_64.c |   2 +-
 arch/powerpc/kernel/machine_kexec_64.c | 114 +++--
 3 files changed, 120 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 31bc64e07c8f..b20738df26f8 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -92,12 +92,20 @@ static inline bool kdump_in_progress(void)
 }
 
 #ifdef CONFIG_KEXEC_FILE
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+   phys_addr_t handover_buffer_addr;
+   unsigned long handover_buffer_size;
+};
+
 int setup_purgatory(struct kimage *image, const void *slave_code,
const void *fdt, unsigned long kernel_load_addr,
unsigned long fdt_load_addr, unsigned long stack_top,
int debug);
-int setup_new_fdt(void *fdt, unsigned long initrd_load_addr,
- unsigned long initrd_len, const char *cmdline);
+int setup_new_fdt(const struct kimage *image, void *fdt,
+ unsigned long initrd_load_addr, unsigned long initrd_len,
+ const char *cmdline);
 bool find_debug_console(const void *fdt, int chosen_node);
 int merge_partial_dtb(void *to, const void *from);
 #endif /* CONFIG_KEXEC_FILE */
diff --git a/arch/powerpc/kernel/kexec_elf_64.c 
b/arch/powerpc/kernel/kexec_elf_64.c
index 1b902ad66e2a..22afc7b5ee73 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -219,7 +219,7 @@ void *elf64_load(struct kimage *image, char *kernel_buf,
}
}
 
-   ret = setup_new_fdt(fdt, initrd_load_addr, initrd_len, cmdline);
+   ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
if (ret)
goto out;
 
diff --git a/arch/powerpc/kernel/machine_kexec_64.c 
b/arch/powerpc/kernel/machine_kexec_64.c
index a484a6346146..190c652e49b7 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -490,6 +490,60 @@ int arch_kimage_file_post_load_cleanup(struct kimage 
*image)
return image->fops->cleanup(image->image_loader_data);
 }
 
+bool kexec_can_hand_over_buffer(void)
+{
+   return true;
+}
+
+int arch_kexec_add_handover_buffer(struct kimage *image,
+  unsigned long load_addr, unsigned long size)
+{
+   image->arch.handover_buffer_addr = load_addr;
+   image->arch.handover_buffer_size = size;
+
+   return 0;
+}
+
+int kexec_get_handover_buffer(void **addr, unsigned long *size)
+{
+   int ret;
+   u64 start_addr, end_addr;
+
+   ret = of_property_read_u64(of_chosen,
+  "linux,kexec-handover-buffer-start",
+  &start_addr);
+   if (ret == -EINVAL)
+   return -ENOENT;
+   else if (ret)
+   return -EINVAL;
+
+   ret = of_property_read_u64(of_chosen, "linux,kexec-handover-buffer-end",
+  &end_addr);
+   if (ret == -EINVAL)
+   return -ENOENT;
+   else if (ret)
+   return -EINVAL;
+
+   *addr =  __va(start_addr);
+   /* -end is the first address after the buffer. */
+   *size = end_addr - start_addr;
+
+   return 0;
+}
+
+int kexec_free_handover_buffer(void)
+{
+   int ret;
+   void *addr;
+   unsigned long size;
+
+   ret = kexec_get_handover_buffer(&addr, &size);
+   if (ret)
+   return ret;
+
+   return memblock_free((phys_addr_t) addr, size);
+}
+
 /**
  * arch_kexec_walk_mem() - call func(data) for each unreserved memory block
  * @kbuf:  Context info for the search. Also passed to @func.
@@ -687,9 +741,52 @@ int setup_purgatory(struct kimage *image, const void 
*slave_code,
return 0;
 }
 
-/*
- * setup_new_fdt() - modify /chosen and memory reservation for the next kernel
- * @fdt:
+/**
+ * setup_handover_buffer() - add properties and reservation for the handover 
buffer
+ * @image: kexec image being loaded.
+ * @fdt:   Flattened device tree for the next kernel.
+ * @chosen_node:   Offset to the chosen node.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+static int setup_handover_buffer(const struct kimage *image, void *fdt,
+int chosen_node)
+{
+   int ret;
+
+   if (image->arch.handover_buffer_addr == 0)
+   return 0;
+
+   ret = fdt_setprop_u64(fdt, chosen_node,
+ "linux,kexec-handover-buffer-start",
+ 

[PATCH v2 4/6] kexec_file: Add mechanism to update kexec segments.

2016-08-15 Thread Thiago Jung Bauermann
kexec_update_segment allows a given segment in kexec_image to have
its contents updated. This is useful if the current kernel wants to
send information to the next kernel that is up-to-date at the time of
reboot.

Signed-off-by: Thiago Jung Bauermann 
---
 include/linux/kexec.h |  2 ++
 kernel/kexec_core.c   | 99 +++
 2 files changed, 101 insertions(+)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 37eea32fdff1..14dda81e3e01 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -259,6 +259,8 @@ extern int kexec_purgatory_get_set_symbol(struct kimage 
*image,
  unsigned int size, bool get_value);
 extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
 const char *name);
+int kexec_update_segment(const char *buffer, unsigned long bufsz,
+unsigned long load_addr, unsigned long memsz);
 extern void __crash_kexec(struct pt_regs *);
 extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 561675589511..806735201de6 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -721,6 +721,105 @@ static struct page *kimage_alloc_page(struct kimage 
*image,
return page;
 }
 
+/**
+ * kexec_update_segment - update the contents of a kimage segment
+ * @buffer:New contents of the segment.
+ * @bufsz: @buffer size.
+ * @load_addr: Segment's physical address in the next kernel.
+ * @memsz: Segment size.
+ *
+ * This function assumes kexec_mutex is held.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int kexec_update_segment(const char *buffer, unsigned long bufsz,
+unsigned long load_addr, unsigned long memsz)
+{
+   int i;
+   unsigned long entry;
+   unsigned long *ptr = NULL;
+   void *dest = NULL;
+
+   if (kexec_image == NULL) {
+   pr_err("Can't update segment: no kexec image loaded.\n");
+   return -EINVAL;
+   }
+
+   /*
+* kexec_add_buffer rounds up segment sizes to PAGE_SIZE, so
+* we have to do it here as well.
+*/
+   memsz = ALIGN(memsz, PAGE_SIZE);
+
+   for (i = 0; i < kexec_image->nr_segments; i++)
+   /* We only support updating whole segments. */
+   if (load_addr == kexec_image->segment[i].mem &&
+   memsz == kexec_image->segment[i].memsz) {
+   if (kexec_image->segment[i].do_checksum) {
+   pr_err("Trying to update non-modifiable 
segment.\n");
+   return -EINVAL;
+   }
+
+   break;
+   }
+   if (i == kexec_image->nr_segments) {
+   pr_err("Couldn't find segment to update: 0x%lx, size 0x%lx\n",
+  load_addr, memsz);
+   return -EINVAL;
+   }
+
+   for (entry = kexec_image->head; !(entry & IND_DONE) && memsz;
+entry = *ptr++) {
+   void *addr = (void *) (entry & PAGE_MASK);
+
+   switch (entry & IND_FLAGS) {
+   case IND_DESTINATION:
+   dest = addr;
+   break;
+   case IND_INDIRECTION:
+   ptr = __va(addr);
+   break;
+   case IND_SOURCE:
+   /* Shouldn't happen, but verify just to be safe. */
+   if (dest == NULL) {
+   pr_err("Invalid kexec entries list.");
+   return -EINVAL;
+   }
+
+   if (dest == (void *) load_addr) {
+   struct page *page;
+   char *ptr;
+   size_t uchunk, mchunk;
+
+   page = kmap_to_page(addr);
+
+   ptr = kmap(page);
+   ptr += load_addr & ~PAGE_MASK;
+   mchunk = min_t(size_t, memsz,
+  PAGE_SIZE - (load_addr & 
~PAGE_MASK));
+   uchunk = min(bufsz, mchunk);
+   memcpy(ptr, buffer, uchunk);
+
+   kunmap(page);
+
+   bufsz -= uchunk;
+   load_addr += mchunk;
+   buffer += mchunk;
+   memsz -= mchunk;
+   }
+   dest += PAGE_SIZE;
+   }
+
+   /* Shouldn't happen, but verify just to be safe. */
+   if (ptr == NULL) {
+   pr_err("Invalid kexec entries list.");
+   return 

[PATCH v2 0/6] kexec_file: Add buffer hand-over for the next kernel

2016-08-15 Thread Thiago Jung Bauermann
Hello,

This patch series implements a mechanism which allows the kernel to pass
on a buffer to the kernel that will be kexec'd. This buffer is passed
as a segment which is added to the kimage when it is being prepared
by kexec_file_load.

How the second kernel is informed of this buffer is architecture-specific.
On powerpc, this is done via the device tree, by checking
the properties /chosen/linux,kexec-handover-buffer-start and
/chosen/linux,kexec-handover-buffer-end, which is analogous to how the
kernel finds the initrd.

This is needed because the Integrity Measurement Architecture subsystem
needs to preserve its measurement list across the kexec reboot. The
following patch series for the IMA subsystem uses this feature for that
purpose:

https://lists.infradead.org/pipermail/kexec/2016-August/016745.html

This is so that IMA can implement trusted boot support on the OpenPower
platform, because on such systems an intermediary Linux instance running
as part of the firmware is used to boot the target operating system via
kexec. Using this mechanism, IMA on this intermediary instance can
hand over to the target OS the measurements of the components that were
used to boot it.

Because there could be additional measurement events between the
kexec_file_load call and the actual reboot, IMA needs a way to update the
buffer with those additional events before rebooting. One can minimize
the interval between the kexec_file_load and the reboot syscalls, but as
small as it can be, there is always the possibility that the measurement
list will be out of date at the time of reboot.

To address this issue, this patch series also introduces
kexec_update_segment, which allows a reboot notifier to change the
contents of the image segment during the reboot process.

Patch 5 makes kimage_load_normal_segment and kexec_update_segment share
code. It's not much code that they can share though, so I'm not sure if
the result is actually better.

The last patch is not intended to be merged, it just demonstrates how
this feature can be used.

This series applies on top of v5 of the "kexec_file_load implementation
for PowerPC" patch series (which applies on top of v4.8-rc1):

https://lists.infradead.org/pipermail/kexec/2016-August/016843.html

Changes for v2:
- Rebased on v5 of kexec_file_load implementation for PowerPC patch series.
- Patch "kexec_file: Add buffer hand-over support for the next kernel"
  - Changed kexec_add_handover_buffer to receive a struct kexec_buf, as
suggested by Dave Young.
- Patch "powerpc: kexec_file: Add buffer hand-over support for the next kernel"
  - Moved setup_handover_buffer from kexec_elf_64.c to machine_kexec_64.c.
  - Call setup_handover_buffer from setup_new_fdt instead of elf64_load.
  - Changed kexec_get_handover_buffer to read from the expanded device tree
instead of the flattened device tree.
- Patch "kexec_file: Add mechanism to update kexec segments.":
  - Removed unnecessary "#include " in kexec_file.c.
  - Round up memsz argument to PAGE_SIZE.
  - Check if kexec_image is NULL in kexec_update_segment.
- Patch "IMA: Demonstration code for kexec buffer passing."
  - Avoid registering reboot notifier again if kexec_file_load is called
more than once.


Thiago Jung Bauermann (6):
  kexec_file: Add buffer hand-over support for the next kernel
  powerpc: kexec_file: Add buffer hand-over support for the next kernel
  kexec_file: Allow skipping checksum calculation for some segments.
  kexec_file: Add mechanism to update kexec segments.
  kexec: Share logic to copy segment page contents.
  IMA: Demonstration code for kexec buffer passing.

 arch/powerpc/include/asm/kexec.h   |  12 +-
 arch/powerpc/kernel/kexec_elf_64.c |   8 +-
 arch/powerpc/kernel/machine_kexec_64.c | 114 -
 arch/x86/kernel/crash.c|   4 +-
 arch/x86/kernel/kexec-bzimage64.c  |   6 +-
 include/linux/ima.h|  11 ++
 include/linux/kexec.h  |  37 +-
 kernel/kexec_core.c| 216 ++---
 kernel/kexec_file.c|  91 --
 security/integrity/ima/ima.h   |   5 +
 security/integrity/ima/ima_init.c  |  26 
 security/integrity/ima/ima_template.c  |  85 +
 12 files changed, 546 insertions(+), 69 deletions(-)

-- 
1.9.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v2 1/6] kexec_file: Add buffer hand-over support for the next kernel

2016-08-15 Thread Thiago Jung Bauermann
The buffer hand-over mechanism allows the currently running kernel to pass
data to the kernel that will be kexec'd via a kexec segment. The second kernel
can check whether the previous kernel sent data and retrieve it.

This is the architecture-independent part of the feature.

Signed-off-by: Thiago Jung Bauermann 
---
 include/linux/kexec.h | 29 ++
 kernel/kexec_file.c   | 68 +++
 2 files changed, 97 insertions(+)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ceccc5856aab..4559a1a01b0a 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -388,6 +388,35 @@ static inline void *boot_phys_to_virt(unsigned long entry)
return phys_to_virt(boot_phys_to_phys(entry));
 }
 
+#ifdef CONFIG_KEXEC_FILE
+bool __weak kexec_can_hand_over_buffer(void);
+int __weak arch_kexec_add_handover_buffer(struct kimage *image,
+ unsigned long load_addr,
+ unsigned long size);
+int kexec_add_handover_buffer(struct kexec_buf *kbuf);
+int __weak kexec_get_handover_buffer(void **addr, unsigned long *size);
+int __weak kexec_free_handover_buffer(void);
+#else
+static inline bool kexec_can_hand_over_buffer(void)
+{
+   return false;
+}
+
+static inline int kexec_add_handover_buffer(struct kexec_buf *kbuf)
+{
+   return -ENOTSUPP;
+}
+
+static inline int kexec_get_handover_buffer(void **addr, unsigned long *size)
+{
+   return -ENOTSUPP;
+}
+
+static inline int kexec_free_handover_buffer(void)
+{
+   return -ENOTSUPP;
+}
+#endif /* CONFIG_KEXEC_FILE */
 #else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 772cb491715e..c8418d62e2fc 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -135,6 +135,74 @@ int __weak arch_kexec_verify_buffer(enum kexec_file_type 
type, const void *buf,
return -EINVAL;
 }
 
+/**
+ * kexec_can_hand_over_buffer - can we pass data to the kexec'd kernel?
+ */
+bool __weak kexec_can_hand_over_buffer(void)
+{
+   return false;
+}
+
+/**
+ * arch_kexec_add_handover_buffer - do arch-specific steps to handover buffer
+ *
+ * Architectures should use this function to pass on the handover buffer
+ * information to the next kernel.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int __weak arch_kexec_add_handover_buffer(struct kimage *image,
+ unsigned long load_addr,
+ unsigned long size)
+{
+   return -ENOTSUPP;
+}
+
+/**
+ * kexec_add_handover_buffer - add buffer to be used by the next kernel
+ * @kbuf:  Buffer contents and memory parameters.
+ *
+ * This function assumes that kexec_mutex is held.
+ * On successful return, @kbuf->mem will have the physical address of
+ * the buffer in the next kernel.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int kexec_add_handover_buffer(struct kexec_buf *kbuf)
+{
+   int ret;
+
+   if (!kexec_can_hand_over_buffer())
+   return -ENOTSUPP;
+
+   ret = kexec_add_buffer(kbuf);
+   if (ret)
+   return ret;
+
+   return arch_kexec_add_handover_buffer(kbuf->image, kbuf->mem,
+ kbuf->memsz);
+}
+
+/**
+ * kexec_get_handover_buffer - get the handover buffer from the previous kernel
+ * @addr:  On successful return, set to point to the buffer contents.
+ * @size:  On successful return, set to the buffer size.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int __weak kexec_get_handover_buffer(void **addr, unsigned long *size)
+{
+   return -ENOTSUPP;
+}
+
+/**
+ * kexec_free_handover_buffer - free memory used by the handover buffer
+ */
+int __weak kexec_free_handover_buffer(void)
+{
+   return -ENOTSUPP;
+}
+
 /*
  * In file mode list of segments is prepared by kernel. Copy relevant
  * data from user space, do error checking, prepare segment list
-- 
1.9.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH] kexec: Account crashk_low_res to kexec_crash_size

2016-08-15 Thread Dave Young
Hi Xunlei,

On 08/13/16 at 04:26pm, Xunlei Pang wrote:
> "/sys/kernel/kexec_crash_size" only includes crashk_res, it
> is fine in most cases, but sometimes we have crashk_low_res.
> For example, when "crashkernel=size[KMG],high" combined with
> "crashkernel=size[KMG],low" is used for 64-bit x86.
> 
> Let "/sys/kernel/kexec_crash_size" reflect all the reserved
> memory including crashk_low_res, this is more understandable
> from its naming.

Maybe export another file for the kexec_crash_low_size so that
we can clearly get how much the low area is.

> 
> Although we can get all the crash memory from "/proc/iomem"
> by filtering all "Crash kernel" keyword, it is more convenient
> to utilize this file, and the two ways should stay consistent.

Shrinking the low area does not make much sense; one would either use it
or shrink it to 0.

Actually, thinking more about it, crashk_low is only for x86, so
it might be even better to move it to x86 code instead of keeping it
in common code.

Opinion?

Thanks
Dave
> 
> Note that write to "/sys/kernel/kexec_crash_size" is to shrink
> the reserved memory, and we want to shrink crashk_res only.
> So we add some additional check in crash_shrink_memory() since
> crashk_low_res now is involved.
> 
> Signed-off-by: Xunlei Pang 
> ---
>  kernel/kexec_core.c | 15 ++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index 5616755..d5ae780 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -932,6 +932,8 @@ size_t crash_get_memory_size(void)
>   mutex_lock(&kexec_mutex);
>   if (crashk_res.end != crashk_res.start)
>   size = resource_size(&crashk_res);
> + if (crashk_low_res.end != crashk_low_res.start)
> + size += resource_size(&crashk_low_res);
>   mutex_unlock(&kexec_mutex);
>   return size;
>  }
> @@ -949,7 +951,7 @@ int crash_shrink_memory(unsigned long new_size)
>  {
>   int ret = 0;
>   unsigned long start, end;
> - unsigned long old_size;
> + unsigned long low_size, old_size;
>   struct resource *ram_res;
>  
>   mutex_lock(&kexec_mutex);
> @@ -958,6 +960,17 @@ int crash_shrink_memory(unsigned long new_size)
>   ret = -ENOENT;
>   goto unlock;
>   }
> +
> + start = crashk_low_res.start;
> + end = crashk_low_res.end;
> + low_size = (end == 0) ? 0 : end - start + 1;
> + /* Do not shrink crashk_low_res. */
> + if (new_size <= low_size) {
> + ret = -EINVAL;
> + goto unlock;
> + }
> +
> + new_size -= low_size;
>   start = crashk_res.start;
>   end = crashk_res.end;
>   old_size = (end == 0) ? 0 : end - start + 1;
> -- 
> 1.8.3.1
> 
> 
> ___
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec